{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.0, "eval_steps": 61000, "global_step": 122230, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00016362595107584062, "grad_norm": 6.450747489929199, "learning_rate": 8.181297553792031e-09, "loss": 0.4893, "step": 10 }, { "epoch": 0.00032725190215168124, "grad_norm": 6.003659725189209, "learning_rate": 1.6362595107584062e-08, "loss": 0.4797, "step": 20 }, { "epoch": 0.0004908778532275219, "grad_norm": 7.656006813049316, "learning_rate": 2.4543892661376094e-08, "loss": 0.4769, "step": 30 }, { "epoch": 0.0006545038043033625, "grad_norm": 4.77230978012085, "learning_rate": 3.2725190215168125e-08, "loss": 0.4858, "step": 40 }, { "epoch": 0.0008181297553792031, "grad_norm": 10.098441123962402, "learning_rate": 4.090648776896016e-08, "loss": 0.5014, "step": 50 }, { "epoch": 0.0009817557064550437, "grad_norm": 7.6441650390625, "learning_rate": 4.908778532275219e-08, "loss": 0.4555, "step": 60 }, { "epoch": 0.0011453816575308843, "grad_norm": 6.458242416381836, "learning_rate": 5.7269082876544225e-08, "loss": 0.481, "step": 70 }, { "epoch": 0.001309007608606725, "grad_norm": 5.756002902984619, "learning_rate": 6.545038043033625e-08, "loss": 0.4646, "step": 80 }, { "epoch": 0.0014726335596825656, "grad_norm": 6.363128662109375, "learning_rate": 7.363167798412829e-08, "loss": 0.4534, "step": 90 }, { "epoch": 0.0016362595107584062, "grad_norm": 8.146249771118164, "learning_rate": 8.181297553792032e-08, "loss": 0.4646, "step": 100 }, { "epoch": 0.0017998854618342468, "grad_norm": 6.564874649047852, "learning_rate": 8.999427309171235e-08, "loss": 0.4325, "step": 110 }, { "epoch": 0.0019635114129100874, "grad_norm": 6.186850070953369, "learning_rate": 9.817557064550437e-08, "loss": 0.4514, "step": 120 }, { "epoch": 0.002127137363985928, "grad_norm": 7.0988969802856445, "learning_rate": 1.0635686819929643e-07, "loss": 0.4162, "step": 130 }, { "epoch": 0.0022907633150617687, "grad_norm": 5.64640998840332, "learning_rate": 1.1453816575308845e-07, "loss": 0.3946, "step": 140 }, { "epoch": 0.0024543892661376093, "grad_norm": 5.670436859130859, "learning_rate": 1.2271946330688047e-07, "loss": 0.3344, "step": 150 }, { "epoch": 0.00261801521721345, "grad_norm": 5.174366474151611, "learning_rate": 1.309007608606725e-07, "loss": 0.3333, "step": 160 }, { "epoch": 0.0027816411682892906, "grad_norm": 3.695723533630371, "learning_rate": 1.3908205841446455e-07, "loss": 0.2925, "step": 170 }, { "epoch": 0.002945267119365131, "grad_norm": 4.6453046798706055, "learning_rate": 1.4726335596825657e-07, "loss": 0.2809, "step": 180 }, { "epoch": 0.003108893070440972, "grad_norm": 2.976259469985962, "learning_rate": 1.554446535220486e-07, "loss": 0.2577, "step": 190 }, { "epoch": 0.0032725190215168124, "grad_norm": 2.8548524379730225, "learning_rate": 1.6362595107584065e-07, "loss": 0.2132, "step": 200 }, { "epoch": 0.003436144972592653, "grad_norm": 2.73585844039917, "learning_rate": 1.7180724862963267e-07, "loss": 0.1914, "step": 210 }, { "epoch": 0.0035997709236684937, "grad_norm": 2.198151111602783, "learning_rate": 1.799885461834247e-07, "loss": 0.1853, "step": 220 }, { "epoch": 0.0037633968747443343, "grad_norm": 2.124385118484497, "learning_rate": 1.8816984373721675e-07, "loss": 0.178, "step": 230 }, { "epoch": 0.003927022825820175, "grad_norm": 1.9803823232650757, "learning_rate": 1.9635114129100875e-07, "loss": 0.1603, "step": 240 }, { "epoch": 0.004090648776896016, "grad_norm": 1.675918698310852, "learning_rate": 2.045324388448008e-07, "loss": 0.1302, "step": 250 }, { "epoch": 0.004254274727971856, "grad_norm": 2.0964345932006836, "learning_rate": 2.1271373639859285e-07, "loss": 0.1478, "step": 260 }, { "epoch": 0.004417900679047697, "grad_norm": 1.632344365119934, "learning_rate": 2.2089503395238485e-07, "loss": 0.1341, "step": 270 }, { "epoch": 0.004581526630123537, "grad_norm": 1.394518494606018, "learning_rate": 2.290763315061769e-07, "loss": 0.1131, "step": 280 }, { "epoch": 0.004745152581199378, "grad_norm": 2.0453503131866455, "learning_rate": 2.3725762905996892e-07, "loss": 0.1112, "step": 290 }, { "epoch": 0.004908778532275219, "grad_norm": 1.4522521495819092, "learning_rate": 2.4543892661376095e-07, "loss": 0.1101, "step": 300 }, { "epoch": 0.00507240448335106, "grad_norm": 1.6254401206970215, "learning_rate": 2.53620224167553e-07, "loss": 0.108, "step": 310 }, { "epoch": 0.0052360304344269, "grad_norm": 1.332760214805603, "learning_rate": 2.61801521721345e-07, "loss": 0.0942, "step": 320 }, { "epoch": 0.005399656385502741, "grad_norm": 1.449270486831665, "learning_rate": 2.6998281927513705e-07, "loss": 0.0873, "step": 330 }, { "epoch": 0.005563282336578581, "grad_norm": 1.3381404876708984, "learning_rate": 2.781641168289291e-07, "loss": 0.0732, "step": 340 }, { "epoch": 0.005726908287654422, "grad_norm": 1.650217890739441, "learning_rate": 2.863454143827211e-07, "loss": 0.0941, "step": 350 }, { "epoch": 0.005890534238730262, "grad_norm": 1.642314076423645, "learning_rate": 2.9452671193651315e-07, "loss": 0.0854, "step": 360 }, { "epoch": 0.006054160189806103, "grad_norm": 1.5752111673355103, "learning_rate": 3.027080094903052e-07, "loss": 0.0859, "step": 370 }, { "epoch": 0.006217786140881944, "grad_norm": 2.0331008434295654, "learning_rate": 3.108893070440972e-07, "loss": 0.0832, "step": 380 }, { "epoch": 0.006381412091957785, "grad_norm": 1.6878950595855713, "learning_rate": 3.1907060459788925e-07, "loss": 0.063, "step": 390 }, { "epoch": 0.006545038043033625, "grad_norm": 1.2223267555236816, "learning_rate": 3.272519021516813e-07, "loss": 0.0877, "step": 400 }, { "epoch": 0.006708663994109466, "grad_norm": 1.5864821672439575, "learning_rate": 3.3543319970547335e-07, "loss": 0.0768, "step": 410 }, { "epoch": 0.006872289945185306, "grad_norm": 1.4420173168182373, "learning_rate": 3.4361449725926535e-07, "loss": 0.0768, "step": 420 }, { "epoch": 0.007035915896261147, "grad_norm": 1.4067822694778442, "learning_rate": 3.5179579481305735e-07, "loss": 0.0627, "step": 430 }, { "epoch": 0.007199541847336987, "grad_norm": 1.43185555934906, "learning_rate": 3.599770923668494e-07, "loss": 0.078, "step": 440 }, { "epoch": 0.007363167798412828, "grad_norm": 1.3940660953521729, "learning_rate": 3.6815838992064145e-07, "loss": 0.074, "step": 450 }, { "epoch": 0.0075267937494886685, "grad_norm": 1.532396912574768, "learning_rate": 3.763396874744335e-07, "loss": 0.0712, "step": 460 }, { "epoch": 0.00769041970056451, "grad_norm": 1.0307947397232056, "learning_rate": 3.845209850282255e-07, "loss": 0.06, "step": 470 }, { "epoch": 0.00785404565164035, "grad_norm": 1.2340439558029175, "learning_rate": 3.927022825820175e-07, "loss": 0.0723, "step": 480 }, { "epoch": 0.00801767160271619, "grad_norm": 1.3179080486297607, "learning_rate": 4.0088358013580955e-07, "loss": 0.0575, "step": 490 }, { "epoch": 0.008181297553792032, "grad_norm": 1.514940857887268, "learning_rate": 4.090648776896016e-07, "loss": 0.0609, "step": 500 }, { "epoch": 0.008344923504867872, "grad_norm": 1.3926035165786743, "learning_rate": 4.1724617524339365e-07, "loss": 0.0548, "step": 510 }, { "epoch": 0.008508549455943712, "grad_norm": 1.7142009735107422, "learning_rate": 4.254274727971857e-07, "loss": 0.0712, "step": 520 }, { "epoch": 0.008672175407019552, "grad_norm": 1.7795113325119019, "learning_rate": 4.336087703509777e-07, "loss": 0.0631, "step": 530 }, { "epoch": 0.008835801358095394, "grad_norm": 1.0693011283874512, "learning_rate": 4.417900679047697e-07, "loss": 0.0551, "step": 540 }, { "epoch": 0.008999427309171235, "grad_norm": 1.2251043319702148, "learning_rate": 4.4997136545856175e-07, "loss": 0.0483, "step": 550 }, { "epoch": 0.009163053260247075, "grad_norm": 1.2896251678466797, "learning_rate": 4.581526630123538e-07, "loss": 0.0549, "step": 560 }, { "epoch": 0.009326679211322917, "grad_norm": 1.7514303922653198, "learning_rate": 4.6633396056614585e-07, "loss": 0.0491, "step": 570 }, { "epoch": 0.009490305162398757, "grad_norm": 1.3628102540969849, "learning_rate": 4.7451525811993785e-07, "loss": 0.0584, "step": 580 }, { "epoch": 0.009653931113474597, "grad_norm": 1.2788424491882324, "learning_rate": 4.826965556737298e-07, "loss": 0.0512, "step": 590 }, { "epoch": 0.009817557064550437, "grad_norm": 1.4374266862869263, "learning_rate": 4.908778532275219e-07, "loss": 0.048, "step": 600 }, { "epoch": 0.00998118301562628, "grad_norm": 1.0559022426605225, "learning_rate": 4.99059150781314e-07, "loss": 0.048, "step": 610 }, { "epoch": 0.01014480896670212, "grad_norm": 1.654038906097412, "learning_rate": 5.07240448335106e-07, "loss": 0.0463, "step": 620 }, { "epoch": 0.01030843491777796, "grad_norm": 1.4024804830551147, "learning_rate": 5.15421745888898e-07, "loss": 0.0444, "step": 630 }, { "epoch": 0.0104720608688538, "grad_norm": 1.0499006509780884, "learning_rate": 5.2360304344269e-07, "loss": 0.0561, "step": 640 }, { "epoch": 0.010635686819929642, "grad_norm": 1.0363799333572388, "learning_rate": 5.31784340996482e-07, "loss": 0.0366, "step": 650 }, { "epoch": 0.010799312771005482, "grad_norm": 0.8694953918457031, "learning_rate": 5.399656385502741e-07, "loss": 0.0416, "step": 660 }, { "epoch": 0.010962938722081322, "grad_norm": 1.33260178565979, "learning_rate": 5.481469361040661e-07, "loss": 0.0476, "step": 670 }, { "epoch": 0.011126564673157162, "grad_norm": 1.2754524946212769, "learning_rate": 5.563282336578582e-07, "loss": 0.0479, "step": 680 }, { "epoch": 0.011290190624233004, "grad_norm": 1.1440926790237427, "learning_rate": 5.645095312116501e-07, "loss": 0.0419, "step": 690 }, { "epoch": 0.011453816575308844, "grad_norm": 1.6302803754806519, "learning_rate": 5.726908287654422e-07, "loss": 0.0427, "step": 700 }, { "epoch": 0.011617442526384684, "grad_norm": 1.178799033164978, "learning_rate": 5.808721263192342e-07, "loss": 0.0415, "step": 710 }, { "epoch": 0.011781068477460525, "grad_norm": 1.1128038167953491, "learning_rate": 5.890534238730263e-07, "loss": 0.0439, "step": 720 }, { "epoch": 0.011944694428536367, "grad_norm": 1.201760172843933, "learning_rate": 5.972347214268183e-07, "loss": 0.0376, "step": 730 }, { "epoch": 0.012108320379612207, "grad_norm": 1.2795042991638184, "learning_rate": 6.054160189806104e-07, "loss": 0.0419, "step": 740 }, { "epoch": 0.012271946330688047, "grad_norm": 1.1800705194473267, "learning_rate": 6.135973165344023e-07, "loss": 0.0467, "step": 750 }, { "epoch": 0.012435572281763887, "grad_norm": 1.050722599029541, "learning_rate": 6.217786140881944e-07, "loss": 0.0408, "step": 760 }, { "epoch": 0.012599198232839729, "grad_norm": 1.1177846193313599, "learning_rate": 6.299599116419864e-07, "loss": 0.0506, "step": 770 }, { "epoch": 0.01276282418391557, "grad_norm": 1.4573966264724731, "learning_rate": 6.381412091957785e-07, "loss": 0.0389, "step": 780 }, { "epoch": 0.01292645013499141, "grad_norm": 1.0579060316085815, "learning_rate": 6.463225067495705e-07, "loss": 0.0447, "step": 790 }, { "epoch": 0.01309007608606725, "grad_norm": 1.2879925966262817, "learning_rate": 6.545038043033626e-07, "loss": 0.0423, "step": 800 }, { "epoch": 0.013253702037143092, "grad_norm": 0.8808040022850037, "learning_rate": 6.626851018571547e-07, "loss": 0.031, "step": 810 }, { "epoch": 0.013417327988218932, "grad_norm": 1.0718401670455933, "learning_rate": 6.708663994109467e-07, "loss": 0.0448, "step": 820 }, { "epoch": 0.013580953939294772, "grad_norm": 1.5772994756698608, "learning_rate": 6.790476969647386e-07, "loss": 0.0388, "step": 830 }, { "epoch": 0.013744579890370612, "grad_norm": 0.9327505826950073, "learning_rate": 6.872289945185307e-07, "loss": 0.0381, "step": 840 }, { "epoch": 0.013908205841446454, "grad_norm": 1.0323652029037476, "learning_rate": 6.954102920723226e-07, "loss": 0.0345, "step": 850 }, { "epoch": 0.014071831792522294, "grad_norm": 1.0653870105743408, "learning_rate": 7.035915896261147e-07, "loss": 0.0364, "step": 860 }, { "epoch": 0.014235457743598134, "grad_norm": 0.8184215426445007, "learning_rate": 7.117728871799067e-07, "loss": 0.0324, "step": 870 }, { "epoch": 0.014399083694673975, "grad_norm": 1.2273850440979004, "learning_rate": 7.199541847336988e-07, "loss": 0.0419, "step": 880 }, { "epoch": 0.014562709645749817, "grad_norm": 0.9224192500114441, "learning_rate": 7.281354822874908e-07, "loss": 0.0267, "step": 890 }, { "epoch": 0.014726335596825657, "grad_norm": 1.046655535697937, "learning_rate": 7.363167798412829e-07, "loss": 0.0353, "step": 900 }, { "epoch": 0.014889961547901497, "grad_norm": 0.9340174198150635, "learning_rate": 7.44498077395075e-07, "loss": 0.0289, "step": 910 }, { "epoch": 0.015053587498977337, "grad_norm": 1.1622369289398193, "learning_rate": 7.52679374948867e-07, "loss": 0.0321, "step": 920 }, { "epoch": 0.015217213450053179, "grad_norm": 0.6142399311065674, "learning_rate": 7.60860672502659e-07, "loss": 0.0304, "step": 930 }, { "epoch": 0.01538083940112902, "grad_norm": 1.0446439981460571, "learning_rate": 7.69041970056451e-07, "loss": 0.0312, "step": 940 }, { "epoch": 0.01554446535220486, "grad_norm": 0.8211088180541992, "learning_rate": 7.77223267610243e-07, "loss": 0.0312, "step": 950 }, { "epoch": 0.0157080913032807, "grad_norm": 1.1001378297805786, "learning_rate": 7.85404565164035e-07, "loss": 0.034, "step": 960 }, { "epoch": 0.01587171725435654, "grad_norm": 1.4075452089309692, "learning_rate": 7.93585862717827e-07, "loss": 0.0216, "step": 970 }, { "epoch": 0.01603534320543238, "grad_norm": 1.2252289056777954, "learning_rate": 8.017671602716191e-07, "loss": 0.0262, "step": 980 }, { "epoch": 0.016198969156508224, "grad_norm": 0.9565718173980713, "learning_rate": 8.099484578254111e-07, "loss": 0.0422, "step": 990 }, { "epoch": 0.016362595107584064, "grad_norm": 1.0116455554962158, "learning_rate": 8.181297553792032e-07, "loss": 0.022, "step": 1000 }, { "epoch": 0.016526221058659904, "grad_norm": 1.3713945150375366, "learning_rate": 8.263110529329952e-07, "loss": 0.0281, "step": 1010 }, { "epoch": 0.016689847009735744, "grad_norm": 0.9518176913261414, "learning_rate": 8.344923504867873e-07, "loss": 0.0307, "step": 1020 }, { "epoch": 0.016853472960811584, "grad_norm": 1.0284076929092407, "learning_rate": 8.426736480405793e-07, "loss": 0.0325, "step": 1030 }, { "epoch": 0.017017098911887425, "grad_norm": 0.9852408766746521, "learning_rate": 8.508549455943714e-07, "loss": 0.0341, "step": 1040 }, { "epoch": 0.017180724862963265, "grad_norm": 1.060655117034912, "learning_rate": 8.590362431481633e-07, "loss": 0.0347, "step": 1050 }, { "epoch": 0.017344350814039105, "grad_norm": 1.2484509944915771, "learning_rate": 8.672175407019554e-07, "loss": 0.0271, "step": 1060 }, { "epoch": 0.01750797676511495, "grad_norm": 1.2672250270843506, "learning_rate": 8.753988382557474e-07, "loss": 0.0318, "step": 1070 }, { "epoch": 0.01767160271619079, "grad_norm": 0.7602401971817017, "learning_rate": 8.835801358095394e-07, "loss": 0.0284, "step": 1080 }, { "epoch": 0.01783522866726663, "grad_norm": 1.3558201789855957, "learning_rate": 8.917614333633314e-07, "loss": 0.0273, "step": 1090 }, { "epoch": 0.01799885461834247, "grad_norm": 0.8952217698097229, "learning_rate": 8.999427309171235e-07, "loss": 0.0368, "step": 1100 }, { "epoch": 0.01816248056941831, "grad_norm": 1.0355019569396973, "learning_rate": 9.081240284709155e-07, "loss": 0.0248, "step": 1110 }, { "epoch": 0.01832610652049415, "grad_norm": 0.6996469497680664, "learning_rate": 9.163053260247076e-07, "loss": 0.0224, "step": 1120 }, { "epoch": 0.01848973247156999, "grad_norm": 1.456304907798767, "learning_rate": 9.244866235784996e-07, "loss": 0.0305, "step": 1130 }, { "epoch": 0.018653358422645833, "grad_norm": 0.8071394562721252, "learning_rate": 9.326679211322917e-07, "loss": 0.0221, "step": 1140 }, { "epoch": 0.018816984373721674, "grad_norm": 1.0477321147918701, "learning_rate": 9.408492186860837e-07, "loss": 0.0299, "step": 1150 }, { "epoch": 0.018980610324797514, "grad_norm": 0.8900778293609619, "learning_rate": 9.490305162398757e-07, "loss": 0.0211, "step": 1160 }, { "epoch": 0.019144236275873354, "grad_norm": 1.1308872699737549, "learning_rate": 9.572118137936676e-07, "loss": 0.0215, "step": 1170 }, { "epoch": 0.019307862226949194, "grad_norm": 0.9169854521751404, "learning_rate": 9.653931113474597e-07, "loss": 0.0232, "step": 1180 }, { "epoch": 0.019471488178025034, "grad_norm": 0.8490586876869202, "learning_rate": 9.735744089012517e-07, "loss": 0.0182, "step": 1190 }, { "epoch": 0.019635114129100874, "grad_norm": 0.9158782958984375, "learning_rate": 9.817557064550438e-07, "loss": 0.0335, "step": 1200 }, { "epoch": 0.019798740080176715, "grad_norm": 0.9684118628501892, "learning_rate": 9.899370040088358e-07, "loss": 0.0258, "step": 1210 }, { "epoch": 0.01996236603125256, "grad_norm": 0.8719280362129211, "learning_rate": 9.98118301562628e-07, "loss": 0.0223, "step": 1220 }, { "epoch": 0.0201259919823284, "grad_norm": 0.8022724986076355, "learning_rate": 1.00629959911642e-06, "loss": 0.0244, "step": 1230 }, { "epoch": 0.02028961793340424, "grad_norm": 1.1774309873580933, "learning_rate": 1.014480896670212e-06, "loss": 0.025, "step": 1240 }, { "epoch": 0.02045324388448008, "grad_norm": 0.7491937875747681, "learning_rate": 1.022662194224004e-06, "loss": 0.0246, "step": 1250 }, { "epoch": 0.02061686983555592, "grad_norm": 0.7116556763648987, "learning_rate": 1.030843491777796e-06, "loss": 0.0275, "step": 1260 }, { "epoch": 0.02078049578663176, "grad_norm": 0.9816871881484985, "learning_rate": 1.039024789331588e-06, "loss": 0.0247, "step": 1270 }, { "epoch": 0.0209441217377076, "grad_norm": 0.6181216835975647, "learning_rate": 1.04720608688538e-06, "loss": 0.0235, "step": 1280 }, { "epoch": 0.02110774768878344, "grad_norm": 1.024975299835205, "learning_rate": 1.055387384439172e-06, "loss": 0.0277, "step": 1290 }, { "epoch": 0.021271373639859283, "grad_norm": 1.5072187185287476, "learning_rate": 1.063568681992964e-06, "loss": 0.0258, "step": 1300 }, { "epoch": 0.021434999590935123, "grad_norm": 0.7752158641815186, "learning_rate": 1.0717499795467561e-06, "loss": 0.0235, "step": 1310 }, { "epoch": 0.021598625542010964, "grad_norm": 1.258660912513733, "learning_rate": 1.0799312771005482e-06, "loss": 0.0255, "step": 1320 }, { "epoch": 0.021762251493086804, "grad_norm": 0.997409999370575, "learning_rate": 1.0881125746543402e-06, "loss": 0.0181, "step": 1330 }, { "epoch": 0.021925877444162644, "grad_norm": 1.1911503076553345, "learning_rate": 1.0962938722081323e-06, "loss": 0.0189, "step": 1340 }, { "epoch": 0.022089503395238484, "grad_norm": 0.8701474070549011, "learning_rate": 1.1044751697619243e-06, "loss": 0.0257, "step": 1350 }, { "epoch": 0.022253129346314324, "grad_norm": 0.8997549414634705, "learning_rate": 1.1126564673157164e-06, "loss": 0.0246, "step": 1360 }, { "epoch": 0.022416755297390165, "grad_norm": 1.4192978143692017, "learning_rate": 1.1208377648695084e-06, "loss": 0.0306, "step": 1370 }, { "epoch": 0.022580381248466008, "grad_norm": 0.9754282832145691, "learning_rate": 1.1290190624233003e-06, "loss": 0.0278, "step": 1380 }, { "epoch": 0.02274400719954185, "grad_norm": 0.9182608127593994, "learning_rate": 1.1372003599770923e-06, "loss": 0.0223, "step": 1390 }, { "epoch": 0.02290763315061769, "grad_norm": 0.9532468914985657, "learning_rate": 1.1453816575308844e-06, "loss": 0.0249, "step": 1400 }, { "epoch": 0.02307125910169353, "grad_norm": 0.8933977484703064, "learning_rate": 1.1535629550846764e-06, "loss": 0.0257, "step": 1410 }, { "epoch": 0.02323488505276937, "grad_norm": 0.9456914067268372, "learning_rate": 1.1617442526384685e-06, "loss": 0.0256, "step": 1420 }, { "epoch": 0.02339851100384521, "grad_norm": 0.9724444150924683, "learning_rate": 1.1699255501922605e-06, "loss": 0.0245, "step": 1430 }, { "epoch": 0.02356213695492105, "grad_norm": 0.8060034513473511, "learning_rate": 1.1781068477460526e-06, "loss": 0.0288, "step": 1440 }, { "epoch": 0.02372576290599689, "grad_norm": 0.8337758779525757, "learning_rate": 1.1862881452998446e-06, "loss": 0.0209, "step": 1450 }, { "epoch": 0.023889388857072733, "grad_norm": 0.6603108048439026, "learning_rate": 1.1944694428536367e-06, "loss": 0.024, "step": 1460 }, { "epoch": 0.024053014808148573, "grad_norm": 1.0457243919372559, "learning_rate": 1.2026507404074287e-06, "loss": 0.0168, "step": 1470 }, { "epoch": 0.024216640759224414, "grad_norm": 0.9682090878486633, "learning_rate": 1.2108320379612208e-06, "loss": 0.024, "step": 1480 }, { "epoch": 0.024380266710300254, "grad_norm": 0.7535257339477539, "learning_rate": 1.2190133355150126e-06, "loss": 0.0189, "step": 1490 }, { "epoch": 0.024543892661376094, "grad_norm": 0.6845926642417908, "learning_rate": 1.2271946330688047e-06, "loss": 0.0194, "step": 1500 }, { "epoch": 0.024707518612451934, "grad_norm": 0.9407981038093567, "learning_rate": 1.2353759306225967e-06, "loss": 0.019, "step": 1510 }, { "epoch": 0.024871144563527774, "grad_norm": 0.97010737657547, "learning_rate": 1.2435572281763888e-06, "loss": 0.0222, "step": 1520 }, { "epoch": 0.025034770514603615, "grad_norm": 1.3376246690750122, "learning_rate": 1.2517385257301808e-06, "loss": 0.028, "step": 1530 }, { "epoch": 0.025198396465679458, "grad_norm": 0.6298123002052307, "learning_rate": 1.2599198232839729e-06, "loss": 0.0315, "step": 1540 }, { "epoch": 0.0253620224167553, "grad_norm": 0.7916223406791687, "learning_rate": 1.268101120837765e-06, "loss": 0.0243, "step": 1550 }, { "epoch": 0.02552564836783114, "grad_norm": 0.7279757857322693, "learning_rate": 1.276282418391557e-06, "loss": 0.0192, "step": 1560 }, { "epoch": 0.02568927431890698, "grad_norm": 0.5921642780303955, "learning_rate": 1.284463715945349e-06, "loss": 0.0198, "step": 1570 }, { "epoch": 0.02585290026998282, "grad_norm": 0.6908919811248779, "learning_rate": 1.292645013499141e-06, "loss": 0.0188, "step": 1580 }, { "epoch": 0.02601652622105866, "grad_norm": 0.7825183272361755, "learning_rate": 1.3008263110529331e-06, "loss": 0.0233, "step": 1590 }, { "epoch": 0.0261801521721345, "grad_norm": 0.5276881456375122, "learning_rate": 1.3090076086067252e-06, "loss": 0.024, "step": 1600 }, { "epoch": 0.02634377812321034, "grad_norm": 0.7823368310928345, "learning_rate": 1.3171889061605172e-06, "loss": 0.021, "step": 1610 }, { "epoch": 0.026507404074286183, "grad_norm": 0.531443178653717, "learning_rate": 1.3253702037143093e-06, "loss": 0.0189, "step": 1620 }, { "epoch": 0.026671030025362023, "grad_norm": 0.7160040140151978, "learning_rate": 1.3335515012681014e-06, "loss": 0.0142, "step": 1630 }, { "epoch": 0.026834655976437864, "grad_norm": 0.945673406124115, "learning_rate": 1.3417327988218934e-06, "loss": 0.019, "step": 1640 }, { "epoch": 0.026998281927513704, "grad_norm": 1.1356985569000244, "learning_rate": 1.3499140963756852e-06, "loss": 0.0231, "step": 1650 }, { "epoch": 0.027161907878589544, "grad_norm": 0.8492401838302612, "learning_rate": 1.3580953939294773e-06, "loss": 0.0236, "step": 1660 }, { "epoch": 0.027325533829665384, "grad_norm": 0.721347987651825, "learning_rate": 1.3662766914832693e-06, "loss": 0.0204, "step": 1670 }, { "epoch": 0.027489159780741224, "grad_norm": 0.9221570491790771, "learning_rate": 1.3744579890370614e-06, "loss": 0.0233, "step": 1680 }, { "epoch": 0.027652785731817068, "grad_norm": 0.7661713361740112, "learning_rate": 1.3826392865908532e-06, "loss": 0.0211, "step": 1690 }, { "epoch": 0.027816411682892908, "grad_norm": 0.9114108085632324, "learning_rate": 1.3908205841446453e-06, "loss": 0.0208, "step": 1700 }, { "epoch": 0.02798003763396875, "grad_norm": 0.7525148391723633, "learning_rate": 1.3990018816984373e-06, "loss": 0.0227, "step": 1710 }, { "epoch": 0.02814366358504459, "grad_norm": 0.7764679789543152, "learning_rate": 1.4071831792522294e-06, "loss": 0.0264, "step": 1720 }, { "epoch": 0.02830728953612043, "grad_norm": 0.6332064867019653, "learning_rate": 1.4153644768060214e-06, "loss": 0.0171, "step": 1730 }, { "epoch": 0.02847091548719627, "grad_norm": 0.5143387317657471, "learning_rate": 1.4235457743598135e-06, "loss": 0.0156, "step": 1740 }, { "epoch": 0.02863454143827211, "grad_norm": 0.7839280962944031, "learning_rate": 1.4317270719136055e-06, "loss": 0.0161, "step": 1750 }, { "epoch": 0.02879816738934795, "grad_norm": 0.7096639275550842, "learning_rate": 1.4399083694673976e-06, "loss": 0.0206, "step": 1760 }, { "epoch": 0.028961793340423793, "grad_norm": 0.9041060209274292, "learning_rate": 1.4480896670211896e-06, "loss": 0.0264, "step": 1770 }, { "epoch": 0.029125419291499633, "grad_norm": 0.6815425157546997, "learning_rate": 1.4562709645749817e-06, "loss": 0.0221, "step": 1780 }, { "epoch": 0.029289045242575473, "grad_norm": 1.123404622077942, "learning_rate": 1.4644522621287737e-06, "loss": 0.0183, "step": 1790 }, { "epoch": 0.029452671193651313, "grad_norm": 0.8858315944671631, "learning_rate": 1.4726335596825658e-06, "loss": 0.0203, "step": 1800 }, { "epoch": 0.029616297144727154, "grad_norm": 0.8124191761016846, "learning_rate": 1.4808148572363578e-06, "loss": 0.0118, "step": 1810 }, { "epoch": 0.029779923095802994, "grad_norm": 0.8867793679237366, "learning_rate": 1.48899615479015e-06, "loss": 0.0133, "step": 1820 }, { "epoch": 0.029943549046878834, "grad_norm": 0.7105574607849121, "learning_rate": 1.497177452343942e-06, "loss": 0.024, "step": 1830 }, { "epoch": 0.030107174997954674, "grad_norm": 0.9919645190238953, "learning_rate": 1.505358749897734e-06, "loss": 0.016, "step": 1840 }, { "epoch": 0.030270800949030518, "grad_norm": 0.49908286333084106, "learning_rate": 1.513540047451526e-06, "loss": 0.0201, "step": 1850 }, { "epoch": 0.030434426900106358, "grad_norm": 0.7252497673034668, "learning_rate": 1.521721345005318e-06, "loss": 0.0179, "step": 1860 }, { "epoch": 0.030598052851182198, "grad_norm": 0.666792631149292, "learning_rate": 1.52990264255911e-06, "loss": 0.0267, "step": 1870 }, { "epoch": 0.03076167880225804, "grad_norm": 1.1081174612045288, "learning_rate": 1.538083940112902e-06, "loss": 0.0242, "step": 1880 }, { "epoch": 0.03092530475333388, "grad_norm": 0.8116829991340637, "learning_rate": 1.546265237666694e-06, "loss": 0.0182, "step": 1890 }, { "epoch": 0.03108893070440972, "grad_norm": 0.7736301422119141, "learning_rate": 1.554446535220486e-06, "loss": 0.0178, "step": 1900 }, { "epoch": 0.03125255665548556, "grad_norm": 0.6938651204109192, "learning_rate": 1.5626278327742781e-06, "loss": 0.0195, "step": 1910 }, { "epoch": 0.0314161826065614, "grad_norm": 0.9768052101135254, "learning_rate": 1.57080913032807e-06, "loss": 0.0177, "step": 1920 }, { "epoch": 0.03157980855763724, "grad_norm": 0.8962604999542236, "learning_rate": 1.578990427881862e-06, "loss": 0.017, "step": 1930 }, { "epoch": 0.03174343450871308, "grad_norm": 0.7683470845222473, "learning_rate": 1.587171725435654e-06, "loss": 0.017, "step": 1940 }, { "epoch": 0.03190706045978892, "grad_norm": 0.8589656352996826, "learning_rate": 1.5953530229894461e-06, "loss": 0.0134, "step": 1950 }, { "epoch": 0.03207068641086476, "grad_norm": 0.47683417797088623, "learning_rate": 1.6035343205432382e-06, "loss": 0.0209, "step": 1960 }, { "epoch": 0.032234312361940604, "grad_norm": 0.7166604399681091, "learning_rate": 1.6117156180970302e-06, "loss": 0.0147, "step": 1970 }, { "epoch": 0.03239793831301645, "grad_norm": 0.7298545241355896, "learning_rate": 1.6198969156508223e-06, "loss": 0.0213, "step": 1980 }, { "epoch": 0.032561564264092284, "grad_norm": 0.741427481174469, "learning_rate": 1.6280782132046143e-06, "loss": 0.0178, "step": 1990 }, { "epoch": 0.03272519021516813, "grad_norm": 0.6702003479003906, "learning_rate": 1.6362595107584064e-06, "loss": 0.019, "step": 2000 }, { "epoch": 0.032888816166243964, "grad_norm": 1.0223054885864258, "learning_rate": 1.6444408083121984e-06, "loss": 0.0219, "step": 2010 }, { "epoch": 0.03305244211731981, "grad_norm": 0.7337533831596375, "learning_rate": 1.6526221058659905e-06, "loss": 0.0215, "step": 2020 }, { "epoch": 0.033216068068395645, "grad_norm": 0.698671281337738, "learning_rate": 1.6608034034197825e-06, "loss": 0.0151, "step": 2030 }, { "epoch": 0.03337969401947149, "grad_norm": 0.8325813412666321, "learning_rate": 1.6689847009735746e-06, "loss": 0.0161, "step": 2040 }, { "epoch": 0.03354331997054733, "grad_norm": 0.7636258006095886, "learning_rate": 1.6771659985273666e-06, "loss": 0.0211, "step": 2050 }, { "epoch": 0.03370694592162317, "grad_norm": 0.6166092157363892, "learning_rate": 1.6853472960811587e-06, "loss": 0.0145, "step": 2060 }, { "epoch": 0.03387057187269901, "grad_norm": 0.47421813011169434, "learning_rate": 1.6935285936349507e-06, "loss": 0.0159, "step": 2070 }, { "epoch": 0.03403419782377485, "grad_norm": 0.627687394618988, "learning_rate": 1.7017098911887428e-06, "loss": 0.0153, "step": 2080 }, { "epoch": 0.03419782377485069, "grad_norm": 0.5683699250221252, "learning_rate": 1.7098911887425346e-06, "loss": 0.0152, "step": 2090 }, { "epoch": 0.03436144972592653, "grad_norm": 0.702182948589325, "learning_rate": 1.7180724862963267e-06, "loss": 0.0147, "step": 2100 }, { "epoch": 0.03452507567700237, "grad_norm": 0.6274211406707764, "learning_rate": 1.7262537838501187e-06, "loss": 0.0295, "step": 2110 }, { "epoch": 0.03468870162807821, "grad_norm": 0.6848294734954834, "learning_rate": 1.7344350814039108e-06, "loss": 0.0156, "step": 2120 }, { "epoch": 0.034852327579154053, "grad_norm": 0.7503065466880798, "learning_rate": 1.7426163789577028e-06, "loss": 0.0205, "step": 2130 }, { "epoch": 0.0350159535302299, "grad_norm": 0.3985742926597595, "learning_rate": 1.750797676511495e-06, "loss": 0.0133, "step": 2140 }, { "epoch": 0.035179579481305734, "grad_norm": 0.5512229204177856, "learning_rate": 1.7589789740652867e-06, "loss": 0.022, "step": 2150 }, { "epoch": 0.03534320543238158, "grad_norm": 0.3374076783657074, "learning_rate": 1.7671602716190788e-06, "loss": 0.0163, "step": 2160 }, { "epoch": 0.035506831383457414, "grad_norm": 0.680812418460846, "learning_rate": 1.7753415691728708e-06, "loss": 0.0154, "step": 2170 }, { "epoch": 0.03567045733453326, "grad_norm": 0.7778629660606384, "learning_rate": 1.7835228667266629e-06, "loss": 0.0181, "step": 2180 }, { "epoch": 0.035834083285609095, "grad_norm": 0.9291898012161255, "learning_rate": 1.791704164280455e-06, "loss": 0.0167, "step": 2190 }, { "epoch": 0.03599770923668494, "grad_norm": 0.9499353170394897, "learning_rate": 1.799885461834247e-06, "loss": 0.0146, "step": 2200 }, { "epoch": 0.03616133518776078, "grad_norm": 1.1274068355560303, "learning_rate": 1.808066759388039e-06, "loss": 0.0212, "step": 2210 }, { "epoch": 0.03632496113883662, "grad_norm": 0.7596690058708191, "learning_rate": 1.816248056941831e-06, "loss": 0.017, "step": 2220 }, { "epoch": 0.03648858708991246, "grad_norm": 0.9302141070365906, "learning_rate": 1.8244293544956231e-06, "loss": 0.0231, "step": 2230 }, { "epoch": 0.0366522130409883, "grad_norm": 0.8380283713340759, "learning_rate": 1.8326106520494152e-06, "loss": 0.0132, "step": 2240 }, { "epoch": 0.03681583899206414, "grad_norm": 0.7264215350151062, "learning_rate": 1.8407919496032072e-06, "loss": 0.0175, "step": 2250 }, { "epoch": 0.03697946494313998, "grad_norm": 0.6638109683990479, "learning_rate": 1.8489732471569993e-06, "loss": 0.0145, "step": 2260 }, { "epoch": 0.03714309089421582, "grad_norm": 0.6979623436927795, "learning_rate": 1.8571545447107913e-06, "loss": 0.015, "step": 2270 }, { "epoch": 0.03730671684529167, "grad_norm": 0.4812341332435608, "learning_rate": 1.8653358422645834e-06, "loss": 0.0143, "step": 2280 }, { "epoch": 0.0374703427963675, "grad_norm": 0.5599238872528076, "learning_rate": 1.8735171398183754e-06, "loss": 0.0131, "step": 2290 }, { "epoch": 0.03763396874744335, "grad_norm": 0.9793543815612793, "learning_rate": 1.8816984373721675e-06, "loss": 0.0196, "step": 2300 }, { "epoch": 0.037797594698519184, "grad_norm": 0.5340746641159058, "learning_rate": 1.8898797349259593e-06, "loss": 0.0161, "step": 2310 }, { "epoch": 0.03796122064959503, "grad_norm": 0.47447285056114197, "learning_rate": 1.8980610324797514e-06, "loss": 0.0147, "step": 2320 }, { "epoch": 0.038124846600670864, "grad_norm": 0.9897047877311707, "learning_rate": 1.9062423300335434e-06, "loss": 0.0196, "step": 2330 }, { "epoch": 0.03828847255174671, "grad_norm": 1.0041579008102417, "learning_rate": 1.9144236275873353e-06, "loss": 0.0155, "step": 2340 }, { "epoch": 0.038452098502822545, "grad_norm": 0.6637380123138428, "learning_rate": 1.9226049251411275e-06, "loss": 0.016, "step": 2350 }, { "epoch": 0.03861572445389839, "grad_norm": 0.5131836533546448, "learning_rate": 1.9307862226949194e-06, "loss": 0.0147, "step": 2360 }, { "epoch": 0.03877935040497423, "grad_norm": 0.7195000052452087, "learning_rate": 1.9389675202487116e-06, "loss": 0.0137, "step": 2370 }, { "epoch": 0.03894297635605007, "grad_norm": 0.5455965995788574, "learning_rate": 1.9471488178025035e-06, "loss": 0.014, "step": 2380 }, { "epoch": 0.03910660230712591, "grad_norm": 1.1191215515136719, "learning_rate": 1.9553301153562957e-06, "loss": 0.0131, "step": 2390 }, { "epoch": 0.03927022825820175, "grad_norm": 0.4611370265483856, "learning_rate": 1.9635114129100876e-06, "loss": 0.0111, "step": 2400 }, { "epoch": 0.03943385420927759, "grad_norm": 0.7953206896781921, "learning_rate": 1.97169271046388e-06, "loss": 0.0131, "step": 2410 }, { "epoch": 0.03959748016035343, "grad_norm": 0.5053262710571289, "learning_rate": 1.9798740080176717e-06, "loss": 0.0124, "step": 2420 }, { "epoch": 0.03976110611142927, "grad_norm": 0.7063040137290955, "learning_rate": 1.988055305571464e-06, "loss": 0.0163, "step": 2430 }, { "epoch": 0.03992473206250512, "grad_norm": 0.7882359623908997, "learning_rate": 1.996236603125256e-06, "loss": 0.0176, "step": 2440 }, { "epoch": 0.04008835801358095, "grad_norm": 0.5648556351661682, "learning_rate": 2.004417900679048e-06, "loss": 0.0178, "step": 2450 }, { "epoch": 0.0402519839646568, "grad_norm": 0.7448477745056152, "learning_rate": 2.01259919823284e-06, "loss": 0.0164, "step": 2460 }, { "epoch": 0.040415609915732634, "grad_norm": 0.9597516655921936, "learning_rate": 2.020780495786632e-06, "loss": 0.0124, "step": 2470 }, { "epoch": 0.04057923586680848, "grad_norm": 0.5111032128334045, "learning_rate": 2.028961793340424e-06, "loss": 0.0121, "step": 2480 }, { "epoch": 0.040742861817884314, "grad_norm": 0.8775847554206848, "learning_rate": 2.0371430908942163e-06, "loss": 0.0168, "step": 2490 }, { "epoch": 0.04090648776896016, "grad_norm": 0.6906808614730835, "learning_rate": 2.045324388448008e-06, "loss": 0.0121, "step": 2500 }, { "epoch": 0.041070113720035994, "grad_norm": 0.39874231815338135, "learning_rate": 2.0535056860018004e-06, "loss": 0.0159, "step": 2510 }, { "epoch": 0.04123373967111184, "grad_norm": 0.9298045039176941, "learning_rate": 2.061686983555592e-06, "loss": 0.0147, "step": 2520 }, { "epoch": 0.04139736562218768, "grad_norm": 0.5053309798240662, "learning_rate": 2.069868281109384e-06, "loss": 0.015, "step": 2530 }, { "epoch": 0.04156099157326352, "grad_norm": 0.6905393004417419, "learning_rate": 2.078049578663176e-06, "loss": 0.0158, "step": 2540 }, { "epoch": 0.04172461752433936, "grad_norm": 0.6831179261207581, "learning_rate": 2.086230876216968e-06, "loss": 0.0142, "step": 2550 }, { "epoch": 0.0418882434754152, "grad_norm": 0.7343636751174927, "learning_rate": 2.09441217377076e-06, "loss": 0.0142, "step": 2560 }, { "epoch": 0.04205186942649104, "grad_norm": 0.6231259703636169, "learning_rate": 2.1025934713245522e-06, "loss": 0.0103, "step": 2570 }, { "epoch": 0.04221549537756688, "grad_norm": 0.9527928233146667, "learning_rate": 2.110774768878344e-06, "loss": 0.0105, "step": 2580 }, { "epoch": 0.04237912132864272, "grad_norm": 0.40905502438545227, "learning_rate": 2.1189560664321363e-06, "loss": 0.0151, "step": 2590 }, { "epoch": 0.04254274727971857, "grad_norm": 0.21875037252902985, "learning_rate": 2.127137363985928e-06, "loss": 0.0108, "step": 2600 }, { "epoch": 0.0427063732307944, "grad_norm": 0.22272948920726776, "learning_rate": 2.1353186615397204e-06, "loss": 0.0127, "step": 2610 }, { "epoch": 0.04286999918187025, "grad_norm": 1.1066583395004272, "learning_rate": 2.1434999590935123e-06, "loss": 0.011, "step": 2620 }, { "epoch": 0.043033625132946084, "grad_norm": 0.6232113838195801, "learning_rate": 2.1516812566473045e-06, "loss": 0.012, "step": 2630 }, { "epoch": 0.04319725108402193, "grad_norm": 0.7276982069015503, "learning_rate": 2.1598625542010964e-06, "loss": 0.0156, "step": 2640 }, { "epoch": 0.043360877035097764, "grad_norm": 0.773159384727478, "learning_rate": 2.1680438517548887e-06, "loss": 0.012, "step": 2650 }, { "epoch": 0.04352450298617361, "grad_norm": 0.5005454421043396, "learning_rate": 2.1762251493086805e-06, "loss": 0.0161, "step": 2660 }, { "epoch": 0.043688128937249444, "grad_norm": 0.5888230800628662, "learning_rate": 2.1844064468624728e-06, "loss": 0.0111, "step": 2670 }, { "epoch": 0.04385175488832529, "grad_norm": 0.7097195386886597, "learning_rate": 2.1925877444162646e-06, "loss": 0.0113, "step": 2680 }, { "epoch": 0.04401538083940113, "grad_norm": 0.4649309813976288, "learning_rate": 2.200769041970057e-06, "loss": 0.0137, "step": 2690 }, { "epoch": 0.04417900679047697, "grad_norm": 0.5561385154724121, "learning_rate": 2.2089503395238487e-06, "loss": 0.0142, "step": 2700 }, { "epoch": 0.04434263274155281, "grad_norm": 0.6173912286758423, "learning_rate": 2.217131637077641e-06, "loss": 0.0141, "step": 2710 }, { "epoch": 0.04450625869262865, "grad_norm": 0.38627490401268005, "learning_rate": 2.225312934631433e-06, "loss": 0.0119, "step": 2720 }, { "epoch": 0.04466988464370449, "grad_norm": 0.4352302551269531, "learning_rate": 2.233494232185225e-06, "loss": 0.013, "step": 2730 }, { "epoch": 0.04483351059478033, "grad_norm": 0.5472245812416077, "learning_rate": 2.241675529739017e-06, "loss": 0.0117, "step": 2740 }, { "epoch": 0.04499713654585617, "grad_norm": 0.8657650947570801, "learning_rate": 2.2498568272928087e-06, "loss": 0.0168, "step": 2750 }, { "epoch": 0.045160762496932016, "grad_norm": 0.4442863166332245, "learning_rate": 2.2580381248466006e-06, "loss": 0.0105, "step": 2760 }, { "epoch": 0.04532438844800785, "grad_norm": 0.6586319804191589, "learning_rate": 2.266219422400393e-06, "loss": 0.0112, "step": 2770 }, { "epoch": 0.0454880143990837, "grad_norm": 1.1256358623504639, "learning_rate": 2.2744007199541847e-06, "loss": 0.019, "step": 2780 }, { "epoch": 0.045651640350159534, "grad_norm": 0.8728662729263306, "learning_rate": 2.282582017507977e-06, "loss": 0.0131, "step": 2790 }, { "epoch": 0.04581526630123538, "grad_norm": 0.5684436559677124, "learning_rate": 2.2907633150617688e-06, "loss": 0.0133, "step": 2800 }, { "epoch": 0.045978892252311214, "grad_norm": 0.41163626313209534, "learning_rate": 2.298944612615561e-06, "loss": 0.0149, "step": 2810 }, { "epoch": 0.04614251820338706, "grad_norm": 0.318732351064682, "learning_rate": 2.307125910169353e-06, "loss": 0.0105, "step": 2820 }, { "epoch": 0.0463061441544629, "grad_norm": 0.47379904985427856, "learning_rate": 2.315307207723145e-06, "loss": 0.0128, "step": 2830 }, { "epoch": 0.04646977010553874, "grad_norm": 0.5605900287628174, "learning_rate": 2.323488505276937e-06, "loss": 0.0126, "step": 2840 }, { "epoch": 0.04663339605661458, "grad_norm": 0.6339355111122131, "learning_rate": 2.3316698028307292e-06, "loss": 0.0156, "step": 2850 }, { "epoch": 0.04679702200769042, "grad_norm": 0.4461880028247833, "learning_rate": 2.339851100384521e-06, "loss": 0.0133, "step": 2860 }, { "epoch": 0.04696064795876626, "grad_norm": 0.493367075920105, "learning_rate": 2.3480323979383133e-06, "loss": 0.0141, "step": 2870 }, { "epoch": 0.0471242739098421, "grad_norm": 0.7958556413650513, "learning_rate": 2.356213695492105e-06, "loss": 0.018, "step": 2880 }, { "epoch": 0.04728789986091794, "grad_norm": 0.7448601126670837, "learning_rate": 2.3643949930458975e-06, "loss": 0.0121, "step": 2890 }, { "epoch": 0.04745152581199378, "grad_norm": 0.20599818229675293, "learning_rate": 2.3725762905996893e-06, "loss": 0.0149, "step": 2900 }, { "epoch": 0.04761515176306962, "grad_norm": 0.5559225678443909, "learning_rate": 2.3807575881534816e-06, "loss": 0.0142, "step": 2910 }, { "epoch": 0.047778777714145466, "grad_norm": 0.8967880606651306, "learning_rate": 2.3889388857072734e-06, "loss": 0.0159, "step": 2920 }, { "epoch": 0.0479424036652213, "grad_norm": 0.28435468673706055, "learning_rate": 2.3971201832610657e-06, "loss": 0.0113, "step": 2930 }, { "epoch": 0.04810602961629715, "grad_norm": 0.4912911355495453, "learning_rate": 2.4053014808148575e-06, "loss": 0.0149, "step": 2940 }, { "epoch": 0.048269655567372984, "grad_norm": 0.6454976797103882, "learning_rate": 2.4134827783686498e-06, "loss": 0.0137, "step": 2950 }, { "epoch": 0.04843328151844883, "grad_norm": 0.4994121491909027, "learning_rate": 2.4216640759224416e-06, "loss": 0.0112, "step": 2960 }, { "epoch": 0.048596907469524664, "grad_norm": 0.6314337849617004, "learning_rate": 2.4298453734762334e-06, "loss": 0.0113, "step": 2970 }, { "epoch": 0.04876053342060051, "grad_norm": 0.4771454334259033, "learning_rate": 2.4380266710300253e-06, "loss": 0.0132, "step": 2980 }, { "epoch": 0.04892415937167635, "grad_norm": 0.4665985703468323, "learning_rate": 2.4462079685838175e-06, "loss": 0.0112, "step": 2990 }, { "epoch": 0.04908778532275219, "grad_norm": 0.7331014275550842, "learning_rate": 2.4543892661376094e-06, "loss": 0.0122, "step": 3000 }, { "epoch": 0.04925141127382803, "grad_norm": 0.614673376083374, "learning_rate": 2.4625705636914016e-06, "loss": 0.014, "step": 3010 }, { "epoch": 0.04941503722490387, "grad_norm": 0.693922758102417, "learning_rate": 2.4707518612451935e-06, "loss": 0.0107, "step": 3020 }, { "epoch": 0.04957866317597971, "grad_norm": 0.9129398465156555, "learning_rate": 2.4789331587989857e-06, "loss": 0.0088, "step": 3030 }, { "epoch": 0.04974228912705555, "grad_norm": 0.4695659279823303, "learning_rate": 2.4871144563527776e-06, "loss": 0.0153, "step": 3040 }, { "epoch": 0.04990591507813139, "grad_norm": 0.41599157452583313, "learning_rate": 2.49529575390657e-06, "loss": 0.0128, "step": 3050 }, { "epoch": 0.05006954102920723, "grad_norm": 0.6311373710632324, "learning_rate": 2.5034770514603617e-06, "loss": 0.0157, "step": 3060 }, { "epoch": 0.05023316698028307, "grad_norm": 1.2424720525741577, "learning_rate": 2.511658349014154e-06, "loss": 0.0098, "step": 3070 }, { "epoch": 0.050396792931358916, "grad_norm": 1.679995059967041, "learning_rate": 2.5198396465679458e-06, "loss": 0.0155, "step": 3080 }, { "epoch": 0.05056041888243475, "grad_norm": 0.5821516513824463, "learning_rate": 2.528020944121738e-06, "loss": 0.0125, "step": 3090 }, { "epoch": 0.0507240448335106, "grad_norm": 0.8905674815177917, "learning_rate": 2.53620224167553e-06, "loss": 0.0114, "step": 3100 }, { "epoch": 0.05088767078458643, "grad_norm": 0.78466796875, "learning_rate": 2.544383539229322e-06, "loss": 0.0112, "step": 3110 }, { "epoch": 0.05105129673566228, "grad_norm": 0.8018458485603333, "learning_rate": 2.552564836783114e-06, "loss": 0.0145, "step": 3120 }, { "epoch": 0.051214922686738114, "grad_norm": 0.6496682167053223, "learning_rate": 2.5607461343369063e-06, "loss": 0.0116, "step": 3130 }, { "epoch": 0.05137854863781396, "grad_norm": 0.3492189049720764, "learning_rate": 2.568927431890698e-06, "loss": 0.0098, "step": 3140 }, { "epoch": 0.0515421745888898, "grad_norm": 0.5741250514984131, "learning_rate": 2.5771087294444904e-06, "loss": 0.0122, "step": 3150 }, { "epoch": 0.05170580053996564, "grad_norm": 0.42342206835746765, "learning_rate": 2.585290026998282e-06, "loss": 0.0135, "step": 3160 }, { "epoch": 0.05186942649104148, "grad_norm": 0.4527572691440582, "learning_rate": 2.5934713245520745e-06, "loss": 0.0091, "step": 3170 }, { "epoch": 0.05203305244211732, "grad_norm": 0.6240607500076294, "learning_rate": 2.6016526221058663e-06, "loss": 0.0151, "step": 3180 }, { "epoch": 0.05219667839319316, "grad_norm": 0.4512414038181305, "learning_rate": 2.6098339196596586e-06, "loss": 0.0122, "step": 3190 }, { "epoch": 0.052360304344269, "grad_norm": 0.8511449098587036, "learning_rate": 2.6180152172134504e-06, "loss": 0.0139, "step": 3200 }, { "epoch": 0.05252393029534484, "grad_norm": 0.5736969113349915, "learning_rate": 2.6261965147672427e-06, "loss": 0.0124, "step": 3210 }, { "epoch": 0.05268755624642068, "grad_norm": 0.6660225987434387, "learning_rate": 2.6343778123210345e-06, "loss": 0.0135, "step": 3220 }, { "epoch": 0.05285118219749652, "grad_norm": 0.5948995351791382, "learning_rate": 2.6425591098748268e-06, "loss": 0.0125, "step": 3230 }, { "epoch": 0.053014808148572366, "grad_norm": 0.6419150829315186, "learning_rate": 2.6507404074286186e-06, "loss": 0.013, "step": 3240 }, { "epoch": 0.0531784340996482, "grad_norm": 0.5840580463409424, "learning_rate": 2.6589217049824104e-06, "loss": 0.0099, "step": 3250 }, { "epoch": 0.05334206005072405, "grad_norm": 0.7142891883850098, "learning_rate": 2.6671030025362027e-06, "loss": 0.0122, "step": 3260 }, { "epoch": 0.05350568600179988, "grad_norm": 0.7479686737060547, "learning_rate": 2.6752843000899945e-06, "loss": 0.0125, "step": 3270 }, { "epoch": 0.05366931195287573, "grad_norm": 0.18943554162979126, "learning_rate": 2.683465597643787e-06, "loss": 0.0118, "step": 3280 }, { "epoch": 0.053832937903951564, "grad_norm": 0.47382476925849915, "learning_rate": 2.6916468951975786e-06, "loss": 0.0156, "step": 3290 }, { "epoch": 0.05399656385502741, "grad_norm": 0.290359765291214, "learning_rate": 2.6998281927513705e-06, "loss": 0.0117, "step": 3300 }, { "epoch": 0.05416018980610325, "grad_norm": 0.4674018621444702, "learning_rate": 2.7080094903051623e-06, "loss": 0.0101, "step": 3310 }, { "epoch": 0.05432381575717909, "grad_norm": 0.6049782037734985, "learning_rate": 2.7161907878589546e-06, "loss": 0.0093, "step": 3320 }, { "epoch": 0.05448744170825493, "grad_norm": 0.5735464096069336, "learning_rate": 2.7243720854127464e-06, "loss": 0.0106, "step": 3330 }, { "epoch": 0.05465106765933077, "grad_norm": 0.4310954511165619, "learning_rate": 2.7325533829665387e-06, "loss": 0.0088, "step": 3340 }, { "epoch": 0.05481469361040661, "grad_norm": 0.49559855461120605, "learning_rate": 2.7407346805203305e-06, "loss": 0.0111, "step": 3350 }, { "epoch": 0.05497831956148245, "grad_norm": 0.47451549768447876, "learning_rate": 2.748915978074123e-06, "loss": 0.0092, "step": 3360 }, { "epoch": 0.05514194551255829, "grad_norm": 0.4467151463031769, "learning_rate": 2.7570972756279146e-06, "loss": 0.0096, "step": 3370 }, { "epoch": 0.055305571463634136, "grad_norm": 0.6338276267051697, "learning_rate": 2.7652785731817065e-06, "loss": 0.0189, "step": 3380 }, { "epoch": 0.05546919741470997, "grad_norm": 0.4194999933242798, "learning_rate": 2.7734598707354987e-06, "loss": 0.0151, "step": 3390 }, { "epoch": 0.055632823365785816, "grad_norm": 0.42343956232070923, "learning_rate": 2.7816411682892906e-06, "loss": 0.0089, "step": 3400 }, { "epoch": 0.05579644931686165, "grad_norm": 0.5507587790489197, "learning_rate": 2.789822465843083e-06, "loss": 0.0121, "step": 3410 }, { "epoch": 0.0559600752679375, "grad_norm": 0.6585989594459534, "learning_rate": 2.7980037633968747e-06, "loss": 0.0131, "step": 3420 }, { "epoch": 0.05612370121901333, "grad_norm": 0.3411996066570282, "learning_rate": 2.806185060950667e-06, "loss": 0.0097, "step": 3430 }, { "epoch": 0.05628732717008918, "grad_norm": 0.594822347164154, "learning_rate": 2.8143663585044588e-06, "loss": 0.0119, "step": 3440 }, { "epoch": 0.056450953121165014, "grad_norm": 0.7176277041435242, "learning_rate": 2.822547656058251e-06, "loss": 0.0127, "step": 3450 }, { "epoch": 0.05661457907224086, "grad_norm": 0.2711975872516632, "learning_rate": 2.830728953612043e-06, "loss": 0.0103, "step": 3460 }, { "epoch": 0.0567782050233167, "grad_norm": 0.4954574406147003, "learning_rate": 2.838910251165835e-06, "loss": 0.0141, "step": 3470 }, { "epoch": 0.05694183097439254, "grad_norm": 0.7648645043373108, "learning_rate": 2.847091548719627e-06, "loss": 0.0097, "step": 3480 }, { "epoch": 0.05710545692546838, "grad_norm": 0.7074746489524841, "learning_rate": 2.8552728462734192e-06, "loss": 0.0097, "step": 3490 }, { "epoch": 0.05726908287654422, "grad_norm": 0.7583217024803162, "learning_rate": 2.863454143827211e-06, "loss": 0.0093, "step": 3500 }, { "epoch": 0.05743270882762006, "grad_norm": 0.4210297763347626, "learning_rate": 2.8716354413810033e-06, "loss": 0.0123, "step": 3510 }, { "epoch": 0.0575963347786959, "grad_norm": 0.3861912786960602, "learning_rate": 2.879816738934795e-06, "loss": 0.0112, "step": 3520 }, { "epoch": 0.05775996072977174, "grad_norm": 0.500217080116272, "learning_rate": 2.8879980364885874e-06, "loss": 0.0126, "step": 3530 }, { "epoch": 0.057923586680847586, "grad_norm": 0.2987768352031708, "learning_rate": 2.8961793340423793e-06, "loss": 0.015, "step": 3540 }, { "epoch": 0.05808721263192342, "grad_norm": 0.4511184096336365, "learning_rate": 2.9043606315961715e-06, "loss": 0.0119, "step": 3550 }, { "epoch": 0.058250838582999266, "grad_norm": 0.7617406845092773, "learning_rate": 2.9125419291499634e-06, "loss": 0.0099, "step": 3560 }, { "epoch": 0.0584144645340751, "grad_norm": 0.5174741148948669, "learning_rate": 2.9207232267037557e-06, "loss": 0.0123, "step": 3570 }, { "epoch": 0.058578090485150947, "grad_norm": 0.5802791714668274, "learning_rate": 2.9289045242575475e-06, "loss": 0.0112, "step": 3580 }, { "epoch": 0.05874171643622678, "grad_norm": 0.74256432056427, "learning_rate": 2.9370858218113398e-06, "loss": 0.0107, "step": 3590 }, { "epoch": 0.05890534238730263, "grad_norm": 0.5330492258071899, "learning_rate": 2.9452671193651316e-06, "loss": 0.011, "step": 3600 }, { "epoch": 0.059068968338378464, "grad_norm": 0.5939335823059082, "learning_rate": 2.953448416918924e-06, "loss": 0.0129, "step": 3610 }, { "epoch": 0.05923259428945431, "grad_norm": 0.4373001158237457, "learning_rate": 2.9616297144727157e-06, "loss": 0.0113, "step": 3620 }, { "epoch": 0.05939622024053015, "grad_norm": 0.6011515855789185, "learning_rate": 2.969811012026508e-06, "loss": 0.0148, "step": 3630 }, { "epoch": 0.05955984619160599, "grad_norm": 0.46428272128105164, "learning_rate": 2.9779923095803e-06, "loss": 0.0129, "step": 3640 }, { "epoch": 0.05972347214268183, "grad_norm": 0.6666850447654724, "learning_rate": 2.986173607134092e-06, "loss": 0.0107, "step": 3650 }, { "epoch": 0.05988709809375767, "grad_norm": 0.6814191341400146, "learning_rate": 2.994354904687884e-06, "loss": 0.0156, "step": 3660 }, { "epoch": 0.06005072404483351, "grad_norm": 0.4391625225543976, "learning_rate": 3.002536202241676e-06, "loss": 0.0125, "step": 3670 }, { "epoch": 0.06021434999590935, "grad_norm": 0.45413750410079956, "learning_rate": 3.010717499795468e-06, "loss": 0.0096, "step": 3680 }, { "epoch": 0.06037797594698519, "grad_norm": 0.573988676071167, "learning_rate": 3.0188987973492603e-06, "loss": 0.0098, "step": 3690 }, { "epoch": 0.060541601898061036, "grad_norm": 0.4897746741771698, "learning_rate": 3.027080094903052e-06, "loss": 0.0141, "step": 3700 }, { "epoch": 0.06070522784913687, "grad_norm": 0.4950472116470337, "learning_rate": 3.035261392456844e-06, "loss": 0.0071, "step": 3710 }, { "epoch": 0.060868853800212716, "grad_norm": 0.2800094783306122, "learning_rate": 3.043442690010636e-06, "loss": 0.0151, "step": 3720 }, { "epoch": 0.06103247975128855, "grad_norm": 0.5631102919578552, "learning_rate": 3.051623987564428e-06, "loss": 0.0129, "step": 3730 }, { "epoch": 0.061196105702364396, "grad_norm": 0.4482916593551636, "learning_rate": 3.05980528511822e-06, "loss": 0.0115, "step": 3740 }, { "epoch": 0.06135973165344023, "grad_norm": 0.7123658657073975, "learning_rate": 3.0679865826720117e-06, "loss": 0.0137, "step": 3750 }, { "epoch": 0.06152335760451608, "grad_norm": 0.535690188407898, "learning_rate": 3.076167880225804e-06, "loss": 0.0122, "step": 3760 }, { "epoch": 0.061686983555591914, "grad_norm": 0.20516180992126465, "learning_rate": 3.084349177779596e-06, "loss": 0.0126, "step": 3770 }, { "epoch": 0.06185060950666776, "grad_norm": 0.48727574944496155, "learning_rate": 3.092530475333388e-06, "loss": 0.011, "step": 3780 }, { "epoch": 0.0620142354577436, "grad_norm": 0.40629613399505615, "learning_rate": 3.10071177288718e-06, "loss": 0.0108, "step": 3790 }, { "epoch": 0.06217786140881944, "grad_norm": 0.4044995605945587, "learning_rate": 3.108893070440972e-06, "loss": 0.011, "step": 3800 }, { "epoch": 0.06234148735989528, "grad_norm": 0.7297495007514954, "learning_rate": 3.117074367994764e-06, "loss": 0.0101, "step": 3810 }, { "epoch": 0.06250511331097112, "grad_norm": 0.47363176941871643, "learning_rate": 3.1252556655485563e-06, "loss": 0.0092, "step": 3820 }, { "epoch": 0.06266873926204695, "grad_norm": 0.38744133710861206, "learning_rate": 3.133436963102348e-06, "loss": 0.0183, "step": 3830 }, { "epoch": 0.0628323652131228, "grad_norm": 0.5381457209587097, "learning_rate": 3.14161826065614e-06, "loss": 0.0103, "step": 3840 }, { "epoch": 0.06299599116419864, "grad_norm": 0.3486247658729553, "learning_rate": 3.1497995582099322e-06, "loss": 0.0088, "step": 3850 }, { "epoch": 0.06315961711527449, "grad_norm": 0.4323013722896576, "learning_rate": 3.157980855763724e-06, "loss": 0.0145, "step": 3860 }, { "epoch": 0.06332324306635033, "grad_norm": 0.28973954916000366, "learning_rate": 3.1661621533175163e-06, "loss": 0.0097, "step": 3870 }, { "epoch": 0.06348686901742616, "grad_norm": 0.6435532569885254, "learning_rate": 3.174343450871308e-06, "loss": 0.0075, "step": 3880 }, { "epoch": 0.063650494968502, "grad_norm": 0.3044186532497406, "learning_rate": 3.1825247484251004e-06, "loss": 0.0122, "step": 3890 }, { "epoch": 0.06381412091957785, "grad_norm": 0.5144074559211731, "learning_rate": 3.1907060459788923e-06, "loss": 0.0068, "step": 3900 }, { "epoch": 0.06397774687065369, "grad_norm": 0.3963913917541504, "learning_rate": 3.1988873435326845e-06, "loss": 0.0086, "step": 3910 }, { "epoch": 0.06414137282172952, "grad_norm": 0.6809449195861816, "learning_rate": 3.2070686410864764e-06, "loss": 0.0102, "step": 3920 }, { "epoch": 0.06430499877280536, "grad_norm": 0.5388890504837036, "learning_rate": 3.2152499386402686e-06, "loss": 0.0121, "step": 3930 }, { "epoch": 0.06446862472388121, "grad_norm": 0.5491735935211182, "learning_rate": 3.2234312361940605e-06, "loss": 0.0066, "step": 3940 }, { "epoch": 0.06463225067495705, "grad_norm": 0.624646782875061, "learning_rate": 3.2316125337478527e-06, "loss": 0.0103, "step": 3950 }, { "epoch": 0.0647958766260329, "grad_norm": 0.24559050798416138, "learning_rate": 3.2397938313016446e-06, "loss": 0.0098, "step": 3960 }, { "epoch": 0.06495950257710872, "grad_norm": 0.5081912875175476, "learning_rate": 3.247975128855437e-06, "loss": 0.0116, "step": 3970 }, { "epoch": 0.06512312852818457, "grad_norm": 0.5179296135902405, "learning_rate": 3.2561564264092287e-06, "loss": 0.0118, "step": 3980 }, { "epoch": 0.06528675447926041, "grad_norm": 0.3036271035671234, "learning_rate": 3.264337723963021e-06, "loss": 0.0113, "step": 3990 }, { "epoch": 0.06545038043033626, "grad_norm": 0.4160747826099396, "learning_rate": 3.2725190215168128e-06, "loss": 0.014, "step": 4000 }, { "epoch": 0.0656140063814121, "grad_norm": 0.6022220253944397, "learning_rate": 3.280700319070605e-06, "loss": 0.0096, "step": 4010 }, { "epoch": 0.06577763233248793, "grad_norm": 0.5766621232032776, "learning_rate": 3.288881616624397e-06, "loss": 0.0099, "step": 4020 }, { "epoch": 0.06594125828356377, "grad_norm": 0.3600672483444214, "learning_rate": 3.297062914178189e-06, "loss": 0.0131, "step": 4030 }, { "epoch": 0.06610488423463962, "grad_norm": 0.6038428544998169, "learning_rate": 3.305244211731981e-06, "loss": 0.0082, "step": 4040 }, { "epoch": 0.06626851018571546, "grad_norm": 0.37434881925582886, "learning_rate": 3.3134255092857733e-06, "loss": 0.0129, "step": 4050 }, { "epoch": 0.06643213613679129, "grad_norm": 0.5213479995727539, "learning_rate": 3.321606806839565e-06, "loss": 0.009, "step": 4060 }, { "epoch": 0.06659576208786713, "grad_norm": 0.28980115056037903, "learning_rate": 3.3297881043933574e-06, "loss": 0.0082, "step": 4070 }, { "epoch": 0.06675938803894298, "grad_norm": 0.6069865226745605, "learning_rate": 3.337969401947149e-06, "loss": 0.0087, "step": 4080 }, { "epoch": 0.06692301399001882, "grad_norm": 0.5408406853675842, "learning_rate": 3.3461506995009415e-06, "loss": 0.0118, "step": 4090 }, { "epoch": 0.06708663994109466, "grad_norm": 0.8828417062759399, "learning_rate": 3.3543319970547333e-06, "loss": 0.008, "step": 4100 }, { "epoch": 0.0672502658921705, "grad_norm": 0.5868886709213257, "learning_rate": 3.3625132946085256e-06, "loss": 0.0104, "step": 4110 }, { "epoch": 0.06741389184324634, "grad_norm": 0.4467138946056366, "learning_rate": 3.3706945921623174e-06, "loss": 0.0148, "step": 4120 }, { "epoch": 0.06757751779432218, "grad_norm": 1.0529546737670898, "learning_rate": 3.3788758897161097e-06, "loss": 0.0095, "step": 4130 }, { "epoch": 0.06774114374539802, "grad_norm": 0.8397409915924072, "learning_rate": 3.3870571872699015e-06, "loss": 0.0088, "step": 4140 }, { "epoch": 0.06790476969647385, "grad_norm": 0.6100178956985474, "learning_rate": 3.3952384848236938e-06, "loss": 0.0113, "step": 4150 }, { "epoch": 0.0680683956475497, "grad_norm": 0.4465765953063965, "learning_rate": 3.4034197823774856e-06, "loss": 0.0079, "step": 4160 }, { "epoch": 0.06823202159862554, "grad_norm": 0.2734662592411041, "learning_rate": 3.4116010799312774e-06, "loss": 0.0055, "step": 4170 }, { "epoch": 0.06839564754970139, "grad_norm": 0.3067326545715332, "learning_rate": 3.4197823774850693e-06, "loss": 0.0113, "step": 4180 }, { "epoch": 0.06855927350077723, "grad_norm": 0.47589603066444397, "learning_rate": 3.427963675038861e-06, "loss": 0.007, "step": 4190 }, { "epoch": 0.06872289945185306, "grad_norm": 0.4676600694656372, "learning_rate": 3.4361449725926534e-06, "loss": 0.0129, "step": 4200 }, { "epoch": 0.0688865254029289, "grad_norm": 0.6486285328865051, "learning_rate": 3.4443262701464452e-06, "loss": 0.0107, "step": 4210 }, { "epoch": 0.06905015135400475, "grad_norm": 0.28594696521759033, "learning_rate": 3.4525075677002375e-06, "loss": 0.0099, "step": 4220 }, { "epoch": 0.06921377730508059, "grad_norm": 0.49257415533065796, "learning_rate": 3.4606888652540293e-06, "loss": 0.0086, "step": 4230 }, { "epoch": 0.06937740325615642, "grad_norm": 0.5153892040252686, "learning_rate": 3.4688701628078216e-06, "loss": 0.0103, "step": 4240 }, { "epoch": 0.06954102920723226, "grad_norm": 0.5089650750160217, "learning_rate": 3.4770514603616134e-06, "loss": 0.0239, "step": 4250 }, { "epoch": 0.06970465515830811, "grad_norm": 0.3236147463321686, "learning_rate": 3.4852327579154057e-06, "loss": 0.0129, "step": 4260 }, { "epoch": 0.06986828110938395, "grad_norm": 0.43026453256607056, "learning_rate": 3.4934140554691975e-06, "loss": 0.0111, "step": 4270 }, { "epoch": 0.0700319070604598, "grad_norm": 0.4745500385761261, "learning_rate": 3.50159535302299e-06, "loss": 0.0077, "step": 4280 }, { "epoch": 0.07019553301153562, "grad_norm": 0.6749144792556763, "learning_rate": 3.5097766505767816e-06, "loss": 0.0123, "step": 4290 }, { "epoch": 0.07035915896261147, "grad_norm": 0.3357636034488678, "learning_rate": 3.5179579481305735e-06, "loss": 0.0076, "step": 4300 }, { "epoch": 0.07052278491368731, "grad_norm": 0.5458382368087769, "learning_rate": 3.5261392456843657e-06, "loss": 0.0076, "step": 4310 }, { "epoch": 0.07068641086476316, "grad_norm": 0.3887699246406555, "learning_rate": 3.5343205432381576e-06, "loss": 0.0075, "step": 4320 }, { "epoch": 0.070850036815839, "grad_norm": 0.8280920386314392, "learning_rate": 3.54250184079195e-06, "loss": 0.014, "step": 4330 }, { "epoch": 0.07101366276691483, "grad_norm": 0.3678774833679199, "learning_rate": 3.5506831383457417e-06, "loss": 0.0103, "step": 4340 }, { "epoch": 0.07117728871799067, "grad_norm": 0.4992738366127014, "learning_rate": 3.558864435899534e-06, "loss": 0.0079, "step": 4350 }, { "epoch": 0.07134091466906652, "grad_norm": 0.43329018354415894, "learning_rate": 3.5670457334533258e-06, "loss": 0.0088, "step": 4360 }, { "epoch": 0.07150454062014236, "grad_norm": 0.4352242052555084, "learning_rate": 3.575227031007118e-06, "loss": 0.0149, "step": 4370 }, { "epoch": 0.07166816657121819, "grad_norm": 0.037003155797719955, "learning_rate": 3.58340832856091e-06, "loss": 0.0167, "step": 4380 }, { "epoch": 0.07183179252229403, "grad_norm": 0.39874985814094543, "learning_rate": 3.591589626114702e-06, "loss": 0.0109, "step": 4390 }, { "epoch": 0.07199541847336988, "grad_norm": 0.5194030404090881, "learning_rate": 3.599770923668494e-06, "loss": 0.0099, "step": 4400 }, { "epoch": 0.07215904442444572, "grad_norm": 0.6235454082489014, "learning_rate": 3.6079522212222862e-06, "loss": 0.0069, "step": 4410 }, { "epoch": 0.07232267037552156, "grad_norm": 0.3940913677215576, "learning_rate": 3.616133518776078e-06, "loss": 0.0107, "step": 4420 }, { "epoch": 0.0724862963265974, "grad_norm": 0.38981983065605164, "learning_rate": 3.6243148163298703e-06, "loss": 0.0117, "step": 4430 }, { "epoch": 0.07264992227767324, "grad_norm": 0.5303213000297546, "learning_rate": 3.632496113883662e-06, "loss": 0.0095, "step": 4440 }, { "epoch": 0.07281354822874908, "grad_norm": 0.588760256767273, "learning_rate": 3.6406774114374544e-06, "loss": 0.0131, "step": 4450 }, { "epoch": 0.07297717417982492, "grad_norm": 0.33309927582740784, "learning_rate": 3.6488587089912463e-06, "loss": 0.0098, "step": 4460 }, { "epoch": 0.07314080013090075, "grad_norm": 0.591569721698761, "learning_rate": 3.6570400065450385e-06, "loss": 0.009, "step": 4470 }, { "epoch": 0.0733044260819766, "grad_norm": 0.3031422197818756, "learning_rate": 3.6652213040988304e-06, "loss": 0.0082, "step": 4480 }, { "epoch": 0.07346805203305244, "grad_norm": 0.43153369426727295, "learning_rate": 3.6734026016526226e-06, "loss": 0.007, "step": 4490 }, { "epoch": 0.07363167798412829, "grad_norm": 0.5362528562545776, "learning_rate": 3.6815838992064145e-06, "loss": 0.0088, "step": 4500 }, { "epoch": 0.07379530393520413, "grad_norm": 0.5695440769195557, "learning_rate": 3.6897651967602068e-06, "loss": 0.014, "step": 4510 }, { "epoch": 0.07395892988627996, "grad_norm": 0.26773330569267273, "learning_rate": 3.6979464943139986e-06, "loss": 0.0123, "step": 4520 }, { "epoch": 0.0741225558373558, "grad_norm": 0.544951856136322, "learning_rate": 3.706127791867791e-06, "loss": 0.0133, "step": 4530 }, { "epoch": 0.07428618178843165, "grad_norm": 0.3702234625816345, "learning_rate": 3.7143090894215827e-06, "loss": 0.0102, "step": 4540 }, { "epoch": 0.07444980773950749, "grad_norm": 0.34125208854675293, "learning_rate": 3.722490386975375e-06, "loss": 0.0094, "step": 4550 }, { "epoch": 0.07461343369058333, "grad_norm": 0.3214631676673889, "learning_rate": 3.730671684529167e-06, "loss": 0.0109, "step": 4560 }, { "epoch": 0.07477705964165916, "grad_norm": 0.3399805426597595, "learning_rate": 3.738852982082959e-06, "loss": 0.0077, "step": 4570 }, { "epoch": 0.074940685592735, "grad_norm": 0.5178592801094055, "learning_rate": 3.747034279636751e-06, "loss": 0.0082, "step": 4580 }, { "epoch": 0.07510431154381085, "grad_norm": 0.5321545004844666, "learning_rate": 3.755215577190543e-06, "loss": 0.0074, "step": 4590 }, { "epoch": 0.0752679374948867, "grad_norm": 0.4659704566001892, "learning_rate": 3.763396874744335e-06, "loss": 0.0101, "step": 4600 }, { "epoch": 0.07543156344596252, "grad_norm": 0.4055034816265106, "learning_rate": 3.7715781722981273e-06, "loss": 0.0093, "step": 4610 }, { "epoch": 0.07559518939703837, "grad_norm": 0.5214939117431641, "learning_rate": 3.7797594698519187e-06, "loss": 0.0087, "step": 4620 }, { "epoch": 0.07575881534811421, "grad_norm": 0.7674144506454468, "learning_rate": 3.7879407674057105e-06, "loss": 0.0127, "step": 4630 }, { "epoch": 0.07592244129919005, "grad_norm": 0.5002004504203796, "learning_rate": 3.7961220649595028e-06, "loss": 0.0121, "step": 4640 }, { "epoch": 0.0760860672502659, "grad_norm": 0.6141252517700195, "learning_rate": 3.8043033625132946e-06, "loss": 0.0104, "step": 4650 }, { "epoch": 0.07624969320134173, "grad_norm": 0.41309961676597595, "learning_rate": 3.812484660067087e-06, "loss": 0.0098, "step": 4660 }, { "epoch": 0.07641331915241757, "grad_norm": 0.18280814588069916, "learning_rate": 3.820665957620879e-06, "loss": 0.0119, "step": 4670 }, { "epoch": 0.07657694510349342, "grad_norm": 0.33989739418029785, "learning_rate": 3.8288472551746706e-06, "loss": 0.0101, "step": 4680 }, { "epoch": 0.07674057105456926, "grad_norm": 0.40718260407447815, "learning_rate": 3.837028552728463e-06, "loss": 0.0108, "step": 4690 }, { "epoch": 0.07690419700564509, "grad_norm": 0.9239081144332886, "learning_rate": 3.845209850282255e-06, "loss": 0.0104, "step": 4700 }, { "epoch": 0.07706782295672093, "grad_norm": 0.4057515859603882, "learning_rate": 3.853391147836047e-06, "loss": 0.0098, "step": 4710 }, { "epoch": 0.07723144890779678, "grad_norm": 0.3407362401485443, "learning_rate": 3.861572445389839e-06, "loss": 0.0079, "step": 4720 }, { "epoch": 0.07739507485887262, "grad_norm": 1.244737148284912, "learning_rate": 3.869753742943631e-06, "loss": 0.0099, "step": 4730 }, { "epoch": 0.07755870080994846, "grad_norm": 0.47368761897087097, "learning_rate": 3.877935040497423e-06, "loss": 0.0104, "step": 4740 }, { "epoch": 0.0777223267610243, "grad_norm": 0.5678014755249023, "learning_rate": 3.8861163380512156e-06, "loss": 0.015, "step": 4750 }, { "epoch": 0.07788595271210014, "grad_norm": 0.38244327902793884, "learning_rate": 3.894297635605007e-06, "loss": 0.0069, "step": 4760 }, { "epoch": 0.07804957866317598, "grad_norm": 0.2525394558906555, "learning_rate": 3.902478933158799e-06, "loss": 0.0088, "step": 4770 }, { "epoch": 0.07821320461425182, "grad_norm": 0.3700346350669861, "learning_rate": 3.9106602307125915e-06, "loss": 0.0068, "step": 4780 }, { "epoch": 0.07837683056532765, "grad_norm": 0.3476503789424896, "learning_rate": 3.918841528266383e-06, "loss": 0.0075, "step": 4790 }, { "epoch": 0.0785404565164035, "grad_norm": 0.6082961559295654, "learning_rate": 3.927022825820175e-06, "loss": 0.0083, "step": 4800 }, { "epoch": 0.07870408246747934, "grad_norm": 0.5181087255477905, "learning_rate": 3.9352041233739674e-06, "loss": 0.0085, "step": 4810 }, { "epoch": 0.07886770841855519, "grad_norm": 0.5160667896270752, "learning_rate": 3.94338542092776e-06, "loss": 0.0092, "step": 4820 }, { "epoch": 0.07903133436963103, "grad_norm": 0.8015368580818176, "learning_rate": 3.951566718481551e-06, "loss": 0.0092, "step": 4830 }, { "epoch": 0.07919496032070686, "grad_norm": 0.6088091731071472, "learning_rate": 3.959748016035343e-06, "loss": 0.009, "step": 4840 }, { "epoch": 0.0793585862717827, "grad_norm": 0.38970452547073364, "learning_rate": 3.967929313589136e-06, "loss": 0.0092, "step": 4850 }, { "epoch": 0.07952221222285855, "grad_norm": 0.41453033685684204, "learning_rate": 3.976110611142928e-06, "loss": 0.0113, "step": 4860 }, { "epoch": 0.07968583817393439, "grad_norm": 0.3551105856895447, "learning_rate": 3.984291908696719e-06, "loss": 0.009, "step": 4870 }, { "epoch": 0.07984946412501023, "grad_norm": 0.2863304316997528, "learning_rate": 3.992473206250512e-06, "loss": 0.0084, "step": 4880 }, { "epoch": 0.08001309007608606, "grad_norm": 0.2288428395986557, "learning_rate": 4.000654503804304e-06, "loss": 0.0061, "step": 4890 }, { "epoch": 0.0801767160271619, "grad_norm": 0.3824857771396637, "learning_rate": 4.008835801358096e-06, "loss": 0.0097, "step": 4900 }, { "epoch": 0.08034034197823775, "grad_norm": 0.4002744257450104, "learning_rate": 4.0170170989118875e-06, "loss": 0.0076, "step": 4910 }, { "epoch": 0.0805039679293136, "grad_norm": 0.47109052538871765, "learning_rate": 4.02519839646568e-06, "loss": 0.0073, "step": 4920 }, { "epoch": 0.08066759388038942, "grad_norm": 0.46207544207572937, "learning_rate": 4.033379694019472e-06, "loss": 0.0102, "step": 4930 }, { "epoch": 0.08083121983146527, "grad_norm": 0.6554605960845947, "learning_rate": 4.041560991573264e-06, "loss": 0.0092, "step": 4940 }, { "epoch": 0.08099484578254111, "grad_norm": 0.7463805079460144, "learning_rate": 4.049742289127056e-06, "loss": 0.009, "step": 4950 }, { "epoch": 0.08115847173361695, "grad_norm": 0.5068933367729187, "learning_rate": 4.057923586680848e-06, "loss": 0.0119, "step": 4960 }, { "epoch": 0.0813220976846928, "grad_norm": 0.17575666308403015, "learning_rate": 4.06610488423464e-06, "loss": 0.0089, "step": 4970 }, { "epoch": 0.08148572363576863, "grad_norm": 0.553342342376709, "learning_rate": 4.0742861817884325e-06, "loss": 0.0087, "step": 4980 }, { "epoch": 0.08164934958684447, "grad_norm": 0.25047600269317627, "learning_rate": 4.082467479342224e-06, "loss": 0.0083, "step": 4990 }, { "epoch": 0.08181297553792032, "grad_norm": 0.2992837131023407, "learning_rate": 4.090648776896016e-06, "loss": 0.0104, "step": 5000 }, { "epoch": 0.08197660148899616, "grad_norm": 0.630729615688324, "learning_rate": 4.0988300744498085e-06, "loss": 0.0095, "step": 5010 }, { "epoch": 0.08214022744007199, "grad_norm": 0.41234448552131653, "learning_rate": 4.107011372003601e-06, "loss": 0.0064, "step": 5020 }, { "epoch": 0.08230385339114783, "grad_norm": 0.45269840955734253, "learning_rate": 4.115192669557392e-06, "loss": 0.0092, "step": 5030 }, { "epoch": 0.08246747934222368, "grad_norm": 0.18220584094524384, "learning_rate": 4.123373967111184e-06, "loss": 0.0099, "step": 5040 }, { "epoch": 0.08263110529329952, "grad_norm": 0.5539675951004028, "learning_rate": 4.131555264664977e-06, "loss": 0.0106, "step": 5050 }, { "epoch": 0.08279473124437536, "grad_norm": 0.39362144470214844, "learning_rate": 4.139736562218768e-06, "loss": 0.0091, "step": 5060 }, { "epoch": 0.0829583571954512, "grad_norm": 0.24591320753097534, "learning_rate": 4.14791785977256e-06, "loss": 0.0103, "step": 5070 }, { "epoch": 0.08312198314652704, "grad_norm": 0.41742563247680664, "learning_rate": 4.156099157326352e-06, "loss": 0.0079, "step": 5080 }, { "epoch": 0.08328560909760288, "grad_norm": 0.28923746943473816, "learning_rate": 4.164280454880144e-06, "loss": 0.0097, "step": 5090 }, { "epoch": 0.08344923504867872, "grad_norm": 0.19561634957790375, "learning_rate": 4.172461752433936e-06, "loss": 0.0086, "step": 5100 }, { "epoch": 0.08361286099975457, "grad_norm": 0.3338703513145447, "learning_rate": 4.1806430499877285e-06, "loss": 0.0081, "step": 5110 }, { "epoch": 0.0837764869508304, "grad_norm": 0.6227090954780579, "learning_rate": 4.18882434754152e-06, "loss": 0.0132, "step": 5120 }, { "epoch": 0.08394011290190624, "grad_norm": 0.5862659215927124, "learning_rate": 4.197005645095312e-06, "loss": 0.0082, "step": 5130 }, { "epoch": 0.08410373885298209, "grad_norm": 0.40953049063682556, "learning_rate": 4.2051869426491045e-06, "loss": 0.0131, "step": 5140 }, { "epoch": 0.08426736480405793, "grad_norm": 0.1880975216627121, "learning_rate": 4.213368240202897e-06, "loss": 0.0067, "step": 5150 }, { "epoch": 0.08443099075513376, "grad_norm": 0.3912363648414612, "learning_rate": 4.221549537756688e-06, "loss": 0.0132, "step": 5160 }, { "epoch": 0.0845946167062096, "grad_norm": 0.7005236148834229, "learning_rate": 4.2297308353104804e-06, "loss": 0.0133, "step": 5170 }, { "epoch": 0.08475824265728545, "grad_norm": 0.25200846791267395, "learning_rate": 4.237912132864273e-06, "loss": 0.0085, "step": 5180 }, { "epoch": 0.08492186860836129, "grad_norm": 0.341741681098938, "learning_rate": 4.246093430418065e-06, "loss": 0.0079, "step": 5190 }, { "epoch": 0.08508549455943713, "grad_norm": 0.4006780982017517, "learning_rate": 4.254274727971856e-06, "loss": 0.013, "step": 5200 }, { "epoch": 0.08524912051051296, "grad_norm": 0.4393010139465332, "learning_rate": 4.262456025525649e-06, "loss": 0.0107, "step": 5210 }, { "epoch": 0.0854127464615888, "grad_norm": 0.2482386976480484, "learning_rate": 4.270637323079441e-06, "loss": 0.0122, "step": 5220 }, { "epoch": 0.08557637241266465, "grad_norm": 0.2598719000816345, "learning_rate": 4.278818620633232e-06, "loss": 0.0074, "step": 5230 }, { "epoch": 0.0857399983637405, "grad_norm": 0.28431206941604614, "learning_rate": 4.2869999181870246e-06, "loss": 0.0091, "step": 5240 }, { "epoch": 0.08590362431481632, "grad_norm": 0.1387432962656021, "learning_rate": 4.295181215740817e-06, "loss": 0.0101, "step": 5250 }, { "epoch": 0.08606725026589217, "grad_norm": 0.22250521183013916, "learning_rate": 4.303362513294609e-06, "loss": 0.0093, "step": 5260 }, { "epoch": 0.08623087621696801, "grad_norm": 0.4605426788330078, "learning_rate": 4.3115438108484005e-06, "loss": 0.0059, "step": 5270 }, { "epoch": 0.08639450216804385, "grad_norm": 0.19569845497608185, "learning_rate": 4.319725108402193e-06, "loss": 0.0075, "step": 5280 }, { "epoch": 0.0865581281191197, "grad_norm": 0.39840999245643616, "learning_rate": 4.327906405955985e-06, "loss": 0.0093, "step": 5290 }, { "epoch": 0.08672175407019553, "grad_norm": 0.35140636563301086, "learning_rate": 4.336087703509777e-06, "loss": 0.0097, "step": 5300 }, { "epoch": 0.08688538002127137, "grad_norm": 0.37227746844291687, "learning_rate": 4.344269001063569e-06, "loss": 0.007, "step": 5310 }, { "epoch": 0.08704900597234722, "grad_norm": 0.37834489345550537, "learning_rate": 4.352450298617361e-06, "loss": 0.0114, "step": 5320 }, { "epoch": 0.08721263192342306, "grad_norm": 0.18501155078411102, "learning_rate": 4.360631596171153e-06, "loss": 0.0103, "step": 5330 }, { "epoch": 0.08737625787449889, "grad_norm": 0.4748477339744568, "learning_rate": 4.3688128937249455e-06, "loss": 0.0088, "step": 5340 }, { "epoch": 0.08753988382557473, "grad_norm": 0.42865192890167236, "learning_rate": 4.376994191278737e-06, "loss": 0.0075, "step": 5350 }, { "epoch": 0.08770350977665058, "grad_norm": 0.3388519585132599, "learning_rate": 4.385175488832529e-06, "loss": 0.0066, "step": 5360 }, { "epoch": 0.08786713572772642, "grad_norm": 0.3161774277687073, "learning_rate": 4.3933567863863214e-06, "loss": 0.0079, "step": 5370 }, { "epoch": 0.08803076167880226, "grad_norm": 0.3384650945663452, "learning_rate": 4.401538083940114e-06, "loss": 0.0128, "step": 5380 }, { "epoch": 0.0881943876298781, "grad_norm": 0.39423316717147827, "learning_rate": 4.409719381493905e-06, "loss": 0.0093, "step": 5390 }, { "epoch": 0.08835801358095394, "grad_norm": 0.5899034142494202, "learning_rate": 4.417900679047697e-06, "loss": 0.0087, "step": 5400 }, { "epoch": 0.08852163953202978, "grad_norm": 0.3199603855609894, "learning_rate": 4.42608197660149e-06, "loss": 0.0076, "step": 5410 }, { "epoch": 0.08868526548310562, "grad_norm": 0.334506630897522, "learning_rate": 4.434263274155282e-06, "loss": 0.0064, "step": 5420 }, { "epoch": 0.08884889143418147, "grad_norm": 0.24528226256370544, "learning_rate": 4.442444571709073e-06, "loss": 0.0063, "step": 5430 }, { "epoch": 0.0890125173852573, "grad_norm": 0.5676206946372986, "learning_rate": 4.450625869262866e-06, "loss": 0.0079, "step": 5440 }, { "epoch": 0.08917614333633314, "grad_norm": 0.13611209392547607, "learning_rate": 4.458807166816658e-06, "loss": 0.0056, "step": 5450 }, { "epoch": 0.08933976928740898, "grad_norm": 0.34977057576179504, "learning_rate": 4.46698846437045e-06, "loss": 0.0095, "step": 5460 }, { "epoch": 0.08950339523848483, "grad_norm": 0.39309054613113403, "learning_rate": 4.4751697619242415e-06, "loss": 0.0057, "step": 5470 }, { "epoch": 0.08966702118956066, "grad_norm": 0.35844671726226807, "learning_rate": 4.483351059478034e-06, "loss": 0.0112, "step": 5480 }, { "epoch": 0.0898306471406365, "grad_norm": 0.3857707977294922, "learning_rate": 4.491532357031826e-06, "loss": 0.0071, "step": 5490 }, { "epoch": 0.08999427309171235, "grad_norm": 0.2441084235906601, "learning_rate": 4.4997136545856175e-06, "loss": 0.0077, "step": 5500 }, { "epoch": 0.09015789904278819, "grad_norm": 0.18826180696487427, "learning_rate": 4.50789495213941e-06, "loss": 0.008, "step": 5510 }, { "epoch": 0.09032152499386403, "grad_norm": 0.36036619544029236, "learning_rate": 4.516076249693201e-06, "loss": 0.0072, "step": 5520 }, { "epoch": 0.09048515094493986, "grad_norm": 0.34049272537231445, "learning_rate": 4.524257547246993e-06, "loss": 0.0096, "step": 5530 }, { "epoch": 0.0906487768960157, "grad_norm": 0.32959064841270447, "learning_rate": 4.532438844800786e-06, "loss": 0.008, "step": 5540 }, { "epoch": 0.09081240284709155, "grad_norm": 0.29600727558135986, "learning_rate": 4.540620142354578e-06, "loss": 0.0103, "step": 5550 }, { "epoch": 0.0909760287981674, "grad_norm": 0.3910512626171112, "learning_rate": 4.548801439908369e-06, "loss": 0.0074, "step": 5560 }, { "epoch": 0.09113965474924322, "grad_norm": 0.28621724247932434, "learning_rate": 4.556982737462162e-06, "loss": 0.0103, "step": 5570 }, { "epoch": 0.09130328070031907, "grad_norm": 0.3845384120941162, "learning_rate": 4.565164035015954e-06, "loss": 0.009, "step": 5580 }, { "epoch": 0.09146690665139491, "grad_norm": 0.07460370659828186, "learning_rate": 4.573345332569746e-06, "loss": 0.0068, "step": 5590 }, { "epoch": 0.09163053260247075, "grad_norm": 0.36962124705314636, "learning_rate": 4.5815266301235376e-06, "loss": 0.0084, "step": 5600 }, { "epoch": 0.0917941585535466, "grad_norm": 0.5404472947120667, "learning_rate": 4.58970792767733e-06, "loss": 0.0077, "step": 5610 }, { "epoch": 0.09195778450462243, "grad_norm": 0.3260168731212616, "learning_rate": 4.597889225231122e-06, "loss": 0.0121, "step": 5620 }, { "epoch": 0.09212141045569827, "grad_norm": 0.5107181668281555, "learning_rate": 4.606070522784914e-06, "loss": 0.0092, "step": 5630 }, { "epoch": 0.09228503640677412, "grad_norm": 0.3145920932292938, "learning_rate": 4.614251820338706e-06, "loss": 0.0124, "step": 5640 }, { "epoch": 0.09244866235784996, "grad_norm": 0.336105614900589, "learning_rate": 4.622433117892498e-06, "loss": 0.0071, "step": 5650 }, { "epoch": 0.0926122883089258, "grad_norm": 0.27319103479385376, "learning_rate": 4.63061441544629e-06, "loss": 0.0063, "step": 5660 }, { "epoch": 0.09277591426000163, "grad_norm": 0.38768184185028076, "learning_rate": 4.6387957130000826e-06, "loss": 0.0096, "step": 5670 }, { "epoch": 0.09293954021107748, "grad_norm": 0.2965134382247925, "learning_rate": 4.646977010553874e-06, "loss": 0.009, "step": 5680 }, { "epoch": 0.09310316616215332, "grad_norm": 0.4633827209472656, "learning_rate": 4.655158308107666e-06, "loss": 0.0074, "step": 5690 }, { "epoch": 0.09326679211322916, "grad_norm": 0.18506695330142975, "learning_rate": 4.6633396056614585e-06, "loss": 0.0095, "step": 5700 }, { "epoch": 0.093430418064305, "grad_norm": 0.3332703709602356, "learning_rate": 4.67152090321525e-06, "loss": 0.0163, "step": 5710 }, { "epoch": 0.09359404401538084, "grad_norm": 0.15415699779987335, "learning_rate": 4.679702200769042e-06, "loss": 0.008, "step": 5720 }, { "epoch": 0.09375766996645668, "grad_norm": 0.2118310183286667, "learning_rate": 4.6878834983228344e-06, "loss": 0.0074, "step": 5730 }, { "epoch": 0.09392129591753252, "grad_norm": 0.546164333820343, "learning_rate": 4.696064795876627e-06, "loss": 0.0087, "step": 5740 }, { "epoch": 0.09408492186860837, "grad_norm": 0.2127242386341095, "learning_rate": 4.704246093430418e-06, "loss": 0.0075, "step": 5750 }, { "epoch": 0.0942485478196842, "grad_norm": 0.3988535404205322, "learning_rate": 4.71242739098421e-06, "loss": 0.0107, "step": 5760 }, { "epoch": 0.09441217377076004, "grad_norm": 0.32979124784469604, "learning_rate": 4.720608688538003e-06, "loss": 0.0091, "step": 5770 }, { "epoch": 0.09457579972183588, "grad_norm": 0.3586482107639313, "learning_rate": 4.728789986091795e-06, "loss": 0.0081, "step": 5780 }, { "epoch": 0.09473942567291173, "grad_norm": 0.2211388796567917, "learning_rate": 4.736971283645586e-06, "loss": 0.009, "step": 5790 }, { "epoch": 0.09490305162398756, "grad_norm": 0.6729218363761902, "learning_rate": 4.745152581199379e-06, "loss": 0.0102, "step": 5800 }, { "epoch": 0.0950666775750634, "grad_norm": 0.4604661464691162, "learning_rate": 4.753333878753171e-06, "loss": 0.0081, "step": 5810 }, { "epoch": 0.09523030352613925, "grad_norm": 0.25674644112586975, "learning_rate": 4.761515176306963e-06, "loss": 0.008, "step": 5820 }, { "epoch": 0.09539392947721509, "grad_norm": 0.2134886533021927, "learning_rate": 4.7696964738607545e-06, "loss": 0.0106, "step": 5830 }, { "epoch": 0.09555755542829093, "grad_norm": 0.10825169086456299, "learning_rate": 4.777877771414547e-06, "loss": 0.0066, "step": 5840 }, { "epoch": 0.09572118137936676, "grad_norm": 0.24757535755634308, "learning_rate": 4.786059068968339e-06, "loss": 0.0091, "step": 5850 }, { "epoch": 0.0958848073304426, "grad_norm": 0.25470975041389465, "learning_rate": 4.794240366522131e-06, "loss": 0.0094, "step": 5860 }, { "epoch": 0.09604843328151845, "grad_norm": 0.3910082280635834, "learning_rate": 4.802421664075923e-06, "loss": 0.0098, "step": 5870 }, { "epoch": 0.0962120592325943, "grad_norm": 0.24805599451065063, "learning_rate": 4.810602961629715e-06, "loss": 0.0053, "step": 5880 }, { "epoch": 0.09637568518367012, "grad_norm": 0.3489004671573639, "learning_rate": 4.818784259183507e-06, "loss": 0.0067, "step": 5890 }, { "epoch": 0.09653931113474597, "grad_norm": 0.5266171097755432, "learning_rate": 4.8269655567372995e-06, "loss": 0.0096, "step": 5900 }, { "epoch": 0.09670293708582181, "grad_norm": 0.2186962217092514, "learning_rate": 4.835146854291091e-06, "loss": 0.0075, "step": 5910 }, { "epoch": 0.09686656303689765, "grad_norm": 0.39976468682289124, "learning_rate": 4.843328151844883e-06, "loss": 0.0067, "step": 5920 }, { "epoch": 0.0970301889879735, "grad_norm": 0.23606425523757935, "learning_rate": 4.8515094493986755e-06, "loss": 0.0063, "step": 5930 }, { "epoch": 0.09719381493904933, "grad_norm": 0.1271335631608963, "learning_rate": 4.859690746952467e-06, "loss": 0.0064, "step": 5940 }, { "epoch": 0.09735744089012517, "grad_norm": 0.41366955637931824, "learning_rate": 4.867872044506259e-06, "loss": 0.0105, "step": 5950 }, { "epoch": 0.09752106684120102, "grad_norm": 0.533977210521698, "learning_rate": 4.8760533420600506e-06, "loss": 0.0095, "step": 5960 }, { "epoch": 0.09768469279227686, "grad_norm": 0.4168972969055176, "learning_rate": 4.884234639613843e-06, "loss": 0.0093, "step": 5970 }, { "epoch": 0.0978483187433527, "grad_norm": 0.8071966767311096, "learning_rate": 4.892415937167635e-06, "loss": 0.0076, "step": 5980 }, { "epoch": 0.09801194469442853, "grad_norm": 0.5576759576797485, "learning_rate": 4.900597234721427e-06, "loss": 0.006, "step": 5990 }, { "epoch": 0.09817557064550438, "grad_norm": 0.3682885468006134, "learning_rate": 4.908778532275219e-06, "loss": 0.0089, "step": 6000 }, { "epoch": 0.09833919659658022, "grad_norm": 0.17927001416683197, "learning_rate": 4.916959829829011e-06, "loss": 0.0104, "step": 6010 }, { "epoch": 0.09850282254765606, "grad_norm": 0.3189915716648102, "learning_rate": 4.925141127382803e-06, "loss": 0.0089, "step": 6020 }, { "epoch": 0.09866644849873189, "grad_norm": 0.16469670832157135, "learning_rate": 4.9333224249365955e-06, "loss": 0.0057, "step": 6030 }, { "epoch": 0.09883007444980774, "grad_norm": 0.3925216495990753, "learning_rate": 4.941503722490387e-06, "loss": 0.0073, "step": 6040 }, { "epoch": 0.09899370040088358, "grad_norm": 0.2063838541507721, "learning_rate": 4.949685020044179e-06, "loss": 0.0068, "step": 6050 }, { "epoch": 0.09915732635195942, "grad_norm": 0.05661248788237572, "learning_rate": 4.9578663175979715e-06, "loss": 0.0051, "step": 6060 }, { "epoch": 0.09932095230303527, "grad_norm": 0.6064292192459106, "learning_rate": 4.966047615151764e-06, "loss": 0.0073, "step": 6070 }, { "epoch": 0.0994845782541111, "grad_norm": 0.4550766050815582, "learning_rate": 4.974228912705555e-06, "loss": 0.0062, "step": 6080 }, { "epoch": 0.09964820420518694, "grad_norm": 0.2789970338344574, "learning_rate": 4.9824102102593474e-06, "loss": 0.01, "step": 6090 }, { "epoch": 0.09981183015626278, "grad_norm": 0.3552861213684082, "learning_rate": 4.99059150781314e-06, "loss": 0.0047, "step": 6100 }, { "epoch": 0.09997545610733863, "grad_norm": 0.13682997226715088, "learning_rate": 4.998772805366932e-06, "loss": 0.0069, "step": 6110 }, { "epoch": 0.10013908205841446, "grad_norm": 0.2569979131221771, "learning_rate": 5.006954102920723e-06, "loss": 0.0051, "step": 6120 }, { "epoch": 0.1003027080094903, "grad_norm": 0.28847816586494446, "learning_rate": 5.015135400474516e-06, "loss": 0.0103, "step": 6130 }, { "epoch": 0.10046633396056615, "grad_norm": 0.43194544315338135, "learning_rate": 5.023316698028308e-06, "loss": 0.0073, "step": 6140 }, { "epoch": 0.10062995991164199, "grad_norm": 0.3517124652862549, "learning_rate": 5.031497995582099e-06, "loss": 0.0093, "step": 6150 }, { "epoch": 0.10079358586271783, "grad_norm": 0.46990880370140076, "learning_rate": 5.0396792931358916e-06, "loss": 0.0092, "step": 6160 }, { "epoch": 0.10095721181379366, "grad_norm": 0.43302083015441895, "learning_rate": 5.047860590689684e-06, "loss": 0.0099, "step": 6170 }, { "epoch": 0.1011208377648695, "grad_norm": 0.7122765183448792, "learning_rate": 5.056041888243476e-06, "loss": 0.0084, "step": 6180 }, { "epoch": 0.10128446371594535, "grad_norm": 0.4819414019584656, "learning_rate": 5.0642231857972675e-06, "loss": 0.0089, "step": 6190 }, { "epoch": 0.1014480896670212, "grad_norm": 0.4990685284137726, "learning_rate": 5.07240448335106e-06, "loss": 0.0083, "step": 6200 }, { "epoch": 0.10161171561809704, "grad_norm": 0.5149896144866943, "learning_rate": 5.080585780904852e-06, "loss": 0.0059, "step": 6210 }, { "epoch": 0.10177534156917287, "grad_norm": 0.20002731680870056, "learning_rate": 5.088767078458644e-06, "loss": 0.009, "step": 6220 }, { "epoch": 0.10193896752024871, "grad_norm": 0.46108001470565796, "learning_rate": 5.096948376012436e-06, "loss": 0.0081, "step": 6230 }, { "epoch": 0.10210259347132455, "grad_norm": 0.24710477888584137, "learning_rate": 5.105129673566228e-06, "loss": 0.007, "step": 6240 }, { "epoch": 0.1022662194224004, "grad_norm": 0.24435612559318542, "learning_rate": 5.11331097112002e-06, "loss": 0.0058, "step": 6250 }, { "epoch": 0.10242984537347623, "grad_norm": 0.3854849934577942, "learning_rate": 5.1214922686738125e-06, "loss": 0.0056, "step": 6260 }, { "epoch": 0.10259347132455207, "grad_norm": 0.13587263226509094, "learning_rate": 5.129673566227604e-06, "loss": 0.0066, "step": 6270 }, { "epoch": 0.10275709727562791, "grad_norm": 0.44002869725227356, "learning_rate": 5.137854863781396e-06, "loss": 0.0099, "step": 6280 }, { "epoch": 0.10292072322670376, "grad_norm": 0.4461953938007355, "learning_rate": 5.1460361613351884e-06, "loss": 0.0088, "step": 6290 }, { "epoch": 0.1030843491777796, "grad_norm": 0.26877957582473755, "learning_rate": 5.154217458888981e-06, "loss": 0.0068, "step": 6300 }, { "epoch": 0.10324797512885543, "grad_norm": 0.34392890334129333, "learning_rate": 5.162398756442772e-06, "loss": 0.0077, "step": 6310 }, { "epoch": 0.10341160107993128, "grad_norm": 0.30123746395111084, "learning_rate": 5.170580053996564e-06, "loss": 0.0122, "step": 6320 }, { "epoch": 0.10357522703100712, "grad_norm": 0.33717355132102966, "learning_rate": 5.178761351550357e-06, "loss": 0.0099, "step": 6330 }, { "epoch": 0.10373885298208296, "grad_norm": 0.2597667872905731, "learning_rate": 5.186942649104149e-06, "loss": 0.0093, "step": 6340 }, { "epoch": 0.10390247893315879, "grad_norm": 0.27270177006721497, "learning_rate": 5.19512394665794e-06, "loss": 0.008, "step": 6350 }, { "epoch": 0.10406610488423464, "grad_norm": 0.21485239267349243, "learning_rate": 5.203305244211733e-06, "loss": 0.0105, "step": 6360 }, { "epoch": 0.10422973083531048, "grad_norm": 0.42165106534957886, "learning_rate": 5.211486541765525e-06, "loss": 0.0083, "step": 6370 }, { "epoch": 0.10439335678638632, "grad_norm": 0.6050042510032654, "learning_rate": 5.219667839319317e-06, "loss": 0.0063, "step": 6380 }, { "epoch": 0.10455698273746217, "grad_norm": 0.19433382153511047, "learning_rate": 5.2278491368731085e-06, "loss": 0.0075, "step": 6390 }, { "epoch": 0.104720608688538, "grad_norm": 0.33693307638168335, "learning_rate": 5.236030434426901e-06, "loss": 0.0086, "step": 6400 }, { "epoch": 0.10488423463961384, "grad_norm": 1.0649855136871338, "learning_rate": 5.244211731980693e-06, "loss": 0.0099, "step": 6410 }, { "epoch": 0.10504786059068968, "grad_norm": 0.13170677423477173, "learning_rate": 5.252393029534485e-06, "loss": 0.0072, "step": 6420 }, { "epoch": 0.10521148654176553, "grad_norm": 0.4372705817222595, "learning_rate": 5.260574327088277e-06, "loss": 0.0093, "step": 6430 }, { "epoch": 0.10537511249284136, "grad_norm": 0.15160751342773438, "learning_rate": 5.268755624642069e-06, "loss": 0.006, "step": 6440 }, { "epoch": 0.1055387384439172, "grad_norm": 0.13425381481647491, "learning_rate": 5.276936922195861e-06, "loss": 0.0081, "step": 6450 }, { "epoch": 0.10570236439499305, "grad_norm": 0.2675306499004364, "learning_rate": 5.2851182197496535e-06, "loss": 0.0088, "step": 6460 }, { "epoch": 0.10586599034606889, "grad_norm": 0.5187183618545532, "learning_rate": 5.293299517303445e-06, "loss": 0.0102, "step": 6470 }, { "epoch": 0.10602961629714473, "grad_norm": 0.24732357263565063, "learning_rate": 5.301480814857237e-06, "loss": 0.0079, "step": 6480 }, { "epoch": 0.10619324224822056, "grad_norm": 0.8667769432067871, "learning_rate": 5.3096621124110295e-06, "loss": 0.011, "step": 6490 }, { "epoch": 0.1063568681992964, "grad_norm": 0.383929044008255, "learning_rate": 5.317843409964821e-06, "loss": 0.006, "step": 6500 }, { "epoch": 0.10652049415037225, "grad_norm": 0.24571135640144348, "learning_rate": 5.326024707518613e-06, "loss": 0.0095, "step": 6510 }, { "epoch": 0.1066841201014481, "grad_norm": 0.33501678705215454, "learning_rate": 5.334206005072405e-06, "loss": 0.0082, "step": 6520 }, { "epoch": 0.10684774605252394, "grad_norm": 0.3351859450340271, "learning_rate": 5.342387302626198e-06, "loss": 0.0076, "step": 6530 }, { "epoch": 0.10701137200359977, "grad_norm": 0.3088167607784271, "learning_rate": 5.350568600179989e-06, "loss": 0.0093, "step": 6540 }, { "epoch": 0.10717499795467561, "grad_norm": 0.48187342286109924, "learning_rate": 5.358749897733781e-06, "loss": 0.0062, "step": 6550 }, { "epoch": 0.10733862390575145, "grad_norm": 0.5277681946754456, "learning_rate": 5.366931195287574e-06, "loss": 0.0071, "step": 6560 }, { "epoch": 0.1075022498568273, "grad_norm": 0.33420518040657043, "learning_rate": 5.375112492841366e-06, "loss": 0.0101, "step": 6570 }, { "epoch": 0.10766587580790313, "grad_norm": 0.4426640570163727, "learning_rate": 5.383293790395157e-06, "loss": 0.0059, "step": 6580 }, { "epoch": 0.10782950175897897, "grad_norm": 1.3722050189971924, "learning_rate": 5.3914750879489496e-06, "loss": 0.0094, "step": 6590 }, { "epoch": 0.10799312771005481, "grad_norm": 0.5037874579429626, "learning_rate": 5.399656385502741e-06, "loss": 0.0078, "step": 6600 }, { "epoch": 0.10815675366113066, "grad_norm": 0.3484451174736023, "learning_rate": 5.407837683056532e-06, "loss": 0.0157, "step": 6610 }, { "epoch": 0.1083203796122065, "grad_norm": 0.4415138363838196, "learning_rate": 5.416018980610325e-06, "loss": 0.0067, "step": 6620 }, { "epoch": 0.10848400556328233, "grad_norm": 0.34858110547065735, "learning_rate": 5.424200278164117e-06, "loss": 0.0054, "step": 6630 }, { "epoch": 0.10864763151435818, "grad_norm": 0.2884027361869812, "learning_rate": 5.432381575717909e-06, "loss": 0.0108, "step": 6640 }, { "epoch": 0.10881125746543402, "grad_norm": 0.1966114342212677, "learning_rate": 5.440562873271701e-06, "loss": 0.0049, "step": 6650 }, { "epoch": 0.10897488341650986, "grad_norm": 0.1636902391910553, "learning_rate": 5.448744170825493e-06, "loss": 0.0071, "step": 6660 }, { "epoch": 0.10913850936758569, "grad_norm": 0.21612730622291565, "learning_rate": 5.456925468379285e-06, "loss": 0.0077, "step": 6670 }, { "epoch": 0.10930213531866154, "grad_norm": 0.36195504665374756, "learning_rate": 5.465106765933077e-06, "loss": 0.0069, "step": 6680 }, { "epoch": 0.10946576126973738, "grad_norm": 0.4107455909252167, "learning_rate": 5.473288063486869e-06, "loss": 0.0072, "step": 6690 }, { "epoch": 0.10962938722081322, "grad_norm": 0.3034416437149048, "learning_rate": 5.481469361040661e-06, "loss": 0.0119, "step": 6700 }, { "epoch": 0.10979301317188907, "grad_norm": 0.3874293267726898, "learning_rate": 5.489650658594453e-06, "loss": 0.0069, "step": 6710 }, { "epoch": 0.1099566391229649, "grad_norm": 0.6343787908554077, "learning_rate": 5.497831956148246e-06, "loss": 0.0124, "step": 6720 }, { "epoch": 0.11012026507404074, "grad_norm": 0.20312941074371338, "learning_rate": 5.506013253702037e-06, "loss": 0.0065, "step": 6730 }, { "epoch": 0.11028389102511658, "grad_norm": 0.2851080000400543, "learning_rate": 5.514194551255829e-06, "loss": 0.006, "step": 6740 }, { "epoch": 0.11044751697619243, "grad_norm": 0.13193948566913605, "learning_rate": 5.5223758488096215e-06, "loss": 0.0063, "step": 6750 }, { "epoch": 0.11061114292726827, "grad_norm": 0.5235950946807861, "learning_rate": 5.530557146363413e-06, "loss": 0.0091, "step": 6760 }, { "epoch": 0.1107747688783441, "grad_norm": 0.3150160014629364, "learning_rate": 5.538738443917205e-06, "loss": 0.007, "step": 6770 }, { "epoch": 0.11093839482941995, "grad_norm": 0.9744367003440857, "learning_rate": 5.5469197414709975e-06, "loss": 0.0067, "step": 6780 }, { "epoch": 0.11110202078049579, "grad_norm": 0.4049106538295746, "learning_rate": 5.55510103902479e-06, "loss": 0.0055, "step": 6790 }, { "epoch": 0.11126564673157163, "grad_norm": 0.18236057460308075, "learning_rate": 5.563282336578581e-06, "loss": 0.0054, "step": 6800 }, { "epoch": 0.11142927268264746, "grad_norm": 0.2937847971916199, "learning_rate": 5.571463634132373e-06, "loss": 0.0116, "step": 6810 }, { "epoch": 0.1115928986337233, "grad_norm": 0.3066152334213257, "learning_rate": 5.579644931686166e-06, "loss": 0.0107, "step": 6820 }, { "epoch": 0.11175652458479915, "grad_norm": 0.4665585458278656, "learning_rate": 5.587826229239958e-06, "loss": 0.0097, "step": 6830 }, { "epoch": 0.111920150535875, "grad_norm": 0.11566019058227539, "learning_rate": 5.596007526793749e-06, "loss": 0.0086, "step": 6840 }, { "epoch": 0.11208377648695084, "grad_norm": 0.29676222801208496, "learning_rate": 5.604188824347542e-06, "loss": 0.0082, "step": 6850 }, { "epoch": 0.11224740243802667, "grad_norm": 0.3066006898880005, "learning_rate": 5.612370121901334e-06, "loss": 0.0094, "step": 6860 }, { "epoch": 0.11241102838910251, "grad_norm": 0.44505757093429565, "learning_rate": 5.620551419455126e-06, "loss": 0.0138, "step": 6870 }, { "epoch": 0.11257465434017835, "grad_norm": 0.2828943729400635, "learning_rate": 5.6287327170089176e-06, "loss": 0.0094, "step": 6880 }, { "epoch": 0.1127382802912542, "grad_norm": 0.19990353286266327, "learning_rate": 5.63691401456271e-06, "loss": 0.0039, "step": 6890 }, { "epoch": 0.11290190624233003, "grad_norm": 0.11108624190092087, "learning_rate": 5.645095312116502e-06, "loss": 0.0117, "step": 6900 }, { "epoch": 0.11306553219340587, "grad_norm": 0.2445783168077469, "learning_rate": 5.653276609670294e-06, "loss": 0.0078, "step": 6910 }, { "epoch": 0.11322915814448171, "grad_norm": 0.1478024274110794, "learning_rate": 5.661457907224086e-06, "loss": 0.0068, "step": 6920 }, { "epoch": 0.11339278409555756, "grad_norm": 0.30458658933639526, "learning_rate": 5.669639204777878e-06, "loss": 0.008, "step": 6930 }, { "epoch": 0.1135564100466334, "grad_norm": 0.254777729511261, "learning_rate": 5.67782050233167e-06, "loss": 0.0073, "step": 6940 }, { "epoch": 0.11372003599770923, "grad_norm": 0.3834642469882965, "learning_rate": 5.6860017998854625e-06, "loss": 0.0062, "step": 6950 }, { "epoch": 0.11388366194878508, "grad_norm": 0.275937557220459, "learning_rate": 5.694183097439254e-06, "loss": 0.0077, "step": 6960 }, { "epoch": 0.11404728789986092, "grad_norm": 0.2779551148414612, "learning_rate": 5.702364394993046e-06, "loss": 0.0096, "step": 6970 }, { "epoch": 0.11421091385093676, "grad_norm": 0.1703820675611496, "learning_rate": 5.7105456925468385e-06, "loss": 0.0087, "step": 6980 }, { "epoch": 0.11437453980201259, "grad_norm": 0.26137998700141907, "learning_rate": 5.718726990100631e-06, "loss": 0.0079, "step": 6990 }, { "epoch": 0.11453816575308844, "grad_norm": 0.3290702700614929, "learning_rate": 5.726908287654422e-06, "loss": 0.0062, "step": 7000 }, { "epoch": 0.11470179170416428, "grad_norm": 0.2608306109905243, "learning_rate": 5.7350895852082144e-06, "loss": 0.0062, "step": 7010 }, { "epoch": 0.11486541765524012, "grad_norm": 0.36139845848083496, "learning_rate": 5.743270882762007e-06, "loss": 0.0068, "step": 7020 }, { "epoch": 0.11502904360631597, "grad_norm": 0.32653486728668213, "learning_rate": 5.751452180315799e-06, "loss": 0.0093, "step": 7030 }, { "epoch": 0.1151926695573918, "grad_norm": 0.4448296129703522, "learning_rate": 5.75963347786959e-06, "loss": 0.0102, "step": 7040 }, { "epoch": 0.11535629550846764, "grad_norm": 0.7444778084754944, "learning_rate": 5.767814775423383e-06, "loss": 0.0078, "step": 7050 }, { "epoch": 0.11551992145954348, "grad_norm": 0.1455536186695099, "learning_rate": 5.775996072977175e-06, "loss": 0.0079, "step": 7060 }, { "epoch": 0.11568354741061933, "grad_norm": 0.10789697617292404, "learning_rate": 5.784177370530966e-06, "loss": 0.0097, "step": 7070 }, { "epoch": 0.11584717336169517, "grad_norm": 0.3874007761478424, "learning_rate": 5.7923586680847586e-06, "loss": 0.0116, "step": 7080 }, { "epoch": 0.116010799312771, "grad_norm": 0.10515554994344711, "learning_rate": 5.800539965638551e-06, "loss": 0.0092, "step": 7090 }, { "epoch": 0.11617442526384684, "grad_norm": 0.6693029403686523, "learning_rate": 5.808721263192343e-06, "loss": 0.0081, "step": 7100 }, { "epoch": 0.11633805121492269, "grad_norm": 0.2853403091430664, "learning_rate": 5.8169025607461345e-06, "loss": 0.0073, "step": 7110 }, { "epoch": 0.11650167716599853, "grad_norm": 0.27809441089630127, "learning_rate": 5.825083858299927e-06, "loss": 0.0084, "step": 7120 }, { "epoch": 0.11666530311707436, "grad_norm": 0.340063214302063, "learning_rate": 5.833265155853719e-06, "loss": 0.0086, "step": 7130 }, { "epoch": 0.1168289290681502, "grad_norm": 0.1692095696926117, "learning_rate": 5.841446453407511e-06, "loss": 0.0047, "step": 7140 }, { "epoch": 0.11699255501922605, "grad_norm": 0.19236648082733154, "learning_rate": 5.849627750961303e-06, "loss": 0.0086, "step": 7150 }, { "epoch": 0.11715618097030189, "grad_norm": 0.3904006779193878, "learning_rate": 5.857809048515095e-06, "loss": 0.0096, "step": 7160 }, { "epoch": 0.11731980692137774, "grad_norm": 0.40139034390449524, "learning_rate": 5.865990346068887e-06, "loss": 0.0066, "step": 7170 }, { "epoch": 0.11748343287245357, "grad_norm": 0.1781042069196701, "learning_rate": 5.8741716436226795e-06, "loss": 0.0073, "step": 7180 }, { "epoch": 0.11764705882352941, "grad_norm": 0.3853304982185364, "learning_rate": 5.882352941176471e-06, "loss": 0.0091, "step": 7190 }, { "epoch": 0.11781068477460525, "grad_norm": 0.19263824820518494, "learning_rate": 5.890534238730263e-06, "loss": 0.0046, "step": 7200 }, { "epoch": 0.1179743107256811, "grad_norm": 0.1833399087190628, "learning_rate": 5.8987155362840554e-06, "loss": 0.0069, "step": 7210 }, { "epoch": 0.11813793667675693, "grad_norm": 0.4048713743686676, "learning_rate": 5.906896833837848e-06, "loss": 0.0071, "step": 7220 }, { "epoch": 0.11830156262783277, "grad_norm": 0.5216386318206787, "learning_rate": 5.915078131391639e-06, "loss": 0.0046, "step": 7230 }, { "epoch": 0.11846518857890861, "grad_norm": 0.2489604651927948, "learning_rate": 5.923259428945431e-06, "loss": 0.0095, "step": 7240 }, { "epoch": 0.11862881452998446, "grad_norm": 0.17428308725357056, "learning_rate": 5.931440726499224e-06, "loss": 0.0038, "step": 7250 }, { "epoch": 0.1187924404810603, "grad_norm": 0.06524112820625305, "learning_rate": 5.939622024053016e-06, "loss": 0.0106, "step": 7260 }, { "epoch": 0.11895606643213613, "grad_norm": 0.3374137878417969, "learning_rate": 5.947803321606807e-06, "loss": 0.0065, "step": 7270 }, { "epoch": 0.11911969238321198, "grad_norm": 0.13164451718330383, "learning_rate": 5.9559846191606e-06, "loss": 0.0064, "step": 7280 }, { "epoch": 0.11928331833428782, "grad_norm": 0.46344688534736633, "learning_rate": 5.964165916714392e-06, "loss": 0.0079, "step": 7290 }, { "epoch": 0.11944694428536366, "grad_norm": 0.2627471089363098, "learning_rate": 5.972347214268184e-06, "loss": 0.0054, "step": 7300 }, { "epoch": 0.11961057023643949, "grad_norm": 0.1846577227115631, "learning_rate": 5.9805285118219755e-06, "loss": 0.008, "step": 7310 }, { "epoch": 0.11977419618751534, "grad_norm": 0.20860180258750916, "learning_rate": 5.988709809375768e-06, "loss": 0.0077, "step": 7320 }, { "epoch": 0.11993782213859118, "grad_norm": 0.48983854055404663, "learning_rate": 5.99689110692956e-06, "loss": 0.0065, "step": 7330 }, { "epoch": 0.12010144808966702, "grad_norm": 0.22185811400413513, "learning_rate": 6.005072404483352e-06, "loss": 0.0064, "step": 7340 }, { "epoch": 0.12026507404074287, "grad_norm": 0.29743048548698425, "learning_rate": 6.013253702037144e-06, "loss": 0.0127, "step": 7350 }, { "epoch": 0.1204286999918187, "grad_norm": 0.24011510610580444, "learning_rate": 6.021434999590936e-06, "loss": 0.0082, "step": 7360 }, { "epoch": 0.12059232594289454, "grad_norm": 0.5802230834960938, "learning_rate": 6.029616297144728e-06, "loss": 0.0092, "step": 7370 }, { "epoch": 0.12075595189397038, "grad_norm": 0.29700222611427307, "learning_rate": 6.0377975946985205e-06, "loss": 0.007, "step": 7380 }, { "epoch": 0.12091957784504623, "grad_norm": 0.4346696436405182, "learning_rate": 6.045978892252312e-06, "loss": 0.0126, "step": 7390 }, { "epoch": 0.12108320379612207, "grad_norm": 0.27801400423049927, "learning_rate": 6.054160189806104e-06, "loss": 0.0058, "step": 7400 }, { "epoch": 0.1212468297471979, "grad_norm": 0.17639249563217163, "learning_rate": 6.0623414873598965e-06, "loss": 0.0058, "step": 7410 }, { "epoch": 0.12141045569827374, "grad_norm": 0.20532791316509247, "learning_rate": 6.070522784913688e-06, "loss": 0.0102, "step": 7420 }, { "epoch": 0.12157408164934959, "grad_norm": 0.28346556425094604, "learning_rate": 6.07870408246748e-06, "loss": 0.0085, "step": 7430 }, { "epoch": 0.12173770760042543, "grad_norm": 0.21420612931251526, "learning_rate": 6.086885380021272e-06, "loss": 0.006, "step": 7440 }, { "epoch": 0.12190133355150126, "grad_norm": 0.14299224317073822, "learning_rate": 6.095066677575065e-06, "loss": 0.0078, "step": 7450 }, { "epoch": 0.1220649595025771, "grad_norm": 0.3003006875514984, "learning_rate": 6.103247975128856e-06, "loss": 0.0093, "step": 7460 }, { "epoch": 0.12222858545365295, "grad_norm": 0.22845470905303955, "learning_rate": 6.111429272682648e-06, "loss": 0.0052, "step": 7470 }, { "epoch": 0.12239221140472879, "grad_norm": 0.2701752483844757, "learning_rate": 6.11961057023644e-06, "loss": 0.0046, "step": 7480 }, { "epoch": 0.12255583735580464, "grad_norm": 0.20579245686531067, "learning_rate": 6.127791867790231e-06, "loss": 0.0065, "step": 7490 }, { "epoch": 0.12271946330688047, "grad_norm": 0.31625252962112427, "learning_rate": 6.1359731653440234e-06, "loss": 0.0077, "step": 7500 }, { "epoch": 0.12288308925795631, "grad_norm": 0.2839002013206482, "learning_rate": 6.144154462897816e-06, "loss": 0.0107, "step": 7510 }, { "epoch": 0.12304671520903215, "grad_norm": 0.9018831253051758, "learning_rate": 6.152335760451608e-06, "loss": 0.0081, "step": 7520 }, { "epoch": 0.123210341160108, "grad_norm": 0.6090940237045288, "learning_rate": 6.160517058005399e-06, "loss": 0.0052, "step": 7530 }, { "epoch": 0.12337396711118383, "grad_norm": 0.3708903193473816, "learning_rate": 6.168698355559192e-06, "loss": 0.0098, "step": 7540 }, { "epoch": 0.12353759306225967, "grad_norm": 0.2477022409439087, "learning_rate": 6.176879653112984e-06, "loss": 0.0084, "step": 7550 }, { "epoch": 0.12370121901333551, "grad_norm": 0.21271197497844696, "learning_rate": 6.185060950666776e-06, "loss": 0.0073, "step": 7560 }, { "epoch": 0.12386484496441136, "grad_norm": 0.45584172010421753, "learning_rate": 6.193242248220568e-06, "loss": 0.0077, "step": 7570 }, { "epoch": 0.1240284709154872, "grad_norm": 0.2077159285545349, "learning_rate": 6.20142354577436e-06, "loss": 0.0071, "step": 7580 }, { "epoch": 0.12419209686656303, "grad_norm": 0.3583526313304901, "learning_rate": 6.209604843328152e-06, "loss": 0.0095, "step": 7590 }, { "epoch": 0.12435572281763888, "grad_norm": 0.3761270046234131, "learning_rate": 6.217786140881944e-06, "loss": 0.0079, "step": 7600 }, { "epoch": 0.12451934876871472, "grad_norm": 0.36139416694641113, "learning_rate": 6.225967438435736e-06, "loss": 0.0079, "step": 7610 }, { "epoch": 0.12468297471979056, "grad_norm": 0.33850106596946716, "learning_rate": 6.234148735989528e-06, "loss": 0.0081, "step": 7620 }, { "epoch": 0.1248466006708664, "grad_norm": 0.5654309988021851, "learning_rate": 6.24233003354332e-06, "loss": 0.0063, "step": 7630 }, { "epoch": 0.12501022662194225, "grad_norm": 0.4743385314941406, "learning_rate": 6.250511331097113e-06, "loss": 0.0057, "step": 7640 }, { "epoch": 0.12517385257301808, "grad_norm": 0.4442897439002991, "learning_rate": 6.258692628650904e-06, "loss": 0.0058, "step": 7650 }, { "epoch": 0.1253374785240939, "grad_norm": 0.3616870045661926, "learning_rate": 6.266873926204696e-06, "loss": 0.0064, "step": 7660 }, { "epoch": 0.12550110447516977, "grad_norm": 0.2377433180809021, "learning_rate": 6.2750552237584885e-06, "loss": 0.0058, "step": 7670 }, { "epoch": 0.1256647304262456, "grad_norm": 0.29012423753738403, "learning_rate": 6.28323652131228e-06, "loss": 0.0088, "step": 7680 }, { "epoch": 0.12582835637732145, "grad_norm": 0.5089976191520691, "learning_rate": 6.291417818866072e-06, "loss": 0.0113, "step": 7690 }, { "epoch": 0.12599198232839728, "grad_norm": 0.19199173152446747, "learning_rate": 6.2995991164198645e-06, "loss": 0.0059, "step": 7700 }, { "epoch": 0.1261556082794731, "grad_norm": 0.16154931485652924, "learning_rate": 6.307780413973657e-06, "loss": 0.0059, "step": 7710 }, { "epoch": 0.12631923423054897, "grad_norm": 0.18283475935459137, "learning_rate": 6.315961711527448e-06, "loss": 0.0052, "step": 7720 }, { "epoch": 0.1264828601816248, "grad_norm": 0.08653350174427032, "learning_rate": 6.32414300908124e-06, "loss": 0.0062, "step": 7730 }, { "epoch": 0.12664648613270066, "grad_norm": 0.44227510690689087, "learning_rate": 6.332324306635033e-06, "loss": 0.0075, "step": 7740 }, { "epoch": 0.1268101120837765, "grad_norm": 0.45884716510772705, "learning_rate": 6.340505604188825e-06, "loss": 0.007, "step": 7750 }, { "epoch": 0.12697373803485232, "grad_norm": 0.3251267671585083, "learning_rate": 6.348686901742616e-06, "loss": 0.009, "step": 7760 }, { "epoch": 0.12713736398592818, "grad_norm": 0.39178067445755005, "learning_rate": 6.356868199296409e-06, "loss": 0.0105, "step": 7770 }, { "epoch": 0.127300989937004, "grad_norm": 0.5826765894889832, "learning_rate": 6.365049496850201e-06, "loss": 0.0056, "step": 7780 }, { "epoch": 0.12746461588807986, "grad_norm": 0.34660714864730835, "learning_rate": 6.373230794403993e-06, "loss": 0.0099, "step": 7790 }, { "epoch": 0.1276282418391557, "grad_norm": 0.17637041211128235, "learning_rate": 6.3814120919577845e-06, "loss": 0.0089, "step": 7800 }, { "epoch": 0.12779186779023152, "grad_norm": 0.40063443779945374, "learning_rate": 6.389593389511577e-06, "loss": 0.0101, "step": 7810 }, { "epoch": 0.12795549374130738, "grad_norm": 0.38279202580451965, "learning_rate": 6.397774687065369e-06, "loss": 0.0035, "step": 7820 }, { "epoch": 0.1281191196923832, "grad_norm": 0.49693751335144043, "learning_rate": 6.405955984619161e-06, "loss": 0.0068, "step": 7830 }, { "epoch": 0.12828274564345904, "grad_norm": 0.40038615465164185, "learning_rate": 6.414137282172953e-06, "loss": 0.0088, "step": 7840 }, { "epoch": 0.1284463715945349, "grad_norm": 0.10365238785743713, "learning_rate": 6.422318579726745e-06, "loss": 0.004, "step": 7850 }, { "epoch": 0.12860999754561073, "grad_norm": 0.16106294095516205, "learning_rate": 6.430499877280537e-06, "loss": 0.009, "step": 7860 }, { "epoch": 0.12877362349668658, "grad_norm": 0.6523407697677612, "learning_rate": 6.4386811748343295e-06, "loss": 0.0054, "step": 7870 }, { "epoch": 0.12893724944776241, "grad_norm": 0.2684585750102997, "learning_rate": 6.446862472388121e-06, "loss": 0.0075, "step": 7880 }, { "epoch": 0.12910087539883824, "grad_norm": 0.2763690948486328, "learning_rate": 6.455043769941913e-06, "loss": 0.0071, "step": 7890 }, { "epoch": 0.1292645013499141, "grad_norm": 0.2814697325229645, "learning_rate": 6.4632250674957055e-06, "loss": 0.0079, "step": 7900 }, { "epoch": 0.12942812730098993, "grad_norm": 0.3585580289363861, "learning_rate": 6.471406365049498e-06, "loss": 0.0105, "step": 7910 }, { "epoch": 0.1295917532520658, "grad_norm": 0.16478231549263, "learning_rate": 6.479587662603289e-06, "loss": 0.0056, "step": 7920 }, { "epoch": 0.12975537920314162, "grad_norm": 0.2760835289955139, "learning_rate": 6.4877689601570814e-06, "loss": 0.0057, "step": 7930 }, { "epoch": 0.12991900515421745, "grad_norm": 0.11307619512081146, "learning_rate": 6.495950257710874e-06, "loss": 0.0053, "step": 7940 }, { "epoch": 0.1300826311052933, "grad_norm": 0.2428617775440216, "learning_rate": 6.504131555264666e-06, "loss": 0.0073, "step": 7950 }, { "epoch": 0.13024625705636914, "grad_norm": 0.31759703159332275, "learning_rate": 6.512312852818457e-06, "loss": 0.0054, "step": 7960 }, { "epoch": 0.130409883007445, "grad_norm": 0.25207120180130005, "learning_rate": 6.52049415037225e-06, "loss": 0.0059, "step": 7970 }, { "epoch": 0.13057350895852082, "grad_norm": 0.1636662483215332, "learning_rate": 6.528675447926042e-06, "loss": 0.0061, "step": 7980 }, { "epoch": 0.13073713490959665, "grad_norm": 0.27267494797706604, "learning_rate": 6.536856745479834e-06, "loss": 0.008, "step": 7990 }, { "epoch": 0.1309007608606725, "grad_norm": 0.4470721185207367, "learning_rate": 6.5450380430336256e-06, "loss": 0.0088, "step": 8000 }, { "epoch": 0.13106438681174834, "grad_norm": 0.47626909613609314, "learning_rate": 6.553219340587418e-06, "loss": 0.0081, "step": 8010 }, { "epoch": 0.1312280127628242, "grad_norm": 0.4098890721797943, "learning_rate": 6.56140063814121e-06, "loss": 0.0057, "step": 8020 }, { "epoch": 0.13139163871390003, "grad_norm": 0.35376524925231934, "learning_rate": 6.5695819356950015e-06, "loss": 0.0069, "step": 8030 }, { "epoch": 0.13155526466497586, "grad_norm": 0.10104166716337204, "learning_rate": 6.577763233248794e-06, "loss": 0.0068, "step": 8040 }, { "epoch": 0.13171889061605171, "grad_norm": 0.20320932567119598, "learning_rate": 6.585944530802586e-06, "loss": 0.0052, "step": 8050 }, { "epoch": 0.13188251656712754, "grad_norm": 0.4781796634197235, "learning_rate": 6.594125828356378e-06, "loss": 0.0042, "step": 8060 }, { "epoch": 0.13204614251820337, "grad_norm": 0.35332396626472473, "learning_rate": 6.60230712591017e-06, "loss": 0.0064, "step": 8070 }, { "epoch": 0.13220976846927923, "grad_norm": 0.2873738706111908, "learning_rate": 6.610488423463962e-06, "loss": 0.0066, "step": 8080 }, { "epoch": 0.13237339442035506, "grad_norm": 0.27414822578430176, "learning_rate": 6.618669721017754e-06, "loss": 0.0063, "step": 8090 }, { "epoch": 0.13253702037143092, "grad_norm": 0.24895186722278595, "learning_rate": 6.6268510185715465e-06, "loss": 0.0049, "step": 8100 }, { "epoch": 0.13270064632250675, "grad_norm": 0.17847618460655212, "learning_rate": 6.635032316125338e-06, "loss": 0.0055, "step": 8110 }, { "epoch": 0.13286427227358258, "grad_norm": 0.28083527088165283, "learning_rate": 6.64321361367913e-06, "loss": 0.0058, "step": 8120 }, { "epoch": 0.13302789822465844, "grad_norm": 0.4541753828525543, "learning_rate": 6.6513949112329224e-06, "loss": 0.0073, "step": 8130 }, { "epoch": 0.13319152417573427, "grad_norm": 0.41221702098846436, "learning_rate": 6.659576208786715e-06, "loss": 0.0048, "step": 8140 }, { "epoch": 0.13335515012681012, "grad_norm": 0.49959853291511536, "learning_rate": 6.667757506340506e-06, "loss": 0.0073, "step": 8150 }, { "epoch": 0.13351877607788595, "grad_norm": 0.2927177846431732, "learning_rate": 6.675938803894298e-06, "loss": 0.0101, "step": 8160 }, { "epoch": 0.13368240202896178, "grad_norm": 0.42072921991348267, "learning_rate": 6.684120101448091e-06, "loss": 0.0066, "step": 8170 }, { "epoch": 0.13384602798003764, "grad_norm": 0.49477970600128174, "learning_rate": 6.692301399001883e-06, "loss": 0.0077, "step": 8180 }, { "epoch": 0.13400965393111347, "grad_norm": 0.2319003790616989, "learning_rate": 6.700482696555674e-06, "loss": 0.006, "step": 8190 }, { "epoch": 0.13417327988218933, "grad_norm": 0.44070616364479065, "learning_rate": 6.708663994109467e-06, "loss": 0.0106, "step": 8200 }, { "epoch": 0.13433690583326516, "grad_norm": 0.39652708172798157, "learning_rate": 6.716845291663259e-06, "loss": 0.007, "step": 8210 }, { "epoch": 0.134500531784341, "grad_norm": 0.3773319125175476, "learning_rate": 6.725026589217051e-06, "loss": 0.0062, "step": 8220 }, { "epoch": 0.13466415773541685, "grad_norm": 0.2651503384113312, "learning_rate": 6.7332078867708425e-06, "loss": 0.0076, "step": 8230 }, { "epoch": 0.13482778368649267, "grad_norm": 0.2574477195739746, "learning_rate": 6.741389184324635e-06, "loss": 0.0067, "step": 8240 }, { "epoch": 0.1349914096375685, "grad_norm": 0.18956246972084045, "learning_rate": 6.749570481878427e-06, "loss": 0.0062, "step": 8250 }, { "epoch": 0.13515503558864436, "grad_norm": 0.40049681067466736, "learning_rate": 6.757751779432219e-06, "loss": 0.0064, "step": 8260 }, { "epoch": 0.1353186615397202, "grad_norm": 0.3965357840061188, "learning_rate": 6.765933076986011e-06, "loss": 0.0056, "step": 8270 }, { "epoch": 0.13548228749079605, "grad_norm": 0.2578824758529663, "learning_rate": 6.774114374539803e-06, "loss": 0.0064, "step": 8280 }, { "epoch": 0.13564591344187188, "grad_norm": 0.3541015386581421, "learning_rate": 6.782295672093595e-06, "loss": 0.0079, "step": 8290 }, { "epoch": 0.1358095393929477, "grad_norm": 0.4172951579093933, "learning_rate": 6.7904769696473875e-06, "loss": 0.0104, "step": 8300 }, { "epoch": 0.13597316534402357, "grad_norm": 0.21050603687763214, "learning_rate": 6.798658267201179e-06, "loss": 0.0113, "step": 8310 }, { "epoch": 0.1361367912950994, "grad_norm": 0.22825267910957336, "learning_rate": 6.806839564754971e-06, "loss": 0.0076, "step": 8320 }, { "epoch": 0.13630041724617525, "grad_norm": 0.22266684472560883, "learning_rate": 6.8150208623087635e-06, "loss": 0.007, "step": 8330 }, { "epoch": 0.13646404319725108, "grad_norm": 0.15727634727954865, "learning_rate": 6.823202159862555e-06, "loss": 0.0078, "step": 8340 }, { "epoch": 0.1366276691483269, "grad_norm": 0.20662276446819305, "learning_rate": 6.831383457416347e-06, "loss": 0.0053, "step": 8350 }, { "epoch": 0.13679129509940277, "grad_norm": 0.3878669738769531, "learning_rate": 6.8395647549701386e-06, "loss": 0.0045, "step": 8360 }, { "epoch": 0.1369549210504786, "grad_norm": 0.3964359760284424, "learning_rate": 6.84774605252393e-06, "loss": 0.0055, "step": 8370 }, { "epoch": 0.13711854700155446, "grad_norm": 0.44892069697380066, "learning_rate": 6.855927350077722e-06, "loss": 0.0083, "step": 8380 }, { "epoch": 0.1372821729526303, "grad_norm": 0.32658880949020386, "learning_rate": 6.8641086476315145e-06, "loss": 0.0066, "step": 8390 }, { "epoch": 0.13744579890370612, "grad_norm": 0.21312038600444794, "learning_rate": 6.872289945185307e-06, "loss": 0.0085, "step": 8400 }, { "epoch": 0.13760942485478198, "grad_norm": 0.3220922350883484, "learning_rate": 6.880471242739098e-06, "loss": 0.0058, "step": 8410 }, { "epoch": 0.1377730508058578, "grad_norm": 0.2851550877094269, "learning_rate": 6.8886525402928904e-06, "loss": 0.0058, "step": 8420 }, { "epoch": 0.13793667675693366, "grad_norm": 0.5871424674987793, "learning_rate": 6.896833837846683e-06, "loss": 0.0083, "step": 8430 }, { "epoch": 0.1381003027080095, "grad_norm": 0.21022167801856995, "learning_rate": 6.905015135400475e-06, "loss": 0.007, "step": 8440 }, { "epoch": 0.13826392865908532, "grad_norm": 0.3552643358707428, "learning_rate": 6.913196432954266e-06, "loss": 0.0072, "step": 8450 }, { "epoch": 0.13842755461016118, "grad_norm": 0.2583826184272766, "learning_rate": 6.921377730508059e-06, "loss": 0.007, "step": 8460 }, { "epoch": 0.138591180561237, "grad_norm": 0.05755360797047615, "learning_rate": 6.929559028061851e-06, "loss": 0.0083, "step": 8470 }, { "epoch": 0.13875480651231284, "grad_norm": 0.34241628646850586, "learning_rate": 6.937740325615643e-06, "loss": 0.0065, "step": 8480 }, { "epoch": 0.1389184324633887, "grad_norm": 0.16852635145187378, "learning_rate": 6.945921623169435e-06, "loss": 0.0092, "step": 8490 }, { "epoch": 0.13908205841446453, "grad_norm": 0.3847416937351227, "learning_rate": 6.954102920723227e-06, "loss": 0.0058, "step": 8500 }, { "epoch": 0.13924568436554038, "grad_norm": 0.4543159008026123, "learning_rate": 6.962284218277019e-06, "loss": 0.0075, "step": 8510 }, { "epoch": 0.13940931031661621, "grad_norm": 0.20325085520744324, "learning_rate": 6.970465515830811e-06, "loss": 0.0051, "step": 8520 }, { "epoch": 0.13957293626769204, "grad_norm": 0.26069197058677673, "learning_rate": 6.978646813384603e-06, "loss": 0.0084, "step": 8530 }, { "epoch": 0.1397365622187679, "grad_norm": 0.27305662631988525, "learning_rate": 6.986828110938395e-06, "loss": 0.0067, "step": 8540 }, { "epoch": 0.13990018816984373, "grad_norm": 0.14777232706546783, "learning_rate": 6.995009408492187e-06, "loss": 0.0065, "step": 8550 }, { "epoch": 0.1400638141209196, "grad_norm": 0.22705182433128357, "learning_rate": 7.00319070604598e-06, "loss": 0.006, "step": 8560 }, { "epoch": 0.14022744007199542, "grad_norm": 0.2742723822593689, "learning_rate": 7.011372003599771e-06, "loss": 0.0074, "step": 8570 }, { "epoch": 0.14039106602307125, "grad_norm": 0.5873104929924011, "learning_rate": 7.019553301153563e-06, "loss": 0.0079, "step": 8580 }, { "epoch": 0.1405546919741471, "grad_norm": 0.4223281145095825, "learning_rate": 7.0277345987073555e-06, "loss": 0.0091, "step": 8590 }, { "epoch": 0.14071831792522294, "grad_norm": 0.31218650937080383, "learning_rate": 7.035915896261147e-06, "loss": 0.0085, "step": 8600 }, { "epoch": 0.1408819438762988, "grad_norm": 0.19783629477024078, "learning_rate": 7.044097193814939e-06, "loss": 0.0052, "step": 8610 }, { "epoch": 0.14104556982737462, "grad_norm": 0.22947272658348083, "learning_rate": 7.0522784913687315e-06, "loss": 0.0062, "step": 8620 }, { "epoch": 0.14120919577845045, "grad_norm": 0.2573065459728241, "learning_rate": 7.060459788922524e-06, "loss": 0.0082, "step": 8630 }, { "epoch": 0.1413728217295263, "grad_norm": 0.13020943105220795, "learning_rate": 7.068641086476315e-06, "loss": 0.01, "step": 8640 }, { "epoch": 0.14153644768060214, "grad_norm": 0.11191508919000626, "learning_rate": 7.076822384030107e-06, "loss": 0.0072, "step": 8650 }, { "epoch": 0.141700073631678, "grad_norm": 0.16540056467056274, "learning_rate": 7.0850036815839e-06, "loss": 0.0056, "step": 8660 }, { "epoch": 0.14186369958275383, "grad_norm": 0.3133178949356079, "learning_rate": 7.093184979137692e-06, "loss": 0.0097, "step": 8670 }, { "epoch": 0.14202732553382966, "grad_norm": 0.4946599304676056, "learning_rate": 7.101366276691483e-06, "loss": 0.0067, "step": 8680 }, { "epoch": 0.14219095148490551, "grad_norm": 0.3116682767868042, "learning_rate": 7.109547574245276e-06, "loss": 0.0058, "step": 8690 }, { "epoch": 0.14235457743598134, "grad_norm": 0.5580878257751465, "learning_rate": 7.117728871799068e-06, "loss": 0.008, "step": 8700 }, { "epoch": 0.14251820338705717, "grad_norm": 0.26291677355766296, "learning_rate": 7.12591016935286e-06, "loss": 0.0088, "step": 8710 }, { "epoch": 0.14268182933813303, "grad_norm": 0.4729841649532318, "learning_rate": 7.1340914669066515e-06, "loss": 0.01, "step": 8720 }, { "epoch": 0.14284545528920886, "grad_norm": 0.08003254979848862, "learning_rate": 7.142272764460444e-06, "loss": 0.0132, "step": 8730 }, { "epoch": 0.14300908124028472, "grad_norm": 0.37936556339263916, "learning_rate": 7.150454062014236e-06, "loss": 0.0078, "step": 8740 }, { "epoch": 0.14317270719136055, "grad_norm": 0.41632506251335144, "learning_rate": 7.158635359568028e-06, "loss": 0.0103, "step": 8750 }, { "epoch": 0.14333633314243638, "grad_norm": 0.1888243407011032, "learning_rate": 7.16681665712182e-06, "loss": 0.0074, "step": 8760 }, { "epoch": 0.14349995909351224, "grad_norm": 0.6259981989860535, "learning_rate": 7.174997954675612e-06, "loss": 0.0095, "step": 8770 }, { "epoch": 0.14366358504458807, "grad_norm": 0.17268413305282593, "learning_rate": 7.183179252229404e-06, "loss": 0.0056, "step": 8780 }, { "epoch": 0.14382721099566392, "grad_norm": 0.18798314034938812, "learning_rate": 7.1913605497831965e-06, "loss": 0.0056, "step": 8790 }, { "epoch": 0.14399083694673975, "grad_norm": 0.36550891399383545, "learning_rate": 7.199541847336988e-06, "loss": 0.0064, "step": 8800 }, { "epoch": 0.14415446289781558, "grad_norm": 0.3121103048324585, "learning_rate": 7.20772314489078e-06, "loss": 0.0045, "step": 8810 }, { "epoch": 0.14431808884889144, "grad_norm": 0.2535233199596405, "learning_rate": 7.2159044424445725e-06, "loss": 0.0065, "step": 8820 }, { "epoch": 0.14448171479996727, "grad_norm": 0.36904042959213257, "learning_rate": 7.224085739998365e-06, "loss": 0.0071, "step": 8830 }, { "epoch": 0.14464534075104313, "grad_norm": 0.07922352105379105, "learning_rate": 7.232267037552156e-06, "loss": 0.0046, "step": 8840 }, { "epoch": 0.14480896670211896, "grad_norm": 0.22944341599941254, "learning_rate": 7.2404483351059484e-06, "loss": 0.0076, "step": 8850 }, { "epoch": 0.1449725926531948, "grad_norm": 0.19778549671173096, "learning_rate": 7.248629632659741e-06, "loss": 0.0081, "step": 8860 }, { "epoch": 0.14513621860427064, "grad_norm": 0.20291730761528015, "learning_rate": 7.256810930213533e-06, "loss": 0.0056, "step": 8870 }, { "epoch": 0.14529984455534647, "grad_norm": 0.17763963341712952, "learning_rate": 7.264992227767324e-06, "loss": 0.0075, "step": 8880 }, { "epoch": 0.14546347050642233, "grad_norm": 0.35394352674484253, "learning_rate": 7.273173525321117e-06, "loss": 0.0061, "step": 8890 }, { "epoch": 0.14562709645749816, "grad_norm": 0.3305015563964844, "learning_rate": 7.281354822874909e-06, "loss": 0.0058, "step": 8900 }, { "epoch": 0.145790722408574, "grad_norm": 0.25384292006492615, "learning_rate": 7.289536120428701e-06, "loss": 0.0051, "step": 8910 }, { "epoch": 0.14595434835964985, "grad_norm": 0.9204688668251038, "learning_rate": 7.2977174179824926e-06, "loss": 0.0081, "step": 8920 }, { "epoch": 0.14611797431072568, "grad_norm": 0.20166493952274323, "learning_rate": 7.305898715536285e-06, "loss": 0.0073, "step": 8930 }, { "epoch": 0.1462816002618015, "grad_norm": 0.2840687930583954, "learning_rate": 7.314080013090077e-06, "loss": 0.0054, "step": 8940 }, { "epoch": 0.14644522621287737, "grad_norm": 0.2120349258184433, "learning_rate": 7.3222613106438685e-06, "loss": 0.0055, "step": 8950 }, { "epoch": 0.1466088521639532, "grad_norm": 0.29277750849723816, "learning_rate": 7.330442608197661e-06, "loss": 0.0047, "step": 8960 }, { "epoch": 0.14677247811502905, "grad_norm": 0.5808455348014832, "learning_rate": 7.338623905751453e-06, "loss": 0.007, "step": 8970 }, { "epoch": 0.14693610406610488, "grad_norm": 0.2462371289730072, "learning_rate": 7.346805203305245e-06, "loss": 0.0057, "step": 8980 }, { "epoch": 0.1470997300171807, "grad_norm": 0.44763612747192383, "learning_rate": 7.354986500859037e-06, "loss": 0.0056, "step": 8990 }, { "epoch": 0.14726335596825657, "grad_norm": 0.230062335729599, "learning_rate": 7.363167798412829e-06, "loss": 0.0056, "step": 9000 }, { "epoch": 0.1474269819193324, "grad_norm": 0.25067564845085144, "learning_rate": 7.371349095966621e-06, "loss": 0.0074, "step": 9010 }, { "epoch": 0.14759060787040826, "grad_norm": 0.2506806254386902, "learning_rate": 7.3795303935204135e-06, "loss": 0.0059, "step": 9020 }, { "epoch": 0.1477542338214841, "grad_norm": 0.35419762134552, "learning_rate": 7.387711691074205e-06, "loss": 0.0065, "step": 9030 }, { "epoch": 0.14791785977255992, "grad_norm": 0.33476555347442627, "learning_rate": 7.395892988627997e-06, "loss": 0.0065, "step": 9040 }, { "epoch": 0.14808148572363578, "grad_norm": 0.22554193437099457, "learning_rate": 7.4040742861817894e-06, "loss": 0.0069, "step": 9050 }, { "epoch": 0.1482451116747116, "grad_norm": 0.19346824288368225, "learning_rate": 7.412255583735582e-06, "loss": 0.0072, "step": 9060 }, { "epoch": 0.14840873762578746, "grad_norm": 0.4060174822807312, "learning_rate": 7.420436881289373e-06, "loss": 0.008, "step": 9070 }, { "epoch": 0.1485723635768633, "grad_norm": 0.32242077589035034, "learning_rate": 7.428618178843165e-06, "loss": 0.0077, "step": 9080 }, { "epoch": 0.14873598952793912, "grad_norm": 0.28858497738838196, "learning_rate": 7.436799476396958e-06, "loss": 0.0046, "step": 9090 }, { "epoch": 0.14889961547901498, "grad_norm": 0.1785849779844284, "learning_rate": 7.44498077395075e-06, "loss": 0.0056, "step": 9100 }, { "epoch": 0.1490632414300908, "grad_norm": 0.12223095446825027, "learning_rate": 7.453162071504541e-06, "loss": 0.0055, "step": 9110 }, { "epoch": 0.14922686738116667, "grad_norm": 0.20449209213256836, "learning_rate": 7.461343369058334e-06, "loss": 0.0101, "step": 9120 }, { "epoch": 0.1493904933322425, "grad_norm": 0.1937507539987564, "learning_rate": 7.469524666612126e-06, "loss": 0.0073, "step": 9130 }, { "epoch": 0.14955411928331833, "grad_norm": 0.15817973017692566, "learning_rate": 7.477705964165918e-06, "loss": 0.0054, "step": 9140 }, { "epoch": 0.14971774523439418, "grad_norm": 0.3396613299846649, "learning_rate": 7.4858872617197095e-06, "loss": 0.0066, "step": 9150 }, { "epoch": 0.14988137118547, "grad_norm": 0.3950789272785187, "learning_rate": 7.494068559273502e-06, "loss": 0.009, "step": 9160 }, { "epoch": 0.15004499713654584, "grad_norm": 0.2544097304344177, "learning_rate": 7.502249856827294e-06, "loss": 0.0076, "step": 9170 }, { "epoch": 0.1502086230876217, "grad_norm": 0.3011780381202698, "learning_rate": 7.510431154381086e-06, "loss": 0.0066, "step": 9180 }, { "epoch": 0.15037224903869753, "grad_norm": 0.2967643737792969, "learning_rate": 7.518612451934878e-06, "loss": 0.006, "step": 9190 }, { "epoch": 0.1505358749897734, "grad_norm": 0.34138962626457214, "learning_rate": 7.52679374948867e-06, "loss": 0.0072, "step": 9200 }, { "epoch": 0.15069950094084922, "grad_norm": 0.36358416080474854, "learning_rate": 7.534975047042462e-06, "loss": 0.0052, "step": 9210 }, { "epoch": 0.15086312689192505, "grad_norm": 0.06522957235574722, "learning_rate": 7.5431563445962545e-06, "loss": 0.0043, "step": 9220 }, { "epoch": 0.1510267528430009, "grad_norm": 0.2662425935268402, "learning_rate": 7.551337642150046e-06, "loss": 0.0073, "step": 9230 }, { "epoch": 0.15119037879407674, "grad_norm": 0.11464164406061172, "learning_rate": 7.559518939703837e-06, "loss": 0.0053, "step": 9240 }, { "epoch": 0.1513540047451526, "grad_norm": 0.13053202629089355, "learning_rate": 7.567700237257629e-06, "loss": 0.004, "step": 9250 }, { "epoch": 0.15151763069622842, "grad_norm": 0.21175773441791534, "learning_rate": 7.575881534811421e-06, "loss": 0.0059, "step": 9260 }, { "epoch": 0.15168125664730425, "grad_norm": 0.11414758116006851, "learning_rate": 7.584062832365213e-06, "loss": 0.0051, "step": 9270 }, { "epoch": 0.1518448825983801, "grad_norm": 0.21577127277851105, "learning_rate": 7.5922441299190056e-06, "loss": 0.0045, "step": 9280 }, { "epoch": 0.15200850854945594, "grad_norm": 0.45842885971069336, "learning_rate": 7.600425427472797e-06, "loss": 0.0055, "step": 9290 }, { "epoch": 0.1521721345005318, "grad_norm": 0.40762707591056824, "learning_rate": 7.608606725026589e-06, "loss": 0.0053, "step": 9300 }, { "epoch": 0.15233576045160763, "grad_norm": 0.1374872475862503, "learning_rate": 7.6167880225803815e-06, "loss": 0.0088, "step": 9310 }, { "epoch": 0.15249938640268346, "grad_norm": 0.23885613679885864, "learning_rate": 7.624969320134174e-06, "loss": 0.0082, "step": 9320 }, { "epoch": 0.15266301235375931, "grad_norm": 0.09828246384859085, "learning_rate": 7.633150617687965e-06, "loss": 0.005, "step": 9330 }, { "epoch": 0.15282663830483514, "grad_norm": 0.39649564027786255, "learning_rate": 7.641331915241758e-06, "loss": 0.0085, "step": 9340 }, { "epoch": 0.15299026425591097, "grad_norm": 0.40593019127845764, "learning_rate": 7.64951321279555e-06, "loss": 0.0127, "step": 9350 }, { "epoch": 0.15315389020698683, "grad_norm": 0.26107296347618103, "learning_rate": 7.657694510349341e-06, "loss": 0.007, "step": 9360 }, { "epoch": 0.15331751615806266, "grad_norm": 0.39782312512397766, "learning_rate": 7.665875807903134e-06, "loss": 0.0084, "step": 9370 }, { "epoch": 0.15348114210913852, "grad_norm": 0.40781745314598083, "learning_rate": 7.674057105456926e-06, "loss": 0.0066, "step": 9380 }, { "epoch": 0.15364476806021435, "grad_norm": 0.27887535095214844, "learning_rate": 7.682238403010717e-06, "loss": 0.0052, "step": 9390 }, { "epoch": 0.15380839401129018, "grad_norm": 0.2981220781803131, "learning_rate": 7.69041970056451e-06, "loss": 0.0063, "step": 9400 }, { "epoch": 0.15397201996236604, "grad_norm": 0.23114238679409027, "learning_rate": 7.698600998118302e-06, "loss": 0.0074, "step": 9410 }, { "epoch": 0.15413564591344187, "grad_norm": 0.26542651653289795, "learning_rate": 7.706782295672095e-06, "loss": 0.0042, "step": 9420 }, { "epoch": 0.15429927186451772, "grad_norm": 0.41108861565589905, "learning_rate": 7.714963593225886e-06, "loss": 0.0072, "step": 9430 }, { "epoch": 0.15446289781559355, "grad_norm": 0.06760261207818985, "learning_rate": 7.723144890779678e-06, "loss": 0.0082, "step": 9440 }, { "epoch": 0.15462652376666938, "grad_norm": 0.4643831253051758, "learning_rate": 7.73132618833347e-06, "loss": 0.0081, "step": 9450 }, { "epoch": 0.15479014971774524, "grad_norm": 0.5153923630714417, "learning_rate": 7.739507485887262e-06, "loss": 0.0078, "step": 9460 }, { "epoch": 0.15495377566882107, "grad_norm": 0.45792943239212036, "learning_rate": 7.747688783441053e-06, "loss": 0.0071, "step": 9470 }, { "epoch": 0.15511740161989693, "grad_norm": 0.27561572194099426, "learning_rate": 7.755870080994847e-06, "loss": 0.0083, "step": 9480 }, { "epoch": 0.15528102757097276, "grad_norm": 0.08852320164442062, "learning_rate": 7.764051378548638e-06, "loss": 0.007, "step": 9490 }, { "epoch": 0.1554446535220486, "grad_norm": 0.42838406562805176, "learning_rate": 7.772232676102431e-06, "loss": 0.0056, "step": 9500 }, { "epoch": 0.15560827947312444, "grad_norm": 0.5333880186080933, "learning_rate": 7.780413973656223e-06, "loss": 0.0058, "step": 9510 }, { "epoch": 0.15577190542420027, "grad_norm": 0.12857618927955627, "learning_rate": 7.788595271210014e-06, "loss": 0.0043, "step": 9520 }, { "epoch": 0.15593553137527613, "grad_norm": 0.2167769968509674, "learning_rate": 7.796776568763807e-06, "loss": 0.008, "step": 9530 }, { "epoch": 0.15609915732635196, "grad_norm": 0.17934668064117432, "learning_rate": 7.804957866317598e-06, "loss": 0.0057, "step": 9540 }, { "epoch": 0.1562627832774278, "grad_norm": 0.22155557572841644, "learning_rate": 7.81313916387139e-06, "loss": 0.006, "step": 9550 }, { "epoch": 0.15642640922850365, "grad_norm": 0.3502984344959259, "learning_rate": 7.821320461425183e-06, "loss": 0.0058, "step": 9560 }, { "epoch": 0.15659003517957948, "grad_norm": 0.07560534030199051, "learning_rate": 7.829501758978974e-06, "loss": 0.0089, "step": 9570 }, { "epoch": 0.1567536611306553, "grad_norm": 0.19566108286380768, "learning_rate": 7.837683056532766e-06, "loss": 0.0056, "step": 9580 }, { "epoch": 0.15691728708173117, "grad_norm": 0.17508867383003235, "learning_rate": 7.845864354086559e-06, "loss": 0.0077, "step": 9590 }, { "epoch": 0.157080913032807, "grad_norm": 0.31950974464416504, "learning_rate": 7.85404565164035e-06, "loss": 0.0097, "step": 9600 }, { "epoch": 0.15724453898388285, "grad_norm": 0.210434690117836, "learning_rate": 7.862226949194143e-06, "loss": 0.0068, "step": 9610 }, { "epoch": 0.15740816493495868, "grad_norm": 0.13805869221687317, "learning_rate": 7.870408246747935e-06, "loss": 0.0054, "step": 9620 }, { "epoch": 0.1575717908860345, "grad_norm": 0.1168903037905693, "learning_rate": 7.878589544301726e-06, "loss": 0.0039, "step": 9630 }, { "epoch": 0.15773541683711037, "grad_norm": 0.48040199279785156, "learning_rate": 7.88677084185552e-06, "loss": 0.0092, "step": 9640 }, { "epoch": 0.1578990427881862, "grad_norm": 0.20810607075691223, "learning_rate": 7.89495213940931e-06, "loss": 0.0067, "step": 9650 }, { "epoch": 0.15806266873926206, "grad_norm": 0.2741049826145172, "learning_rate": 7.903133436963102e-06, "loss": 0.0068, "step": 9660 }, { "epoch": 0.1582262946903379, "grad_norm": 0.25321799516677856, "learning_rate": 7.911314734516895e-06, "loss": 0.0078, "step": 9670 }, { "epoch": 0.15838992064141372, "grad_norm": 0.14259810745716095, "learning_rate": 7.919496032070687e-06, "loss": 0.0073, "step": 9680 }, { "epoch": 0.15855354659248957, "grad_norm": 0.29586896300315857, "learning_rate": 7.92767732962448e-06, "loss": 0.0094, "step": 9690 }, { "epoch": 0.1587171725435654, "grad_norm": 0.8661742806434631, "learning_rate": 7.935858627178271e-06, "loss": 0.0058, "step": 9700 }, { "epoch": 0.15888079849464126, "grad_norm": 0.4148622155189514, "learning_rate": 7.944039924732063e-06, "loss": 0.006, "step": 9710 }, { "epoch": 0.1590444244457171, "grad_norm": 0.2772292196750641, "learning_rate": 7.952221222285856e-06, "loss": 0.0055, "step": 9720 }, { "epoch": 0.15920805039679292, "grad_norm": 0.3952766954898834, "learning_rate": 7.960402519839647e-06, "loss": 0.0064, "step": 9730 }, { "epoch": 0.15937167634786878, "grad_norm": 0.19145947694778442, "learning_rate": 7.968583817393439e-06, "loss": 0.0039, "step": 9740 }, { "epoch": 0.1595353022989446, "grad_norm": 0.255990594625473, "learning_rate": 7.976765114947232e-06, "loss": 0.0056, "step": 9750 }, { "epoch": 0.15969892825002047, "grad_norm": 0.05305280536413193, "learning_rate": 7.984946412501023e-06, "loss": 0.0047, "step": 9760 }, { "epoch": 0.1598625542010963, "grad_norm": 0.07129017263650894, "learning_rate": 7.993127710054816e-06, "loss": 0.0054, "step": 9770 }, { "epoch": 0.16002618015217213, "grad_norm": 0.2662150263786316, "learning_rate": 8.001309007608608e-06, "loss": 0.0058, "step": 9780 }, { "epoch": 0.16018980610324798, "grad_norm": 0.3693317174911499, "learning_rate": 8.009490305162399e-06, "loss": 0.0065, "step": 9790 }, { "epoch": 0.1603534320543238, "grad_norm": 0.24876077473163605, "learning_rate": 8.017671602716192e-06, "loss": 0.0081, "step": 9800 }, { "epoch": 0.16051705800539964, "grad_norm": 0.14239034056663513, "learning_rate": 8.025852900269984e-06, "loss": 0.0067, "step": 9810 }, { "epoch": 0.1606806839564755, "grad_norm": 0.6243664622306824, "learning_rate": 8.034034197823775e-06, "loss": 0.0057, "step": 9820 }, { "epoch": 0.16084430990755133, "grad_norm": 0.5805561542510986, "learning_rate": 8.042215495377568e-06, "loss": 0.0074, "step": 9830 }, { "epoch": 0.1610079358586272, "grad_norm": 0.18034577369689941, "learning_rate": 8.05039679293136e-06, "loss": 0.0053, "step": 9840 }, { "epoch": 0.16117156180970302, "grad_norm": 0.20209714770317078, "learning_rate": 8.058578090485151e-06, "loss": 0.0087, "step": 9850 }, { "epoch": 0.16133518776077885, "grad_norm": 0.2705497443675995, "learning_rate": 8.066759388038944e-06, "loss": 0.0078, "step": 9860 }, { "epoch": 0.1614988137118547, "grad_norm": 0.35468733310699463, "learning_rate": 8.074940685592736e-06, "loss": 0.0074, "step": 9870 }, { "epoch": 0.16166243966293053, "grad_norm": 0.14945657551288605, "learning_rate": 8.083121983146529e-06, "loss": 0.0077, "step": 9880 }, { "epoch": 0.1618260656140064, "grad_norm": 0.2539266347885132, "learning_rate": 8.09130328070032e-06, "loss": 0.0032, "step": 9890 }, { "epoch": 0.16198969156508222, "grad_norm": 0.2711421549320221, "learning_rate": 8.099484578254111e-06, "loss": 0.0085, "step": 9900 }, { "epoch": 0.16215331751615805, "grad_norm": 0.2099442034959793, "learning_rate": 8.107665875807905e-06, "loss": 0.0075, "step": 9910 }, { "epoch": 0.1623169434672339, "grad_norm": 0.11686094850301743, "learning_rate": 8.115847173361696e-06, "loss": 0.0059, "step": 9920 }, { "epoch": 0.16248056941830974, "grad_norm": 0.18301193416118622, "learning_rate": 8.124028470915487e-06, "loss": 0.0068, "step": 9930 }, { "epoch": 0.1626441953693856, "grad_norm": 0.1897609382867813, "learning_rate": 8.13220976846928e-06, "loss": 0.0079, "step": 9940 }, { "epoch": 0.16280782132046143, "grad_norm": 0.32352903485298157, "learning_rate": 8.140391066023072e-06, "loss": 0.0074, "step": 9950 }, { "epoch": 0.16297144727153726, "grad_norm": 0.10327289253473282, "learning_rate": 8.148572363576865e-06, "loss": 0.0032, "step": 9960 }, { "epoch": 0.16313507322261311, "grad_norm": 0.18615023791790009, "learning_rate": 8.156753661130656e-06, "loss": 0.0085, "step": 9970 }, { "epoch": 0.16329869917368894, "grad_norm": 0.26965397596359253, "learning_rate": 8.164934958684448e-06, "loss": 0.0055, "step": 9980 }, { "epoch": 0.1634623251247648, "grad_norm": 0.15522529184818268, "learning_rate": 8.173116256238241e-06, "loss": 0.007, "step": 9990 }, { "epoch": 0.16362595107584063, "grad_norm": 0.22529001533985138, "learning_rate": 8.181297553792032e-06, "loss": 0.005, "step": 10000 }, { "epoch": 0.16378957702691646, "grad_norm": 0.24302367866039276, "learning_rate": 8.189478851345824e-06, "loss": 0.0082, "step": 10010 }, { "epoch": 0.16395320297799232, "grad_norm": 0.25101688504219055, "learning_rate": 8.197660148899617e-06, "loss": 0.0071, "step": 10020 }, { "epoch": 0.16411682892906815, "grad_norm": 0.3425748348236084, "learning_rate": 8.205841446453408e-06, "loss": 0.0091, "step": 10030 }, { "epoch": 0.16428045488014398, "grad_norm": 0.270454466342926, "learning_rate": 8.214022744007201e-06, "loss": 0.0054, "step": 10040 }, { "epoch": 0.16444408083121984, "grad_norm": 0.21871308982372284, "learning_rate": 8.222204041560993e-06, "loss": 0.0063, "step": 10050 }, { "epoch": 0.16460770678229567, "grad_norm": 0.21748510003089905, "learning_rate": 8.230385339114784e-06, "loss": 0.0057, "step": 10060 }, { "epoch": 0.16477133273337152, "grad_norm": 0.365843266248703, "learning_rate": 8.238566636668577e-06, "loss": 0.007, "step": 10070 }, { "epoch": 0.16493495868444735, "grad_norm": 0.20976294577121735, "learning_rate": 8.246747934222369e-06, "loss": 0.0066, "step": 10080 }, { "epoch": 0.16509858463552318, "grad_norm": 0.37279391288757324, "learning_rate": 8.25492923177616e-06, "loss": 0.0063, "step": 10090 }, { "epoch": 0.16526221058659904, "grad_norm": 0.22895392775535583, "learning_rate": 8.263110529329953e-06, "loss": 0.0065, "step": 10100 }, { "epoch": 0.16542583653767487, "grad_norm": 0.27912771701812744, "learning_rate": 8.271291826883745e-06, "loss": 0.0091, "step": 10110 }, { "epoch": 0.16558946248875073, "grad_norm": 0.4469371736049652, "learning_rate": 8.279473124437536e-06, "loss": 0.0064, "step": 10120 }, { "epoch": 0.16575308843982656, "grad_norm": 0.3388589322566986, "learning_rate": 8.287654421991328e-06, "loss": 0.0076, "step": 10130 }, { "epoch": 0.1659167143909024, "grad_norm": 0.22695420682430267, "learning_rate": 8.29583571954512e-06, "loss": 0.0059, "step": 10140 }, { "epoch": 0.16608034034197824, "grad_norm": 0.1658775806427002, "learning_rate": 8.304017017098912e-06, "loss": 0.0088, "step": 10150 }, { "epoch": 0.16624396629305407, "grad_norm": 0.2975265383720398, "learning_rate": 8.312198314652704e-06, "loss": 0.0076, "step": 10160 }, { "epoch": 0.16640759224412993, "grad_norm": 0.1491502821445465, "learning_rate": 8.320379612206497e-06, "loss": 0.0063, "step": 10170 }, { "epoch": 0.16657121819520576, "grad_norm": 0.2581508457660675, "learning_rate": 8.328560909760288e-06, "loss": 0.0082, "step": 10180 }, { "epoch": 0.1667348441462816, "grad_norm": 0.17519281804561615, "learning_rate": 8.33674220731408e-06, "loss": 0.0042, "step": 10190 }, { "epoch": 0.16689847009735745, "grad_norm": 0.1720351129770279, "learning_rate": 8.344923504867873e-06, "loss": 0.0056, "step": 10200 }, { "epoch": 0.16706209604843328, "grad_norm": 0.22692690789699554, "learning_rate": 8.353104802421664e-06, "loss": 0.0077, "step": 10210 }, { "epoch": 0.16722572199950914, "grad_norm": 0.16719768941402435, "learning_rate": 8.361286099975457e-06, "loss": 0.0058, "step": 10220 }, { "epoch": 0.16738934795058497, "grad_norm": 0.21581275761127472, "learning_rate": 8.369467397529249e-06, "loss": 0.0065, "step": 10230 }, { "epoch": 0.1675529739016608, "grad_norm": 0.40083274245262146, "learning_rate": 8.37764869508304e-06, "loss": 0.0083, "step": 10240 }, { "epoch": 0.16771659985273665, "grad_norm": 0.5954267382621765, "learning_rate": 8.385829992636833e-06, "loss": 0.0063, "step": 10250 }, { "epoch": 0.16788022580381248, "grad_norm": 0.3549838066101074, "learning_rate": 8.394011290190624e-06, "loss": 0.0068, "step": 10260 }, { "epoch": 0.1680438517548883, "grad_norm": 0.1814076453447342, "learning_rate": 8.402192587744416e-06, "loss": 0.0105, "step": 10270 }, { "epoch": 0.16820747770596417, "grad_norm": 0.19932052493095398, "learning_rate": 8.410373885298209e-06, "loss": 0.0082, "step": 10280 }, { "epoch": 0.16837110365704, "grad_norm": 0.1761980503797531, "learning_rate": 8.418555182852e-06, "loss": 0.0059, "step": 10290 }, { "epoch": 0.16853472960811586, "grad_norm": 0.2077781707048416, "learning_rate": 8.426736480405793e-06, "loss": 0.0082, "step": 10300 }, { "epoch": 0.1686983555591917, "grad_norm": 0.3204112946987152, "learning_rate": 8.434917777959585e-06, "loss": 0.0067, "step": 10310 }, { "epoch": 0.16886198151026752, "grad_norm": 0.08514495193958282, "learning_rate": 8.443099075513376e-06, "loss": 0.007, "step": 10320 }, { "epoch": 0.16902560746134337, "grad_norm": 0.16202858090400696, "learning_rate": 8.45128037306717e-06, "loss": 0.0086, "step": 10330 }, { "epoch": 0.1691892334124192, "grad_norm": 0.3120019733905792, "learning_rate": 8.459461670620961e-06, "loss": 0.0054, "step": 10340 }, { "epoch": 0.16935285936349506, "grad_norm": 0.7790592312812805, "learning_rate": 8.467642968174752e-06, "loss": 0.0085, "step": 10350 }, { "epoch": 0.1695164853145709, "grad_norm": 0.6006662249565125, "learning_rate": 8.475824265728545e-06, "loss": 0.0081, "step": 10360 }, { "epoch": 0.16968011126564672, "grad_norm": 0.34162044525146484, "learning_rate": 8.484005563282337e-06, "loss": 0.0061, "step": 10370 }, { "epoch": 0.16984373721672258, "grad_norm": 0.061774272471666336, "learning_rate": 8.49218686083613e-06, "loss": 0.0069, "step": 10380 }, { "epoch": 0.1700073631677984, "grad_norm": 0.22786760330200195, "learning_rate": 8.500368158389921e-06, "loss": 0.0114, "step": 10390 }, { "epoch": 0.17017098911887427, "grad_norm": 0.3290214240550995, "learning_rate": 8.508549455943713e-06, "loss": 0.0063, "step": 10400 }, { "epoch": 0.1703346150699501, "grad_norm": 0.19510148465633392, "learning_rate": 8.516730753497506e-06, "loss": 0.0063, "step": 10410 }, { "epoch": 0.17049824102102593, "grad_norm": 0.07396508753299713, "learning_rate": 8.524912051051297e-06, "loss": 0.0051, "step": 10420 }, { "epoch": 0.17066186697210178, "grad_norm": 0.25472307205200195, "learning_rate": 8.533093348605089e-06, "loss": 0.0079, "step": 10430 }, { "epoch": 0.1708254929231776, "grad_norm": 0.18875336647033691, "learning_rate": 8.541274646158882e-06, "loss": 0.004, "step": 10440 }, { "epoch": 0.17098911887425344, "grad_norm": 0.21490013599395752, "learning_rate": 8.549455943712673e-06, "loss": 0.0047, "step": 10450 }, { "epoch": 0.1711527448253293, "grad_norm": 0.08458748459815979, "learning_rate": 8.557637241266465e-06, "loss": 0.0075, "step": 10460 }, { "epoch": 0.17131637077640513, "grad_norm": 0.29996681213378906, "learning_rate": 8.565818538820258e-06, "loss": 0.0066, "step": 10470 }, { "epoch": 0.171479996727481, "grad_norm": 0.17372117936611176, "learning_rate": 8.573999836374049e-06, "loss": 0.008, "step": 10480 }, { "epoch": 0.17164362267855682, "grad_norm": 0.21046558022499084, "learning_rate": 8.582181133927842e-06, "loss": 0.0099, "step": 10490 }, { "epoch": 0.17180724862963265, "grad_norm": 0.48645058274269104, "learning_rate": 8.590362431481634e-06, "loss": 0.0048, "step": 10500 }, { "epoch": 0.1719708745807085, "grad_norm": 0.16859287023544312, "learning_rate": 8.598543729035425e-06, "loss": 0.0076, "step": 10510 }, { "epoch": 0.17213450053178433, "grad_norm": 0.37864935398101807, "learning_rate": 8.606725026589218e-06, "loss": 0.0075, "step": 10520 }, { "epoch": 0.1722981264828602, "grad_norm": 0.2491617202758789, "learning_rate": 8.61490632414301e-06, "loss": 0.0059, "step": 10530 }, { "epoch": 0.17246175243393602, "grad_norm": 0.29351696372032166, "learning_rate": 8.623087621696801e-06, "loss": 0.0086, "step": 10540 }, { "epoch": 0.17262537838501185, "grad_norm": 0.30968332290649414, "learning_rate": 8.631268919250594e-06, "loss": 0.0075, "step": 10550 }, { "epoch": 0.1727890043360877, "grad_norm": 0.11242213845252991, "learning_rate": 8.639450216804386e-06, "loss": 0.007, "step": 10560 }, { "epoch": 0.17295263028716354, "grad_norm": 0.0518312007188797, "learning_rate": 8.647631514358179e-06, "loss": 0.0079, "step": 10570 }, { "epoch": 0.1731162562382394, "grad_norm": 0.14475904405117035, "learning_rate": 8.65581281191197e-06, "loss": 0.0055, "step": 10580 }, { "epoch": 0.17327988218931523, "grad_norm": 0.08665602654218674, "learning_rate": 8.663994109465761e-06, "loss": 0.0061, "step": 10590 }, { "epoch": 0.17344350814039106, "grad_norm": 0.23037134110927582, "learning_rate": 8.672175407019555e-06, "loss": 0.0073, "step": 10600 }, { "epoch": 0.1736071340914669, "grad_norm": 0.33483126759529114, "learning_rate": 8.680356704573346e-06, "loss": 0.0034, "step": 10610 }, { "epoch": 0.17377076004254274, "grad_norm": 0.3462408781051636, "learning_rate": 8.688538002127137e-06, "loss": 0.0058, "step": 10620 }, { "epoch": 0.1739343859936186, "grad_norm": 0.2189795821905136, "learning_rate": 8.69671929968093e-06, "loss": 0.005, "step": 10630 }, { "epoch": 0.17409801194469443, "grad_norm": 0.14973264932632446, "learning_rate": 8.704900597234722e-06, "loss": 0.0064, "step": 10640 }, { "epoch": 0.17426163789577026, "grad_norm": 0.2500171959400177, "learning_rate": 8.713081894788515e-06, "loss": 0.0052, "step": 10650 }, { "epoch": 0.17442526384684612, "grad_norm": 0.349170982837677, "learning_rate": 8.721263192342306e-06, "loss": 0.005, "step": 10660 }, { "epoch": 0.17458888979792195, "grad_norm": 0.22084467113018036, "learning_rate": 8.729444489896098e-06, "loss": 0.0045, "step": 10670 }, { "epoch": 0.17475251574899778, "grad_norm": 0.2274869978427887, "learning_rate": 8.737625787449891e-06, "loss": 0.0084, "step": 10680 }, { "epoch": 0.17491614170007364, "grad_norm": 0.24188733100891113, "learning_rate": 8.745807085003682e-06, "loss": 0.0045, "step": 10690 }, { "epoch": 0.17507976765114946, "grad_norm": 0.16240046918392181, "learning_rate": 8.753988382557474e-06, "loss": 0.0059, "step": 10700 }, { "epoch": 0.17524339360222532, "grad_norm": 0.12881557643413544, "learning_rate": 8.762169680111267e-06, "loss": 0.0041, "step": 10710 }, { "epoch": 0.17540701955330115, "grad_norm": 0.24191954731941223, "learning_rate": 8.770350977665058e-06, "loss": 0.0065, "step": 10720 }, { "epoch": 0.17557064550437698, "grad_norm": 0.08183182030916214, "learning_rate": 8.778532275218851e-06, "loss": 0.0063, "step": 10730 }, { "epoch": 0.17573427145545284, "grad_norm": 0.29195988178253174, "learning_rate": 8.786713572772643e-06, "loss": 0.0059, "step": 10740 }, { "epoch": 0.17589789740652867, "grad_norm": 0.19651706516742706, "learning_rate": 8.794894870326434e-06, "loss": 0.0034, "step": 10750 }, { "epoch": 0.17606152335760453, "grad_norm": 0.34899449348449707, "learning_rate": 8.803076167880227e-06, "loss": 0.0067, "step": 10760 }, { "epoch": 0.17622514930868036, "grad_norm": 0.06525447964668274, "learning_rate": 8.811257465434019e-06, "loss": 0.0054, "step": 10770 }, { "epoch": 0.1763887752597562, "grad_norm": 0.34989553689956665, "learning_rate": 8.81943876298781e-06, "loss": 0.0051, "step": 10780 }, { "epoch": 0.17655240121083204, "grad_norm": 0.3499177396297455, "learning_rate": 8.827620060541603e-06, "loss": 0.0053, "step": 10790 }, { "epoch": 0.17671602716190787, "grad_norm": 0.02204432711005211, "learning_rate": 8.835801358095395e-06, "loss": 0.004, "step": 10800 }, { "epoch": 0.17687965311298373, "grad_norm": 0.17593590915203094, "learning_rate": 8.843982655649186e-06, "loss": 0.0057, "step": 10810 }, { "epoch": 0.17704327906405956, "grad_norm": 0.034615494310855865, "learning_rate": 8.85216395320298e-06, "loss": 0.0073, "step": 10820 }, { "epoch": 0.1772069050151354, "grad_norm": 0.22318828105926514, "learning_rate": 8.86034525075677e-06, "loss": 0.0044, "step": 10830 }, { "epoch": 0.17737053096621125, "grad_norm": 0.24265556037425995, "learning_rate": 8.868526548310564e-06, "loss": 0.0039, "step": 10840 }, { "epoch": 0.17753415691728708, "grad_norm": 0.06994540989398956, "learning_rate": 8.876707845864355e-06, "loss": 0.0065, "step": 10850 }, { "epoch": 0.17769778286836294, "grad_norm": 0.2045663446187973, "learning_rate": 8.884889143418147e-06, "loss": 0.0058, "step": 10860 }, { "epoch": 0.17786140881943877, "grad_norm": 0.38222843408584595, "learning_rate": 8.89307044097194e-06, "loss": 0.0073, "step": 10870 }, { "epoch": 0.1780250347705146, "grad_norm": 0.29041460156440735, "learning_rate": 8.901251738525731e-06, "loss": 0.0054, "step": 10880 }, { "epoch": 0.17818866072159045, "grad_norm": 0.2893683910369873, "learning_rate": 8.909433036079523e-06, "loss": 0.0059, "step": 10890 }, { "epoch": 0.17835228667266628, "grad_norm": 0.3193666338920593, "learning_rate": 8.917614333633316e-06, "loss": 0.0057, "step": 10900 }, { "epoch": 0.1785159126237421, "grad_norm": 0.4353903830051422, "learning_rate": 8.925795631187107e-06, "loss": 0.0065, "step": 10910 }, { "epoch": 0.17867953857481797, "grad_norm": 0.15185204148292542, "learning_rate": 8.9339769287409e-06, "loss": 0.0052, "step": 10920 }, { "epoch": 0.1788431645258938, "grad_norm": 0.01609669253230095, "learning_rate": 8.942158226294692e-06, "loss": 0.0075, "step": 10930 }, { "epoch": 0.17900679047696966, "grad_norm": 0.20663630962371826, "learning_rate": 8.950339523848483e-06, "loss": 0.0053, "step": 10940 }, { "epoch": 0.1791704164280455, "grad_norm": 0.17413762211799622, "learning_rate": 8.958520821402276e-06, "loss": 0.0077, "step": 10950 }, { "epoch": 0.17933404237912132, "grad_norm": 0.18656152486801147, "learning_rate": 8.966702118956068e-06, "loss": 0.0076, "step": 10960 }, { "epoch": 0.17949766833019717, "grad_norm": 0.2067999392747879, "learning_rate": 8.974883416509859e-06, "loss": 0.0071, "step": 10970 }, { "epoch": 0.179661294281273, "grad_norm": 0.182582288980484, "learning_rate": 8.983064714063652e-06, "loss": 0.0052, "step": 10980 }, { "epoch": 0.17982492023234886, "grad_norm": 0.14651651680469513, "learning_rate": 8.991246011617444e-06, "loss": 0.0074, "step": 10990 }, { "epoch": 0.1799885461834247, "grad_norm": 0.15958207845687866, "learning_rate": 8.999427309171235e-06, "loss": 0.0069, "step": 11000 }, { "epoch": 0.18015217213450052, "grad_norm": 0.2562597990036011, "learning_rate": 9.007608606725026e-06, "loss": 0.0059, "step": 11010 }, { "epoch": 0.18031579808557638, "grad_norm": 0.19369298219680786, "learning_rate": 9.01578990427882e-06, "loss": 0.0074, "step": 11020 }, { "epoch": 0.1804794240366522, "grad_norm": 0.06816922128200531, "learning_rate": 9.023971201832611e-06, "loss": 0.0088, "step": 11030 }, { "epoch": 0.18064304998772807, "grad_norm": 0.3692319095134735, "learning_rate": 9.032152499386402e-06, "loss": 0.0062, "step": 11040 }, { "epoch": 0.1808066759388039, "grad_norm": 0.39277228713035583, "learning_rate": 9.040333796940195e-06, "loss": 0.005, "step": 11050 }, { "epoch": 0.18097030188987973, "grad_norm": 0.31312334537506104, "learning_rate": 9.048515094493987e-06, "loss": 0.0078, "step": 11060 }, { "epoch": 0.18113392784095558, "grad_norm": 0.11233723163604736, "learning_rate": 9.056696392047778e-06, "loss": 0.0068, "step": 11070 }, { "epoch": 0.1812975537920314, "grad_norm": 0.21046818792819977, "learning_rate": 9.064877689601571e-06, "loss": 0.0093, "step": 11080 }, { "epoch": 0.18146117974310727, "grad_norm": 0.22377535700798035, "learning_rate": 9.073058987155363e-06, "loss": 0.0075, "step": 11090 }, { "epoch": 0.1816248056941831, "grad_norm": 0.12424563616514206, "learning_rate": 9.081240284709156e-06, "loss": 0.0037, "step": 11100 }, { "epoch": 0.18178843164525893, "grad_norm": 0.41122204065322876, "learning_rate": 9.089421582262947e-06, "loss": 0.0068, "step": 11110 }, { "epoch": 0.1819520575963348, "grad_norm": 0.2757875919342041, "learning_rate": 9.097602879816739e-06, "loss": 0.0064, "step": 11120 }, { "epoch": 0.18211568354741062, "grad_norm": 0.20527689158916473, "learning_rate": 9.105784177370532e-06, "loss": 0.004, "step": 11130 }, { "epoch": 0.18227930949848645, "grad_norm": 0.24801987409591675, "learning_rate": 9.113965474924323e-06, "loss": 0.0058, "step": 11140 }, { "epoch": 0.1824429354495623, "grad_norm": 0.4577740430831909, "learning_rate": 9.122146772478115e-06, "loss": 0.0051, "step": 11150 }, { "epoch": 0.18260656140063813, "grad_norm": 0.0874112993478775, "learning_rate": 9.130328070031908e-06, "loss": 0.0052, "step": 11160 }, { "epoch": 0.182770187351714, "grad_norm": 0.07761070132255554, "learning_rate": 9.1385093675857e-06, "loss": 0.0048, "step": 11170 }, { "epoch": 0.18293381330278982, "grad_norm": 0.17391850054264069, "learning_rate": 9.146690665139492e-06, "loss": 0.0054, "step": 11180 }, { "epoch": 0.18309743925386565, "grad_norm": 0.2146953046321869, "learning_rate": 9.154871962693284e-06, "loss": 0.0066, "step": 11190 }, { "epoch": 0.1832610652049415, "grad_norm": 0.11834721267223358, "learning_rate": 9.163053260247075e-06, "loss": 0.0032, "step": 11200 }, { "epoch": 0.18342469115601734, "grad_norm": 0.3615259826183319, "learning_rate": 9.171234557800868e-06, "loss": 0.0069, "step": 11210 }, { "epoch": 0.1835883171070932, "grad_norm": 0.3771328330039978, "learning_rate": 9.17941585535466e-06, "loss": 0.0053, "step": 11220 }, { "epoch": 0.18375194305816903, "grad_norm": 0.11126955598592758, "learning_rate": 9.187597152908451e-06, "loss": 0.0041, "step": 11230 }, { "epoch": 0.18391556900924486, "grad_norm": 0.29467493295669556, "learning_rate": 9.195778450462244e-06, "loss": 0.0062, "step": 11240 }, { "epoch": 0.1840791949603207, "grad_norm": 0.20356979966163635, "learning_rate": 9.203959748016036e-06, "loss": 0.006, "step": 11250 }, { "epoch": 0.18424282091139654, "grad_norm": 0.12055736780166626, "learning_rate": 9.212141045569829e-06, "loss": 0.0055, "step": 11260 }, { "epoch": 0.1844064468624724, "grad_norm": 0.43545442819595337, "learning_rate": 9.22032234312362e-06, "loss": 0.0068, "step": 11270 }, { "epoch": 0.18457007281354823, "grad_norm": 0.13396865129470825, "learning_rate": 9.228503640677412e-06, "loss": 0.0052, "step": 11280 }, { "epoch": 0.18473369876462406, "grad_norm": 0.2616546154022217, "learning_rate": 9.236684938231205e-06, "loss": 0.0061, "step": 11290 }, { "epoch": 0.18489732471569992, "grad_norm": 0.14696097373962402, "learning_rate": 9.244866235784996e-06, "loss": 0.0057, "step": 11300 }, { "epoch": 0.18506095066677575, "grad_norm": 0.25762873888015747, "learning_rate": 9.253047533338787e-06, "loss": 0.0085, "step": 11310 }, { "epoch": 0.1852245766178516, "grad_norm": 0.1531849205493927, "learning_rate": 9.26122883089258e-06, "loss": 0.0044, "step": 11320 }, { "epoch": 0.18538820256892743, "grad_norm": 0.1525973379611969, "learning_rate": 9.269410128446372e-06, "loss": 0.0061, "step": 11330 }, { "epoch": 0.18555182852000326, "grad_norm": 0.16310057044029236, "learning_rate": 9.277591426000165e-06, "loss": 0.0076, "step": 11340 }, { "epoch": 0.18571545447107912, "grad_norm": 0.1427851766347885, "learning_rate": 9.285772723553957e-06, "loss": 0.0053, "step": 11350 }, { "epoch": 0.18587908042215495, "grad_norm": 0.17253410816192627, "learning_rate": 9.293954021107748e-06, "loss": 0.0069, "step": 11360 }, { "epoch": 0.18604270637323078, "grad_norm": 0.23499998450279236, "learning_rate": 9.302135318661541e-06, "loss": 0.0091, "step": 11370 }, { "epoch": 0.18620633232430664, "grad_norm": 0.3588450253009796, "learning_rate": 9.310316616215332e-06, "loss": 0.0086, "step": 11380 }, { "epoch": 0.18636995827538247, "grad_norm": 0.1670575886964798, "learning_rate": 9.318497913769124e-06, "loss": 0.0093, "step": 11390 }, { "epoch": 0.18653358422645833, "grad_norm": 0.412836492061615, "learning_rate": 9.326679211322917e-06, "loss": 0.0076, "step": 11400 }, { "epoch": 0.18669721017753416, "grad_norm": 0.25285816192626953, "learning_rate": 9.334860508876708e-06, "loss": 0.0057, "step": 11410 }, { "epoch": 0.18686083612861, "grad_norm": 0.5399177670478821, "learning_rate": 9.3430418064305e-06, "loss": 0.0078, "step": 11420 }, { "epoch": 0.18702446207968584, "grad_norm": 0.15615855157375336, "learning_rate": 9.351223103984293e-06, "loss": 0.0056, "step": 11430 }, { "epoch": 0.18718808803076167, "grad_norm": 0.1417391151189804, "learning_rate": 9.359404401538084e-06, "loss": 0.0054, "step": 11440 }, { "epoch": 0.18735171398183753, "grad_norm": 0.414683997631073, "learning_rate": 9.367585699091877e-06, "loss": 0.0097, "step": 11450 }, { "epoch": 0.18751533993291336, "grad_norm": 0.27592965960502625, "learning_rate": 9.375766996645669e-06, "loss": 0.0054, "step": 11460 }, { "epoch": 0.1876789658839892, "grad_norm": 0.08436980098485947, "learning_rate": 9.38394829419946e-06, "loss": 0.0062, "step": 11470 }, { "epoch": 0.18784259183506505, "grad_norm": 0.22398467361927032, "learning_rate": 9.392129591753253e-06, "loss": 0.0058, "step": 11480 }, { "epoch": 0.18800621778614088, "grad_norm": 0.14039935171604156, "learning_rate": 9.400310889307045e-06, "loss": 0.0051, "step": 11490 }, { "epoch": 0.18816984373721674, "grad_norm": 0.17133980989456177, "learning_rate": 9.408492186860836e-06, "loss": 0.0053, "step": 11500 }, { "epoch": 0.18833346968829257, "grad_norm": 0.17555159330368042, "learning_rate": 9.41667348441463e-06, "loss": 0.0048, "step": 11510 }, { "epoch": 0.1884970956393684, "grad_norm": 0.29000338912010193, "learning_rate": 9.42485478196842e-06, "loss": 0.0095, "step": 11520 }, { "epoch": 0.18866072159044425, "grad_norm": 0.1891888976097107, "learning_rate": 9.433036079522214e-06, "loss": 0.0075, "step": 11530 }, { "epoch": 0.18882434754152008, "grad_norm": 0.14998741447925568, "learning_rate": 9.441217377076005e-06, "loss": 0.0061, "step": 11540 }, { "epoch": 0.1889879734925959, "grad_norm": 0.49098023772239685, "learning_rate": 9.449398674629797e-06, "loss": 0.0068, "step": 11550 }, { "epoch": 0.18915159944367177, "grad_norm": 0.14602211117744446, "learning_rate": 9.45757997218359e-06, "loss": 0.0069, "step": 11560 }, { "epoch": 0.1893152253947476, "grad_norm": 0.36751922965049744, "learning_rate": 9.465761269737381e-06, "loss": 0.0084, "step": 11570 }, { "epoch": 0.18947885134582346, "grad_norm": 0.12051521986722946, "learning_rate": 9.473942567291173e-06, "loss": 0.0059, "step": 11580 }, { "epoch": 0.1896424772968993, "grad_norm": 0.10984425246715546, "learning_rate": 9.482123864844966e-06, "loss": 0.0043, "step": 11590 }, { "epoch": 0.18980610324797512, "grad_norm": 0.0915994793176651, "learning_rate": 9.490305162398757e-06, "loss": 0.0078, "step": 11600 }, { "epoch": 0.18996972919905097, "grad_norm": 0.20405228435993195, "learning_rate": 9.49848645995255e-06, "loss": 0.0046, "step": 11610 }, { "epoch": 0.1901333551501268, "grad_norm": 0.13243839144706726, "learning_rate": 9.506667757506342e-06, "loss": 0.0079, "step": 11620 }, { "epoch": 0.19029698110120266, "grad_norm": 0.04314020648598671, "learning_rate": 9.514849055060133e-06, "loss": 0.0044, "step": 11630 }, { "epoch": 0.1904606070522785, "grad_norm": 0.3143502175807953, "learning_rate": 9.523030352613926e-06, "loss": 0.0072, "step": 11640 }, { "epoch": 0.19062423300335432, "grad_norm": 0.18950845301151276, "learning_rate": 9.531211650167718e-06, "loss": 0.0045, "step": 11650 }, { "epoch": 0.19078785895443018, "grad_norm": 0.27728721499443054, "learning_rate": 9.539392947721509e-06, "loss": 0.0052, "step": 11660 }, { "epoch": 0.190951484905506, "grad_norm": 0.10692378878593445, "learning_rate": 9.547574245275302e-06, "loss": 0.0057, "step": 11670 }, { "epoch": 0.19111511085658187, "grad_norm": 0.06206267699599266, "learning_rate": 9.555755542829094e-06, "loss": 0.005, "step": 11680 }, { "epoch": 0.1912787368076577, "grad_norm": 0.3834879696369171, "learning_rate": 9.563936840382887e-06, "loss": 0.0082, "step": 11690 }, { "epoch": 0.19144236275873353, "grad_norm": 0.10655863583087921, "learning_rate": 9.572118137936678e-06, "loss": 0.004, "step": 11700 }, { "epoch": 0.19160598870980938, "grad_norm": 0.34981513023376465, "learning_rate": 9.58029943549047e-06, "loss": 0.0071, "step": 11710 }, { "epoch": 0.1917696146608852, "grad_norm": 0.3693655729293823, "learning_rate": 9.588480733044263e-06, "loss": 0.0063, "step": 11720 }, { "epoch": 0.19193324061196107, "grad_norm": 0.3498647212982178, "learning_rate": 9.596662030598054e-06, "loss": 0.0055, "step": 11730 }, { "epoch": 0.1920968665630369, "grad_norm": 0.2284696251153946, "learning_rate": 9.604843328151845e-06, "loss": 0.0035, "step": 11740 }, { "epoch": 0.19226049251411273, "grad_norm": 0.03513551875948906, "learning_rate": 9.613024625705639e-06, "loss": 0.0061, "step": 11750 }, { "epoch": 0.1924241184651886, "grad_norm": 0.1508800983428955, "learning_rate": 9.62120592325943e-06, "loss": 0.0046, "step": 11760 }, { "epoch": 0.19258774441626442, "grad_norm": 0.3604476749897003, "learning_rate": 9.629387220813221e-06, "loss": 0.0071, "step": 11770 }, { "epoch": 0.19275137036734025, "grad_norm": 0.28478652238845825, "learning_rate": 9.637568518367015e-06, "loss": 0.0051, "step": 11780 }, { "epoch": 0.1929149963184161, "grad_norm": 0.06905519962310791, "learning_rate": 9.645749815920806e-06, "loss": 0.0045, "step": 11790 }, { "epoch": 0.19307862226949193, "grad_norm": 0.15411260724067688, "learning_rate": 9.653931113474599e-06, "loss": 0.0063, "step": 11800 }, { "epoch": 0.1932422482205678, "grad_norm": 1.007348895072937, "learning_rate": 9.66211241102839e-06, "loss": 0.0041, "step": 11810 }, { "epoch": 0.19340587417164362, "grad_norm": 0.29335278272628784, "learning_rate": 9.670293708582182e-06, "loss": 0.0073, "step": 11820 }, { "epoch": 0.19356950012271945, "grad_norm": 0.1454388052225113, "learning_rate": 9.678475006135975e-06, "loss": 0.0054, "step": 11830 }, { "epoch": 0.1937331260737953, "grad_norm": 1.1332452297210693, "learning_rate": 9.686656303689766e-06, "loss": 0.0045, "step": 11840 }, { "epoch": 0.19389675202487114, "grad_norm": 0.3866525888442993, "learning_rate": 9.694837601243558e-06, "loss": 0.0047, "step": 11850 }, { "epoch": 0.194060377975947, "grad_norm": 0.22672446072101593, "learning_rate": 9.703018898797351e-06, "loss": 0.0069, "step": 11860 }, { "epoch": 0.19422400392702283, "grad_norm": 0.22819767892360687, "learning_rate": 9.711200196351142e-06, "loss": 0.0078, "step": 11870 }, { "epoch": 0.19438762987809866, "grad_norm": 0.24415896832942963, "learning_rate": 9.719381493904934e-06, "loss": 0.0073, "step": 11880 }, { "epoch": 0.1945512558291745, "grad_norm": 0.20112735033035278, "learning_rate": 9.727562791458725e-06, "loss": 0.0055, "step": 11890 }, { "epoch": 0.19471488178025034, "grad_norm": 0.4894067049026489, "learning_rate": 9.735744089012518e-06, "loss": 0.0076, "step": 11900 }, { "epoch": 0.1948785077313262, "grad_norm": 0.12639126181602478, "learning_rate": 9.74392538656631e-06, "loss": 0.0038, "step": 11910 }, { "epoch": 0.19504213368240203, "grad_norm": 0.08692676573991776, "learning_rate": 9.752106684120101e-06, "loss": 0.0057, "step": 11920 }, { "epoch": 0.19520575963347786, "grad_norm": 0.07425641268491745, "learning_rate": 9.760287981673894e-06, "loss": 0.004, "step": 11930 }, { "epoch": 0.19536938558455372, "grad_norm": 0.19665846228599548, "learning_rate": 9.768469279227686e-06, "loss": 0.0057, "step": 11940 }, { "epoch": 0.19553301153562955, "grad_norm": 0.29606568813323975, "learning_rate": 9.776650576781477e-06, "loss": 0.0061, "step": 11950 }, { "epoch": 0.1956966374867054, "grad_norm": 0.26980194449424744, "learning_rate": 9.78483187433527e-06, "loss": 0.0041, "step": 11960 }, { "epoch": 0.19586026343778123, "grad_norm": 0.3186739385128021, "learning_rate": 9.793013171889062e-06, "loss": 0.0063, "step": 11970 }, { "epoch": 0.19602388938885706, "grad_norm": 0.24271419644355774, "learning_rate": 9.801194469442855e-06, "loss": 0.0054, "step": 11980 }, { "epoch": 0.19618751533993292, "grad_norm": 0.2628874182701111, "learning_rate": 9.809375766996646e-06, "loss": 0.0099, "step": 11990 }, { "epoch": 0.19635114129100875, "grad_norm": 0.1418476551771164, "learning_rate": 9.817557064550438e-06, "loss": 0.0062, "step": 12000 }, { "epoch": 0.19651476724208458, "grad_norm": 0.06236148253083229, "learning_rate": 9.82573836210423e-06, "loss": 0.0073, "step": 12010 }, { "epoch": 0.19667839319316044, "grad_norm": 0.23187607526779175, "learning_rate": 9.833919659658022e-06, "loss": 0.0072, "step": 12020 }, { "epoch": 0.19684201914423627, "grad_norm": 0.2311960905790329, "learning_rate": 9.842100957211813e-06, "loss": 0.007, "step": 12030 }, { "epoch": 0.19700564509531213, "grad_norm": 0.11824999749660492, "learning_rate": 9.850282254765607e-06, "loss": 0.0069, "step": 12040 }, { "epoch": 0.19716927104638796, "grad_norm": 0.16228464245796204, "learning_rate": 9.858463552319398e-06, "loss": 0.0067, "step": 12050 }, { "epoch": 0.19733289699746379, "grad_norm": 0.23480363190174103, "learning_rate": 9.866644849873191e-06, "loss": 0.0101, "step": 12060 }, { "epoch": 0.19749652294853964, "grad_norm": 0.1719197928905487, "learning_rate": 9.874826147426983e-06, "loss": 0.007, "step": 12070 }, { "epoch": 0.19766014889961547, "grad_norm": 0.12306105345487595, "learning_rate": 9.883007444980774e-06, "loss": 0.0065, "step": 12080 }, { "epoch": 0.19782377485069133, "grad_norm": 0.37918901443481445, "learning_rate": 9.891188742534567e-06, "loss": 0.0064, "step": 12090 }, { "epoch": 0.19798740080176716, "grad_norm": 0.3092987835407257, "learning_rate": 9.899370040088358e-06, "loss": 0.0077, "step": 12100 }, { "epoch": 0.198151026752843, "grad_norm": 0.14266997575759888, "learning_rate": 9.90755133764215e-06, "loss": 0.0053, "step": 12110 }, { "epoch": 0.19831465270391885, "grad_norm": 0.24467043578624725, "learning_rate": 9.915732635195943e-06, "loss": 0.0052, "step": 12120 }, { "epoch": 0.19847827865499468, "grad_norm": 0.01567120850086212, "learning_rate": 9.923913932749734e-06, "loss": 0.0041, "step": 12130 }, { "epoch": 0.19864190460607054, "grad_norm": 0.26190006732940674, "learning_rate": 9.932095230303527e-06, "loss": 0.0062, "step": 12140 }, { "epoch": 0.19880553055714636, "grad_norm": 0.5142116546630859, "learning_rate": 9.940276527857319e-06, "loss": 0.0076, "step": 12150 }, { "epoch": 0.1989691565082222, "grad_norm": 0.1658213585615158, "learning_rate": 9.94845782541111e-06, "loss": 0.0044, "step": 12160 }, { "epoch": 0.19913278245929805, "grad_norm": 0.17540432512760162, "learning_rate": 9.956639122964903e-06, "loss": 0.0099, "step": 12170 }, { "epoch": 0.19929640841037388, "grad_norm": 0.30687403678894043, "learning_rate": 9.964820420518695e-06, "loss": 0.0079, "step": 12180 }, { "epoch": 0.19946003436144974, "grad_norm": 0.14110256731510162, "learning_rate": 9.973001718072486e-06, "loss": 0.005, "step": 12190 }, { "epoch": 0.19962366031252557, "grad_norm": 0.33628714084625244, "learning_rate": 9.98118301562628e-06, "loss": 0.0058, "step": 12200 }, { "epoch": 0.1997872862636014, "grad_norm": 0.0796908438205719, "learning_rate": 9.98936431318007e-06, "loss": 0.0037, "step": 12210 }, { "epoch": 0.19995091221467726, "grad_norm": 0.5572142004966736, "learning_rate": 9.997545610733864e-06, "loss": 0.0068, "step": 12220 }, { "epoch": 0.2001145381657531, "grad_norm": 0.3793887197971344, "learning_rate": 9.999999900093168e-06, "loss": 0.0065, "step": 12230 }, { "epoch": 0.20027816411682892, "grad_norm": 0.19541969895362854, "learning_rate": 9.99999941075359e-06, "loss": 0.0071, "step": 12240 }, { "epoch": 0.20044179006790477, "grad_norm": 0.24930810928344727, "learning_rate": 9.999998513631071e-06, "loss": 0.0067, "step": 12250 }, { "epoch": 0.2006054160189806, "grad_norm": 0.26440510153770447, "learning_rate": 9.999997208725685e-06, "loss": 0.0038, "step": 12260 }, { "epoch": 0.20076904197005646, "grad_norm": 0.12102105468511581, "learning_rate": 9.999995496037538e-06, "loss": 0.0036, "step": 12270 }, { "epoch": 0.2009326679211323, "grad_norm": 0.276606947183609, "learning_rate": 9.99999337556677e-06, "loss": 0.0052, "step": 12280 }, { "epoch": 0.20109629387220812, "grad_norm": 0.2862796187400818, "learning_rate": 9.999990847313552e-06, "loss": 0.0082, "step": 12290 }, { "epoch": 0.20125991982328398, "grad_norm": 0.16471347212791443, "learning_rate": 9.999987911278094e-06, "loss": 0.0056, "step": 12300 }, { "epoch": 0.2014235457743598, "grad_norm": 0.30309781432151794, "learning_rate": 9.999984567460635e-06, "loss": 0.0062, "step": 12310 }, { "epoch": 0.20158717172543567, "grad_norm": 0.12292275577783585, "learning_rate": 9.999980815861443e-06, "loss": 0.0054, "step": 12320 }, { "epoch": 0.2017507976765115, "grad_norm": 0.29708683490753174, "learning_rate": 9.999976656480828e-06, "loss": 0.0073, "step": 12330 }, { "epoch": 0.20191442362758732, "grad_norm": 0.17786575853824615, "learning_rate": 9.999972089319127e-06, "loss": 0.0049, "step": 12340 }, { "epoch": 0.20207804957866318, "grad_norm": 0.3271494209766388, "learning_rate": 9.999967114376716e-06, "loss": 0.0059, "step": 12350 }, { "epoch": 0.202241675529739, "grad_norm": 0.18429258465766907, "learning_rate": 9.999961731653998e-06, "loss": 0.0049, "step": 12360 }, { "epoch": 0.20240530148081487, "grad_norm": 0.09776465594768524, "learning_rate": 9.99995594115141e-06, "loss": 0.0058, "step": 12370 }, { "epoch": 0.2025689274318907, "grad_norm": 0.2743496596813202, "learning_rate": 9.999949742869431e-06, "loss": 0.0062, "step": 12380 }, { "epoch": 0.20273255338296653, "grad_norm": 0.13429966568946838, "learning_rate": 9.99994313680856e-06, "loss": 0.0117, "step": 12390 }, { "epoch": 0.2028961793340424, "grad_norm": 0.2560930550098419, "learning_rate": 9.999936122969339e-06, "loss": 0.0036, "step": 12400 }, { "epoch": 0.20305980528511822, "grad_norm": 0.18724317848682404, "learning_rate": 9.999928701352337e-06, "loss": 0.0059, "step": 12410 }, { "epoch": 0.20322343123619407, "grad_norm": 0.41032344102859497, "learning_rate": 9.999920871958163e-06, "loss": 0.006, "step": 12420 }, { "epoch": 0.2033870571872699, "grad_norm": 0.324008584022522, "learning_rate": 9.999912634787453e-06, "loss": 0.0058, "step": 12430 }, { "epoch": 0.20355068313834573, "grad_norm": 0.21129874885082245, "learning_rate": 9.99990398984088e-06, "loss": 0.0065, "step": 12440 }, { "epoch": 0.2037143090894216, "grad_norm": 0.21353356540203094, "learning_rate": 9.999894937119149e-06, "loss": 0.0044, "step": 12450 }, { "epoch": 0.20387793504049742, "grad_norm": 0.16450877487659454, "learning_rate": 9.999885476622998e-06, "loss": 0.0061, "step": 12460 }, { "epoch": 0.20404156099157325, "grad_norm": 0.39673373103141785, "learning_rate": 9.999875608353199e-06, "loss": 0.0064, "step": 12470 }, { "epoch": 0.2042051869426491, "grad_norm": 0.260753870010376, "learning_rate": 9.999865332310556e-06, "loss": 0.006, "step": 12480 }, { "epoch": 0.20436881289372494, "grad_norm": 0.2538551688194275, "learning_rate": 9.999854648495907e-06, "loss": 0.0048, "step": 12490 }, { "epoch": 0.2045324388448008, "grad_norm": 0.12521472573280334, "learning_rate": 9.999843556910125e-06, "loss": 0.0076, "step": 12500 }, { "epoch": 0.20469606479587663, "grad_norm": 4.710920810699463, "learning_rate": 9.999832057554111e-06, "loss": 0.0067, "step": 12510 }, { "epoch": 0.20485969074695246, "grad_norm": 1.7755043506622314, "learning_rate": 9.999820150428808e-06, "loss": 0.0155, "step": 12520 }, { "epoch": 0.2050233166980283, "grad_norm": 0.1261700689792633, "learning_rate": 9.999807835535184e-06, "loss": 0.0085, "step": 12530 }, { "epoch": 0.20518694264910414, "grad_norm": 0.26906171441078186, "learning_rate": 9.999795112874242e-06, "loss": 0.0059, "step": 12540 }, { "epoch": 0.20535056860018, "grad_norm": 0.13919776678085327, "learning_rate": 9.999781982447024e-06, "loss": 0.0038, "step": 12550 }, { "epoch": 0.20551419455125583, "grad_norm": 0.5989676117897034, "learning_rate": 9.999768444254596e-06, "loss": 0.0062, "step": 12560 }, { "epoch": 0.20567782050233166, "grad_norm": 0.268352210521698, "learning_rate": 9.999754498298064e-06, "loss": 0.0051, "step": 12570 }, { "epoch": 0.20584144645340752, "grad_norm": 0.2456214874982834, "learning_rate": 9.999740144578568e-06, "loss": 0.0087, "step": 12580 }, { "epoch": 0.20600507240448335, "grad_norm": 0.31719282269477844, "learning_rate": 9.999725383097275e-06, "loss": 0.0061, "step": 12590 }, { "epoch": 0.2061686983555592, "grad_norm": 0.28815433382987976, "learning_rate": 9.99971021385539e-06, "loss": 0.0065, "step": 12600 }, { "epoch": 0.20633232430663503, "grad_norm": 0.14909744262695312, "learning_rate": 9.999694636854151e-06, "loss": 0.0055, "step": 12610 }, { "epoch": 0.20649595025771086, "grad_norm": 0.03553149849176407, "learning_rate": 9.999678652094828e-06, "loss": 0.0056, "step": 12620 }, { "epoch": 0.20665957620878672, "grad_norm": 0.030735662207007408, "learning_rate": 9.999662259578725e-06, "loss": 0.0066, "step": 12630 }, { "epoch": 0.20682320215986255, "grad_norm": 0.28481221199035645, "learning_rate": 9.999645459307176e-06, "loss": 0.0063, "step": 12640 }, { "epoch": 0.20698682811093838, "grad_norm": 0.3420799970626831, "learning_rate": 9.999628251281556e-06, "loss": 0.0074, "step": 12650 }, { "epoch": 0.20715045406201424, "grad_norm": 0.10076165944337845, "learning_rate": 9.999610635503266e-06, "loss": 0.0052, "step": 12660 }, { "epoch": 0.20731408001309007, "grad_norm": 0.10481889545917511, "learning_rate": 9.999592611973743e-06, "loss": 0.0072, "step": 12670 }, { "epoch": 0.20747770596416593, "grad_norm": 0.37956297397613525, "learning_rate": 9.999574180694456e-06, "loss": 0.0082, "step": 12680 }, { "epoch": 0.20764133191524176, "grad_norm": 0.09679333120584488, "learning_rate": 9.999555341666908e-06, "loss": 0.0043, "step": 12690 }, { "epoch": 0.20780495786631759, "grad_norm": 0.3721299469470978, "learning_rate": 9.999536094892637e-06, "loss": 0.0058, "step": 12700 }, { "epoch": 0.20796858381739344, "grad_norm": 0.1958468109369278, "learning_rate": 9.999516440373212e-06, "loss": 0.0058, "step": 12710 }, { "epoch": 0.20813220976846927, "grad_norm": 0.10566503554582596, "learning_rate": 9.999496378110236e-06, "loss": 0.0036, "step": 12720 }, { "epoch": 0.20829583571954513, "grad_norm": 0.10382892191410065, "learning_rate": 9.999475908105345e-06, "loss": 0.0042, "step": 12730 }, { "epoch": 0.20845946167062096, "grad_norm": 0.25233131647109985, "learning_rate": 9.999455030360207e-06, "loss": 0.0086, "step": 12740 }, { "epoch": 0.2086230876216968, "grad_norm": 0.13550812005996704, "learning_rate": 9.999433744876528e-06, "loss": 0.0058, "step": 12750 }, { "epoch": 0.20878671357277265, "grad_norm": 0.10901674628257751, "learning_rate": 9.999412051656044e-06, "loss": 0.008, "step": 12760 }, { "epoch": 0.20895033952384848, "grad_norm": 0.22533440589904785, "learning_rate": 9.99938995070052e-06, "loss": 0.0083, "step": 12770 }, { "epoch": 0.20911396547492433, "grad_norm": 0.07614787667989731, "learning_rate": 9.999367442011763e-06, "loss": 0.0047, "step": 12780 }, { "epoch": 0.20927759142600016, "grad_norm": 0.23862454295158386, "learning_rate": 9.999344525591604e-06, "loss": 0.0073, "step": 12790 }, { "epoch": 0.209441217377076, "grad_norm": 0.0961994156241417, "learning_rate": 9.999321201441916e-06, "loss": 0.0085, "step": 12800 }, { "epoch": 0.20960484332815185, "grad_norm": 0.09173143655061722, "learning_rate": 9.999297469564601e-06, "loss": 0.0039, "step": 12810 }, { "epoch": 0.20976846927922768, "grad_norm": 0.2273540049791336, "learning_rate": 9.999273329961594e-06, "loss": 0.0055, "step": 12820 }, { "epoch": 0.20993209523030354, "grad_norm": 0.12911464273929596, "learning_rate": 9.99924878263486e-06, "loss": 0.0061, "step": 12830 }, { "epoch": 0.21009572118137937, "grad_norm": 0.03453594818711281, "learning_rate": 9.999223827586406e-06, "loss": 0.0055, "step": 12840 }, { "epoch": 0.2102593471324552, "grad_norm": 0.31178250908851624, "learning_rate": 9.999198464818268e-06, "loss": 0.0053, "step": 12850 }, { "epoch": 0.21042297308353106, "grad_norm": 0.09923781454563141, "learning_rate": 9.999172694332508e-06, "loss": 0.0051, "step": 12860 }, { "epoch": 0.21058659903460689, "grad_norm": 0.14487408101558685, "learning_rate": 9.999146516131234e-06, "loss": 0.0033, "step": 12870 }, { "epoch": 0.21075022498568272, "grad_norm": 0.15765681862831116, "learning_rate": 9.999119930216576e-06, "loss": 0.0044, "step": 12880 }, { "epoch": 0.21091385093675857, "grad_norm": 0.33947205543518066, "learning_rate": 9.999092936590708e-06, "loss": 0.0039, "step": 12890 }, { "epoch": 0.2110774768878344, "grad_norm": 0.2355516254901886, "learning_rate": 9.999065535255828e-06, "loss": 0.0062, "step": 12900 }, { "epoch": 0.21124110283891026, "grad_norm": 0.17342792451381683, "learning_rate": 9.99903772621417e-06, "loss": 0.0065, "step": 12910 }, { "epoch": 0.2114047287899861, "grad_norm": 0.22083014249801636, "learning_rate": 9.999009509468003e-06, "loss": 0.0057, "step": 12920 }, { "epoch": 0.21156835474106192, "grad_norm": 0.24771888554096222, "learning_rate": 9.99898088501963e-06, "loss": 0.0035, "step": 12930 }, { "epoch": 0.21173198069213778, "grad_norm": 0.2606211304664612, "learning_rate": 9.998951852871384e-06, "loss": 0.0048, "step": 12940 }, { "epoch": 0.2118956066432136, "grad_norm": 0.20061686635017395, "learning_rate": 9.998922413025632e-06, "loss": 0.0036, "step": 12950 }, { "epoch": 0.21205923259428947, "grad_norm": 0.24848076701164246, "learning_rate": 9.998892565484776e-06, "loss": 0.0055, "step": 12960 }, { "epoch": 0.2122228585453653, "grad_norm": 0.20416420698165894, "learning_rate": 9.99886231025125e-06, "loss": 0.0053, "step": 12970 }, { "epoch": 0.21238648449644112, "grad_norm": 0.16592179238796234, "learning_rate": 9.998831647327521e-06, "loss": 0.0067, "step": 12980 }, { "epoch": 0.21255011044751698, "grad_norm": 0.2822223901748657, "learning_rate": 9.998800576716092e-06, "loss": 0.0071, "step": 12990 }, { "epoch": 0.2127137363985928, "grad_norm": 0.17762772738933563, "learning_rate": 9.998769098419494e-06, "loss": 0.004, "step": 13000 }, { "epoch": 0.21287736234966867, "grad_norm": 0.2055288404226303, "learning_rate": 9.998737212440296e-06, "loss": 0.0026, "step": 13010 }, { "epoch": 0.2130409883007445, "grad_norm": 0.11760883033275604, "learning_rate": 9.998704918781097e-06, "loss": 0.0038, "step": 13020 }, { "epoch": 0.21320461425182033, "grad_norm": 0.24624723196029663, "learning_rate": 9.998672217444533e-06, "loss": 0.0063, "step": 13030 }, { "epoch": 0.2133682402028962, "grad_norm": 0.280038446187973, "learning_rate": 9.99863910843327e-06, "loss": 0.0072, "step": 13040 }, { "epoch": 0.21353186615397202, "grad_norm": 0.23071032762527466, "learning_rate": 9.998605591750009e-06, "loss": 0.0061, "step": 13050 }, { "epoch": 0.21369549210504787, "grad_norm": 0.11674752086400986, "learning_rate": 9.998571667397481e-06, "loss": 0.0046, "step": 13060 }, { "epoch": 0.2138591180561237, "grad_norm": 0.627616286277771, "learning_rate": 9.998537335378456e-06, "loss": 0.0052, "step": 13070 }, { "epoch": 0.21402274400719953, "grad_norm": 0.4384259879589081, "learning_rate": 9.998502595695732e-06, "loss": 0.0065, "step": 13080 }, { "epoch": 0.2141863699582754, "grad_norm": 0.15268398821353912, "learning_rate": 9.998467448352141e-06, "loss": 0.0042, "step": 13090 }, { "epoch": 0.21434999590935122, "grad_norm": 0.1323525309562683, "learning_rate": 9.998431893350552e-06, "loss": 0.0061, "step": 13100 }, { "epoch": 0.21451362186042705, "grad_norm": 0.1803782433271408, "learning_rate": 9.998395930693865e-06, "loss": 0.0047, "step": 13110 }, { "epoch": 0.2146772478115029, "grad_norm": 0.1619434803724289, "learning_rate": 9.998359560385011e-06, "loss": 0.0042, "step": 13120 }, { "epoch": 0.21484087376257874, "grad_norm": 0.11872068792581558, "learning_rate": 9.998322782426957e-06, "loss": 0.0054, "step": 13130 }, { "epoch": 0.2150044997136546, "grad_norm": 0.14572450518608093, "learning_rate": 9.998285596822704e-06, "loss": 0.0059, "step": 13140 }, { "epoch": 0.21516812566473043, "grad_norm": 0.10185088962316513, "learning_rate": 9.998248003575282e-06, "loss": 0.0063, "step": 13150 }, { "epoch": 0.21533175161580626, "grad_norm": 0.06390407681465149, "learning_rate": 9.998210002687758e-06, "loss": 0.0061, "step": 13160 }, { "epoch": 0.2154953775668821, "grad_norm": 0.29848599433898926, "learning_rate": 9.998171594163232e-06, "loss": 0.0065, "step": 13170 }, { "epoch": 0.21565900351795794, "grad_norm": 0.2984239161014557, "learning_rate": 9.998132778004837e-06, "loss": 0.0076, "step": 13180 }, { "epoch": 0.2158226294690338, "grad_norm": 0.17627207934856415, "learning_rate": 9.998093554215735e-06, "loss": 0.0084, "step": 13190 }, { "epoch": 0.21598625542010963, "grad_norm": 0.18145498633384705, "learning_rate": 9.998053922799131e-06, "loss": 0.0062, "step": 13200 }, { "epoch": 0.21614988137118546, "grad_norm": 0.23339824378490448, "learning_rate": 9.998013883758252e-06, "loss": 0.0056, "step": 13210 }, { "epoch": 0.21631350732226132, "grad_norm": 0.3056366443634033, "learning_rate": 9.997973437096366e-06, "loss": 0.0046, "step": 13220 }, { "epoch": 0.21647713327333715, "grad_norm": 0.10920435190200806, "learning_rate": 9.997932582816771e-06, "loss": 0.0056, "step": 13230 }, { "epoch": 0.216640759224413, "grad_norm": 0.044270556420087814, "learning_rate": 9.9978913209228e-06, "loss": 0.005, "step": 13240 }, { "epoch": 0.21680438517548883, "grad_norm": 0.11681827902793884, "learning_rate": 9.997849651417815e-06, "loss": 0.006, "step": 13250 }, { "epoch": 0.21696801112656466, "grad_norm": 0.22933712601661682, "learning_rate": 9.997807574305218e-06, "loss": 0.0065, "step": 13260 }, { "epoch": 0.21713163707764052, "grad_norm": 0.24406129121780396, "learning_rate": 9.997765089588439e-06, "loss": 0.0057, "step": 13270 }, { "epoch": 0.21729526302871635, "grad_norm": 0.20547881722450256, "learning_rate": 9.997722197270942e-06, "loss": 0.0037, "step": 13280 }, { "epoch": 0.2174588889797922, "grad_norm": 0.14097577333450317, "learning_rate": 9.997678897356227e-06, "loss": 0.0053, "step": 13290 }, { "epoch": 0.21762251493086804, "grad_norm": 0.27782827615737915, "learning_rate": 9.997635189847827e-06, "loss": 0.0048, "step": 13300 }, { "epoch": 0.21778614088194387, "grad_norm": 0.09930739551782608, "learning_rate": 9.997591074749302e-06, "loss": 0.0061, "step": 13310 }, { "epoch": 0.21794976683301973, "grad_norm": 0.17293070256710052, "learning_rate": 9.997546552064252e-06, "loss": 0.0044, "step": 13320 }, { "epoch": 0.21811339278409556, "grad_norm": 0.2866891622543335, "learning_rate": 9.997501621796309e-06, "loss": 0.0076, "step": 13330 }, { "epoch": 0.21827701873517139, "grad_norm": 0.2108461856842041, "learning_rate": 9.997456283949135e-06, "loss": 0.005, "step": 13340 }, { "epoch": 0.21844064468624724, "grad_norm": 0.17102141678333282, "learning_rate": 9.997410538526434e-06, "loss": 0.0074, "step": 13350 }, { "epoch": 0.21860427063732307, "grad_norm": 0.17764686048030853, "learning_rate": 9.997364385531928e-06, "loss": 0.0098, "step": 13360 }, { "epoch": 0.21876789658839893, "grad_norm": 0.25975266098976135, "learning_rate": 9.997317824969385e-06, "loss": 0.0077, "step": 13370 }, { "epoch": 0.21893152253947476, "grad_norm": 0.10543973743915558, "learning_rate": 9.997270856842605e-06, "loss": 0.0043, "step": 13380 }, { "epoch": 0.2190951484905506, "grad_norm": 0.07928754389286041, "learning_rate": 9.997223481155417e-06, "loss": 0.0054, "step": 13390 }, { "epoch": 0.21925877444162645, "grad_norm": 0.25949251651763916, "learning_rate": 9.99717569791168e-06, "loss": 0.0053, "step": 13400 }, { "epoch": 0.21942240039270228, "grad_norm": 0.21976803243160248, "learning_rate": 9.997127507115297e-06, "loss": 0.0077, "step": 13410 }, { "epoch": 0.21958602634377813, "grad_norm": 0.10809529572725296, "learning_rate": 9.997078908770197e-06, "loss": 0.0046, "step": 13420 }, { "epoch": 0.21974965229485396, "grad_norm": 0.3533380329608917, "learning_rate": 9.99702990288034e-06, "loss": 0.009, "step": 13430 }, { "epoch": 0.2199132782459298, "grad_norm": 0.07063060253858566, "learning_rate": 9.996980489449728e-06, "loss": 0.0083, "step": 13440 }, { "epoch": 0.22007690419700565, "grad_norm": 0.14417965710163116, "learning_rate": 9.996930668482388e-06, "loss": 0.0048, "step": 13450 }, { "epoch": 0.22024053014808148, "grad_norm": 0.13901092112064362, "learning_rate": 9.996880439982382e-06, "loss": 0.0033, "step": 13460 }, { "epoch": 0.22040415609915734, "grad_norm": 0.21198055148124695, "learning_rate": 9.99682980395381e-06, "loss": 0.0064, "step": 13470 }, { "epoch": 0.22056778205023317, "grad_norm": 0.2886105179786682, "learning_rate": 9.9967787604008e-06, "loss": 0.0042, "step": 13480 }, { "epoch": 0.220731408001309, "grad_norm": 0.09032531082630157, "learning_rate": 9.996727309327514e-06, "loss": 0.0055, "step": 13490 }, { "epoch": 0.22089503395238486, "grad_norm": 0.23418252170085907, "learning_rate": 9.99667545073815e-06, "loss": 0.0079, "step": 13500 }, { "epoch": 0.22105865990346069, "grad_norm": 0.05176910385489464, "learning_rate": 9.996623184636936e-06, "loss": 0.004, "step": 13510 }, { "epoch": 0.22122228585453654, "grad_norm": 0.15251822769641876, "learning_rate": 9.996570511028135e-06, "loss": 0.0072, "step": 13520 }, { "epoch": 0.22138591180561237, "grad_norm": 0.34697702527046204, "learning_rate": 9.996517429916041e-06, "loss": 0.0071, "step": 13530 }, { "epoch": 0.2215495377566882, "grad_norm": 0.2799191176891327, "learning_rate": 9.996463941304987e-06, "loss": 0.0054, "step": 13540 }, { "epoch": 0.22171316370776406, "grad_norm": 0.8140790462493896, "learning_rate": 9.996410045199334e-06, "loss": 0.0039, "step": 13550 }, { "epoch": 0.2218767896588399, "grad_norm": 0.16691988706588745, "learning_rate": 9.996355741603475e-06, "loss": 0.0058, "step": 13560 }, { "epoch": 0.22204041560991572, "grad_norm": 0.26519906520843506, "learning_rate": 9.996301030521842e-06, "loss": 0.0093, "step": 13570 }, { "epoch": 0.22220404156099158, "grad_norm": 0.0860215499997139, "learning_rate": 9.996245911958896e-06, "loss": 0.0053, "step": 13580 }, { "epoch": 0.2223676675120674, "grad_norm": 0.14534030854701996, "learning_rate": 9.996190385919131e-06, "loss": 0.005, "step": 13590 }, { "epoch": 0.22253129346314326, "grad_norm": 0.2794528603553772, "learning_rate": 9.996134452407077e-06, "loss": 0.0065, "step": 13600 }, { "epoch": 0.2226949194142191, "grad_norm": 0.27352064847946167, "learning_rate": 9.996078111427297e-06, "loss": 0.0067, "step": 13610 }, { "epoch": 0.22285854536529492, "grad_norm": 0.12785963714122772, "learning_rate": 9.996021362984383e-06, "loss": 0.007, "step": 13620 }, { "epoch": 0.22302217131637078, "grad_norm": 0.11861082911491394, "learning_rate": 9.995964207082964e-06, "loss": 0.0064, "step": 13630 }, { "epoch": 0.2231857972674466, "grad_norm": 0.27641215920448303, "learning_rate": 9.995906643727703e-06, "loss": 0.0042, "step": 13640 }, { "epoch": 0.22334942321852247, "grad_norm": 0.14083783328533173, "learning_rate": 9.995848672923293e-06, "loss": 0.0036, "step": 13650 }, { "epoch": 0.2235130491695983, "grad_norm": 0.19127905368804932, "learning_rate": 9.995790294674464e-06, "loss": 0.0044, "step": 13660 }, { "epoch": 0.22367667512067413, "grad_norm": 0.22909848392009735, "learning_rate": 9.995731508985974e-06, "loss": 0.0047, "step": 13670 }, { "epoch": 0.22384030107175, "grad_norm": 0.12029135227203369, "learning_rate": 9.99567231586262e-06, "loss": 0.0067, "step": 13680 }, { "epoch": 0.22400392702282582, "grad_norm": 0.2586616277694702, "learning_rate": 9.995612715309228e-06, "loss": 0.0043, "step": 13690 }, { "epoch": 0.22416755297390167, "grad_norm": 0.21871019899845123, "learning_rate": 9.99555270733066e-06, "loss": 0.0061, "step": 13700 }, { "epoch": 0.2243311789249775, "grad_norm": 0.21606074273586273, "learning_rate": 9.99549229193181e-06, "loss": 0.0034, "step": 13710 }, { "epoch": 0.22449480487605333, "grad_norm": 0.3272698223590851, "learning_rate": 9.995431469117604e-06, "loss": 0.0045, "step": 13720 }, { "epoch": 0.2246584308271292, "grad_norm": 0.15977269411087036, "learning_rate": 9.995370238893002e-06, "loss": 0.0077, "step": 13730 }, { "epoch": 0.22482205677820502, "grad_norm": 0.1499793827533722, "learning_rate": 9.995308601263001e-06, "loss": 0.0053, "step": 13740 }, { "epoch": 0.22498568272928085, "grad_norm": 0.13406315445899963, "learning_rate": 9.995246556232626e-06, "loss": 0.0048, "step": 13750 }, { "epoch": 0.2251493086803567, "grad_norm": 0.13505113124847412, "learning_rate": 9.995184103806937e-06, "loss": 0.0046, "step": 13760 }, { "epoch": 0.22531293463143254, "grad_norm": 0.22426843643188477, "learning_rate": 9.995121243991028e-06, "loss": 0.0102, "step": 13770 }, { "epoch": 0.2254765605825084, "grad_norm": 0.16202637553215027, "learning_rate": 9.995057976790024e-06, "loss": 0.0047, "step": 13780 }, { "epoch": 0.22564018653358422, "grad_norm": 0.23404191434383392, "learning_rate": 9.994994302209087e-06, "loss": 0.0069, "step": 13790 }, { "epoch": 0.22580381248466005, "grad_norm": 0.1363525688648224, "learning_rate": 9.994930220253407e-06, "loss": 0.0054, "step": 13800 }, { "epoch": 0.2259674384357359, "grad_norm": 0.1189674362540245, "learning_rate": 9.994865730928214e-06, "loss": 0.0049, "step": 13810 }, { "epoch": 0.22613106438681174, "grad_norm": 0.18299731612205505, "learning_rate": 9.994800834238767e-06, "loss": 0.0058, "step": 13820 }, { "epoch": 0.2262946903378876, "grad_norm": 0.09564902633428574, "learning_rate": 9.994735530190356e-06, "loss": 0.0052, "step": 13830 }, { "epoch": 0.22645831628896343, "grad_norm": 0.11441633105278015, "learning_rate": 9.994669818788311e-06, "loss": 0.0075, "step": 13840 }, { "epoch": 0.22662194224003926, "grad_norm": 0.17015090584754944, "learning_rate": 9.994603700037988e-06, "loss": 0.0055, "step": 13850 }, { "epoch": 0.22678556819111512, "grad_norm": 0.3901327848434448, "learning_rate": 9.994537173944779e-06, "loss": 0.0059, "step": 13860 }, { "epoch": 0.22694919414219095, "grad_norm": 0.26342910528182983, "learning_rate": 9.994470240514111e-06, "loss": 0.0065, "step": 13870 }, { "epoch": 0.2271128200932668, "grad_norm": 0.13855819404125214, "learning_rate": 9.994402899751445e-06, "loss": 0.0058, "step": 13880 }, { "epoch": 0.22727644604434263, "grad_norm": 0.0944550558924675, "learning_rate": 9.994335151662268e-06, "loss": 0.005, "step": 13890 }, { "epoch": 0.22744007199541846, "grad_norm": 0.14530417323112488, "learning_rate": 9.994266996252111e-06, "loss": 0.0062, "step": 13900 }, { "epoch": 0.22760369794649432, "grad_norm": 0.19953951239585876, "learning_rate": 9.994198433526529e-06, "loss": 0.0055, "step": 13910 }, { "epoch": 0.22776732389757015, "grad_norm": 0.1666042059659958, "learning_rate": 9.994129463491114e-06, "loss": 0.0046, "step": 13920 }, { "epoch": 0.227930949848646, "grad_norm": 0.07594744861125946, "learning_rate": 9.994060086151491e-06, "loss": 0.0056, "step": 13930 }, { "epoch": 0.22809457579972184, "grad_norm": 0.23996597528457642, "learning_rate": 9.99399030151332e-06, "loss": 0.004, "step": 13940 }, { "epoch": 0.22825820175079767, "grad_norm": 0.19474714994430542, "learning_rate": 9.99392010958229e-06, "loss": 0.0041, "step": 13950 }, { "epoch": 0.22842182770187353, "grad_norm": 0.07609615474939346, "learning_rate": 9.993849510364127e-06, "loss": 0.004, "step": 13960 }, { "epoch": 0.22858545365294936, "grad_norm": 0.27029407024383545, "learning_rate": 9.993778503864588e-06, "loss": 0.0051, "step": 13970 }, { "epoch": 0.22874907960402519, "grad_norm": 0.24098403751850128, "learning_rate": 9.993707090089463e-06, "loss": 0.0054, "step": 13980 }, { "epoch": 0.22891270555510104, "grad_norm": 0.12854556739330292, "learning_rate": 9.993635269044582e-06, "loss": 0.0063, "step": 13990 }, { "epoch": 0.22907633150617687, "grad_norm": 0.08363831788301468, "learning_rate": 9.993563040735796e-06, "loss": 0.0056, "step": 14000 }, { "epoch": 0.22923995745725273, "grad_norm": 0.5338570475578308, "learning_rate": 9.993490405168997e-06, "loss": 0.0045, "step": 14010 }, { "epoch": 0.22940358340832856, "grad_norm": 0.31365853548049927, "learning_rate": 9.99341736235011e-06, "loss": 0.008, "step": 14020 }, { "epoch": 0.2295672093594044, "grad_norm": 0.1387081742286682, "learning_rate": 9.993343912285093e-06, "loss": 0.0029, "step": 14030 }, { "epoch": 0.22973083531048025, "grad_norm": 0.07173755019903183, "learning_rate": 9.993270054979935e-06, "loss": 0.0063, "step": 14040 }, { "epoch": 0.22989446126155608, "grad_norm": 0.22205767035484314, "learning_rate": 9.993195790440661e-06, "loss": 0.0045, "step": 14050 }, { "epoch": 0.23005808721263193, "grad_norm": 0.21055075526237488, "learning_rate": 9.993121118673326e-06, "loss": 0.0046, "step": 14060 }, { "epoch": 0.23022171316370776, "grad_norm": 0.33890509605407715, "learning_rate": 9.99304603968402e-06, "loss": 0.0044, "step": 14070 }, { "epoch": 0.2303853391147836, "grad_norm": 0.3940201699733734, "learning_rate": 9.992970553478867e-06, "loss": 0.0098, "step": 14080 }, { "epoch": 0.23054896506585945, "grad_norm": 0.15668921172618866, "learning_rate": 9.992894660064023e-06, "loss": 0.0047, "step": 14090 }, { "epoch": 0.23071259101693528, "grad_norm": 0.2652505040168762, "learning_rate": 9.992818359445678e-06, "loss": 0.0044, "step": 14100 }, { "epoch": 0.23087621696801114, "grad_norm": 0.11571596562862396, "learning_rate": 9.992741651630055e-06, "loss": 0.0026, "step": 14110 }, { "epoch": 0.23103984291908697, "grad_norm": 0.33519086241722107, "learning_rate": 9.992664536623409e-06, "loss": 0.0048, "step": 14120 }, { "epoch": 0.2312034688701628, "grad_norm": 0.2683626711368561, "learning_rate": 9.99258701443203e-06, "loss": 0.0064, "step": 14130 }, { "epoch": 0.23136709482123866, "grad_norm": 0.060153067111968994, "learning_rate": 9.992509085062241e-06, "loss": 0.0049, "step": 14140 }, { "epoch": 0.23153072077231449, "grad_norm": 0.051124393939971924, "learning_rate": 9.992430748520396e-06, "loss": 0.0073, "step": 14150 }, { "epoch": 0.23169434672339034, "grad_norm": 0.14996354281902313, "learning_rate": 9.992352004812887e-06, "loss": 0.004, "step": 14160 }, { "epoch": 0.23185797267446617, "grad_norm": 0.17824135720729828, "learning_rate": 9.992272853946133e-06, "loss": 0.005, "step": 14170 }, { "epoch": 0.232021598625542, "grad_norm": 0.3842923045158386, "learning_rate": 9.99219329592659e-06, "loss": 0.0074, "step": 14180 }, { "epoch": 0.23218522457661786, "grad_norm": 0.3007977306842804, "learning_rate": 9.992113330760744e-06, "loss": 0.0059, "step": 14190 }, { "epoch": 0.2323488505276937, "grad_norm": 0.3273521065711975, "learning_rate": 9.992032958455122e-06, "loss": 0.0057, "step": 14200 }, { "epoch": 0.23251247647876952, "grad_norm": 0.27948787808418274, "learning_rate": 9.991952179016277e-06, "loss": 0.0072, "step": 14210 }, { "epoch": 0.23267610242984538, "grad_norm": 0.07427410036325455, "learning_rate": 9.991870992450794e-06, "loss": 0.0058, "step": 14220 }, { "epoch": 0.2328397283809212, "grad_norm": 0.16711176931858063, "learning_rate": 9.9917893987653e-06, "loss": 0.0063, "step": 14230 }, { "epoch": 0.23300335433199706, "grad_norm": 0.15623080730438232, "learning_rate": 9.991707397966443e-06, "loss": 0.0047, "step": 14240 }, { "epoch": 0.2331669802830729, "grad_norm": 0.8671354651451111, "learning_rate": 9.991624990060915e-06, "loss": 0.0065, "step": 14250 }, { "epoch": 0.23333060623414872, "grad_norm": 0.30463269352912903, "learning_rate": 9.991542175055436e-06, "loss": 0.0053, "step": 14260 }, { "epoch": 0.23349423218522458, "grad_norm": 0.14944496750831604, "learning_rate": 9.99145895295676e-06, "loss": 0.0064, "step": 14270 }, { "epoch": 0.2336578581363004, "grad_norm": 0.1360207349061966, "learning_rate": 9.991375323771673e-06, "loss": 0.0041, "step": 14280 }, { "epoch": 0.23382148408737627, "grad_norm": 0.24179519712924957, "learning_rate": 9.991291287506998e-06, "loss": 0.0047, "step": 14290 }, { "epoch": 0.2339851100384521, "grad_norm": 0.2822986841201782, "learning_rate": 9.991206844169588e-06, "loss": 0.0069, "step": 14300 }, { "epoch": 0.23414873598952793, "grad_norm": 0.21003149449825287, "learning_rate": 9.991121993766329e-06, "loss": 0.0046, "step": 14310 }, { "epoch": 0.23431236194060379, "grad_norm": 0.559204638004303, "learning_rate": 9.99103673630414e-06, "loss": 0.009, "step": 14320 }, { "epoch": 0.23447598789167962, "grad_norm": 0.18115448951721191, "learning_rate": 9.990951071789977e-06, "loss": 0.0056, "step": 14330 }, { "epoch": 0.23463961384275547, "grad_norm": 0.14334458112716675, "learning_rate": 9.990865000230825e-06, "loss": 0.0053, "step": 14340 }, { "epoch": 0.2348032397938313, "grad_norm": 0.3625998795032501, "learning_rate": 9.990778521633703e-06, "loss": 0.0078, "step": 14350 }, { "epoch": 0.23496686574490713, "grad_norm": 0.14576828479766846, "learning_rate": 9.990691636005667e-06, "loss": 0.0067, "step": 14360 }, { "epoch": 0.235130491695983, "grad_norm": 0.13885726034641266, "learning_rate": 9.990604343353799e-06, "loss": 0.0049, "step": 14370 }, { "epoch": 0.23529411764705882, "grad_norm": 0.26105016469955444, "learning_rate": 9.990516643685222e-06, "loss": 0.0064, "step": 14380 }, { "epoch": 0.23545774359813468, "grad_norm": 0.06199558824300766, "learning_rate": 9.990428537007084e-06, "loss": 0.0073, "step": 14390 }, { "epoch": 0.2356213695492105, "grad_norm": 0.18994049727916718, "learning_rate": 9.990340023326577e-06, "loss": 0.0072, "step": 14400 }, { "epoch": 0.23578499550028634, "grad_norm": 0.2793636918067932, "learning_rate": 9.990251102650914e-06, "loss": 0.0058, "step": 14410 }, { "epoch": 0.2359486214513622, "grad_norm": 0.2294287085533142, "learning_rate": 9.99016177498735e-06, "loss": 0.0049, "step": 14420 }, { "epoch": 0.23611224740243802, "grad_norm": 0.21436075866222382, "learning_rate": 9.990072040343169e-06, "loss": 0.0064, "step": 14430 }, { "epoch": 0.23627587335351385, "grad_norm": 0.1207033172249794, "learning_rate": 9.989981898725692e-06, "loss": 0.0052, "step": 14440 }, { "epoch": 0.2364394993045897, "grad_norm": 0.18606725335121155, "learning_rate": 9.989891350142266e-06, "loss": 0.0088, "step": 14450 }, { "epoch": 0.23660312525566554, "grad_norm": 0.12256237119436264, "learning_rate": 9.98980039460028e-06, "loss": 0.0037, "step": 14460 }, { "epoch": 0.2367667512067414, "grad_norm": 0.5557464361190796, "learning_rate": 9.989709032107151e-06, "loss": 0.0055, "step": 14470 }, { "epoch": 0.23693037715781723, "grad_norm": 0.28305840492248535, "learning_rate": 9.989617262670328e-06, "loss": 0.004, "step": 14480 }, { "epoch": 0.23709400310889306, "grad_norm": 0.4966054856777191, "learning_rate": 9.989525086297299e-06, "loss": 0.0047, "step": 14490 }, { "epoch": 0.23725762905996892, "grad_norm": 0.13753916323184967, "learning_rate": 9.98943250299558e-06, "loss": 0.003, "step": 14500 }, { "epoch": 0.23742125501104475, "grad_norm": 0.059442464262247086, "learning_rate": 9.98933951277272e-06, "loss": 0.0047, "step": 14510 }, { "epoch": 0.2375848809621206, "grad_norm": 0.22410885989665985, "learning_rate": 9.989246115636304e-06, "loss": 0.0038, "step": 14520 }, { "epoch": 0.23774850691319643, "grad_norm": 0.25762972235679626, "learning_rate": 9.989152311593953e-06, "loss": 0.0059, "step": 14530 }, { "epoch": 0.23791213286427226, "grad_norm": 0.2537146508693695, "learning_rate": 9.989058100653312e-06, "loss": 0.0034, "step": 14540 }, { "epoch": 0.23807575881534812, "grad_norm": 0.30126863718032837, "learning_rate": 9.988963482822066e-06, "loss": 0.0045, "step": 14550 }, { "epoch": 0.23823938476642395, "grad_norm": 0.24404503405094147, "learning_rate": 9.988868458107931e-06, "loss": 0.0041, "step": 14560 }, { "epoch": 0.2384030107174998, "grad_norm": 0.36956584453582764, "learning_rate": 9.988773026518661e-06, "loss": 0.0044, "step": 14570 }, { "epoch": 0.23856663666857564, "grad_norm": 0.11267326027154922, "learning_rate": 9.988677188062036e-06, "loss": 0.0049, "step": 14580 }, { "epoch": 0.23873026261965147, "grad_norm": 0.1392108052968979, "learning_rate": 9.988580942745871e-06, "loss": 0.0083, "step": 14590 }, { "epoch": 0.23889388857072733, "grad_norm": 0.07403679937124252, "learning_rate": 9.988484290578018e-06, "loss": 0.0035, "step": 14600 }, { "epoch": 0.23905751452180315, "grad_norm": 0.2874336242675781, "learning_rate": 9.988387231566358e-06, "loss": 0.0066, "step": 14610 }, { "epoch": 0.23922114047287898, "grad_norm": 0.15391848981380463, "learning_rate": 9.988289765718808e-06, "loss": 0.0044, "step": 14620 }, { "epoch": 0.23938476642395484, "grad_norm": 0.07161050289869308, "learning_rate": 9.988191893043317e-06, "loss": 0.0037, "step": 14630 }, { "epoch": 0.23954839237503067, "grad_norm": 0.30230942368507385, "learning_rate": 9.988093613547864e-06, "loss": 0.0053, "step": 14640 }, { "epoch": 0.23971201832610653, "grad_norm": 0.11311539262533188, "learning_rate": 9.987994927240469e-06, "loss": 0.0048, "step": 14650 }, { "epoch": 0.23987564427718236, "grad_norm": 0.06841092556715012, "learning_rate": 9.987895834129177e-06, "loss": 0.0069, "step": 14660 }, { "epoch": 0.2400392702282582, "grad_norm": 0.15750084817409515, "learning_rate": 9.987796334222073e-06, "loss": 0.0055, "step": 14670 }, { "epoch": 0.24020289617933405, "grad_norm": 0.06570466607809067, "learning_rate": 9.987696427527268e-06, "loss": 0.0052, "step": 14680 }, { "epoch": 0.24036652213040988, "grad_norm": 0.14742617309093475, "learning_rate": 9.987596114052912e-06, "loss": 0.0026, "step": 14690 }, { "epoch": 0.24053014808148573, "grad_norm": 0.08288706094026566, "learning_rate": 9.987495393807188e-06, "loss": 0.0042, "step": 14700 }, { "epoch": 0.24069377403256156, "grad_norm": 0.3669871985912323, "learning_rate": 9.987394266798308e-06, "loss": 0.0068, "step": 14710 }, { "epoch": 0.2408573999836374, "grad_norm": 0.1870093047618866, "learning_rate": 9.987292733034518e-06, "loss": 0.004, "step": 14720 }, { "epoch": 0.24102102593471325, "grad_norm": 0.0927494689822197, "learning_rate": 9.987190792524101e-06, "loss": 0.0045, "step": 14730 }, { "epoch": 0.24118465188578908, "grad_norm": 0.07338996231555939, "learning_rate": 9.987088445275375e-06, "loss": 0.0051, "step": 14740 }, { "epoch": 0.24134827783686494, "grad_norm": 0.2542848289012909, "learning_rate": 9.986985691296679e-06, "loss": 0.0049, "step": 14750 }, { "epoch": 0.24151190378794077, "grad_norm": 0.14750252664089203, "learning_rate": 9.986882530596398e-06, "loss": 0.0057, "step": 14760 }, { "epoch": 0.2416755297390166, "grad_norm": 0.13782623410224915, "learning_rate": 9.986778963182945e-06, "loss": 0.0054, "step": 14770 }, { "epoch": 0.24183915569009246, "grad_norm": 0.0877394899725914, "learning_rate": 9.986674989064768e-06, "loss": 0.0035, "step": 14780 }, { "epoch": 0.24200278164116829, "grad_norm": 0.1941722333431244, "learning_rate": 9.986570608250343e-06, "loss": 0.0033, "step": 14790 }, { "epoch": 0.24216640759224414, "grad_norm": 0.07986953109502792, "learning_rate": 9.986465820748186e-06, "loss": 0.0071, "step": 14800 }, { "epoch": 0.24233003354331997, "grad_norm": 0.15755994617938995, "learning_rate": 9.98636062656684e-06, "loss": 0.0048, "step": 14810 }, { "epoch": 0.2424936594943958, "grad_norm": 0.16584065556526184, "learning_rate": 9.986255025714888e-06, "loss": 0.0058, "step": 14820 }, { "epoch": 0.24265728544547166, "grad_norm": 0.1625998318195343, "learning_rate": 9.98614901820094e-06, "loss": 0.0078, "step": 14830 }, { "epoch": 0.2428209113965475, "grad_norm": 0.0650317594408989, "learning_rate": 9.986042604033644e-06, "loss": 0.0047, "step": 14840 }, { "epoch": 0.24298453734762332, "grad_norm": 0.1152484193444252, "learning_rate": 9.985935783221676e-06, "loss": 0.0052, "step": 14850 }, { "epoch": 0.24314816329869918, "grad_norm": 0.14701791107654572, "learning_rate": 9.985828555773748e-06, "loss": 0.0059, "step": 14860 }, { "epoch": 0.243311789249775, "grad_norm": 0.33101290464401245, "learning_rate": 9.98572092169861e-06, "loss": 0.0038, "step": 14870 }, { "epoch": 0.24347541520085086, "grad_norm": 0.1879790723323822, "learning_rate": 9.985612881005034e-06, "loss": 0.0044, "step": 14880 }, { "epoch": 0.2436390411519267, "grad_norm": 0.09927515685558319, "learning_rate": 9.985504433701834e-06, "loss": 0.0039, "step": 14890 }, { "epoch": 0.24380266710300252, "grad_norm": 0.13312315940856934, "learning_rate": 9.985395579797855e-06, "loss": 0.0032, "step": 14900 }, { "epoch": 0.24396629305407838, "grad_norm": 0.176203653216362, "learning_rate": 9.985286319301972e-06, "loss": 0.0085, "step": 14910 }, { "epoch": 0.2441299190051542, "grad_norm": 0.17916902899742126, "learning_rate": 9.985176652223101e-06, "loss": 0.0038, "step": 14920 }, { "epoch": 0.24429354495623007, "grad_norm": 0.1555701494216919, "learning_rate": 9.985066578570184e-06, "loss": 0.0077, "step": 14930 }, { "epoch": 0.2444571709073059, "grad_norm": 1.00691819190979, "learning_rate": 9.984956098352194e-06, "loss": 0.0044, "step": 14940 }, { "epoch": 0.24462079685838173, "grad_norm": 0.1658470630645752, "learning_rate": 9.984845211578147e-06, "loss": 0.0044, "step": 14950 }, { "epoch": 0.24478442280945759, "grad_norm": 0.22211848199367523, "learning_rate": 9.984733918257085e-06, "loss": 0.0048, "step": 14960 }, { "epoch": 0.24494804876053342, "grad_norm": 0.8488159775733948, "learning_rate": 9.984622218398083e-06, "loss": 0.0067, "step": 14970 }, { "epoch": 0.24511167471160927, "grad_norm": 0.3361782431602478, "learning_rate": 9.984510112010253e-06, "loss": 0.0075, "step": 14980 }, { "epoch": 0.2452753006626851, "grad_norm": 0.16249647736549377, "learning_rate": 9.984397599102735e-06, "loss": 0.0063, "step": 14990 }, { "epoch": 0.24543892661376093, "grad_norm": 0.21449130773544312, "learning_rate": 9.98428467968471e-06, "loss": 0.0058, "step": 15000 }, { "epoch": 0.2456025525648368, "grad_norm": 0.11375071108341217, "learning_rate": 9.984171353765383e-06, "loss": 0.0031, "step": 15010 }, { "epoch": 0.24576617851591262, "grad_norm": 0.11024336516857147, "learning_rate": 9.984057621353997e-06, "loss": 0.0045, "step": 15020 }, { "epoch": 0.24592980446698848, "grad_norm": 0.08600004762411118, "learning_rate": 9.98394348245983e-06, "loss": 0.0054, "step": 15030 }, { "epoch": 0.2460934304180643, "grad_norm": 0.22953863441944122, "learning_rate": 9.98382893709219e-06, "loss": 0.0065, "step": 15040 }, { "epoch": 0.24625705636914014, "grad_norm": 0.11961615085601807, "learning_rate": 9.983713985260418e-06, "loss": 0.0058, "step": 15050 }, { "epoch": 0.246420682320216, "grad_norm": 0.34285932779312134, "learning_rate": 9.98359862697389e-06, "loss": 0.0042, "step": 15060 }, { "epoch": 0.24658430827129182, "grad_norm": 0.07007353007793427, "learning_rate": 9.983482862242011e-06, "loss": 0.0045, "step": 15070 }, { "epoch": 0.24674793422236765, "grad_norm": 0.3148964047431946, "learning_rate": 9.983366691074228e-06, "loss": 0.0064, "step": 15080 }, { "epoch": 0.2469115601734435, "grad_norm": 0.08215848356485367, "learning_rate": 9.983250113480009e-06, "loss": 0.0034, "step": 15090 }, { "epoch": 0.24707518612451934, "grad_norm": 0.2079235315322876, "learning_rate": 9.983133129468869e-06, "loss": 0.0039, "step": 15100 }, { "epoch": 0.2472388120755952, "grad_norm": 0.18692241609096527, "learning_rate": 9.983015739050343e-06, "loss": 0.0066, "step": 15110 }, { "epoch": 0.24740243802667103, "grad_norm": 0.27569684386253357, "learning_rate": 9.982897942234008e-06, "loss": 0.0042, "step": 15120 }, { "epoch": 0.24756606397774686, "grad_norm": 0.1384645253419876, "learning_rate": 9.98277973902947e-06, "loss": 0.0042, "step": 15130 }, { "epoch": 0.24772968992882272, "grad_norm": 0.1159195825457573, "learning_rate": 9.982661129446369e-06, "loss": 0.0052, "step": 15140 }, { "epoch": 0.24789331587989855, "grad_norm": 0.031648822128772736, "learning_rate": 9.982542113494378e-06, "loss": 0.0039, "step": 15150 }, { "epoch": 0.2480569418309744, "grad_norm": 0.12593811750411987, "learning_rate": 9.982422691183206e-06, "loss": 0.0046, "step": 15160 }, { "epoch": 0.24822056778205023, "grad_norm": 0.22473861277103424, "learning_rate": 9.982302862522591e-06, "loss": 0.0043, "step": 15170 }, { "epoch": 0.24838419373312606, "grad_norm": 0.1660010665655136, "learning_rate": 9.982182627522304e-06, "loss": 0.0058, "step": 15180 }, { "epoch": 0.24854781968420192, "grad_norm": 0.13586291670799255, "learning_rate": 9.982061986192153e-06, "loss": 0.0057, "step": 15190 }, { "epoch": 0.24871144563527775, "grad_norm": 0.2331942319869995, "learning_rate": 9.981940938541977e-06, "loss": 0.0046, "step": 15200 }, { "epoch": 0.2488750715863536, "grad_norm": 0.21079044044017792, "learning_rate": 9.981819484581649e-06, "loss": 0.0026, "step": 15210 }, { "epoch": 0.24903869753742944, "grad_norm": 0.12120892107486725, "learning_rate": 9.981697624321073e-06, "loss": 0.0041, "step": 15220 }, { "epoch": 0.24920232348850527, "grad_norm": 0.045738670974969864, "learning_rate": 9.981575357770187e-06, "loss": 0.0073, "step": 15230 }, { "epoch": 0.24936594943958112, "grad_norm": 0.08233648538589478, "learning_rate": 9.981452684938966e-06, "loss": 0.0038, "step": 15240 }, { "epoch": 0.24952957539065695, "grad_norm": 0.10493548214435577, "learning_rate": 9.981329605837412e-06, "loss": 0.0039, "step": 15250 }, { "epoch": 0.2496932013417328, "grad_norm": 0.22596482932567596, "learning_rate": 9.981206120475561e-06, "loss": 0.006, "step": 15260 }, { "epoch": 0.24985682729280864, "grad_norm": 0.07747557759284973, "learning_rate": 9.981082228863487e-06, "loss": 0.0041, "step": 15270 }, { "epoch": 0.2500204532438845, "grad_norm": 0.37612032890319824, "learning_rate": 9.980957931011294e-06, "loss": 0.0049, "step": 15280 }, { "epoch": 0.25018407919496033, "grad_norm": 0.18529130518436432, "learning_rate": 9.980833226929118e-06, "loss": 0.0045, "step": 15290 }, { "epoch": 0.25034770514603616, "grad_norm": 0.268167644739151, "learning_rate": 9.98070811662713e-06, "loss": 0.0047, "step": 15300 }, { "epoch": 0.250511331097112, "grad_norm": 0.08909649401903152, "learning_rate": 9.980582600115536e-06, "loss": 0.0044, "step": 15310 }, { "epoch": 0.2506749570481878, "grad_norm": 0.17307843267917633, "learning_rate": 9.98045667740457e-06, "loss": 0.0042, "step": 15320 }, { "epoch": 0.2508385829992637, "grad_norm": 0.07683459669351578, "learning_rate": 9.980330348504502e-06, "loss": 0.0048, "step": 15330 }, { "epoch": 0.25100220895033953, "grad_norm": 0.19765128195285797, "learning_rate": 9.980203613425636e-06, "loss": 0.0036, "step": 15340 }, { "epoch": 0.25116583490141536, "grad_norm": 0.31956881284713745, "learning_rate": 9.980076472178307e-06, "loss": 0.0047, "step": 15350 }, { "epoch": 0.2513294608524912, "grad_norm": 0.2477467805147171, "learning_rate": 9.979948924772884e-06, "loss": 0.0073, "step": 15360 }, { "epoch": 0.251493086803567, "grad_norm": 0.20623217523097992, "learning_rate": 9.979820971219768e-06, "loss": 0.0037, "step": 15370 }, { "epoch": 0.2516567127546429, "grad_norm": 0.0902077704668045, "learning_rate": 9.9796926115294e-06, "loss": 0.0042, "step": 15380 }, { "epoch": 0.25182033870571874, "grad_norm": 0.38105589151382446, "learning_rate": 9.979563845712244e-06, "loss": 0.0055, "step": 15390 }, { "epoch": 0.25198396465679457, "grad_norm": 0.09033389389514923, "learning_rate": 9.979434673778803e-06, "loss": 0.0057, "step": 15400 }, { "epoch": 0.2521475906078704, "grad_norm": 0.08248700946569443, "learning_rate": 9.97930509573961e-06, "loss": 0.004, "step": 15410 }, { "epoch": 0.2523112165589462, "grad_norm": 0.5345564484596252, "learning_rate": 9.979175111605235e-06, "loss": 0.0056, "step": 15420 }, { "epoch": 0.2524748425100221, "grad_norm": 0.19937844574451447, "learning_rate": 9.97904472138628e-06, "loss": 0.0038, "step": 15430 }, { "epoch": 0.25263846846109794, "grad_norm": 0.11802786588668823, "learning_rate": 9.978913925093375e-06, "loss": 0.0056, "step": 15440 }, { "epoch": 0.2528020944121738, "grad_norm": 0.14457106590270996, "learning_rate": 9.978782722737192e-06, "loss": 0.0041, "step": 15450 }, { "epoch": 0.2529657203632496, "grad_norm": 0.07589415460824966, "learning_rate": 9.978651114328429e-06, "loss": 0.0076, "step": 15460 }, { "epoch": 0.25312934631432543, "grad_norm": 0.18440186977386475, "learning_rate": 9.978519099877819e-06, "loss": 0.0024, "step": 15470 }, { "epoch": 0.2532929722654013, "grad_norm": 0.2740703523159027, "learning_rate": 9.97838667939613e-06, "loss": 0.0068, "step": 15480 }, { "epoch": 0.25345659821647715, "grad_norm": 0.36186614632606506, "learning_rate": 9.978253852894162e-06, "loss": 0.0084, "step": 15490 }, { "epoch": 0.253620224167553, "grad_norm": 0.11956515908241272, "learning_rate": 9.978120620382748e-06, "loss": 0.008, "step": 15500 }, { "epoch": 0.2537838501186288, "grad_norm": 0.016868753358721733, "learning_rate": 9.97798698187275e-06, "loss": 0.0058, "step": 15510 }, { "epoch": 0.25394747606970464, "grad_norm": 0.13054509460926056, "learning_rate": 9.977852937375074e-06, "loss": 0.004, "step": 15520 }, { "epoch": 0.2541111020207805, "grad_norm": 0.24364294111728668, "learning_rate": 9.977718486900647e-06, "loss": 0.0046, "step": 15530 }, { "epoch": 0.25427472797185635, "grad_norm": 0.05926011502742767, "learning_rate": 9.977583630460437e-06, "loss": 0.0046, "step": 15540 }, { "epoch": 0.2544383539229322, "grad_norm": 0.1436033844947815, "learning_rate": 9.977448368065438e-06, "loss": 0.0089, "step": 15550 }, { "epoch": 0.254601979874008, "grad_norm": 0.30904632806777954, "learning_rate": 9.977312699726689e-06, "loss": 0.0051, "step": 15560 }, { "epoch": 0.25476560582508384, "grad_norm": 0.16983985900878906, "learning_rate": 9.977176625455248e-06, "loss": 0.0033, "step": 15570 }, { "epoch": 0.2549292317761597, "grad_norm": 0.1800442487001419, "learning_rate": 9.977040145262216e-06, "loss": 0.0049, "step": 15580 }, { "epoch": 0.25509285772723556, "grad_norm": 0.13681121170520782, "learning_rate": 9.976903259158723e-06, "loss": 0.0052, "step": 15590 }, { "epoch": 0.2552564836783114, "grad_norm": 0.3236236274242401, "learning_rate": 9.976765967155933e-06, "loss": 0.0044, "step": 15600 }, { "epoch": 0.2554201096293872, "grad_norm": 0.21640510857105255, "learning_rate": 9.976628269265044e-06, "loss": 0.0066, "step": 15610 }, { "epoch": 0.25558373558046305, "grad_norm": 0.0908389464020729, "learning_rate": 9.976490165497283e-06, "loss": 0.0049, "step": 15620 }, { "epoch": 0.2557473615315389, "grad_norm": 0.3596591353416443, "learning_rate": 9.976351655863919e-06, "loss": 0.0034, "step": 15630 }, { "epoch": 0.25591098748261476, "grad_norm": 0.10676617920398712, "learning_rate": 9.976212740376241e-06, "loss": 0.0035, "step": 15640 }, { "epoch": 0.2560746134336906, "grad_norm": 0.22603820264339447, "learning_rate": 9.976073419045585e-06, "loss": 0.0055, "step": 15650 }, { "epoch": 0.2562382393847664, "grad_norm": 0.4517509639263153, "learning_rate": 9.97593369188331e-06, "loss": 0.0031, "step": 15660 }, { "epoch": 0.25640186533584225, "grad_norm": 0.23560291528701782, "learning_rate": 9.975793558900814e-06, "loss": 0.0066, "step": 15670 }, { "epoch": 0.2565654912869181, "grad_norm": 0.05763200297951698, "learning_rate": 9.975653020109524e-06, "loss": 0.0046, "step": 15680 }, { "epoch": 0.25672911723799396, "grad_norm": 0.07072847336530685, "learning_rate": 9.975512075520901e-06, "loss": 0.0045, "step": 15690 }, { "epoch": 0.2568927431890698, "grad_norm": 0.11106191575527191, "learning_rate": 9.975370725146443e-06, "loss": 0.0043, "step": 15700 }, { "epoch": 0.2570563691401456, "grad_norm": 0.12212468683719635, "learning_rate": 9.975228968997674e-06, "loss": 0.0054, "step": 15710 }, { "epoch": 0.25721999509122145, "grad_norm": 0.2445971667766571, "learning_rate": 9.97508680708616e-06, "loss": 0.0058, "step": 15720 }, { "epoch": 0.2573836210422973, "grad_norm": 0.11314980685710907, "learning_rate": 9.974944239423492e-06, "loss": 0.0061, "step": 15730 }, { "epoch": 0.25754724699337317, "grad_norm": 0.10585033893585205, "learning_rate": 9.974801266021296e-06, "loss": 0.0098, "step": 15740 }, { "epoch": 0.257710872944449, "grad_norm": 0.2456834465265274, "learning_rate": 9.974657886891237e-06, "loss": 0.0057, "step": 15750 }, { "epoch": 0.25787449889552483, "grad_norm": 0.12119577825069427, "learning_rate": 9.974514102045006e-06, "loss": 0.0042, "step": 15760 }, { "epoch": 0.25803812484660066, "grad_norm": 0.25558027625083923, "learning_rate": 9.974369911494329e-06, "loss": 0.0049, "step": 15770 }, { "epoch": 0.2582017507976765, "grad_norm": 0.11136852204799652, "learning_rate": 9.974225315250965e-06, "loss": 0.0042, "step": 15780 }, { "epoch": 0.2583653767487524, "grad_norm": 0.04781457781791687, "learning_rate": 9.97408031332671e-06, "loss": 0.0026, "step": 15790 }, { "epoch": 0.2585290026998282, "grad_norm": 0.5310918688774109, "learning_rate": 9.973934905733387e-06, "loss": 0.004, "step": 15800 }, { "epoch": 0.25869262865090403, "grad_norm": 0.008327346295118332, "learning_rate": 9.973789092482856e-06, "loss": 0.0032, "step": 15810 }, { "epoch": 0.25885625460197986, "grad_norm": 0.13971813023090363, "learning_rate": 9.973642873587009e-06, "loss": 0.0041, "step": 15820 }, { "epoch": 0.2590198805530557, "grad_norm": 0.20547564327716827, "learning_rate": 9.97349624905777e-06, "loss": 0.0036, "step": 15830 }, { "epoch": 0.2591835065041316, "grad_norm": 0.23655913770198822, "learning_rate": 9.9733492189071e-06, "loss": 0.0039, "step": 15840 }, { "epoch": 0.2593471324552074, "grad_norm": 0.10760346800088882, "learning_rate": 9.973201783146989e-06, "loss": 0.0047, "step": 15850 }, { "epoch": 0.25951075840628324, "grad_norm": 0.10793501883745193, "learning_rate": 9.973053941789458e-06, "loss": 0.004, "step": 15860 }, { "epoch": 0.25967438435735907, "grad_norm": 0.07372265309095383, "learning_rate": 9.972905694846569e-06, "loss": 0.0034, "step": 15870 }, { "epoch": 0.2598380103084349, "grad_norm": 0.17902660369873047, "learning_rate": 9.97275704233041e-06, "loss": 0.0054, "step": 15880 }, { "epoch": 0.2600016362595108, "grad_norm": 0.24299824237823486, "learning_rate": 9.972607984253107e-06, "loss": 0.0031, "step": 15890 }, { "epoch": 0.2601652622105866, "grad_norm": 0.39346474409103394, "learning_rate": 9.972458520626814e-06, "loss": 0.0039, "step": 15900 }, { "epoch": 0.26032888816166244, "grad_norm": 0.13941121101379395, "learning_rate": 9.972308651463722e-06, "loss": 0.0043, "step": 15910 }, { "epoch": 0.26049251411273827, "grad_norm": 0.06831086426973343, "learning_rate": 9.972158376776053e-06, "loss": 0.0088, "step": 15920 }, { "epoch": 0.2606561400638141, "grad_norm": 0.24526947736740112, "learning_rate": 9.972007696576065e-06, "loss": 0.0073, "step": 15930 }, { "epoch": 0.26081976601489, "grad_norm": 0.1682722270488739, "learning_rate": 9.971856610876043e-06, "loss": 0.0031, "step": 15940 }, { "epoch": 0.2609833919659658, "grad_norm": 0.11419714987277985, "learning_rate": 9.971705119688314e-06, "loss": 0.0036, "step": 15950 }, { "epoch": 0.26114701791704165, "grad_norm": 0.7184643149375916, "learning_rate": 9.97155322302523e-06, "loss": 0.0108, "step": 15960 }, { "epoch": 0.2613106438681175, "grad_norm": 0.15539275109767914, "learning_rate": 9.97140092089918e-06, "loss": 0.006, "step": 15970 }, { "epoch": 0.2614742698191933, "grad_norm": 0.31286701560020447, "learning_rate": 9.971248213322585e-06, "loss": 0.004, "step": 15980 }, { "epoch": 0.2616378957702692, "grad_norm": 0.17413610219955444, "learning_rate": 9.971095100307898e-06, "loss": 0.006, "step": 15990 }, { "epoch": 0.261801521721345, "grad_norm": 0.09632241725921631, "learning_rate": 9.970941581867608e-06, "loss": 0.005, "step": 16000 }, { "epoch": 0.26196514767242085, "grad_norm": 0.12612801790237427, "learning_rate": 9.970787658014235e-06, "loss": 0.0055, "step": 16010 }, { "epoch": 0.2621287736234967, "grad_norm": 0.19577094912528992, "learning_rate": 9.970633328760335e-06, "loss": 0.0043, "step": 16020 }, { "epoch": 0.2622923995745725, "grad_norm": 0.08477102965116501, "learning_rate": 9.970478594118491e-06, "loss": 0.0071, "step": 16030 }, { "epoch": 0.2624560255256484, "grad_norm": 0.24222905933856964, "learning_rate": 9.970323454101324e-06, "loss": 0.0043, "step": 16040 }, { "epoch": 0.2626196514767242, "grad_norm": 0.2804945409297943, "learning_rate": 9.970167908721486e-06, "loss": 0.0053, "step": 16050 }, { "epoch": 0.26278327742780005, "grad_norm": 0.1598391830921173, "learning_rate": 9.970011957991664e-06, "loss": 0.0046, "step": 16060 }, { "epoch": 0.2629469033788759, "grad_norm": 0.08779624104499817, "learning_rate": 9.969855601924577e-06, "loss": 0.0034, "step": 16070 }, { "epoch": 0.2631105293299517, "grad_norm": 0.1460345834493637, "learning_rate": 9.969698840532974e-06, "loss": 0.003, "step": 16080 }, { "epoch": 0.26327415528102754, "grad_norm": 0.3892747163772583, "learning_rate": 9.969541673829643e-06, "loss": 0.005, "step": 16090 }, { "epoch": 0.26343778123210343, "grad_norm": 0.13804906606674194, "learning_rate": 9.9693841018274e-06, "loss": 0.0058, "step": 16100 }, { "epoch": 0.26360140718317926, "grad_norm": 0.14759685099124908, "learning_rate": 9.969226124539097e-06, "loss": 0.0059, "step": 16110 }, { "epoch": 0.2637650331342551, "grad_norm": 0.08929809927940369, "learning_rate": 9.96906774197762e-06, "loss": 0.0049, "step": 16120 }, { "epoch": 0.2639286590853309, "grad_norm": 0.11297618597745895, "learning_rate": 9.968908954155883e-06, "loss": 0.0046, "step": 16130 }, { "epoch": 0.26409228503640675, "grad_norm": 0.21108055114746094, "learning_rate": 9.968749761086837e-06, "loss": 0.0034, "step": 16140 }, { "epoch": 0.26425591098748263, "grad_norm": 0.09542800486087799, "learning_rate": 9.968590162783467e-06, "loss": 0.0044, "step": 16150 }, { "epoch": 0.26441953693855846, "grad_norm": 0.23468288779258728, "learning_rate": 9.968430159258785e-06, "loss": 0.004, "step": 16160 }, { "epoch": 0.2645831628896343, "grad_norm": 0.06206181272864342, "learning_rate": 9.968269750525846e-06, "loss": 0.0027, "step": 16170 }, { "epoch": 0.2647467888407101, "grad_norm": 0.3007090389728546, "learning_rate": 9.968108936597729e-06, "loss": 0.0072, "step": 16180 }, { "epoch": 0.26491041479178595, "grad_norm": 0.23993346095085144, "learning_rate": 9.967947717487549e-06, "loss": 0.0108, "step": 16190 }, { "epoch": 0.26507404074286184, "grad_norm": 0.11867068707942963, "learning_rate": 9.967786093208457e-06, "loss": 0.0026, "step": 16200 }, { "epoch": 0.26523766669393767, "grad_norm": 0.2238997220993042, "learning_rate": 9.967624063773634e-06, "loss": 0.0048, "step": 16210 }, { "epoch": 0.2654012926450135, "grad_norm": 0.2756165564060211, "learning_rate": 9.967461629196291e-06, "loss": 0.0036, "step": 16220 }, { "epoch": 0.2655649185960893, "grad_norm": 0.21465995907783508, "learning_rate": 9.96729878948968e-06, "loss": 0.0065, "step": 16230 }, { "epoch": 0.26572854454716516, "grad_norm": 0.1759486198425293, "learning_rate": 9.96713554466708e-06, "loss": 0.0046, "step": 16240 }, { "epoch": 0.26589217049824104, "grad_norm": 0.3826967477798462, "learning_rate": 9.966971894741804e-06, "loss": 0.0037, "step": 16250 }, { "epoch": 0.2660557964493169, "grad_norm": 0.33866703510284424, "learning_rate": 9.9668078397272e-06, "loss": 0.0031, "step": 16260 }, { "epoch": 0.2662194224003927, "grad_norm": 0.23178279399871826, "learning_rate": 9.966643379636646e-06, "loss": 0.0058, "step": 16270 }, { "epoch": 0.26638304835146853, "grad_norm": 0.2155774086713791, "learning_rate": 9.966478514483557e-06, "loss": 0.0038, "step": 16280 }, { "epoch": 0.26654667430254436, "grad_norm": 0.21046411991119385, "learning_rate": 9.966313244281377e-06, "loss": 0.0053, "step": 16290 }, { "epoch": 0.26671030025362025, "grad_norm": 0.05194896087050438, "learning_rate": 9.966147569043584e-06, "loss": 0.0043, "step": 16300 }, { "epoch": 0.2668739262046961, "grad_norm": 0.11445903778076172, "learning_rate": 9.965981488783697e-06, "loss": 0.0052, "step": 16310 }, { "epoch": 0.2670375521557719, "grad_norm": 0.09579271823167801, "learning_rate": 9.96581500351525e-06, "loss": 0.0057, "step": 16320 }, { "epoch": 0.26720117810684774, "grad_norm": 0.11837436258792877, "learning_rate": 9.965648113251828e-06, "loss": 0.0065, "step": 16330 }, { "epoch": 0.26736480405792357, "grad_norm": 0.03708185628056526, "learning_rate": 9.965480818007042e-06, "loss": 0.0064, "step": 16340 }, { "epoch": 0.26752843000899945, "grad_norm": 0.14848802983760834, "learning_rate": 9.965313117794532e-06, "loss": 0.0036, "step": 16350 }, { "epoch": 0.2676920559600753, "grad_norm": 0.13026051223278046, "learning_rate": 9.96514501262798e-06, "loss": 0.0064, "step": 16360 }, { "epoch": 0.2678556819111511, "grad_norm": 0.09151672571897507, "learning_rate": 9.964976502521093e-06, "loss": 0.0061, "step": 16370 }, { "epoch": 0.26801930786222694, "grad_norm": 0.11466898769140244, "learning_rate": 9.964807587487614e-06, "loss": 0.006, "step": 16380 }, { "epoch": 0.26818293381330277, "grad_norm": 0.23110909759998322, "learning_rate": 9.964638267541321e-06, "loss": 0.0089, "step": 16390 }, { "epoch": 0.26834655976437866, "grad_norm": 0.18098123371601105, "learning_rate": 9.964468542696022e-06, "loss": 0.0051, "step": 16400 }, { "epoch": 0.2685101857154545, "grad_norm": 0.5774562358856201, "learning_rate": 9.964298412965558e-06, "loss": 0.0041, "step": 16410 }, { "epoch": 0.2686738116665303, "grad_norm": 0.36918169260025024, "learning_rate": 9.964127878363805e-06, "loss": 0.0069, "step": 16420 }, { "epoch": 0.26883743761760615, "grad_norm": 0.18472912907600403, "learning_rate": 9.963956938904674e-06, "loss": 0.0041, "step": 16430 }, { "epoch": 0.269001063568682, "grad_norm": 0.6528200507164001, "learning_rate": 9.963785594602103e-06, "loss": 0.0058, "step": 16440 }, { "epoch": 0.26916468951975786, "grad_norm": 0.36004722118377686, "learning_rate": 9.963613845470066e-06, "loss": 0.005, "step": 16450 }, { "epoch": 0.2693283154708337, "grad_norm": 0.3867912292480469, "learning_rate": 9.963441691522573e-06, "loss": 0.006, "step": 16460 }, { "epoch": 0.2694919414219095, "grad_norm": 0.5443325638771057, "learning_rate": 9.963269132773661e-06, "loss": 0.0074, "step": 16470 }, { "epoch": 0.26965556737298535, "grad_norm": 0.2045241743326187, "learning_rate": 9.963096169237407e-06, "loss": 0.0078, "step": 16480 }, { "epoch": 0.2698191933240612, "grad_norm": 0.4688001275062561, "learning_rate": 9.962922800927915e-06, "loss": 0.005, "step": 16490 }, { "epoch": 0.269982819275137, "grad_norm": 0.17226062715053558, "learning_rate": 9.962749027859325e-06, "loss": 0.0047, "step": 16500 }, { "epoch": 0.2701464452262129, "grad_norm": 0.2984844446182251, "learning_rate": 9.962574850045807e-06, "loss": 0.0064, "step": 16510 }, { "epoch": 0.2703100711772887, "grad_norm": 0.18127554655075073, "learning_rate": 9.962400267501568e-06, "loss": 0.0058, "step": 16520 }, { "epoch": 0.27047369712836455, "grad_norm": 0.17778706550598145, "learning_rate": 9.96222528024085e-06, "loss": 0.0041, "step": 16530 }, { "epoch": 0.2706373230794404, "grad_norm": 0.20506048202514648, "learning_rate": 9.962049888277918e-06, "loss": 0.0036, "step": 16540 }, { "epoch": 0.2708009490305162, "grad_norm": 0.22299182415008545, "learning_rate": 9.961874091627082e-06, "loss": 0.0101, "step": 16550 }, { "epoch": 0.2709645749815921, "grad_norm": 0.14671024680137634, "learning_rate": 9.961697890302675e-06, "loss": 0.0064, "step": 16560 }, { "epoch": 0.27112820093266793, "grad_norm": 0.08210575580596924, "learning_rate": 9.96152128431907e-06, "loss": 0.004, "step": 16570 }, { "epoch": 0.27129182688374376, "grad_norm": 0.21329963207244873, "learning_rate": 9.96134427369067e-06, "loss": 0.005, "step": 16580 }, { "epoch": 0.2714554528348196, "grad_norm": 0.1510000228881836, "learning_rate": 9.96116685843191e-06, "loss": 0.0035, "step": 16590 }, { "epoch": 0.2716190787858954, "grad_norm": 0.5466216206550598, "learning_rate": 9.96098903855726e-06, "loss": 0.005, "step": 16600 }, { "epoch": 0.2717827047369713, "grad_norm": 0.17150095105171204, "learning_rate": 9.960810814081225e-06, "loss": 0.0053, "step": 16610 }, { "epoch": 0.27194633068804713, "grad_norm": 0.10637058317661285, "learning_rate": 9.960632185018335e-06, "loss": 0.0036, "step": 16620 }, { "epoch": 0.27210995663912296, "grad_norm": 0.06568179279565811, "learning_rate": 9.960453151383164e-06, "loss": 0.0034, "step": 16630 }, { "epoch": 0.2722735825901988, "grad_norm": 0.1168641522526741, "learning_rate": 9.96027371319031e-06, "loss": 0.0045, "step": 16640 }, { "epoch": 0.2724372085412746, "grad_norm": 0.10334012657403946, "learning_rate": 9.960093870454408e-06, "loss": 0.0047, "step": 16650 }, { "epoch": 0.2726008344923505, "grad_norm": 0.22061508893966675, "learning_rate": 9.959913623190127e-06, "loss": 0.0055, "step": 16660 }, { "epoch": 0.27276446044342634, "grad_norm": 0.20489159226417542, "learning_rate": 9.959732971412165e-06, "loss": 0.005, "step": 16670 }, { "epoch": 0.27292808639450217, "grad_norm": 0.17072172462940216, "learning_rate": 9.959551915135255e-06, "loss": 0.0025, "step": 16680 }, { "epoch": 0.273091712345578, "grad_norm": 0.4690852463245392, "learning_rate": 9.959370454374166e-06, "loss": 0.0052, "step": 16690 }, { "epoch": 0.2732553382966538, "grad_norm": 0.1498551368713379, "learning_rate": 9.959188589143695e-06, "loss": 0.0058, "step": 16700 }, { "epoch": 0.2734189642477297, "grad_norm": 0.24540501832962036, "learning_rate": 9.959006319458676e-06, "loss": 0.0041, "step": 16710 }, { "epoch": 0.27358259019880554, "grad_norm": 0.06451457738876343, "learning_rate": 9.958823645333975e-06, "loss": 0.0037, "step": 16720 }, { "epoch": 0.27374621614988137, "grad_norm": 0.13698600232601166, "learning_rate": 9.958640566784488e-06, "loss": 0.0076, "step": 16730 }, { "epoch": 0.2739098421009572, "grad_norm": 0.07890374213457108, "learning_rate": 9.958457083825147e-06, "loss": 0.0039, "step": 16740 }, { "epoch": 0.27407346805203303, "grad_norm": 0.14798398315906525, "learning_rate": 9.958273196470915e-06, "loss": 0.0026, "step": 16750 }, { "epoch": 0.2742370940031089, "grad_norm": 0.2839567959308624, "learning_rate": 9.958088904736793e-06, "loss": 0.0041, "step": 16760 }, { "epoch": 0.27440071995418475, "grad_norm": 0.19271968305110931, "learning_rate": 9.957904208637807e-06, "loss": 0.0066, "step": 16770 }, { "epoch": 0.2745643459052606, "grad_norm": 0.11610250174999237, "learning_rate": 9.957719108189023e-06, "loss": 0.0045, "step": 16780 }, { "epoch": 0.2747279718563364, "grad_norm": 0.2309107631444931, "learning_rate": 9.957533603405536e-06, "loss": 0.0053, "step": 16790 }, { "epoch": 0.27489159780741224, "grad_norm": 0.11249174177646637, "learning_rate": 9.957347694302475e-06, "loss": 0.0036, "step": 16800 }, { "epoch": 0.2750552237584881, "grad_norm": 0.25255057215690613, "learning_rate": 9.957161380895002e-06, "loss": 0.0028, "step": 16810 }, { "epoch": 0.27521884970956395, "grad_norm": 0.04065065085887909, "learning_rate": 9.956974663198314e-06, "loss": 0.0039, "step": 16820 }, { "epoch": 0.2753824756606398, "grad_norm": 0.13424116373062134, "learning_rate": 9.956787541227635e-06, "loss": 0.0036, "step": 16830 }, { "epoch": 0.2755461016117156, "grad_norm": 0.4534515142440796, "learning_rate": 9.95660001499823e-06, "loss": 0.0067, "step": 16840 }, { "epoch": 0.27570972756279144, "grad_norm": 0.11763358861207962, "learning_rate": 9.956412084525392e-06, "loss": 0.0025, "step": 16850 }, { "epoch": 0.2758733535138673, "grad_norm": 0.1640862673521042, "learning_rate": 9.956223749824447e-06, "loss": 0.0071, "step": 16860 }, { "epoch": 0.27603697946494316, "grad_norm": 0.04584338888525963, "learning_rate": 9.956035010910757e-06, "loss": 0.0042, "step": 16870 }, { "epoch": 0.276200605416019, "grad_norm": 0.3774982988834381, "learning_rate": 9.95584586779971e-06, "loss": 0.0057, "step": 16880 }, { "epoch": 0.2763642313670948, "grad_norm": 0.30738601088523865, "learning_rate": 9.955656320506738e-06, "loss": 0.0036, "step": 16890 }, { "epoch": 0.27652785731817064, "grad_norm": 0.2193276435136795, "learning_rate": 9.955466369047297e-06, "loss": 0.0071, "step": 16900 }, { "epoch": 0.27669148326924653, "grad_norm": 0.271788090467453, "learning_rate": 9.955276013436877e-06, "loss": 0.0053, "step": 16910 }, { "epoch": 0.27685510922032236, "grad_norm": 0.2519876956939697, "learning_rate": 9.955085253691006e-06, "loss": 0.0068, "step": 16920 }, { "epoch": 0.2770187351713982, "grad_norm": 0.15955935418605804, "learning_rate": 9.95489408982524e-06, "loss": 0.0032, "step": 16930 }, { "epoch": 0.277182361122474, "grad_norm": 0.1520773321390152, "learning_rate": 9.954702521855171e-06, "loss": 0.0049, "step": 16940 }, { "epoch": 0.27734598707354985, "grad_norm": 0.132028728723526, "learning_rate": 9.954510549796421e-06, "loss": 0.0059, "step": 16950 }, { "epoch": 0.2775096130246257, "grad_norm": 0.14376412332057953, "learning_rate": 9.954318173664648e-06, "loss": 0.0054, "step": 16960 }, { "epoch": 0.27767323897570156, "grad_norm": 0.1658952385187149, "learning_rate": 9.95412539347554e-06, "loss": 0.0047, "step": 16970 }, { "epoch": 0.2778368649267774, "grad_norm": 0.008403602056205273, "learning_rate": 9.95393220924482e-06, "loss": 0.0065, "step": 16980 }, { "epoch": 0.2780004908778532, "grad_norm": 0.20611797273159027, "learning_rate": 9.953738620988244e-06, "loss": 0.0035, "step": 16990 }, { "epoch": 0.27816411682892905, "grad_norm": 0.21646977961063385, "learning_rate": 9.9535446287216e-06, "loss": 0.003, "step": 17000 }, { "epoch": 0.2783277427800049, "grad_norm": 0.09182654321193695, "learning_rate": 9.95335023246071e-06, "loss": 0.0054, "step": 17010 }, { "epoch": 0.27849136873108077, "grad_norm": 0.1209140196442604, "learning_rate": 9.953155432221428e-06, "loss": 0.0095, "step": 17020 }, { "epoch": 0.2786549946821566, "grad_norm": 0.1542261838912964, "learning_rate": 9.95296022801964e-06, "loss": 0.0029, "step": 17030 }, { "epoch": 0.27881862063323243, "grad_norm": 0.008339930325746536, "learning_rate": 9.95276461987127e-06, "loss": 0.0036, "step": 17040 }, { "epoch": 0.27898224658430826, "grad_norm": 0.2207038402557373, "learning_rate": 9.952568607792265e-06, "loss": 0.0047, "step": 17050 }, { "epoch": 0.2791458725353841, "grad_norm": 0.40690934658050537, "learning_rate": 9.952372191798615e-06, "loss": 0.0049, "step": 17060 }, { "epoch": 0.27930949848646, "grad_norm": 0.18785670399665833, "learning_rate": 9.952175371906339e-06, "loss": 0.0037, "step": 17070 }, { "epoch": 0.2794731244375358, "grad_norm": 0.17055024206638336, "learning_rate": 9.951978148131489e-06, "loss": 0.0056, "step": 17080 }, { "epoch": 0.27963675038861163, "grad_norm": 0.054821744561195374, "learning_rate": 9.951780520490149e-06, "loss": 0.0031, "step": 17090 }, { "epoch": 0.27980037633968746, "grad_norm": 0.13429221510887146, "learning_rate": 9.951582488998436e-06, "loss": 0.006, "step": 17100 }, { "epoch": 0.2799640022907633, "grad_norm": 0.033095214515924454, "learning_rate": 9.951384053672504e-06, "loss": 0.0042, "step": 17110 }, { "epoch": 0.2801276282418392, "grad_norm": 0.19814211130142212, "learning_rate": 9.951185214528534e-06, "loss": 0.0071, "step": 17120 }, { "epoch": 0.280291254192915, "grad_norm": 0.09839620441198349, "learning_rate": 9.950985971582743e-06, "loss": 0.0038, "step": 17130 }, { "epoch": 0.28045488014399084, "grad_norm": 0.17991285026073456, "learning_rate": 9.95078632485138e-06, "loss": 0.0039, "step": 17140 }, { "epoch": 0.28061850609506667, "grad_norm": 0.176238551735878, "learning_rate": 9.950586274350728e-06, "loss": 0.0089, "step": 17150 }, { "epoch": 0.2807821320461425, "grad_norm": 0.18992644548416138, "learning_rate": 9.950385820097104e-06, "loss": 0.0052, "step": 17160 }, { "epoch": 0.2809457579972184, "grad_norm": 0.19561715424060822, "learning_rate": 9.950184962106855e-06, "loss": 0.005, "step": 17170 }, { "epoch": 0.2811093839482942, "grad_norm": 0.16559667885303497, "learning_rate": 9.949983700396363e-06, "loss": 0.0049, "step": 17180 }, { "epoch": 0.28127300989937004, "grad_norm": 0.19417065382003784, "learning_rate": 9.949782034982042e-06, "loss": 0.0042, "step": 17190 }, { "epoch": 0.28143663585044587, "grad_norm": 0.33402079343795776, "learning_rate": 9.949579965880338e-06, "loss": 0.0065, "step": 17200 }, { "epoch": 0.2816002618015217, "grad_norm": 0.2246643304824829, "learning_rate": 9.949377493107732e-06, "loss": 0.005, "step": 17210 }, { "epoch": 0.2817638877525976, "grad_norm": 0.17307962477207184, "learning_rate": 9.949174616680736e-06, "loss": 0.0059, "step": 17220 }, { "epoch": 0.2819275137036734, "grad_norm": 0.5333188772201538, "learning_rate": 9.948971336615897e-06, "loss": 0.0039, "step": 17230 }, { "epoch": 0.28209113965474925, "grad_norm": 0.1200670599937439, "learning_rate": 9.948767652929796e-06, "loss": 0.0048, "step": 17240 }, { "epoch": 0.2822547656058251, "grad_norm": 0.23842327296733856, "learning_rate": 9.948563565639041e-06, "loss": 0.0049, "step": 17250 }, { "epoch": 0.2824183915569009, "grad_norm": 0.19713306427001953, "learning_rate": 9.948359074760277e-06, "loss": 0.0044, "step": 17260 }, { "epoch": 0.2825820175079768, "grad_norm": 0.1332864612340927, "learning_rate": 9.948154180310184e-06, "loss": 0.005, "step": 17270 }, { "epoch": 0.2827456434590526, "grad_norm": 0.11713171005249023, "learning_rate": 9.94794888230547e-06, "loss": 0.0046, "step": 17280 }, { "epoch": 0.28290926941012845, "grad_norm": 0.11685027182102203, "learning_rate": 9.947743180762881e-06, "loss": 0.0052, "step": 17290 }, { "epoch": 0.2830728953612043, "grad_norm": 0.07329851388931274, "learning_rate": 9.947537075699193e-06, "loss": 0.0041, "step": 17300 }, { "epoch": 0.2832365213122801, "grad_norm": 0.1772112399339676, "learning_rate": 9.94733056713121e-06, "loss": 0.0032, "step": 17310 }, { "epoch": 0.283400147263356, "grad_norm": 0.1411488801240921, "learning_rate": 9.94712365507578e-06, "loss": 0.0033, "step": 17320 }, { "epoch": 0.2835637732144318, "grad_norm": 0.18904012441635132, "learning_rate": 9.946916339549777e-06, "loss": 0.0043, "step": 17330 }, { "epoch": 0.28372739916550765, "grad_norm": 0.1454576551914215, "learning_rate": 9.946708620570108e-06, "loss": 0.0044, "step": 17340 }, { "epoch": 0.2838910251165835, "grad_norm": 0.14934088289737701, "learning_rate": 9.946500498153712e-06, "loss": 0.003, "step": 17350 }, { "epoch": 0.2840546510676593, "grad_norm": 0.3014727532863617, "learning_rate": 9.946291972317567e-06, "loss": 0.004, "step": 17360 }, { "epoch": 0.28421827701873514, "grad_norm": 0.0963466539978981, "learning_rate": 9.946083043078677e-06, "loss": 0.0035, "step": 17370 }, { "epoch": 0.28438190296981103, "grad_norm": 0.259141743183136, "learning_rate": 9.945873710454084e-06, "loss": 0.0033, "step": 17380 }, { "epoch": 0.28454552892088686, "grad_norm": 0.06163093075156212, "learning_rate": 9.945663974460856e-06, "loss": 0.0052, "step": 17390 }, { "epoch": 0.2847091548719627, "grad_norm": 0.12710176408290863, "learning_rate": 9.945453835116101e-06, "loss": 0.0048, "step": 17400 }, { "epoch": 0.2848727808230385, "grad_norm": 0.39482325315475464, "learning_rate": 9.945243292436958e-06, "loss": 0.0034, "step": 17410 }, { "epoch": 0.28503640677411435, "grad_norm": 0.10586988180875778, "learning_rate": 9.945032346440597e-06, "loss": 0.0049, "step": 17420 }, { "epoch": 0.28520003272519023, "grad_norm": 0.2811430096626282, "learning_rate": 9.944820997144222e-06, "loss": 0.0051, "step": 17430 }, { "epoch": 0.28536365867626606, "grad_norm": 0.23797577619552612, "learning_rate": 9.94460924456507e-06, "loss": 0.0042, "step": 17440 }, { "epoch": 0.2855272846273419, "grad_norm": 0.21866446733474731, "learning_rate": 9.944397088720412e-06, "loss": 0.0063, "step": 17450 }, { "epoch": 0.2856909105784177, "grad_norm": 0.21976706385612488, "learning_rate": 9.944184529627549e-06, "loss": 0.006, "step": 17460 }, { "epoch": 0.28585453652949355, "grad_norm": 0.1605653464794159, "learning_rate": 9.943971567303815e-06, "loss": 0.0045, "step": 17470 }, { "epoch": 0.28601816248056944, "grad_norm": 0.11667539924383163, "learning_rate": 9.943758201766585e-06, "loss": 0.0025, "step": 17480 }, { "epoch": 0.28618178843164527, "grad_norm": 0.213748961687088, "learning_rate": 9.943544433033254e-06, "loss": 0.0037, "step": 17490 }, { "epoch": 0.2863454143827211, "grad_norm": 0.067848339676857, "learning_rate": 9.94333026112126e-06, "loss": 0.0042, "step": 17500 }, { "epoch": 0.2865090403337969, "grad_norm": 0.12439949065446854, "learning_rate": 9.943115686048067e-06, "loss": 0.0026, "step": 17510 }, { "epoch": 0.28667266628487276, "grad_norm": 0.37913748621940613, "learning_rate": 9.942900707831178e-06, "loss": 0.0081, "step": 17520 }, { "epoch": 0.28683629223594864, "grad_norm": 0.23941577970981598, "learning_rate": 9.942685326488122e-06, "loss": 0.0036, "step": 17530 }, { "epoch": 0.28699991818702447, "grad_norm": 0.14176420867443085, "learning_rate": 9.942469542036468e-06, "loss": 0.003, "step": 17540 }, { "epoch": 0.2871635441381003, "grad_norm": 0.11878236383199692, "learning_rate": 9.942253354493816e-06, "loss": 0.0047, "step": 17550 }, { "epoch": 0.28732717008917613, "grad_norm": 0.24840830266475677, "learning_rate": 9.942036763877794e-06, "loss": 0.0073, "step": 17560 }, { "epoch": 0.28749079604025196, "grad_norm": 0.2546611428260803, "learning_rate": 9.941819770206067e-06, "loss": 0.0048, "step": 17570 }, { "epoch": 0.28765442199132785, "grad_norm": 0.09517211467027664, "learning_rate": 9.941602373496334e-06, "loss": 0.0058, "step": 17580 }, { "epoch": 0.2878180479424037, "grad_norm": 0.20494259893894196, "learning_rate": 9.941384573766324e-06, "loss": 0.0042, "step": 17590 }, { "epoch": 0.2879816738934795, "grad_norm": 0.5144269466400146, "learning_rate": 9.9411663710338e-06, "loss": 0.0092, "step": 17600 }, { "epoch": 0.28814529984455534, "grad_norm": 0.6665263772010803, "learning_rate": 9.940947765316559e-06, "loss": 0.0042, "step": 17610 }, { "epoch": 0.28830892579563117, "grad_norm": 0.14284846186637878, "learning_rate": 9.940728756632427e-06, "loss": 0.0029, "step": 17620 }, { "epoch": 0.28847255174670705, "grad_norm": 0.23586921393871307, "learning_rate": 9.94050934499927e-06, "loss": 0.0092, "step": 17630 }, { "epoch": 0.2886361776977829, "grad_norm": 0.23336808383464813, "learning_rate": 9.940289530434978e-06, "loss": 0.0047, "step": 17640 }, { "epoch": 0.2887998036488587, "grad_norm": 0.2163964956998825, "learning_rate": 9.940069312957481e-06, "loss": 0.004, "step": 17650 }, { "epoch": 0.28896342959993454, "grad_norm": 0.15693232417106628, "learning_rate": 9.939848692584737e-06, "loss": 0.0047, "step": 17660 }, { "epoch": 0.28912705555101037, "grad_norm": 0.1399923861026764, "learning_rate": 9.939627669334741e-06, "loss": 0.0088, "step": 17670 }, { "epoch": 0.28929068150208626, "grad_norm": 0.23836109042167664, "learning_rate": 9.93940624322552e-06, "loss": 0.0045, "step": 17680 }, { "epoch": 0.2894543074531621, "grad_norm": 0.050740379840135574, "learning_rate": 9.93918441427513e-06, "loss": 0.0039, "step": 17690 }, { "epoch": 0.2896179334042379, "grad_norm": 0.05332972854375839, "learning_rate": 9.938962182501663e-06, "loss": 0.004, "step": 17700 }, { "epoch": 0.28978155935531374, "grad_norm": 0.1492157131433487, "learning_rate": 9.938739547923245e-06, "loss": 0.0039, "step": 17710 }, { "epoch": 0.2899451853063896, "grad_norm": 0.31322428584098816, "learning_rate": 9.938516510558034e-06, "loss": 0.0039, "step": 17720 }, { "epoch": 0.29010881125746546, "grad_norm": 0.1721370667219162, "learning_rate": 9.938293070424217e-06, "loss": 0.0054, "step": 17730 }, { "epoch": 0.2902724372085413, "grad_norm": 0.30744728446006775, "learning_rate": 9.938069227540017e-06, "loss": 0.006, "step": 17740 }, { "epoch": 0.2904360631596171, "grad_norm": 0.05862875282764435, "learning_rate": 9.937844981923695e-06, "loss": 0.005, "step": 17750 }, { "epoch": 0.29059968911069295, "grad_norm": 0.1096782386302948, "learning_rate": 9.937620333593534e-06, "loss": 0.0038, "step": 17760 }, { "epoch": 0.2907633150617688, "grad_norm": 0.11893852800130844, "learning_rate": 9.937395282567859e-06, "loss": 0.005, "step": 17770 }, { "epoch": 0.29092694101284466, "grad_norm": 0.09523512423038483, "learning_rate": 9.937169828865023e-06, "loss": 0.0039, "step": 17780 }, { "epoch": 0.2910905669639205, "grad_norm": 0.0739772841334343, "learning_rate": 9.936943972503412e-06, "loss": 0.0051, "step": 17790 }, { "epoch": 0.2912541929149963, "grad_norm": 0.24942955374717712, "learning_rate": 9.93671771350145e-06, "loss": 0.0031, "step": 17800 }, { "epoch": 0.29141781886607215, "grad_norm": 0.09600315243005753, "learning_rate": 9.936491051877584e-06, "loss": 0.0037, "step": 17810 }, { "epoch": 0.291581444817148, "grad_norm": 0.12371202558279037, "learning_rate": 9.936263987650305e-06, "loss": 0.0074, "step": 17820 }, { "epoch": 0.2917450707682238, "grad_norm": 0.047643277794122696, "learning_rate": 9.936036520838131e-06, "loss": 0.003, "step": 17830 }, { "epoch": 0.2919086967192997, "grad_norm": 0.06473499536514282, "learning_rate": 9.93580865145961e-06, "loss": 0.0024, "step": 17840 }, { "epoch": 0.29207232267037553, "grad_norm": 0.07858459651470184, "learning_rate": 9.935580379533331e-06, "loss": 0.0036, "step": 17850 }, { "epoch": 0.29223594862145136, "grad_norm": 0.15511786937713623, "learning_rate": 9.935351705077907e-06, "loss": 0.0058, "step": 17860 }, { "epoch": 0.2923995745725272, "grad_norm": 0.13278204202651978, "learning_rate": 9.93512262811199e-06, "loss": 0.0034, "step": 17870 }, { "epoch": 0.292563200523603, "grad_norm": 0.3024803698062897, "learning_rate": 9.934893148654263e-06, "loss": 0.0034, "step": 17880 }, { "epoch": 0.2927268264746789, "grad_norm": 0.06911783665418625, "learning_rate": 9.934663266723438e-06, "loss": 0.0047, "step": 17890 }, { "epoch": 0.29289045242575473, "grad_norm": 0.21858294308185577, "learning_rate": 9.934432982338268e-06, "loss": 0.0042, "step": 17900 }, { "epoch": 0.29305407837683056, "grad_norm": 0.29643192887306213, "learning_rate": 9.934202295517533e-06, "loss": 0.0072, "step": 17910 }, { "epoch": 0.2932177043279064, "grad_norm": 0.21975645422935486, "learning_rate": 9.933971206280047e-06, "loss": 0.0039, "step": 17920 }, { "epoch": 0.2933813302789822, "grad_norm": 0.1387801170349121, "learning_rate": 9.933739714644653e-06, "loss": 0.0033, "step": 17930 }, { "epoch": 0.2935449562300581, "grad_norm": 0.25593283772468567, "learning_rate": 9.933507820630237e-06, "loss": 0.0051, "step": 17940 }, { "epoch": 0.29370858218113394, "grad_norm": 0.17318041622638702, "learning_rate": 9.933275524255707e-06, "loss": 0.0048, "step": 17950 }, { "epoch": 0.29387220813220977, "grad_norm": 0.08971473574638367, "learning_rate": 9.93304282554001e-06, "loss": 0.0026, "step": 17960 }, { "epoch": 0.2940358340832856, "grad_norm": 0.00987961608916521, "learning_rate": 9.932809724502124e-06, "loss": 0.0068, "step": 17970 }, { "epoch": 0.2941994600343614, "grad_norm": 0.28710561990737915, "learning_rate": 9.93257622116106e-06, "loss": 0.0052, "step": 17980 }, { "epoch": 0.2943630859854373, "grad_norm": 0.1490069329738617, "learning_rate": 9.93234231553586e-06, "loss": 0.006, "step": 17990 }, { "epoch": 0.29452671193651314, "grad_norm": 0.11392813920974731, "learning_rate": 9.932108007645602e-06, "loss": 0.0037, "step": 18000 }, { "epoch": 0.29469033788758897, "grad_norm": 0.1317816823720932, "learning_rate": 9.931873297509396e-06, "loss": 0.0035, "step": 18010 }, { "epoch": 0.2948539638386648, "grad_norm": 0.16625213623046875, "learning_rate": 9.931638185146383e-06, "loss": 0.005, "step": 18020 }, { "epoch": 0.29501758978974063, "grad_norm": 0.2743920385837555, "learning_rate": 9.93140267057574e-06, "loss": 0.0074, "step": 18030 }, { "epoch": 0.2951812157408165, "grad_norm": 0.1882653832435608, "learning_rate": 9.931166753816673e-06, "loss": 0.0033, "step": 18040 }, { "epoch": 0.29534484169189235, "grad_norm": 0.2246755063533783, "learning_rate": 9.930930434888422e-06, "loss": 0.0032, "step": 18050 }, { "epoch": 0.2955084676429682, "grad_norm": 0.10592489689588547, "learning_rate": 9.930693713810262e-06, "loss": 0.0063, "step": 18060 }, { "epoch": 0.295672093594044, "grad_norm": 0.007365428376942873, "learning_rate": 9.930456590601499e-06, "loss": 0.0089, "step": 18070 }, { "epoch": 0.29583571954511984, "grad_norm": 0.09267830103635788, "learning_rate": 9.93021906528147e-06, "loss": 0.0044, "step": 18080 }, { "epoch": 0.2959993454961957, "grad_norm": 0.23627789318561554, "learning_rate": 9.929981137869548e-06, "loss": 0.0058, "step": 18090 }, { "epoch": 0.29616297144727155, "grad_norm": 0.851901113986969, "learning_rate": 9.929742808385139e-06, "loss": 0.0068, "step": 18100 }, { "epoch": 0.2963265973983474, "grad_norm": 0.2420031875371933, "learning_rate": 9.929504076847677e-06, "loss": 0.0063, "step": 18110 }, { "epoch": 0.2964902233494232, "grad_norm": 0.22772027552127838, "learning_rate": 9.929264943276635e-06, "loss": 0.0067, "step": 18120 }, { "epoch": 0.29665384930049904, "grad_norm": 0.15481628477573395, "learning_rate": 9.929025407691516e-06, "loss": 0.004, "step": 18130 }, { "epoch": 0.2968174752515749, "grad_norm": 0.10913346707820892, "learning_rate": 9.928785470111852e-06, "loss": 0.0057, "step": 18140 }, { "epoch": 0.29698110120265075, "grad_norm": 0.3253653049468994, "learning_rate": 9.928545130557216e-06, "loss": 0.0058, "step": 18150 }, { "epoch": 0.2971447271537266, "grad_norm": 0.16288554668426514, "learning_rate": 9.928304389047209e-06, "loss": 0.0066, "step": 18160 }, { "epoch": 0.2973083531048024, "grad_norm": 0.2058979570865631, "learning_rate": 9.928063245601463e-06, "loss": 0.0049, "step": 18170 }, { "epoch": 0.29747197905587824, "grad_norm": 0.17294132709503174, "learning_rate": 9.927821700239643e-06, "loss": 0.0052, "step": 18180 }, { "epoch": 0.29763560500695413, "grad_norm": 0.13950498402118683, "learning_rate": 9.927579752981454e-06, "loss": 0.0065, "step": 18190 }, { "epoch": 0.29779923095802996, "grad_norm": 0.249586284160614, "learning_rate": 9.927337403846622e-06, "loss": 0.0046, "step": 18200 }, { "epoch": 0.2979628569091058, "grad_norm": 0.0983256921172142, "learning_rate": 9.927094652854919e-06, "loss": 0.0037, "step": 18210 }, { "epoch": 0.2981264828601816, "grad_norm": 0.18704041838645935, "learning_rate": 9.926851500026138e-06, "loss": 0.0034, "step": 18220 }, { "epoch": 0.29829010881125745, "grad_norm": 0.28302499651908875, "learning_rate": 9.926607945380111e-06, "loss": 0.0053, "step": 18230 }, { "epoch": 0.29845373476233333, "grad_norm": 0.1998574584722519, "learning_rate": 9.926363988936703e-06, "loss": 0.0046, "step": 18240 }, { "epoch": 0.29861736071340916, "grad_norm": 0.08697161078453064, "learning_rate": 9.926119630715808e-06, "loss": 0.0028, "step": 18250 }, { "epoch": 0.298780986664485, "grad_norm": 0.11392093449831009, "learning_rate": 9.925874870737356e-06, "loss": 0.0045, "step": 18260 }, { "epoch": 0.2989446126155608, "grad_norm": 0.243545800447464, "learning_rate": 9.92562970902131e-06, "loss": 0.0026, "step": 18270 }, { "epoch": 0.29910823856663665, "grad_norm": 0.19214607775211334, "learning_rate": 9.925384145587662e-06, "loss": 0.0037, "step": 18280 }, { "epoch": 0.2992718645177125, "grad_norm": 0.2569046914577484, "learning_rate": 9.92513818045644e-06, "loss": 0.0045, "step": 18290 }, { "epoch": 0.29943549046878837, "grad_norm": 0.2964117228984833, "learning_rate": 9.924891813647707e-06, "loss": 0.0046, "step": 18300 }, { "epoch": 0.2995991164198642, "grad_norm": 0.06764807552099228, "learning_rate": 9.924645045181552e-06, "loss": 0.0045, "step": 18310 }, { "epoch": 0.29976274237094, "grad_norm": 0.565817654132843, "learning_rate": 9.924397875078103e-06, "loss": 0.0055, "step": 18320 }, { "epoch": 0.29992636832201586, "grad_norm": 0.33004099130630493, "learning_rate": 9.924150303357517e-06, "loss": 0.0046, "step": 18330 }, { "epoch": 0.3000899942730917, "grad_norm": 0.12197458744049072, "learning_rate": 9.923902330039986e-06, "loss": 0.0059, "step": 18340 }, { "epoch": 0.30025362022416757, "grad_norm": 0.3221333920955658, "learning_rate": 9.923653955145733e-06, "loss": 0.0036, "step": 18350 }, { "epoch": 0.3004172461752434, "grad_norm": 0.15347731113433838, "learning_rate": 9.923405178695016e-06, "loss": 0.0052, "step": 18360 }, { "epoch": 0.30058087212631923, "grad_norm": 0.08454122394323349, "learning_rate": 9.923156000708123e-06, "loss": 0.0044, "step": 18370 }, { "epoch": 0.30074449807739506, "grad_norm": 0.11487319320440292, "learning_rate": 9.922906421205376e-06, "loss": 0.004, "step": 18380 }, { "epoch": 0.3009081240284709, "grad_norm": 0.18744653463363647, "learning_rate": 9.922656440207133e-06, "loss": 0.0041, "step": 18390 }, { "epoch": 0.3010717499795468, "grad_norm": 0.18605977296829224, "learning_rate": 9.922406057733776e-06, "loss": 0.0032, "step": 18400 }, { "epoch": 0.3012353759306226, "grad_norm": 0.0707220807671547, "learning_rate": 9.92215527380573e-06, "loss": 0.0035, "step": 18410 }, { "epoch": 0.30139900188169844, "grad_norm": 0.05524848401546478, "learning_rate": 9.921904088443447e-06, "loss": 0.0045, "step": 18420 }, { "epoch": 0.30156262783277427, "grad_norm": 0.053524669259786606, "learning_rate": 9.92165250166741e-06, "loss": 0.0074, "step": 18430 }, { "epoch": 0.3017262537838501, "grad_norm": 0.08276185393333435, "learning_rate": 9.92140051349814e-06, "loss": 0.005, "step": 18440 }, { "epoch": 0.301889879734926, "grad_norm": 0.09610067307949066, "learning_rate": 9.921148123956191e-06, "loss": 0.0036, "step": 18450 }, { "epoch": 0.3020535056860018, "grad_norm": 0.20032677054405212, "learning_rate": 9.920895333062142e-06, "loss": 0.0043, "step": 18460 }, { "epoch": 0.30221713163707764, "grad_norm": 0.14135132730007172, "learning_rate": 9.920642140836613e-06, "loss": 0.0054, "step": 18470 }, { "epoch": 0.30238075758815347, "grad_norm": 0.11000286042690277, "learning_rate": 9.920388547300252e-06, "loss": 0.0035, "step": 18480 }, { "epoch": 0.3025443835392293, "grad_norm": 0.3235343098640442, "learning_rate": 9.920134552473741e-06, "loss": 0.0052, "step": 18490 }, { "epoch": 0.3027080094903052, "grad_norm": 0.12839002907276154, "learning_rate": 9.919880156377796e-06, "loss": 0.0069, "step": 18500 }, { "epoch": 0.302871635441381, "grad_norm": 0.125684916973114, "learning_rate": 9.919625359033166e-06, "loss": 0.0038, "step": 18510 }, { "epoch": 0.30303526139245685, "grad_norm": 0.2214241772890091, "learning_rate": 9.919370160460629e-06, "loss": 0.0056, "step": 18520 }, { "epoch": 0.3031988873435327, "grad_norm": 0.1472001075744629, "learning_rate": 9.919114560680997e-06, "loss": 0.0032, "step": 18530 }, { "epoch": 0.3033625132946085, "grad_norm": 0.24738214910030365, "learning_rate": 9.91885855971512e-06, "loss": 0.0036, "step": 18540 }, { "epoch": 0.3035261392456844, "grad_norm": 0.07262137532234192, "learning_rate": 9.918602157583874e-06, "loss": 0.0056, "step": 18550 }, { "epoch": 0.3036897651967602, "grad_norm": 0.2765899896621704, "learning_rate": 9.918345354308169e-06, "loss": 0.0043, "step": 18560 }, { "epoch": 0.30385339114783605, "grad_norm": 0.1799459308385849, "learning_rate": 9.918088149908951e-06, "loss": 0.0068, "step": 18570 }, { "epoch": 0.3040170170989119, "grad_norm": 0.07286353409290314, "learning_rate": 9.917830544407197e-06, "loss": 0.0036, "step": 18580 }, { "epoch": 0.3041806430499877, "grad_norm": 0.1088128387928009, "learning_rate": 9.917572537823915e-06, "loss": 0.003, "step": 18590 }, { "epoch": 0.3043442690010636, "grad_norm": 0.08138229697942734, "learning_rate": 9.917314130180149e-06, "loss": 0.0044, "step": 18600 }, { "epoch": 0.3045078949521394, "grad_norm": 0.29889506101608276, "learning_rate": 9.917055321496972e-06, "loss": 0.0044, "step": 18610 }, { "epoch": 0.30467152090321525, "grad_norm": 0.1717907041311264, "learning_rate": 9.916796111795491e-06, "loss": 0.0051, "step": 18620 }, { "epoch": 0.3048351468542911, "grad_norm": 0.1286926567554474, "learning_rate": 9.916536501096849e-06, "loss": 0.0058, "step": 18630 }, { "epoch": 0.3049987728053669, "grad_norm": 0.1292576640844345, "learning_rate": 9.916276489422218e-06, "loss": 0.0063, "step": 18640 }, { "epoch": 0.3051623987564428, "grad_norm": 0.031184492632746696, "learning_rate": 9.916016076792802e-06, "loss": 0.0045, "step": 18650 }, { "epoch": 0.30532602470751863, "grad_norm": 0.04804946482181549, "learning_rate": 9.91575526322984e-06, "loss": 0.0057, "step": 18660 }, { "epoch": 0.30548965065859446, "grad_norm": 0.17744258046150208, "learning_rate": 9.915494048754605e-06, "loss": 0.0036, "step": 18670 }, { "epoch": 0.3056532766096703, "grad_norm": 0.0896824449300766, "learning_rate": 9.915232433388397e-06, "loss": 0.0027, "step": 18680 }, { "epoch": 0.3058169025607461, "grad_norm": 0.15027998387813568, "learning_rate": 9.914970417152558e-06, "loss": 0.0044, "step": 18690 }, { "epoch": 0.30598052851182195, "grad_norm": 0.05921371653676033, "learning_rate": 9.914708000068452e-06, "loss": 0.0068, "step": 18700 }, { "epoch": 0.30614415446289783, "grad_norm": 0.09498249739408493, "learning_rate": 9.914445182157484e-06, "loss": 0.0038, "step": 18710 }, { "epoch": 0.30630778041397366, "grad_norm": 0.13720545172691345, "learning_rate": 9.914181963441087e-06, "loss": 0.0036, "step": 18720 }, { "epoch": 0.3064714063650495, "grad_norm": 0.037773698568344116, "learning_rate": 9.913918343940728e-06, "loss": 0.0061, "step": 18730 }, { "epoch": 0.3066350323161253, "grad_norm": 0.037387363612651825, "learning_rate": 9.913654323677907e-06, "loss": 0.0038, "step": 18740 }, { "epoch": 0.30679865826720115, "grad_norm": 0.1995997279882431, "learning_rate": 9.913389902674158e-06, "loss": 0.004, "step": 18750 }, { "epoch": 0.30696228421827704, "grad_norm": 0.07831252366304398, "learning_rate": 9.913125080951046e-06, "loss": 0.0043, "step": 18760 }, { "epoch": 0.30712591016935287, "grad_norm": 0.12026140838861465, "learning_rate": 9.912859858530167e-06, "loss": 0.0036, "step": 18770 }, { "epoch": 0.3072895361204287, "grad_norm": 0.18794669210910797, "learning_rate": 9.912594235433152e-06, "loss": 0.0042, "step": 18780 }, { "epoch": 0.3074531620715045, "grad_norm": 0.3090685307979584, "learning_rate": 9.912328211681667e-06, "loss": 0.0051, "step": 18790 }, { "epoch": 0.30761678802258036, "grad_norm": 0.195156529545784, "learning_rate": 9.912061787297405e-06, "loss": 0.0026, "step": 18800 }, { "epoch": 0.30778041397365624, "grad_norm": 0.1345290243625641, "learning_rate": 9.911794962302098e-06, "loss": 0.007, "step": 18810 }, { "epoch": 0.30794403992473207, "grad_norm": 0.1819356381893158, "learning_rate": 9.911527736717503e-06, "loss": 0.0027, "step": 18820 }, { "epoch": 0.3081076658758079, "grad_norm": 0.10387175530195236, "learning_rate": 9.911260110565416e-06, "loss": 0.0039, "step": 18830 }, { "epoch": 0.30827129182688373, "grad_norm": 0.09819977730512619, "learning_rate": 9.910992083867665e-06, "loss": 0.0037, "step": 18840 }, { "epoch": 0.30843491777795956, "grad_norm": 0.091976098716259, "learning_rate": 9.910723656646108e-06, "loss": 0.0038, "step": 18850 }, { "epoch": 0.30859854372903545, "grad_norm": 0.24055665731430054, "learning_rate": 9.910454828922638e-06, "loss": 0.0037, "step": 18860 }, { "epoch": 0.3087621696801113, "grad_norm": 0.1224713996052742, "learning_rate": 9.910185600719179e-06, "loss": 0.006, "step": 18870 }, { "epoch": 0.3089257956311871, "grad_norm": 0.002193062799051404, "learning_rate": 9.909915972057688e-06, "loss": 0.0019, "step": 18880 }, { "epoch": 0.30908942158226294, "grad_norm": 0.41805511713027954, "learning_rate": 9.909645942960156e-06, "loss": 0.0063, "step": 18890 }, { "epoch": 0.30925304753333877, "grad_norm": 0.12885433435440063, "learning_rate": 9.909375513448603e-06, "loss": 0.0059, "step": 18900 }, { "epoch": 0.30941667348441465, "grad_norm": 0.12093693763017654, "learning_rate": 9.909104683545088e-06, "loss": 0.0044, "step": 18910 }, { "epoch": 0.3095802994354905, "grad_norm": 0.15998868644237518, "learning_rate": 9.908833453271695e-06, "loss": 0.003, "step": 18920 }, { "epoch": 0.3097439253865663, "grad_norm": 0.06271903216838837, "learning_rate": 9.90856182265055e-06, "loss": 0.0028, "step": 18930 }, { "epoch": 0.30990755133764214, "grad_norm": 0.029513388872146606, "learning_rate": 9.908289791703801e-06, "loss": 0.0046, "step": 18940 }, { "epoch": 0.31007117728871797, "grad_norm": 0.1227802038192749, "learning_rate": 9.908017360453636e-06, "loss": 0.0042, "step": 18950 }, { "epoch": 0.31023480323979385, "grad_norm": 0.28032901883125305, "learning_rate": 9.907744528922274e-06, "loss": 0.0055, "step": 18960 }, { "epoch": 0.3103984291908697, "grad_norm": 0.1124168261885643, "learning_rate": 9.907471297131967e-06, "loss": 0.0041, "step": 18970 }, { "epoch": 0.3105620551419455, "grad_norm": 0.07405764609575272, "learning_rate": 9.907197665104997e-06, "loss": 0.0045, "step": 18980 }, { "epoch": 0.31072568109302134, "grad_norm": 0.08645839989185333, "learning_rate": 9.906923632863682e-06, "loss": 0.005, "step": 18990 }, { "epoch": 0.3108893070440972, "grad_norm": 0.09636373817920685, "learning_rate": 9.906649200430367e-06, "loss": 0.0037, "step": 19000 }, { "epoch": 0.31105293299517306, "grad_norm": 0.03481016680598259, "learning_rate": 9.90637436782744e-06, "loss": 0.004, "step": 19010 }, { "epoch": 0.3112165589462489, "grad_norm": 0.3704533576965332, "learning_rate": 9.906099135077312e-06, "loss": 0.0051, "step": 19020 }, { "epoch": 0.3113801848973247, "grad_norm": 0.36298590898513794, "learning_rate": 9.90582350220243e-06, "loss": 0.0069, "step": 19030 }, { "epoch": 0.31154381084840055, "grad_norm": 0.17100781202316284, "learning_rate": 9.905547469225274e-06, "loss": 0.0037, "step": 19040 }, { "epoch": 0.3117074367994764, "grad_norm": 0.19253915548324585, "learning_rate": 9.905271036168357e-06, "loss": 0.0044, "step": 19050 }, { "epoch": 0.31187106275055226, "grad_norm": 0.1165117397904396, "learning_rate": 9.904994203054224e-06, "loss": 0.0037, "step": 19060 }, { "epoch": 0.3120346887016281, "grad_norm": 0.11860129982233047, "learning_rate": 9.90471696990545e-06, "loss": 0.0055, "step": 19070 }, { "epoch": 0.3121983146527039, "grad_norm": 0.09898581355810165, "learning_rate": 9.90443933674465e-06, "loss": 0.007, "step": 19080 }, { "epoch": 0.31236194060377975, "grad_norm": 0.0958847850561142, "learning_rate": 9.904161303594461e-06, "loss": 0.0036, "step": 19090 }, { "epoch": 0.3125255665548556, "grad_norm": 0.1846855878829956, "learning_rate": 9.903882870477563e-06, "loss": 0.0029, "step": 19100 }, { "epoch": 0.31268919250593147, "grad_norm": 0.4415309429168701, "learning_rate": 9.903604037416664e-06, "loss": 0.0037, "step": 19110 }, { "epoch": 0.3128528184570073, "grad_norm": 0.19907858967781067, "learning_rate": 9.903324804434503e-06, "loss": 0.0052, "step": 19120 }, { "epoch": 0.3130164444080831, "grad_norm": 0.22549308836460114, "learning_rate": 9.903045171553851e-06, "loss": 0.0041, "step": 19130 }, { "epoch": 0.31318007035915896, "grad_norm": 0.08515822887420654, "learning_rate": 9.90276513879752e-06, "loss": 0.003, "step": 19140 }, { "epoch": 0.3133436963102348, "grad_norm": 0.08104472607374191, "learning_rate": 9.902484706188341e-06, "loss": 0.0043, "step": 19150 }, { "epoch": 0.3135073222613106, "grad_norm": 0.33319786190986633, "learning_rate": 9.90220387374919e-06, "loss": 0.004, "step": 19160 }, { "epoch": 0.3136709482123865, "grad_norm": 0.31271547079086304, "learning_rate": 9.901922641502972e-06, "loss": 0.0051, "step": 19170 }, { "epoch": 0.31383457416346233, "grad_norm": 0.14193718135356903, "learning_rate": 9.90164100947262e-06, "loss": 0.0046, "step": 19180 }, { "epoch": 0.31399820011453816, "grad_norm": 0.332012414932251, "learning_rate": 9.901358977681103e-06, "loss": 0.0052, "step": 19190 }, { "epoch": 0.314161826065614, "grad_norm": 0.17114713788032532, "learning_rate": 9.901076546151425e-06, "loss": 0.0048, "step": 19200 }, { "epoch": 0.3143254520166898, "grad_norm": 0.1274709552526474, "learning_rate": 9.900793714906618e-06, "loss": 0.0042, "step": 19210 }, { "epoch": 0.3144890779677657, "grad_norm": 0.28423044085502625, "learning_rate": 9.900510483969749e-06, "loss": 0.007, "step": 19220 }, { "epoch": 0.31465270391884154, "grad_norm": 0.06696786731481552, "learning_rate": 9.900226853363919e-06, "loss": 0.0037, "step": 19230 }, { "epoch": 0.31481632986991737, "grad_norm": 0.12529677152633667, "learning_rate": 9.899942823112259e-06, "loss": 0.0038, "step": 19240 }, { "epoch": 0.3149799558209932, "grad_norm": 0.1957942396402359, "learning_rate": 9.899658393237934e-06, "loss": 0.0053, "step": 19250 }, { "epoch": 0.315143581772069, "grad_norm": 0.10554284602403641, "learning_rate": 9.899373563764138e-06, "loss": 0.0043, "step": 19260 }, { "epoch": 0.3153072077231449, "grad_norm": 0.11743362993001938, "learning_rate": 9.899088334714106e-06, "loss": 0.0039, "step": 19270 }, { "epoch": 0.31547083367422074, "grad_norm": 0.09323113411664963, "learning_rate": 9.898802706111095e-06, "loss": 0.0044, "step": 19280 }, { "epoch": 0.31563445962529657, "grad_norm": 0.23355872929096222, "learning_rate": 9.898516677978404e-06, "loss": 0.0036, "step": 19290 }, { "epoch": 0.3157980855763724, "grad_norm": 0.055486563593149185, "learning_rate": 9.89823025033936e-06, "loss": 0.0046, "step": 19300 }, { "epoch": 0.31596171152744823, "grad_norm": 0.2832466661930084, "learning_rate": 9.89794342321732e-06, "loss": 0.0061, "step": 19310 }, { "epoch": 0.3161253374785241, "grad_norm": 0.15053504705429077, "learning_rate": 9.897656196635678e-06, "loss": 0.0028, "step": 19320 }, { "epoch": 0.31628896342959995, "grad_norm": 0.19460567831993103, "learning_rate": 9.897368570617862e-06, "loss": 0.0035, "step": 19330 }, { "epoch": 0.3164525893806758, "grad_norm": 0.13320541381835938, "learning_rate": 9.897080545187328e-06, "loss": 0.0044, "step": 19340 }, { "epoch": 0.3166162153317516, "grad_norm": 0.1881943941116333, "learning_rate": 9.896792120367564e-06, "loss": 0.0048, "step": 19350 }, { "epoch": 0.31677984128282743, "grad_norm": 0.11229316145181656, "learning_rate": 9.896503296182096e-06, "loss": 0.0035, "step": 19360 }, { "epoch": 0.3169434672339033, "grad_norm": 0.07651843130588531, "learning_rate": 9.896214072654478e-06, "loss": 0.0045, "step": 19370 }, { "epoch": 0.31710709318497915, "grad_norm": 0.46119266748428345, "learning_rate": 9.8959244498083e-06, "loss": 0.0064, "step": 19380 }, { "epoch": 0.317270719136055, "grad_norm": 0.3152843117713928, "learning_rate": 9.89563442766718e-06, "loss": 0.0051, "step": 19390 }, { "epoch": 0.3174343450871308, "grad_norm": 0.164754256606102, "learning_rate": 9.895344006254773e-06, "loss": 0.0042, "step": 19400 }, { "epoch": 0.31759797103820664, "grad_norm": 0.23276254534721375, "learning_rate": 9.895053185594762e-06, "loss": 0.0032, "step": 19410 }, { "epoch": 0.3177615969892825, "grad_norm": 0.31304922699928284, "learning_rate": 9.894761965710871e-06, "loss": 0.0049, "step": 19420 }, { "epoch": 0.31792522294035835, "grad_norm": 0.06616653501987457, "learning_rate": 9.894470346626846e-06, "loss": 0.0069, "step": 19430 }, { "epoch": 0.3180888488914342, "grad_norm": 0.15861405432224274, "learning_rate": 9.894178328366473e-06, "loss": 0.0048, "step": 19440 }, { "epoch": 0.31825247484251, "grad_norm": 0.24078448116779327, "learning_rate": 9.893885910953564e-06, "loss": 0.0047, "step": 19450 }, { "epoch": 0.31841610079358584, "grad_norm": 0.05956120043992996, "learning_rate": 9.893593094411973e-06, "loss": 0.0034, "step": 19460 }, { "epoch": 0.31857972674466173, "grad_norm": 0.11779209226369858, "learning_rate": 9.89329987876558e-06, "loss": 0.0048, "step": 19470 }, { "epoch": 0.31874335269573756, "grad_norm": 0.3354147970676422, "learning_rate": 9.893006264038294e-06, "loss": 0.0028, "step": 19480 }, { "epoch": 0.3189069786468134, "grad_norm": 0.09096290171146393, "learning_rate": 9.892712250254067e-06, "loss": 0.0056, "step": 19490 }, { "epoch": 0.3190706045978892, "grad_norm": 0.20709989964962006, "learning_rate": 9.892417837436874e-06, "loss": 0.005, "step": 19500 }, { "epoch": 0.31923423054896505, "grad_norm": 0.11091521382331848, "learning_rate": 9.892123025610728e-06, "loss": 0.0048, "step": 19510 }, { "epoch": 0.31939785650004093, "grad_norm": 0.31609076261520386, "learning_rate": 9.891827814799672e-06, "loss": 0.0028, "step": 19520 }, { "epoch": 0.31956148245111676, "grad_norm": 0.10027654469013214, "learning_rate": 9.891532205027783e-06, "loss": 0.0042, "step": 19530 }, { "epoch": 0.3197251084021926, "grad_norm": 0.36275723576545715, "learning_rate": 9.891236196319172e-06, "loss": 0.0037, "step": 19540 }, { "epoch": 0.3198887343532684, "grad_norm": 0.17737677693367004, "learning_rate": 9.890939788697975e-06, "loss": 0.0031, "step": 19550 }, { "epoch": 0.32005236030434425, "grad_norm": 0.15842317044734955, "learning_rate": 9.890642982188372e-06, "loss": 0.0026, "step": 19560 }, { "epoch": 0.3202159862554201, "grad_norm": 0.04089176282286644, "learning_rate": 9.890345776814565e-06, "loss": 0.0036, "step": 19570 }, { "epoch": 0.32037961220649597, "grad_norm": 0.2341189682483673, "learning_rate": 9.890048172600795e-06, "loss": 0.0058, "step": 19580 }, { "epoch": 0.3205432381575718, "grad_norm": 0.12738503515720367, "learning_rate": 9.889750169571332e-06, "loss": 0.0041, "step": 19590 }, { "epoch": 0.3207068641086476, "grad_norm": 0.03597637265920639, "learning_rate": 9.889451767750484e-06, "loss": 0.0048, "step": 19600 }, { "epoch": 0.32087049005972346, "grad_norm": 0.10549870878458023, "learning_rate": 9.889152967162586e-06, "loss": 0.0051, "step": 19610 }, { "epoch": 0.3210341160107993, "grad_norm": 0.10223033279180527, "learning_rate": 9.888853767832003e-06, "loss": 0.0043, "step": 19620 }, { "epoch": 0.32119774196187517, "grad_norm": 0.10213784128427505, "learning_rate": 9.888554169783143e-06, "loss": 0.0057, "step": 19630 }, { "epoch": 0.321361367912951, "grad_norm": 0.07770213484764099, "learning_rate": 9.888254173040434e-06, "loss": 0.0036, "step": 19640 }, { "epoch": 0.32152499386402683, "grad_norm": 0.41759949922561646, "learning_rate": 9.887953777628349e-06, "loss": 0.006, "step": 19650 }, { "epoch": 0.32168861981510266, "grad_norm": 0.22137853503227234, "learning_rate": 9.887652983571383e-06, "loss": 0.0042, "step": 19660 }, { "epoch": 0.3218522457661785, "grad_norm": 0.18771515786647797, "learning_rate": 9.887351790894069e-06, "loss": 0.004, "step": 19670 }, { "epoch": 0.3220158717172544, "grad_norm": 0.1392206996679306, "learning_rate": 9.887050199620972e-06, "loss": 0.006, "step": 19680 }, { "epoch": 0.3221794976683302, "grad_norm": 0.23520411550998688, "learning_rate": 9.886748209776687e-06, "loss": 0.005, "step": 19690 }, { "epoch": 0.32234312361940604, "grad_norm": 0.4393717646598816, "learning_rate": 9.886445821385844e-06, "loss": 0.0042, "step": 19700 }, { "epoch": 0.32250674957048187, "grad_norm": 0.04335736110806465, "learning_rate": 9.886143034473104e-06, "loss": 0.0022, "step": 19710 }, { "epoch": 0.3226703755215577, "grad_norm": 0.07217326760292053, "learning_rate": 9.885839849063163e-06, "loss": 0.0044, "step": 19720 }, { "epoch": 0.3228340014726336, "grad_norm": 0.07146336883306503, "learning_rate": 9.885536265180748e-06, "loss": 0.003, "step": 19730 }, { "epoch": 0.3229976274237094, "grad_norm": 0.08882381021976471, "learning_rate": 9.885232282850616e-06, "loss": 0.0051, "step": 19740 }, { "epoch": 0.32316125337478524, "grad_norm": 0.2379351705312729, "learning_rate": 9.884927902097561e-06, "loss": 0.0052, "step": 19750 }, { "epoch": 0.32332487932586107, "grad_norm": 0.142432302236557, "learning_rate": 9.884623122946405e-06, "loss": 0.0036, "step": 19760 }, { "epoch": 0.3234885052769369, "grad_norm": 0.08389263600111008, "learning_rate": 9.884317945422007e-06, "loss": 0.0046, "step": 19770 }, { "epoch": 0.3236521312280128, "grad_norm": 0.10304949432611465, "learning_rate": 9.884012369549255e-06, "loss": 0.0028, "step": 19780 }, { "epoch": 0.3238157571790886, "grad_norm": 0.08805543184280396, "learning_rate": 9.883706395353072e-06, "loss": 0.004, "step": 19790 }, { "epoch": 0.32397938313016444, "grad_norm": 0.14350149035453796, "learning_rate": 9.88340002285841e-06, "loss": 0.0055, "step": 19800 }, { "epoch": 0.3241430090812403, "grad_norm": 0.2578975260257721, "learning_rate": 9.883093252090257e-06, "loss": 0.0044, "step": 19810 }, { "epoch": 0.3243066350323161, "grad_norm": 0.08603624999523163, "learning_rate": 9.882786083073632e-06, "loss": 0.0037, "step": 19820 }, { "epoch": 0.324470260983392, "grad_norm": 0.058858297765254974, "learning_rate": 9.882478515833587e-06, "loss": 0.0039, "step": 19830 }, { "epoch": 0.3246338869344678, "grad_norm": 0.20485638082027435, "learning_rate": 9.882170550395205e-06, "loss": 0.0038, "step": 19840 }, { "epoch": 0.32479751288554365, "grad_norm": 0.11642660200595856, "learning_rate": 9.881862186783605e-06, "loss": 0.0036, "step": 19850 }, { "epoch": 0.3249611388366195, "grad_norm": 0.12272995710372925, "learning_rate": 9.881553425023933e-06, "loss": 0.0051, "step": 19860 }, { "epoch": 0.3251247647876953, "grad_norm": 0.4291192293167114, "learning_rate": 9.881244265141374e-06, "loss": 0.0045, "step": 19870 }, { "epoch": 0.3252883907387712, "grad_norm": 0.10232923924922943, "learning_rate": 9.880934707161138e-06, "loss": 0.0072, "step": 19880 }, { "epoch": 0.325452016689847, "grad_norm": 0.11810983717441559, "learning_rate": 9.880624751108476e-06, "loss": 0.0044, "step": 19890 }, { "epoch": 0.32561564264092285, "grad_norm": 0.2597808539867401, "learning_rate": 9.880314397008663e-06, "loss": 0.0057, "step": 19900 }, { "epoch": 0.3257792685919987, "grad_norm": 0.2902827262878418, "learning_rate": 9.880003644887013e-06, "loss": 0.0061, "step": 19910 }, { "epoch": 0.3259428945430745, "grad_norm": 0.2732642590999603, "learning_rate": 9.879692494768868e-06, "loss": 0.0039, "step": 19920 }, { "epoch": 0.3261065204941504, "grad_norm": 0.21653032302856445, "learning_rate": 9.879380946679605e-06, "loss": 0.0037, "step": 19930 }, { "epoch": 0.32627014644522623, "grad_norm": 0.14108705520629883, "learning_rate": 9.879069000644635e-06, "loss": 0.0042, "step": 19940 }, { "epoch": 0.32643377239630206, "grad_norm": 0.12505283951759338, "learning_rate": 9.878756656689395e-06, "loss": 0.0051, "step": 19950 }, { "epoch": 0.3265973983473779, "grad_norm": 0.13990822434425354, "learning_rate": 9.878443914839362e-06, "loss": 0.0029, "step": 19960 }, { "epoch": 0.3267610242984537, "grad_norm": 0.1480061113834381, "learning_rate": 9.878130775120041e-06, "loss": 0.0057, "step": 19970 }, { "epoch": 0.3269246502495296, "grad_norm": 0.1516495645046234, "learning_rate": 9.877817237556972e-06, "loss": 0.0031, "step": 19980 }, { "epoch": 0.32708827620060543, "grad_norm": 0.1430971622467041, "learning_rate": 9.877503302175724e-06, "loss": 0.004, "step": 19990 }, { "epoch": 0.32725190215168126, "grad_norm": 0.17753866314888, "learning_rate": 9.8771889690019e-06, "loss": 0.0066, "step": 20000 }, { "epoch": 0.3274155281027571, "grad_norm": 0.10308807343244553, "learning_rate": 9.87687423806114e-06, "loss": 0.0034, "step": 20010 }, { "epoch": 0.3275791540538329, "grad_norm": 0.13336046040058136, "learning_rate": 9.876559109379108e-06, "loss": 0.0039, "step": 20020 }, { "epoch": 0.32774278000490875, "grad_norm": 0.1490883231163025, "learning_rate": 9.876243582981507e-06, "loss": 0.0025, "step": 20030 }, { "epoch": 0.32790640595598464, "grad_norm": 0.0983930230140686, "learning_rate": 9.87592765889407e-06, "loss": 0.0048, "step": 20040 }, { "epoch": 0.32807003190706047, "grad_norm": 0.15341004729270935, "learning_rate": 9.875611337142561e-06, "loss": 0.0054, "step": 20050 }, { "epoch": 0.3282336578581363, "grad_norm": 0.1860189288854599, "learning_rate": 9.875294617752782e-06, "loss": 0.0058, "step": 20060 }, { "epoch": 0.3283972838092121, "grad_norm": 0.45263639092445374, "learning_rate": 9.87497750075056e-06, "loss": 0.0054, "step": 20070 }, { "epoch": 0.32856090976028796, "grad_norm": 0.04703308641910553, "learning_rate": 9.874659986161758e-06, "loss": 0.0022, "step": 20080 }, { "epoch": 0.32872453571136384, "grad_norm": 0.4157925248146057, "learning_rate": 9.874342074012275e-06, "loss": 0.0073, "step": 20090 }, { "epoch": 0.32888816166243967, "grad_norm": 0.0724644586443901, "learning_rate": 9.874023764328034e-06, "loss": 0.0034, "step": 20100 }, { "epoch": 0.3290517876135155, "grad_norm": 0.16821777820587158, "learning_rate": 9.873705057134998e-06, "loss": 0.0036, "step": 20110 }, { "epoch": 0.32921541356459133, "grad_norm": 0.15424877405166626, "learning_rate": 9.87338595245916e-06, "loss": 0.0067, "step": 20120 }, { "epoch": 0.32937903951566716, "grad_norm": 0.018444593995809555, "learning_rate": 9.873066450326546e-06, "loss": 0.0063, "step": 20130 }, { "epoch": 0.32954266546674305, "grad_norm": 0.2621767520904541, "learning_rate": 9.87274655076321e-06, "loss": 0.0057, "step": 20140 }, { "epoch": 0.3297062914178189, "grad_norm": 0.1759834885597229, "learning_rate": 9.872426253795244e-06, "loss": 0.0041, "step": 20150 }, { "epoch": 0.3298699173688947, "grad_norm": 0.08095880597829819, "learning_rate": 9.87210555944877e-06, "loss": 0.0057, "step": 20160 }, { "epoch": 0.33003354331997053, "grad_norm": 0.3416996896266937, "learning_rate": 9.871784467749944e-06, "loss": 0.0086, "step": 20170 }, { "epoch": 0.33019716927104636, "grad_norm": 0.20829014480113983, "learning_rate": 9.87146297872495e-06, "loss": 0.0036, "step": 20180 }, { "epoch": 0.33036079522212225, "grad_norm": 0.1080484390258789, "learning_rate": 9.871141092400012e-06, "loss": 0.0042, "step": 20190 }, { "epoch": 0.3305244211731981, "grad_norm": 0.4624972939491272, "learning_rate": 9.87081880880138e-06, "loss": 0.0043, "step": 20200 }, { "epoch": 0.3306880471242739, "grad_norm": 0.1670476645231247, "learning_rate": 9.870496127955335e-06, "loss": 0.0059, "step": 20210 }, { "epoch": 0.33085167307534974, "grad_norm": 0.29990607500076294, "learning_rate": 9.8701730498882e-06, "loss": 0.0069, "step": 20220 }, { "epoch": 0.33101529902642557, "grad_norm": 0.14530187845230103, "learning_rate": 9.869849574626317e-06, "loss": 0.0036, "step": 20230 }, { "epoch": 0.33117892497750145, "grad_norm": 0.11519566178321838, "learning_rate": 9.869525702196074e-06, "loss": 0.0024, "step": 20240 }, { "epoch": 0.3313425509285773, "grad_norm": 0.37026020884513855, "learning_rate": 9.86920143262388e-06, "loss": 0.0052, "step": 20250 }, { "epoch": 0.3315061768796531, "grad_norm": 0.1430608183145523, "learning_rate": 9.868876765936186e-06, "loss": 0.0045, "step": 20260 }, { "epoch": 0.33166980283072894, "grad_norm": 0.12555855512619019, "learning_rate": 9.868551702159466e-06, "loss": 0.0029, "step": 20270 }, { "epoch": 0.3318334287818048, "grad_norm": 0.17927005887031555, "learning_rate": 9.868226241320237e-06, "loss": 0.0036, "step": 20280 }, { "epoch": 0.33199705473288066, "grad_norm": 0.1316860020160675, "learning_rate": 9.867900383445035e-06, "loss": 0.0033, "step": 20290 }, { "epoch": 0.3321606806839565, "grad_norm": 0.07333186268806458, "learning_rate": 9.867574128560442e-06, "loss": 0.0056, "step": 20300 }, { "epoch": 0.3323243066350323, "grad_norm": 0.18035760521888733, "learning_rate": 9.867247476693064e-06, "loss": 0.004, "step": 20310 }, { "epoch": 0.33248793258610815, "grad_norm": 0.1360657960176468, "learning_rate": 9.86692042786954e-06, "loss": 0.0041, "step": 20320 }, { "epoch": 0.332651558537184, "grad_norm": 0.14488337934017181, "learning_rate": 9.866592982116547e-06, "loss": 0.0055, "step": 20330 }, { "epoch": 0.33281518448825986, "grad_norm": 0.14830628037452698, "learning_rate": 9.866265139460787e-06, "loss": 0.0055, "step": 20340 }, { "epoch": 0.3329788104393357, "grad_norm": 0.25210511684417725, "learning_rate": 9.865936899928998e-06, "loss": 0.0035, "step": 20350 }, { "epoch": 0.3331424363904115, "grad_norm": 0.06869592517614365, "learning_rate": 9.86560826354795e-06, "loss": 0.0045, "step": 20360 }, { "epoch": 0.33330606234148735, "grad_norm": 0.11608990281820297, "learning_rate": 9.865279230344448e-06, "loss": 0.004, "step": 20370 }, { "epoch": 0.3334696882925632, "grad_norm": 0.2703023850917816, "learning_rate": 9.864949800345325e-06, "loss": 0.0042, "step": 20380 }, { "epoch": 0.33363331424363907, "grad_norm": 0.10395345836877823, "learning_rate": 9.864619973577448e-06, "loss": 0.0046, "step": 20390 }, { "epoch": 0.3337969401947149, "grad_norm": 0.16700316965579987, "learning_rate": 9.864289750067715e-06, "loss": 0.0039, "step": 20400 }, { "epoch": 0.3339605661457907, "grad_norm": 0.08299065381288528, "learning_rate": 9.863959129843061e-06, "loss": 0.0041, "step": 20410 }, { "epoch": 0.33412419209686656, "grad_norm": 0.1187463104724884, "learning_rate": 9.86362811293045e-06, "loss": 0.0061, "step": 20420 }, { "epoch": 0.3342878180479424, "grad_norm": 0.16819888353347778, "learning_rate": 9.863296699356876e-06, "loss": 0.0049, "step": 20430 }, { "epoch": 0.33445144399901827, "grad_norm": 0.11607053130865097, "learning_rate": 9.862964889149372e-06, "loss": 0.0059, "step": 20440 }, { "epoch": 0.3346150699500941, "grad_norm": 0.3275061845779419, "learning_rate": 9.862632682334994e-06, "loss": 0.0038, "step": 20450 }, { "epoch": 0.33477869590116993, "grad_norm": 0.10634026676416397, "learning_rate": 9.86230007894084e-06, "loss": 0.0025, "step": 20460 }, { "epoch": 0.33494232185224576, "grad_norm": 0.10460998117923737, "learning_rate": 9.861967078994035e-06, "loss": 0.0029, "step": 20470 }, { "epoch": 0.3351059478033216, "grad_norm": 0.209858238697052, "learning_rate": 9.861633682521736e-06, "loss": 0.0104, "step": 20480 }, { "epoch": 0.3352695737543974, "grad_norm": 0.18016286194324493, "learning_rate": 9.861299889551135e-06, "loss": 0.0041, "step": 20490 }, { "epoch": 0.3354331997054733, "grad_norm": 0.38085615634918213, "learning_rate": 9.860965700109453e-06, "loss": 0.0053, "step": 20500 }, { "epoch": 0.33559682565654914, "grad_norm": 0.2172110229730606, "learning_rate": 9.860631114223948e-06, "loss": 0.0048, "step": 20510 }, { "epoch": 0.33576045160762497, "grad_norm": 0.06819513440132141, "learning_rate": 9.860296131921909e-06, "loss": 0.0052, "step": 20520 }, { "epoch": 0.3359240775587008, "grad_norm": 0.1689801961183548, "learning_rate": 9.859960753230651e-06, "loss": 0.0076, "step": 20530 }, { "epoch": 0.3360877035097766, "grad_norm": 0.19185730814933777, "learning_rate": 9.859624978177529e-06, "loss": 0.0056, "step": 20540 }, { "epoch": 0.3362513294608525, "grad_norm": 0.09345399588346481, "learning_rate": 9.859288806789929e-06, "loss": 0.0038, "step": 20550 }, { "epoch": 0.33641495541192834, "grad_norm": 0.11281021684408188, "learning_rate": 9.858952239095265e-06, "loss": 0.0032, "step": 20560 }, { "epoch": 0.33657858136300417, "grad_norm": 0.061719730496406555, "learning_rate": 9.858615275120989e-06, "loss": 0.0031, "step": 20570 }, { "epoch": 0.33674220731408, "grad_norm": 0.2039213925600052, "learning_rate": 9.858277914894581e-06, "loss": 0.0052, "step": 20580 }, { "epoch": 0.33690583326515583, "grad_norm": 0.11667054146528244, "learning_rate": 9.857940158443558e-06, "loss": 0.0038, "step": 20590 }, { "epoch": 0.3370694592162317, "grad_norm": 0.02627541869878769, "learning_rate": 9.85760200579546e-06, "loss": 0.003, "step": 20600 }, { "epoch": 0.33723308516730754, "grad_norm": 0.21701110899448395, "learning_rate": 9.857263456977872e-06, "loss": 0.0031, "step": 20610 }, { "epoch": 0.3373967111183834, "grad_norm": 0.06111739203333855, "learning_rate": 9.8569245120184e-06, "loss": 0.0025, "step": 20620 }, { "epoch": 0.3375603370694592, "grad_norm": 0.06633685529232025, "learning_rate": 9.856585170944693e-06, "loss": 0.0026, "step": 20630 }, { "epoch": 0.33772396302053503, "grad_norm": 0.1305380016565323, "learning_rate": 9.856245433784419e-06, "loss": 0.0033, "step": 20640 }, { "epoch": 0.3378875889716109, "grad_norm": 0.09376155585050583, "learning_rate": 9.855905300565293e-06, "loss": 0.005, "step": 20650 }, { "epoch": 0.33805121492268675, "grad_norm": 0.11227598041296005, "learning_rate": 9.85556477131505e-06, "loss": 0.0027, "step": 20660 }, { "epoch": 0.3382148408737626, "grad_norm": 0.1978847086429596, "learning_rate": 9.855223846061466e-06, "loss": 0.0032, "step": 20670 }, { "epoch": 0.3383784668248384, "grad_norm": 0.11767446249723434, "learning_rate": 9.854882524832343e-06, "loss": 0.005, "step": 20680 }, { "epoch": 0.33854209277591424, "grad_norm": 0.10278233140707016, "learning_rate": 9.854540807655519e-06, "loss": 0.003, "step": 20690 }, { "epoch": 0.3387057187269901, "grad_norm": 0.16309624910354614, "learning_rate": 9.854198694558862e-06, "loss": 0.006, "step": 20700 }, { "epoch": 0.33886934467806595, "grad_norm": 0.1410336196422577, "learning_rate": 9.853856185570276e-06, "loss": 0.0035, "step": 20710 }, { "epoch": 0.3390329706291418, "grad_norm": 0.06369351595640182, "learning_rate": 9.853513280717695e-06, "loss": 0.0029, "step": 20720 }, { "epoch": 0.3391965965802176, "grad_norm": 0.20060832798480988, "learning_rate": 9.853169980029083e-06, "loss": 0.006, "step": 20730 }, { "epoch": 0.33936022253129344, "grad_norm": 0.0910542905330658, "learning_rate": 9.852826283532439e-06, "loss": 0.0038, "step": 20740 }, { "epoch": 0.33952384848236933, "grad_norm": 0.1707943081855774, "learning_rate": 9.852482191255794e-06, "loss": 0.0035, "step": 20750 }, { "epoch": 0.33968747443344516, "grad_norm": 0.0393177792429924, "learning_rate": 9.852137703227212e-06, "loss": 0.0031, "step": 20760 }, { "epoch": 0.339851100384521, "grad_norm": 0.11621933430433273, "learning_rate": 9.851792819474785e-06, "loss": 0.0032, "step": 20770 }, { "epoch": 0.3400147263355968, "grad_norm": 0.11890273541212082, "learning_rate": 9.851447540026645e-06, "loss": 0.0037, "step": 20780 }, { "epoch": 0.34017835228667265, "grad_norm": 0.1648484319448471, "learning_rate": 9.851101864910949e-06, "loss": 0.0039, "step": 20790 }, { "epoch": 0.34034197823774853, "grad_norm": 0.23658430576324463, "learning_rate": 9.850755794155891e-06, "loss": 0.0055, "step": 20800 }, { "epoch": 0.34050560418882436, "grad_norm": 0.07108192890882492, "learning_rate": 9.850409327789692e-06, "loss": 0.0029, "step": 20810 }, { "epoch": 0.3406692301399002, "grad_norm": 0.10644032061100006, "learning_rate": 9.850062465840611e-06, "loss": 0.0037, "step": 20820 }, { "epoch": 0.340832856090976, "grad_norm": 0.013563537038862705, "learning_rate": 9.849715208336938e-06, "loss": 0.0045, "step": 20830 }, { "epoch": 0.34099648204205185, "grad_norm": 0.16491533815860748, "learning_rate": 9.849367555306993e-06, "loss": 0.0043, "step": 20840 }, { "epoch": 0.34116010799312774, "grad_norm": 0.18684102594852448, "learning_rate": 9.849019506779127e-06, "loss": 0.006, "step": 20850 }, { "epoch": 0.34132373394420357, "grad_norm": 0.09518221020698547, "learning_rate": 9.84867106278173e-06, "loss": 0.0061, "step": 20860 }, { "epoch": 0.3414873598952794, "grad_norm": 0.053158923983573914, "learning_rate": 9.848322223343217e-06, "loss": 0.0038, "step": 20870 }, { "epoch": 0.3416509858463552, "grad_norm": 0.11258822679519653, "learning_rate": 9.847972988492038e-06, "loss": 0.0026, "step": 20880 }, { "epoch": 0.34181461179743106, "grad_norm": 0.18029457330703735, "learning_rate": 9.847623358256678e-06, "loss": 0.0042, "step": 20890 }, { "epoch": 0.3419782377485069, "grad_norm": 0.11698746681213379, "learning_rate": 9.847273332665648e-06, "loss": 0.0105, "step": 20900 }, { "epoch": 0.34214186369958277, "grad_norm": 0.3530029058456421, "learning_rate": 9.846922911747498e-06, "loss": 0.0072, "step": 20910 }, { "epoch": 0.3423054896506586, "grad_norm": 0.2188953161239624, "learning_rate": 9.846572095530807e-06, "loss": 0.0057, "step": 20920 }, { "epoch": 0.34246911560173443, "grad_norm": 0.18808463215827942, "learning_rate": 9.846220884044183e-06, "loss": 0.005, "step": 20930 }, { "epoch": 0.34263274155281026, "grad_norm": 0.11173252761363983, "learning_rate": 9.845869277316273e-06, "loss": 0.0042, "step": 20940 }, { "epoch": 0.3427963675038861, "grad_norm": 0.10642429441213608, "learning_rate": 9.845517275375752e-06, "loss": 0.0028, "step": 20950 }, { "epoch": 0.342959993454962, "grad_norm": 0.08499259501695633, "learning_rate": 9.845164878251327e-06, "loss": 0.0044, "step": 20960 }, { "epoch": 0.3431236194060378, "grad_norm": 0.06196568161249161, "learning_rate": 9.84481208597174e-06, "loss": 0.0091, "step": 20970 }, { "epoch": 0.34328724535711364, "grad_norm": 0.07904001325368881, "learning_rate": 9.844458898565763e-06, "loss": 0.0038, "step": 20980 }, { "epoch": 0.34345087130818946, "grad_norm": 0.06873547285795212, "learning_rate": 9.8441053160622e-06, "loss": 0.0032, "step": 20990 }, { "epoch": 0.3436144972592653, "grad_norm": 0.12111184000968933, "learning_rate": 9.84375133848989e-06, "loss": 0.0029, "step": 21000 }, { "epoch": 0.3437781232103412, "grad_norm": 0.32035741209983826, "learning_rate": 9.843396965877698e-06, "loss": 0.0051, "step": 21010 }, { "epoch": 0.343941749161417, "grad_norm": 0.18931397795677185, "learning_rate": 9.843042198254531e-06, "loss": 0.003, "step": 21020 }, { "epoch": 0.34410537511249284, "grad_norm": 0.23826611042022705, "learning_rate": 9.842687035649319e-06, "loss": 0.0034, "step": 21030 }, { "epoch": 0.34426900106356867, "grad_norm": 0.08468547463417053, "learning_rate": 9.842331478091027e-06, "loss": 0.0042, "step": 21040 }, { "epoch": 0.3444326270146445, "grad_norm": 0.07451336830854416, "learning_rate": 9.841975525608655e-06, "loss": 0.0029, "step": 21050 }, { "epoch": 0.3445962529657204, "grad_norm": 0.01555787306278944, "learning_rate": 9.841619178231234e-06, "loss": 0.0037, "step": 21060 }, { "epoch": 0.3447598789167962, "grad_norm": 0.2597961127758026, "learning_rate": 9.841262435987825e-06, "loss": 0.0049, "step": 21070 }, { "epoch": 0.34492350486787204, "grad_norm": 0.15131132304668427, "learning_rate": 9.840905298907523e-06, "loss": 0.0028, "step": 21080 }, { "epoch": 0.3450871308189479, "grad_norm": 0.0743577852845192, "learning_rate": 9.840547767019456e-06, "loss": 0.0025, "step": 21090 }, { "epoch": 0.3452507567700237, "grad_norm": 0.1728866696357727, "learning_rate": 9.84018984035278e-06, "loss": 0.0047, "step": 21100 }, { "epoch": 0.3454143827210996, "grad_norm": 0.14121197164058685, "learning_rate": 9.83983151893669e-06, "loss": 0.0044, "step": 21110 }, { "epoch": 0.3455780086721754, "grad_norm": 0.46367713809013367, "learning_rate": 9.839472802800407e-06, "loss": 0.0037, "step": 21120 }, { "epoch": 0.34574163462325125, "grad_norm": 0.3309983015060425, "learning_rate": 9.839113691973188e-06, "loss": 0.005, "step": 21130 }, { "epoch": 0.3459052605743271, "grad_norm": 0.1667407900094986, "learning_rate": 9.838754186484321e-06, "loss": 0.0036, "step": 21140 }, { "epoch": 0.3460688865254029, "grad_norm": 0.23879320919513702, "learning_rate": 9.838394286363124e-06, "loss": 0.0041, "step": 21150 }, { "epoch": 0.3462325124764788, "grad_norm": 0.20847830176353455, "learning_rate": 9.83803399163895e-06, "loss": 0.0036, "step": 21160 }, { "epoch": 0.3463961384275546, "grad_norm": 0.4925137758255005, "learning_rate": 9.837673302341187e-06, "loss": 0.006, "step": 21170 }, { "epoch": 0.34655976437863045, "grad_norm": 0.239491805434227, "learning_rate": 9.837312218499248e-06, "loss": 0.0061, "step": 21180 }, { "epoch": 0.3467233903297063, "grad_norm": 0.08568806946277618, "learning_rate": 9.83695074014258e-06, "loss": 0.0047, "step": 21190 }, { "epoch": 0.3468870162807821, "grad_norm": 0.1617346704006195, "learning_rate": 9.836588867300669e-06, "loss": 0.0039, "step": 21200 }, { "epoch": 0.347050642231858, "grad_norm": 0.3689493238925934, "learning_rate": 9.836226600003025e-06, "loss": 0.0063, "step": 21210 }, { "epoch": 0.3472142681829338, "grad_norm": 0.06783358007669449, "learning_rate": 9.835863938279193e-06, "loss": 0.0036, "step": 21220 }, { "epoch": 0.34737789413400966, "grad_norm": 0.06754415482282639, "learning_rate": 9.83550088215875e-06, "loss": 0.0043, "step": 21230 }, { "epoch": 0.3475415200850855, "grad_norm": 0.03541114926338196, "learning_rate": 9.835137431671308e-06, "loss": 0.0026, "step": 21240 }, { "epoch": 0.3477051460361613, "grad_norm": 0.08518331497907639, "learning_rate": 9.834773586846509e-06, "loss": 0.0064, "step": 21250 }, { "epoch": 0.3478687719872372, "grad_norm": 0.19849002361297607, "learning_rate": 9.834409347714025e-06, "loss": 0.0043, "step": 21260 }, { "epoch": 0.34803239793831303, "grad_norm": 0.2208586037158966, "learning_rate": 9.834044714303562e-06, "loss": 0.0041, "step": 21270 }, { "epoch": 0.34819602388938886, "grad_norm": 0.06353924423456192, "learning_rate": 9.833679686644858e-06, "loss": 0.0029, "step": 21280 }, { "epoch": 0.3483596498404647, "grad_norm": 0.2002340853214264, "learning_rate": 9.833314264767686e-06, "loss": 0.0035, "step": 21290 }, { "epoch": 0.3485232757915405, "grad_norm": 0.21066778898239136, "learning_rate": 9.832948448701846e-06, "loss": 0.0035, "step": 21300 }, { "epoch": 0.3486869017426164, "grad_norm": 0.08138404041528702, "learning_rate": 9.832582238477173e-06, "loss": 0.0048, "step": 21310 }, { "epoch": 0.34885052769369224, "grad_norm": 0.07733263820409775, "learning_rate": 9.832215634123537e-06, "loss": 0.0045, "step": 21320 }, { "epoch": 0.34901415364476807, "grad_norm": 0.038150470703840256, "learning_rate": 9.831848635670831e-06, "loss": 0.0031, "step": 21330 }, { "epoch": 0.3491777795958439, "grad_norm": 0.21571916341781616, "learning_rate": 9.831481243148992e-06, "loss": 0.006, "step": 21340 }, { "epoch": 0.3493414055469197, "grad_norm": 0.16414399445056915, "learning_rate": 9.831113456587981e-06, "loss": 0.0035, "step": 21350 }, { "epoch": 0.34950503149799556, "grad_norm": 0.07113195210695267, "learning_rate": 9.830745276017792e-06, "loss": 0.0053, "step": 21360 }, { "epoch": 0.34966865744907144, "grad_norm": 0.042324427515268326, "learning_rate": 9.830376701468456e-06, "loss": 0.0038, "step": 21370 }, { "epoch": 0.34983228340014727, "grad_norm": 0.0561634860932827, "learning_rate": 9.830007732970028e-06, "loss": 0.0032, "step": 21380 }, { "epoch": 0.3499959093512231, "grad_norm": 0.1563832312822342, "learning_rate": 9.829638370552605e-06, "loss": 0.01, "step": 21390 }, { "epoch": 0.35015953530229893, "grad_norm": 0.12699761986732483, "learning_rate": 9.829268614246306e-06, "loss": 0.0037, "step": 21400 }, { "epoch": 0.35032316125337476, "grad_norm": 0.19512920081615448, "learning_rate": 9.828898464081291e-06, "loss": 0.0045, "step": 21410 }, { "epoch": 0.35048678720445064, "grad_norm": 0.09152159839868546, "learning_rate": 9.828527920087746e-06, "loss": 0.0038, "step": 21420 }, { "epoch": 0.3506504131555265, "grad_norm": 0.07050028443336487, "learning_rate": 9.828156982295893e-06, "loss": 0.0028, "step": 21430 }, { "epoch": 0.3508140391066023, "grad_norm": 0.19286592304706573, "learning_rate": 9.827785650735982e-06, "loss": 0.0018, "step": 21440 }, { "epoch": 0.35097766505767813, "grad_norm": 0.09797953814268112, "learning_rate": 9.827413925438299e-06, "loss": 0.0056, "step": 21450 }, { "epoch": 0.35114129100875396, "grad_norm": 0.15946625173091888, "learning_rate": 9.82704180643316e-06, "loss": 0.0065, "step": 21460 }, { "epoch": 0.35130491695982985, "grad_norm": 0.1895676553249359, "learning_rate": 9.826669293750915e-06, "loss": 0.0049, "step": 21470 }, { "epoch": 0.3514685429109057, "grad_norm": 0.1782289296388626, "learning_rate": 9.826296387421945e-06, "loss": 0.0036, "step": 21480 }, { "epoch": 0.3516321688619815, "grad_norm": 0.1732427179813385, "learning_rate": 9.825923087476662e-06, "loss": 0.0061, "step": 21490 }, { "epoch": 0.35179579481305734, "grad_norm": 0.1630256474018097, "learning_rate": 9.82554939394551e-06, "loss": 0.0046, "step": 21500 }, { "epoch": 0.35195942076413317, "grad_norm": 0.09715411812067032, "learning_rate": 9.825175306858968e-06, "loss": 0.0029, "step": 21510 }, { "epoch": 0.35212304671520905, "grad_norm": 0.10771908611059189, "learning_rate": 9.824800826247544e-06, "loss": 0.0027, "step": 21520 }, { "epoch": 0.3522866726662849, "grad_norm": 0.16415317356586456, "learning_rate": 9.824425952141781e-06, "loss": 0.0045, "step": 21530 }, { "epoch": 0.3524502986173607, "grad_norm": 0.08979246020317078, "learning_rate": 9.824050684572251e-06, "loss": 0.0043, "step": 21540 }, { "epoch": 0.35261392456843654, "grad_norm": 0.0841081440448761, "learning_rate": 9.82367502356956e-06, "loss": 0.0033, "step": 21550 }, { "epoch": 0.3527775505195124, "grad_norm": 0.11242271214723587, "learning_rate": 9.823298969164345e-06, "loss": 0.0034, "step": 21560 }, { "epoch": 0.35294117647058826, "grad_norm": 0.07970384508371353, "learning_rate": 9.822922521387277e-06, "loss": 0.0038, "step": 21570 }, { "epoch": 0.3531048024216641, "grad_norm": 0.13367889821529388, "learning_rate": 9.822545680269056e-06, "loss": 0.0048, "step": 21580 }, { "epoch": 0.3532684283727399, "grad_norm": 0.16324225068092346, "learning_rate": 9.822168445840418e-06, "loss": 0.0032, "step": 21590 }, { "epoch": 0.35343205432381575, "grad_norm": 0.11214855313301086, "learning_rate": 9.821790818132128e-06, "loss": 0.0036, "step": 21600 }, { "epoch": 0.3535956802748916, "grad_norm": 0.060387782752513885, "learning_rate": 9.821412797174985e-06, "loss": 0.0051, "step": 21610 }, { "epoch": 0.35375930622596746, "grad_norm": 0.17262721061706543, "learning_rate": 9.821034382999817e-06, "loss": 0.0049, "step": 21620 }, { "epoch": 0.3539229321770433, "grad_norm": 0.16682997345924377, "learning_rate": 9.820655575637488e-06, "loss": 0.0043, "step": 21630 }, { "epoch": 0.3540865581281191, "grad_norm": 0.06710716336965561, "learning_rate": 9.82027637511889e-06, "loss": 0.0049, "step": 21640 }, { "epoch": 0.35425018407919495, "grad_norm": 0.14109598100185394, "learning_rate": 9.819896781474953e-06, "loss": 0.0033, "step": 21650 }, { "epoch": 0.3544138100302708, "grad_norm": 0.13810548186302185, "learning_rate": 9.819516794736632e-06, "loss": 0.0074, "step": 21660 }, { "epoch": 0.35457743598134667, "grad_norm": 0.15594951808452606, "learning_rate": 9.81913641493492e-06, "loss": 0.0057, "step": 21670 }, { "epoch": 0.3547410619324225, "grad_norm": 0.046292707324028015, "learning_rate": 9.818755642100836e-06, "loss": 0.0035, "step": 21680 }, { "epoch": 0.3549046878834983, "grad_norm": 0.12565389275550842, "learning_rate": 9.818374476265438e-06, "loss": 0.0094, "step": 21690 }, { "epoch": 0.35506831383457416, "grad_norm": 0.0537894144654274, "learning_rate": 9.81799291745981e-06, "loss": 0.0037, "step": 21700 }, { "epoch": 0.35523193978565, "grad_norm": 0.24739708006381989, "learning_rate": 9.817610965715072e-06, "loss": 0.0043, "step": 21710 }, { "epoch": 0.35539556573672587, "grad_norm": 0.17831526696681976, "learning_rate": 9.817228621062377e-06, "loss": 0.0037, "step": 21720 }, { "epoch": 0.3555591916878017, "grad_norm": 0.06960579007863998, "learning_rate": 9.816845883532902e-06, "loss": 0.0039, "step": 21730 }, { "epoch": 0.35572281763887753, "grad_norm": 0.09768445789813995, "learning_rate": 9.816462753157867e-06, "loss": 0.004, "step": 21740 }, { "epoch": 0.35588644358995336, "grad_norm": 0.1259879767894745, "learning_rate": 9.816079229968515e-06, "loss": 0.004, "step": 21750 }, { "epoch": 0.3560500695410292, "grad_norm": 0.11670015007257462, "learning_rate": 9.81569531399613e-06, "loss": 0.0035, "step": 21760 }, { "epoch": 0.356213695492105, "grad_norm": 0.07601787149906158, "learning_rate": 9.815311005272017e-06, "loss": 0.0054, "step": 21770 }, { "epoch": 0.3563773214431809, "grad_norm": 0.2184062898159027, "learning_rate": 9.814926303827524e-06, "loss": 0.005, "step": 21780 }, { "epoch": 0.35654094739425674, "grad_norm": 0.3103298544883728, "learning_rate": 9.81454120969402e-06, "loss": 0.003, "step": 21790 }, { "epoch": 0.35670457334533257, "grad_norm": 0.3028302788734436, "learning_rate": 9.814155722902919e-06, "loss": 0.0078, "step": 21800 }, { "epoch": 0.3568681992964084, "grad_norm": 0.06297461688518524, "learning_rate": 9.813769843485654e-06, "loss": 0.0032, "step": 21810 }, { "epoch": 0.3570318252474842, "grad_norm": 0.14724983274936676, "learning_rate": 9.813383571473699e-06, "loss": 0.0043, "step": 21820 }, { "epoch": 0.3571954511985601, "grad_norm": 0.09313119202852249, "learning_rate": 9.812996906898556e-06, "loss": 0.0042, "step": 21830 }, { "epoch": 0.35735907714963594, "grad_norm": 0.07065626233816147, "learning_rate": 9.812609849791762e-06, "loss": 0.0036, "step": 21840 }, { "epoch": 0.35752270310071177, "grad_norm": 0.053032826632261276, "learning_rate": 9.81222240018488e-06, "loss": 0.0038, "step": 21850 }, { "epoch": 0.3576863290517876, "grad_norm": 0.16859860718250275, "learning_rate": 9.811834558109514e-06, "loss": 0.003, "step": 21860 }, { "epoch": 0.35784995500286343, "grad_norm": 0.19317512214183807, "learning_rate": 9.81144632359729e-06, "loss": 0.0049, "step": 21870 }, { "epoch": 0.3580135809539393, "grad_norm": 0.10547548532485962, "learning_rate": 9.811057696679878e-06, "loss": 0.0028, "step": 21880 }, { "epoch": 0.35817720690501514, "grad_norm": 0.1879497915506363, "learning_rate": 9.810668677388967e-06, "loss": 0.0045, "step": 21890 }, { "epoch": 0.358340832856091, "grad_norm": 0.32673487067222595, "learning_rate": 9.810279265756285e-06, "loss": 0.0058, "step": 21900 }, { "epoch": 0.3585044588071668, "grad_norm": 0.26756083965301514, "learning_rate": 9.809889461813592e-06, "loss": 0.0053, "step": 21910 }, { "epoch": 0.35866808475824263, "grad_norm": 0.23736342787742615, "learning_rate": 9.809499265592678e-06, "loss": 0.0046, "step": 21920 }, { "epoch": 0.3588317107093185, "grad_norm": 0.14139366149902344, "learning_rate": 9.809108677125371e-06, "loss": 0.0055, "step": 21930 }, { "epoch": 0.35899533666039435, "grad_norm": 0.061194807291030884, "learning_rate": 9.80871769644352e-06, "loss": 0.0032, "step": 21940 }, { "epoch": 0.3591589626114702, "grad_norm": 0.18361642956733704, "learning_rate": 9.808326323579014e-06, "loss": 0.0051, "step": 21950 }, { "epoch": 0.359322588562546, "grad_norm": 0.0626908540725708, "learning_rate": 9.807934558563774e-06, "loss": 0.0029, "step": 21960 }, { "epoch": 0.35948621451362184, "grad_norm": 0.08477026224136353, "learning_rate": 9.807542401429748e-06, "loss": 0.0033, "step": 21970 }, { "epoch": 0.3596498404646977, "grad_norm": 0.040161699056625366, "learning_rate": 9.807149852208922e-06, "loss": 0.0051, "step": 21980 }, { "epoch": 0.35981346641577355, "grad_norm": 0.1196545660495758, "learning_rate": 9.806756910933308e-06, "loss": 0.0042, "step": 21990 }, { "epoch": 0.3599770923668494, "grad_norm": 0.09050813317298889, "learning_rate": 9.806363577634954e-06, "loss": 0.0037, "step": 22000 }, { "epoch": 0.3601407183179252, "grad_norm": 0.0685819610953331, "learning_rate": 9.80596985234594e-06, "loss": 0.003, "step": 22010 }, { "epoch": 0.36030434426900104, "grad_norm": 0.23909932374954224, "learning_rate": 9.805575735098376e-06, "loss": 0.0049, "step": 22020 }, { "epoch": 0.3604679702200769, "grad_norm": 0.09177765995264053, "learning_rate": 9.805181225924406e-06, "loss": 0.0051, "step": 22030 }, { "epoch": 0.36063159617115276, "grad_norm": 0.19640956819057465, "learning_rate": 9.804786324856202e-06, "loss": 0.0045, "step": 22040 }, { "epoch": 0.3607952221222286, "grad_norm": 0.2347734123468399, "learning_rate": 9.804391031925972e-06, "loss": 0.0031, "step": 22050 }, { "epoch": 0.3609588480733044, "grad_norm": 0.3181383013725281, "learning_rate": 9.803995347165958e-06, "loss": 0.0037, "step": 22060 }, { "epoch": 0.36112247402438025, "grad_norm": 0.18287114799022675, "learning_rate": 9.803599270608427e-06, "loss": 0.0061, "step": 22070 }, { "epoch": 0.36128609997545613, "grad_norm": 0.4434426426887512, "learning_rate": 9.803202802285682e-06, "loss": 0.0046, "step": 22080 }, { "epoch": 0.36144972592653196, "grad_norm": 0.19482307136058807, "learning_rate": 9.802805942230061e-06, "loss": 0.0038, "step": 22090 }, { "epoch": 0.3616133518776078, "grad_norm": 0.139286071062088, "learning_rate": 9.802408690473925e-06, "loss": 0.0042, "step": 22100 }, { "epoch": 0.3617769778286836, "grad_norm": 0.11104580760002136, "learning_rate": 9.802011047049675e-06, "loss": 0.0045, "step": 22110 }, { "epoch": 0.36194060377975945, "grad_norm": 0.2055734097957611, "learning_rate": 9.801613011989745e-06, "loss": 0.0033, "step": 22120 }, { "epoch": 0.36210422973083534, "grad_norm": 0.07899788022041321, "learning_rate": 9.801214585326592e-06, "loss": 0.0055, "step": 22130 }, { "epoch": 0.36226785568191117, "grad_norm": 0.13910041749477386, "learning_rate": 9.800815767092712e-06, "loss": 0.0047, "step": 22140 }, { "epoch": 0.362431481632987, "grad_norm": 0.0787155032157898, "learning_rate": 9.800416557320634e-06, "loss": 0.0042, "step": 22150 }, { "epoch": 0.3625951075840628, "grad_norm": 0.05051407963037491, "learning_rate": 9.800016956042913e-06, "loss": 0.0046, "step": 22160 }, { "epoch": 0.36275873353513866, "grad_norm": 0.2281433492898941, "learning_rate": 9.79961696329214e-06, "loss": 0.0031, "step": 22170 }, { "epoch": 0.36292235948621454, "grad_norm": 0.06710359454154968, "learning_rate": 9.799216579100937e-06, "loss": 0.0048, "step": 22180 }, { "epoch": 0.36308598543729037, "grad_norm": 0.08895856887102127, "learning_rate": 9.798815803501958e-06, "loss": 0.006, "step": 22190 }, { "epoch": 0.3632496113883662, "grad_norm": 0.164419025182724, "learning_rate": 9.798414636527891e-06, "loss": 0.0038, "step": 22200 }, { "epoch": 0.36341323733944203, "grad_norm": 0.1343121975660324, "learning_rate": 9.79801307821145e-06, "loss": 0.0053, "step": 22210 }, { "epoch": 0.36357686329051786, "grad_norm": 0.09868015348911285, "learning_rate": 9.797611128585388e-06, "loss": 0.0048, "step": 22220 }, { "epoch": 0.3637404892415937, "grad_norm": 0.2300853580236435, "learning_rate": 9.797208787682482e-06, "loss": 0.006, "step": 22230 }, { "epoch": 0.3639041151926696, "grad_norm": 0.1900927722454071, "learning_rate": 9.796806055535551e-06, "loss": 0.0039, "step": 22240 }, { "epoch": 0.3640677411437454, "grad_norm": 0.2144131362438202, "learning_rate": 9.796402932177438e-06, "loss": 0.0056, "step": 22250 }, { "epoch": 0.36423136709482123, "grad_norm": 0.05185553804039955, "learning_rate": 9.795999417641022e-06, "loss": 0.0034, "step": 22260 }, { "epoch": 0.36439499304589706, "grad_norm": 0.060830626636743546, "learning_rate": 9.795595511959208e-06, "loss": 0.0043, "step": 22270 }, { "epoch": 0.3645586189969729, "grad_norm": 0.1453610509634018, "learning_rate": 9.795191215164943e-06, "loss": 0.0036, "step": 22280 }, { "epoch": 0.3647222449480488, "grad_norm": 0.1452437788248062, "learning_rate": 9.794786527291196e-06, "loss": 0.0032, "step": 22290 }, { "epoch": 0.3648858708991246, "grad_norm": 0.1662825047969818, "learning_rate": 9.794381448370974e-06, "loss": 0.0068, "step": 22300 }, { "epoch": 0.36504949685020044, "grad_norm": 0.08057316392660141, "learning_rate": 9.793975978437312e-06, "loss": 0.0032, "step": 22310 }, { "epoch": 0.36521312280127627, "grad_norm": 0.064130499958992, "learning_rate": 9.793570117523279e-06, "loss": 0.0028, "step": 22320 }, { "epoch": 0.3653767487523521, "grad_norm": 0.13349877297878265, "learning_rate": 9.793163865661977e-06, "loss": 0.003, "step": 22330 }, { "epoch": 0.365540374703428, "grad_norm": 0.07337766885757446, "learning_rate": 9.792757222886538e-06, "loss": 0.0026, "step": 22340 }, { "epoch": 0.3657040006545038, "grad_norm": 0.009043429978191853, "learning_rate": 9.792350189230125e-06, "loss": 0.0059, "step": 22350 }, { "epoch": 0.36586762660557964, "grad_norm": 0.06668829917907715, "learning_rate": 9.791942764725938e-06, "loss": 0.0025, "step": 22360 }, { "epoch": 0.3660312525566555, "grad_norm": 0.11217039078474045, "learning_rate": 9.791534949407202e-06, "loss": 0.0043, "step": 22370 }, { "epoch": 0.3661948785077313, "grad_norm": 0.08812987804412842, "learning_rate": 9.791126743307176e-06, "loss": 0.0038, "step": 22380 }, { "epoch": 0.3663585044588072, "grad_norm": 0.195985808968544, "learning_rate": 9.790718146459154e-06, "loss": 0.0036, "step": 22390 }, { "epoch": 0.366522130409883, "grad_norm": 0.14922137558460236, "learning_rate": 9.79030915889646e-06, "loss": 0.0025, "step": 22400 }, { "epoch": 0.36668575636095885, "grad_norm": 0.1597341001033783, "learning_rate": 9.78989978065245e-06, "loss": 0.0042, "step": 22410 }, { "epoch": 0.3668493823120347, "grad_norm": 0.017290370538830757, "learning_rate": 9.78949001176051e-06, "loss": 0.0035, "step": 22420 }, { "epoch": 0.3670130082631105, "grad_norm": 0.14993682503700256, "learning_rate": 9.78907985225406e-06, "loss": 0.0031, "step": 22430 }, { "epoch": 0.3671766342141864, "grad_norm": 0.05891666188836098, "learning_rate": 9.788669302166551e-06, "loss": 0.0034, "step": 22440 }, { "epoch": 0.3673402601652622, "grad_norm": 0.135666623711586, "learning_rate": 9.788258361531466e-06, "loss": 0.0027, "step": 22450 }, { "epoch": 0.36750388611633805, "grad_norm": 0.10232146829366684, "learning_rate": 9.78784703038232e-06, "loss": 0.0042, "step": 22460 }, { "epoch": 0.3676675120674139, "grad_norm": 0.15934139490127563, "learning_rate": 9.78743530875266e-06, "loss": 0.0035, "step": 22470 }, { "epoch": 0.3678311380184897, "grad_norm": 0.08764767646789551, "learning_rate": 9.787023196676064e-06, "loss": 0.0051, "step": 22480 }, { "epoch": 0.3679947639695656, "grad_norm": 0.07965680956840515, "learning_rate": 9.786610694186143e-06, "loss": 0.0049, "step": 22490 }, { "epoch": 0.3681583899206414, "grad_norm": 0.07782886922359467, "learning_rate": 9.786197801316539e-06, "loss": 0.0026, "step": 22500 }, { "epoch": 0.36832201587171726, "grad_norm": 0.12102463841438293, "learning_rate": 9.785784518100928e-06, "loss": 0.0062, "step": 22510 }, { "epoch": 0.3684856418227931, "grad_norm": 0.02740984596312046, "learning_rate": 9.785370844573014e-06, "loss": 0.0037, "step": 22520 }, { "epoch": 0.3686492677738689, "grad_norm": 0.059107620269060135, "learning_rate": 9.784956780766534e-06, "loss": 0.003, "step": 22530 }, { "epoch": 0.3688128937249448, "grad_norm": 0.30199170112609863, "learning_rate": 9.784542326715259e-06, "loss": 0.0043, "step": 22540 }, { "epoch": 0.36897651967602063, "grad_norm": 0.15318413078784943, "learning_rate": 9.78412748245299e-06, "loss": 0.0049, "step": 22550 }, { "epoch": 0.36914014562709646, "grad_norm": 0.37131184339523315, "learning_rate": 9.783712248013561e-06, "loss": 0.004, "step": 22560 }, { "epoch": 0.3693037715781723, "grad_norm": 0.0888022854924202, "learning_rate": 9.783296623430838e-06, "loss": 0.005, "step": 22570 }, { "epoch": 0.3694673975292481, "grad_norm": 0.12317230552434921, "learning_rate": 9.782880608738714e-06, "loss": 0.0022, "step": 22580 }, { "epoch": 0.369631023480324, "grad_norm": 0.17772452533245087, "learning_rate": 9.782464203971122e-06, "loss": 0.0041, "step": 22590 }, { "epoch": 0.36979464943139984, "grad_norm": 0.15841124951839447, "learning_rate": 9.78204740916202e-06, "loss": 0.0049, "step": 22600 }, { "epoch": 0.36995827538247567, "grad_norm": 0.1106036975979805, "learning_rate": 9.781630224345402e-06, "loss": 0.0041, "step": 22610 }, { "epoch": 0.3701219013335515, "grad_norm": 0.13645784556865692, "learning_rate": 9.781212649555291e-06, "loss": 0.0041, "step": 22620 }, { "epoch": 0.3702855272846273, "grad_norm": 0.2242395579814911, "learning_rate": 9.780794684825743e-06, "loss": 0.0062, "step": 22630 }, { "epoch": 0.3704491532357032, "grad_norm": 0.09353913366794586, "learning_rate": 9.780376330190847e-06, "loss": 0.0038, "step": 22640 }, { "epoch": 0.37061277918677904, "grad_norm": 0.13247133791446686, "learning_rate": 9.779957585684721e-06, "loss": 0.0054, "step": 22650 }, { "epoch": 0.37077640513785487, "grad_norm": 0.14868858456611633, "learning_rate": 9.779538451341519e-06, "loss": 0.0055, "step": 22660 }, { "epoch": 0.3709400310889307, "grad_norm": 0.15545055270195007, "learning_rate": 9.77911892719542e-06, "loss": 0.0082, "step": 22670 }, { "epoch": 0.37110365704000653, "grad_norm": 0.2338918149471283, "learning_rate": 9.778699013280642e-06, "loss": 0.0055, "step": 22680 }, { "epoch": 0.37126728299108236, "grad_norm": 0.1080441102385521, "learning_rate": 9.77827870963143e-06, "loss": 0.0036, "step": 22690 }, { "epoch": 0.37143090894215824, "grad_norm": 0.1431145966053009, "learning_rate": 9.777858016282067e-06, "loss": 0.0048, "step": 22700 }, { "epoch": 0.3715945348932341, "grad_norm": 0.11440259218215942, "learning_rate": 9.777436933266857e-06, "loss": 0.0028, "step": 22710 }, { "epoch": 0.3717581608443099, "grad_norm": 0.26593178510665894, "learning_rate": 9.777015460620144e-06, "loss": 0.0024, "step": 22720 }, { "epoch": 0.37192178679538573, "grad_norm": 0.03324040398001671, "learning_rate": 9.776593598376305e-06, "loss": 0.0031, "step": 22730 }, { "epoch": 0.37208541274646156, "grad_norm": 0.07390396296977997, "learning_rate": 9.776171346569743e-06, "loss": 0.0019, "step": 22740 }, { "epoch": 0.37224903869753745, "grad_norm": 0.06852605193853378, "learning_rate": 9.775748705234897e-06, "loss": 0.0036, "step": 22750 }, { "epoch": 0.3724126646486133, "grad_norm": 0.1449580192565918, "learning_rate": 9.775325674406233e-06, "loss": 0.0032, "step": 22760 }, { "epoch": 0.3725762905996891, "grad_norm": 0.04383785277605057, "learning_rate": 9.774902254118255e-06, "loss": 0.0024, "step": 22770 }, { "epoch": 0.37273991655076494, "grad_norm": 0.1101393923163414, "learning_rate": 9.774478444405494e-06, "loss": 0.0027, "step": 22780 }, { "epoch": 0.37290354250184077, "grad_norm": 0.012634593062102795, "learning_rate": 9.774054245302516e-06, "loss": 0.0039, "step": 22790 }, { "epoch": 0.37306716845291665, "grad_norm": 0.06818386912345886, "learning_rate": 9.773629656843917e-06, "loss": 0.0049, "step": 22800 }, { "epoch": 0.3732307944039925, "grad_norm": 0.09158064424991608, "learning_rate": 9.773204679064324e-06, "loss": 0.0032, "step": 22810 }, { "epoch": 0.3733944203550683, "grad_norm": 0.0785619467496872, "learning_rate": 9.772779311998398e-06, "loss": 0.0038, "step": 22820 }, { "epoch": 0.37355804630614414, "grad_norm": 0.077456995844841, "learning_rate": 9.772353555680828e-06, "loss": 0.0031, "step": 22830 }, { "epoch": 0.37372167225722, "grad_norm": 0.08676807582378387, "learning_rate": 9.77192741014634e-06, "loss": 0.0025, "step": 22840 }, { "epoch": 0.37388529820829586, "grad_norm": 0.18085947632789612, "learning_rate": 9.771500875429687e-06, "loss": 0.0033, "step": 22850 }, { "epoch": 0.3740489241593717, "grad_norm": 0.07374570518732071, "learning_rate": 9.771073951565657e-06, "loss": 0.0053, "step": 22860 }, { "epoch": 0.3742125501104475, "grad_norm": 0.060111820697784424, "learning_rate": 9.770646638589069e-06, "loss": 0.0038, "step": 22870 }, { "epoch": 0.37437617606152335, "grad_norm": 0.12110818177461624, "learning_rate": 9.770218936534773e-06, "loss": 0.0037, "step": 22880 }, { "epoch": 0.3745398020125992, "grad_norm": 0.22021162509918213, "learning_rate": 9.769790845437647e-06, "loss": 0.0031, "step": 22890 }, { "epoch": 0.37470342796367506, "grad_norm": 0.08133351802825928, "learning_rate": 9.769362365332611e-06, "loss": 0.0028, "step": 22900 }, { "epoch": 0.3748670539147509, "grad_norm": 0.3176220655441284, "learning_rate": 9.768933496254607e-06, "loss": 0.0026, "step": 22910 }, { "epoch": 0.3750306798658267, "grad_norm": 0.06265996396541595, "learning_rate": 9.76850423823861e-06, "loss": 0.0024, "step": 22920 }, { "epoch": 0.37519430581690255, "grad_norm": 0.014191754162311554, "learning_rate": 9.768074591319634e-06, "loss": 0.0024, "step": 22930 }, { "epoch": 0.3753579317679784, "grad_norm": 0.19419102370738983, "learning_rate": 9.767644555532715e-06, "loss": 0.0046, "step": 22940 }, { "epoch": 0.37552155771905427, "grad_norm": 0.11108149588108063, "learning_rate": 9.767214130912928e-06, "loss": 0.0032, "step": 22950 }, { "epoch": 0.3756851836701301, "grad_norm": 0.10928455740213394, "learning_rate": 9.766783317495373e-06, "loss": 0.0033, "step": 22960 }, { "epoch": 0.3758488096212059, "grad_norm": 0.06540073454380035, "learning_rate": 9.766352115315191e-06, "loss": 0.0034, "step": 22970 }, { "epoch": 0.37601243557228176, "grad_norm": 0.24643945693969727, "learning_rate": 9.765920524407548e-06, "loss": 0.0029, "step": 22980 }, { "epoch": 0.3761760615233576, "grad_norm": 0.2234330177307129, "learning_rate": 9.765488544807642e-06, "loss": 0.0047, "step": 22990 }, { "epoch": 0.37633968747443347, "grad_norm": 0.19479380548000336, "learning_rate": 9.765056176550703e-06, "loss": 0.0048, "step": 23000 }, { "epoch": 0.3765033134255093, "grad_norm": 0.18185527622699738, "learning_rate": 9.764623419671995e-06, "loss": 0.0042, "step": 23010 }, { "epoch": 0.37666693937658513, "grad_norm": 0.04639330506324768, "learning_rate": 9.76419027420681e-06, "loss": 0.0035, "step": 23020 }, { "epoch": 0.37683056532766096, "grad_norm": 0.15931709110736847, "learning_rate": 9.763756740190475e-06, "loss": 0.004, "step": 23030 }, { "epoch": 0.3769941912787368, "grad_norm": 0.1329541802406311, "learning_rate": 9.76332281765835e-06, "loss": 0.0051, "step": 23040 }, { "epoch": 0.3771578172298127, "grad_norm": 0.25017082691192627, "learning_rate": 9.762888506645822e-06, "loss": 0.0046, "step": 23050 }, { "epoch": 0.3773214431808885, "grad_norm": 0.14795063436031342, "learning_rate": 9.76245380718831e-06, "loss": 0.0039, "step": 23060 }, { "epoch": 0.37748506913196433, "grad_norm": 0.04935089871287346, "learning_rate": 9.762018719321272e-06, "loss": 0.0027, "step": 23070 }, { "epoch": 0.37764869508304016, "grad_norm": 0.06639645993709564, "learning_rate": 9.761583243080187e-06, "loss": 0.0038, "step": 23080 }, { "epoch": 0.377812321034116, "grad_norm": 0.17891868948936462, "learning_rate": 9.761147378500573e-06, "loss": 0.0042, "step": 23090 }, { "epoch": 0.3779759469851918, "grad_norm": 0.18009509146213531, "learning_rate": 9.760711125617979e-06, "loss": 0.0029, "step": 23100 }, { "epoch": 0.3781395729362677, "grad_norm": 0.08619875460863113, "learning_rate": 9.760274484467981e-06, "loss": 0.0031, "step": 23110 }, { "epoch": 0.37830319888734354, "grad_norm": 0.041656337678432465, "learning_rate": 9.759837455086193e-06, "loss": 0.0027, "step": 23120 }, { "epoch": 0.37846682483841937, "grad_norm": 0.0884852409362793, "learning_rate": 9.759400037508257e-06, "loss": 0.0043, "step": 23130 }, { "epoch": 0.3786304507894952, "grad_norm": 0.22761377692222595, "learning_rate": 9.758962231769846e-06, "loss": 0.0033, "step": 23140 }, { "epoch": 0.37879407674057103, "grad_norm": 0.24562352895736694, "learning_rate": 9.758524037906666e-06, "loss": 0.0042, "step": 23150 }, { "epoch": 0.3789577026916469, "grad_norm": 0.07740072906017303, "learning_rate": 9.758085455954457e-06, "loss": 0.0037, "step": 23160 }, { "epoch": 0.37912132864272274, "grad_norm": 0.12342134863138199, "learning_rate": 9.757646485948986e-06, "loss": 0.0038, "step": 23170 }, { "epoch": 0.3792849545937986, "grad_norm": 0.13873374462127686, "learning_rate": 9.757207127926054e-06, "loss": 0.0036, "step": 23180 }, { "epoch": 0.3794485805448744, "grad_norm": 0.07392168045043945, "learning_rate": 9.756767381921495e-06, "loss": 0.003, "step": 23190 }, { "epoch": 0.37961220649595023, "grad_norm": 0.2070484757423401, "learning_rate": 9.756327247971171e-06, "loss": 0.0032, "step": 23200 }, { "epoch": 0.3797758324470261, "grad_norm": 0.17215843498706818, "learning_rate": 9.75588672611098e-06, "loss": 0.0055, "step": 23210 }, { "epoch": 0.37993945839810195, "grad_norm": 0.20391426980495453, "learning_rate": 9.75544581637685e-06, "loss": 0.0047, "step": 23220 }, { "epoch": 0.3801030843491778, "grad_norm": 0.07884842902421951, "learning_rate": 9.755004518804736e-06, "loss": 0.0033, "step": 23230 }, { "epoch": 0.3802667103002536, "grad_norm": 0.05794193968176842, "learning_rate": 9.754562833430632e-06, "loss": 0.0036, "step": 23240 }, { "epoch": 0.38043033625132944, "grad_norm": 0.05548607185482979, "learning_rate": 9.75412076029056e-06, "loss": 0.0029, "step": 23250 }, { "epoch": 0.3805939622024053, "grad_norm": 0.08710295706987381, "learning_rate": 9.753678299420574e-06, "loss": 0.0028, "step": 23260 }, { "epoch": 0.38075758815348115, "grad_norm": 0.26748549938201904, "learning_rate": 9.75323545085676e-06, "loss": 0.0039, "step": 23270 }, { "epoch": 0.380921214104557, "grad_norm": 0.11372175812721252, "learning_rate": 9.752792214635232e-06, "loss": 0.0032, "step": 23280 }, { "epoch": 0.3810848400556328, "grad_norm": 0.17136171460151672, "learning_rate": 9.752348590792144e-06, "loss": 0.0048, "step": 23290 }, { "epoch": 0.38124846600670864, "grad_norm": 0.26676544547080994, "learning_rate": 9.751904579363673e-06, "loss": 0.0031, "step": 23300 }, { "epoch": 0.3814120919577845, "grad_norm": 0.12151642888784409, "learning_rate": 9.751460180386032e-06, "loss": 0.0051, "step": 23310 }, { "epoch": 0.38157571790886036, "grad_norm": 0.2721957266330719, "learning_rate": 9.751015393895465e-06, "loss": 0.0033, "step": 23320 }, { "epoch": 0.3817393438599362, "grad_norm": 0.049269482493400574, "learning_rate": 9.750570219928245e-06, "loss": 0.0032, "step": 23330 }, { "epoch": 0.381902969811012, "grad_norm": 0.3966676592826843, "learning_rate": 9.750124658520682e-06, "loss": 0.0038, "step": 23340 }, { "epoch": 0.38206659576208785, "grad_norm": 0.10777287930250168, "learning_rate": 9.749678709709112e-06, "loss": 0.0042, "step": 23350 }, { "epoch": 0.38223022171316373, "grad_norm": 0.24577341973781586, "learning_rate": 9.749232373529907e-06, "loss": 0.004, "step": 23360 }, { "epoch": 0.38239384766423956, "grad_norm": 0.07021196186542511, "learning_rate": 9.748785650019468e-06, "loss": 0.0037, "step": 23370 }, { "epoch": 0.3825574736153154, "grad_norm": 0.2510216236114502, "learning_rate": 9.748338539214229e-06, "loss": 0.0043, "step": 23380 }, { "epoch": 0.3827210995663912, "grad_norm": 0.19000519812107086, "learning_rate": 9.747891041150654e-06, "loss": 0.0037, "step": 23390 }, { "epoch": 0.38288472551746705, "grad_norm": 0.0985528975725174, "learning_rate": 9.747443155865238e-06, "loss": 0.0039, "step": 23400 }, { "epoch": 0.38304835146854294, "grad_norm": 0.172266885638237, "learning_rate": 9.746994883394512e-06, "loss": 0.0035, "step": 23410 }, { "epoch": 0.38321197741961877, "grad_norm": 0.07203979045152664, "learning_rate": 9.746546223775032e-06, "loss": 0.0031, "step": 23420 }, { "epoch": 0.3833756033706946, "grad_norm": 0.21380412578582764, "learning_rate": 9.746097177043393e-06, "loss": 0.0062, "step": 23430 }, { "epoch": 0.3835392293217704, "grad_norm": 0.07951556146144867, "learning_rate": 9.745647743236216e-06, "loss": 0.0026, "step": 23440 }, { "epoch": 0.38370285527284625, "grad_norm": 0.09176522493362427, "learning_rate": 9.745197922390153e-06, "loss": 0.005, "step": 23450 }, { "epoch": 0.38386648122392214, "grad_norm": 0.14726032316684723, "learning_rate": 9.744747714541894e-06, "loss": 0.0028, "step": 23460 }, { "epoch": 0.38403010717499797, "grad_norm": 0.05336847901344299, "learning_rate": 9.744297119728152e-06, "loss": 0.0039, "step": 23470 }, { "epoch": 0.3841937331260738, "grad_norm": 0.14600268006324768, "learning_rate": 9.74384613798568e-06, "loss": 0.005, "step": 23480 }, { "epoch": 0.38435735907714963, "grad_norm": 0.10225504636764526, "learning_rate": 9.743394769351258e-06, "loss": 0.0025, "step": 23490 }, { "epoch": 0.38452098502822546, "grad_norm": 0.08892350643873215, "learning_rate": 9.742943013861695e-06, "loss": 0.0036, "step": 23500 }, { "epoch": 0.38468461097930134, "grad_norm": 0.14312788844108582, "learning_rate": 9.742490871553837e-06, "loss": 0.0026, "step": 23510 }, { "epoch": 0.3848482369303772, "grad_norm": 0.24043717980384827, "learning_rate": 9.74203834246456e-06, "loss": 0.0081, "step": 23520 }, { "epoch": 0.385011862881453, "grad_norm": 0.08359953761100769, "learning_rate": 9.74158542663077e-06, "loss": 0.0037, "step": 23530 }, { "epoch": 0.38517548883252883, "grad_norm": 0.05971476808190346, "learning_rate": 9.741132124089403e-06, "loss": 0.0022, "step": 23540 }, { "epoch": 0.38533911478360466, "grad_norm": 0.09047897160053253, "learning_rate": 9.740678434877433e-06, "loss": 0.0049, "step": 23550 }, { "epoch": 0.3855027407346805, "grad_norm": 0.06716237962245941, "learning_rate": 9.740224359031858e-06, "loss": 0.0025, "step": 23560 }, { "epoch": 0.3856663666857564, "grad_norm": 0.2412993162870407, "learning_rate": 9.73976989658971e-06, "loss": 0.0044, "step": 23570 }, { "epoch": 0.3858299926368322, "grad_norm": 0.14340847730636597, "learning_rate": 9.739315047588059e-06, "loss": 0.0034, "step": 23580 }, { "epoch": 0.38599361858790804, "grad_norm": 0.04268035292625427, "learning_rate": 9.738859812063994e-06, "loss": 0.0026, "step": 23590 }, { "epoch": 0.38615724453898387, "grad_norm": 0.1195569857954979, "learning_rate": 9.738404190054646e-06, "loss": 0.0061, "step": 23600 }, { "epoch": 0.3863208704900597, "grad_norm": 0.10575102269649506, "learning_rate": 9.737948181597176e-06, "loss": 0.0041, "step": 23610 }, { "epoch": 0.3864844964411356, "grad_norm": 0.1247902438044548, "learning_rate": 9.73749178672877e-06, "loss": 0.0025, "step": 23620 }, { "epoch": 0.3866481223922114, "grad_norm": 0.08549071848392487, "learning_rate": 9.737035005486653e-06, "loss": 0.0024, "step": 23630 }, { "epoch": 0.38681174834328724, "grad_norm": 0.0867099016904831, "learning_rate": 9.73657783790808e-06, "loss": 0.0043, "step": 23640 }, { "epoch": 0.3869753742943631, "grad_norm": 0.08126109093427658, "learning_rate": 9.73612028403033e-06, "loss": 0.0036, "step": 23650 }, { "epoch": 0.3871390002454389, "grad_norm": 0.08932679891586304, "learning_rate": 9.735662343890723e-06, "loss": 0.004, "step": 23660 }, { "epoch": 0.3873026261965148, "grad_norm": 0.1253349781036377, "learning_rate": 9.735204017526611e-06, "loss": 0.0033, "step": 23670 }, { "epoch": 0.3874662521475906, "grad_norm": 0.0405646376311779, "learning_rate": 9.734745304975368e-06, "loss": 0.0055, "step": 23680 }, { "epoch": 0.38762987809866645, "grad_norm": 0.10145255923271179, "learning_rate": 9.734286206274408e-06, "loss": 0.0042, "step": 23690 }, { "epoch": 0.3877935040497423, "grad_norm": 0.032671693712472916, "learning_rate": 9.733826721461171e-06, "loss": 0.003, "step": 23700 }, { "epoch": 0.3879571300008181, "grad_norm": 0.016059959307312965, "learning_rate": 9.733366850573133e-06, "loss": 0.0034, "step": 23710 }, { "epoch": 0.388120755951894, "grad_norm": 0.08102528750896454, "learning_rate": 9.732906593647799e-06, "loss": 0.0044, "step": 23720 }, { "epoch": 0.3882843819029698, "grad_norm": 0.06434213370084763, "learning_rate": 9.732445950722706e-06, "loss": 0.0034, "step": 23730 }, { "epoch": 0.38844800785404565, "grad_norm": 0.10787647217512131, "learning_rate": 9.731984921835422e-06, "loss": 0.0026, "step": 23740 }, { "epoch": 0.3886116338051215, "grad_norm": 0.10055384039878845, "learning_rate": 9.731523507023547e-06, "loss": 0.0035, "step": 23750 }, { "epoch": 0.3887752597561973, "grad_norm": 0.21888549625873566, "learning_rate": 9.731061706324714e-06, "loss": 0.0036, "step": 23760 }, { "epoch": 0.3889388857072732, "grad_norm": 0.14533309638500214, "learning_rate": 9.730599519776584e-06, "loss": 0.0056, "step": 23770 }, { "epoch": 0.389102511658349, "grad_norm": 0.2747025191783905, "learning_rate": 9.730136947416853e-06, "loss": 0.0032, "step": 23780 }, { "epoch": 0.38926613760942486, "grad_norm": 0.1449516862630844, "learning_rate": 9.729673989283244e-06, "loss": 0.0042, "step": 23790 }, { "epoch": 0.3894297635605007, "grad_norm": 0.17499899864196777, "learning_rate": 9.729210645413518e-06, "loss": 0.0061, "step": 23800 }, { "epoch": 0.3895933895115765, "grad_norm": 0.0841960534453392, "learning_rate": 9.728746915845461e-06, "loss": 0.0039, "step": 23810 }, { "epoch": 0.3897570154626524, "grad_norm": 0.10993972420692444, "learning_rate": 9.728282800616896e-06, "loss": 0.0062, "step": 23820 }, { "epoch": 0.38992064141372823, "grad_norm": 0.06402769684791565, "learning_rate": 9.72781829976567e-06, "loss": 0.0045, "step": 23830 }, { "epoch": 0.39008426736480406, "grad_norm": 0.046928927302360535, "learning_rate": 9.72735341332967e-06, "loss": 0.0025, "step": 23840 }, { "epoch": 0.3902478933158799, "grad_norm": 0.04450773447751999, "learning_rate": 9.72688814134681e-06, "loss": 0.005, "step": 23850 }, { "epoch": 0.3904115192669557, "grad_norm": 0.07732709497213364, "learning_rate": 9.726422483855034e-06, "loss": 0.0033, "step": 23860 }, { "epoch": 0.3905751452180316, "grad_norm": 0.16074153780937195, "learning_rate": 9.72595644089232e-06, "loss": 0.004, "step": 23870 }, { "epoch": 0.39073877116910744, "grad_norm": 0.24663259088993073, "learning_rate": 9.725490012496682e-06, "loss": 0.0058, "step": 23880 }, { "epoch": 0.39090239712018326, "grad_norm": 0.3892887234687805, "learning_rate": 9.725023198706154e-06, "loss": 0.0044, "step": 23890 }, { "epoch": 0.3910660230712591, "grad_norm": 0.17836634814739227, "learning_rate": 9.724555999558809e-06, "loss": 0.0032, "step": 23900 }, { "epoch": 0.3912296490223349, "grad_norm": 0.016964320093393326, "learning_rate": 9.724088415092752e-06, "loss": 0.0036, "step": 23910 }, { "epoch": 0.3913932749734108, "grad_norm": 0.07119446992874146, "learning_rate": 9.723620445346116e-06, "loss": 0.0027, "step": 23920 }, { "epoch": 0.39155690092448664, "grad_norm": 0.04636238515377045, "learning_rate": 9.723152090357066e-06, "loss": 0.0023, "step": 23930 }, { "epoch": 0.39172052687556247, "grad_norm": 0.08750925213098526, "learning_rate": 9.722683350163804e-06, "loss": 0.0046, "step": 23940 }, { "epoch": 0.3918841528266383, "grad_norm": 0.18636532127857208, "learning_rate": 9.722214224804555e-06, "loss": 0.0051, "step": 23950 }, { "epoch": 0.39204777877771413, "grad_norm": 0.2473328560590744, "learning_rate": 9.72174471431758e-06, "loss": 0.0044, "step": 23960 }, { "epoch": 0.39221140472878996, "grad_norm": 0.37887436151504517, "learning_rate": 9.721274818741171e-06, "loss": 0.0046, "step": 23970 }, { "epoch": 0.39237503067986584, "grad_norm": 0.09699217230081558, "learning_rate": 9.720804538113651e-06, "loss": 0.0032, "step": 23980 }, { "epoch": 0.3925386566309417, "grad_norm": 0.024318208917975426, "learning_rate": 9.720333872473373e-06, "loss": 0.0056, "step": 23990 }, { "epoch": 0.3927022825820175, "grad_norm": 0.09461870044469833, "learning_rate": 9.719862821858726e-06, "loss": 0.0037, "step": 24000 }, { "epoch": 0.39286590853309333, "grad_norm": 0.12473174184560776, "learning_rate": 9.719391386308125e-06, "loss": 0.0043, "step": 24010 }, { "epoch": 0.39302953448416916, "grad_norm": 0.2460101693868637, "learning_rate": 9.71891956586002e-06, "loss": 0.0036, "step": 24020 }, { "epoch": 0.39319316043524505, "grad_norm": 0.0626598596572876, "learning_rate": 9.71844736055289e-06, "loss": 0.0042, "step": 24030 }, { "epoch": 0.3933567863863209, "grad_norm": 0.06889753043651581, "learning_rate": 9.717974770425246e-06, "loss": 0.0032, "step": 24040 }, { "epoch": 0.3935204123373967, "grad_norm": 0.18909382820129395, "learning_rate": 9.717501795515632e-06, "loss": 0.0045, "step": 24050 }, { "epoch": 0.39368403828847254, "grad_norm": 0.09070835262537003, "learning_rate": 9.717028435862623e-06, "loss": 0.0022, "step": 24060 }, { "epoch": 0.39384766423954837, "grad_norm": 0.11403269320726395, "learning_rate": 9.716554691504822e-06, "loss": 0.003, "step": 24070 }, { "epoch": 0.39401129019062425, "grad_norm": 0.2606067657470703, "learning_rate": 9.716080562480867e-06, "loss": 0.0062, "step": 24080 }, { "epoch": 0.3941749161417001, "grad_norm": 0.05964642018079758, "learning_rate": 9.715606048829429e-06, "loss": 0.0054, "step": 24090 }, { "epoch": 0.3943385420927759, "grad_norm": 0.11207167059183121, "learning_rate": 9.715131150589203e-06, "loss": 0.0021, "step": 24100 }, { "epoch": 0.39450216804385174, "grad_norm": 0.3053283095359802, "learning_rate": 9.714655867798926e-06, "loss": 0.0038, "step": 24110 }, { "epoch": 0.39466579399492757, "grad_norm": 0.37912648916244507, "learning_rate": 9.714180200497353e-06, "loss": 0.0086, "step": 24120 }, { "epoch": 0.39482941994600346, "grad_norm": 0.3171529471874237, "learning_rate": 9.713704148723286e-06, "loss": 0.004, "step": 24130 }, { "epoch": 0.3949930458970793, "grad_norm": 0.20151226222515106, "learning_rate": 9.713227712515543e-06, "loss": 0.0034, "step": 24140 }, { "epoch": 0.3951566718481551, "grad_norm": 0.3001769185066223, "learning_rate": 9.712750891912986e-06, "loss": 0.0041, "step": 24150 }, { "epoch": 0.39532029779923095, "grad_norm": 0.15295594930648804, "learning_rate": 9.712273686954498e-06, "loss": 0.007, "step": 24160 }, { "epoch": 0.3954839237503068, "grad_norm": 0.19454239308834076, "learning_rate": 9.711796097679e-06, "loss": 0.0029, "step": 24170 }, { "epoch": 0.39564754970138266, "grad_norm": 0.09349364042282104, "learning_rate": 9.711318124125445e-06, "loss": 0.0031, "step": 24180 }, { "epoch": 0.3958111756524585, "grad_norm": 0.12422489374876022, "learning_rate": 9.710839766332814e-06, "loss": 0.0032, "step": 24190 }, { "epoch": 0.3959748016035343, "grad_norm": 0.11687396466732025, "learning_rate": 9.710361024340118e-06, "loss": 0.0035, "step": 24200 }, { "epoch": 0.39613842755461015, "grad_norm": 0.05111733451485634, "learning_rate": 9.709881898186403e-06, "loss": 0.0045, "step": 24210 }, { "epoch": 0.396302053505686, "grad_norm": 0.15105533599853516, "learning_rate": 9.709402387910745e-06, "loss": 0.0046, "step": 24220 }, { "epoch": 0.39646567945676187, "grad_norm": 0.271156370639801, "learning_rate": 9.70892249355225e-06, "loss": 0.004, "step": 24230 }, { "epoch": 0.3966293054078377, "grad_norm": 0.1677645742893219, "learning_rate": 9.70844221515006e-06, "loss": 0.0037, "step": 24240 }, { "epoch": 0.3967929313589135, "grad_norm": 0.12438059598207474, "learning_rate": 9.707961552743341e-06, "loss": 0.0031, "step": 24250 }, { "epoch": 0.39695655730998936, "grad_norm": 0.3922032415866852, "learning_rate": 9.707480506371295e-06, "loss": 0.0054, "step": 24260 }, { "epoch": 0.3971201832610652, "grad_norm": 0.12613599002361298, "learning_rate": 9.706999076073157e-06, "loss": 0.0045, "step": 24270 }, { "epoch": 0.39728380921214107, "grad_norm": 0.06518051028251648, "learning_rate": 9.706517261888187e-06, "loss": 0.003, "step": 24280 }, { "epoch": 0.3974474351632169, "grad_norm": 0.041631996631622314, "learning_rate": 9.706035063855682e-06, "loss": 0.002, "step": 24290 }, { "epoch": 0.39761106111429273, "grad_norm": 0.12702889740467072, "learning_rate": 9.705552482014969e-06, "loss": 0.0032, "step": 24300 }, { "epoch": 0.39777468706536856, "grad_norm": 0.10419308394193649, "learning_rate": 9.705069516405405e-06, "loss": 0.0063, "step": 24310 }, { "epoch": 0.3979383130164444, "grad_norm": 0.3529118299484253, "learning_rate": 9.704586167066382e-06, "loss": 0.0034, "step": 24320 }, { "epoch": 0.3981019389675203, "grad_norm": 0.09571299701929092, "learning_rate": 9.704102434037314e-06, "loss": 0.0023, "step": 24330 }, { "epoch": 0.3982655649185961, "grad_norm": 0.08594782650470734, "learning_rate": 9.703618317357657e-06, "loss": 0.0035, "step": 24340 }, { "epoch": 0.39842919086967193, "grad_norm": 0.05259867385029793, "learning_rate": 9.703133817066894e-06, "loss": 0.0021, "step": 24350 }, { "epoch": 0.39859281682074776, "grad_norm": 0.18884804844856262, "learning_rate": 9.702648933204537e-06, "loss": 0.0047, "step": 24360 }, { "epoch": 0.3987564427718236, "grad_norm": 0.24906374514102936, "learning_rate": 9.702163665810135e-06, "loss": 0.0035, "step": 24370 }, { "epoch": 0.3989200687228995, "grad_norm": 0.11324939131736755, "learning_rate": 9.70167801492326e-06, "loss": 0.003, "step": 24380 }, { "epoch": 0.3990836946739753, "grad_norm": 0.11663619428873062, "learning_rate": 9.701191980583524e-06, "loss": 0.0034, "step": 24390 }, { "epoch": 0.39924732062505114, "grad_norm": 0.3687373697757721, "learning_rate": 9.700705562830566e-06, "loss": 0.0037, "step": 24400 }, { "epoch": 0.39941094657612697, "grad_norm": 0.07192040234804153, "learning_rate": 9.700218761704054e-06, "loss": 0.0022, "step": 24410 }, { "epoch": 0.3995745725272028, "grad_norm": 0.03650715947151184, "learning_rate": 9.699731577243692e-06, "loss": 0.0046, "step": 24420 }, { "epoch": 0.39973819847827863, "grad_norm": 0.18229758739471436, "learning_rate": 9.69924400948921e-06, "loss": 0.0045, "step": 24430 }, { "epoch": 0.3999018244293545, "grad_norm": 0.3506583869457245, "learning_rate": 9.698756058480378e-06, "loss": 0.0021, "step": 24440 }, { "epoch": 0.40006545038043034, "grad_norm": 0.18497774004936218, "learning_rate": 9.698267724256988e-06, "loss": 0.0034, "step": 24450 }, { "epoch": 0.4002290763315062, "grad_norm": 0.12030255049467087, "learning_rate": 9.697779006858866e-06, "loss": 0.0037, "step": 24460 }, { "epoch": 0.400392702282582, "grad_norm": 0.1044439971446991, "learning_rate": 9.697289906325873e-06, "loss": 0.0028, "step": 24470 }, { "epoch": 0.40055632823365783, "grad_norm": 0.09643489122390747, "learning_rate": 9.696800422697896e-06, "loss": 0.004, "step": 24480 }, { "epoch": 0.4007199541847337, "grad_norm": 0.08540601283311844, "learning_rate": 9.696310556014856e-06, "loss": 0.0054, "step": 24490 }, { "epoch": 0.40088358013580955, "grad_norm": 0.11384651809930801, "learning_rate": 9.695820306316705e-06, "loss": 0.0033, "step": 24500 }, { "epoch": 0.4010472060868854, "grad_norm": 0.024184083566069603, "learning_rate": 9.695329673643427e-06, "loss": 0.0034, "step": 24510 }, { "epoch": 0.4012108320379612, "grad_norm": 0.06803640723228455, "learning_rate": 9.694838658035034e-06, "loss": 0.0025, "step": 24520 }, { "epoch": 0.40137445798903704, "grad_norm": 0.059210050851106644, "learning_rate": 9.694347259531576e-06, "loss": 0.0035, "step": 24530 }, { "epoch": 0.4015380839401129, "grad_norm": 0.22898975014686584, "learning_rate": 9.693855478173127e-06, "loss": 0.0063, "step": 24540 }, { "epoch": 0.40170170989118875, "grad_norm": 0.04901501163840294, "learning_rate": 9.693363313999792e-06, "loss": 0.0039, "step": 24550 }, { "epoch": 0.4018653358422646, "grad_norm": 0.3988078832626343, "learning_rate": 9.692870767051717e-06, "loss": 0.0044, "step": 24560 }, { "epoch": 0.4020289617933404, "grad_norm": 0.06453163921833038, "learning_rate": 9.692377837369066e-06, "loss": 0.0035, "step": 24570 }, { "epoch": 0.40219258774441624, "grad_norm": 0.1123410314321518, "learning_rate": 9.691884524992045e-06, "loss": 0.0035, "step": 24580 }, { "epoch": 0.4023562136954921, "grad_norm": 0.20643159747123718, "learning_rate": 9.691390829960886e-06, "loss": 0.0029, "step": 24590 }, { "epoch": 0.40251983964656796, "grad_norm": 0.27239346504211426, "learning_rate": 9.690896752315851e-06, "loss": 0.0041, "step": 24600 }, { "epoch": 0.4026834655976438, "grad_norm": 0.08087146282196045, "learning_rate": 9.690402292097237e-06, "loss": 0.0035, "step": 24610 }, { "epoch": 0.4028470915487196, "grad_norm": 0.0781392976641655, "learning_rate": 9.689907449345369e-06, "loss": 0.0016, "step": 24620 }, { "epoch": 0.40301071749979545, "grad_norm": 0.16768626868724823, "learning_rate": 9.689412224100607e-06, "loss": 0.0025, "step": 24630 }, { "epoch": 0.40317434345087133, "grad_norm": 0.37064576148986816, "learning_rate": 9.688916616403338e-06, "loss": 0.0059, "step": 24640 }, { "epoch": 0.40333796940194716, "grad_norm": 0.051520735025405884, "learning_rate": 9.688420626293984e-06, "loss": 0.004, "step": 24650 }, { "epoch": 0.403501595353023, "grad_norm": 0.06874562799930573, "learning_rate": 9.687924253812994e-06, "loss": 0.0037, "step": 24660 }, { "epoch": 0.4036652213040988, "grad_norm": 0.0444541834294796, "learning_rate": 9.687427499000852e-06, "loss": 0.005, "step": 24670 }, { "epoch": 0.40382884725517465, "grad_norm": 0.26220789551734924, "learning_rate": 9.686930361898073e-06, "loss": 0.0033, "step": 24680 }, { "epoch": 0.40399247320625054, "grad_norm": 0.12733012437820435, "learning_rate": 9.686432842545197e-06, "loss": 0.0039, "step": 24690 }, { "epoch": 0.40415609915732637, "grad_norm": 0.08577050268650055, "learning_rate": 9.685934940982806e-06, "loss": 0.0041, "step": 24700 }, { "epoch": 0.4043197251084022, "grad_norm": 0.14089174568653107, "learning_rate": 9.685436657251501e-06, "loss": 0.0036, "step": 24710 }, { "epoch": 0.404483351059478, "grad_norm": 0.19395211338996887, "learning_rate": 9.684937991391924e-06, "loss": 0.0034, "step": 24720 }, { "epoch": 0.40464697701055385, "grad_norm": 0.15160194039344788, "learning_rate": 9.684438943444747e-06, "loss": 0.0032, "step": 24730 }, { "epoch": 0.40481060296162974, "grad_norm": 0.06268467754125595, "learning_rate": 9.683939513450665e-06, "loss": 0.0042, "step": 24740 }, { "epoch": 0.40497422891270557, "grad_norm": 0.0942181795835495, "learning_rate": 9.683439701450413e-06, "loss": 0.0048, "step": 24750 }, { "epoch": 0.4051378548637814, "grad_norm": 0.20840847492218018, "learning_rate": 9.682939507484754e-06, "loss": 0.0043, "step": 24760 }, { "epoch": 0.40530148081485723, "grad_norm": 0.14479120075702667, "learning_rate": 9.68243893159448e-06, "loss": 0.0026, "step": 24770 }, { "epoch": 0.40546510676593306, "grad_norm": 0.09427852183580399, "learning_rate": 9.68193797382042e-06, "loss": 0.0035, "step": 24780 }, { "epoch": 0.40562873271700894, "grad_norm": 0.1335517019033432, "learning_rate": 9.681436634203426e-06, "loss": 0.0045, "step": 24790 }, { "epoch": 0.4057923586680848, "grad_norm": 0.12426796555519104, "learning_rate": 9.680934912784388e-06, "loss": 0.0037, "step": 24800 }, { "epoch": 0.4059559846191606, "grad_norm": 0.3622891902923584, "learning_rate": 9.680432809604225e-06, "loss": 0.0026, "step": 24810 }, { "epoch": 0.40611961057023643, "grad_norm": 0.09143351763486862, "learning_rate": 9.679930324703888e-06, "loss": 0.0036, "step": 24820 }, { "epoch": 0.40628323652131226, "grad_norm": 0.26484695076942444, "learning_rate": 9.679427458124352e-06, "loss": 0.0058, "step": 24830 }, { "epoch": 0.40644686247238815, "grad_norm": 0.1457994282245636, "learning_rate": 9.678924209906637e-06, "loss": 0.0045, "step": 24840 }, { "epoch": 0.406610488423464, "grad_norm": 0.19691668450832367, "learning_rate": 9.678420580091781e-06, "loss": 0.0026, "step": 24850 }, { "epoch": 0.4067741143745398, "grad_norm": 0.06473774462938309, "learning_rate": 9.67791656872086e-06, "loss": 0.003, "step": 24860 }, { "epoch": 0.40693774032561564, "grad_norm": 0.3390854597091675, "learning_rate": 9.67741217583498e-06, "loss": 0.0044, "step": 24870 }, { "epoch": 0.40710136627669147, "grad_norm": 0.0831959918141365, "learning_rate": 9.676907401475277e-06, "loss": 0.0038, "step": 24880 }, { "epoch": 0.4072649922277673, "grad_norm": 0.05004332587122917, "learning_rate": 9.676402245682916e-06, "loss": 0.0045, "step": 24890 }, { "epoch": 0.4074286181788432, "grad_norm": 0.12332847714424133, "learning_rate": 9.675896708499102e-06, "loss": 0.0028, "step": 24900 }, { "epoch": 0.407592244129919, "grad_norm": 0.26546967029571533, "learning_rate": 9.675390789965058e-06, "loss": 0.0027, "step": 24910 }, { "epoch": 0.40775587008099484, "grad_norm": 0.09480591863393784, "learning_rate": 9.67488449012205e-06, "loss": 0.0032, "step": 24920 }, { "epoch": 0.40791949603207067, "grad_norm": 0.11836016178131104, "learning_rate": 9.674377809011368e-06, "loss": 0.0049, "step": 24930 }, { "epoch": 0.4080831219831465, "grad_norm": 0.2503274083137512, "learning_rate": 9.673870746674336e-06, "loss": 0.0039, "step": 24940 }, { "epoch": 0.4082467479342224, "grad_norm": 0.115508534014225, "learning_rate": 9.673363303152306e-06, "loss": 0.0043, "step": 24950 }, { "epoch": 0.4084103738852982, "grad_norm": 0.16050244867801666, "learning_rate": 9.672855478486668e-06, "loss": 0.0038, "step": 24960 }, { "epoch": 0.40857399983637405, "grad_norm": 0.05900110304355621, "learning_rate": 9.672347272718835e-06, "loss": 0.0057, "step": 24970 }, { "epoch": 0.4087376257874499, "grad_norm": 0.21688447892665863, "learning_rate": 9.671838685890252e-06, "loss": 0.0044, "step": 24980 }, { "epoch": 0.4089012517385257, "grad_norm": 0.08749242126941681, "learning_rate": 9.671329718042404e-06, "loss": 0.0037, "step": 24990 }, { "epoch": 0.4090648776896016, "grad_norm": 0.14104853570461273, "learning_rate": 9.670820369216795e-06, "loss": 0.0029, "step": 25000 }, { "epoch": 0.4092285036406774, "grad_norm": 0.08482057601213455, "learning_rate": 9.670310639454969e-06, "loss": 0.0057, "step": 25010 }, { "epoch": 0.40939212959175325, "grad_norm": 0.062256261706352234, "learning_rate": 9.669800528798498e-06, "loss": 0.0038, "step": 25020 }, { "epoch": 0.4095557555428291, "grad_norm": 0.16285395622253418, "learning_rate": 9.669290037288984e-06, "loss": 0.0035, "step": 25030 }, { "epoch": 0.4097193814939049, "grad_norm": 0.029890358448028564, "learning_rate": 9.668779164968061e-06, "loss": 0.0052, "step": 25040 }, { "epoch": 0.4098830074449808, "grad_norm": 0.07181970030069351, "learning_rate": 9.668267911877392e-06, "loss": 0.0036, "step": 25050 }, { "epoch": 0.4100466333960566, "grad_norm": 0.13503605127334595, "learning_rate": 9.667756278058676e-06, "loss": 0.0054, "step": 25060 }, { "epoch": 0.41021025934713246, "grad_norm": 0.2239055335521698, "learning_rate": 9.667244263553639e-06, "loss": 0.0026, "step": 25070 }, { "epoch": 0.4103738852982083, "grad_norm": 0.06981676816940308, "learning_rate": 9.66673186840404e-06, "loss": 0.0037, "step": 25080 }, { "epoch": 0.4105375112492841, "grad_norm": 0.045035943388938904, "learning_rate": 9.666219092651667e-06, "loss": 0.0033, "step": 25090 }, { "epoch": 0.41070113720036, "grad_norm": 0.04974318668246269, "learning_rate": 9.66570593633834e-06, "loss": 0.0039, "step": 25100 }, { "epoch": 0.41086476315143583, "grad_norm": 0.20097310841083527, "learning_rate": 9.665192399505912e-06, "loss": 0.0036, "step": 25110 }, { "epoch": 0.41102838910251166, "grad_norm": 0.5315530300140381, "learning_rate": 9.664678482196264e-06, "loss": 0.0038, "step": 25120 }, { "epoch": 0.4111920150535875, "grad_norm": 0.11775261908769608, "learning_rate": 9.664164184451309e-06, "loss": 0.0027, "step": 25130 }, { "epoch": 0.4113556410046633, "grad_norm": 0.04819796234369278, "learning_rate": 9.663649506312991e-06, "loss": 0.0061, "step": 25140 }, { "epoch": 0.4115192669557392, "grad_norm": 0.12614324688911438, "learning_rate": 9.663134447823287e-06, "loss": 0.0032, "step": 25150 }, { "epoch": 0.41168289290681503, "grad_norm": 0.19738654792308807, "learning_rate": 9.662619009024203e-06, "loss": 0.0032, "step": 25160 }, { "epoch": 0.41184651885789086, "grad_norm": 0.11247184127569199, "learning_rate": 9.662103189957777e-06, "loss": 0.006, "step": 25170 }, { "epoch": 0.4120101448089667, "grad_norm": 0.05924522876739502, "learning_rate": 9.661586990666077e-06, "loss": 0.004, "step": 25180 }, { "epoch": 0.4121737707600425, "grad_norm": 0.08525634557008743, "learning_rate": 9.6610704111912e-06, "loss": 0.0034, "step": 25190 }, { "epoch": 0.4123373967111184, "grad_norm": 0.06503406912088394, "learning_rate": 9.66055345157528e-06, "loss": 0.0063, "step": 25200 }, { "epoch": 0.41250102266219424, "grad_norm": 0.06928110122680664, "learning_rate": 9.660036111860478e-06, "loss": 0.0052, "step": 25210 }, { "epoch": 0.41266464861327007, "grad_norm": 0.1879051774740219, "learning_rate": 9.659518392088985e-06, "loss": 0.0024, "step": 25220 }, { "epoch": 0.4128282745643459, "grad_norm": 0.1871768981218338, "learning_rate": 9.659000292303023e-06, "loss": 0.0043, "step": 25230 }, { "epoch": 0.41299190051542173, "grad_norm": 0.09390852600336075, "learning_rate": 9.658481812544851e-06, "loss": 0.0027, "step": 25240 }, { "epoch": 0.4131555264664976, "grad_norm": 0.16996236145496368, "learning_rate": 9.65796295285675e-06, "loss": 0.0034, "step": 25250 }, { "epoch": 0.41331915241757344, "grad_norm": 0.18050551414489746, "learning_rate": 9.65744371328104e-06, "loss": 0.0039, "step": 25260 }, { "epoch": 0.4134827783686493, "grad_norm": 0.19516703486442566, "learning_rate": 9.656924093860064e-06, "loss": 0.0025, "step": 25270 }, { "epoch": 0.4136464043197251, "grad_norm": 0.12207509577274323, "learning_rate": 9.656404094636207e-06, "loss": 0.0041, "step": 25280 }, { "epoch": 0.41381003027080093, "grad_norm": 0.15849445760250092, "learning_rate": 9.655883715651873e-06, "loss": 0.0033, "step": 25290 }, { "epoch": 0.41397365622187676, "grad_norm": 0.15875327587127686, "learning_rate": 9.655362956949503e-06, "loss": 0.0035, "step": 25300 }, { "epoch": 0.41413728217295265, "grad_norm": 0.12671710550785065, "learning_rate": 9.654841818571568e-06, "loss": 0.0029, "step": 25310 }, { "epoch": 0.4143009081240285, "grad_norm": 0.12516403198242188, "learning_rate": 9.654320300560573e-06, "loss": 0.0044, "step": 25320 }, { "epoch": 0.4144645340751043, "grad_norm": 0.012723371386528015, "learning_rate": 9.65379840295905e-06, "loss": 0.0052, "step": 25330 }, { "epoch": 0.41462816002618014, "grad_norm": 0.23379164934158325, "learning_rate": 9.653276125809564e-06, "loss": 0.0047, "step": 25340 }, { "epoch": 0.41479178597725597, "grad_norm": 0.03316957876086235, "learning_rate": 9.652753469154707e-06, "loss": 0.0032, "step": 25350 }, { "epoch": 0.41495541192833185, "grad_norm": 0.05698050558567047, "learning_rate": 9.652230433037106e-06, "loss": 0.0031, "step": 25360 }, { "epoch": 0.4151190378794077, "grad_norm": 0.08126674592494965, "learning_rate": 9.651707017499421e-06, "loss": 0.0042, "step": 25370 }, { "epoch": 0.4152826638304835, "grad_norm": 0.04867204651236534, "learning_rate": 9.651183222584338e-06, "loss": 0.0034, "step": 25380 }, { "epoch": 0.41544628978155934, "grad_norm": 0.08237331360578537, "learning_rate": 9.650659048334577e-06, "loss": 0.0029, "step": 25390 }, { "epoch": 0.41560991573263517, "grad_norm": 0.041710592806339264, "learning_rate": 9.650134494792884e-06, "loss": 0.0029, "step": 25400 }, { "epoch": 0.41577354168371106, "grad_norm": 0.030708497390151024, "learning_rate": 9.649609562002045e-06, "loss": 0.0036, "step": 25410 }, { "epoch": 0.4159371676347869, "grad_norm": 0.059733130037784576, "learning_rate": 9.64908425000487e-06, "loss": 0.0036, "step": 25420 }, { "epoch": 0.4161007935858627, "grad_norm": 0.06851238012313843, "learning_rate": 9.648558558844198e-06, "loss": 0.002, "step": 25430 }, { "epoch": 0.41626441953693855, "grad_norm": 0.15898770093917847, "learning_rate": 9.64803248856291e-06, "loss": 0.0047, "step": 25440 }, { "epoch": 0.4164280454880144, "grad_norm": 0.12070489674806595, "learning_rate": 9.647506039203902e-06, "loss": 0.0044, "step": 25450 }, { "epoch": 0.41659167143909026, "grad_norm": 0.037828654050827026, "learning_rate": 9.646979210810117e-06, "loss": 0.0042, "step": 25460 }, { "epoch": 0.4167552973901661, "grad_norm": 0.06421557813882828, "learning_rate": 9.646452003424515e-06, "loss": 0.0037, "step": 25470 }, { "epoch": 0.4169189233412419, "grad_norm": 0.08535627275705338, "learning_rate": 9.645924417090098e-06, "loss": 0.0023, "step": 25480 }, { "epoch": 0.41708254929231775, "grad_norm": 0.1495044231414795, "learning_rate": 9.645396451849893e-06, "loss": 0.0038, "step": 25490 }, { "epoch": 0.4172461752433936, "grad_norm": 0.1131482869386673, "learning_rate": 9.644868107746957e-06, "loss": 0.0043, "step": 25500 }, { "epoch": 0.41740980119446947, "grad_norm": 0.03557871654629707, "learning_rate": 9.644339384824381e-06, "loss": 0.0037, "step": 25510 }, { "epoch": 0.4175734271455453, "grad_norm": 0.11914899200201035, "learning_rate": 9.643810283125287e-06, "loss": 0.0031, "step": 25520 }, { "epoch": 0.4177370530966211, "grad_norm": 0.4008508026599884, "learning_rate": 9.643280802692827e-06, "loss": 0.0037, "step": 25530 }, { "epoch": 0.41790067904769695, "grad_norm": 0.15735241770744324, "learning_rate": 9.64275094357018e-06, "loss": 0.004, "step": 25540 }, { "epoch": 0.4180643049987728, "grad_norm": 0.06464161723852158, "learning_rate": 9.642220705800565e-06, "loss": 0.0031, "step": 25550 }, { "epoch": 0.41822793094984867, "grad_norm": 0.22424744069576263, "learning_rate": 9.641690089427222e-06, "loss": 0.0038, "step": 25560 }, { "epoch": 0.4183915569009245, "grad_norm": 0.28786009550094604, "learning_rate": 9.641159094493428e-06, "loss": 0.0029, "step": 25570 }, { "epoch": 0.41855518285200033, "grad_norm": 0.04271455854177475, "learning_rate": 9.640627721042488e-06, "loss": 0.0035, "step": 25580 }, { "epoch": 0.41871880880307616, "grad_norm": 0.023068362846970558, "learning_rate": 9.64009596911774e-06, "loss": 0.0043, "step": 25590 }, { "epoch": 0.418882434754152, "grad_norm": 0.23619158565998077, "learning_rate": 9.639563838762552e-06, "loss": 0.0066, "step": 25600 }, { "epoch": 0.4190460607052279, "grad_norm": 0.1617545187473297, "learning_rate": 9.639031330020323e-06, "loss": 0.0026, "step": 25610 }, { "epoch": 0.4192096866563037, "grad_norm": 0.6730504035949707, "learning_rate": 9.63849844293448e-06, "loss": 0.0065, "step": 25620 }, { "epoch": 0.41937331260737953, "grad_norm": 0.10138659924268723, "learning_rate": 9.637965177548488e-06, "loss": 0.0038, "step": 25630 }, { "epoch": 0.41953693855845536, "grad_norm": 0.04692848399281502, "learning_rate": 9.637431533905834e-06, "loss": 0.0039, "step": 25640 }, { "epoch": 0.4197005645095312, "grad_norm": 0.1432286500930786, "learning_rate": 9.636897512050044e-06, "loss": 0.0029, "step": 25650 }, { "epoch": 0.4198641904606071, "grad_norm": 0.14014388620853424, "learning_rate": 9.636363112024668e-06, "loss": 0.0035, "step": 25660 }, { "epoch": 0.4200278164116829, "grad_norm": 0.06834814697504044, "learning_rate": 9.635828333873291e-06, "loss": 0.0044, "step": 25670 }, { "epoch": 0.42019144236275874, "grad_norm": 0.1365925818681717, "learning_rate": 9.635293177639526e-06, "loss": 0.0031, "step": 25680 }, { "epoch": 0.42035506831383457, "grad_norm": 0.04953382536768913, "learning_rate": 9.634757643367023e-06, "loss": 0.0031, "step": 25690 }, { "epoch": 0.4205186942649104, "grad_norm": 0.09412840753793716, "learning_rate": 9.634221731099454e-06, "loss": 0.0021, "step": 25700 }, { "epoch": 0.4206823202159863, "grad_norm": 0.28074344992637634, "learning_rate": 9.633685440880527e-06, "loss": 0.0037, "step": 25710 }, { "epoch": 0.4208459461670621, "grad_norm": 0.10350345820188522, "learning_rate": 9.633148772753981e-06, "loss": 0.0026, "step": 25720 }, { "epoch": 0.42100957211813794, "grad_norm": 0.2897213399410248, "learning_rate": 9.632611726763584e-06, "loss": 0.0039, "step": 25730 }, { "epoch": 0.42117319806921377, "grad_norm": 0.12776196002960205, "learning_rate": 9.632074302953135e-06, "loss": 0.0025, "step": 25740 }, { "epoch": 0.4213368240202896, "grad_norm": 0.10490020364522934, "learning_rate": 9.63153650136647e-06, "loss": 0.0028, "step": 25750 }, { "epoch": 0.42150044997136543, "grad_norm": 0.20296016335487366, "learning_rate": 9.630998322047442e-06, "loss": 0.004, "step": 25760 }, { "epoch": 0.4216640759224413, "grad_norm": 0.6429498791694641, "learning_rate": 9.630459765039948e-06, "loss": 0.0032, "step": 25770 }, { "epoch": 0.42182770187351715, "grad_norm": 0.10263705253601074, "learning_rate": 9.629920830387908e-06, "loss": 0.0037, "step": 25780 }, { "epoch": 0.421991327824593, "grad_norm": 0.09050842374563217, "learning_rate": 9.62938151813528e-06, "loss": 0.0034, "step": 25790 }, { "epoch": 0.4221549537756688, "grad_norm": 0.06890574097633362, "learning_rate": 9.628841828326046e-06, "loss": 0.005, "step": 25800 }, { "epoch": 0.42231857972674464, "grad_norm": 0.42209726572036743, "learning_rate": 9.628301761004219e-06, "loss": 0.0043, "step": 25810 }, { "epoch": 0.4224822056778205, "grad_norm": 0.04127822816371918, "learning_rate": 9.627761316213848e-06, "loss": 0.0026, "step": 25820 }, { "epoch": 0.42264583162889635, "grad_norm": 0.09475449472665787, "learning_rate": 9.627220493999008e-06, "loss": 0.0032, "step": 25830 }, { "epoch": 0.4228094575799722, "grad_norm": 0.07014153897762299, "learning_rate": 9.626679294403809e-06, "loss": 0.0039, "step": 25840 }, { "epoch": 0.422973083531048, "grad_norm": 0.08209510147571564, "learning_rate": 9.626137717472387e-06, "loss": 0.0054, "step": 25850 }, { "epoch": 0.42313670948212384, "grad_norm": 0.08755461871623993, "learning_rate": 9.625595763248915e-06, "loss": 0.0029, "step": 25860 }, { "epoch": 0.4233003354331997, "grad_norm": 0.17248448729515076, "learning_rate": 9.62505343177759e-06, "loss": 0.0037, "step": 25870 }, { "epoch": 0.42346396138427556, "grad_norm": 0.19342680275440216, "learning_rate": 9.62451072310264e-06, "loss": 0.0031, "step": 25880 }, { "epoch": 0.4236275873353514, "grad_norm": 0.06711114943027496, "learning_rate": 9.62396763726833e-06, "loss": 0.0024, "step": 25890 }, { "epoch": 0.4237912132864272, "grad_norm": 0.07085072994232178, "learning_rate": 9.623424174318953e-06, "loss": 0.0039, "step": 25900 }, { "epoch": 0.42395483923750305, "grad_norm": 0.1373291313648224, "learning_rate": 9.62288033429883e-06, "loss": 0.0034, "step": 25910 }, { "epoch": 0.42411846518857893, "grad_norm": 0.12586650252342224, "learning_rate": 9.622336117252314e-06, "loss": 0.0034, "step": 25920 }, { "epoch": 0.42428209113965476, "grad_norm": 0.266230046749115, "learning_rate": 9.621791523223792e-06, "loss": 0.0041, "step": 25930 }, { "epoch": 0.4244457170907306, "grad_norm": 0.06697499006986618, "learning_rate": 9.62124655225768e-06, "loss": 0.0043, "step": 25940 }, { "epoch": 0.4246093430418064, "grad_norm": 0.01914454810321331, "learning_rate": 9.620701204398419e-06, "loss": 0.0019, "step": 25950 }, { "epoch": 0.42477296899288225, "grad_norm": 0.11346393823623657, "learning_rate": 9.62015547969049e-06, "loss": 0.0035, "step": 25960 }, { "epoch": 0.42493659494395813, "grad_norm": 0.1259746253490448, "learning_rate": 9.619609378178398e-06, "loss": 0.0027, "step": 25970 }, { "epoch": 0.42510022089503396, "grad_norm": 0.14006903767585754, "learning_rate": 9.619062899906684e-06, "loss": 0.0026, "step": 25980 }, { "epoch": 0.4252638468461098, "grad_norm": 0.09713710099458694, "learning_rate": 9.618516044919914e-06, "loss": 0.0021, "step": 25990 }, { "epoch": 0.4254274727971856, "grad_norm": 0.19937124848365784, "learning_rate": 9.61796881326269e-06, "loss": 0.0029, "step": 26000 }, { "epoch": 0.42559109874826145, "grad_norm": 0.15069615840911865, "learning_rate": 9.617421204979642e-06, "loss": 0.0052, "step": 26010 }, { "epoch": 0.42575472469933734, "grad_norm": 0.21594305336475372, "learning_rate": 9.616873220115429e-06, "loss": 0.0044, "step": 26020 }, { "epoch": 0.42591835065041317, "grad_norm": 0.17940425872802734, "learning_rate": 9.616324858714743e-06, "loss": 0.004, "step": 26030 }, { "epoch": 0.426081976601489, "grad_norm": 0.11648151278495789, "learning_rate": 9.61577612082231e-06, "loss": 0.0027, "step": 26040 }, { "epoch": 0.42624560255256483, "grad_norm": 0.17684414982795715, "learning_rate": 9.61522700648288e-06, "loss": 0.0041, "step": 26050 }, { "epoch": 0.42640922850364066, "grad_norm": 0.027457231655716896, "learning_rate": 9.614677515741238e-06, "loss": 0.0028, "step": 26060 }, { "epoch": 0.42657285445471654, "grad_norm": 0.06978648155927658, "learning_rate": 9.614127648642197e-06, "loss": 0.003, "step": 26070 }, { "epoch": 0.4267364804057924, "grad_norm": 0.21632900834083557, "learning_rate": 9.613577405230605e-06, "loss": 0.0046, "step": 26080 }, { "epoch": 0.4269001063568682, "grad_norm": 0.03772740811109543, "learning_rate": 9.613026785551336e-06, "loss": 0.004, "step": 26090 }, { "epoch": 0.42706373230794403, "grad_norm": 0.057381242513656616, "learning_rate": 9.612475789649297e-06, "loss": 0.0035, "step": 26100 }, { "epoch": 0.42722735825901986, "grad_norm": 0.10717799514532089, "learning_rate": 9.611924417569424e-06, "loss": 0.0026, "step": 26110 }, { "epoch": 0.42739098421009575, "grad_norm": 0.185784250497818, "learning_rate": 9.61137266935669e-06, "loss": 0.0044, "step": 26120 }, { "epoch": 0.4275546101611716, "grad_norm": 0.45703235268592834, "learning_rate": 9.610820545056089e-06, "loss": 0.0036, "step": 26130 }, { "epoch": 0.4277182361122474, "grad_norm": 0.11393826454877853, "learning_rate": 9.610268044712651e-06, "loss": 0.0039, "step": 26140 }, { "epoch": 0.42788186206332324, "grad_norm": 0.1036711037158966, "learning_rate": 9.609715168371439e-06, "loss": 0.0024, "step": 26150 }, { "epoch": 0.42804548801439907, "grad_norm": 0.07201945036649704, "learning_rate": 9.609161916077538e-06, "loss": 0.0035, "step": 26160 }, { "epoch": 0.4282091139654749, "grad_norm": 0.16630186140537262, "learning_rate": 9.608608287876075e-06, "loss": 0.0048, "step": 26170 }, { "epoch": 0.4283727399165508, "grad_norm": 0.09769545495510101, "learning_rate": 9.608054283812199e-06, "loss": 0.0033, "step": 26180 }, { "epoch": 0.4285363658676266, "grad_norm": 0.11515926569700241, "learning_rate": 9.607499903931094e-06, "loss": 0.0032, "step": 26190 }, { "epoch": 0.42869999181870244, "grad_norm": 0.01391480304300785, "learning_rate": 9.606945148277974e-06, "loss": 0.0031, "step": 26200 }, { "epoch": 0.42886361776977827, "grad_norm": 0.2259417474269867, "learning_rate": 9.606390016898081e-06, "loss": 0.0032, "step": 26210 }, { "epoch": 0.4290272437208541, "grad_norm": 0.045059408992528915, "learning_rate": 9.605834509836688e-06, "loss": 0.0044, "step": 26220 }, { "epoch": 0.42919086967193, "grad_norm": 0.05494910478591919, "learning_rate": 9.605278627139107e-06, "loss": 0.0027, "step": 26230 }, { "epoch": 0.4293544956230058, "grad_norm": 0.08120948821306229, "learning_rate": 9.604722368850668e-06, "loss": 0.0027, "step": 26240 }, { "epoch": 0.42951812157408165, "grad_norm": 0.12476938962936401, "learning_rate": 9.60416573501674e-06, "loss": 0.0039, "step": 26250 }, { "epoch": 0.4296817475251575, "grad_norm": 0.256864994764328, "learning_rate": 9.603608725682717e-06, "loss": 0.0059, "step": 26260 }, { "epoch": 0.4298453734762333, "grad_norm": 0.09119724482297897, "learning_rate": 9.603051340894031e-06, "loss": 0.0023, "step": 26270 }, { "epoch": 0.4300089994273092, "grad_norm": 0.040157802402973175, "learning_rate": 9.602493580696137e-06, "loss": 0.0025, "step": 26280 }, { "epoch": 0.430172625378385, "grad_norm": 0.12991248071193695, "learning_rate": 9.601935445134528e-06, "loss": 0.0041, "step": 26290 }, { "epoch": 0.43033625132946085, "grad_norm": 0.040545277297496796, "learning_rate": 9.60137693425472e-06, "loss": 0.0028, "step": 26300 }, { "epoch": 0.4304998772805367, "grad_norm": 0.15685582160949707, "learning_rate": 9.600818048102265e-06, "loss": 0.0047, "step": 26310 }, { "epoch": 0.4306635032316125, "grad_norm": 0.3727242052555084, "learning_rate": 9.600258786722743e-06, "loss": 0.0035, "step": 26320 }, { "epoch": 0.4308271291826884, "grad_norm": 0.2849130928516388, "learning_rate": 9.599699150161765e-06, "loss": 0.0038, "step": 26330 }, { "epoch": 0.4309907551337642, "grad_norm": 0.1869385689496994, "learning_rate": 9.599139138464975e-06, "loss": 0.0057, "step": 26340 }, { "epoch": 0.43115438108484005, "grad_norm": 0.19602486491203308, "learning_rate": 9.598578751678042e-06, "loss": 0.0026, "step": 26350 }, { "epoch": 0.4313180070359159, "grad_norm": 0.1786963790655136, "learning_rate": 9.598017989846675e-06, "loss": 0.004, "step": 26360 }, { "epoch": 0.4314816329869917, "grad_norm": 0.10797512531280518, "learning_rate": 9.597456853016602e-06, "loss": 0.0031, "step": 26370 }, { "epoch": 0.4316452589380676, "grad_norm": 0.049385931342840195, "learning_rate": 9.596895341233592e-06, "loss": 0.0041, "step": 26380 }, { "epoch": 0.43180888488914343, "grad_norm": 0.11708057671785355, "learning_rate": 9.596333454543436e-06, "loss": 0.0032, "step": 26390 }, { "epoch": 0.43197251084021926, "grad_norm": 0.2220579832792282, "learning_rate": 9.595771192991962e-06, "loss": 0.0035, "step": 26400 }, { "epoch": 0.4321361367912951, "grad_norm": 0.08392468094825745, "learning_rate": 9.595208556625027e-06, "loss": 0.0039, "step": 26410 }, { "epoch": 0.4322997627423709, "grad_norm": 0.058450691401958466, "learning_rate": 9.594645545488516e-06, "loss": 0.0061, "step": 26420 }, { "epoch": 0.4324633886934468, "grad_norm": 0.025988183915615082, "learning_rate": 9.594082159628346e-06, "loss": 0.0116, "step": 26430 }, { "epoch": 0.43262701464452263, "grad_norm": 0.11620432138442993, "learning_rate": 9.593518399090467e-06, "loss": 0.0031, "step": 26440 }, { "epoch": 0.43279064059559846, "grad_norm": 0.38806021213531494, "learning_rate": 9.592954263920854e-06, "loss": 0.0031, "step": 26450 }, { "epoch": 0.4329542665466743, "grad_norm": 0.045491643249988556, "learning_rate": 9.592389754165518e-06, "loss": 0.0032, "step": 26460 }, { "epoch": 0.4331178924977501, "grad_norm": 0.35154813528060913, "learning_rate": 9.591824869870498e-06, "loss": 0.003, "step": 26470 }, { "epoch": 0.433281518448826, "grad_norm": 0.1841619610786438, "learning_rate": 9.591259611081868e-06, "loss": 0.0025, "step": 26480 }, { "epoch": 0.43344514439990184, "grad_norm": 0.20501504838466644, "learning_rate": 9.59069397784572e-06, "loss": 0.0039, "step": 26490 }, { "epoch": 0.43360877035097767, "grad_norm": 0.6021678447723389, "learning_rate": 9.590127970208193e-06, "loss": 0.0094, "step": 26500 }, { "epoch": 0.4337723963020535, "grad_norm": 0.0889105498790741, "learning_rate": 9.589561588215444e-06, "loss": 0.0043, "step": 26510 }, { "epoch": 0.4339360222531293, "grad_norm": 0.28999319672584534, "learning_rate": 9.588994831913668e-06, "loss": 0.0028, "step": 26520 }, { "epoch": 0.4340996482042052, "grad_norm": 0.04398936778306961, "learning_rate": 9.588427701349086e-06, "loss": 0.0029, "step": 26530 }, { "epoch": 0.43426327415528104, "grad_norm": 0.046272728592157364, "learning_rate": 9.587860196567954e-06, "loss": 0.0025, "step": 26540 }, { "epoch": 0.4344269001063569, "grad_norm": 0.07538960129022598, "learning_rate": 9.58729231761655e-06, "loss": 0.0019, "step": 26550 }, { "epoch": 0.4345905260574327, "grad_norm": 0.051992617547512054, "learning_rate": 9.586724064541195e-06, "loss": 0.0023, "step": 26560 }, { "epoch": 0.43475415200850853, "grad_norm": 0.1269051730632782, "learning_rate": 9.586155437388229e-06, "loss": 0.0039, "step": 26570 }, { "epoch": 0.4349177779595844, "grad_norm": 0.1950068324804306, "learning_rate": 9.585586436204028e-06, "loss": 0.0025, "step": 26580 }, { "epoch": 0.43508140391066025, "grad_norm": 0.06404326856136322, "learning_rate": 9.585017061035e-06, "loss": 0.0058, "step": 26590 }, { "epoch": 0.4352450298617361, "grad_norm": 0.15282125771045685, "learning_rate": 9.584447311927582e-06, "loss": 0.0035, "step": 26600 }, { "epoch": 0.4354086558128119, "grad_norm": 0.06417321413755417, "learning_rate": 9.583877188928236e-06, "loss": 0.0028, "step": 26610 }, { "epoch": 0.43557228176388774, "grad_norm": 0.10315041989088058, "learning_rate": 9.583306692083464e-06, "loss": 0.0024, "step": 26620 }, { "epoch": 0.43573590771496357, "grad_norm": 0.23699307441711426, "learning_rate": 9.58273582143979e-06, "loss": 0.009, "step": 26630 }, { "epoch": 0.43589953366603945, "grad_norm": 0.07633335143327713, "learning_rate": 9.582164577043776e-06, "loss": 0.0045, "step": 26640 }, { "epoch": 0.4360631596171153, "grad_norm": 0.17290852963924408, "learning_rate": 9.581592958942008e-06, "loss": 0.0033, "step": 26650 }, { "epoch": 0.4362267855681911, "grad_norm": 0.21958240866661072, "learning_rate": 9.581020967181106e-06, "loss": 0.0033, "step": 26660 }, { "epoch": 0.43639041151926694, "grad_norm": 0.17968712747097015, "learning_rate": 9.58044860180772e-06, "loss": 0.0048, "step": 26670 }, { "epoch": 0.43655403747034277, "grad_norm": 0.21733339130878448, "learning_rate": 9.579875862868533e-06, "loss": 0.0033, "step": 26680 }, { "epoch": 0.43671766342141866, "grad_norm": 0.11233142763376236, "learning_rate": 9.57930275041025e-06, "loss": 0.0066, "step": 26690 }, { "epoch": 0.4368812893724945, "grad_norm": 0.09297332912683487, "learning_rate": 9.578729264479615e-06, "loss": 0.0032, "step": 26700 }, { "epoch": 0.4370449153235703, "grad_norm": 0.06843338161706924, "learning_rate": 9.5781554051234e-06, "loss": 0.0047, "step": 26710 }, { "epoch": 0.43720854127464615, "grad_norm": 0.07784595340490341, "learning_rate": 9.577581172388406e-06, "loss": 0.0024, "step": 26720 }, { "epoch": 0.437372167225722, "grad_norm": 0.21292120218276978, "learning_rate": 9.577006566321465e-06, "loss": 0.0048, "step": 26730 }, { "epoch": 0.43753579317679786, "grad_norm": 0.13139836490154266, "learning_rate": 9.576431586969441e-06, "loss": 0.0033, "step": 26740 }, { "epoch": 0.4376994191278737, "grad_norm": 0.2480802983045578, "learning_rate": 9.575856234379228e-06, "loss": 0.0046, "step": 26750 }, { "epoch": 0.4378630450789495, "grad_norm": 0.31265008449554443, "learning_rate": 9.575280508597748e-06, "loss": 0.0062, "step": 26760 }, { "epoch": 0.43802667103002535, "grad_norm": 0.10543932020664215, "learning_rate": 9.574704409671958e-06, "loss": 0.0029, "step": 26770 }, { "epoch": 0.4381902969811012, "grad_norm": 0.20948873460292816, "learning_rate": 9.574127937648838e-06, "loss": 0.0041, "step": 26780 }, { "epoch": 0.43835392293217706, "grad_norm": 0.24258270859718323, "learning_rate": 9.573551092575407e-06, "loss": 0.0044, "step": 26790 }, { "epoch": 0.4385175488832529, "grad_norm": 0.1120290756225586, "learning_rate": 9.572973874498711e-06, "loss": 0.0081, "step": 26800 }, { "epoch": 0.4386811748343287, "grad_norm": 0.14425401389598846, "learning_rate": 9.572396283465824e-06, "loss": 0.0022, "step": 26810 }, { "epoch": 0.43884480078540455, "grad_norm": 0.05229343846440315, "learning_rate": 9.571818319523851e-06, "loss": 0.004, "step": 26820 }, { "epoch": 0.4390084267364804, "grad_norm": 0.1308344304561615, "learning_rate": 9.571239982719932e-06, "loss": 0.0036, "step": 26830 }, { "epoch": 0.43917205268755627, "grad_norm": 0.0666099488735199, "learning_rate": 9.570661273101233e-06, "loss": 0.003, "step": 26840 }, { "epoch": 0.4393356786386321, "grad_norm": 0.10902194678783417, "learning_rate": 9.57008219071495e-06, "loss": 0.0057, "step": 26850 }, { "epoch": 0.43949930458970793, "grad_norm": 0.10403379797935486, "learning_rate": 9.569502735608314e-06, "loss": 0.0043, "step": 26860 }, { "epoch": 0.43966293054078376, "grad_norm": 0.09467127174139023, "learning_rate": 9.568922907828579e-06, "loss": 0.0029, "step": 26870 }, { "epoch": 0.4398265564918596, "grad_norm": 0.28333282470703125, "learning_rate": 9.568342707423038e-06, "loss": 0.0052, "step": 26880 }, { "epoch": 0.4399901824429355, "grad_norm": 0.14598549902439117, "learning_rate": 9.567762134439009e-06, "loss": 0.0031, "step": 26890 }, { "epoch": 0.4401538083940113, "grad_norm": 0.061146821826696396, "learning_rate": 9.567181188923841e-06, "loss": 0.0029, "step": 26900 }, { "epoch": 0.44031743434508713, "grad_norm": 0.20208175480365753, "learning_rate": 9.566599870924914e-06, "loss": 0.0033, "step": 26910 }, { "epoch": 0.44048106029616296, "grad_norm": 0.09864014387130737, "learning_rate": 9.566018180489638e-06, "loss": 0.0029, "step": 26920 }, { "epoch": 0.4406446862472388, "grad_norm": 0.06830285489559174, "learning_rate": 9.565436117665455e-06, "loss": 0.0069, "step": 26930 }, { "epoch": 0.4408083121983147, "grad_norm": 0.1314922273159027, "learning_rate": 9.564853682499835e-06, "loss": 0.0039, "step": 26940 }, { "epoch": 0.4409719381493905, "grad_norm": 0.04637869819998741, "learning_rate": 9.564270875040279e-06, "loss": 0.0033, "step": 26950 }, { "epoch": 0.44113556410046634, "grad_norm": 0.11996118724346161, "learning_rate": 9.563687695334321e-06, "loss": 0.0027, "step": 26960 }, { "epoch": 0.44129919005154217, "grad_norm": 0.1876671016216278, "learning_rate": 9.56310414342952e-06, "loss": 0.0057, "step": 26970 }, { "epoch": 0.441462816002618, "grad_norm": 0.31153422594070435, "learning_rate": 9.56252021937347e-06, "loss": 0.0042, "step": 26980 }, { "epoch": 0.4416264419536939, "grad_norm": 0.10770515352487564, "learning_rate": 9.561935923213794e-06, "loss": 0.0035, "step": 26990 }, { "epoch": 0.4417900679047697, "grad_norm": 0.11300802230834961, "learning_rate": 9.561351254998146e-06, "loss": 0.0028, "step": 27000 }, { "epoch": 0.44195369385584554, "grad_norm": 0.038234710693359375, "learning_rate": 9.560766214774208e-06, "loss": 0.0024, "step": 27010 }, { "epoch": 0.44211731980692137, "grad_norm": 0.11550666391849518, "learning_rate": 9.560180802589695e-06, "loss": 0.0054, "step": 27020 }, { "epoch": 0.4422809457579972, "grad_norm": 0.1973753571510315, "learning_rate": 9.559595018492351e-06, "loss": 0.0033, "step": 27030 }, { "epoch": 0.4424445717090731, "grad_norm": 0.0685434564948082, "learning_rate": 9.55900886252995e-06, "loss": 0.0035, "step": 27040 }, { "epoch": 0.4426081976601489, "grad_norm": 0.09395457059144974, "learning_rate": 9.558422334750297e-06, "loss": 0.003, "step": 27050 }, { "epoch": 0.44277182361122475, "grad_norm": 0.2162901610136032, "learning_rate": 9.557835435201228e-06, "loss": 0.003, "step": 27060 }, { "epoch": 0.4429354495623006, "grad_norm": 0.04899590089917183, "learning_rate": 9.557248163930606e-06, "loss": 0.0023, "step": 27070 }, { "epoch": 0.4430990755133764, "grad_norm": 0.0845181941986084, "learning_rate": 9.556660520986332e-06, "loss": 0.0036, "step": 27080 }, { "epoch": 0.44326270146445224, "grad_norm": 0.08560560643672943, "learning_rate": 9.556072506416327e-06, "loss": 0.0025, "step": 27090 }, { "epoch": 0.4434263274155281, "grad_norm": 0.1488703340291977, "learning_rate": 9.55548412026855e-06, "loss": 0.0051, "step": 27100 }, { "epoch": 0.44358995336660395, "grad_norm": 0.046460285782814026, "learning_rate": 9.554895362590988e-06, "loss": 0.0025, "step": 27110 }, { "epoch": 0.4437535793176798, "grad_norm": 0.14281921088695526, "learning_rate": 9.554306233431656e-06, "loss": 0.0028, "step": 27120 }, { "epoch": 0.4439172052687556, "grad_norm": 0.07671947032213211, "learning_rate": 9.553716732838603e-06, "loss": 0.0033, "step": 27130 }, { "epoch": 0.44408083121983144, "grad_norm": 0.38148581981658936, "learning_rate": 9.553126860859908e-06, "loss": 0.003, "step": 27140 }, { "epoch": 0.4442444571709073, "grad_norm": 0.02350139617919922, "learning_rate": 9.552536617543675e-06, "loss": 0.0029, "step": 27150 }, { "epoch": 0.44440808312198316, "grad_norm": 0.0743347704410553, "learning_rate": 9.551946002938046e-06, "loss": 0.0029, "step": 27160 }, { "epoch": 0.444571709073059, "grad_norm": 0.22405192255973816, "learning_rate": 9.551355017091189e-06, "loss": 0.003, "step": 27170 }, { "epoch": 0.4447353350241348, "grad_norm": 0.08125679939985275, "learning_rate": 9.550763660051299e-06, "loss": 0.0017, "step": 27180 }, { "epoch": 0.44489896097521064, "grad_norm": 0.08913346379995346, "learning_rate": 9.55017193186661e-06, "loss": 0.0034, "step": 27190 }, { "epoch": 0.44506258692628653, "grad_norm": 0.3011961877346039, "learning_rate": 9.549579832585379e-06, "loss": 0.0054, "step": 27200 }, { "epoch": 0.44522621287736236, "grad_norm": 0.09054125845432281, "learning_rate": 9.548987362255896e-06, "loss": 0.0031, "step": 27210 }, { "epoch": 0.4453898388284382, "grad_norm": 0.18851423263549805, "learning_rate": 9.548394520926479e-06, "loss": 0.0038, "step": 27220 }, { "epoch": 0.445553464779514, "grad_norm": 0.08263241499662399, "learning_rate": 9.547801308645482e-06, "loss": 0.0023, "step": 27230 }, { "epoch": 0.44571709073058985, "grad_norm": 0.11365669965744019, "learning_rate": 9.547207725461281e-06, "loss": 0.0023, "step": 27240 }, { "epoch": 0.44588071668166573, "grad_norm": 0.04128199443221092, "learning_rate": 9.54661377142229e-06, "loss": 0.0017, "step": 27250 }, { "epoch": 0.44604434263274156, "grad_norm": 0.37932923436164856, "learning_rate": 9.546019446576949e-06, "loss": 0.0045, "step": 27260 }, { "epoch": 0.4462079685838174, "grad_norm": 0.11917036026716232, "learning_rate": 9.545424750973728e-06, "loss": 0.0037, "step": 27270 }, { "epoch": 0.4463715945348932, "grad_norm": 0.07910063862800598, "learning_rate": 9.54482968466113e-06, "loss": 0.0026, "step": 27280 }, { "epoch": 0.44653522048596905, "grad_norm": 0.10809221118688583, "learning_rate": 9.544234247687688e-06, "loss": 0.0035, "step": 27290 }, { "epoch": 0.44669884643704494, "grad_norm": 0.06450322270393372, "learning_rate": 9.543638440101958e-06, "loss": 0.0021, "step": 27300 }, { "epoch": 0.44686247238812077, "grad_norm": 0.06999453157186508, "learning_rate": 9.543042261952537e-06, "loss": 0.0027, "step": 27310 }, { "epoch": 0.4470260983391966, "grad_norm": 0.04958852007985115, "learning_rate": 9.542445713288044e-06, "loss": 0.0038, "step": 27320 }, { "epoch": 0.44718972429027243, "grad_norm": 0.13095538318157196, "learning_rate": 9.541848794157136e-06, "loss": 0.002, "step": 27330 }, { "epoch": 0.44735335024134826, "grad_norm": 0.03661338984966278, "learning_rate": 9.541251504608493e-06, "loss": 0.005, "step": 27340 }, { "epoch": 0.44751697619242414, "grad_norm": 0.06599044054746628, "learning_rate": 9.540653844690826e-06, "loss": 0.0034, "step": 27350 }, { "epoch": 0.4476806021435, "grad_norm": 0.1458204835653305, "learning_rate": 9.540055814452883e-06, "loss": 0.0034, "step": 27360 }, { "epoch": 0.4478442280945758, "grad_norm": 0.013831786811351776, "learning_rate": 9.53945741394343e-06, "loss": 0.0032, "step": 27370 }, { "epoch": 0.44800785404565163, "grad_norm": 0.3614981472492218, "learning_rate": 9.538858643211278e-06, "loss": 0.0029, "step": 27380 }, { "epoch": 0.44817147999672746, "grad_norm": 0.032748524099588394, "learning_rate": 9.538259502305257e-06, "loss": 0.0033, "step": 27390 }, { "epoch": 0.44833510594780335, "grad_norm": 0.11110106110572815, "learning_rate": 9.537659991274232e-06, "loss": 0.003, "step": 27400 }, { "epoch": 0.4484987318988792, "grad_norm": 0.1370764821767807, "learning_rate": 9.537060110167095e-06, "loss": 0.0027, "step": 27410 }, { "epoch": 0.448662357849955, "grad_norm": 0.07810279726982117, "learning_rate": 9.536459859032774e-06, "loss": 0.0028, "step": 27420 }, { "epoch": 0.44882598380103084, "grad_norm": 0.13732978701591492, "learning_rate": 9.535859237920219e-06, "loss": 0.002, "step": 27430 }, { "epoch": 0.44898960975210667, "grad_norm": 0.06112179160118103, "learning_rate": 9.535258246878418e-06, "loss": 0.0017, "step": 27440 }, { "epoch": 0.44915323570318255, "grad_norm": 0.14673815667629242, "learning_rate": 9.534656885956386e-06, "loss": 0.0036, "step": 27450 }, { "epoch": 0.4493168616542584, "grad_norm": 0.06018859148025513, "learning_rate": 9.534055155203164e-06, "loss": 0.0059, "step": 27460 }, { "epoch": 0.4494804876053342, "grad_norm": 0.06915943324565887, "learning_rate": 9.533453054667831e-06, "loss": 0.0045, "step": 27470 }, { "epoch": 0.44964411355641004, "grad_norm": 0.017338022589683533, "learning_rate": 9.53285058439949e-06, "loss": 0.0047, "step": 27480 }, { "epoch": 0.44980773950748587, "grad_norm": 0.2309543639421463, "learning_rate": 9.532247744447281e-06, "loss": 0.0028, "step": 27490 }, { "epoch": 0.4499713654585617, "grad_norm": 0.1974831372499466, "learning_rate": 9.531644534860363e-06, "loss": 0.0033, "step": 27500 }, { "epoch": 0.4501349914096376, "grad_norm": 0.11578907817602158, "learning_rate": 9.531040955687935e-06, "loss": 0.0038, "step": 27510 }, { "epoch": 0.4502986173607134, "grad_norm": 0.09191634505987167, "learning_rate": 9.530437006979223e-06, "loss": 0.003, "step": 27520 }, { "epoch": 0.45046224331178925, "grad_norm": 0.1568758636713028, "learning_rate": 9.529832688783483e-06, "loss": 0.0043, "step": 27530 }, { "epoch": 0.4506258692628651, "grad_norm": 0.05258748680353165, "learning_rate": 9.52922800115e-06, "loss": 0.0041, "step": 27540 }, { "epoch": 0.4507894952139409, "grad_norm": 0.18043150007724762, "learning_rate": 9.528622944128089e-06, "loss": 0.0025, "step": 27550 }, { "epoch": 0.4509531211650168, "grad_norm": 0.15081089735031128, "learning_rate": 9.528017517767103e-06, "loss": 0.0029, "step": 27560 }, { "epoch": 0.4511167471160926, "grad_norm": 0.07562534511089325, "learning_rate": 9.52741172211641e-06, "loss": 0.0029, "step": 27570 }, { "epoch": 0.45128037306716845, "grad_norm": 0.2528219521045685, "learning_rate": 9.526805557225422e-06, "loss": 0.003, "step": 27580 }, { "epoch": 0.4514439990182443, "grad_norm": 0.19838450849056244, "learning_rate": 9.526199023143574e-06, "loss": 0.0104, "step": 27590 }, { "epoch": 0.4516076249693201, "grad_norm": 0.048178285360336304, "learning_rate": 9.525592119920333e-06, "loss": 0.0047, "step": 27600 }, { "epoch": 0.451771250920396, "grad_norm": 0.09211842715740204, "learning_rate": 9.524984847605197e-06, "loss": 0.0027, "step": 27610 }, { "epoch": 0.4519348768714718, "grad_norm": 0.34548458456993103, "learning_rate": 9.524377206247691e-06, "loss": 0.0049, "step": 27620 }, { "epoch": 0.45209850282254765, "grad_norm": 0.08648984134197235, "learning_rate": 9.523769195897374e-06, "loss": 0.0034, "step": 27630 }, { "epoch": 0.4522621287736235, "grad_norm": 0.08408576995134354, "learning_rate": 9.523160816603832e-06, "loss": 0.0039, "step": 27640 }, { "epoch": 0.4524257547246993, "grad_norm": 0.16873887181282043, "learning_rate": 9.522552068416684e-06, "loss": 0.0033, "step": 27650 }, { "epoch": 0.4525893806757752, "grad_norm": 0.1678036004304886, "learning_rate": 9.521942951385576e-06, "loss": 0.0038, "step": 27660 }, { "epoch": 0.45275300662685103, "grad_norm": 0.24030046164989471, "learning_rate": 9.521333465560186e-06, "loss": 0.0035, "step": 27670 }, { "epoch": 0.45291663257792686, "grad_norm": 0.09746508300304413, "learning_rate": 9.52072361099022e-06, "loss": 0.0038, "step": 27680 }, { "epoch": 0.4530802585290027, "grad_norm": 0.111379474401474, "learning_rate": 9.520113387725418e-06, "loss": 0.003, "step": 27690 }, { "epoch": 0.4532438844800785, "grad_norm": 0.08142360299825668, "learning_rate": 9.519502795815548e-06, "loss": 0.0035, "step": 27700 }, { "epoch": 0.4534075104311544, "grad_norm": 0.12644042074680328, "learning_rate": 9.518891835310404e-06, "loss": 0.0029, "step": 27710 }, { "epoch": 0.45357113638223023, "grad_norm": 0.1252516359090805, "learning_rate": 9.518280506259819e-06, "loss": 0.0025, "step": 27720 }, { "epoch": 0.45373476233330606, "grad_norm": 0.08867057412862778, "learning_rate": 9.517668808713647e-06, "loss": 0.0036, "step": 27730 }, { "epoch": 0.4538983882843819, "grad_norm": 0.060929544270038605, "learning_rate": 9.517056742721779e-06, "loss": 0.0037, "step": 27740 }, { "epoch": 0.4540620142354577, "grad_norm": 0.1040935218334198, "learning_rate": 9.51644430833413e-06, "loss": 0.0034, "step": 27750 }, { "epoch": 0.4542256401865336, "grad_norm": 0.10028395056724548, "learning_rate": 9.51583150560065e-06, "loss": 0.0027, "step": 27760 }, { "epoch": 0.45438926613760944, "grad_norm": 0.17439784109592438, "learning_rate": 9.515218334571317e-06, "loss": 0.0039, "step": 27770 }, { "epoch": 0.45455289208868527, "grad_norm": 0.24616345763206482, "learning_rate": 9.514604795296139e-06, "loss": 0.0028, "step": 27780 }, { "epoch": 0.4547165180397611, "grad_norm": 0.03100091964006424, "learning_rate": 9.513990887825151e-06, "loss": 0.0026, "step": 27790 }, { "epoch": 0.4548801439908369, "grad_norm": 0.4011678993701935, "learning_rate": 9.513376612208427e-06, "loss": 0.0021, "step": 27800 }, { "epoch": 0.4550437699419128, "grad_norm": 0.0659242570400238, "learning_rate": 9.512761968496062e-06, "loss": 0.0038, "step": 27810 }, { "epoch": 0.45520739589298864, "grad_norm": 0.184167742729187, "learning_rate": 9.512146956738184e-06, "loss": 0.0043, "step": 27820 }, { "epoch": 0.45537102184406447, "grad_norm": 0.026814324781298637, "learning_rate": 9.51153157698495e-06, "loss": 0.002, "step": 27830 }, { "epoch": 0.4555346477951403, "grad_norm": 0.03376597538590431, "learning_rate": 9.510915829286553e-06, "loss": 0.0029, "step": 27840 }, { "epoch": 0.45569827374621613, "grad_norm": 0.09375806152820587, "learning_rate": 9.510299713693209e-06, "loss": 0.0048, "step": 27850 }, { "epoch": 0.455861899697292, "grad_norm": 0.07186637818813324, "learning_rate": 9.509683230255165e-06, "loss": 0.0036, "step": 27860 }, { "epoch": 0.45602552564836785, "grad_norm": 0.16127312183380127, "learning_rate": 9.509066379022697e-06, "loss": 0.0032, "step": 27870 }, { "epoch": 0.4561891515994437, "grad_norm": 0.08908277750015259, "learning_rate": 9.508449160046121e-06, "loss": 0.0036, "step": 27880 }, { "epoch": 0.4563527775505195, "grad_norm": 0.11411503702402115, "learning_rate": 9.507831573375767e-06, "loss": 0.0032, "step": 27890 }, { "epoch": 0.45651640350159534, "grad_norm": 0.05092979967594147, "learning_rate": 9.507213619062008e-06, "loss": 0.0026, "step": 27900 }, { "epoch": 0.4566800294526712, "grad_norm": 0.08310269564390182, "learning_rate": 9.50659529715524e-06, "loss": 0.0037, "step": 27910 }, { "epoch": 0.45684365540374705, "grad_norm": 0.06380978971719742, "learning_rate": 9.505976607705895e-06, "loss": 0.0044, "step": 27920 }, { "epoch": 0.4570072813548229, "grad_norm": 0.064381442964077, "learning_rate": 9.505357550764427e-06, "loss": 0.0033, "step": 27930 }, { "epoch": 0.4571709073058987, "grad_norm": 0.22628238797187805, "learning_rate": 9.504738126381325e-06, "loss": 0.0059, "step": 27940 }, { "epoch": 0.45733453325697454, "grad_norm": 0.05161839351058006, "learning_rate": 9.504118334607108e-06, "loss": 0.0035, "step": 27950 }, { "epoch": 0.45749815920805037, "grad_norm": 0.09953735768795013, "learning_rate": 9.503498175492326e-06, "loss": 0.0024, "step": 27960 }, { "epoch": 0.45766178515912626, "grad_norm": 0.05142979323863983, "learning_rate": 9.502877649087552e-06, "loss": 0.0023, "step": 27970 }, { "epoch": 0.4578254111102021, "grad_norm": 0.2211284190416336, "learning_rate": 9.5022567554434e-06, "loss": 0.0049, "step": 27980 }, { "epoch": 0.4579890370612779, "grad_norm": 0.08128587156534195, "learning_rate": 9.501635494610503e-06, "loss": 0.0025, "step": 27990 }, { "epoch": 0.45815266301235374, "grad_norm": 0.11402089893817902, "learning_rate": 9.50101386663953e-06, "loss": 0.0026, "step": 28000 }, { "epoch": 0.4583162889634296, "grad_norm": 0.070767842233181, "learning_rate": 9.500391871581182e-06, "loss": 0.0034, "step": 28010 }, { "epoch": 0.45847991491450546, "grad_norm": 0.36080360412597656, "learning_rate": 9.499769509486185e-06, "loss": 0.0052, "step": 28020 }, { "epoch": 0.4586435408655813, "grad_norm": 0.014667628332972527, "learning_rate": 9.499146780405294e-06, "loss": 0.0038, "step": 28030 }, { "epoch": 0.4588071668166571, "grad_norm": 0.09940271824598312, "learning_rate": 9.498523684389301e-06, "loss": 0.0042, "step": 28040 }, { "epoch": 0.45897079276773295, "grad_norm": 0.09149282425642014, "learning_rate": 9.497900221489022e-06, "loss": 0.0053, "step": 28050 }, { "epoch": 0.4591344187188088, "grad_norm": 0.045744918286800385, "learning_rate": 9.497276391755302e-06, "loss": 0.0023, "step": 28060 }, { "epoch": 0.45929804466988466, "grad_norm": 0.1874120682477951, "learning_rate": 9.496652195239023e-06, "loss": 0.0039, "step": 28070 }, { "epoch": 0.4594616706209605, "grad_norm": 0.07139880955219269, "learning_rate": 9.496027631991086e-06, "loss": 0.0034, "step": 28080 }, { "epoch": 0.4596252965720363, "grad_norm": 0.2532361149787903, "learning_rate": 9.495402702062434e-06, "loss": 0.0053, "step": 28090 }, { "epoch": 0.45978892252311215, "grad_norm": 0.05640101060271263, "learning_rate": 9.494777405504035e-06, "loss": 0.0036, "step": 28100 }, { "epoch": 0.459952548474188, "grad_norm": 0.04346856102347374, "learning_rate": 9.494151742366882e-06, "loss": 0.0039, "step": 28110 }, { "epoch": 0.46011617442526387, "grad_norm": 0.17160440981388092, "learning_rate": 9.493525712702e-06, "loss": 0.0037, "step": 28120 }, { "epoch": 0.4602798003763397, "grad_norm": 0.11071068793535233, "learning_rate": 9.492899316560454e-06, "loss": 0.0027, "step": 28130 }, { "epoch": 0.46044342632741553, "grad_norm": 0.04852447658777237, "learning_rate": 9.492272553993323e-06, "loss": 0.0031, "step": 28140 }, { "epoch": 0.46060705227849136, "grad_norm": 0.08421175926923752, "learning_rate": 9.491645425051728e-06, "loss": 0.0025, "step": 28150 }, { "epoch": 0.4607706782295672, "grad_norm": 0.11441599577665329, "learning_rate": 9.491017929786812e-06, "loss": 0.0042, "step": 28160 }, { "epoch": 0.4609343041806431, "grad_norm": 0.21369783580303192, "learning_rate": 9.490390068249755e-06, "loss": 0.0059, "step": 28170 }, { "epoch": 0.4610979301317189, "grad_norm": 0.10642528533935547, "learning_rate": 9.489761840491761e-06, "loss": 0.0041, "step": 28180 }, { "epoch": 0.46126155608279473, "grad_norm": 0.08429161459207535, "learning_rate": 9.489133246564068e-06, "loss": 0.0036, "step": 28190 }, { "epoch": 0.46142518203387056, "grad_norm": 0.10386421531438828, "learning_rate": 9.488504286517939e-06, "loss": 0.0026, "step": 28200 }, { "epoch": 0.4615888079849464, "grad_norm": 0.11419639736413956, "learning_rate": 9.487874960404673e-06, "loss": 0.004, "step": 28210 }, { "epoch": 0.4617524339360223, "grad_norm": 0.047336872667074203, "learning_rate": 9.487245268275593e-06, "loss": 0.0018, "step": 28220 }, { "epoch": 0.4619160598870981, "grad_norm": 0.06023683398962021, "learning_rate": 9.486615210182057e-06, "loss": 0.0045, "step": 28230 }, { "epoch": 0.46207968583817394, "grad_norm": 0.050592176616191864, "learning_rate": 9.485984786175448e-06, "loss": 0.0054, "step": 28240 }, { "epoch": 0.46224331178924977, "grad_norm": 0.0629870668053627, "learning_rate": 9.485353996307185e-06, "loss": 0.0026, "step": 28250 }, { "epoch": 0.4624069377403256, "grad_norm": 0.10153938084840775, "learning_rate": 9.484722840628708e-06, "loss": 0.0027, "step": 28260 }, { "epoch": 0.4625705636914015, "grad_norm": 0.09818442910909653, "learning_rate": 9.484091319191495e-06, "loss": 0.0023, "step": 28270 }, { "epoch": 0.4627341896424773, "grad_norm": 0.10715015232563019, "learning_rate": 9.48345943204705e-06, "loss": 0.0047, "step": 28280 }, { "epoch": 0.46289781559355314, "grad_norm": 0.19105903804302216, "learning_rate": 9.482827179246909e-06, "loss": 0.0023, "step": 28290 }, { "epoch": 0.46306144154462897, "grad_norm": 0.10577499866485596, "learning_rate": 9.482194560842635e-06, "loss": 0.0038, "step": 28300 }, { "epoch": 0.4632250674957048, "grad_norm": 0.33034053444862366, "learning_rate": 9.481561576885821e-06, "loss": 0.0073, "step": 28310 }, { "epoch": 0.4633886934467807, "grad_norm": 0.14007116854190826, "learning_rate": 9.480928227428095e-06, "loss": 0.0031, "step": 28320 }, { "epoch": 0.4635523193978565, "grad_norm": 0.1659744828939438, "learning_rate": 9.480294512521105e-06, "loss": 0.004, "step": 28330 }, { "epoch": 0.46371594534893235, "grad_norm": 0.08970291912555695, "learning_rate": 9.47966043221654e-06, "loss": 0.0042, "step": 28340 }, { "epoch": 0.4638795713000082, "grad_norm": 0.10167667269706726, "learning_rate": 9.479025986566111e-06, "loss": 0.0029, "step": 28350 }, { "epoch": 0.464043197251084, "grad_norm": 0.14350557327270508, "learning_rate": 9.47839117562156e-06, "loss": 0.0045, "step": 28360 }, { "epoch": 0.46420682320215984, "grad_norm": 0.026401014998555183, "learning_rate": 9.477755999434662e-06, "loss": 0.0032, "step": 28370 }, { "epoch": 0.4643704491532357, "grad_norm": 0.03000246360898018, "learning_rate": 9.477120458057221e-06, "loss": 0.0028, "step": 28380 }, { "epoch": 0.46453407510431155, "grad_norm": 0.17039255797863007, "learning_rate": 9.476484551541065e-06, "loss": 0.0024, "step": 28390 }, { "epoch": 0.4646977010553874, "grad_norm": 0.2673710584640503, "learning_rate": 9.475848279938063e-06, "loss": 0.0044, "step": 28400 }, { "epoch": 0.4648613270064632, "grad_norm": 0.1765543669462204, "learning_rate": 9.475211643300099e-06, "loss": 0.0045, "step": 28410 }, { "epoch": 0.46502495295753904, "grad_norm": 0.12987369298934937, "learning_rate": 9.474574641679102e-06, "loss": 0.0035, "step": 28420 }, { "epoch": 0.4651885789086149, "grad_norm": 0.1911579817533493, "learning_rate": 9.473937275127022e-06, "loss": 0.0026, "step": 28430 }, { "epoch": 0.46535220485969075, "grad_norm": 0.06673678755760193, "learning_rate": 9.47329954369584e-06, "loss": 0.0021, "step": 28440 }, { "epoch": 0.4655158308107666, "grad_norm": 0.08422823250293732, "learning_rate": 9.472661447437563e-06, "loss": 0.0029, "step": 28450 }, { "epoch": 0.4656794567618424, "grad_norm": 0.053204409778118134, "learning_rate": 9.472022986404238e-06, "loss": 0.0029, "step": 28460 }, { "epoch": 0.46584308271291824, "grad_norm": 0.022931329905986786, "learning_rate": 9.471384160647933e-06, "loss": 0.0044, "step": 28470 }, { "epoch": 0.46600670866399413, "grad_norm": 0.04039199650287628, "learning_rate": 9.47074497022075e-06, "loss": 0.0032, "step": 28480 }, { "epoch": 0.46617033461506996, "grad_norm": 0.04516413062810898, "learning_rate": 9.470105415174817e-06, "loss": 0.0029, "step": 28490 }, { "epoch": 0.4663339605661458, "grad_norm": 0.11811202019453049, "learning_rate": 9.469465495562293e-06, "loss": 0.0023, "step": 28500 }, { "epoch": 0.4664975865172216, "grad_norm": 0.12220500409603119, "learning_rate": 9.468825211435373e-06, "loss": 0.0046, "step": 28510 }, { "epoch": 0.46666121246829745, "grad_norm": 0.048121124505996704, "learning_rate": 9.468184562846271e-06, "loss": 0.0035, "step": 28520 }, { "epoch": 0.46682483841937333, "grad_norm": 0.062382303178310394, "learning_rate": 9.467543549847238e-06, "loss": 0.0034, "step": 28530 }, { "epoch": 0.46698846437044916, "grad_norm": 0.02163584530353546, "learning_rate": 9.466902172490555e-06, "loss": 0.0025, "step": 28540 }, { "epoch": 0.467152090321525, "grad_norm": 0.3023717701435089, "learning_rate": 9.466260430828526e-06, "loss": 0.0032, "step": 28550 }, { "epoch": 0.4673157162726008, "grad_norm": 0.17531529068946838, "learning_rate": 9.465618324913494e-06, "loss": 0.0036, "step": 28560 }, { "epoch": 0.46747934222367665, "grad_norm": 0.04204995930194855, "learning_rate": 9.464975854797824e-06, "loss": 0.0043, "step": 28570 }, { "epoch": 0.46764296817475254, "grad_norm": 0.22975222766399384, "learning_rate": 9.464333020533914e-06, "loss": 0.0059, "step": 28580 }, { "epoch": 0.46780659412582837, "grad_norm": 0.08279221504926682, "learning_rate": 9.463689822174191e-06, "loss": 0.0025, "step": 28590 }, { "epoch": 0.4679702200769042, "grad_norm": 0.1958877444267273, "learning_rate": 9.463046259771113e-06, "loss": 0.0025, "step": 28600 }, { "epoch": 0.46813384602798, "grad_norm": 0.03067268803715706, "learning_rate": 9.462402333377169e-06, "loss": 0.0029, "step": 28610 }, { "epoch": 0.46829747197905586, "grad_norm": 0.03204172104597092, "learning_rate": 9.461758043044871e-06, "loss": 0.0042, "step": 28620 }, { "epoch": 0.46846109793013174, "grad_norm": 0.045231226831674576, "learning_rate": 9.461113388826768e-06, "loss": 0.0025, "step": 28630 }, { "epoch": 0.46862472388120757, "grad_norm": 0.03135726973414421, "learning_rate": 9.460468370775435e-06, "loss": 0.0072, "step": 28640 }, { "epoch": 0.4687883498322834, "grad_norm": 0.16129206120967865, "learning_rate": 9.459822988943477e-06, "loss": 0.0048, "step": 28650 }, { "epoch": 0.46895197578335923, "grad_norm": 0.19696134328842163, "learning_rate": 9.459177243383531e-06, "loss": 0.0051, "step": 28660 }, { "epoch": 0.46911560173443506, "grad_norm": 0.07071767747402191, "learning_rate": 9.45853113414826e-06, "loss": 0.005, "step": 28670 }, { "epoch": 0.46927922768551095, "grad_norm": 0.06767883896827698, "learning_rate": 9.457884661290358e-06, "loss": 0.0016, "step": 28680 }, { "epoch": 0.4694428536365868, "grad_norm": 0.21728633344173431, "learning_rate": 9.457237824862549e-06, "loss": 0.0031, "step": 28690 }, { "epoch": 0.4696064795876626, "grad_norm": 0.41223353147506714, "learning_rate": 9.45659062491759e-06, "loss": 0.0064, "step": 28700 }, { "epoch": 0.46977010553873844, "grad_norm": 0.054228391498327255, "learning_rate": 9.455943061508261e-06, "loss": 0.0036, "step": 28710 }, { "epoch": 0.46993373148981427, "grad_norm": 0.043424446135759354, "learning_rate": 9.455295134687377e-06, "loss": 0.0036, "step": 28720 }, { "epoch": 0.47009735744089015, "grad_norm": 0.05108136683702469, "learning_rate": 9.45464684450778e-06, "loss": 0.0038, "step": 28730 }, { "epoch": 0.470260983391966, "grad_norm": 0.07761649787425995, "learning_rate": 9.453998191022343e-06, "loss": 0.0016, "step": 28740 }, { "epoch": 0.4704246093430418, "grad_norm": 0.10181140899658203, "learning_rate": 9.453349174283965e-06, "loss": 0.0055, "step": 28750 }, { "epoch": 0.47058823529411764, "grad_norm": 0.12838008999824524, "learning_rate": 9.452699794345583e-06, "loss": 0.003, "step": 28760 }, { "epoch": 0.47075186124519347, "grad_norm": 0.06869093328714371, "learning_rate": 9.452050051260152e-06, "loss": 0.0022, "step": 28770 }, { "epoch": 0.47091548719626936, "grad_norm": 0.1513408124446869, "learning_rate": 9.451399945080668e-06, "loss": 0.0036, "step": 28780 }, { "epoch": 0.4710791131473452, "grad_norm": 0.20629248023033142, "learning_rate": 9.45074947586015e-06, "loss": 0.0045, "step": 28790 }, { "epoch": 0.471242739098421, "grad_norm": 0.12404251098632812, "learning_rate": 9.450098643651647e-06, "loss": 0.0044, "step": 28800 }, { "epoch": 0.47140636504949684, "grad_norm": 0.22954332828521729, "learning_rate": 9.449447448508238e-06, "loss": 0.0026, "step": 28810 }, { "epoch": 0.4715699910005727, "grad_norm": 0.2752246856689453, "learning_rate": 9.448795890483035e-06, "loss": 0.0039, "step": 28820 }, { "epoch": 0.4717336169516485, "grad_norm": 0.11914423108100891, "learning_rate": 9.448143969629175e-06, "loss": 0.0033, "step": 28830 }, { "epoch": 0.4718972429027244, "grad_norm": 0.07998602092266083, "learning_rate": 9.447491685999825e-06, "loss": 0.02, "step": 28840 }, { "epoch": 0.4720608688538002, "grad_norm": 0.13012129068374634, "learning_rate": 9.446839039648187e-06, "loss": 0.0029, "step": 28850 }, { "epoch": 0.47222449480487605, "grad_norm": 0.21434567868709564, "learning_rate": 9.446186030627486e-06, "loss": 0.0035, "step": 28860 }, { "epoch": 0.4723881207559519, "grad_norm": 0.11092668771743774, "learning_rate": 9.44553265899098e-06, "loss": 0.0033, "step": 28870 }, { "epoch": 0.4725517467070277, "grad_norm": 0.02316344901919365, "learning_rate": 9.444878924791952e-06, "loss": 0.0055, "step": 28880 }, { "epoch": 0.4727153726581036, "grad_norm": 0.041690804064273834, "learning_rate": 9.444224828083725e-06, "loss": 0.0028, "step": 28890 }, { "epoch": 0.4728789986091794, "grad_norm": 0.07620447874069214, "learning_rate": 9.44357036891964e-06, "loss": 0.003, "step": 28900 }, { "epoch": 0.47304262456025525, "grad_norm": 0.2158164083957672, "learning_rate": 9.442915547353075e-06, "loss": 0.003, "step": 28910 }, { "epoch": 0.4732062505113311, "grad_norm": 0.2905883193016052, "learning_rate": 9.442260363437435e-06, "loss": 0.0071, "step": 28920 }, { "epoch": 0.4733698764624069, "grad_norm": 0.2301294356584549, "learning_rate": 9.441604817226151e-06, "loss": 0.003, "step": 28930 }, { "epoch": 0.4735335024134828, "grad_norm": 0.0769018903374672, "learning_rate": 9.440948908772692e-06, "loss": 0.0052, "step": 28940 }, { "epoch": 0.47369712836455863, "grad_norm": 0.0813213661313057, "learning_rate": 9.440292638130548e-06, "loss": 0.0045, "step": 28950 }, { "epoch": 0.47386075431563446, "grad_norm": 0.09577547013759613, "learning_rate": 9.439636005353244e-06, "loss": 0.0036, "step": 28960 }, { "epoch": 0.4740243802667103, "grad_norm": 0.04842466115951538, "learning_rate": 9.438979010494333e-06, "loss": 0.0024, "step": 28970 }, { "epoch": 0.4741880062177861, "grad_norm": 0.24843230843544006, "learning_rate": 9.438321653607397e-06, "loss": 0.0032, "step": 28980 }, { "epoch": 0.474351632168862, "grad_norm": 0.24147559702396393, "learning_rate": 9.437663934746045e-06, "loss": 0.0051, "step": 28990 }, { "epoch": 0.47451525811993783, "grad_norm": 0.05181020125746727, "learning_rate": 9.437005853963923e-06, "loss": 0.0044, "step": 29000 }, { "epoch": 0.47467888407101366, "grad_norm": 0.13449133932590485, "learning_rate": 9.436347411314698e-06, "loss": 0.0036, "step": 29010 }, { "epoch": 0.4748425100220895, "grad_norm": 0.1109488382935524, "learning_rate": 9.435688606852073e-06, "loss": 0.003, "step": 29020 }, { "epoch": 0.4750061359731653, "grad_norm": 0.11689011752605438, "learning_rate": 9.435029440629776e-06, "loss": 0.0039, "step": 29030 }, { "epoch": 0.4751697619242412, "grad_norm": 0.023432515561580658, "learning_rate": 9.434369912701568e-06, "loss": 0.0022, "step": 29040 }, { "epoch": 0.47533338787531704, "grad_norm": 0.11035023629665375, "learning_rate": 9.433710023121236e-06, "loss": 0.0032, "step": 29050 }, { "epoch": 0.47549701382639287, "grad_norm": 0.22261519730091095, "learning_rate": 9.433049771942599e-06, "loss": 0.0045, "step": 29060 }, { "epoch": 0.4756606397774687, "grad_norm": 0.16968069970607758, "learning_rate": 9.432389159219507e-06, "loss": 0.0028, "step": 29070 }, { "epoch": 0.4758242657285445, "grad_norm": 0.06222948804497719, "learning_rate": 9.431728185005834e-06, "loss": 0.0025, "step": 29080 }, { "epoch": 0.4759878916796204, "grad_norm": 0.1742427796125412, "learning_rate": 9.431066849355488e-06, "loss": 0.0047, "step": 29090 }, { "epoch": 0.47615151763069624, "grad_norm": 0.20284488797187805, "learning_rate": 9.430405152322407e-06, "loss": 0.0024, "step": 29100 }, { "epoch": 0.47631514358177207, "grad_norm": 0.07978978753089905, "learning_rate": 9.429743093960555e-06, "loss": 0.0034, "step": 29110 }, { "epoch": 0.4764787695328479, "grad_norm": 0.07165643572807312, "learning_rate": 9.429080674323927e-06, "loss": 0.0034, "step": 29120 }, { "epoch": 0.47664239548392373, "grad_norm": 0.08576705306768417, "learning_rate": 9.428417893466549e-06, "loss": 0.0038, "step": 29130 }, { "epoch": 0.4768060214349996, "grad_norm": 0.1268194317817688, "learning_rate": 9.427754751442473e-06, "loss": 0.005, "step": 29140 }, { "epoch": 0.47696964738607545, "grad_norm": 0.10577467083930969, "learning_rate": 9.427091248305784e-06, "loss": 0.0036, "step": 29150 }, { "epoch": 0.4771332733371513, "grad_norm": 0.06506650894880295, "learning_rate": 9.426427384110596e-06, "loss": 0.0027, "step": 29160 }, { "epoch": 0.4772968992882271, "grad_norm": 0.017393868416547775, "learning_rate": 9.42576315891105e-06, "loss": 0.0027, "step": 29170 }, { "epoch": 0.47746052523930294, "grad_norm": 0.08368945121765137, "learning_rate": 9.42509857276132e-06, "loss": 0.0033, "step": 29180 }, { "epoch": 0.4776241511903788, "grad_norm": 0.14434656500816345, "learning_rate": 9.424433625715602e-06, "loss": 0.0032, "step": 29190 }, { "epoch": 0.47778777714145465, "grad_norm": 0.07934143394231796, "learning_rate": 9.423768317828134e-06, "loss": 0.0061, "step": 29200 }, { "epoch": 0.4779514030925305, "grad_norm": 0.05839494988322258, "learning_rate": 9.423102649153173e-06, "loss": 0.0031, "step": 29210 }, { "epoch": 0.4781150290436063, "grad_norm": 0.11376013606786728, "learning_rate": 9.422436619745008e-06, "loss": 0.0028, "step": 29220 }, { "epoch": 0.47827865499468214, "grad_norm": 0.10313332080841064, "learning_rate": 9.421770229657959e-06, "loss": 0.0024, "step": 29230 }, { "epoch": 0.47844228094575797, "grad_norm": 0.1202329769730568, "learning_rate": 9.421103478946374e-06, "loss": 0.003, "step": 29240 }, { "epoch": 0.47860590689683385, "grad_norm": 0.11629094928503036, "learning_rate": 9.420436367664631e-06, "loss": 0.0034, "step": 29250 }, { "epoch": 0.4787695328479097, "grad_norm": 0.10433991998434067, "learning_rate": 9.41976889586714e-06, "loss": 0.0048, "step": 29260 }, { "epoch": 0.4789331587989855, "grad_norm": 0.11831417679786682, "learning_rate": 9.419101063608331e-06, "loss": 0.0033, "step": 29270 }, { "epoch": 0.47909678475006134, "grad_norm": 0.15843774378299713, "learning_rate": 9.418432870942675e-06, "loss": 0.0046, "step": 29280 }, { "epoch": 0.4792604107011372, "grad_norm": 0.054176975041627884, "learning_rate": 9.417764317924669e-06, "loss": 0.0024, "step": 29290 }, { "epoch": 0.47942403665221306, "grad_norm": 0.03427838906645775, "learning_rate": 9.417095404608834e-06, "loss": 0.0032, "step": 29300 }, { "epoch": 0.4795876626032889, "grad_norm": 0.04099629446864128, "learning_rate": 9.416426131049727e-06, "loss": 0.0018, "step": 29310 }, { "epoch": 0.4797512885543647, "grad_norm": 0.10742400586605072, "learning_rate": 9.41575649730193e-06, "loss": 0.0024, "step": 29320 }, { "epoch": 0.47991491450544055, "grad_norm": 0.17142325639724731, "learning_rate": 9.415086503420059e-06, "loss": 0.0046, "step": 29330 }, { "epoch": 0.4800785404565164, "grad_norm": 0.09329265356063843, "learning_rate": 9.414416149458751e-06, "loss": 0.0029, "step": 29340 }, { "epoch": 0.48024216640759226, "grad_norm": 0.04275538772344589, "learning_rate": 9.413745435472683e-06, "loss": 0.0047, "step": 29350 }, { "epoch": 0.4804057923586681, "grad_norm": 0.004263002425432205, "learning_rate": 9.413074361516554e-06, "loss": 0.003, "step": 29360 }, { "epoch": 0.4805694183097439, "grad_norm": 0.2757914662361145, "learning_rate": 9.412402927645095e-06, "loss": 0.004, "step": 29370 }, { "epoch": 0.48073304426081975, "grad_norm": 0.09664005041122437, "learning_rate": 9.411731133913063e-06, "loss": 0.0033, "step": 29380 }, { "epoch": 0.4808966702118956, "grad_norm": 0.20267799496650696, "learning_rate": 9.411058980375251e-06, "loss": 0.003, "step": 29390 }, { "epoch": 0.48106029616297147, "grad_norm": 0.04491041228175163, "learning_rate": 9.410386467086479e-06, "loss": 0.0026, "step": 29400 }, { "epoch": 0.4812239221140473, "grad_norm": 0.28391507267951965, "learning_rate": 9.409713594101589e-06, "loss": 0.0021, "step": 29410 }, { "epoch": 0.4813875480651231, "grad_norm": 0.18214187026023865, "learning_rate": 9.409040361475463e-06, "loss": 0.0038, "step": 29420 }, { "epoch": 0.48155117401619896, "grad_norm": 0.044792406260967255, "learning_rate": 9.408366769263005e-06, "loss": 0.0031, "step": 29430 }, { "epoch": 0.4817147999672748, "grad_norm": 0.12545135617256165, "learning_rate": 9.407692817519151e-06, "loss": 0.0031, "step": 29440 }, { "epoch": 0.4818784259183507, "grad_norm": 0.11244130879640579, "learning_rate": 9.407018506298868e-06, "loss": 0.0023, "step": 29450 }, { "epoch": 0.4820420518694265, "grad_norm": 0.027645418420433998, "learning_rate": 9.40634383565715e-06, "loss": 0.0035, "step": 29460 }, { "epoch": 0.48220567782050233, "grad_norm": 0.17549319565296173, "learning_rate": 9.405668805649023e-06, "loss": 0.0046, "step": 29470 }, { "epoch": 0.48236930377157816, "grad_norm": 0.12509573996067047, "learning_rate": 9.404993416329533e-06, "loss": 0.0047, "step": 29480 }, { "epoch": 0.482532929722654, "grad_norm": 0.1549886018037796, "learning_rate": 9.40431766775377e-06, "loss": 0.0023, "step": 29490 }, { "epoch": 0.4826965556737299, "grad_norm": 0.2679791748523712, "learning_rate": 9.40364155997684e-06, "loss": 0.0043, "step": 29500 }, { "epoch": 0.4828601816248057, "grad_norm": 0.11589089781045914, "learning_rate": 9.402965093053888e-06, "loss": 0.0044, "step": 29510 }, { "epoch": 0.48302380757588154, "grad_norm": 0.1716080904006958, "learning_rate": 9.402288267040085e-06, "loss": 0.0046, "step": 29520 }, { "epoch": 0.48318743352695737, "grad_norm": 0.16904345154762268, "learning_rate": 9.401611081990628e-06, "loss": 0.003, "step": 29530 }, { "epoch": 0.4833510594780332, "grad_norm": 0.11915217339992523, "learning_rate": 9.400933537960745e-06, "loss": 0.0023, "step": 29540 }, { "epoch": 0.4835146854291091, "grad_norm": 0.061145078390836716, "learning_rate": 9.400255635005699e-06, "loss": 0.0026, "step": 29550 }, { "epoch": 0.4836783113801849, "grad_norm": 0.08691444247961044, "learning_rate": 9.399577373180773e-06, "loss": 0.0039, "step": 29560 }, { "epoch": 0.48384193733126074, "grad_norm": 0.05766739696264267, "learning_rate": 9.398898752541284e-06, "loss": 0.0031, "step": 29570 }, { "epoch": 0.48400556328233657, "grad_norm": 0.08079709112644196, "learning_rate": 9.39821977314258e-06, "loss": 0.0041, "step": 29580 }, { "epoch": 0.4841691892334124, "grad_norm": 0.07256931811571121, "learning_rate": 9.397540435040034e-06, "loss": 0.0032, "step": 29590 }, { "epoch": 0.4843328151844883, "grad_norm": 0.06149398535490036, "learning_rate": 9.396860738289054e-06, "loss": 0.0054, "step": 29600 }, { "epoch": 0.4844964411355641, "grad_norm": 0.1838526725769043, "learning_rate": 9.396180682945071e-06, "loss": 0.0022, "step": 29610 }, { "epoch": 0.48466006708663995, "grad_norm": 0.021014414727687836, "learning_rate": 9.395500269063546e-06, "loss": 0.0027, "step": 29620 }, { "epoch": 0.4848236930377158, "grad_norm": 0.18143707513809204, "learning_rate": 9.394819496699977e-06, "loss": 0.0038, "step": 29630 }, { "epoch": 0.4849873189887916, "grad_norm": 0.041861046105623245, "learning_rate": 9.394138365909882e-06, "loss": 0.0037, "step": 29640 }, { "epoch": 0.4851509449398675, "grad_norm": 0.12975075840950012, "learning_rate": 9.393456876748812e-06, "loss": 0.003, "step": 29650 }, { "epoch": 0.4853145708909433, "grad_norm": 0.31343191862106323, "learning_rate": 9.392775029272345e-06, "loss": 0.0035, "step": 29660 }, { "epoch": 0.48547819684201915, "grad_norm": 0.07164093106985092, "learning_rate": 9.392092823536095e-06, "loss": 0.0029, "step": 29670 }, { "epoch": 0.485641822793095, "grad_norm": 0.08914093673229218, "learning_rate": 9.391410259595695e-06, "loss": 0.003, "step": 29680 }, { "epoch": 0.4858054487441708, "grad_norm": 0.06592705845832825, "learning_rate": 9.390727337506815e-06, "loss": 0.0048, "step": 29690 }, { "epoch": 0.48596907469524664, "grad_norm": 0.0723566859960556, "learning_rate": 9.390044057325154e-06, "loss": 0.0025, "step": 29700 }, { "epoch": 0.4861327006463225, "grad_norm": 0.15704552829265594, "learning_rate": 9.389360419106435e-06, "loss": 0.0034, "step": 29710 }, { "epoch": 0.48629632659739835, "grad_norm": 0.07331628352403641, "learning_rate": 9.388676422906413e-06, "loss": 0.0047, "step": 29720 }, { "epoch": 0.4864599525484742, "grad_norm": 0.07547681778669357, "learning_rate": 9.387992068780876e-06, "loss": 0.0035, "step": 29730 }, { "epoch": 0.48662357849955, "grad_norm": 0.1840086132287979, "learning_rate": 9.387307356785634e-06, "loss": 0.004, "step": 29740 }, { "epoch": 0.48678720445062584, "grad_norm": 0.13028809428215027, "learning_rate": 9.386622286976529e-06, "loss": 0.0038, "step": 29750 }, { "epoch": 0.48695083040170173, "grad_norm": 0.048142917454242706, "learning_rate": 9.385936859409436e-06, "loss": 0.0023, "step": 29760 }, { "epoch": 0.48711445635277756, "grad_norm": 0.08636688441038132, "learning_rate": 9.385251074140256e-06, "loss": 0.0026, "step": 29770 }, { "epoch": 0.4872780823038534, "grad_norm": 0.1748528629541397, "learning_rate": 9.384564931224918e-06, "loss": 0.0048, "step": 29780 }, { "epoch": 0.4874417082549292, "grad_norm": 0.11434413492679596, "learning_rate": 9.38387843071938e-06, "loss": 0.0021, "step": 29790 }, { "epoch": 0.48760533420600505, "grad_norm": 0.09903785586357117, "learning_rate": 9.383191572679635e-06, "loss": 0.006, "step": 29800 }, { "epoch": 0.48776896015708093, "grad_norm": 0.1673927903175354, "learning_rate": 9.382504357161698e-06, "loss": 0.0024, "step": 29810 }, { "epoch": 0.48793258610815676, "grad_norm": 0.04580897465348244, "learning_rate": 9.381816784221613e-06, "loss": 0.0023, "step": 29820 }, { "epoch": 0.4880962120592326, "grad_norm": 0.03202640637755394, "learning_rate": 9.381128853915462e-06, "loss": 0.0022, "step": 29830 }, { "epoch": 0.4882598380103084, "grad_norm": 0.14334113895893097, "learning_rate": 9.380440566299347e-06, "loss": 0.0037, "step": 29840 }, { "epoch": 0.48842346396138425, "grad_norm": 0.1461249738931656, "learning_rate": 9.379751921429403e-06, "loss": 0.0034, "step": 29850 }, { "epoch": 0.48858708991246014, "grad_norm": 0.10982855409383774, "learning_rate": 9.379062919361794e-06, "loss": 0.0034, "step": 29860 }, { "epoch": 0.48875071586353597, "grad_norm": 0.13848136365413666, "learning_rate": 9.378373560152711e-06, "loss": 0.0039, "step": 29870 }, { "epoch": 0.4889143418146118, "grad_norm": 0.1193138062953949, "learning_rate": 9.377683843858378e-06, "loss": 0.0041, "step": 29880 }, { "epoch": 0.4890779677656876, "grad_norm": 0.09822847694158554, "learning_rate": 9.376993770535046e-06, "loss": 0.0029, "step": 29890 }, { "epoch": 0.48924159371676346, "grad_norm": 0.13098573684692383, "learning_rate": 9.376303340238993e-06, "loss": 0.0025, "step": 29900 }, { "epoch": 0.48940521966783934, "grad_norm": 0.05100260302424431, "learning_rate": 9.375612553026527e-06, "loss": 0.0033, "step": 29910 }, { "epoch": 0.48956884561891517, "grad_norm": 0.20293666422367096, "learning_rate": 9.37492140895399e-06, "loss": 0.0028, "step": 29920 }, { "epoch": 0.489732471569991, "grad_norm": 0.10998491197824478, "learning_rate": 9.374229908077748e-06, "loss": 0.0025, "step": 29930 }, { "epoch": 0.48989609752106683, "grad_norm": 0.0260187815874815, "learning_rate": 9.373538050454197e-06, "loss": 0.0021, "step": 29940 }, { "epoch": 0.49005972347214266, "grad_norm": 0.15895287692546844, "learning_rate": 9.37284583613976e-06, "loss": 0.0033, "step": 29950 }, { "epoch": 0.49022334942321855, "grad_norm": 0.11989407986402512, "learning_rate": 9.372153265190898e-06, "loss": 0.0019, "step": 29960 }, { "epoch": 0.4903869753742944, "grad_norm": 0.0730937048792839, "learning_rate": 9.37146033766409e-06, "loss": 0.004, "step": 29970 }, { "epoch": 0.4905506013253702, "grad_norm": 0.08417189866304398, "learning_rate": 9.370767053615849e-06, "loss": 0.0032, "step": 29980 }, { "epoch": 0.49071422727644604, "grad_norm": 0.18247011303901672, "learning_rate": 9.370073413102718e-06, "loss": 0.0038, "step": 29990 }, { "epoch": 0.49087785322752187, "grad_norm": 0.18512503802776337, "learning_rate": 9.369379416181268e-06, "loss": 0.0018, "step": 30000 }, { "epoch": 0.49104147917859775, "grad_norm": 0.2067500352859497, "learning_rate": 9.368685062908098e-06, "loss": 0.0025, "step": 30010 }, { "epoch": 0.4912051051296736, "grad_norm": 0.2376611977815628, "learning_rate": 9.367990353339838e-06, "loss": 0.0042, "step": 30020 }, { "epoch": 0.4913687310807494, "grad_norm": 0.05311886593699455, "learning_rate": 9.367295287533148e-06, "loss": 0.0033, "step": 30030 }, { "epoch": 0.49153235703182524, "grad_norm": 0.14123480021953583, "learning_rate": 9.36659986554471e-06, "loss": 0.0043, "step": 30040 }, { "epoch": 0.49169598298290107, "grad_norm": 0.29360732436180115, "learning_rate": 9.365904087431246e-06, "loss": 0.0021, "step": 30050 }, { "epoch": 0.49185960893397696, "grad_norm": 0.08756820857524872, "learning_rate": 9.365207953249497e-06, "loss": 0.0051, "step": 30060 }, { "epoch": 0.4920232348850528, "grad_norm": 0.06855221837759018, "learning_rate": 9.364511463056241e-06, "loss": 0.0033, "step": 30070 }, { "epoch": 0.4921868608361286, "grad_norm": 0.19539619982242584, "learning_rate": 9.363814616908278e-06, "loss": 0.0033, "step": 30080 }, { "epoch": 0.49235048678720444, "grad_norm": 0.06320386379957199, "learning_rate": 9.363117414862441e-06, "loss": 0.0031, "step": 30090 }, { "epoch": 0.4925141127382803, "grad_norm": 0.04635658487677574, "learning_rate": 9.362419856975593e-06, "loss": 0.0021, "step": 30100 }, { "epoch": 0.49267773868935616, "grad_norm": 0.11186891049146652, "learning_rate": 9.361721943304625e-06, "loss": 0.0037, "step": 30110 }, { "epoch": 0.492841364640432, "grad_norm": 0.31901976466178894, "learning_rate": 9.361023673906454e-06, "loss": 0.0033, "step": 30120 }, { "epoch": 0.4930049905915078, "grad_norm": 0.05110229179263115, "learning_rate": 9.36032504883803e-06, "loss": 0.0026, "step": 30130 }, { "epoch": 0.49316861654258365, "grad_norm": 0.05644770339131355, "learning_rate": 9.359626068156329e-06, "loss": 0.0024, "step": 30140 }, { "epoch": 0.4933322424936595, "grad_norm": 0.0804319903254509, "learning_rate": 9.35892673191836e-06, "loss": 0.0046, "step": 30150 }, { "epoch": 0.4934958684447353, "grad_norm": 0.17363384366035461, "learning_rate": 9.358227040181156e-06, "loss": 0.0035, "step": 30160 }, { "epoch": 0.4936594943958112, "grad_norm": 0.22661608457565308, "learning_rate": 9.357526993001784e-06, "loss": 0.0034, "step": 30170 }, { "epoch": 0.493823120346887, "grad_norm": 0.16821138560771942, "learning_rate": 9.356826590437334e-06, "loss": 0.0028, "step": 30180 }, { "epoch": 0.49398674629796285, "grad_norm": 0.11628932505846024, "learning_rate": 9.356125832544932e-06, "loss": 0.003, "step": 30190 }, { "epoch": 0.4941503722490387, "grad_norm": 0.05957257002592087, "learning_rate": 9.355424719381729e-06, "loss": 0.0033, "step": 30200 }, { "epoch": 0.4943139982001145, "grad_norm": 0.22713930904865265, "learning_rate": 9.354723251004902e-06, "loss": 0.0028, "step": 30210 }, { "epoch": 0.4944776241511904, "grad_norm": 0.10876328498125076, "learning_rate": 9.354021427471665e-06, "loss": 0.0042, "step": 30220 }, { "epoch": 0.49464125010226623, "grad_norm": 0.0741264745593071, "learning_rate": 9.353319248839251e-06, "loss": 0.0038, "step": 30230 }, { "epoch": 0.49480487605334206, "grad_norm": 0.4156412184238434, "learning_rate": 9.352616715164933e-06, "loss": 0.0034, "step": 30240 }, { "epoch": 0.4949685020044179, "grad_norm": 0.21535980701446533, "learning_rate": 9.351913826506003e-06, "loss": 0.0035, "step": 30250 }, { "epoch": 0.4951321279554937, "grad_norm": 0.08432229608297348, "learning_rate": 9.351210582919789e-06, "loss": 0.0039, "step": 30260 }, { "epoch": 0.4952957539065696, "grad_norm": 0.09170220792293549, "learning_rate": 9.350506984463643e-06, "loss": 0.003, "step": 30270 }, { "epoch": 0.49545937985764543, "grad_norm": 0.15254619717597961, "learning_rate": 9.34980303119495e-06, "loss": 0.0023, "step": 30280 }, { "epoch": 0.49562300580872126, "grad_norm": 0.06740779429674149, "learning_rate": 9.349098723171119e-06, "loss": 0.0036, "step": 30290 }, { "epoch": 0.4957866317597971, "grad_norm": 0.09019188582897186, "learning_rate": 9.348394060449594e-06, "loss": 0.0023, "step": 30300 }, { "epoch": 0.4959502577108729, "grad_norm": 0.11102591454982758, "learning_rate": 9.347689043087846e-06, "loss": 0.0035, "step": 30310 }, { "epoch": 0.4961138836619488, "grad_norm": 0.09559548646211624, "learning_rate": 9.34698367114337e-06, "loss": 0.0032, "step": 30320 }, { "epoch": 0.49627750961302464, "grad_norm": 0.20734286308288574, "learning_rate": 9.346277944673696e-06, "loss": 0.0027, "step": 30330 }, { "epoch": 0.49644113556410047, "grad_norm": 0.1631333976984024, "learning_rate": 9.34557186373638e-06, "loss": 0.0042, "step": 30340 }, { "epoch": 0.4966047615151763, "grad_norm": 0.07738049328327179, "learning_rate": 9.344865428389007e-06, "loss": 0.0041, "step": 30350 }, { "epoch": 0.4967683874662521, "grad_norm": 0.033862121403217316, "learning_rate": 9.344158638689193e-06, "loss": 0.0032, "step": 30360 }, { "epoch": 0.496932013417328, "grad_norm": 0.047343477606773376, "learning_rate": 9.34345149469458e-06, "loss": 0.0021, "step": 30370 }, { "epoch": 0.49709563936840384, "grad_norm": 0.04529246687889099, "learning_rate": 9.342743996462841e-06, "loss": 0.0031, "step": 30380 }, { "epoch": 0.49725926531947967, "grad_norm": 0.04019023850560188, "learning_rate": 9.342036144051678e-06, "loss": 0.0053, "step": 30390 }, { "epoch": 0.4974228912705555, "grad_norm": 0.10508981347084045, "learning_rate": 9.341327937518818e-06, "loss": 0.0032, "step": 30400 }, { "epoch": 0.49758651722163133, "grad_norm": 0.07485896348953247, "learning_rate": 9.340619376922023e-06, "loss": 0.0046, "step": 30410 }, { "epoch": 0.4977501431727072, "grad_norm": 0.13404831290245056, "learning_rate": 9.33991046231908e-06, "loss": 0.0061, "step": 30420 }, { "epoch": 0.49791376912378305, "grad_norm": 0.047028373926877975, "learning_rate": 9.339201193767804e-06, "loss": 0.0028, "step": 30430 }, { "epoch": 0.4980773950748589, "grad_norm": 0.22291763126850128, "learning_rate": 9.338491571326043e-06, "loss": 0.0016, "step": 30440 }, { "epoch": 0.4982410210259347, "grad_norm": 0.06728585809469223, "learning_rate": 9.33778159505167e-06, "loss": 0.0045, "step": 30450 }, { "epoch": 0.49840464697701053, "grad_norm": 0.07716561108827591, "learning_rate": 9.337071265002589e-06, "loss": 0.0026, "step": 30460 }, { "epoch": 0.4985682729280864, "grad_norm": 0.09710460156202316, "learning_rate": 9.33636058123673e-06, "loss": 0.003, "step": 30470 }, { "epoch": 0.49873189887916225, "grad_norm": 0.09153927862644196, "learning_rate": 9.335649543812057e-06, "loss": 0.0027, "step": 30480 }, { "epoch": 0.4988955248302381, "grad_norm": 0.07671051472425461, "learning_rate": 9.33493815278656e-06, "loss": 0.0027, "step": 30490 }, { "epoch": 0.4990591507813139, "grad_norm": 0.07697685807943344, "learning_rate": 9.334226408218253e-06, "loss": 0.0039, "step": 30500 }, { "epoch": 0.49922277673238974, "grad_norm": 0.04844066500663757, "learning_rate": 9.333514310165188e-06, "loss": 0.0045, "step": 30510 }, { "epoch": 0.4993864026834656, "grad_norm": 0.08367463946342468, "learning_rate": 9.332801858685438e-06, "loss": 0.004, "step": 30520 }, { "epoch": 0.49955002863454145, "grad_norm": 0.09480585902929306, "learning_rate": 9.332089053837112e-06, "loss": 0.0026, "step": 30530 }, { "epoch": 0.4997136545856173, "grad_norm": 0.23385405540466309, "learning_rate": 9.331375895678341e-06, "loss": 0.0033, "step": 30540 }, { "epoch": 0.4998772805366931, "grad_norm": 0.04920531436800957, "learning_rate": 9.330662384267289e-06, "loss": 0.0044, "step": 30550 }, { "epoch": 0.500040906487769, "grad_norm": 0.06316211074590683, "learning_rate": 9.329948519662147e-06, "loss": 0.0031, "step": 30560 }, { "epoch": 0.5002045324388448, "grad_norm": 0.10192988067865372, "learning_rate": 9.329234301921135e-06, "loss": 0.0048, "step": 30570 }, { "epoch": 0.5003681583899207, "grad_norm": 0.1315154731273651, "learning_rate": 9.328519731102501e-06, "loss": 0.0028, "step": 30580 }, { "epoch": 0.5005317843409964, "grad_norm": 0.10005255043506622, "learning_rate": 9.327804807264528e-06, "loss": 0.0032, "step": 30590 }, { "epoch": 0.5006954102920723, "grad_norm": 0.0728859156370163, "learning_rate": 9.327089530465517e-06, "loss": 0.0026, "step": 30600 }, { "epoch": 0.5008590362431482, "grad_norm": 0.08706450462341309, "learning_rate": 9.326373900763807e-06, "loss": 0.0038, "step": 30610 }, { "epoch": 0.501022662194224, "grad_norm": 0.07957324385643005, "learning_rate": 9.32565791821776e-06, "loss": 0.0033, "step": 30620 }, { "epoch": 0.5011862881452999, "grad_norm": 0.13891012966632843, "learning_rate": 9.324941582885772e-06, "loss": 0.003, "step": 30630 }, { "epoch": 0.5013499140963756, "grad_norm": 0.042739301919937134, "learning_rate": 9.324224894826263e-06, "loss": 0.0043, "step": 30640 }, { "epoch": 0.5015135400474515, "grad_norm": 0.07387175410985947, "learning_rate": 9.323507854097684e-06, "loss": 0.0032, "step": 30650 }, { "epoch": 0.5016771659985274, "grad_norm": 0.10685703903436661, "learning_rate": 9.322790460758513e-06, "loss": 0.0041, "step": 30660 }, { "epoch": 0.5018407919496032, "grad_norm": 0.014521356672048569, "learning_rate": 9.322072714867261e-06, "loss": 0.0034, "step": 30670 }, { "epoch": 0.5020044179006791, "grad_norm": 0.09038981050252914, "learning_rate": 9.32135461648246e-06, "loss": 0.0021, "step": 30680 }, { "epoch": 0.5021680438517548, "grad_norm": 0.07703753560781479, "learning_rate": 9.320636165662684e-06, "loss": 0.003, "step": 30690 }, { "epoch": 0.5023316698028307, "grad_norm": 0.04458431154489517, "learning_rate": 9.31991736246652e-06, "loss": 0.0022, "step": 30700 }, { "epoch": 0.5024952957539066, "grad_norm": 0.23633258044719696, "learning_rate": 9.319198206952592e-06, "loss": 0.0058, "step": 30710 }, { "epoch": 0.5026589217049824, "grad_norm": 0.08995245397090912, "learning_rate": 9.318478699179555e-06, "loss": 0.0043, "step": 30720 }, { "epoch": 0.5028225476560583, "grad_norm": 0.3186088800430298, "learning_rate": 9.317758839206088e-06, "loss": 0.0027, "step": 30730 }, { "epoch": 0.502986173607134, "grad_norm": 0.15831568837165833, "learning_rate": 9.3170386270909e-06, "loss": 0.0023, "step": 30740 }, { "epoch": 0.5031497995582099, "grad_norm": 0.06968633085489273, "learning_rate": 9.31631806289273e-06, "loss": 0.0033, "step": 30750 }, { "epoch": 0.5033134255092858, "grad_norm": 0.03314252942800522, "learning_rate": 9.315597146670343e-06, "loss": 0.0024, "step": 30760 }, { "epoch": 0.5034770514603616, "grad_norm": 0.11809874325990677, "learning_rate": 9.314875878482536e-06, "loss": 0.0035, "step": 30770 }, { "epoch": 0.5036406774114375, "grad_norm": 0.219374880194664, "learning_rate": 9.314154258388135e-06, "loss": 0.0029, "step": 30780 }, { "epoch": 0.5038043033625133, "grad_norm": 0.08316024392843246, "learning_rate": 9.31343228644599e-06, "loss": 0.003, "step": 30790 }, { "epoch": 0.5039679293135891, "grad_norm": 0.14430877566337585, "learning_rate": 9.31270996271498e-06, "loss": 0.003, "step": 30800 }, { "epoch": 0.504131555264665, "grad_norm": 0.17799600958824158, "learning_rate": 9.311987287254022e-06, "loss": 0.0021, "step": 30810 }, { "epoch": 0.5042951812157408, "grad_norm": 0.3822096586227417, "learning_rate": 9.311264260122051e-06, "loss": 0.0022, "step": 30820 }, { "epoch": 0.5044588071668167, "grad_norm": 0.055685460567474365, "learning_rate": 9.310540881378034e-06, "loss": 0.0032, "step": 30830 }, { "epoch": 0.5046224331178925, "grad_norm": 0.1303826868534088, "learning_rate": 9.309817151080969e-06, "loss": 0.0024, "step": 30840 }, { "epoch": 0.5047860590689683, "grad_norm": 0.06911187618970871, "learning_rate": 9.30909306928988e-06, "loss": 0.0026, "step": 30850 }, { "epoch": 0.5049496850200442, "grad_norm": 0.05135410279035568, "learning_rate": 9.30836863606382e-06, "loss": 0.0041, "step": 30860 }, { "epoch": 0.50511331097112, "grad_norm": 0.24860428273677826, "learning_rate": 9.307643851461874e-06, "loss": 0.0035, "step": 30870 }, { "epoch": 0.5052769369221959, "grad_norm": 0.18532079458236694, "learning_rate": 9.306918715543152e-06, "loss": 0.0035, "step": 30880 }, { "epoch": 0.5054405628732717, "grad_norm": 0.05449739098548889, "learning_rate": 9.306193228366791e-06, "loss": 0.0028, "step": 30890 }, { "epoch": 0.5056041888243475, "grad_norm": 0.07781044393777847, "learning_rate": 9.305467389991964e-06, "loss": 0.0033, "step": 30900 }, { "epoch": 0.5057678147754234, "grad_norm": 0.15199995040893555, "learning_rate": 9.304741200477863e-06, "loss": 0.0029, "step": 30910 }, { "epoch": 0.5059314407264992, "grad_norm": 0.2231421023607254, "learning_rate": 9.304014659883716e-06, "loss": 0.0018, "step": 30920 }, { "epoch": 0.5060950666775751, "grad_norm": 0.08540011942386627, "learning_rate": 9.303287768268775e-06, "loss": 0.0027, "step": 30930 }, { "epoch": 0.5062586926286509, "grad_norm": 0.0498240664601326, "learning_rate": 9.302560525692329e-06, "loss": 0.0019, "step": 30940 }, { "epoch": 0.5064223185797267, "grad_norm": 0.2611545920372009, "learning_rate": 9.301832932213681e-06, "loss": 0.0031, "step": 30950 }, { "epoch": 0.5065859445308026, "grad_norm": 0.3127445578575134, "learning_rate": 9.301104987892177e-06, "loss": 0.0022, "step": 30960 }, { "epoch": 0.5067495704818784, "grad_norm": 0.1496264934539795, "learning_rate": 9.300376692787185e-06, "loss": 0.0019, "step": 30970 }, { "epoch": 0.5069131964329543, "grad_norm": 0.06906845420598984, "learning_rate": 9.299648046958098e-06, "loss": 0.0026, "step": 30980 }, { "epoch": 0.5070768223840301, "grad_norm": 0.11016680300235748, "learning_rate": 9.298919050464348e-06, "loss": 0.0045, "step": 30990 }, { "epoch": 0.507240448335106, "grad_norm": 0.06764791905879974, "learning_rate": 9.298189703365385e-06, "loss": 0.0033, "step": 31000 }, { "epoch": 0.5074040742861818, "grad_norm": 0.06481602787971497, "learning_rate": 9.297460005720694e-06, "loss": 0.0023, "step": 31010 }, { "epoch": 0.5075677002372576, "grad_norm": 0.19831061363220215, "learning_rate": 9.296729957589784e-06, "loss": 0.0043, "step": 31020 }, { "epoch": 0.5077313261883335, "grad_norm": 0.08853733539581299, "learning_rate": 9.2959995590322e-06, "loss": 0.0026, "step": 31030 }, { "epoch": 0.5078949521394093, "grad_norm": 0.11338179558515549, "learning_rate": 9.295268810107508e-06, "loss": 0.0051, "step": 31040 }, { "epoch": 0.5080585780904852, "grad_norm": 0.18170523643493652, "learning_rate": 9.294537710875305e-06, "loss": 0.002, "step": 31050 }, { "epoch": 0.508222204041561, "grad_norm": 0.14694349467754364, "learning_rate": 9.293806261395218e-06, "loss": 0.004, "step": 31060 }, { "epoch": 0.5083858299926368, "grad_norm": 0.18828746676445007, "learning_rate": 9.2930744617269e-06, "loss": 0.0045, "step": 31070 }, { "epoch": 0.5085494559437127, "grad_norm": 0.13360236585140228, "learning_rate": 9.292342311930038e-06, "loss": 0.003, "step": 31080 }, { "epoch": 0.5087130818947885, "grad_norm": 0.04547407850623131, "learning_rate": 9.291609812064338e-06, "loss": 0.0028, "step": 31090 }, { "epoch": 0.5088767078458644, "grad_norm": 0.06315422058105469, "learning_rate": 9.290876962189543e-06, "loss": 0.002, "step": 31100 }, { "epoch": 0.5090403337969402, "grad_norm": 0.3497620224952698, "learning_rate": 9.290143762365423e-06, "loss": 0.0043, "step": 31110 }, { "epoch": 0.509203959748016, "grad_norm": 0.08874396234750748, "learning_rate": 9.289410212651774e-06, "loss": 0.0057, "step": 31120 }, { "epoch": 0.5093675856990919, "grad_norm": 0.1794067621231079, "learning_rate": 9.288676313108422e-06, "loss": 0.0034, "step": 31130 }, { "epoch": 0.5095312116501677, "grad_norm": 0.0529162734746933, "learning_rate": 9.28794206379522e-06, "loss": 0.0034, "step": 31140 }, { "epoch": 0.5096948376012436, "grad_norm": 0.18635709583759308, "learning_rate": 9.287207464772054e-06, "loss": 0.0022, "step": 31150 }, { "epoch": 0.5098584635523195, "grad_norm": 0.10029938817024231, "learning_rate": 9.286472516098832e-06, "loss": 0.0023, "step": 31160 }, { "epoch": 0.5100220895033952, "grad_norm": 0.06586988270282745, "learning_rate": 9.285737217835496e-06, "loss": 0.003, "step": 31170 }, { "epoch": 0.5101857154544711, "grad_norm": 0.1123717799782753, "learning_rate": 9.285001570042014e-06, "loss": 0.0021, "step": 31180 }, { "epoch": 0.5103493414055469, "grad_norm": 0.081089086830616, "learning_rate": 9.284265572778381e-06, "loss": 0.0024, "step": 31190 }, { "epoch": 0.5105129673566228, "grad_norm": 0.06498655676841736, "learning_rate": 9.283529226104626e-06, "loss": 0.0034, "step": 31200 }, { "epoch": 0.5106765933076987, "grad_norm": 0.0598677322268486, "learning_rate": 9.282792530080801e-06, "loss": 0.002, "step": 31210 }, { "epoch": 0.5108402192587744, "grad_norm": 0.042015474289655685, "learning_rate": 9.28205548476699e-06, "loss": 0.0025, "step": 31220 }, { "epoch": 0.5110038452098503, "grad_norm": 0.03508414700627327, "learning_rate": 9.2813180902233e-06, "loss": 0.0025, "step": 31230 }, { "epoch": 0.5111674711609261, "grad_norm": 0.059798464179039, "learning_rate": 9.280580346509873e-06, "loss": 0.0035, "step": 31240 }, { "epoch": 0.511331097112002, "grad_norm": 0.06756911426782608, "learning_rate": 9.279842253686878e-06, "loss": 0.0018, "step": 31250 }, { "epoch": 0.5114947230630777, "grad_norm": 0.07574374973773956, "learning_rate": 9.279103811814509e-06, "loss": 0.0024, "step": 31260 }, { "epoch": 0.5116583490141536, "grad_norm": 0.08296548575162888, "learning_rate": 9.278365020952993e-06, "loss": 0.0034, "step": 31270 }, { "epoch": 0.5118219749652295, "grad_norm": 0.10783910751342773, "learning_rate": 9.277625881162582e-06, "loss": 0.0025, "step": 31280 }, { "epoch": 0.5119856009163053, "grad_norm": 0.03009295091032982, "learning_rate": 9.276886392503558e-06, "loss": 0.0021, "step": 31290 }, { "epoch": 0.5121492268673812, "grad_norm": 0.059914231300354004, "learning_rate": 9.27614655503623e-06, "loss": 0.0032, "step": 31300 }, { "epoch": 0.512312852818457, "grad_norm": 0.0674881637096405, "learning_rate": 9.275406368820938e-06, "loss": 0.0032, "step": 31310 }, { "epoch": 0.5124764787695328, "grad_norm": 0.12186701595783234, "learning_rate": 9.274665833918049e-06, "loss": 0.0036, "step": 31320 }, { "epoch": 0.5126401047206087, "grad_norm": 0.24976617097854614, "learning_rate": 9.273924950387958e-06, "loss": 0.0036, "step": 31330 }, { "epoch": 0.5128037306716845, "grad_norm": 0.06949980556964874, "learning_rate": 9.27318371829109e-06, "loss": 0.0026, "step": 31340 }, { "epoch": 0.5129673566227604, "grad_norm": 0.1329091340303421, "learning_rate": 9.272442137687895e-06, "loss": 0.0032, "step": 31350 }, { "epoch": 0.5131309825738362, "grad_norm": 0.12890006601810455, "learning_rate": 9.271700208638856e-06, "loss": 0.0058, "step": 31360 }, { "epoch": 0.513294608524912, "grad_norm": 0.10463836044073105, "learning_rate": 9.270957931204482e-06, "loss": 0.0053, "step": 31370 }, { "epoch": 0.5134582344759879, "grad_norm": 0.0823264941573143, "learning_rate": 9.270215305445311e-06, "loss": 0.0032, "step": 31380 }, { "epoch": 0.5136218604270637, "grad_norm": 0.18861626088619232, "learning_rate": 9.269472331421907e-06, "loss": 0.0025, "step": 31390 }, { "epoch": 0.5137854863781396, "grad_norm": 0.1712394505739212, "learning_rate": 9.268729009194865e-06, "loss": 0.004, "step": 31400 }, { "epoch": 0.5139491123292154, "grad_norm": 0.10809671878814697, "learning_rate": 9.26798533882481e-06, "loss": 0.0022, "step": 31410 }, { "epoch": 0.5141127382802912, "grad_norm": 0.07816719263792038, "learning_rate": 9.267241320372391e-06, "loss": 0.0057, "step": 31420 }, { "epoch": 0.5142763642313671, "grad_norm": 0.04988302290439606, "learning_rate": 9.266496953898289e-06, "loss": 0.0037, "step": 31430 }, { "epoch": 0.5144399901824429, "grad_norm": 0.30293551087379456, "learning_rate": 9.26575223946321e-06, "loss": 0.0027, "step": 31440 }, { "epoch": 0.5146036161335188, "grad_norm": 0.1359817236661911, "learning_rate": 9.265007177127894e-06, "loss": 0.0041, "step": 31450 }, { "epoch": 0.5147672420845946, "grad_norm": 0.07000212371349335, "learning_rate": 9.264261766953101e-06, "loss": 0.0044, "step": 31460 }, { "epoch": 0.5149308680356705, "grad_norm": 0.12727925181388855, "learning_rate": 9.263516008999628e-06, "loss": 0.0035, "step": 31470 }, { "epoch": 0.5150944939867463, "grad_norm": 0.1488708108663559, "learning_rate": 9.262769903328294e-06, "loss": 0.0048, "step": 31480 }, { "epoch": 0.5152581199378221, "grad_norm": 0.05991646647453308, "learning_rate": 9.262023449999951e-06, "loss": 0.0097, "step": 31490 }, { "epoch": 0.515421745888898, "grad_norm": 0.11776924878358841, "learning_rate": 9.261276649075474e-06, "loss": 0.0041, "step": 31500 }, { "epoch": 0.5155853718399738, "grad_norm": 0.11384090036153793, "learning_rate": 9.260529500615774e-06, "loss": 0.0029, "step": 31510 }, { "epoch": 0.5157489977910497, "grad_norm": 0.22239993512630463, "learning_rate": 9.259782004681783e-06, "loss": 0.0022, "step": 31520 }, { "epoch": 0.5159126237421255, "grad_norm": 0.18100106716156006, "learning_rate": 9.259034161334464e-06, "loss": 0.0032, "step": 31530 }, { "epoch": 0.5160762496932013, "grad_norm": 0.15316882729530334, "learning_rate": 9.25828597063481e-06, "loss": 0.0023, "step": 31540 }, { "epoch": 0.5162398756442772, "grad_norm": 0.1278257817029953, "learning_rate": 9.25753743264384e-06, "loss": 0.0043, "step": 31550 }, { "epoch": 0.516403501595353, "grad_norm": 0.06046155095100403, "learning_rate": 9.256788547422601e-06, "loss": 0.0061, "step": 31560 }, { "epoch": 0.5165671275464289, "grad_norm": 0.10817162692546844, "learning_rate": 9.256039315032172e-06, "loss": 0.0023, "step": 31570 }, { "epoch": 0.5167307534975047, "grad_norm": 0.14828264713287354, "learning_rate": 9.255289735533656e-06, "loss": 0.0053, "step": 31580 }, { "epoch": 0.5168943794485805, "grad_norm": 0.1993962526321411, "learning_rate": 9.254539808988189e-06, "loss": 0.0049, "step": 31590 }, { "epoch": 0.5170580053996564, "grad_norm": 0.0732942596077919, "learning_rate": 9.253789535456929e-06, "loss": 0.0042, "step": 31600 }, { "epoch": 0.5172216313507322, "grad_norm": 0.12835919857025146, "learning_rate": 9.253038915001066e-06, "loss": 0.0038, "step": 31610 }, { "epoch": 0.5173852573018081, "grad_norm": 0.21196874976158142, "learning_rate": 9.252287947681822e-06, "loss": 0.0031, "step": 31620 }, { "epoch": 0.517548883252884, "grad_norm": 0.05480005964636803, "learning_rate": 9.251536633560439e-06, "loss": 0.0038, "step": 31630 }, { "epoch": 0.5177125092039597, "grad_norm": 0.06896719336509705, "learning_rate": 9.250784972698192e-06, "loss": 0.0029, "step": 31640 }, { "epoch": 0.5178761351550356, "grad_norm": 0.05573992058634758, "learning_rate": 9.250032965156386e-06, "loss": 0.0027, "step": 31650 }, { "epoch": 0.5180397611061114, "grad_norm": 0.06220722198486328, "learning_rate": 9.249280610996352e-06, "loss": 0.0026, "step": 31660 }, { "epoch": 0.5182033870571873, "grad_norm": 0.12922725081443787, "learning_rate": 9.248527910279447e-06, "loss": 0.0029, "step": 31670 }, { "epoch": 0.5183670130082632, "grad_norm": 0.06652378290891647, "learning_rate": 9.247774863067063e-06, "loss": 0.0035, "step": 31680 }, { "epoch": 0.5185306389593389, "grad_norm": 0.1481589525938034, "learning_rate": 9.24702146942061e-06, "loss": 0.0023, "step": 31690 }, { "epoch": 0.5186942649104148, "grad_norm": 0.7701807618141174, "learning_rate": 9.24626772940154e-06, "loss": 0.0026, "step": 31700 }, { "epoch": 0.5188578908614906, "grad_norm": 0.04232223704457283, "learning_rate": 9.245513643071317e-06, "loss": 0.0037, "step": 31710 }, { "epoch": 0.5190215168125665, "grad_norm": 0.09090936928987503, "learning_rate": 9.244759210491448e-06, "loss": 0.0041, "step": 31720 }, { "epoch": 0.5191851427636424, "grad_norm": 0.07332294434309006, "learning_rate": 9.244004431723458e-06, "loss": 0.0025, "step": 31730 }, { "epoch": 0.5193487687147181, "grad_norm": 0.12391065806150436, "learning_rate": 9.243249306828907e-06, "loss": 0.0024, "step": 31740 }, { "epoch": 0.519512394665794, "grad_norm": 0.04803864285349846, "learning_rate": 9.24249383586938e-06, "loss": 0.0033, "step": 31750 }, { "epoch": 0.5196760206168698, "grad_norm": 0.04341021552681923, "learning_rate": 9.241738018906487e-06, "loss": 0.0026, "step": 31760 }, { "epoch": 0.5198396465679457, "grad_norm": 0.07323268055915833, "learning_rate": 9.240981856001876e-06, "loss": 0.002, "step": 31770 }, { "epoch": 0.5200032725190216, "grad_norm": 0.07374236732721329, "learning_rate": 9.240225347217213e-06, "loss": 0.004, "step": 31780 }, { "epoch": 0.5201668984700973, "grad_norm": 0.30002522468566895, "learning_rate": 9.239468492614197e-06, "loss": 0.0058, "step": 31790 }, { "epoch": 0.5203305244211732, "grad_norm": 0.14853738248348236, "learning_rate": 9.238711292254553e-06, "loss": 0.0032, "step": 31800 }, { "epoch": 0.520494150372249, "grad_norm": 0.04692799225449562, "learning_rate": 9.23795374620004e-06, "loss": 0.0022, "step": 31810 }, { "epoch": 0.5206577763233249, "grad_norm": 0.06869132816791534, "learning_rate": 9.237195854512436e-06, "loss": 0.0041, "step": 31820 }, { "epoch": 0.5208214022744008, "grad_norm": 0.04035910964012146, "learning_rate": 9.236437617253556e-06, "loss": 0.0035, "step": 31830 }, { "epoch": 0.5209850282254765, "grad_norm": 0.04523344337940216, "learning_rate": 9.235679034485237e-06, "loss": 0.0028, "step": 31840 }, { "epoch": 0.5211486541765524, "grad_norm": 0.20912596583366394, "learning_rate": 9.234920106269346e-06, "loss": 0.0029, "step": 31850 }, { "epoch": 0.5213122801276282, "grad_norm": 0.10722243785858154, "learning_rate": 9.234160832667781e-06, "loss": 0.0019, "step": 31860 }, { "epoch": 0.5214759060787041, "grad_norm": 0.18421198427677155, "learning_rate": 9.233401213742464e-06, "loss": 0.0041, "step": 31870 }, { "epoch": 0.52163953202978, "grad_norm": 0.23088569939136505, "learning_rate": 9.232641249555348e-06, "loss": 0.0047, "step": 31880 }, { "epoch": 0.5218031579808557, "grad_norm": 0.17881347239017487, "learning_rate": 9.23188094016841e-06, "loss": 0.0034, "step": 31890 }, { "epoch": 0.5219667839319316, "grad_norm": 0.06535833328962326, "learning_rate": 9.231120285643662e-06, "loss": 0.0035, "step": 31900 }, { "epoch": 0.5221304098830074, "grad_norm": 0.12647125124931335, "learning_rate": 9.23035928604314e-06, "loss": 0.0033, "step": 31910 }, { "epoch": 0.5222940358340833, "grad_norm": 0.025344140827655792, "learning_rate": 9.229597941428907e-06, "loss": 0.0024, "step": 31920 }, { "epoch": 0.5224576617851592, "grad_norm": 0.09759747982025146, "learning_rate": 9.228836251863055e-06, "loss": 0.0026, "step": 31930 }, { "epoch": 0.522621287736235, "grad_norm": 0.06471876800060272, "learning_rate": 9.228074217407707e-06, "loss": 0.0025, "step": 31940 }, { "epoch": 0.5227849136873108, "grad_norm": 0.07845073193311691, "learning_rate": 9.22731183812501e-06, "loss": 0.0033, "step": 31950 }, { "epoch": 0.5229485396383866, "grad_norm": 0.3236940801143646, "learning_rate": 9.226549114077143e-06, "loss": 0.0034, "step": 31960 }, { "epoch": 0.5231121655894625, "grad_norm": 0.02920597791671753, "learning_rate": 9.225786045326311e-06, "loss": 0.0019, "step": 31970 }, { "epoch": 0.5232757915405384, "grad_norm": 0.1861599236726761, "learning_rate": 9.225022631934745e-06, "loss": 0.0028, "step": 31980 }, { "epoch": 0.5234394174916142, "grad_norm": 0.07145814597606659, "learning_rate": 9.224258873964708e-06, "loss": 0.002, "step": 31990 }, { "epoch": 0.52360304344269, "grad_norm": 0.042243584990501404, "learning_rate": 9.223494771478492e-06, "loss": 0.0017, "step": 32000 }, { "epoch": 0.5237666693937658, "grad_norm": 0.05444829538464546, "learning_rate": 9.22273032453841e-06, "loss": 0.0032, "step": 32010 }, { "epoch": 0.5239302953448417, "grad_norm": 0.11444508284330368, "learning_rate": 9.221965533206808e-06, "loss": 0.0032, "step": 32020 }, { "epoch": 0.5240939212959176, "grad_norm": 0.10446197539567947, "learning_rate": 9.221200397546065e-06, "loss": 0.0048, "step": 32030 }, { "epoch": 0.5242575472469934, "grad_norm": 0.15641173720359802, "learning_rate": 9.220434917618576e-06, "loss": 0.0037, "step": 32040 }, { "epoch": 0.5244211731980692, "grad_norm": 0.06004998832941055, "learning_rate": 9.219669093486777e-06, "loss": 0.0025, "step": 32050 }, { "epoch": 0.524584799149145, "grad_norm": 0.13192476332187653, "learning_rate": 9.218902925213122e-06, "loss": 0.0027, "step": 32060 }, { "epoch": 0.5247484251002209, "grad_norm": 0.1496613323688507, "learning_rate": 9.218136412860099e-06, "loss": 0.0047, "step": 32070 }, { "epoch": 0.5249120510512968, "grad_norm": 0.07085603475570679, "learning_rate": 9.21736955649022e-06, "loss": 0.0024, "step": 32080 }, { "epoch": 0.5250756770023726, "grad_norm": 0.180672749876976, "learning_rate": 9.21660235616603e-06, "loss": 0.0014, "step": 32090 }, { "epoch": 0.5252393029534485, "grad_norm": 0.05476313456892967, "learning_rate": 9.215834811950097e-06, "loss": 0.0029, "step": 32100 }, { "epoch": 0.5254029289045242, "grad_norm": 0.09045332670211792, "learning_rate": 9.215066923905022e-06, "loss": 0.0018, "step": 32110 }, { "epoch": 0.5255665548556001, "grad_norm": 0.0838485136628151, "learning_rate": 9.214298692093429e-06, "loss": 0.0029, "step": 32120 }, { "epoch": 0.5257301808066759, "grad_norm": 0.12681303918361664, "learning_rate": 9.21353011657797e-06, "loss": 0.0046, "step": 32130 }, { "epoch": 0.5258938067577518, "grad_norm": 0.09967485070228577, "learning_rate": 9.212761197421334e-06, "loss": 0.0034, "step": 32140 }, { "epoch": 0.5260574327088277, "grad_norm": 0.02472381852567196, "learning_rate": 9.211991934686227e-06, "loss": 0.0014, "step": 32150 }, { "epoch": 0.5262210586599034, "grad_norm": 0.1094222217798233, "learning_rate": 9.211222328435388e-06, "loss": 0.0026, "step": 32160 }, { "epoch": 0.5263846846109793, "grad_norm": 0.22920668125152588, "learning_rate": 9.210452378731583e-06, "loss": 0.0034, "step": 32170 }, { "epoch": 0.5265483105620551, "grad_norm": 0.06849930435419083, "learning_rate": 9.209682085637608e-06, "loss": 0.0043, "step": 32180 }, { "epoch": 0.526711936513131, "grad_norm": 0.09366314113140106, "learning_rate": 9.208911449216284e-06, "loss": 0.0029, "step": 32190 }, { "epoch": 0.5268755624642069, "grad_norm": 0.1349136084318161, "learning_rate": 9.208140469530463e-06, "loss": 0.0041, "step": 32200 }, { "epoch": 0.5270391884152826, "grad_norm": 0.2200312614440918, "learning_rate": 9.207369146643022e-06, "loss": 0.002, "step": 32210 }, { "epoch": 0.5272028143663585, "grad_norm": 0.15642328560352325, "learning_rate": 9.20659748061687e-06, "loss": 0.0028, "step": 32220 }, { "epoch": 0.5273664403174343, "grad_norm": 0.13700298964977264, "learning_rate": 9.205825471514937e-06, "loss": 0.0029, "step": 32230 }, { "epoch": 0.5275300662685102, "grad_norm": 0.08765758574008942, "learning_rate": 9.20505311940019e-06, "loss": 0.0017, "step": 32240 }, { "epoch": 0.5276936922195861, "grad_norm": 0.17214705049991608, "learning_rate": 9.204280424335615e-06, "loss": 0.002, "step": 32250 }, { "epoch": 0.5278573181706618, "grad_norm": 0.1650456041097641, "learning_rate": 9.203507386384233e-06, "loss": 0.0023, "step": 32260 }, { "epoch": 0.5280209441217377, "grad_norm": 0.08436457067728043, "learning_rate": 9.202734005609092e-06, "loss": 0.0033, "step": 32270 }, { "epoch": 0.5281845700728135, "grad_norm": 0.09092748165130615, "learning_rate": 9.201960282073264e-06, "loss": 0.0036, "step": 32280 }, { "epoch": 0.5283481960238894, "grad_norm": 0.08706819266080856, "learning_rate": 9.20118621583985e-06, "loss": 0.0027, "step": 32290 }, { "epoch": 0.5285118219749653, "grad_norm": 0.24460530281066895, "learning_rate": 9.200411806971985e-06, "loss": 0.0042, "step": 32300 }, { "epoch": 0.528675447926041, "grad_norm": 0.2795489728450775, "learning_rate": 9.199637055532822e-06, "loss": 0.0037, "step": 32310 }, { "epoch": 0.5288390738771169, "grad_norm": 0.06631495803594589, "learning_rate": 9.198861961585548e-06, "loss": 0.0027, "step": 32320 }, { "epoch": 0.5290026998281927, "grad_norm": 0.16572906076908112, "learning_rate": 9.198086525193381e-06, "loss": 0.0039, "step": 32330 }, { "epoch": 0.5291663257792686, "grad_norm": 0.049690622836351395, "learning_rate": 9.197310746419558e-06, "loss": 0.0049, "step": 32340 }, { "epoch": 0.5293299517303445, "grad_norm": 0.05921037122607231, "learning_rate": 9.19653462532735e-06, "loss": 0.0031, "step": 32350 }, { "epoch": 0.5294935776814202, "grad_norm": 0.07682120054960251, "learning_rate": 9.19575816198006e-06, "loss": 0.0017, "step": 32360 }, { "epoch": 0.5296572036324961, "grad_norm": 0.3854997158050537, "learning_rate": 9.194981356441006e-06, "loss": 0.0029, "step": 32370 }, { "epoch": 0.5298208295835719, "grad_norm": 0.06295394897460938, "learning_rate": 9.194204208773547e-06, "loss": 0.003, "step": 32380 }, { "epoch": 0.5299844555346478, "grad_norm": 0.06632732599973679, "learning_rate": 9.193426719041062e-06, "loss": 0.0033, "step": 32390 }, { "epoch": 0.5301480814857237, "grad_norm": 0.11335868388414383, "learning_rate": 9.19264888730696e-06, "loss": 0.0031, "step": 32400 }, { "epoch": 0.5303117074367995, "grad_norm": 0.1577157974243164, "learning_rate": 9.191870713634681e-06, "loss": 0.0032, "step": 32410 }, { "epoch": 0.5304753333878753, "grad_norm": 0.061971068382263184, "learning_rate": 9.191092198087688e-06, "loss": 0.0033, "step": 32420 }, { "epoch": 0.5306389593389511, "grad_norm": 0.07954541593790054, "learning_rate": 9.190313340729474e-06, "loss": 0.0021, "step": 32430 }, { "epoch": 0.530802585290027, "grad_norm": 0.04109479486942291, "learning_rate": 9.189534141623562e-06, "loss": 0.0034, "step": 32440 }, { "epoch": 0.5309662112411029, "grad_norm": 0.24136632680892944, "learning_rate": 9.188754600833499e-06, "loss": 0.0045, "step": 32450 }, { "epoch": 0.5311298371921787, "grad_norm": 0.04293668270111084, "learning_rate": 9.18797471842286e-06, "loss": 0.0039, "step": 32460 }, { "epoch": 0.5312934631432545, "grad_norm": 0.2215060442686081, "learning_rate": 9.187194494455254e-06, "loss": 0.0045, "step": 32470 }, { "epoch": 0.5314570890943303, "grad_norm": 0.09272361546754837, "learning_rate": 9.18641392899431e-06, "loss": 0.0042, "step": 32480 }, { "epoch": 0.5316207150454062, "grad_norm": 0.048450734466314316, "learning_rate": 9.18563302210369e-06, "loss": 0.0025, "step": 32490 }, { "epoch": 0.5317843409964821, "grad_norm": 0.2152061015367508, "learning_rate": 9.184851773847081e-06, "loss": 0.0038, "step": 32500 }, { "epoch": 0.5319479669475579, "grad_norm": 0.08277581632137299, "learning_rate": 9.1840701842882e-06, "loss": 0.0028, "step": 32510 }, { "epoch": 0.5321115928986337, "grad_norm": 0.053438831120729446, "learning_rate": 9.183288253490789e-06, "loss": 0.0038, "step": 32520 }, { "epoch": 0.5322752188497095, "grad_norm": 0.21432149410247803, "learning_rate": 9.182505981518622e-06, "loss": 0.0038, "step": 32530 }, { "epoch": 0.5324388448007854, "grad_norm": 0.17829272150993347, "learning_rate": 9.181723368435498e-06, "loss": 0.0033, "step": 32540 }, { "epoch": 0.5326024707518613, "grad_norm": 0.06643196940422058, "learning_rate": 9.180940414305241e-06, "loss": 0.0033, "step": 32550 }, { "epoch": 0.5327660967029371, "grad_norm": 0.07685931771993637, "learning_rate": 9.180157119191711e-06, "loss": 0.0029, "step": 32560 }, { "epoch": 0.532929722654013, "grad_norm": 0.08952626585960388, "learning_rate": 9.179373483158788e-06, "loss": 0.002, "step": 32570 }, { "epoch": 0.5330933486050887, "grad_norm": 0.09441076964139938, "learning_rate": 9.178589506270382e-06, "loss": 0.0027, "step": 32580 }, { "epoch": 0.5332569745561646, "grad_norm": 0.0432206429541111, "learning_rate": 9.177805188590435e-06, "loss": 0.0023, "step": 32590 }, { "epoch": 0.5334206005072405, "grad_norm": 0.1083078682422638, "learning_rate": 9.177020530182908e-06, "loss": 0.0021, "step": 32600 }, { "epoch": 0.5335842264583163, "grad_norm": 0.10792893171310425, "learning_rate": 9.176235531111799e-06, "loss": 0.0026, "step": 32610 }, { "epoch": 0.5337478524093922, "grad_norm": 0.1392124891281128, "learning_rate": 9.17545019144113e-06, "loss": 0.003, "step": 32620 }, { "epoch": 0.5339114783604679, "grad_norm": 0.08289247751235962, "learning_rate": 9.174664511234947e-06, "loss": 0.0029, "step": 32630 }, { "epoch": 0.5340751043115438, "grad_norm": 0.01622731238603592, "learning_rate": 9.173878490557332e-06, "loss": 0.002, "step": 32640 }, { "epoch": 0.5342387302626197, "grad_norm": 0.04721998795866966, "learning_rate": 9.173092129472388e-06, "loss": 0.002, "step": 32650 }, { "epoch": 0.5344023562136955, "grad_norm": 0.06475071609020233, "learning_rate": 9.172305428044248e-06, "loss": 0.0019, "step": 32660 }, { "epoch": 0.5345659821647714, "grad_norm": 0.09046278893947601, "learning_rate": 9.171518386337073e-06, "loss": 0.0021, "step": 32670 }, { "epoch": 0.5347296081158471, "grad_norm": 0.1746835708618164, "learning_rate": 9.17073100441505e-06, "loss": 0.0023, "step": 32680 }, { "epoch": 0.534893234066923, "grad_norm": 0.13488642871379852, "learning_rate": 9.169943282342398e-06, "loss": 0.0022, "step": 32690 }, { "epoch": 0.5350568600179989, "grad_norm": 0.1203932836651802, "learning_rate": 9.169155220183357e-06, "loss": 0.0023, "step": 32700 }, { "epoch": 0.5352204859690747, "grad_norm": 0.05865379050374031, "learning_rate": 9.168366818002203e-06, "loss": 0.0019, "step": 32710 }, { "epoch": 0.5353841119201506, "grad_norm": 0.05368148535490036, "learning_rate": 9.167578075863232e-06, "loss": 0.0022, "step": 32720 }, { "epoch": 0.5355477378712263, "grad_norm": 0.1944589614868164, "learning_rate": 9.166788993830773e-06, "loss": 0.0035, "step": 32730 }, { "epoch": 0.5357113638223022, "grad_norm": 0.25233814120292664, "learning_rate": 9.16599957196918e-06, "loss": 0.0026, "step": 32740 }, { "epoch": 0.5358749897733781, "grad_norm": 0.04803385213017464, "learning_rate": 9.165209810342835e-06, "loss": 0.0026, "step": 32750 }, { "epoch": 0.5360386157244539, "grad_norm": 0.11446692049503326, "learning_rate": 9.16441970901615e-06, "loss": 0.0042, "step": 32760 }, { "epoch": 0.5362022416755298, "grad_norm": 0.1486598551273346, "learning_rate": 9.163629268053564e-06, "loss": 0.0035, "step": 32770 }, { "epoch": 0.5363658676266055, "grad_norm": 0.13443703949451447, "learning_rate": 9.162838487519539e-06, "loss": 0.0073, "step": 32780 }, { "epoch": 0.5365294935776814, "grad_norm": 0.11499325931072235, "learning_rate": 9.16204736747857e-06, "loss": 0.002, "step": 32790 }, { "epoch": 0.5366931195287573, "grad_norm": 0.07831773161888123, "learning_rate": 9.161255907995177e-06, "loss": 0.0028, "step": 32800 }, { "epoch": 0.5368567454798331, "grad_norm": 0.09263172000646591, "learning_rate": 9.160464109133913e-06, "loss": 0.0051, "step": 32810 }, { "epoch": 0.537020371430909, "grad_norm": 0.041871313005685806, "learning_rate": 9.15967197095935e-06, "loss": 0.0019, "step": 32820 }, { "epoch": 0.5371839973819847, "grad_norm": 0.20193035900592804, "learning_rate": 9.158879493536092e-06, "loss": 0.0019, "step": 32830 }, { "epoch": 0.5373476233330606, "grad_norm": 0.04876742511987686, "learning_rate": 9.158086676928773e-06, "loss": 0.003, "step": 32840 }, { "epoch": 0.5375112492841365, "grad_norm": 0.04884132370352745, "learning_rate": 9.157293521202053e-06, "loss": 0.0025, "step": 32850 }, { "epoch": 0.5376748752352123, "grad_norm": 0.10001318901777267, "learning_rate": 9.156500026420616e-06, "loss": 0.0022, "step": 32860 }, { "epoch": 0.5378385011862882, "grad_norm": 0.12856820225715637, "learning_rate": 9.155706192649181e-06, "loss": 0.0034, "step": 32870 }, { "epoch": 0.538002127137364, "grad_norm": 0.05957594886422157, "learning_rate": 9.154912019952485e-06, "loss": 0.0032, "step": 32880 }, { "epoch": 0.5381657530884398, "grad_norm": 0.043399207293987274, "learning_rate": 9.154117508395303e-06, "loss": 0.0022, "step": 32890 }, { "epoch": 0.5383293790395157, "grad_norm": 0.07214365154504776, "learning_rate": 9.153322658042429e-06, "loss": 0.0037, "step": 32900 }, { "epoch": 0.5384930049905915, "grad_norm": 0.08859546482563019, "learning_rate": 9.152527468958692e-06, "loss": 0.0014, "step": 32910 }, { "epoch": 0.5386566309416674, "grad_norm": 0.17552126944065094, "learning_rate": 9.15173194120894e-06, "loss": 0.0025, "step": 32920 }, { "epoch": 0.5388202568927432, "grad_norm": 0.07439539581537247, "learning_rate": 9.150936074858057e-06, "loss": 0.0043, "step": 32930 }, { "epoch": 0.538983882843819, "grad_norm": 0.07390560209751129, "learning_rate": 9.150139869970951e-06, "loss": 0.0036, "step": 32940 }, { "epoch": 0.5391475087948949, "grad_norm": 0.10978835821151733, "learning_rate": 9.149343326612557e-06, "loss": 0.002, "step": 32950 }, { "epoch": 0.5393111347459707, "grad_norm": 0.02490217424929142, "learning_rate": 9.14854644484784e-06, "loss": 0.0018, "step": 32960 }, { "epoch": 0.5394747606970466, "grad_norm": 0.13357634842395782, "learning_rate": 9.147749224741788e-06, "loss": 0.0033, "step": 32970 }, { "epoch": 0.5396383866481224, "grad_norm": 0.11878160387277603, "learning_rate": 9.14695166635942e-06, "loss": 0.0022, "step": 32980 }, { "epoch": 0.5398020125991982, "grad_norm": 0.14954353868961334, "learning_rate": 9.146153769765786e-06, "loss": 0.0023, "step": 32990 }, { "epoch": 0.539965638550274, "grad_norm": 0.1649288684129715, "learning_rate": 9.145355535025955e-06, "loss": 0.0029, "step": 33000 }, { "epoch": 0.5401292645013499, "grad_norm": 0.4143097698688507, "learning_rate": 9.144556962205032e-06, "loss": 0.0041, "step": 33010 }, { "epoch": 0.5402928904524258, "grad_norm": 0.06574437767267227, "learning_rate": 9.14375805136814e-06, "loss": 0.0022, "step": 33020 }, { "epoch": 0.5404565164035016, "grad_norm": 0.05873316526412964, "learning_rate": 9.142958802580445e-06, "loss": 0.0037, "step": 33030 }, { "epoch": 0.5406201423545774, "grad_norm": 0.08040419965982437, "learning_rate": 9.142159215907123e-06, "loss": 0.0028, "step": 33040 }, { "epoch": 0.5407837683056532, "grad_norm": 0.08465006947517395, "learning_rate": 9.14135929141339e-06, "loss": 0.0016, "step": 33050 }, { "epoch": 0.5409473942567291, "grad_norm": 0.11002630740404129, "learning_rate": 9.140559029164479e-06, "loss": 0.0038, "step": 33060 }, { "epoch": 0.541111020207805, "grad_norm": 0.040398065000772476, "learning_rate": 9.139758429225664e-06, "loss": 0.0023, "step": 33070 }, { "epoch": 0.5412746461588808, "grad_norm": 0.11514900624752045, "learning_rate": 9.138957491662237e-06, "loss": 0.0034, "step": 33080 }, { "epoch": 0.5414382721099567, "grad_norm": 0.06339000165462494, "learning_rate": 9.138156216539517e-06, "loss": 0.0022, "step": 33090 }, { "epoch": 0.5416018980610324, "grad_norm": 0.0937352105975151, "learning_rate": 9.137354603922857e-06, "loss": 0.0031, "step": 33100 }, { "epoch": 0.5417655240121083, "grad_norm": 0.09305834025144577, "learning_rate": 9.136552653877631e-06, "loss": 0.0027, "step": 33110 }, { "epoch": 0.5419291499631842, "grad_norm": 0.06475158780813217, "learning_rate": 9.135750366469245e-06, "loss": 0.0034, "step": 33120 }, { "epoch": 0.54209277591426, "grad_norm": 0.05281263589859009, "learning_rate": 9.13494774176313e-06, "loss": 0.0033, "step": 33130 }, { "epoch": 0.5422564018653359, "grad_norm": 0.09146049618721008, "learning_rate": 9.134144779824744e-06, "loss": 0.0034, "step": 33140 }, { "epoch": 0.5424200278164116, "grad_norm": 0.22583580017089844, "learning_rate": 9.133341480719575e-06, "loss": 0.0029, "step": 33150 }, { "epoch": 0.5425836537674875, "grad_norm": 0.13174423575401306, "learning_rate": 9.13253784451314e-06, "loss": 0.0047, "step": 33160 }, { "epoch": 0.5427472797185634, "grad_norm": 0.11868665367364883, "learning_rate": 9.131733871270978e-06, "loss": 0.0029, "step": 33170 }, { "epoch": 0.5429109056696392, "grad_norm": 0.1526699662208557, "learning_rate": 9.130929561058658e-06, "loss": 0.0024, "step": 33180 }, { "epoch": 0.5430745316207151, "grad_norm": 0.06724399328231812, "learning_rate": 9.130124913941779e-06, "loss": 0.0029, "step": 33190 }, { "epoch": 0.5432381575717908, "grad_norm": 0.06776267290115356, "learning_rate": 9.129319929985963e-06, "loss": 0.0025, "step": 33200 }, { "epoch": 0.5434017835228667, "grad_norm": 0.04374212399125099, "learning_rate": 9.128514609256863e-06, "loss": 0.003, "step": 33210 }, { "epoch": 0.5435654094739426, "grad_norm": 0.13316085934638977, "learning_rate": 9.127708951820156e-06, "loss": 0.0033, "step": 33220 }, { "epoch": 0.5437290354250184, "grad_norm": 0.061265528202056885, "learning_rate": 9.126902957741552e-06, "loss": 0.0016, "step": 33230 }, { "epoch": 0.5438926613760943, "grad_norm": 0.0881817489862442, "learning_rate": 9.126096627086785e-06, "loss": 0.0084, "step": 33240 }, { "epoch": 0.54405628732717, "grad_norm": 0.2160559743642807, "learning_rate": 9.125289959921613e-06, "loss": 0.0028, "step": 33250 }, { "epoch": 0.5442199132782459, "grad_norm": 0.07653578370809555, "learning_rate": 9.124482956311828e-06, "loss": 0.0033, "step": 33260 }, { "epoch": 0.5443835392293218, "grad_norm": 0.0900450050830841, "learning_rate": 9.123675616323246e-06, "loss": 0.0025, "step": 33270 }, { "epoch": 0.5445471651803976, "grad_norm": 0.1406930685043335, "learning_rate": 9.122867940021712e-06, "loss": 0.0022, "step": 33280 }, { "epoch": 0.5447107911314735, "grad_norm": 0.11342816799879074, "learning_rate": 9.122059927473094e-06, "loss": 0.0031, "step": 33290 }, { "epoch": 0.5448744170825492, "grad_norm": 0.16277405619621277, "learning_rate": 9.121251578743294e-06, "loss": 0.0029, "step": 33300 }, { "epoch": 0.5450380430336251, "grad_norm": 0.016029680147767067, "learning_rate": 9.120442893898237e-06, "loss": 0.0044, "step": 33310 }, { "epoch": 0.545201668984701, "grad_norm": 0.13244576752185822, "learning_rate": 9.119633873003875e-06, "loss": 0.0017, "step": 33320 }, { "epoch": 0.5453652949357768, "grad_norm": 0.2600537836551666, "learning_rate": 9.118824516126193e-06, "loss": 0.003, "step": 33330 }, { "epoch": 0.5455289208868527, "grad_norm": 0.017338261008262634, "learning_rate": 9.118014823331193e-06, "loss": 0.0054, "step": 33340 }, { "epoch": 0.5456925468379284, "grad_norm": 0.11253099143505096, "learning_rate": 9.11720479468492e-06, "loss": 0.0025, "step": 33350 }, { "epoch": 0.5458561727890043, "grad_norm": 0.05320839583873749, "learning_rate": 9.116394430253428e-06, "loss": 0.0015, "step": 33360 }, { "epoch": 0.5460197987400802, "grad_norm": 0.09717463701963425, "learning_rate": 9.115583730102813e-06, "loss": 0.0034, "step": 33370 }, { "epoch": 0.546183424691156, "grad_norm": 0.011118598282337189, "learning_rate": 9.11477269429919e-06, "loss": 0.0027, "step": 33380 }, { "epoch": 0.5463470506422319, "grad_norm": 0.16390582919120789, "learning_rate": 9.113961322908707e-06, "loss": 0.0032, "step": 33390 }, { "epoch": 0.5465106765933077, "grad_norm": 0.07863014191389084, "learning_rate": 9.113149615997535e-06, "loss": 0.0037, "step": 33400 }, { "epoch": 0.5466743025443835, "grad_norm": 0.04523053020238876, "learning_rate": 9.112337573631875e-06, "loss": 0.0047, "step": 33410 }, { "epoch": 0.5468379284954594, "grad_norm": 0.06712795794010162, "learning_rate": 9.111525195877952e-06, "loss": 0.0023, "step": 33420 }, { "epoch": 0.5470015544465352, "grad_norm": 0.06714444607496262, "learning_rate": 9.110712482802026e-06, "loss": 0.0027, "step": 33430 }, { "epoch": 0.5471651803976111, "grad_norm": 0.1865249127149582, "learning_rate": 9.109899434470373e-06, "loss": 0.003, "step": 33440 }, { "epoch": 0.5473288063486869, "grad_norm": 0.17515318095684052, "learning_rate": 9.109086050949307e-06, "loss": 0.0021, "step": 33450 }, { "epoch": 0.5474924322997627, "grad_norm": 0.17247720062732697, "learning_rate": 9.108272332305161e-06, "loss": 0.0021, "step": 33460 }, { "epoch": 0.5476560582508386, "grad_norm": 0.07520222663879395, "learning_rate": 9.107458278604302e-06, "loss": 0.0025, "step": 33470 }, { "epoch": 0.5478196842019144, "grad_norm": 0.4124133288860321, "learning_rate": 9.106643889913122e-06, "loss": 0.0027, "step": 33480 }, { "epoch": 0.5479833101529903, "grad_norm": 0.12948864698410034, "learning_rate": 9.105829166298037e-06, "loss": 0.0029, "step": 33490 }, { "epoch": 0.5481469361040661, "grad_norm": 0.1656457632780075, "learning_rate": 9.105014107825493e-06, "loss": 0.0028, "step": 33500 }, { "epoch": 0.548310562055142, "grad_norm": 0.2449573129415512, "learning_rate": 9.104198714561968e-06, "loss": 0.0025, "step": 33510 }, { "epoch": 0.5484741880062178, "grad_norm": 0.06181897595524788, "learning_rate": 9.103382986573957e-06, "loss": 0.002, "step": 33520 }, { "epoch": 0.5486378139572936, "grad_norm": 0.05906658619642258, "learning_rate": 9.102566923927991e-06, "loss": 0.006, "step": 33530 }, { "epoch": 0.5488014399083695, "grad_norm": 0.07057667523622513, "learning_rate": 9.101750526690626e-06, "loss": 0.0017, "step": 33540 }, { "epoch": 0.5489650658594453, "grad_norm": 0.09633629024028778, "learning_rate": 9.100933794928442e-06, "loss": 0.0035, "step": 33550 }, { "epoch": 0.5491286918105212, "grad_norm": 0.19643664360046387, "learning_rate": 9.100116728708052e-06, "loss": 0.0027, "step": 33560 }, { "epoch": 0.549292317761597, "grad_norm": 0.14608485996723175, "learning_rate": 9.09929932809609e-06, "loss": 0.0028, "step": 33570 }, { "epoch": 0.5494559437126728, "grad_norm": 0.09090889990329742, "learning_rate": 9.098481593159223e-06, "loss": 0.0036, "step": 33580 }, { "epoch": 0.5496195696637487, "grad_norm": 0.10197705030441284, "learning_rate": 9.09766352396414e-06, "loss": 0.0027, "step": 33590 }, { "epoch": 0.5497831956148245, "grad_norm": 0.06614929437637329, "learning_rate": 9.096845120577561e-06, "loss": 0.0023, "step": 33600 }, { "epoch": 0.5499468215659004, "grad_norm": 0.046114519238471985, "learning_rate": 9.096026383066234e-06, "loss": 0.0027, "step": 33610 }, { "epoch": 0.5501104475169762, "grad_norm": 0.022980744019150734, "learning_rate": 9.095207311496932e-06, "loss": 0.0018, "step": 33620 }, { "epoch": 0.550274073468052, "grad_norm": 0.05219675227999687, "learning_rate": 9.094387905936452e-06, "loss": 0.0034, "step": 33630 }, { "epoch": 0.5504376994191279, "grad_norm": 0.03392721712589264, "learning_rate": 9.093568166451627e-06, "loss": 0.0021, "step": 33640 }, { "epoch": 0.5506013253702037, "grad_norm": 0.10393422096967697, "learning_rate": 9.092748093109309e-06, "loss": 0.003, "step": 33650 }, { "epoch": 0.5507649513212796, "grad_norm": 0.042780663818120956, "learning_rate": 9.091927685976382e-06, "loss": 0.0026, "step": 33660 }, { "epoch": 0.5509285772723554, "grad_norm": 0.09566706418991089, "learning_rate": 9.091106945119754e-06, "loss": 0.0017, "step": 33670 }, { "epoch": 0.5510922032234312, "grad_norm": 0.1961510330438614, "learning_rate": 9.090285870606363e-06, "loss": 0.0027, "step": 33680 }, { "epoch": 0.5512558291745071, "grad_norm": 0.06689534336328506, "learning_rate": 9.089464462503172e-06, "loss": 0.0025, "step": 33690 }, { "epoch": 0.5514194551255829, "grad_norm": 0.02609667181968689, "learning_rate": 9.088642720877174e-06, "loss": 0.0034, "step": 33700 }, { "epoch": 0.5515830810766588, "grad_norm": 0.03313341364264488, "learning_rate": 9.087820645795386e-06, "loss": 0.0017, "step": 33710 }, { "epoch": 0.5517467070277347, "grad_norm": 0.19971023499965668, "learning_rate": 9.086998237324855e-06, "loss": 0.0022, "step": 33720 }, { "epoch": 0.5519103329788104, "grad_norm": 0.07108059525489807, "learning_rate": 9.086175495532653e-06, "loss": 0.0028, "step": 33730 }, { "epoch": 0.5520739589298863, "grad_norm": 0.23887395858764648, "learning_rate": 9.085352420485878e-06, "loss": 0.0044, "step": 33740 }, { "epoch": 0.5522375848809621, "grad_norm": 0.024461984634399414, "learning_rate": 9.084529012251661e-06, "loss": 0.0015, "step": 33750 }, { "epoch": 0.552401210832038, "grad_norm": 0.03654909506440163, "learning_rate": 9.083705270897153e-06, "loss": 0.003, "step": 33760 }, { "epoch": 0.5525648367831139, "grad_norm": 0.05137854442000389, "learning_rate": 9.082881196489538e-06, "loss": 0.004, "step": 33770 }, { "epoch": 0.5527284627341896, "grad_norm": 0.1305990070104599, "learning_rate": 9.082056789096024e-06, "loss": 0.0013, "step": 33780 }, { "epoch": 0.5528920886852655, "grad_norm": 0.06542232632637024, "learning_rate": 9.081232048783847e-06, "loss": 0.0018, "step": 33790 }, { "epoch": 0.5530557146363413, "grad_norm": 0.2352975606918335, "learning_rate": 9.080406975620269e-06, "loss": 0.0036, "step": 33800 }, { "epoch": 0.5532193405874172, "grad_norm": 0.049139637500047684, "learning_rate": 9.07958156967258e-06, "loss": 0.0027, "step": 33810 }, { "epoch": 0.5533829665384931, "grad_norm": 0.18974940478801727, "learning_rate": 9.078755831008099e-06, "loss": 0.0035, "step": 33820 }, { "epoch": 0.5535465924895688, "grad_norm": 0.0452951043844223, "learning_rate": 9.077929759694171e-06, "loss": 0.002, "step": 33830 }, { "epoch": 0.5537102184406447, "grad_norm": 0.07449331879615784, "learning_rate": 9.077103355798163e-06, "loss": 0.002, "step": 33840 }, { "epoch": 0.5538738443917205, "grad_norm": 0.2256460338830948, "learning_rate": 9.076276619387478e-06, "loss": 0.0053, "step": 33850 }, { "epoch": 0.5540374703427964, "grad_norm": 0.029968148097395897, "learning_rate": 9.075449550529542e-06, "loss": 0.0025, "step": 33860 }, { "epoch": 0.5542010962938722, "grad_norm": 0.06543967872858047, "learning_rate": 9.074622149291805e-06, "loss": 0.0026, "step": 33870 }, { "epoch": 0.554364722244948, "grad_norm": 0.12374580651521683, "learning_rate": 9.07379441574175e-06, "loss": 0.0044, "step": 33880 }, { "epoch": 0.5545283481960239, "grad_norm": 0.044838760048151016, "learning_rate": 9.072966349946881e-06, "loss": 0.0023, "step": 33890 }, { "epoch": 0.5546919741470997, "grad_norm": 0.14062732458114624, "learning_rate": 9.072137951974736e-06, "loss": 0.002, "step": 33900 }, { "epoch": 0.5548556000981756, "grad_norm": 0.126304030418396, "learning_rate": 9.071309221892873e-06, "loss": 0.003, "step": 33910 }, { "epoch": 0.5550192260492514, "grad_norm": 0.06975863873958588, "learning_rate": 9.07048015976888e-06, "loss": 0.002, "step": 33920 }, { "epoch": 0.5551828520003272, "grad_norm": 0.3728843331336975, "learning_rate": 9.069650765670376e-06, "loss": 0.0032, "step": 33930 }, { "epoch": 0.5553464779514031, "grad_norm": 0.06448273360729218, "learning_rate": 9.068821039665e-06, "loss": 0.0029, "step": 33940 }, { "epoch": 0.5555101039024789, "grad_norm": 0.027964968234300613, "learning_rate": 9.067990981820427e-06, "loss": 0.0043, "step": 33950 }, { "epoch": 0.5556737298535548, "grad_norm": 0.12232799082994461, "learning_rate": 9.067160592204346e-06, "loss": 0.0029, "step": 33960 }, { "epoch": 0.5558373558046306, "grad_norm": 0.0456613190472126, "learning_rate": 9.066329870884488e-06, "loss": 0.0036, "step": 33970 }, { "epoch": 0.5560009817557064, "grad_norm": 0.06744259595870972, "learning_rate": 9.0654988179286e-06, "loss": 0.0025, "step": 33980 }, { "epoch": 0.5561646077067823, "grad_norm": 0.08431285619735718, "learning_rate": 9.06466743340446e-06, "loss": 0.0036, "step": 33990 }, { "epoch": 0.5563282336578581, "grad_norm": 0.01698785275220871, "learning_rate": 9.063835717379872e-06, "loss": 0.003, "step": 34000 }, { "epoch": 0.556491859608934, "grad_norm": 0.02922319620847702, "learning_rate": 9.06300366992267e-06, "loss": 0.0026, "step": 34010 }, { "epoch": 0.5566554855600098, "grad_norm": 0.050951965153217316, "learning_rate": 9.062171291100713e-06, "loss": 0.0024, "step": 34020 }, { "epoch": 0.5568191115110857, "grad_norm": 0.05026979371905327, "learning_rate": 9.061338580981884e-06, "loss": 0.0027, "step": 34030 }, { "epoch": 0.5569827374621615, "grad_norm": 0.16412629187107086, "learning_rate": 9.0605055396341e-06, "loss": 0.0028, "step": 34040 }, { "epoch": 0.5571463634132373, "grad_norm": 0.16496610641479492, "learning_rate": 9.0596721671253e-06, "loss": 0.0033, "step": 34050 }, { "epoch": 0.5573099893643132, "grad_norm": 0.0889497920870781, "learning_rate": 9.05883846352345e-06, "loss": 0.0019, "step": 34060 }, { "epoch": 0.557473615315389, "grad_norm": 0.14715588092803955, "learning_rate": 9.058004428896544e-06, "loss": 0.0024, "step": 34070 }, { "epoch": 0.5576372412664649, "grad_norm": 0.07355787605047226, "learning_rate": 9.0571700633126e-06, "loss": 0.0035, "step": 34080 }, { "epoch": 0.5578008672175407, "grad_norm": 0.127385675907135, "learning_rate": 9.056335366839674e-06, "loss": 0.0033, "step": 34090 }, { "epoch": 0.5579644931686165, "grad_norm": 0.1759384125471115, "learning_rate": 9.055500339545834e-06, "loss": 0.003, "step": 34100 }, { "epoch": 0.5581281191196924, "grad_norm": 0.06901757419109344, "learning_rate": 9.054664981499185e-06, "loss": 0.003, "step": 34110 }, { "epoch": 0.5582917450707682, "grad_norm": 0.04467601329088211, "learning_rate": 9.053829292767855e-06, "loss": 0.0032, "step": 34120 }, { "epoch": 0.5584553710218441, "grad_norm": 0.08457455039024353, "learning_rate": 9.05299327342e-06, "loss": 0.002, "step": 34130 }, { "epoch": 0.55861899697292, "grad_norm": 0.18473650515079498, "learning_rate": 9.052156923523803e-06, "loss": 0.0039, "step": 34140 }, { "epoch": 0.5587826229239957, "grad_norm": 0.05865251272916794, "learning_rate": 9.051320243147474e-06, "loss": 0.0025, "step": 34150 }, { "epoch": 0.5589462488750716, "grad_norm": 0.07468479126691818, "learning_rate": 9.050483232359251e-06, "loss": 0.002, "step": 34160 }, { "epoch": 0.5591098748261474, "grad_norm": 0.0697217732667923, "learning_rate": 9.049645891227395e-06, "loss": 0.002, "step": 34170 }, { "epoch": 0.5592735007772233, "grad_norm": 0.1778220534324646, "learning_rate": 9.0488082198202e-06, "loss": 0.0034, "step": 34180 }, { "epoch": 0.5594371267282992, "grad_norm": 0.018835963681340218, "learning_rate": 9.04797021820598e-06, "loss": 0.003, "step": 34190 }, { "epoch": 0.5596007526793749, "grad_norm": 0.06329817324876785, "learning_rate": 9.047131886453081e-06, "loss": 0.0033, "step": 34200 }, { "epoch": 0.5597643786304508, "grad_norm": 0.11150910705327988, "learning_rate": 9.046293224629876e-06, "loss": 0.003, "step": 34210 }, { "epoch": 0.5599280045815266, "grad_norm": 0.06717385351657867, "learning_rate": 9.045454232804764e-06, "loss": 0.002, "step": 34220 }, { "epoch": 0.5600916305326025, "grad_norm": 0.04742787778377533, "learning_rate": 9.044614911046166e-06, "loss": 0.0019, "step": 34230 }, { "epoch": 0.5602552564836784, "grad_norm": 0.13319995999336243, "learning_rate": 9.043775259422539e-06, "loss": 0.0027, "step": 34240 }, { "epoch": 0.5604188824347541, "grad_norm": 0.02570258267223835, "learning_rate": 9.042935278002356e-06, "loss": 0.0015, "step": 34250 }, { "epoch": 0.56058250838583, "grad_norm": 0.17978332936763763, "learning_rate": 9.042094966854131e-06, "loss": 0.0028, "step": 34260 }, { "epoch": 0.5607461343369058, "grad_norm": 0.06665828824043274, "learning_rate": 9.041254326046392e-06, "loss": 0.0029, "step": 34270 }, { "epoch": 0.5609097602879817, "grad_norm": 0.1326732039451599, "learning_rate": 9.040413355647697e-06, "loss": 0.0023, "step": 34280 }, { "epoch": 0.5610733862390576, "grad_norm": 0.05947272479534149, "learning_rate": 9.039572055726637e-06, "loss": 0.0037, "step": 34290 }, { "epoch": 0.5612370121901333, "grad_norm": 0.20495085418224335, "learning_rate": 9.038730426351826e-06, "loss": 0.0038, "step": 34300 }, { "epoch": 0.5614006381412092, "grad_norm": 0.4255751967430115, "learning_rate": 9.0378884675919e-06, "loss": 0.0024, "step": 34310 }, { "epoch": 0.561564264092285, "grad_norm": 0.10219269245862961, "learning_rate": 9.037046179515529e-06, "loss": 0.004, "step": 34320 }, { "epoch": 0.5617278900433609, "grad_norm": 0.06360520422458649, "learning_rate": 9.036203562191408e-06, "loss": 0.0029, "step": 34330 }, { "epoch": 0.5618915159944368, "grad_norm": 0.1153009831905365, "learning_rate": 9.035360615688255e-06, "loss": 0.0039, "step": 34340 }, { "epoch": 0.5620551419455125, "grad_norm": 0.11293741315603256, "learning_rate": 9.034517340074822e-06, "loss": 0.0042, "step": 34350 }, { "epoch": 0.5622187678965884, "grad_norm": 0.15900881588459015, "learning_rate": 9.03367373541988e-06, "loss": 0.0026, "step": 34360 }, { "epoch": 0.5623823938476642, "grad_norm": 0.03355743736028671, "learning_rate": 9.032829801792232e-06, "loss": 0.0017, "step": 34370 }, { "epoch": 0.5625460197987401, "grad_norm": 0.14817260205745697, "learning_rate": 9.031985539260705e-06, "loss": 0.0041, "step": 34380 }, { "epoch": 0.562709645749816, "grad_norm": 0.18024475872516632, "learning_rate": 9.031140947894158e-06, "loss": 0.0059, "step": 34390 }, { "epoch": 0.5628732717008917, "grad_norm": 0.037742141634225845, "learning_rate": 9.030296027761469e-06, "loss": 0.0017, "step": 34400 }, { "epoch": 0.5630368976519676, "grad_norm": 0.08214405179023743, "learning_rate": 9.029450778931548e-06, "loss": 0.0016, "step": 34410 }, { "epoch": 0.5632005236030434, "grad_norm": 0.10488860309123993, "learning_rate": 9.028605201473331e-06, "loss": 0.0027, "step": 34420 }, { "epoch": 0.5633641495541193, "grad_norm": 0.07780135422945023, "learning_rate": 9.02775929545578e-06, "loss": 0.0026, "step": 34430 }, { "epoch": 0.5635277755051952, "grad_norm": 0.15384942293167114, "learning_rate": 9.026913060947887e-06, "loss": 0.0027, "step": 34440 }, { "epoch": 0.563691401456271, "grad_norm": 0.06369028985500336, "learning_rate": 9.026066498018661e-06, "loss": 0.0023, "step": 34450 }, { "epoch": 0.5638550274073468, "grad_norm": 0.2127196192741394, "learning_rate": 9.025219606737152e-06, "loss": 0.0045, "step": 34460 }, { "epoch": 0.5640186533584226, "grad_norm": 0.08209564536809921, "learning_rate": 9.024372387172426e-06, "loss": 0.0033, "step": 34470 }, { "epoch": 0.5641822793094985, "grad_norm": 0.04950880631804466, "learning_rate": 9.023524839393582e-06, "loss": 0.0029, "step": 34480 }, { "epoch": 0.5643459052605744, "grad_norm": 0.09371622651815414, "learning_rate": 9.02267696346974e-06, "loss": 0.004, "step": 34490 }, { "epoch": 0.5645095312116502, "grad_norm": 0.14973004162311554, "learning_rate": 9.021828759470051e-06, "loss": 0.0034, "step": 34500 }, { "epoch": 0.564673157162726, "grad_norm": 0.13089285790920258, "learning_rate": 9.020980227463692e-06, "loss": 0.0018, "step": 34510 }, { "epoch": 0.5648367831138018, "grad_norm": 0.029335087165236473, "learning_rate": 9.020131367519866e-06, "loss": 0.003, "step": 34520 }, { "epoch": 0.5650004090648777, "grad_norm": 0.1268162578344345, "learning_rate": 9.019282179707805e-06, "loss": 0.0034, "step": 34530 }, { "epoch": 0.5651640350159536, "grad_norm": 0.03817616030573845, "learning_rate": 9.01843266409676e-06, "loss": 0.002, "step": 34540 }, { "epoch": 0.5653276609670294, "grad_norm": 0.022283917292952538, "learning_rate": 9.017582820756024e-06, "loss": 0.0023, "step": 34550 }, { "epoch": 0.5654912869181052, "grad_norm": 0.12352630496025085, "learning_rate": 9.016732649754898e-06, "loss": 0.0039, "step": 34560 }, { "epoch": 0.565654912869181, "grad_norm": 0.1744757443666458, "learning_rate": 9.015882151162727e-06, "loss": 0.0036, "step": 34570 }, { "epoch": 0.5658185388202569, "grad_norm": 0.07029528170824051, "learning_rate": 9.015031325048869e-06, "loss": 0.0026, "step": 34580 }, { "epoch": 0.5659821647713328, "grad_norm": 0.12010376155376434, "learning_rate": 9.014180171482716e-06, "loss": 0.0023, "step": 34590 }, { "epoch": 0.5661457907224086, "grad_norm": 0.13346995413303375, "learning_rate": 9.013328690533685e-06, "loss": 0.0033, "step": 34600 }, { "epoch": 0.5663094166734844, "grad_norm": 0.17702341079711914, "learning_rate": 9.012476882271222e-06, "loss": 0.0025, "step": 34610 }, { "epoch": 0.5664730426245602, "grad_norm": 0.03770411014556885, "learning_rate": 9.011624746764796e-06, "loss": 0.0057, "step": 34620 }, { "epoch": 0.5666366685756361, "grad_norm": 0.047059137374162674, "learning_rate": 9.010772284083904e-06, "loss": 0.0016, "step": 34630 }, { "epoch": 0.566800294526712, "grad_norm": 0.28435850143432617, "learning_rate": 9.009919494298069e-06, "loss": 0.0034, "step": 34640 }, { "epoch": 0.5669639204777878, "grad_norm": 0.0524299219250679, "learning_rate": 9.009066377476845e-06, "loss": 0.0022, "step": 34650 }, { "epoch": 0.5671275464288636, "grad_norm": 0.022958291694521904, "learning_rate": 9.008212933689806e-06, "loss": 0.0027, "step": 34660 }, { "epoch": 0.5672911723799394, "grad_norm": 0.050073932856321335, "learning_rate": 9.007359163006558e-06, "loss": 0.0038, "step": 34670 }, { "epoch": 0.5674547983310153, "grad_norm": 0.0715407058596611, "learning_rate": 9.00650506549673e-06, "loss": 0.0029, "step": 34680 }, { "epoch": 0.5676184242820912, "grad_norm": 0.27536875009536743, "learning_rate": 9.005650641229981e-06, "loss": 0.0026, "step": 34690 }, { "epoch": 0.567782050233167, "grad_norm": 0.10513325780630112, "learning_rate": 9.004795890275995e-06, "loss": 0.0025, "step": 34700 }, { "epoch": 0.5679456761842429, "grad_norm": 0.125313863158226, "learning_rate": 9.00394081270448e-06, "loss": 0.0024, "step": 34710 }, { "epoch": 0.5681093021353186, "grad_norm": 0.09218619018793106, "learning_rate": 9.003085408585176e-06, "loss": 0.0022, "step": 34720 }, { "epoch": 0.5682729280863945, "grad_norm": 0.13089098036289215, "learning_rate": 9.002229677987845e-06, "loss": 0.0018, "step": 34730 }, { "epoch": 0.5684365540374703, "grad_norm": 0.020297177135944366, "learning_rate": 9.001373620982279e-06, "loss": 0.0037, "step": 34740 }, { "epoch": 0.5686001799885462, "grad_norm": 0.10721893608570099, "learning_rate": 9.000517237638293e-06, "loss": 0.0021, "step": 34750 }, { "epoch": 0.5687638059396221, "grad_norm": 0.12039229273796082, "learning_rate": 8.999660528025733e-06, "loss": 0.0026, "step": 34760 }, { "epoch": 0.5689274318906978, "grad_norm": 0.055747658014297485, "learning_rate": 8.998803492214468e-06, "loss": 0.0042, "step": 34770 }, { "epoch": 0.5690910578417737, "grad_norm": 0.05545589327812195, "learning_rate": 8.997946130274396e-06, "loss": 0.0043, "step": 34780 }, { "epoch": 0.5692546837928495, "grad_norm": 0.1255505383014679, "learning_rate": 8.997088442275439e-06, "loss": 0.0044, "step": 34790 }, { "epoch": 0.5694183097439254, "grad_norm": 0.07918395847082138, "learning_rate": 8.996230428287548e-06, "loss": 0.004, "step": 34800 }, { "epoch": 0.5695819356950013, "grad_norm": 0.17006701231002808, "learning_rate": 8.9953720883807e-06, "loss": 0.003, "step": 34810 }, { "epoch": 0.569745561646077, "grad_norm": 0.1715926080942154, "learning_rate": 8.994513422624898e-06, "loss": 0.0035, "step": 34820 }, { "epoch": 0.5699091875971529, "grad_norm": 0.19922161102294922, "learning_rate": 8.99365443109017e-06, "loss": 0.0037, "step": 34830 }, { "epoch": 0.5700728135482287, "grad_norm": 0.12470365315675735, "learning_rate": 8.992795113846577e-06, "loss": 0.0035, "step": 34840 }, { "epoch": 0.5702364394993046, "grad_norm": 0.07749177515506744, "learning_rate": 8.991935470964198e-06, "loss": 0.0021, "step": 34850 }, { "epoch": 0.5704000654503805, "grad_norm": 0.12300921231508255, "learning_rate": 8.991075502513144e-06, "loss": 0.0024, "step": 34860 }, { "epoch": 0.5705636914014562, "grad_norm": 0.19126859307289124, "learning_rate": 8.990215208563549e-06, "loss": 0.005, "step": 34870 }, { "epoch": 0.5707273173525321, "grad_norm": 0.23512768745422363, "learning_rate": 8.98935458918558e-06, "loss": 0.0039, "step": 34880 }, { "epoch": 0.5708909433036079, "grad_norm": 0.19217929244041443, "learning_rate": 8.988493644449424e-06, "loss": 0.0032, "step": 34890 }, { "epoch": 0.5710545692546838, "grad_norm": 0.14963099360466003, "learning_rate": 8.987632374425293e-06, "loss": 0.005, "step": 34900 }, { "epoch": 0.5712181952057597, "grad_norm": 0.03155124559998512, "learning_rate": 8.986770779183434e-06, "loss": 0.0038, "step": 34910 }, { "epoch": 0.5713818211568354, "grad_norm": 0.14544332027435303, "learning_rate": 8.985908858794115e-06, "loss": 0.002, "step": 34920 }, { "epoch": 0.5715454471079113, "grad_norm": 0.1308814287185669, "learning_rate": 8.985046613327631e-06, "loss": 0.0032, "step": 34930 }, { "epoch": 0.5717090730589871, "grad_norm": 0.2157951295375824, "learning_rate": 8.984184042854303e-06, "loss": 0.0046, "step": 34940 }, { "epoch": 0.571872699010063, "grad_norm": 0.10986444354057312, "learning_rate": 8.98332114744448e-06, "loss": 0.0023, "step": 34950 }, { "epoch": 0.5720363249611389, "grad_norm": 0.1494085192680359, "learning_rate": 8.982457927168537e-06, "loss": 0.0041, "step": 34960 }, { "epoch": 0.5721999509122146, "grad_norm": 0.10345122218132019, "learning_rate": 8.981594382096875e-06, "loss": 0.0037, "step": 34970 }, { "epoch": 0.5723635768632905, "grad_norm": 0.022296082228422165, "learning_rate": 8.980730512299922e-06, "loss": 0.0026, "step": 34980 }, { "epoch": 0.5725272028143663, "grad_norm": 0.19852212071418762, "learning_rate": 8.97986631784813e-06, "loss": 0.0025, "step": 34990 }, { "epoch": 0.5726908287654422, "grad_norm": 0.17523986101150513, "learning_rate": 8.979001798811984e-06, "loss": 0.0043, "step": 35000 }, { "epoch": 0.5728544547165181, "grad_norm": 0.03879449516534805, "learning_rate": 8.978136955261989e-06, "loss": 0.0024, "step": 35010 }, { "epoch": 0.5730180806675939, "grad_norm": 0.21252746880054474, "learning_rate": 8.977271787268678e-06, "loss": 0.0035, "step": 35020 }, { "epoch": 0.5731817066186697, "grad_norm": 0.025494545698165894, "learning_rate": 8.97640629490261e-06, "loss": 0.002, "step": 35030 }, { "epoch": 0.5733453325697455, "grad_norm": 0.07665806263685226, "learning_rate": 8.975540478234377e-06, "loss": 0.0018, "step": 35040 }, { "epoch": 0.5735089585208214, "grad_norm": 0.07661059498786926, "learning_rate": 8.974674337334586e-06, "loss": 0.0027, "step": 35050 }, { "epoch": 0.5736725844718973, "grad_norm": 0.09893879294395447, "learning_rate": 8.97380787227388e-06, "loss": 0.0029, "step": 35060 }, { "epoch": 0.5738362104229731, "grad_norm": 0.1142156571149826, "learning_rate": 8.972941083122923e-06, "loss": 0.0037, "step": 35070 }, { "epoch": 0.5739998363740489, "grad_norm": 0.036033470183610916, "learning_rate": 8.972073969952411e-06, "loss": 0.0029, "step": 35080 }, { "epoch": 0.5741634623251247, "grad_norm": 0.17171558737754822, "learning_rate": 8.971206532833058e-06, "loss": 0.0036, "step": 35090 }, { "epoch": 0.5743270882762006, "grad_norm": 0.08280320465564728, "learning_rate": 8.970338771835612e-06, "loss": 0.0034, "step": 35100 }, { "epoch": 0.5744907142272765, "grad_norm": 0.09014665335416794, "learning_rate": 8.969470687030843e-06, "loss": 0.0025, "step": 35110 }, { "epoch": 0.5746543401783523, "grad_norm": 0.15907633304595947, "learning_rate": 8.96860227848955e-06, "loss": 0.0029, "step": 35120 }, { "epoch": 0.5748179661294281, "grad_norm": 0.10765702277421951, "learning_rate": 8.96773354628256e-06, "loss": 0.002, "step": 35130 }, { "epoch": 0.5749815920805039, "grad_norm": 0.0546712689101696, "learning_rate": 8.96686449048072e-06, "loss": 0.0028, "step": 35140 }, { "epoch": 0.5751452180315798, "grad_norm": 0.1573525071144104, "learning_rate": 8.965995111154909e-06, "loss": 0.0023, "step": 35150 }, { "epoch": 0.5753088439826557, "grad_norm": 0.11107318848371506, "learning_rate": 8.965125408376029e-06, "loss": 0.0022, "step": 35160 }, { "epoch": 0.5754724699337315, "grad_norm": 0.0553484745323658, "learning_rate": 8.964255382215012e-06, "loss": 0.0032, "step": 35170 }, { "epoch": 0.5756360958848074, "grad_norm": 0.16430622339248657, "learning_rate": 8.963385032742813e-06, "loss": 0.0016, "step": 35180 }, { "epoch": 0.5757997218358831, "grad_norm": 0.15215294063091278, "learning_rate": 8.962514360030417e-06, "loss": 0.0055, "step": 35190 }, { "epoch": 0.575963347786959, "grad_norm": 0.10163141787052155, "learning_rate": 8.961643364148829e-06, "loss": 0.003, "step": 35200 }, { "epoch": 0.5761269737380349, "grad_norm": 0.16122987866401672, "learning_rate": 8.960772045169088e-06, "loss": 0.0046, "step": 35210 }, { "epoch": 0.5762905996891107, "grad_norm": 0.15351475775241852, "learning_rate": 8.959900403162255e-06, "loss": 0.0034, "step": 35220 }, { "epoch": 0.5764542256401866, "grad_norm": 0.06374000012874603, "learning_rate": 8.959028438199417e-06, "loss": 0.0043, "step": 35230 }, { "epoch": 0.5766178515912623, "grad_norm": 0.04834722727537155, "learning_rate": 8.958156150351692e-06, "loss": 0.0029, "step": 35240 }, { "epoch": 0.5767814775423382, "grad_norm": 0.04215531051158905, "learning_rate": 8.957283539690215e-06, "loss": 0.0028, "step": 35250 }, { "epoch": 0.5769451034934141, "grad_norm": 0.04755663871765137, "learning_rate": 8.956410606286157e-06, "loss": 0.0019, "step": 35260 }, { "epoch": 0.5771087294444899, "grad_norm": 0.11351222544908524, "learning_rate": 8.955537350210712e-06, "loss": 0.0018, "step": 35270 }, { "epoch": 0.5772723553955658, "grad_norm": 0.1154792532324791, "learning_rate": 8.954663771535098e-06, "loss": 0.0025, "step": 35280 }, { "epoch": 0.5774359813466415, "grad_norm": 0.00735984742641449, "learning_rate": 8.953789870330562e-06, "loss": 0.0022, "step": 35290 }, { "epoch": 0.5775996072977174, "grad_norm": 0.08282400667667389, "learning_rate": 8.952915646668376e-06, "loss": 0.0017, "step": 35300 }, { "epoch": 0.5777632332487933, "grad_norm": 0.10006248205900192, "learning_rate": 8.952041100619837e-06, "loss": 0.0026, "step": 35310 }, { "epoch": 0.5779268591998691, "grad_norm": 0.04867241159081459, "learning_rate": 8.951166232256273e-06, "loss": 0.0032, "step": 35320 }, { "epoch": 0.578090485150945, "grad_norm": 0.18728525936603546, "learning_rate": 8.950291041649037e-06, "loss": 0.0035, "step": 35330 }, { "epoch": 0.5782541111020207, "grad_norm": 0.13466301560401917, "learning_rate": 8.9494155288695e-06, "loss": 0.0037, "step": 35340 }, { "epoch": 0.5784177370530966, "grad_norm": 0.1026085615158081, "learning_rate": 8.948539693989072e-06, "loss": 0.0036, "step": 35350 }, { "epoch": 0.5785813630041725, "grad_norm": 0.15514378249645233, "learning_rate": 8.947663537079178e-06, "loss": 0.0023, "step": 35360 }, { "epoch": 0.5787449889552483, "grad_norm": 0.15846118330955505, "learning_rate": 8.946787058211279e-06, "loss": 0.0033, "step": 35370 }, { "epoch": 0.5789086149063242, "grad_norm": 0.2537915110588074, "learning_rate": 8.945910257456856e-06, "loss": 0.0033, "step": 35380 }, { "epoch": 0.5790722408574, "grad_norm": 0.09212382137775421, "learning_rate": 8.945033134887417e-06, "loss": 0.0046, "step": 35390 }, { "epoch": 0.5792358668084758, "grad_norm": 0.11593269556760788, "learning_rate": 8.944155690574497e-06, "loss": 0.0053, "step": 35400 }, { "epoch": 0.5793994927595517, "grad_norm": 0.11310001462697983, "learning_rate": 8.943277924589659e-06, "loss": 0.0041, "step": 35410 }, { "epoch": 0.5795631187106275, "grad_norm": 0.006186493672430515, "learning_rate": 8.942399837004489e-06, "loss": 0.0027, "step": 35420 }, { "epoch": 0.5797267446617034, "grad_norm": 0.07787127792835236, "learning_rate": 8.9415214278906e-06, "loss": 0.0025, "step": 35430 }, { "epoch": 0.5798903706127791, "grad_norm": 0.17375636100769043, "learning_rate": 8.940642697319637e-06, "loss": 0.0038, "step": 35440 }, { "epoch": 0.580053996563855, "grad_norm": 0.1743973195552826, "learning_rate": 8.939763645363262e-06, "loss": 0.0032, "step": 35450 }, { "epoch": 0.5802176225149309, "grad_norm": 0.24443970620632172, "learning_rate": 8.938884272093166e-06, "loss": 0.003, "step": 35460 }, { "epoch": 0.5803812484660067, "grad_norm": 0.03497815132141113, "learning_rate": 8.938004577581071e-06, "loss": 0.0033, "step": 35470 }, { "epoch": 0.5805448744170826, "grad_norm": 0.11990688741207123, "learning_rate": 8.937124561898723e-06, "loss": 0.0028, "step": 35480 }, { "epoch": 0.5807085003681584, "grad_norm": 0.05437789112329483, "learning_rate": 8.936244225117887e-06, "loss": 0.0028, "step": 35490 }, { "epoch": 0.5808721263192342, "grad_norm": 0.06166590005159378, "learning_rate": 8.935363567310367e-06, "loss": 0.0021, "step": 35500 }, { "epoch": 0.5810357522703101, "grad_norm": 0.005976386368274689, "learning_rate": 8.934482588547983e-06, "loss": 0.0019, "step": 35510 }, { "epoch": 0.5811993782213859, "grad_norm": 0.0694926455616951, "learning_rate": 8.933601288902587e-06, "loss": 0.003, "step": 35520 }, { "epoch": 0.5813630041724618, "grad_norm": 0.06724761426448822, "learning_rate": 8.93271966844605e-06, "loss": 0.0033, "step": 35530 }, { "epoch": 0.5815266301235376, "grad_norm": 0.07907678186893463, "learning_rate": 8.93183772725028e-06, "loss": 0.0028, "step": 35540 }, { "epoch": 0.5816902560746134, "grad_norm": 0.023092232644557953, "learning_rate": 8.930955465387201e-06, "loss": 0.0025, "step": 35550 }, { "epoch": 0.5818538820256893, "grad_norm": 0.18264669179916382, "learning_rate": 8.930072882928768e-06, "loss": 0.0018, "step": 35560 }, { "epoch": 0.5820175079767651, "grad_norm": 0.09096872061491013, "learning_rate": 8.929189979946964e-06, "loss": 0.0031, "step": 35570 }, { "epoch": 0.582181133927841, "grad_norm": 0.03712155669927597, "learning_rate": 8.92830675651379e-06, "loss": 0.0033, "step": 35580 }, { "epoch": 0.5823447598789168, "grad_norm": 0.08800095319747925, "learning_rate": 8.927423212701284e-06, "loss": 0.0024, "step": 35590 }, { "epoch": 0.5825083858299926, "grad_norm": 0.13426180183887482, "learning_rate": 8.926539348581505e-06, "loss": 0.0043, "step": 35600 }, { "epoch": 0.5826720117810685, "grad_norm": 0.04330554977059364, "learning_rate": 8.925655164226534e-06, "loss": 0.0028, "step": 35610 }, { "epoch": 0.5828356377321443, "grad_norm": 0.03764757513999939, "learning_rate": 8.924770659708483e-06, "loss": 0.0032, "step": 35620 }, { "epoch": 0.5829992636832202, "grad_norm": 0.10116902738809586, "learning_rate": 8.923885835099493e-06, "loss": 0.0042, "step": 35630 }, { "epoch": 0.583162889634296, "grad_norm": 0.23714792728424072, "learning_rate": 8.923000690471723e-06, "loss": 0.0043, "step": 35640 }, { "epoch": 0.5833265155853719, "grad_norm": 0.1362658441066742, "learning_rate": 8.922115225897363e-06, "loss": 0.0035, "step": 35650 }, { "epoch": 0.5834901415364476, "grad_norm": 0.061279796063899994, "learning_rate": 8.921229441448632e-06, "loss": 0.0035, "step": 35660 }, { "epoch": 0.5836537674875235, "grad_norm": 0.04157334193587303, "learning_rate": 8.920343337197766e-06, "loss": 0.0023, "step": 35670 }, { "epoch": 0.5838173934385994, "grad_norm": 0.06357670575380325, "learning_rate": 8.919456913217037e-06, "loss": 0.0022, "step": 35680 }, { "epoch": 0.5839810193896752, "grad_norm": 0.03231954947113991, "learning_rate": 8.918570169578736e-06, "loss": 0.0027, "step": 35690 }, { "epoch": 0.5841446453407511, "grad_norm": 0.01109381765127182, "learning_rate": 8.917683106355186e-06, "loss": 0.0007, "step": 35700 }, { "epoch": 0.5843082712918268, "grad_norm": 0.12514245510101318, "learning_rate": 8.91679572361873e-06, "loss": 0.0038, "step": 35710 }, { "epoch": 0.5844718972429027, "grad_norm": 0.1741524487733841, "learning_rate": 8.91590802144174e-06, "loss": 0.0026, "step": 35720 }, { "epoch": 0.5846355231939786, "grad_norm": 0.190440371632576, "learning_rate": 8.915019999896616e-06, "loss": 0.0029, "step": 35730 }, { "epoch": 0.5847991491450544, "grad_norm": 0.05949242040514946, "learning_rate": 8.91413165905578e-06, "loss": 0.0025, "step": 35740 }, { "epoch": 0.5849627750961303, "grad_norm": 0.08088551461696625, "learning_rate": 8.913242998991684e-06, "loss": 0.0024, "step": 35750 }, { "epoch": 0.585126401047206, "grad_norm": 0.13037364184856415, "learning_rate": 8.912354019776804e-06, "loss": 0.0073, "step": 35760 }, { "epoch": 0.5852900269982819, "grad_norm": 0.05738003924489021, "learning_rate": 8.911464721483638e-06, "loss": 0.0041, "step": 35770 }, { "epoch": 0.5854536529493578, "grad_norm": 0.05892040207982063, "learning_rate": 8.91057510418472e-06, "loss": 0.0027, "step": 35780 }, { "epoch": 0.5856172789004336, "grad_norm": 0.042426131665706635, "learning_rate": 8.9096851679526e-06, "loss": 0.0025, "step": 35790 }, { "epoch": 0.5857809048515095, "grad_norm": 0.18582364916801453, "learning_rate": 8.908794912859859e-06, "loss": 0.0035, "step": 35800 }, { "epoch": 0.5859445308025852, "grad_norm": 0.07756080478429794, "learning_rate": 8.907904338979105e-06, "loss": 0.0026, "step": 35810 }, { "epoch": 0.5861081567536611, "grad_norm": 0.10745445638895035, "learning_rate": 8.90701344638297e-06, "loss": 0.0031, "step": 35820 }, { "epoch": 0.586271782704737, "grad_norm": 0.08886758238077164, "learning_rate": 8.90612223514411e-06, "loss": 0.0014, "step": 35830 }, { "epoch": 0.5864354086558128, "grad_norm": 0.08024553209543228, "learning_rate": 8.905230705335209e-06, "loss": 0.0022, "step": 35840 }, { "epoch": 0.5865990346068887, "grad_norm": 0.015699956566095352, "learning_rate": 8.90433885702898e-06, "loss": 0.0017, "step": 35850 }, { "epoch": 0.5867626605579644, "grad_norm": 0.02287263423204422, "learning_rate": 8.903446690298156e-06, "loss": 0.0045, "step": 35860 }, { "epoch": 0.5869262865090403, "grad_norm": 0.021680114790797234, "learning_rate": 8.902554205215503e-06, "loss": 0.0017, "step": 35870 }, { "epoch": 0.5870899124601162, "grad_norm": 0.14445845782756805, "learning_rate": 8.901661401853806e-06, "loss": 0.0025, "step": 35880 }, { "epoch": 0.587253538411192, "grad_norm": 0.14091873168945312, "learning_rate": 8.900768280285878e-06, "loss": 0.0035, "step": 35890 }, { "epoch": 0.5874171643622679, "grad_norm": 0.005799212027341127, "learning_rate": 8.899874840584561e-06, "loss": 0.0019, "step": 35900 }, { "epoch": 0.5875807903133436, "grad_norm": 0.08871165663003922, "learning_rate": 8.898981082822723e-06, "loss": 0.002, "step": 35910 }, { "epoch": 0.5877444162644195, "grad_norm": 0.062440838664770126, "learning_rate": 8.89808700707325e-06, "loss": 0.0027, "step": 35920 }, { "epoch": 0.5879080422154954, "grad_norm": 0.0891856923699379, "learning_rate": 8.897192613409065e-06, "loss": 0.002, "step": 35930 }, { "epoch": 0.5880716681665712, "grad_norm": 0.2031840682029724, "learning_rate": 8.896297901903108e-06, "loss": 0.0031, "step": 35940 }, { "epoch": 0.5882352941176471, "grad_norm": 0.09049496799707413, "learning_rate": 8.895402872628352e-06, "loss": 0.0027, "step": 35950 }, { "epoch": 0.5883989200687229, "grad_norm": 0.11323459446430206, "learning_rate": 8.894507525657792e-06, "loss": 0.0023, "step": 35960 }, { "epoch": 0.5885625460197987, "grad_norm": 0.04053853824734688, "learning_rate": 8.893611861064447e-06, "loss": 0.0025, "step": 35970 }, { "epoch": 0.5887261719708746, "grad_norm": 0.1629210114479065, "learning_rate": 8.892715878921366e-06, "loss": 0.003, "step": 35980 }, { "epoch": 0.5888897979219504, "grad_norm": 0.06607711315155029, "learning_rate": 8.891819579301623e-06, "loss": 0.0033, "step": 35990 }, { "epoch": 0.5890534238730263, "grad_norm": 0.08085120469331741, "learning_rate": 8.890922962278315e-06, "loss": 0.0022, "step": 36000 }, { "epoch": 0.5892170498241021, "grad_norm": 0.07256773114204407, "learning_rate": 8.89002602792457e-06, "loss": 0.003, "step": 36010 }, { "epoch": 0.5893806757751779, "grad_norm": 0.10660307109355927, "learning_rate": 8.889128776313536e-06, "loss": 0.0025, "step": 36020 }, { "epoch": 0.5895443017262538, "grad_norm": 0.11291348189115524, "learning_rate": 8.88823120751839e-06, "loss": 0.0029, "step": 36030 }, { "epoch": 0.5897079276773296, "grad_norm": 0.03786439076066017, "learning_rate": 8.887333321612338e-06, "loss": 0.003, "step": 36040 }, { "epoch": 0.5898715536284055, "grad_norm": 0.13428544998168945, "learning_rate": 8.886435118668604e-06, "loss": 0.0031, "step": 36050 }, { "epoch": 0.5900351795794813, "grad_norm": 0.11745154112577438, "learning_rate": 8.885536598760445e-06, "loss": 0.003, "step": 36060 }, { "epoch": 0.5901988055305571, "grad_norm": 0.09285806119441986, "learning_rate": 8.884637761961142e-06, "loss": 0.0026, "step": 36070 }, { "epoch": 0.590362431481633, "grad_norm": 0.07872680574655533, "learning_rate": 8.883738608343997e-06, "loss": 0.003, "step": 36080 }, { "epoch": 0.5905260574327088, "grad_norm": 0.12723150849342346, "learning_rate": 8.882839137982347e-06, "loss": 0.002, "step": 36090 }, { "epoch": 0.5906896833837847, "grad_norm": 0.03077523224055767, "learning_rate": 8.881939350949547e-06, "loss": 0.0022, "step": 36100 }, { "epoch": 0.5908533093348605, "grad_norm": 0.15075184404850006, "learning_rate": 8.88103924731898e-06, "loss": 0.0031, "step": 36110 }, { "epoch": 0.5910169352859364, "grad_norm": 0.06483548879623413, "learning_rate": 8.880138827164057e-06, "loss": 0.0019, "step": 36120 }, { "epoch": 0.5911805612370122, "grad_norm": 0.07136178016662598, "learning_rate": 8.879238090558214e-06, "loss": 0.002, "step": 36130 }, { "epoch": 0.591344187188088, "grad_norm": 0.12176304310560226, "learning_rate": 8.87833703757491e-06, "loss": 0.0033, "step": 36140 }, { "epoch": 0.5915078131391639, "grad_norm": 0.12080062180757523, "learning_rate": 8.877435668287633e-06, "loss": 0.0016, "step": 36150 }, { "epoch": 0.5916714390902397, "grad_norm": 0.045237571001052856, "learning_rate": 8.876533982769893e-06, "loss": 0.0029, "step": 36160 }, { "epoch": 0.5918350650413156, "grad_norm": 0.05660109594464302, "learning_rate": 8.875631981095232e-06, "loss": 0.0025, "step": 36170 }, { "epoch": 0.5919986909923914, "grad_norm": 0.1006564348936081, "learning_rate": 8.874729663337213e-06, "loss": 0.0042, "step": 36180 }, { "epoch": 0.5921623169434672, "grad_norm": 0.14676658809185028, "learning_rate": 8.873827029569424e-06, "loss": 0.0033, "step": 36190 }, { "epoch": 0.5923259428945431, "grad_norm": 0.027155034244060516, "learning_rate": 8.872924079865483e-06, "loss": 0.002, "step": 36200 }, { "epoch": 0.5924895688456189, "grad_norm": 0.1798582375049591, "learning_rate": 8.87202081429903e-06, "loss": 0.0035, "step": 36210 }, { "epoch": 0.5926531947966948, "grad_norm": 0.1418972611427307, "learning_rate": 8.871117232943737e-06, "loss": 0.0037, "step": 36220 }, { "epoch": 0.5928168207477706, "grad_norm": 0.05614809691905975, "learning_rate": 8.87021333587329e-06, "loss": 0.0026, "step": 36230 }, { "epoch": 0.5929804466988464, "grad_norm": 0.055441975593566895, "learning_rate": 8.86930912316141e-06, "loss": 0.0049, "step": 36240 }, { "epoch": 0.5931440726499223, "grad_norm": 0.053149133920669556, "learning_rate": 8.868404594881846e-06, "loss": 0.0032, "step": 36250 }, { "epoch": 0.5933076986009981, "grad_norm": 0.09360457956790924, "learning_rate": 8.867499751108362e-06, "loss": 0.002, "step": 36260 }, { "epoch": 0.593471324552074, "grad_norm": 0.13754481077194214, "learning_rate": 8.866594591914756e-06, "loss": 0.0028, "step": 36270 }, { "epoch": 0.5936349505031498, "grad_norm": 0.2108798772096634, "learning_rate": 8.865689117374853e-06, "loss": 0.0023, "step": 36280 }, { "epoch": 0.5937985764542256, "grad_norm": 0.05230475962162018, "learning_rate": 8.864783327562496e-06, "loss": 0.002, "step": 36290 }, { "epoch": 0.5939622024053015, "grad_norm": 0.055224016308784485, "learning_rate": 8.86387722255156e-06, "loss": 0.0025, "step": 36300 }, { "epoch": 0.5941258283563773, "grad_norm": 0.09527760744094849, "learning_rate": 8.862970802415945e-06, "loss": 0.004, "step": 36310 }, { "epoch": 0.5942894543074532, "grad_norm": 0.07829606533050537, "learning_rate": 8.862064067229573e-06, "loss": 0.0022, "step": 36320 }, { "epoch": 0.594453080258529, "grad_norm": 0.08064805716276169, "learning_rate": 8.861157017066396e-06, "loss": 0.0042, "step": 36330 }, { "epoch": 0.5946167062096048, "grad_norm": 0.18253670632839203, "learning_rate": 8.86024965200039e-06, "loss": 0.0037, "step": 36340 }, { "epoch": 0.5947803321606807, "grad_norm": 0.06974855065345764, "learning_rate": 8.859341972105555e-06, "loss": 0.0029, "step": 36350 }, { "epoch": 0.5949439581117565, "grad_norm": 0.14941421151161194, "learning_rate": 8.858433977455921e-06, "loss": 0.0026, "step": 36360 }, { "epoch": 0.5951075840628324, "grad_norm": 0.2193268984556198, "learning_rate": 8.857525668125538e-06, "loss": 0.0035, "step": 36370 }, { "epoch": 0.5952712100139083, "grad_norm": 0.032499201595783234, "learning_rate": 8.856617044188486e-06, "loss": 0.0021, "step": 36380 }, { "epoch": 0.595434835964984, "grad_norm": 0.06605914235115051, "learning_rate": 8.855708105718869e-06, "loss": 0.0021, "step": 36390 }, { "epoch": 0.5955984619160599, "grad_norm": 0.13698311150074005, "learning_rate": 8.854798852790818e-06, "loss": 0.0041, "step": 36400 }, { "epoch": 0.5957620878671357, "grad_norm": 0.2649853825569153, "learning_rate": 8.853889285478487e-06, "loss": 0.0031, "step": 36410 }, { "epoch": 0.5959257138182116, "grad_norm": 0.09784290939569473, "learning_rate": 8.852979403856059e-06, "loss": 0.0032, "step": 36420 }, { "epoch": 0.5960893397692875, "grad_norm": 0.02051716484129429, "learning_rate": 8.852069207997737e-06, "loss": 0.0038, "step": 36430 }, { "epoch": 0.5962529657203632, "grad_norm": 0.05615180358290672, "learning_rate": 8.85115869797776e-06, "loss": 0.0042, "step": 36440 }, { "epoch": 0.5964165916714391, "grad_norm": 0.04665743559598923, "learning_rate": 8.85024787387038e-06, "loss": 0.0044, "step": 36450 }, { "epoch": 0.5965802176225149, "grad_norm": 0.0609181746840477, "learning_rate": 8.849336735749884e-06, "loss": 0.0016, "step": 36460 }, { "epoch": 0.5967438435735908, "grad_norm": 0.03353914991021156, "learning_rate": 8.84842528369058e-06, "loss": 0.0016, "step": 36470 }, { "epoch": 0.5969074695246667, "grad_norm": 0.03775521740317345, "learning_rate": 8.847513517766804e-06, "loss": 0.0015, "step": 36480 }, { "epoch": 0.5970710954757424, "grad_norm": 0.11112702637910843, "learning_rate": 8.846601438052915e-06, "loss": 0.0027, "step": 36490 }, { "epoch": 0.5972347214268183, "grad_norm": 0.09484587609767914, "learning_rate": 8.8456890446233e-06, "loss": 0.0019, "step": 36500 }, { "epoch": 0.5973983473778941, "grad_norm": 0.08690627664327621, "learning_rate": 8.844776337552372e-06, "loss": 0.0033, "step": 36510 }, { "epoch": 0.59756197332897, "grad_norm": 0.1479829102754593, "learning_rate": 8.843863316914566e-06, "loss": 0.0025, "step": 36520 }, { "epoch": 0.5977255992800458, "grad_norm": 0.10502468794584274, "learning_rate": 8.842949982784346e-06, "loss": 0.0021, "step": 36530 }, { "epoch": 0.5978892252311216, "grad_norm": 0.06019517034292221, "learning_rate": 8.842036335236199e-06, "loss": 0.0032, "step": 36540 }, { "epoch": 0.5980528511821975, "grad_norm": 0.26551783084869385, "learning_rate": 8.841122374344642e-06, "loss": 0.0046, "step": 36550 }, { "epoch": 0.5982164771332733, "grad_norm": 0.11244402080774307, "learning_rate": 8.840208100184212e-06, "loss": 0.0031, "step": 36560 }, { "epoch": 0.5983801030843492, "grad_norm": 0.06438885629177094, "learning_rate": 8.839293512829474e-06, "loss": 0.0022, "step": 36570 }, { "epoch": 0.598543729035425, "grad_norm": 0.0021785805001854897, "learning_rate": 8.83837861235502e-06, "loss": 0.004, "step": 36580 }, { "epoch": 0.5987073549865009, "grad_norm": 0.1145516037940979, "learning_rate": 8.837463398835467e-06, "loss": 0.0021, "step": 36590 }, { "epoch": 0.5988709809375767, "grad_norm": 0.050754331052303314, "learning_rate": 8.836547872345454e-06, "loss": 0.0038, "step": 36600 }, { "epoch": 0.5990346068886525, "grad_norm": 0.11124119162559509, "learning_rate": 8.835632032959651e-06, "loss": 0.0032, "step": 36610 }, { "epoch": 0.5991982328397284, "grad_norm": 0.0438012033700943, "learning_rate": 8.834715880752747e-06, "loss": 0.0023, "step": 36620 }, { "epoch": 0.5993618587908042, "grad_norm": 0.020244888961315155, "learning_rate": 8.833799415799464e-06, "loss": 0.0027, "step": 36630 }, { "epoch": 0.59952548474188, "grad_norm": 0.13828010857105255, "learning_rate": 8.832882638174545e-06, "loss": 0.0031, "step": 36640 }, { "epoch": 0.5996891106929559, "grad_norm": 0.13835541903972626, "learning_rate": 8.83196554795276e-06, "loss": 0.0021, "step": 36650 }, { "epoch": 0.5998527366440317, "grad_norm": 0.20447050034999847, "learning_rate": 8.831048145208898e-06, "loss": 0.003, "step": 36660 }, { "epoch": 0.6000163625951076, "grad_norm": 0.07340088486671448, "learning_rate": 8.830130430017788e-06, "loss": 0.004, "step": 36670 }, { "epoch": 0.6001799885461834, "grad_norm": 0.09831058979034424, "learning_rate": 8.829212402454269e-06, "loss": 0.0032, "step": 36680 }, { "epoch": 0.6003436144972593, "grad_norm": 0.09342899918556213, "learning_rate": 8.828294062593214e-06, "loss": 0.0028, "step": 36690 }, { "epoch": 0.6005072404483351, "grad_norm": 0.05146569013595581, "learning_rate": 8.827375410509522e-06, "loss": 0.0035, "step": 36700 }, { "epoch": 0.6006708663994109, "grad_norm": 0.02984634041786194, "learning_rate": 8.826456446278111e-06, "loss": 0.0039, "step": 36710 }, { "epoch": 0.6008344923504868, "grad_norm": 0.09399478137493134, "learning_rate": 8.825537169973932e-06, "loss": 0.0021, "step": 36720 }, { "epoch": 0.6009981183015626, "grad_norm": 0.02587178163230419, "learning_rate": 8.824617581671958e-06, "loss": 0.0039, "step": 36730 }, { "epoch": 0.6011617442526385, "grad_norm": 0.07559508085250854, "learning_rate": 8.823697681447186e-06, "loss": 0.002, "step": 36740 }, { "epoch": 0.6013253702037143, "grad_norm": 0.06608793139457703, "learning_rate": 8.822777469374641e-06, "loss": 0.0029, "step": 36750 }, { "epoch": 0.6014889961547901, "grad_norm": 0.09661190211772919, "learning_rate": 8.821856945529371e-06, "loss": 0.0032, "step": 36760 }, { "epoch": 0.601652622105866, "grad_norm": 0.09360766410827637, "learning_rate": 8.820936109986451e-06, "loss": 0.003, "step": 36770 }, { "epoch": 0.6018162480569418, "grad_norm": 0.08921334147453308, "learning_rate": 8.820014962820984e-06, "loss": 0.0036, "step": 36780 }, { "epoch": 0.6019798740080177, "grad_norm": 0.12837113440036774, "learning_rate": 8.819093504108092e-06, "loss": 0.0027, "step": 36790 }, { "epoch": 0.6021434999590936, "grad_norm": 0.04904981702566147, "learning_rate": 8.818171733922927e-06, "loss": 0.0015, "step": 36800 }, { "epoch": 0.6023071259101693, "grad_norm": 0.13305571675300598, "learning_rate": 8.817249652340667e-06, "loss": 0.0021, "step": 36810 }, { "epoch": 0.6024707518612452, "grad_norm": 0.20025058090686798, "learning_rate": 8.816327259436513e-06, "loss": 0.0024, "step": 36820 }, { "epoch": 0.602634377812321, "grad_norm": 0.1698462963104248, "learning_rate": 8.81540455528569e-06, "loss": 0.0029, "step": 36830 }, { "epoch": 0.6027980037633969, "grad_norm": 0.031062902882695198, "learning_rate": 8.814481539963456e-06, "loss": 0.0055, "step": 36840 }, { "epoch": 0.6029616297144728, "grad_norm": 0.06259023398160934, "learning_rate": 8.813558213545082e-06, "loss": 0.001, "step": 36850 }, { "epoch": 0.6031252556655485, "grad_norm": 0.17906297743320465, "learning_rate": 8.812634576105877e-06, "loss": 0.0043, "step": 36860 }, { "epoch": 0.6032888816166244, "grad_norm": 0.03559622913599014, "learning_rate": 8.811710627721167e-06, "loss": 0.0042, "step": 36870 }, { "epoch": 0.6034525075677002, "grad_norm": 0.05674935504794121, "learning_rate": 8.810786368466307e-06, "loss": 0.0046, "step": 36880 }, { "epoch": 0.6036161335187761, "grad_norm": 0.08643285930156708, "learning_rate": 8.809861798416675e-06, "loss": 0.0035, "step": 36890 }, { "epoch": 0.603779759469852, "grad_norm": 0.11587459594011307, "learning_rate": 8.808936917647678e-06, "loss": 0.0037, "step": 36900 }, { "epoch": 0.6039433854209277, "grad_norm": 0.07755623012781143, "learning_rate": 8.808011726234746e-06, "loss": 0.0034, "step": 36910 }, { "epoch": 0.6041070113720036, "grad_norm": 0.08583834767341614, "learning_rate": 8.807086224253332e-06, "loss": 0.0028, "step": 36920 }, { "epoch": 0.6042706373230794, "grad_norm": 0.08574008196592331, "learning_rate": 8.806160411778919e-06, "loss": 0.0036, "step": 36930 }, { "epoch": 0.6044342632741553, "grad_norm": 0.1612904965877533, "learning_rate": 8.805234288887012e-06, "loss": 0.0037, "step": 36940 }, { "epoch": 0.6045978892252312, "grad_norm": 0.038598284125328064, "learning_rate": 8.804307855653145e-06, "loss": 0.0029, "step": 36950 }, { "epoch": 0.6047615151763069, "grad_norm": 0.2639501690864563, "learning_rate": 8.80338111215287e-06, "loss": 0.0019, "step": 36960 }, { "epoch": 0.6049251411273828, "grad_norm": 0.10830695927143097, "learning_rate": 8.802454058461774e-06, "loss": 0.0045, "step": 36970 }, { "epoch": 0.6050887670784586, "grad_norm": 0.05082109943032265, "learning_rate": 8.80152669465546e-06, "loss": 0.0042, "step": 36980 }, { "epoch": 0.6052523930295345, "grad_norm": 0.14712199568748474, "learning_rate": 8.800599020809566e-06, "loss": 0.0019, "step": 36990 }, { "epoch": 0.6054160189806104, "grad_norm": 0.10536909103393555, "learning_rate": 8.799671036999743e-06, "loss": 0.0027, "step": 37000 }, { "epoch": 0.6055796449316861, "grad_norm": 0.09069158136844635, "learning_rate": 8.79874274330168e-06, "loss": 0.0033, "step": 37010 }, { "epoch": 0.605743270882762, "grad_norm": 0.050764184445142746, "learning_rate": 8.797814139791081e-06, "loss": 0.0029, "step": 37020 }, { "epoch": 0.6059068968338378, "grad_norm": 0.06032281741499901, "learning_rate": 8.796885226543682e-06, "loss": 0.0023, "step": 37030 }, { "epoch": 0.6060705227849137, "grad_norm": 0.07672061771154404, "learning_rate": 8.795956003635245e-06, "loss": 0.0022, "step": 37040 }, { "epoch": 0.6062341487359896, "grad_norm": 0.06515415012836456, "learning_rate": 8.79502647114155e-06, "loss": 0.0077, "step": 37050 }, { "epoch": 0.6063977746870653, "grad_norm": 0.2183670997619629, "learning_rate": 8.794096629138407e-06, "loss": 0.0046, "step": 37060 }, { "epoch": 0.6065614006381412, "grad_norm": 0.06741929799318314, "learning_rate": 8.793166477701653e-06, "loss": 0.0032, "step": 37070 }, { "epoch": 0.606725026589217, "grad_norm": 0.19403107464313507, "learning_rate": 8.792236016907146e-06, "loss": 0.0038, "step": 37080 }, { "epoch": 0.6068886525402929, "grad_norm": 0.17053139209747314, "learning_rate": 8.79130524683077e-06, "loss": 0.0017, "step": 37090 }, { "epoch": 0.6070522784913688, "grad_norm": 0.04111277312040329, "learning_rate": 8.790374167548438e-06, "loss": 0.0022, "step": 37100 }, { "epoch": 0.6072159044424446, "grad_norm": 0.08792674541473389, "learning_rate": 8.789442779136086e-06, "loss": 0.0036, "step": 37110 }, { "epoch": 0.6073795303935204, "grad_norm": 0.13975588977336884, "learning_rate": 8.788511081669676e-06, "loss": 0.0022, "step": 37120 }, { "epoch": 0.6075431563445962, "grad_norm": 0.060710933059453964, "learning_rate": 8.787579075225187e-06, "loss": 0.0024, "step": 37130 }, { "epoch": 0.6077067822956721, "grad_norm": 0.048313673585653305, "learning_rate": 8.786646759878638e-06, "loss": 0.0019, "step": 37140 }, { "epoch": 0.607870408246748, "grad_norm": 0.05173991620540619, "learning_rate": 8.785714135706062e-06, "loss": 0.0041, "step": 37150 }, { "epoch": 0.6080340341978238, "grad_norm": 0.05603951960802078, "learning_rate": 8.784781202783522e-06, "loss": 0.0026, "step": 37160 }, { "epoch": 0.6081976601488996, "grad_norm": 0.050557494163513184, "learning_rate": 8.783847961187102e-06, "loss": 0.0022, "step": 37170 }, { "epoch": 0.6083612860999754, "grad_norm": 0.06078165024518967, "learning_rate": 8.782914410992917e-06, "loss": 0.0039, "step": 37180 }, { "epoch": 0.6085249120510513, "grad_norm": 0.23364116251468658, "learning_rate": 8.781980552277103e-06, "loss": 0.0032, "step": 37190 }, { "epoch": 0.6086885380021272, "grad_norm": 0.09005658328533173, "learning_rate": 8.781046385115822e-06, "loss": 0.0023, "step": 37200 }, { "epoch": 0.608852163953203, "grad_norm": 0.03304097428917885, "learning_rate": 8.780111909585262e-06, "loss": 0.0029, "step": 37210 }, { "epoch": 0.6090157899042788, "grad_norm": 0.23793883621692657, "learning_rate": 8.779177125761637e-06, "loss": 0.0084, "step": 37220 }, { "epoch": 0.6091794158553546, "grad_norm": 0.18885046243667603, "learning_rate": 8.778242033721185e-06, "loss": 0.0023, "step": 37230 }, { "epoch": 0.6093430418064305, "grad_norm": 0.1231994703412056, "learning_rate": 8.777306633540164e-06, "loss": 0.0027, "step": 37240 }, { "epoch": 0.6095066677575064, "grad_norm": 0.1364826112985611, "learning_rate": 8.776370925294867e-06, "loss": 0.0026, "step": 37250 }, { "epoch": 0.6096702937085822, "grad_norm": 0.051186852157115936, "learning_rate": 8.775434909061606e-06, "loss": 0.0024, "step": 37260 }, { "epoch": 0.609833919659658, "grad_norm": 0.31306907534599304, "learning_rate": 8.774498584916718e-06, "loss": 0.0049, "step": 37270 }, { "epoch": 0.6099975456107338, "grad_norm": 0.032567791640758514, "learning_rate": 8.773561952936569e-06, "loss": 0.0022, "step": 37280 }, { "epoch": 0.6101611715618097, "grad_norm": 0.05053863301873207, "learning_rate": 8.772625013197544e-06, "loss": 0.0037, "step": 37290 }, { "epoch": 0.6103247975128856, "grad_norm": 0.029592325910925865, "learning_rate": 8.77168776577606e-06, "loss": 0.002, "step": 37300 }, { "epoch": 0.6104884234639614, "grad_norm": 0.27198657393455505, "learning_rate": 8.770750210748554e-06, "loss": 0.0021, "step": 37310 }, { "epoch": 0.6106520494150373, "grad_norm": 0.35949841141700745, "learning_rate": 8.769812348191489e-06, "loss": 0.0033, "step": 37320 }, { "epoch": 0.610815675366113, "grad_norm": 0.17255106568336487, "learning_rate": 8.768874178181355e-06, "loss": 0.002, "step": 37330 }, { "epoch": 0.6109793013171889, "grad_norm": 0.10463345050811768, "learning_rate": 8.767935700794666e-06, "loss": 0.0042, "step": 37340 }, { "epoch": 0.6111429272682648, "grad_norm": 0.05498625338077545, "learning_rate": 8.766996916107962e-06, "loss": 0.0032, "step": 37350 }, { "epoch": 0.6113065532193406, "grad_norm": 0.21080386638641357, "learning_rate": 8.766057824197804e-06, "loss": 0.0036, "step": 37360 }, { "epoch": 0.6114701791704165, "grad_norm": 0.03491256386041641, "learning_rate": 8.765118425140784e-06, "loss": 0.0023, "step": 37370 }, { "epoch": 0.6116338051214922, "grad_norm": 0.08403094857931137, "learning_rate": 8.764178719013516e-06, "loss": 0.0031, "step": 37380 }, { "epoch": 0.6117974310725681, "grad_norm": 0.045884132385253906, "learning_rate": 8.763238705892638e-06, "loss": 0.0019, "step": 37390 }, { "epoch": 0.6119610570236439, "grad_norm": 0.06711523234844208, "learning_rate": 8.762298385854814e-06, "loss": 0.0013, "step": 37400 }, { "epoch": 0.6121246829747198, "grad_norm": 0.13800066709518433, "learning_rate": 8.761357758976737e-06, "loss": 0.0029, "step": 37410 }, { "epoch": 0.6122883089257957, "grad_norm": 0.07132541388273239, "learning_rate": 8.760416825335115e-06, "loss": 0.0021, "step": 37420 }, { "epoch": 0.6124519348768714, "grad_norm": 0.06595374643802643, "learning_rate": 8.759475585006691e-06, "loss": 0.0034, "step": 37430 }, { "epoch": 0.6126155608279473, "grad_norm": 0.1668674647808075, "learning_rate": 8.758534038068231e-06, "loss": 0.0022, "step": 37440 }, { "epoch": 0.6127791867790231, "grad_norm": 0.06871428340673447, "learning_rate": 8.757592184596522e-06, "loss": 0.0018, "step": 37450 }, { "epoch": 0.612942812730099, "grad_norm": 0.07337944954633713, "learning_rate": 8.756650024668378e-06, "loss": 0.004, "step": 37460 }, { "epoch": 0.6131064386811749, "grad_norm": 0.13682547211647034, "learning_rate": 8.75570755836064e-06, "loss": 0.002, "step": 37470 }, { "epoch": 0.6132700646322506, "grad_norm": 0.20227354764938354, "learning_rate": 8.754764785750172e-06, "loss": 0.0025, "step": 37480 }, { "epoch": 0.6134336905833265, "grad_norm": 0.14616483449935913, "learning_rate": 8.753821706913862e-06, "loss": 0.0035, "step": 37490 }, { "epoch": 0.6135973165344023, "grad_norm": 0.11802705377340317, "learning_rate": 8.752878321928626e-06, "loss": 0.0029, "step": 37500 }, { "epoch": 0.6137609424854782, "grad_norm": 0.05202655866742134, "learning_rate": 8.751934630871401e-06, "loss": 0.0039, "step": 37510 }, { "epoch": 0.6139245684365541, "grad_norm": 0.20219677686691284, "learning_rate": 8.750990633819155e-06, "loss": 0.0051, "step": 37520 }, { "epoch": 0.6140881943876298, "grad_norm": 0.0687403529882431, "learning_rate": 8.750046330848872e-06, "loss": 0.0018, "step": 37530 }, { "epoch": 0.6142518203387057, "grad_norm": 0.012231512926518917, "learning_rate": 8.749101722037571e-06, "loss": 0.0043, "step": 37540 }, { "epoch": 0.6144154462897815, "grad_norm": 0.13095110654830933, "learning_rate": 8.748156807462289e-06, "loss": 0.0018, "step": 37550 }, { "epoch": 0.6145790722408574, "grad_norm": 0.0067818365059792995, "learning_rate": 8.747211587200088e-06, "loss": 0.0022, "step": 37560 }, { "epoch": 0.6147426981919333, "grad_norm": 0.0933321937918663, "learning_rate": 8.746266061328062e-06, "loss": 0.0024, "step": 37570 }, { "epoch": 0.614906324143009, "grad_norm": 0.10935989767313004, "learning_rate": 8.74532022992332e-06, "loss": 0.0032, "step": 37580 }, { "epoch": 0.6150699500940849, "grad_norm": 0.11650828272104263, "learning_rate": 8.744374093063004e-06, "loss": 0.0042, "step": 37590 }, { "epoch": 0.6152335760451607, "grad_norm": 0.07681674510240555, "learning_rate": 8.743427650824276e-06, "loss": 0.0072, "step": 37600 }, { "epoch": 0.6153972019962366, "grad_norm": 0.020434940233826637, "learning_rate": 8.742480903284326e-06, "loss": 0.002, "step": 37610 }, { "epoch": 0.6155608279473125, "grad_norm": 0.12207379192113876, "learning_rate": 8.741533850520364e-06, "loss": 0.0029, "step": 37620 }, { "epoch": 0.6157244538983883, "grad_norm": 0.03350047022104263, "learning_rate": 8.740586492609633e-06, "loss": 0.0021, "step": 37630 }, { "epoch": 0.6158880798494641, "grad_norm": 0.1759958118200302, "learning_rate": 8.739638829629394e-06, "loss": 0.0036, "step": 37640 }, { "epoch": 0.6160517058005399, "grad_norm": 0.08769656717777252, "learning_rate": 8.738690861656933e-06, "loss": 0.003, "step": 37650 }, { "epoch": 0.6162153317516158, "grad_norm": 0.15247301757335663, "learning_rate": 8.737742588769569e-06, "loss": 0.0024, "step": 37660 }, { "epoch": 0.6163789577026917, "grad_norm": 0.08341974765062332, "learning_rate": 8.736794011044633e-06, "loss": 0.0028, "step": 37670 }, { "epoch": 0.6165425836537675, "grad_norm": 0.11260994523763657, "learning_rate": 8.735845128559493e-06, "loss": 0.0035, "step": 37680 }, { "epoch": 0.6167062096048433, "grad_norm": 0.09610302001237869, "learning_rate": 8.734895941391535e-06, "loss": 0.0023, "step": 37690 }, { "epoch": 0.6168698355559191, "grad_norm": 0.1326465606689453, "learning_rate": 8.733946449618172e-06, "loss": 0.002, "step": 37700 }, { "epoch": 0.617033461506995, "grad_norm": 0.05639144778251648, "learning_rate": 8.732996653316839e-06, "loss": 0.0028, "step": 37710 }, { "epoch": 0.6171970874580709, "grad_norm": 0.3435528874397278, "learning_rate": 8.732046552565e-06, "loss": 0.0028, "step": 37720 }, { "epoch": 0.6173607134091467, "grad_norm": 0.1259031444787979, "learning_rate": 8.731096147440141e-06, "loss": 0.0032, "step": 37730 }, { "epoch": 0.6175243393602226, "grad_norm": 0.03697971999645233, "learning_rate": 8.730145438019776e-06, "loss": 0.0044, "step": 37740 }, { "epoch": 0.6176879653112983, "grad_norm": 0.1517125815153122, "learning_rate": 8.72919442438144e-06, "loss": 0.0029, "step": 37750 }, { "epoch": 0.6178515912623742, "grad_norm": 0.0528755821287632, "learning_rate": 8.728243106602694e-06, "loss": 0.0025, "step": 37760 }, { "epoch": 0.6180152172134501, "grad_norm": 0.1169549971818924, "learning_rate": 8.727291484761124e-06, "loss": 0.0042, "step": 37770 }, { "epoch": 0.6181788431645259, "grad_norm": 0.15145055949687958, "learning_rate": 8.726339558934343e-06, "loss": 0.0035, "step": 37780 }, { "epoch": 0.6183424691156018, "grad_norm": 0.026678087189793587, "learning_rate": 8.725387329199986e-06, "loss": 0.0016, "step": 37790 }, { "epoch": 0.6185060950666775, "grad_norm": 0.003591701854020357, "learning_rate": 8.724434795635712e-06, "loss": 0.0029, "step": 37800 }, { "epoch": 0.6186697210177534, "grad_norm": 0.02403920330107212, "learning_rate": 8.723481958319209e-06, "loss": 0.0024, "step": 37810 }, { "epoch": 0.6188333469688293, "grad_norm": 0.10099305212497711, "learning_rate": 8.722528817328186e-06, "loss": 0.0048, "step": 37820 }, { "epoch": 0.6189969729199051, "grad_norm": 0.07914862036705017, "learning_rate": 8.721575372740377e-06, "loss": 0.003, "step": 37830 }, { "epoch": 0.619160598870981, "grad_norm": 0.29134613275527954, "learning_rate": 8.720621624633542e-06, "loss": 0.0041, "step": 37840 }, { "epoch": 0.6193242248220567, "grad_norm": 0.09700335562229156, "learning_rate": 8.719667573085467e-06, "loss": 0.0041, "step": 37850 }, { "epoch": 0.6194878507731326, "grad_norm": 0.17290009558200836, "learning_rate": 8.718713218173958e-06, "loss": 0.0041, "step": 37860 }, { "epoch": 0.6196514767242085, "grad_norm": 0.10613647103309631, "learning_rate": 8.717758559976853e-06, "loss": 0.003, "step": 37870 }, { "epoch": 0.6198151026752843, "grad_norm": 0.048731859773397446, "learning_rate": 8.716803598572008e-06, "loss": 0.0019, "step": 37880 }, { "epoch": 0.6199787286263602, "grad_norm": 0.34867143630981445, "learning_rate": 8.715848334037307e-06, "loss": 0.0043, "step": 37890 }, { "epoch": 0.6201423545774359, "grad_norm": 0.048855897039175034, "learning_rate": 8.71489276645066e-06, "loss": 0.0024, "step": 37900 }, { "epoch": 0.6203059805285118, "grad_norm": 0.09153824299573898, "learning_rate": 8.713936895889997e-06, "loss": 0.0034, "step": 37910 }, { "epoch": 0.6204696064795877, "grad_norm": 0.0667981207370758, "learning_rate": 8.712980722433277e-06, "loss": 0.0027, "step": 37920 }, { "epoch": 0.6206332324306635, "grad_norm": 0.21692980825901031, "learning_rate": 8.712024246158482e-06, "loss": 0.0032, "step": 37930 }, { "epoch": 0.6207968583817394, "grad_norm": 0.10792932659387589, "learning_rate": 8.711067467143617e-06, "loss": 0.0059, "step": 37940 }, { "epoch": 0.6209604843328151, "grad_norm": 0.07672310620546341, "learning_rate": 8.710110385466717e-06, "loss": 0.0025, "step": 37950 }, { "epoch": 0.621124110283891, "grad_norm": 0.6200510263442993, "learning_rate": 8.709153001205837e-06, "loss": 0.0034, "step": 37960 }, { "epoch": 0.6212877362349669, "grad_norm": 0.15146096050739288, "learning_rate": 8.70819531443906e-06, "loss": 0.002, "step": 37970 }, { "epoch": 0.6214513621860427, "grad_norm": 0.12634432315826416, "learning_rate": 8.707237325244487e-06, "loss": 0.002, "step": 37980 }, { "epoch": 0.6216149881371186, "grad_norm": 0.10400105267763138, "learning_rate": 8.70627903370025e-06, "loss": 0.0032, "step": 37990 }, { "epoch": 0.6217786140881943, "grad_norm": 0.06751594692468643, "learning_rate": 8.705320439884505e-06, "loss": 0.0019, "step": 38000 }, { "epoch": 0.6219422400392702, "grad_norm": 0.04412994533777237, "learning_rate": 8.704361543875433e-06, "loss": 0.0026, "step": 38010 }, { "epoch": 0.6221058659903461, "grad_norm": 0.06331221759319305, "learning_rate": 8.703402345751237e-06, "loss": 0.0019, "step": 38020 }, { "epoch": 0.6222694919414219, "grad_norm": 0.07288952171802521, "learning_rate": 8.702442845590145e-06, "loss": 0.0027, "step": 38030 }, { "epoch": 0.6224331178924978, "grad_norm": 0.06311749666929245, "learning_rate": 8.701483043470412e-06, "loss": 0.0033, "step": 38040 }, { "epoch": 0.6225967438435736, "grad_norm": 0.029867738485336304, "learning_rate": 8.700522939470313e-06, "loss": 0.0036, "step": 38050 }, { "epoch": 0.6227603697946494, "grad_norm": 0.04987639933824539, "learning_rate": 8.699562533668155e-06, "loss": 0.0018, "step": 38060 }, { "epoch": 0.6229239957457253, "grad_norm": 0.056613337248563766, "learning_rate": 8.698601826142265e-06, "loss": 0.0021, "step": 38070 }, { "epoch": 0.6230876216968011, "grad_norm": 0.11939631402492523, "learning_rate": 8.697640816970993e-06, "loss": 0.0025, "step": 38080 }, { "epoch": 0.623251247647877, "grad_norm": 0.015964725986123085, "learning_rate": 8.696679506232714e-06, "loss": 0.003, "step": 38090 }, { "epoch": 0.6234148735989528, "grad_norm": 0.1405145823955536, "learning_rate": 8.695717894005836e-06, "loss": 0.0023, "step": 38100 }, { "epoch": 0.6235784995500286, "grad_norm": 0.30327147245407104, "learning_rate": 8.694755980368778e-06, "loss": 0.0051, "step": 38110 }, { "epoch": 0.6237421255011045, "grad_norm": 0.06534552574157715, "learning_rate": 8.693793765399993e-06, "loss": 0.0019, "step": 38120 }, { "epoch": 0.6239057514521803, "grad_norm": 0.0863557904958725, "learning_rate": 8.692831249177956e-06, "loss": 0.0017, "step": 38130 }, { "epoch": 0.6240693774032562, "grad_norm": 0.1365835964679718, "learning_rate": 8.691868431781167e-06, "loss": 0.0032, "step": 38140 }, { "epoch": 0.624233003354332, "grad_norm": 0.0490381121635437, "learning_rate": 8.69090531328815e-06, "loss": 0.0039, "step": 38150 }, { "epoch": 0.6243966293054078, "grad_norm": 0.04853466898202896, "learning_rate": 8.689941893777452e-06, "loss": 0.0047, "step": 38160 }, { "epoch": 0.6245602552564837, "grad_norm": 0.04490708187222481, "learning_rate": 8.688978173327646e-06, "loss": 0.0025, "step": 38170 }, { "epoch": 0.6247238812075595, "grad_norm": 0.11523409932851791, "learning_rate": 8.688014152017332e-06, "loss": 0.0031, "step": 38180 }, { "epoch": 0.6248875071586354, "grad_norm": 0.008040662854909897, "learning_rate": 8.687049829925134e-06, "loss": 0.0036, "step": 38190 }, { "epoch": 0.6250511331097112, "grad_norm": 0.1238519474864006, "learning_rate": 8.686085207129693e-06, "loss": 0.0022, "step": 38200 }, { "epoch": 0.625214759060787, "grad_norm": 0.0984954684972763, "learning_rate": 8.685120283709687e-06, "loss": 0.0021, "step": 38210 }, { "epoch": 0.6253783850118629, "grad_norm": 0.051364999264478683, "learning_rate": 8.684155059743806e-06, "loss": 0.0026, "step": 38220 }, { "epoch": 0.6255420109629387, "grad_norm": 0.09169857949018478, "learning_rate": 8.683189535310774e-06, "loss": 0.0018, "step": 38230 }, { "epoch": 0.6257056369140146, "grad_norm": 0.015738578513264656, "learning_rate": 8.682223710489333e-06, "loss": 0.0022, "step": 38240 }, { "epoch": 0.6258692628650904, "grad_norm": 0.26920127868652344, "learning_rate": 8.681257585358256e-06, "loss": 0.0025, "step": 38250 }, { "epoch": 0.6260328888161663, "grad_norm": 0.0809926837682724, "learning_rate": 8.680291159996334e-06, "loss": 0.0026, "step": 38260 }, { "epoch": 0.626196514767242, "grad_norm": 0.06103767827153206, "learning_rate": 8.679324434482388e-06, "loss": 0.0021, "step": 38270 }, { "epoch": 0.6263601407183179, "grad_norm": 0.19600960612297058, "learning_rate": 8.678357408895257e-06, "loss": 0.0037, "step": 38280 }, { "epoch": 0.6265237666693938, "grad_norm": 0.008998398669064045, "learning_rate": 8.677390083313812e-06, "loss": 0.0029, "step": 38290 }, { "epoch": 0.6266873926204696, "grad_norm": 0.05034337192773819, "learning_rate": 8.676422457816945e-06, "loss": 0.0022, "step": 38300 }, { "epoch": 0.6268510185715455, "grad_norm": 0.19930779933929443, "learning_rate": 8.675454532483569e-06, "loss": 0.0024, "step": 38310 }, { "epoch": 0.6270146445226212, "grad_norm": 0.2219306379556656, "learning_rate": 8.674486307392625e-06, "loss": 0.0043, "step": 38320 }, { "epoch": 0.6271782704736971, "grad_norm": 0.08216582983732224, "learning_rate": 8.67351778262308e-06, "loss": 0.0025, "step": 38330 }, { "epoch": 0.627341896424773, "grad_norm": 0.024125108495354652, "learning_rate": 8.672548958253925e-06, "loss": 0.0021, "step": 38340 }, { "epoch": 0.6275055223758488, "grad_norm": 0.06840266287326813, "learning_rate": 8.67157983436417e-06, "loss": 0.002, "step": 38350 }, { "epoch": 0.6276691483269247, "grad_norm": 0.03997747227549553, "learning_rate": 8.670610411032857e-06, "loss": 0.0043, "step": 38360 }, { "epoch": 0.6278327742780004, "grad_norm": 0.15534232556819916, "learning_rate": 8.669640688339046e-06, "loss": 0.0023, "step": 38370 }, { "epoch": 0.6279964002290763, "grad_norm": 0.1500731110572815, "learning_rate": 8.668670666361828e-06, "loss": 0.0021, "step": 38380 }, { "epoch": 0.6281600261801522, "grad_norm": 0.03249631077051163, "learning_rate": 8.667700345180309e-06, "loss": 0.0019, "step": 38390 }, { "epoch": 0.628323652131228, "grad_norm": 0.09533066302537918, "learning_rate": 8.66672972487363e-06, "loss": 0.0022, "step": 38400 }, { "epoch": 0.6284872780823039, "grad_norm": 0.07553412765264511, "learning_rate": 8.66575880552095e-06, "loss": 0.0034, "step": 38410 }, { "epoch": 0.6286509040333796, "grad_norm": 0.04963284730911255, "learning_rate": 8.664787587201454e-06, "loss": 0.0041, "step": 38420 }, { "epoch": 0.6288145299844555, "grad_norm": 0.15100255608558655, "learning_rate": 8.663816069994351e-06, "loss": 0.0029, "step": 38430 }, { "epoch": 0.6289781559355314, "grad_norm": 0.021357066929340363, "learning_rate": 8.662844253978873e-06, "loss": 0.0027, "step": 38440 }, { "epoch": 0.6291417818866072, "grad_norm": 0.09207446128129959, "learning_rate": 8.661872139234282e-06, "loss": 0.0037, "step": 38450 }, { "epoch": 0.6293054078376831, "grad_norm": 0.08249254524707794, "learning_rate": 8.660899725839857e-06, "loss": 0.0023, "step": 38460 }, { "epoch": 0.6294690337887588, "grad_norm": 0.08996037393808365, "learning_rate": 8.659927013874907e-06, "loss": 0.003, "step": 38470 }, { "epoch": 0.6296326597398347, "grad_norm": 0.03326552361249924, "learning_rate": 8.658954003418761e-06, "loss": 0.0027, "step": 38480 }, { "epoch": 0.6297962856909106, "grad_norm": 0.05299806967377663, "learning_rate": 8.657980694550777e-06, "loss": 0.0021, "step": 38490 }, { "epoch": 0.6299599116419864, "grad_norm": 0.03193550929427147, "learning_rate": 8.65700708735033e-06, "loss": 0.0019, "step": 38500 }, { "epoch": 0.6301235375930623, "grad_norm": 0.07028847187757492, "learning_rate": 8.656033181896827e-06, "loss": 0.0016, "step": 38510 }, { "epoch": 0.630287163544138, "grad_norm": 0.05462953448295593, "learning_rate": 8.655058978269699e-06, "loss": 0.0033, "step": 38520 }, { "epoch": 0.6304507894952139, "grad_norm": 0.012995736673474312, "learning_rate": 8.654084476548397e-06, "loss": 0.0021, "step": 38530 }, { "epoch": 0.6306144154462898, "grad_norm": 0.21472889184951782, "learning_rate": 8.653109676812395e-06, "loss": 0.0031, "step": 38540 }, { "epoch": 0.6307780413973656, "grad_norm": 0.07544241845607758, "learning_rate": 8.652134579141198e-06, "loss": 0.0019, "step": 38550 }, { "epoch": 0.6309416673484415, "grad_norm": 0.03559279069304466, "learning_rate": 8.651159183614331e-06, "loss": 0.0032, "step": 38560 }, { "epoch": 0.6311052932995173, "grad_norm": 0.051309216767549515, "learning_rate": 8.650183490311341e-06, "loss": 0.0025, "step": 38570 }, { "epoch": 0.6312689192505931, "grad_norm": 0.18429972231388092, "learning_rate": 8.649207499311805e-06, "loss": 0.0052, "step": 38580 }, { "epoch": 0.631432545201669, "grad_norm": 0.17924368381500244, "learning_rate": 8.648231210695323e-06, "loss": 0.0038, "step": 38590 }, { "epoch": 0.6315961711527448, "grad_norm": 0.06036658212542534, "learning_rate": 8.647254624541514e-06, "loss": 0.0019, "step": 38600 }, { "epoch": 0.6317597971038207, "grad_norm": 0.05907200649380684, "learning_rate": 8.646277740930028e-06, "loss": 0.002, "step": 38610 }, { "epoch": 0.6319234230548965, "grad_norm": 0.0779176726937294, "learning_rate": 8.645300559940535e-06, "loss": 0.0017, "step": 38620 }, { "epoch": 0.6320870490059723, "grad_norm": 0.251051127910614, "learning_rate": 8.64432308165273e-06, "loss": 0.0028, "step": 38630 }, { "epoch": 0.6322506749570482, "grad_norm": 0.1337164044380188, "learning_rate": 8.643345306146334e-06, "loss": 0.0028, "step": 38640 }, { "epoch": 0.632414300908124, "grad_norm": 0.10822925716638565, "learning_rate": 8.64236723350109e-06, "loss": 0.0034, "step": 38650 }, { "epoch": 0.6325779268591999, "grad_norm": 0.1978202909231186, "learning_rate": 8.641388863796767e-06, "loss": 0.0024, "step": 38660 }, { "epoch": 0.6327415528102757, "grad_norm": 0.16618888080120087, "learning_rate": 8.640410197113158e-06, "loss": 0.002, "step": 38670 }, { "epoch": 0.6329051787613516, "grad_norm": 0.044764094054698944, "learning_rate": 8.639431233530079e-06, "loss": 0.0022, "step": 38680 }, { "epoch": 0.6330688047124274, "grad_norm": 0.10753671079874039, "learning_rate": 8.63845197312737e-06, "loss": 0.0025, "step": 38690 }, { "epoch": 0.6332324306635032, "grad_norm": 0.038792937994003296, "learning_rate": 8.637472415984897e-06, "loss": 0.0026, "step": 38700 }, { "epoch": 0.6333960566145791, "grad_norm": 0.06564744561910629, "learning_rate": 8.636492562182553e-06, "loss": 0.0022, "step": 38710 }, { "epoch": 0.6335596825656549, "grad_norm": 0.05353368818759918, "learning_rate": 8.635512411800245e-06, "loss": 0.003, "step": 38720 }, { "epoch": 0.6337233085167308, "grad_norm": 0.03349089249968529, "learning_rate": 8.634531964917914e-06, "loss": 0.0023, "step": 38730 }, { "epoch": 0.6338869344678066, "grad_norm": 0.03374045342206955, "learning_rate": 8.63355122161552e-06, "loss": 0.0028, "step": 38740 }, { "epoch": 0.6340505604188824, "grad_norm": 0.1154458150267601, "learning_rate": 8.632570181973054e-06, "loss": 0.0023, "step": 38750 }, { "epoch": 0.6342141863699583, "grad_norm": 0.06812771409749985, "learning_rate": 8.631588846070522e-06, "loss": 0.0018, "step": 38760 }, { "epoch": 0.6343778123210341, "grad_norm": 0.12013090401887894, "learning_rate": 8.630607213987959e-06, "loss": 0.0032, "step": 38770 }, { "epoch": 0.63454143827211, "grad_norm": 0.12253228574991226, "learning_rate": 8.629625285805425e-06, "loss": 0.0033, "step": 38780 }, { "epoch": 0.6347050642231858, "grad_norm": 0.028028065338730812, "learning_rate": 8.628643061602999e-06, "loss": 0.0016, "step": 38790 }, { "epoch": 0.6348686901742616, "grad_norm": 0.03966687619686127, "learning_rate": 8.627660541460795e-06, "loss": 0.0032, "step": 38800 }, { "epoch": 0.6350323161253375, "grad_norm": 0.16435879468917847, "learning_rate": 8.626677725458935e-06, "loss": 0.0037, "step": 38810 }, { "epoch": 0.6351959420764133, "grad_norm": 0.10439508408308029, "learning_rate": 8.625694613677582e-06, "loss": 0.0037, "step": 38820 }, { "epoch": 0.6353595680274892, "grad_norm": 0.05926480516791344, "learning_rate": 8.624711206196909e-06, "loss": 0.0023, "step": 38830 }, { "epoch": 0.635523193978565, "grad_norm": 0.3598000109195709, "learning_rate": 8.623727503097126e-06, "loss": 0.0024, "step": 38840 }, { "epoch": 0.6356868199296408, "grad_norm": 0.02207525260746479, "learning_rate": 8.622743504458455e-06, "loss": 0.0038, "step": 38850 }, { "epoch": 0.6358504458807167, "grad_norm": 0.03934333845973015, "learning_rate": 8.62175921036115e-06, "loss": 0.0027, "step": 38860 }, { "epoch": 0.6360140718317925, "grad_norm": 0.018653923645615578, "learning_rate": 8.620774620885484e-06, "loss": 0.0031, "step": 38870 }, { "epoch": 0.6361776977828684, "grad_norm": 0.1300826221704483, "learning_rate": 8.619789736111762e-06, "loss": 0.0025, "step": 38880 }, { "epoch": 0.6363413237339443, "grad_norm": 0.1303454488515854, "learning_rate": 8.618804556120302e-06, "loss": 0.0032, "step": 38890 }, { "epoch": 0.63650494968502, "grad_norm": 0.1254907250404358, "learning_rate": 8.617819080991455e-06, "loss": 0.0026, "step": 38900 }, { "epoch": 0.6366685756360959, "grad_norm": 0.07402250915765762, "learning_rate": 8.616833310805593e-06, "loss": 0.0028, "step": 38910 }, { "epoch": 0.6368322015871717, "grad_norm": 0.15269005298614502, "learning_rate": 8.615847245643112e-06, "loss": 0.0024, "step": 38920 }, { "epoch": 0.6369958275382476, "grad_norm": 0.052248455584049225, "learning_rate": 8.614860885584432e-06, "loss": 0.0031, "step": 38930 }, { "epoch": 0.6371594534893235, "grad_norm": 0.09626732021570206, "learning_rate": 8.613874230709997e-06, "loss": 0.003, "step": 38940 }, { "epoch": 0.6373230794403992, "grad_norm": 0.04366279020905495, "learning_rate": 8.612887281100277e-06, "loss": 0.0027, "step": 38950 }, { "epoch": 0.6374867053914751, "grad_norm": 0.1165156364440918, "learning_rate": 8.61190003683576e-06, "loss": 0.0031, "step": 38960 }, { "epoch": 0.6376503313425509, "grad_norm": 0.26304417848587036, "learning_rate": 8.610912497996965e-06, "loss": 0.006, "step": 38970 }, { "epoch": 0.6378139572936268, "grad_norm": 0.055768873542547226, "learning_rate": 8.609924664664432e-06, "loss": 0.0027, "step": 38980 }, { "epoch": 0.6379775832447027, "grad_norm": 0.038172975182533264, "learning_rate": 8.608936536918727e-06, "loss": 0.0029, "step": 38990 }, { "epoch": 0.6381412091957784, "grad_norm": 0.07032755762338638, "learning_rate": 8.607948114840435e-06, "loss": 0.0026, "step": 39000 }, { "epoch": 0.6383048351468543, "grad_norm": 0.07438138872385025, "learning_rate": 8.606959398510169e-06, "loss": 0.005, "step": 39010 }, { "epoch": 0.6384684610979301, "grad_norm": 0.0852329358458519, "learning_rate": 8.60597038800857e-06, "loss": 0.003, "step": 39020 }, { "epoch": 0.638632087049006, "grad_norm": 0.024013763293623924, "learning_rate": 8.60498108341629e-06, "loss": 0.0016, "step": 39030 }, { "epoch": 0.6387957130000819, "grad_norm": 0.17839962244033813, "learning_rate": 8.603991484814022e-06, "loss": 0.0042, "step": 39040 }, { "epoch": 0.6389593389511576, "grad_norm": 0.21313296258449554, "learning_rate": 8.603001592282469e-06, "loss": 0.0018, "step": 39050 }, { "epoch": 0.6391229649022335, "grad_norm": 0.1219082847237587, "learning_rate": 8.602011405902364e-06, "loss": 0.002, "step": 39060 }, { "epoch": 0.6392865908533093, "grad_norm": 0.07517138868570328, "learning_rate": 8.601020925754464e-06, "loss": 0.0021, "step": 39070 }, { "epoch": 0.6394502168043852, "grad_norm": 0.02984750270843506, "learning_rate": 8.60003015191955e-06, "loss": 0.0019, "step": 39080 }, { "epoch": 0.6396138427554611, "grad_norm": 0.1653256118297577, "learning_rate": 8.599039084478425e-06, "loss": 0.0031, "step": 39090 }, { "epoch": 0.6397774687065368, "grad_norm": 0.08026986569166183, "learning_rate": 8.598047723511916e-06, "loss": 0.0036, "step": 39100 }, { "epoch": 0.6399410946576127, "grad_norm": 0.09979500621557236, "learning_rate": 8.597056069100877e-06, "loss": 0.0032, "step": 39110 }, { "epoch": 0.6401047206086885, "grad_norm": 0.06817537546157837, "learning_rate": 8.596064121326184e-06, "loss": 0.0019, "step": 39120 }, { "epoch": 0.6402683465597644, "grad_norm": 0.07053446769714355, "learning_rate": 8.595071880268735e-06, "loss": 0.0033, "step": 39130 }, { "epoch": 0.6404319725108402, "grad_norm": 0.09091051667928696, "learning_rate": 8.594079346009456e-06, "loss": 0.0025, "step": 39140 }, { "epoch": 0.640595598461916, "grad_norm": 0.09570232033729553, "learning_rate": 8.593086518629292e-06, "loss": 0.002, "step": 39150 }, { "epoch": 0.6407592244129919, "grad_norm": 0.1417010873556137, "learning_rate": 8.592093398209217e-06, "loss": 0.0022, "step": 39160 }, { "epoch": 0.6409228503640677, "grad_norm": 0.059278704226017, "learning_rate": 8.591099984830228e-06, "loss": 0.003, "step": 39170 }, { "epoch": 0.6410864763151436, "grad_norm": 0.020414268597960472, "learning_rate": 8.59010627857334e-06, "loss": 0.0022, "step": 39180 }, { "epoch": 0.6412501022662194, "grad_norm": 0.22856572270393372, "learning_rate": 8.589112279519599e-06, "loss": 0.0026, "step": 39190 }, { "epoch": 0.6414137282172953, "grad_norm": 0.23743440210819244, "learning_rate": 8.588117987750072e-06, "loss": 0.0023, "step": 39200 }, { "epoch": 0.6415773541683711, "grad_norm": 0.09644228965044022, "learning_rate": 8.58712340334585e-06, "loss": 0.0039, "step": 39210 }, { "epoch": 0.6417409801194469, "grad_norm": 0.08064088970422745, "learning_rate": 8.586128526388046e-06, "loss": 0.0043, "step": 39220 }, { "epoch": 0.6419046060705228, "grad_norm": 0.03328957036137581, "learning_rate": 8.585133356957802e-06, "loss": 0.0042, "step": 39230 }, { "epoch": 0.6420682320215986, "grad_norm": 0.1000988557934761, "learning_rate": 8.584137895136278e-06, "loss": 0.0048, "step": 39240 }, { "epoch": 0.6422318579726745, "grad_norm": 0.07038478553295135, "learning_rate": 8.583142141004662e-06, "loss": 0.0044, "step": 39250 }, { "epoch": 0.6423954839237503, "grad_norm": 0.10646302253007889, "learning_rate": 8.582146094644164e-06, "loss": 0.0028, "step": 39260 }, { "epoch": 0.6425591098748261, "grad_norm": 0.017525313422083855, "learning_rate": 8.581149756136018e-06, "loss": 0.0018, "step": 39270 }, { "epoch": 0.642722735825902, "grad_norm": 0.03510558605194092, "learning_rate": 8.580153125561482e-06, "loss": 0.0026, "step": 39280 }, { "epoch": 0.6428863617769778, "grad_norm": 0.043394170701503754, "learning_rate": 8.579156203001838e-06, "loss": 0.0021, "step": 39290 }, { "epoch": 0.6430499877280537, "grad_norm": 0.1543533056974411, "learning_rate": 8.578158988538392e-06, "loss": 0.0028, "step": 39300 }, { "epoch": 0.6432136136791295, "grad_norm": 0.015024775639176369, "learning_rate": 8.577161482252472e-06, "loss": 0.0022, "step": 39310 }, { "epoch": 0.6433772396302053, "grad_norm": 0.19195719063282013, "learning_rate": 8.576163684225432e-06, "loss": 0.0029, "step": 39320 }, { "epoch": 0.6435408655812812, "grad_norm": 0.06950744241476059, "learning_rate": 8.57516559453865e-06, "loss": 0.004, "step": 39330 }, { "epoch": 0.643704491532357, "grad_norm": 0.06668994575738907, "learning_rate": 8.574167213273524e-06, "loss": 0.0022, "step": 39340 }, { "epoch": 0.6438681174834329, "grad_norm": 0.2713773548603058, "learning_rate": 8.573168540511482e-06, "loss": 0.0032, "step": 39350 }, { "epoch": 0.6440317434345088, "grad_norm": 0.3114817142486572, "learning_rate": 8.572169576333971e-06, "loss": 0.0016, "step": 39360 }, { "epoch": 0.6441953693855845, "grad_norm": 0.09380172193050385, "learning_rate": 8.571170320822461e-06, "loss": 0.003, "step": 39370 }, { "epoch": 0.6443589953366604, "grad_norm": 0.031921155750751495, "learning_rate": 8.570170774058451e-06, "loss": 0.0024, "step": 39380 }, { "epoch": 0.6445226212877362, "grad_norm": 0.054590776562690735, "learning_rate": 8.569170936123461e-06, "loss": 0.0033, "step": 39390 }, { "epoch": 0.6446862472388121, "grad_norm": 0.03549819439649582, "learning_rate": 8.56817080709903e-06, "loss": 0.0018, "step": 39400 }, { "epoch": 0.644849873189888, "grad_norm": 0.1801433563232422, "learning_rate": 8.56717038706673e-06, "loss": 0.0042, "step": 39410 }, { "epoch": 0.6450134991409637, "grad_norm": 0.06492079049348831, "learning_rate": 8.566169676108149e-06, "loss": 0.0048, "step": 39420 }, { "epoch": 0.6451771250920396, "grad_norm": 0.07603912800550461, "learning_rate": 8.565168674304902e-06, "loss": 0.002, "step": 39430 }, { "epoch": 0.6453407510431154, "grad_norm": 0.07331918925046921, "learning_rate": 8.564167381738628e-06, "loss": 0.0022, "step": 39440 }, { "epoch": 0.6455043769941913, "grad_norm": 0.07943751662969589, "learning_rate": 8.56316579849099e-06, "loss": 0.0023, "step": 39450 }, { "epoch": 0.6456680029452672, "grad_norm": 0.08440044522285461, "learning_rate": 8.562163924643672e-06, "loss": 0.0041, "step": 39460 }, { "epoch": 0.6458316288963429, "grad_norm": 0.161666601896286, "learning_rate": 8.561161760278383e-06, "loss": 0.0061, "step": 39470 }, { "epoch": 0.6459952548474188, "grad_norm": 0.0927104502916336, "learning_rate": 8.560159305476857e-06, "loss": 0.0038, "step": 39480 }, { "epoch": 0.6461588807984946, "grad_norm": 0.023915352299809456, "learning_rate": 8.55915656032085e-06, "loss": 0.002, "step": 39490 }, { "epoch": 0.6463225067495705, "grad_norm": 0.1288783848285675, "learning_rate": 8.558153524892145e-06, "loss": 0.0031, "step": 39500 }, { "epoch": 0.6464861327006464, "grad_norm": 0.04643791913986206, "learning_rate": 8.557150199272542e-06, "loss": 0.007, "step": 39510 }, { "epoch": 0.6466497586517221, "grad_norm": 0.08360448479652405, "learning_rate": 8.556146583543874e-06, "loss": 0.0025, "step": 39520 }, { "epoch": 0.646813384602798, "grad_norm": 0.10117527842521667, "learning_rate": 8.555142677787987e-06, "loss": 0.0033, "step": 39530 }, { "epoch": 0.6469770105538738, "grad_norm": 0.057454243302345276, "learning_rate": 8.55413848208676e-06, "loss": 0.0029, "step": 39540 }, { "epoch": 0.6471406365049497, "grad_norm": 0.20288380980491638, "learning_rate": 8.553133996522092e-06, "loss": 0.0045, "step": 39550 }, { "epoch": 0.6473042624560256, "grad_norm": 0.19805529713630676, "learning_rate": 8.552129221175901e-06, "loss": 0.0027, "step": 39560 }, { "epoch": 0.6474678884071013, "grad_norm": 0.061083775013685226, "learning_rate": 8.55112415613014e-06, "loss": 0.0042, "step": 39570 }, { "epoch": 0.6476315143581772, "grad_norm": 0.061113279312849045, "learning_rate": 8.550118801466773e-06, "loss": 0.0021, "step": 39580 }, { "epoch": 0.647795140309253, "grad_norm": 0.03751441463828087, "learning_rate": 8.549113157267794e-06, "loss": 0.0036, "step": 39590 }, { "epoch": 0.6479587662603289, "grad_norm": 0.11440392583608627, "learning_rate": 8.548107223615224e-06, "loss": 0.0018, "step": 39600 }, { "epoch": 0.6481223922114048, "grad_norm": 0.07054636627435684, "learning_rate": 8.547101000591096e-06, "loss": 0.0023, "step": 39610 }, { "epoch": 0.6482860181624805, "grad_norm": 0.16492822766304016, "learning_rate": 8.546094488277482e-06, "loss": 0.0038, "step": 39620 }, { "epoch": 0.6484496441135564, "grad_norm": 0.06979059427976608, "learning_rate": 8.545087686756467e-06, "loss": 0.0027, "step": 39630 }, { "epoch": 0.6486132700646322, "grad_norm": 0.052431054413318634, "learning_rate": 8.544080596110159e-06, "loss": 0.0024, "step": 39640 }, { "epoch": 0.6487768960157081, "grad_norm": 0.1581445038318634, "learning_rate": 8.543073216420697e-06, "loss": 0.0024, "step": 39650 }, { "epoch": 0.648940521966784, "grad_norm": 0.07132606953382492, "learning_rate": 8.542065547770237e-06, "loss": 0.002, "step": 39660 }, { "epoch": 0.6491041479178598, "grad_norm": 0.07171407341957092, "learning_rate": 8.541057590240963e-06, "loss": 0.0032, "step": 39670 }, { "epoch": 0.6492677738689356, "grad_norm": 0.03588040918111801, "learning_rate": 8.54004934391508e-06, "loss": 0.0025, "step": 39680 }, { "epoch": 0.6494313998200114, "grad_norm": 0.04136650264263153, "learning_rate": 8.539040808874816e-06, "loss": 0.0025, "step": 39690 }, { "epoch": 0.6495950257710873, "grad_norm": 0.09886538982391357, "learning_rate": 8.538031985202424e-06, "loss": 0.0017, "step": 39700 }, { "epoch": 0.6497586517221632, "grad_norm": 0.07219899445772171, "learning_rate": 8.537022872980184e-06, "loss": 0.0024, "step": 39710 }, { "epoch": 0.649922277673239, "grad_norm": 0.1638651043176651, "learning_rate": 8.536013472290387e-06, "loss": 0.0019, "step": 39720 }, { "epoch": 0.6500859036243148, "grad_norm": 0.020979199558496475, "learning_rate": 8.535003783215366e-06, "loss": 0.002, "step": 39730 }, { "epoch": 0.6502495295753906, "grad_norm": 0.13245879113674164, "learning_rate": 8.533993805837463e-06, "loss": 0.0029, "step": 39740 }, { "epoch": 0.6504131555264665, "grad_norm": 0.03319181501865387, "learning_rate": 8.532983540239048e-06, "loss": 0.0044, "step": 39750 }, { "epoch": 0.6505767814775424, "grad_norm": 0.009434771724045277, "learning_rate": 8.531972986502517e-06, "loss": 0.0023, "step": 39760 }, { "epoch": 0.6507404074286182, "grad_norm": 0.12471001595258713, "learning_rate": 8.530962144710285e-06, "loss": 0.0021, "step": 39770 }, { "epoch": 0.650904033379694, "grad_norm": 0.10073836892843246, "learning_rate": 8.529951014944792e-06, "loss": 0.0023, "step": 39780 }, { "epoch": 0.6510676593307698, "grad_norm": 0.08077047020196915, "learning_rate": 8.528939597288506e-06, "loss": 0.0028, "step": 39790 }, { "epoch": 0.6512312852818457, "grad_norm": 0.11728660762310028, "learning_rate": 8.527927891823913e-06, "loss": 0.0031, "step": 39800 }, { "epoch": 0.6513949112329216, "grad_norm": 0.15916123986244202, "learning_rate": 8.526915898633524e-06, "loss": 0.0026, "step": 39810 }, { "epoch": 0.6515585371839974, "grad_norm": 0.012175176292657852, "learning_rate": 8.525903617799874e-06, "loss": 0.0041, "step": 39820 }, { "epoch": 0.6517221631350733, "grad_norm": 0.16962768137454987, "learning_rate": 8.524891049405522e-06, "loss": 0.0034, "step": 39830 }, { "epoch": 0.651885789086149, "grad_norm": 0.0806095078587532, "learning_rate": 8.523878193533047e-06, "loss": 0.0023, "step": 39840 }, { "epoch": 0.6520494150372249, "grad_norm": 0.05332040414214134, "learning_rate": 8.522865050265056e-06, "loss": 0.0022, "step": 39850 }, { "epoch": 0.6522130409883008, "grad_norm": 0.04758303239941597, "learning_rate": 8.521851619684178e-06, "loss": 0.0032, "step": 39860 }, { "epoch": 0.6523766669393766, "grad_norm": 0.0878957137465477, "learning_rate": 8.520837901873065e-06, "loss": 0.0055, "step": 39870 }, { "epoch": 0.6525402928904525, "grad_norm": 0.04823160171508789, "learning_rate": 8.519823896914391e-06, "loss": 0.0028, "step": 39880 }, { "epoch": 0.6527039188415282, "grad_norm": 0.30594176054000854, "learning_rate": 8.518809604890856e-06, "loss": 0.0036, "step": 39890 }, { "epoch": 0.6528675447926041, "grad_norm": 0.08237633854150772, "learning_rate": 8.51779502588518e-06, "loss": 0.002, "step": 39900 }, { "epoch": 0.65303117074368, "grad_norm": 0.04482179135084152, "learning_rate": 8.516780159980112e-06, "loss": 0.0046, "step": 39910 }, { "epoch": 0.6531947966947558, "grad_norm": 0.08056865632534027, "learning_rate": 8.515765007258418e-06, "loss": 0.003, "step": 39920 }, { "epoch": 0.6533584226458317, "grad_norm": 0.10867752134799957, "learning_rate": 8.514749567802892e-06, "loss": 0.002, "step": 39930 }, { "epoch": 0.6535220485969074, "grad_norm": 0.08309381455183029, "learning_rate": 8.51373384169635e-06, "loss": 0.0017, "step": 39940 }, { "epoch": 0.6536856745479833, "grad_norm": 0.048605818301439285, "learning_rate": 8.51271782902163e-06, "loss": 0.0032, "step": 39950 }, { "epoch": 0.6538493004990592, "grad_norm": 0.033906593918800354, "learning_rate": 8.511701529861595e-06, "loss": 0.0027, "step": 39960 }, { "epoch": 0.654012926450135, "grad_norm": 0.06378569453954697, "learning_rate": 8.510684944299132e-06, "loss": 0.0029, "step": 39970 }, { "epoch": 0.6541765524012109, "grad_norm": 0.06641020625829697, "learning_rate": 8.50966807241715e-06, "loss": 0.0035, "step": 39980 }, { "epoch": 0.6543401783522866, "grad_norm": 0.058488670736551285, "learning_rate": 8.50865091429858e-06, "loss": 0.0028, "step": 39990 }, { "epoch": 0.6545038043033625, "grad_norm": 0.10854487866163254, "learning_rate": 8.507633470026377e-06, "loss": 0.002, "step": 40000 }, { "epoch": 0.6546674302544384, "grad_norm": 0.09760761260986328, "learning_rate": 8.506615739683524e-06, "loss": 0.0024, "step": 40010 }, { "epoch": 0.6548310562055142, "grad_norm": 0.08055496215820312, "learning_rate": 8.505597723353022e-06, "loss": 0.0042, "step": 40020 }, { "epoch": 0.6549946821565901, "grad_norm": 0.09465897083282471, "learning_rate": 8.504579421117896e-06, "loss": 0.0027, "step": 40030 }, { "epoch": 0.6551583081076658, "grad_norm": 0.09184083342552185, "learning_rate": 8.503560833061196e-06, "loss": 0.0019, "step": 40040 }, { "epoch": 0.6553219340587417, "grad_norm": 0.046404317021369934, "learning_rate": 8.502541959265996e-06, "loss": 0.0018, "step": 40050 }, { "epoch": 0.6554855600098175, "grad_norm": 0.027325669303536415, "learning_rate": 8.501522799815389e-06, "loss": 0.0023, "step": 40060 }, { "epoch": 0.6556491859608934, "grad_norm": 0.1321181356906891, "learning_rate": 8.500503354792497e-06, "loss": 0.0034, "step": 40070 }, { "epoch": 0.6558128119119693, "grad_norm": 0.04301869124174118, "learning_rate": 8.49948362428046e-06, "loss": 0.0019, "step": 40080 }, { "epoch": 0.655976437863045, "grad_norm": 0.05978606641292572, "learning_rate": 8.498463608362445e-06, "loss": 0.0019, "step": 40090 }, { "epoch": 0.6561400638141209, "grad_norm": 0.09832644462585449, "learning_rate": 8.497443307121641e-06, "loss": 0.0015, "step": 40100 }, { "epoch": 0.6563036897651967, "grad_norm": 0.14280065894126892, "learning_rate": 8.49642272064126e-06, "loss": 0.0019, "step": 40110 }, { "epoch": 0.6564673157162726, "grad_norm": 0.03534003719687462, "learning_rate": 8.49540184900454e-06, "loss": 0.002, "step": 40120 }, { "epoch": 0.6566309416673485, "grad_norm": 0.03862011432647705, "learning_rate": 8.494380692294734e-06, "loss": 0.0049, "step": 40130 }, { "epoch": 0.6567945676184243, "grad_norm": 0.13252069056034088, "learning_rate": 8.49335925059513e-06, "loss": 0.0032, "step": 40140 }, { "epoch": 0.6569581935695001, "grad_norm": 0.21603387594223022, "learning_rate": 8.49233752398903e-06, "loss": 0.0072, "step": 40150 }, { "epoch": 0.6571218195205759, "grad_norm": 0.11427640914916992, "learning_rate": 8.491315512559764e-06, "loss": 0.0023, "step": 40160 }, { "epoch": 0.6572854454716518, "grad_norm": 0.12012048065662384, "learning_rate": 8.490293216390682e-06, "loss": 0.0027, "step": 40170 }, { "epoch": 0.6574490714227277, "grad_norm": 0.036919478327035904, "learning_rate": 8.489270635565161e-06, "loss": 0.0028, "step": 40180 }, { "epoch": 0.6576126973738035, "grad_norm": 0.0641576498746872, "learning_rate": 8.4882477701666e-06, "loss": 0.0018, "step": 40190 }, { "epoch": 0.6577763233248793, "grad_norm": 0.043436549603939056, "learning_rate": 8.487224620278415e-06, "loss": 0.0044, "step": 40200 }, { "epoch": 0.6579399492759551, "grad_norm": 0.16939841210842133, "learning_rate": 8.486201185984059e-06, "loss": 0.0032, "step": 40210 }, { "epoch": 0.658103575227031, "grad_norm": 0.09159771353006363, "learning_rate": 8.485177467366993e-06, "loss": 0.0044, "step": 40220 }, { "epoch": 0.6582672011781069, "grad_norm": 0.057120632380247116, "learning_rate": 8.484153464510712e-06, "loss": 0.0023, "step": 40230 }, { "epoch": 0.6584308271291827, "grad_norm": 0.14374807476997375, "learning_rate": 8.483129177498727e-06, "loss": 0.0027, "step": 40240 }, { "epoch": 0.6585944530802585, "grad_norm": 0.02444578893482685, "learning_rate": 8.482104606414576e-06, "loss": 0.003, "step": 40250 }, { "epoch": 0.6587580790313343, "grad_norm": 0.04823712259531021, "learning_rate": 8.481079751341823e-06, "loss": 0.0029, "step": 40260 }, { "epoch": 0.6589217049824102, "grad_norm": 0.024433519691228867, "learning_rate": 8.480054612364048e-06, "loss": 0.0014, "step": 40270 }, { "epoch": 0.6590853309334861, "grad_norm": 0.03714948520064354, "learning_rate": 8.479029189564858e-06, "loss": 0.0033, "step": 40280 }, { "epoch": 0.6592489568845619, "grad_norm": 0.08686116337776184, "learning_rate": 8.478003483027886e-06, "loss": 0.0046, "step": 40290 }, { "epoch": 0.6594125828356378, "grad_norm": 0.03075013868510723, "learning_rate": 8.476977492836782e-06, "loss": 0.0031, "step": 40300 }, { "epoch": 0.6595762087867135, "grad_norm": 0.06753110140562057, "learning_rate": 8.475951219075222e-06, "loss": 0.0023, "step": 40310 }, { "epoch": 0.6597398347377894, "grad_norm": 0.04041937366127968, "learning_rate": 8.47492466182691e-06, "loss": 0.0032, "step": 40320 }, { "epoch": 0.6599034606888653, "grad_norm": 0.04040047153830528, "learning_rate": 8.473897821175563e-06, "loss": 0.0051, "step": 40330 }, { "epoch": 0.6600670866399411, "grad_norm": 0.03707794472575188, "learning_rate": 8.47287069720493e-06, "loss": 0.002, "step": 40340 }, { "epoch": 0.660230712591017, "grad_norm": 0.11853500455617905, "learning_rate": 8.471843289998777e-06, "loss": 0.0028, "step": 40350 }, { "epoch": 0.6603943385420927, "grad_norm": 0.20351850986480713, "learning_rate": 8.470815599640898e-06, "loss": 0.0042, "step": 40360 }, { "epoch": 0.6605579644931686, "grad_norm": 0.08293548226356506, "learning_rate": 8.46978762621511e-06, "loss": 0.003, "step": 40370 }, { "epoch": 0.6607215904442445, "grad_norm": 0.01352684572339058, "learning_rate": 8.468759369805244e-06, "loss": 0.003, "step": 40380 }, { "epoch": 0.6608852163953203, "grad_norm": 0.067771315574646, "learning_rate": 8.467730830495168e-06, "loss": 0.0029, "step": 40390 }, { "epoch": 0.6610488423463962, "grad_norm": 0.06476642936468124, "learning_rate": 8.466702008368765e-06, "loss": 0.0025, "step": 40400 }, { "epoch": 0.6612124682974719, "grad_norm": 0.08182857930660248, "learning_rate": 8.465672903509939e-06, "loss": 0.0021, "step": 40410 }, { "epoch": 0.6613760942485478, "grad_norm": 0.0771382749080658, "learning_rate": 8.464643516002623e-06, "loss": 0.0022, "step": 40420 }, { "epoch": 0.6615397201996237, "grad_norm": 0.12741197645664215, "learning_rate": 8.46361384593077e-06, "loss": 0.0018, "step": 40430 }, { "epoch": 0.6617033461506995, "grad_norm": 0.02982301637530327, "learning_rate": 8.462583893378356e-06, "loss": 0.0029, "step": 40440 }, { "epoch": 0.6618669721017754, "grad_norm": 0.05917232111096382, "learning_rate": 8.461553658429378e-06, "loss": 0.0013, "step": 40450 }, { "epoch": 0.6620305980528511, "grad_norm": 0.1927354782819748, "learning_rate": 8.460523141167862e-06, "loss": 0.0027, "step": 40460 }, { "epoch": 0.662194224003927, "grad_norm": 0.25302886962890625, "learning_rate": 8.459492341677853e-06, "loss": 0.0026, "step": 40470 }, { "epoch": 0.6623578499550029, "grad_norm": 0.32481902837753296, "learning_rate": 8.45846126004342e-06, "loss": 0.0011, "step": 40480 }, { "epoch": 0.6625214759060787, "grad_norm": 0.05792341008782387, "learning_rate": 8.457429896348653e-06, "loss": 0.0022, "step": 40490 }, { "epoch": 0.6626851018571546, "grad_norm": 0.04456359148025513, "learning_rate": 8.456398250677665e-06, "loss": 0.0021, "step": 40500 }, { "epoch": 0.6628487278082303, "grad_norm": 0.19139991700649261, "learning_rate": 8.455366323114596e-06, "loss": 0.0048, "step": 40510 }, { "epoch": 0.6630123537593062, "grad_norm": 0.1043798178434372, "learning_rate": 8.454334113743605e-06, "loss": 0.002, "step": 40520 }, { "epoch": 0.6631759797103821, "grad_norm": 0.09990212321281433, "learning_rate": 8.453301622648878e-06, "loss": 0.0031, "step": 40530 }, { "epoch": 0.6633396056614579, "grad_norm": 0.060381870716810226, "learning_rate": 8.45226884991462e-06, "loss": 0.0022, "step": 40540 }, { "epoch": 0.6635032316125338, "grad_norm": 0.023226188495755196, "learning_rate": 8.45123579562506e-06, "loss": 0.0025, "step": 40550 }, { "epoch": 0.6636668575636095, "grad_norm": 0.15302708745002747, "learning_rate": 8.45020245986445e-06, "loss": 0.0033, "step": 40560 }, { "epoch": 0.6638304835146854, "grad_norm": 0.07055632025003433, "learning_rate": 8.449168842717067e-06, "loss": 0.0039, "step": 40570 }, { "epoch": 0.6639941094657613, "grad_norm": 0.014842112548649311, "learning_rate": 8.448134944267206e-06, "loss": 0.0031, "step": 40580 }, { "epoch": 0.6641577354168371, "grad_norm": 0.21070148050785065, "learning_rate": 8.447100764599192e-06, "loss": 0.0029, "step": 40590 }, { "epoch": 0.664321361367913, "grad_norm": 0.17054150998592377, "learning_rate": 8.446066303797366e-06, "loss": 0.003, "step": 40600 }, { "epoch": 0.6644849873189888, "grad_norm": 0.07928485423326492, "learning_rate": 8.4450315619461e-06, "loss": 0.0034, "step": 40610 }, { "epoch": 0.6646486132700646, "grad_norm": 0.03261743113398552, "learning_rate": 8.44399653912978e-06, "loss": 0.0021, "step": 40620 }, { "epoch": 0.6648122392211405, "grad_norm": 0.008574265986680984, "learning_rate": 8.442961235432818e-06, "loss": 0.0028, "step": 40630 }, { "epoch": 0.6649758651722163, "grad_norm": 0.18837010860443115, "learning_rate": 8.441925650939653e-06, "loss": 0.0025, "step": 40640 }, { "epoch": 0.6651394911232922, "grad_norm": 0.09124614298343658, "learning_rate": 8.440889785734742e-06, "loss": 0.0037, "step": 40650 }, { "epoch": 0.665303117074368, "grad_norm": 0.13488143682479858, "learning_rate": 8.439853639902566e-06, "loss": 0.0028, "step": 40660 }, { "epoch": 0.6654667430254438, "grad_norm": 0.19879809021949768, "learning_rate": 8.438817213527632e-06, "loss": 0.0024, "step": 40670 }, { "epoch": 0.6656303689765197, "grad_norm": 0.07655355334281921, "learning_rate": 8.437780506694465e-06, "loss": 0.0039, "step": 40680 }, { "epoch": 0.6657939949275955, "grad_norm": 0.06329560279846191, "learning_rate": 8.436743519487615e-06, "loss": 0.0029, "step": 40690 }, { "epoch": 0.6659576208786714, "grad_norm": 0.07198058068752289, "learning_rate": 8.43570625199166e-06, "loss": 0.0018, "step": 40700 }, { "epoch": 0.6661212468297472, "grad_norm": 0.12384290248155594, "learning_rate": 8.43466870429119e-06, "loss": 0.0045, "step": 40710 }, { "epoch": 0.666284872780823, "grad_norm": 0.145598903298378, "learning_rate": 8.433630876470826e-06, "loss": 0.003, "step": 40720 }, { "epoch": 0.6664484987318989, "grad_norm": 0.2404741793870926, "learning_rate": 8.43259276861521e-06, "loss": 0.0026, "step": 40730 }, { "epoch": 0.6666121246829747, "grad_norm": 0.08709389716386795, "learning_rate": 8.431554380809008e-06, "loss": 0.0024, "step": 40740 }, { "epoch": 0.6667757506340506, "grad_norm": 0.008196630515158176, "learning_rate": 8.430515713136904e-06, "loss": 0.0015, "step": 40750 }, { "epoch": 0.6669393765851264, "grad_norm": 0.05855025723576546, "learning_rate": 8.429476765683612e-06, "loss": 0.0017, "step": 40760 }, { "epoch": 0.6671030025362022, "grad_norm": 0.07866821438074112, "learning_rate": 8.428437538533861e-06, "loss": 0.0018, "step": 40770 }, { "epoch": 0.6672666284872781, "grad_norm": 0.2005240023136139, "learning_rate": 8.42739803177241e-06, "loss": 0.0032, "step": 40780 }, { "epoch": 0.6674302544383539, "grad_norm": 0.10873547196388245, "learning_rate": 8.426358245484036e-06, "loss": 0.002, "step": 40790 }, { "epoch": 0.6675938803894298, "grad_norm": 0.0981588363647461, "learning_rate": 8.425318179753542e-06, "loss": 0.0019, "step": 40800 }, { "epoch": 0.6677575063405056, "grad_norm": 0.1374509483575821, "learning_rate": 8.42427783466575e-06, "loss": 0.0029, "step": 40810 }, { "epoch": 0.6679211322915815, "grad_norm": 0.049998167902231216, "learning_rate": 8.42323721030551e-06, "loss": 0.0007, "step": 40820 }, { "epoch": 0.6680847582426573, "grad_norm": 0.0330437496304512, "learning_rate": 8.422196306757689e-06, "loss": 0.0015, "step": 40830 }, { "epoch": 0.6682483841937331, "grad_norm": 0.0394873209297657, "learning_rate": 8.42115512410718e-06, "loss": 0.0025, "step": 40840 }, { "epoch": 0.668412010144809, "grad_norm": 0.14858004450798035, "learning_rate": 8.420113662438898e-06, "loss": 0.0072, "step": 40850 }, { "epoch": 0.6685756360958848, "grad_norm": 0.055894069373607635, "learning_rate": 8.419071921837784e-06, "loss": 0.0036, "step": 40860 }, { "epoch": 0.6687392620469607, "grad_norm": 0.09399282932281494, "learning_rate": 8.418029902388796e-06, "loss": 0.0031, "step": 40870 }, { "epoch": 0.6689028879980365, "grad_norm": 0.0983235090970993, "learning_rate": 8.416987604176918e-06, "loss": 0.0029, "step": 40880 }, { "epoch": 0.6690665139491123, "grad_norm": 0.2799706757068634, "learning_rate": 8.415945027287156e-06, "loss": 0.0024, "step": 40890 }, { "epoch": 0.6692301399001882, "grad_norm": 0.010169921442866325, "learning_rate": 8.414902171804542e-06, "loss": 0.0025, "step": 40900 }, { "epoch": 0.669393765851264, "grad_norm": 0.03838230296969414, "learning_rate": 8.413859037814123e-06, "loss": 0.0021, "step": 40910 }, { "epoch": 0.6695573918023399, "grad_norm": 0.17767493426799774, "learning_rate": 8.412815625400976e-06, "loss": 0.0022, "step": 40920 }, { "epoch": 0.6697210177534156, "grad_norm": 0.07790587842464447, "learning_rate": 8.4117719346502e-06, "loss": 0.002, "step": 40930 }, { "epoch": 0.6698846437044915, "grad_norm": 0.04043501242995262, "learning_rate": 8.410727965646909e-06, "loss": 0.005, "step": 40940 }, { "epoch": 0.6700482696555674, "grad_norm": 0.032733093947172165, "learning_rate": 8.409683718476253e-06, "loss": 0.0022, "step": 40950 }, { "epoch": 0.6702118956066432, "grad_norm": 0.046128902584314346, "learning_rate": 8.408639193223392e-06, "loss": 0.0076, "step": 40960 }, { "epoch": 0.6703755215577191, "grad_norm": 0.16634483635425568, "learning_rate": 8.407594389973517e-06, "loss": 0.0035, "step": 40970 }, { "epoch": 0.6705391475087948, "grad_norm": 0.025049185380339622, "learning_rate": 8.406549308811835e-06, "loss": 0.0015, "step": 40980 }, { "epoch": 0.6707027734598707, "grad_norm": 0.08842090517282486, "learning_rate": 8.405503949823583e-06, "loss": 0.0021, "step": 40990 }, { "epoch": 0.6708663994109466, "grad_norm": 0.07626570761203766, "learning_rate": 8.404458313094015e-06, "loss": 0.0029, "step": 41000 }, { "epoch": 0.6710300253620224, "grad_norm": 0.07488197833299637, "learning_rate": 8.403412398708411e-06, "loss": 0.0019, "step": 41010 }, { "epoch": 0.6711936513130983, "grad_norm": 0.05449520796537399, "learning_rate": 8.40236620675207e-06, "loss": 0.0021, "step": 41020 }, { "epoch": 0.671357277264174, "grad_norm": 0.05049673840403557, "learning_rate": 8.401319737310318e-06, "loss": 0.0021, "step": 41030 }, { "epoch": 0.6715209032152499, "grad_norm": 0.14458458125591278, "learning_rate": 8.400272990468499e-06, "loss": 0.0026, "step": 41040 }, { "epoch": 0.6716845291663258, "grad_norm": 0.1568382829427719, "learning_rate": 8.399225966311984e-06, "loss": 0.0035, "step": 41050 }, { "epoch": 0.6718481551174016, "grad_norm": 0.015643656253814697, "learning_rate": 8.398178664926164e-06, "loss": 0.0013, "step": 41060 }, { "epoch": 0.6720117810684775, "grad_norm": 0.003948468714952469, "learning_rate": 8.397131086396455e-06, "loss": 0.0022, "step": 41070 }, { "epoch": 0.6721754070195533, "grad_norm": 0.041831858456134796, "learning_rate": 8.396083230808292e-06, "loss": 0.0026, "step": 41080 }, { "epoch": 0.6723390329706291, "grad_norm": 0.05406446382403374, "learning_rate": 8.395035098247136e-06, "loss": 0.0018, "step": 41090 }, { "epoch": 0.672502658921705, "grad_norm": 0.12080182135105133, "learning_rate": 8.393986688798468e-06, "loss": 0.0021, "step": 41100 }, { "epoch": 0.6726662848727808, "grad_norm": 0.07608948647975922, "learning_rate": 8.392938002547793e-06, "loss": 0.0035, "step": 41110 }, { "epoch": 0.6728299108238567, "grad_norm": 0.13132861256599426, "learning_rate": 8.391889039580637e-06, "loss": 0.0026, "step": 41120 }, { "epoch": 0.6729935367749325, "grad_norm": 0.09472689777612686, "learning_rate": 8.390839799982552e-06, "loss": 0.003, "step": 41130 }, { "epoch": 0.6731571627260083, "grad_norm": 0.025278786197304726, "learning_rate": 8.389790283839109e-06, "loss": 0.002, "step": 41140 }, { "epoch": 0.6733207886770842, "grad_norm": 1.022646188735962, "learning_rate": 8.388740491235905e-06, "loss": 0.0027, "step": 41150 }, { "epoch": 0.67348441462816, "grad_norm": 0.21813036501407623, "learning_rate": 8.387690422258554e-06, "loss": 0.0031, "step": 41160 }, { "epoch": 0.6736480405792359, "grad_norm": 0.4471426010131836, "learning_rate": 8.386640076992701e-06, "loss": 0.0023, "step": 41170 }, { "epoch": 0.6738116665303117, "grad_norm": 0.16581852734088898, "learning_rate": 8.385589455524002e-06, "loss": 0.0024, "step": 41180 }, { "epoch": 0.6739752924813875, "grad_norm": 0.04886772856116295, "learning_rate": 8.384538557938147e-06, "loss": 0.0027, "step": 41190 }, { "epoch": 0.6741389184324634, "grad_norm": 0.13878095149993896, "learning_rate": 8.383487384320844e-06, "loss": 0.0032, "step": 41200 }, { "epoch": 0.6743025443835392, "grad_norm": 0.03204534202814102, "learning_rate": 8.38243593475782e-06, "loss": 0.0016, "step": 41210 }, { "epoch": 0.6744661703346151, "grad_norm": 0.26388972997665405, "learning_rate": 8.38138420933483e-06, "loss": 0.0032, "step": 41220 }, { "epoch": 0.6746297962856909, "grad_norm": 0.1268002837896347, "learning_rate": 8.380332208137648e-06, "loss": 0.0041, "step": 41230 }, { "epoch": 0.6747934222367667, "grad_norm": 0.09900958836078644, "learning_rate": 8.379279931252072e-06, "loss": 0.0032, "step": 41240 }, { "epoch": 0.6749570481878426, "grad_norm": 0.26281067728996277, "learning_rate": 8.378227378763923e-06, "loss": 0.0031, "step": 41250 }, { "epoch": 0.6751206741389184, "grad_norm": 0.25603729486465454, "learning_rate": 8.377174550759043e-06, "loss": 0.006, "step": 41260 }, { "epoch": 0.6752843000899943, "grad_norm": 0.26240843534469604, "learning_rate": 8.376121447323294e-06, "loss": 0.0033, "step": 41270 }, { "epoch": 0.6754479260410701, "grad_norm": 0.07707250118255615, "learning_rate": 8.375068068542568e-06, "loss": 0.0019, "step": 41280 }, { "epoch": 0.675611551992146, "grad_norm": 0.06844502687454224, "learning_rate": 8.374014414502774e-06, "loss": 0.0045, "step": 41290 }, { "epoch": 0.6757751779432218, "grad_norm": 0.3305200934410095, "learning_rate": 8.372960485289843e-06, "loss": 0.0029, "step": 41300 }, { "epoch": 0.6759388038942976, "grad_norm": 0.056485649198293686, "learning_rate": 8.37190628098973e-06, "loss": 0.0034, "step": 41310 }, { "epoch": 0.6761024298453735, "grad_norm": 0.08391249179840088, "learning_rate": 8.370851801688413e-06, "loss": 0.002, "step": 41320 }, { "epoch": 0.6762660557964493, "grad_norm": 0.028164148330688477, "learning_rate": 8.369797047471893e-06, "loss": 0.0033, "step": 41330 }, { "epoch": 0.6764296817475252, "grad_norm": 0.2291320413351059, "learning_rate": 8.36874201842619e-06, "loss": 0.0055, "step": 41340 }, { "epoch": 0.676593307698601, "grad_norm": 0.02646239660680294, "learning_rate": 8.367686714637348e-06, "loss": 0.0016, "step": 41350 }, { "epoch": 0.6767569336496768, "grad_norm": 0.033750392496585846, "learning_rate": 8.36663113619144e-06, "loss": 0.0014, "step": 41360 }, { "epoch": 0.6769205596007527, "grad_norm": 0.041009679436683655, "learning_rate": 8.365575283174546e-06, "loss": 0.0019, "step": 41370 }, { "epoch": 0.6770841855518285, "grad_norm": 0.2438313364982605, "learning_rate": 8.364519155672783e-06, "loss": 0.0032, "step": 41380 }, { "epoch": 0.6772478115029044, "grad_norm": 0.25042006373405457, "learning_rate": 8.363462753772287e-06, "loss": 0.0014, "step": 41390 }, { "epoch": 0.6774114374539802, "grad_norm": 0.04324944689869881, "learning_rate": 8.362406077559212e-06, "loss": 0.0022, "step": 41400 }, { "epoch": 0.677575063405056, "grad_norm": 0.24871376156806946, "learning_rate": 8.361349127119735e-06, "loss": 0.0041, "step": 41410 }, { "epoch": 0.6777386893561319, "grad_norm": 0.02890959195792675, "learning_rate": 8.360291902540062e-06, "loss": 0.0033, "step": 41420 }, { "epoch": 0.6779023153072077, "grad_norm": 0.024548379704356194, "learning_rate": 8.359234403906413e-06, "loss": 0.0017, "step": 41430 }, { "epoch": 0.6780659412582836, "grad_norm": 0.2140108197927475, "learning_rate": 8.358176631305036e-06, "loss": 0.0021, "step": 41440 }, { "epoch": 0.6782295672093595, "grad_norm": 0.13933569192886353, "learning_rate": 8.357118584822197e-06, "loss": 0.0015, "step": 41450 }, { "epoch": 0.6783931931604352, "grad_norm": 0.09648358076810837, "learning_rate": 8.35606026454419e-06, "loss": 0.0031, "step": 41460 }, { "epoch": 0.6785568191115111, "grad_norm": 0.06922072917222977, "learning_rate": 8.355001670557324e-06, "loss": 0.003, "step": 41470 }, { "epoch": 0.6787204450625869, "grad_norm": 0.1881045252084732, "learning_rate": 8.353942802947938e-06, "loss": 0.0026, "step": 41480 }, { "epoch": 0.6788840710136628, "grad_norm": 0.04266451671719551, "learning_rate": 8.352883661802388e-06, "loss": 0.0028, "step": 41490 }, { "epoch": 0.6790476969647387, "grad_norm": 0.10109952837228775, "learning_rate": 8.351824247207053e-06, "loss": 0.0043, "step": 41500 }, { "epoch": 0.6792113229158144, "grad_norm": 0.10828981548547745, "learning_rate": 8.350764559248336e-06, "loss": 0.0018, "step": 41510 }, { "epoch": 0.6793749488668903, "grad_norm": 0.23151905834674835, "learning_rate": 8.349704598012664e-06, "loss": 0.0021, "step": 41520 }, { "epoch": 0.6795385748179661, "grad_norm": 0.08255578577518463, "learning_rate": 8.34864436358648e-06, "loss": 0.0017, "step": 41530 }, { "epoch": 0.679702200769042, "grad_norm": 0.26133525371551514, "learning_rate": 8.347583856056255e-06, "loss": 0.0034, "step": 41540 }, { "epoch": 0.6798658267201179, "grad_norm": 0.07293794304132462, "learning_rate": 8.346523075508481e-06, "loss": 0.0025, "step": 41550 }, { "epoch": 0.6800294526711936, "grad_norm": 0.07824334502220154, "learning_rate": 8.34546202202967e-06, "loss": 0.0037, "step": 41560 }, { "epoch": 0.6801930786222695, "grad_norm": 0.14194002747535706, "learning_rate": 8.344400695706358e-06, "loss": 0.0025, "step": 41570 }, { "epoch": 0.6803567045733453, "grad_norm": 0.0412052646279335, "learning_rate": 8.343339096625104e-06, "loss": 0.0017, "step": 41580 }, { "epoch": 0.6805203305244212, "grad_norm": 0.18909892439842224, "learning_rate": 8.34227722487249e-06, "loss": 0.0022, "step": 41590 }, { "epoch": 0.6806839564754971, "grad_norm": 0.16024208068847656, "learning_rate": 8.341215080535117e-06, "loss": 0.0028, "step": 41600 }, { "epoch": 0.6808475824265728, "grad_norm": 0.09370427578687668, "learning_rate": 8.340152663699607e-06, "loss": 0.003, "step": 41610 }, { "epoch": 0.6810112083776487, "grad_norm": 0.1780400276184082, "learning_rate": 8.339089974452613e-06, "loss": 0.0028, "step": 41620 }, { "epoch": 0.6811748343287245, "grad_norm": 0.05311296507716179, "learning_rate": 8.3380270128808e-06, "loss": 0.002, "step": 41630 }, { "epoch": 0.6813384602798004, "grad_norm": 0.22057993710041046, "learning_rate": 8.336963779070861e-06, "loss": 0.0051, "step": 41640 }, { "epoch": 0.6815020862308763, "grad_norm": 0.05289392173290253, "learning_rate": 8.33590027310951e-06, "loss": 0.0025, "step": 41650 }, { "epoch": 0.681665712181952, "grad_norm": 0.03361356630921364, "learning_rate": 8.33483649508348e-06, "loss": 0.0025, "step": 41660 }, { "epoch": 0.6818293381330279, "grad_norm": 0.04952887073159218, "learning_rate": 8.333772445079533e-06, "loss": 0.0032, "step": 41670 }, { "epoch": 0.6819929640841037, "grad_norm": 0.06632176786661148, "learning_rate": 8.33270812318445e-06, "loss": 0.003, "step": 41680 }, { "epoch": 0.6821565900351796, "grad_norm": 0.07646957039833069, "learning_rate": 8.33164352948503e-06, "loss": 0.0017, "step": 41690 }, { "epoch": 0.6823202159862555, "grad_norm": 0.07562000304460526, "learning_rate": 8.330578664068097e-06, "loss": 0.0012, "step": 41700 }, { "epoch": 0.6824838419373312, "grad_norm": 0.06474523991346359, "learning_rate": 8.3295135270205e-06, "loss": 0.0022, "step": 41710 }, { "epoch": 0.6826474678884071, "grad_norm": 0.06082943081855774, "learning_rate": 8.32844811842911e-06, "loss": 0.0019, "step": 41720 }, { "epoch": 0.6828110938394829, "grad_norm": 0.005953933112323284, "learning_rate": 8.327382438380816e-06, "loss": 0.0016, "step": 41730 }, { "epoch": 0.6829747197905588, "grad_norm": 0.11157190054655075, "learning_rate": 8.326316486962529e-06, "loss": 0.0021, "step": 41740 }, { "epoch": 0.6831383457416347, "grad_norm": 0.03926282376050949, "learning_rate": 8.325250264261187e-06, "loss": 0.0058, "step": 41750 }, { "epoch": 0.6833019716927105, "grad_norm": 0.029198868200182915, "learning_rate": 8.324183770363747e-06, "loss": 0.0016, "step": 41760 }, { "epoch": 0.6834655976437863, "grad_norm": 0.04374314472079277, "learning_rate": 8.323117005357188e-06, "loss": 0.0022, "step": 41770 }, { "epoch": 0.6836292235948621, "grad_norm": 0.045673198997974396, "learning_rate": 8.322049969328515e-06, "loss": 0.0019, "step": 41780 }, { "epoch": 0.683792849545938, "grad_norm": 0.047671206295490265, "learning_rate": 8.320982662364746e-06, "loss": 0.0067, "step": 41790 }, { "epoch": 0.6839564754970138, "grad_norm": 0.07123294472694397, "learning_rate": 8.319915084552932e-06, "loss": 0.0024, "step": 41800 }, { "epoch": 0.6841201014480897, "grad_norm": 0.018189528957009315, "learning_rate": 8.318847235980138e-06, "loss": 0.0019, "step": 41810 }, { "epoch": 0.6842837273991655, "grad_norm": 0.03999166190624237, "learning_rate": 8.317779116733455e-06, "loss": 0.0035, "step": 41820 }, { "epoch": 0.6844473533502413, "grad_norm": 0.2060263454914093, "learning_rate": 8.316710726899994e-06, "loss": 0.0032, "step": 41830 }, { "epoch": 0.6846109793013172, "grad_norm": 0.05537264421582222, "learning_rate": 8.315642066566893e-06, "loss": 0.0031, "step": 41840 }, { "epoch": 0.684774605252393, "grad_norm": 0.15973874926567078, "learning_rate": 8.314573135821304e-06, "loss": 0.0022, "step": 41850 }, { "epoch": 0.6849382312034689, "grad_norm": 0.03887307271361351, "learning_rate": 8.31350393475041e-06, "loss": 0.0023, "step": 41860 }, { "epoch": 0.6851018571545447, "grad_norm": 0.06278285384178162, "learning_rate": 8.312434463441405e-06, "loss": 0.0014, "step": 41870 }, { "epoch": 0.6852654831056205, "grad_norm": 0.07525129616260529, "learning_rate": 8.311364721981517e-06, "loss": 0.0057, "step": 41880 }, { "epoch": 0.6854291090566964, "grad_norm": 0.125365749001503, "learning_rate": 8.31029471045799e-06, "loss": 0.0027, "step": 41890 }, { "epoch": 0.6855927350077722, "grad_norm": 0.19804522395133972, "learning_rate": 8.309224428958087e-06, "loss": 0.0033, "step": 41900 }, { "epoch": 0.6857563609588481, "grad_norm": 0.1322184056043625, "learning_rate": 8.3081538775691e-06, "loss": 0.0024, "step": 41910 }, { "epoch": 0.685919986909924, "grad_norm": 0.028758447617292404, "learning_rate": 8.307083056378336e-06, "loss": 0.0029, "step": 41920 }, { "epoch": 0.6860836128609997, "grad_norm": 0.059308767318725586, "learning_rate": 8.306011965473129e-06, "loss": 0.003, "step": 41930 }, { "epoch": 0.6862472388120756, "grad_norm": 0.11813205480575562, "learning_rate": 8.304940604940836e-06, "loss": 0.0034, "step": 41940 }, { "epoch": 0.6864108647631514, "grad_norm": 0.2293846756219864, "learning_rate": 8.303868974868831e-06, "loss": 0.0023, "step": 41950 }, { "epoch": 0.6865744907142273, "grad_norm": 0.13416968286037445, "learning_rate": 8.302797075344514e-06, "loss": 0.0037, "step": 41960 }, { "epoch": 0.6867381166653032, "grad_norm": 0.12346319109201431, "learning_rate": 8.301724906455305e-06, "loss": 0.0029, "step": 41970 }, { "epoch": 0.6869017426163789, "grad_norm": 0.07347419857978821, "learning_rate": 8.300652468288643e-06, "loss": 0.0025, "step": 41980 }, { "epoch": 0.6870653685674548, "grad_norm": 0.150022953748703, "learning_rate": 8.299579760931998e-06, "loss": 0.0041, "step": 41990 }, { "epoch": 0.6872289945185306, "grad_norm": 0.05635262653231621, "learning_rate": 8.298506784472852e-06, "loss": 0.0031, "step": 42000 }, { "epoch": 0.6873926204696065, "grad_norm": 0.23792438209056854, "learning_rate": 8.297433538998718e-06, "loss": 0.0032, "step": 42010 }, { "epoch": 0.6875562464206824, "grad_norm": 0.1262994259595871, "learning_rate": 8.296360024597122e-06, "loss": 0.0019, "step": 42020 }, { "epoch": 0.6877198723717581, "grad_norm": 0.056801892817020416, "learning_rate": 8.295286241355616e-06, "loss": 0.0017, "step": 42030 }, { "epoch": 0.687883498322834, "grad_norm": 0.10647229850292206, "learning_rate": 8.294212189361778e-06, "loss": 0.0041, "step": 42040 }, { "epoch": 0.6880471242739098, "grad_norm": 0.10949555784463882, "learning_rate": 8.2931378687032e-06, "loss": 0.0036, "step": 42050 }, { "epoch": 0.6882107502249857, "grad_norm": 0.03249487653374672, "learning_rate": 8.292063279467503e-06, "loss": 0.0024, "step": 42060 }, { "epoch": 0.6883743761760616, "grad_norm": 0.10420724004507065, "learning_rate": 8.290988421742325e-06, "loss": 0.0031, "step": 42070 }, { "epoch": 0.6885380021271373, "grad_norm": 0.03690098226070404, "learning_rate": 8.289913295615328e-06, "loss": 0.0038, "step": 42080 }, { "epoch": 0.6887016280782132, "grad_norm": 0.11487086862325668, "learning_rate": 8.288837901174198e-06, "loss": 0.0039, "step": 42090 }, { "epoch": 0.688865254029289, "grad_norm": 0.047081757336854935, "learning_rate": 8.287762238506636e-06, "loss": 0.0013, "step": 42100 }, { "epoch": 0.6890288799803649, "grad_norm": 1.014747142791748, "learning_rate": 8.286686307700371e-06, "loss": 0.0038, "step": 42110 }, { "epoch": 0.6891925059314408, "grad_norm": 0.07342509180307388, "learning_rate": 8.285610108843156e-06, "loss": 0.0023, "step": 42120 }, { "epoch": 0.6893561318825165, "grad_norm": 0.08703934401273727, "learning_rate": 8.284533642022756e-06, "loss": 0.0038, "step": 42130 }, { "epoch": 0.6895197578335924, "grad_norm": 0.09622837603092194, "learning_rate": 8.28345690732697e-06, "loss": 0.0039, "step": 42140 }, { "epoch": 0.6896833837846682, "grad_norm": 0.04340745881199837, "learning_rate": 8.282379904843606e-06, "loss": 0.0017, "step": 42150 }, { "epoch": 0.6898470097357441, "grad_norm": 0.13778144121170044, "learning_rate": 8.281302634660509e-06, "loss": 0.0046, "step": 42160 }, { "epoch": 0.69001063568682, "grad_norm": 0.05705218017101288, "learning_rate": 8.28022509686553e-06, "loss": 0.0028, "step": 42170 }, { "epoch": 0.6901742616378957, "grad_norm": 0.15162791311740875, "learning_rate": 8.279147291546554e-06, "loss": 0.0018, "step": 42180 }, { "epoch": 0.6903378875889716, "grad_norm": 0.27879807353019714, "learning_rate": 8.27806921879148e-06, "loss": 0.0049, "step": 42190 }, { "epoch": 0.6905015135400474, "grad_norm": 0.09431177377700806, "learning_rate": 8.276990878688235e-06, "loss": 0.0032, "step": 42200 }, { "epoch": 0.6906651394911233, "grad_norm": 0.1065945103764534, "learning_rate": 8.275912271324763e-06, "loss": 0.0022, "step": 42210 }, { "epoch": 0.6908287654421992, "grad_norm": 0.10742338001728058, "learning_rate": 8.274833396789031e-06, "loss": 0.0019, "step": 42220 }, { "epoch": 0.690992391393275, "grad_norm": 0.11232302337884903, "learning_rate": 8.27375425516903e-06, "loss": 0.0034, "step": 42230 }, { "epoch": 0.6911560173443508, "grad_norm": 0.07168204337358475, "learning_rate": 8.27267484655277e-06, "loss": 0.0014, "step": 42240 }, { "epoch": 0.6913196432954266, "grad_norm": 0.017771488055586815, "learning_rate": 8.271595171028283e-06, "loss": 0.0016, "step": 42250 }, { "epoch": 0.6914832692465025, "grad_norm": 0.1965777575969696, "learning_rate": 8.270515228683626e-06, "loss": 0.0032, "step": 42260 }, { "epoch": 0.6916468951975784, "grad_norm": 0.025913791730999947, "learning_rate": 8.269435019606875e-06, "loss": 0.0044, "step": 42270 }, { "epoch": 0.6918105211486542, "grad_norm": 0.058756064623594284, "learning_rate": 8.268354543886126e-06, "loss": 0.0047, "step": 42280 }, { "epoch": 0.69197414709973, "grad_norm": 0.21434593200683594, "learning_rate": 8.2672738016095e-06, "loss": 0.0062, "step": 42290 }, { "epoch": 0.6921377730508058, "grad_norm": 0.06954843550920486, "learning_rate": 8.26619279286514e-06, "loss": 0.003, "step": 42300 }, { "epoch": 0.6923013990018817, "grad_norm": 0.023796798661351204, "learning_rate": 8.265111517741206e-06, "loss": 0.0025, "step": 42310 }, { "epoch": 0.6924650249529576, "grad_norm": 0.0614449679851532, "learning_rate": 8.26402997632589e-06, "loss": 0.0023, "step": 42320 }, { "epoch": 0.6926286509040334, "grad_norm": 0.11617767065763474, "learning_rate": 8.26294816870739e-06, "loss": 0.0026, "step": 42330 }, { "epoch": 0.6927922768551092, "grad_norm": 0.07790637761354446, "learning_rate": 8.26186609497394e-06, "loss": 0.0023, "step": 42340 }, { "epoch": 0.692955902806185, "grad_norm": 0.022893153131008148, "learning_rate": 8.26078375521379e-06, "loss": 0.0024, "step": 42350 }, { "epoch": 0.6931195287572609, "grad_norm": 0.036024775356054306, "learning_rate": 8.259701149515211e-06, "loss": 0.0031, "step": 42360 }, { "epoch": 0.6932831547083368, "grad_norm": 0.06227801367640495, "learning_rate": 8.258618277966498e-06, "loss": 0.0026, "step": 42370 }, { "epoch": 0.6934467806594126, "grad_norm": 0.10701841861009598, "learning_rate": 8.257535140655961e-06, "loss": 0.0018, "step": 42380 }, { "epoch": 0.6936104066104885, "grad_norm": 0.051589235663414, "learning_rate": 8.256451737671945e-06, "loss": 0.0015, "step": 42390 }, { "epoch": 0.6937740325615642, "grad_norm": 0.09103359282016754, "learning_rate": 8.255368069102803e-06, "loss": 0.0027, "step": 42400 }, { "epoch": 0.6939376585126401, "grad_norm": 0.03723711520433426, "learning_rate": 8.254284135036916e-06, "loss": 0.0013, "step": 42410 }, { "epoch": 0.694101284463716, "grad_norm": 0.0831732377409935, "learning_rate": 8.25319993556269e-06, "loss": 0.0019, "step": 42420 }, { "epoch": 0.6942649104147918, "grad_norm": 0.10774324089288712, "learning_rate": 8.252115470768543e-06, "loss": 0.0024, "step": 42430 }, { "epoch": 0.6944285363658677, "grad_norm": 0.037433020770549774, "learning_rate": 8.251030740742923e-06, "loss": 0.0013, "step": 42440 }, { "epoch": 0.6945921623169434, "grad_norm": 0.042912546545267105, "learning_rate": 8.249945745574297e-06, "loss": 0.0023, "step": 42450 }, { "epoch": 0.6947557882680193, "grad_norm": 0.0421382300555706, "learning_rate": 8.248860485351155e-06, "loss": 0.002, "step": 42460 }, { "epoch": 0.6949194142190952, "grad_norm": 0.13821718096733093, "learning_rate": 8.247774960162003e-06, "loss": 0.0043, "step": 42470 }, { "epoch": 0.695083040170171, "grad_norm": 0.06397277861833572, "learning_rate": 8.246689170095376e-06, "loss": 0.0038, "step": 42480 }, { "epoch": 0.6952466661212469, "grad_norm": 0.03633551672101021, "learning_rate": 8.245603115239826e-06, "loss": 0.0039, "step": 42490 }, { "epoch": 0.6954102920723226, "grad_norm": 0.037556543946266174, "learning_rate": 8.24451679568393e-06, "loss": 0.0016, "step": 42500 }, { "epoch": 0.6955739180233985, "grad_norm": 0.03047458454966545, "learning_rate": 8.243430211516282e-06, "loss": 0.0024, "step": 42510 }, { "epoch": 0.6957375439744744, "grad_norm": 0.06643889099359512, "learning_rate": 8.2423433628255e-06, "loss": 0.0018, "step": 42520 }, { "epoch": 0.6959011699255502, "grad_norm": 0.19985431432724, "learning_rate": 8.241256249700227e-06, "loss": 0.0013, "step": 42530 }, { "epoch": 0.6960647958766261, "grad_norm": 0.07066982239484787, "learning_rate": 8.24016887222912e-06, "loss": 0.0024, "step": 42540 }, { "epoch": 0.6962284218277018, "grad_norm": 0.29324936866760254, "learning_rate": 8.239081230500865e-06, "loss": 0.0052, "step": 42550 }, { "epoch": 0.6963920477787777, "grad_norm": 0.06926723569631577, "learning_rate": 8.237993324604166e-06, "loss": 0.0018, "step": 42560 }, { "epoch": 0.6965556737298536, "grad_norm": 0.034019507467746735, "learning_rate": 8.236905154627746e-06, "loss": 0.0022, "step": 42570 }, { "epoch": 0.6967192996809294, "grad_norm": 0.06813473254442215, "learning_rate": 8.235816720660355e-06, "loss": 0.0034, "step": 42580 }, { "epoch": 0.6968829256320053, "grad_norm": 0.06074924021959305, "learning_rate": 8.23472802279076e-06, "loss": 0.0024, "step": 42590 }, { "epoch": 0.697046551583081, "grad_norm": 0.16627155244350433, "learning_rate": 8.233639061107757e-06, "loss": 0.0019, "step": 42600 }, { "epoch": 0.6972101775341569, "grad_norm": 0.030916433781385422, "learning_rate": 8.23254983570015e-06, "loss": 0.0032, "step": 42610 }, { "epoch": 0.6973738034852328, "grad_norm": 0.045743539929389954, "learning_rate": 8.231460346656778e-06, "loss": 0.0014, "step": 42620 }, { "epoch": 0.6975374294363086, "grad_norm": 0.2038387656211853, "learning_rate": 8.230370594066494e-06, "loss": 0.0024, "step": 42630 }, { "epoch": 0.6977010553873845, "grad_norm": 0.05940770357847214, "learning_rate": 8.229280578018178e-06, "loss": 0.0023, "step": 42640 }, { "epoch": 0.6978646813384602, "grad_norm": 0.08583934605121613, "learning_rate": 8.228190298600723e-06, "loss": 0.0037, "step": 42650 }, { "epoch": 0.6980283072895361, "grad_norm": 0.17937374114990234, "learning_rate": 8.22709975590305e-06, "loss": 0.0038, "step": 42660 }, { "epoch": 0.6981919332406119, "grad_norm": 0.11187461018562317, "learning_rate": 8.226008950014099e-06, "loss": 0.0018, "step": 42670 }, { "epoch": 0.6983555591916878, "grad_norm": 0.06361231207847595, "learning_rate": 8.224917881022836e-06, "loss": 0.0019, "step": 42680 }, { "epoch": 0.6985191851427637, "grad_norm": 0.1189630925655365, "learning_rate": 8.22382654901824e-06, "loss": 0.002, "step": 42690 }, { "epoch": 0.6986828110938395, "grad_norm": 0.07285558432340622, "learning_rate": 8.222734954089323e-06, "loss": 0.0015, "step": 42700 }, { "epoch": 0.6988464370449153, "grad_norm": 0.10752551257610321, "learning_rate": 8.221643096325105e-06, "loss": 0.0031, "step": 42710 }, { "epoch": 0.6990100629959911, "grad_norm": 0.04563531279563904, "learning_rate": 8.22055097581464e-06, "loss": 0.0016, "step": 42720 }, { "epoch": 0.699173688947067, "grad_norm": 0.10486824810504913, "learning_rate": 8.219458592646991e-06, "loss": 0.0023, "step": 42730 }, { "epoch": 0.6993373148981429, "grad_norm": 0.004063060972839594, "learning_rate": 8.218365946911256e-06, "loss": 0.0024, "step": 42740 }, { "epoch": 0.6995009408492187, "grad_norm": 0.08407556265592575, "learning_rate": 8.217273038696542e-06, "loss": 0.0018, "step": 42750 }, { "epoch": 0.6996645668002945, "grad_norm": 0.01555270329117775, "learning_rate": 8.216179868091987e-06, "loss": 0.0017, "step": 42760 }, { "epoch": 0.6998281927513703, "grad_norm": 0.0795881599187851, "learning_rate": 8.215086435186743e-06, "loss": 0.0028, "step": 42770 }, { "epoch": 0.6999918187024462, "grad_norm": 0.06360206753015518, "learning_rate": 8.213992740069987e-06, "loss": 0.0026, "step": 42780 }, { "epoch": 0.7001554446535221, "grad_norm": 0.02852897346019745, "learning_rate": 8.21289878283092e-06, "loss": 0.0013, "step": 42790 }, { "epoch": 0.7003190706045979, "grad_norm": 0.04118447005748749, "learning_rate": 8.21180456355876e-06, "loss": 0.0023, "step": 42800 }, { "epoch": 0.7004826965556737, "grad_norm": 0.15166281163692474, "learning_rate": 8.210710082342744e-06, "loss": 0.0028, "step": 42810 }, { "epoch": 0.7006463225067495, "grad_norm": 0.05840795114636421, "learning_rate": 8.20961533927214e-06, "loss": 0.0028, "step": 42820 }, { "epoch": 0.7008099484578254, "grad_norm": 0.14578768610954285, "learning_rate": 8.208520334436228e-06, "loss": 0.0023, "step": 42830 }, { "epoch": 0.7009735744089013, "grad_norm": 0.0719045028090477, "learning_rate": 8.207425067924314e-06, "loss": 0.0022, "step": 42840 }, { "epoch": 0.7011372003599771, "grad_norm": 0.01881229318678379, "learning_rate": 8.206329539825724e-06, "loss": 0.0028, "step": 42850 }, { "epoch": 0.701300826311053, "grad_norm": 0.1771376132965088, "learning_rate": 8.205233750229806e-06, "loss": 0.0024, "step": 42860 }, { "epoch": 0.7014644522621287, "grad_norm": 0.0363045409321785, "learning_rate": 8.20413769922593e-06, "loss": 0.0031, "step": 42870 }, { "epoch": 0.7016280782132046, "grad_norm": 0.11194594204425812, "learning_rate": 8.203041386903483e-06, "loss": 0.0035, "step": 42880 }, { "epoch": 0.7017917041642805, "grad_norm": 0.04965907335281372, "learning_rate": 8.201944813351879e-06, "loss": 0.0019, "step": 42890 }, { "epoch": 0.7019553301153563, "grad_norm": 0.07199104130268097, "learning_rate": 8.200847978660549e-06, "loss": 0.0029, "step": 42900 }, { "epoch": 0.7021189560664322, "grad_norm": 0.1052938774228096, "learning_rate": 8.199750882918947e-06, "loss": 0.0018, "step": 42910 }, { "epoch": 0.7022825820175079, "grad_norm": 0.027919912710785866, "learning_rate": 8.198653526216552e-06, "loss": 0.0025, "step": 42920 }, { "epoch": 0.7024462079685838, "grad_norm": 0.06763521581888199, "learning_rate": 8.197555908642857e-06, "loss": 0.0015, "step": 42930 }, { "epoch": 0.7026098339196597, "grad_norm": 0.07445783168077469, "learning_rate": 8.196458030287381e-06, "loss": 0.0026, "step": 42940 }, { "epoch": 0.7027734598707355, "grad_norm": 0.010249611921608448, "learning_rate": 8.195359891239662e-06, "loss": 0.0029, "step": 42950 }, { "epoch": 0.7029370858218114, "grad_norm": 0.06104375794529915, "learning_rate": 8.194261491589265e-06, "loss": 0.0024, "step": 42960 }, { "epoch": 0.7031007117728871, "grad_norm": 0.056723762303590775, "learning_rate": 8.193162831425766e-06, "loss": 0.0023, "step": 42970 }, { "epoch": 0.703264337723963, "grad_norm": 0.06246568262577057, "learning_rate": 8.192063910838771e-06, "loss": 0.0031, "step": 42980 }, { "epoch": 0.7034279636750389, "grad_norm": 0.022989075630903244, "learning_rate": 8.190964729917904e-06, "loss": 0.0045, "step": 42990 }, { "epoch": 0.7035915896261147, "grad_norm": 0.05999006703495979, "learning_rate": 8.189865288752812e-06, "loss": 0.0013, "step": 43000 }, { "epoch": 0.7037552155771906, "grad_norm": 0.0738217905163765, "learning_rate": 8.188765587433157e-06, "loss": 0.0029, "step": 43010 }, { "epoch": 0.7039188415282663, "grad_norm": 0.06145365908741951, "learning_rate": 8.187665626048632e-06, "loss": 0.0017, "step": 43020 }, { "epoch": 0.7040824674793422, "grad_norm": 0.05235118046402931, "learning_rate": 8.186565404688942e-06, "loss": 0.0014, "step": 43030 }, { "epoch": 0.7042460934304181, "grad_norm": 0.11701613664627075, "learning_rate": 8.185464923443823e-06, "loss": 0.0046, "step": 43040 }, { "epoch": 0.7044097193814939, "grad_norm": 0.041148070245981216, "learning_rate": 8.184364182403018e-06, "loss": 0.002, "step": 43050 }, { "epoch": 0.7045733453325698, "grad_norm": 0.03790149837732315, "learning_rate": 8.183263181656308e-06, "loss": 0.0043, "step": 43060 }, { "epoch": 0.7047369712836455, "grad_norm": 0.02770465612411499, "learning_rate": 8.18216192129348e-06, "loss": 0.0026, "step": 43070 }, { "epoch": 0.7049005972347214, "grad_norm": 0.05528177693486214, "learning_rate": 8.181060401404354e-06, "loss": 0.0031, "step": 43080 }, { "epoch": 0.7050642231857973, "grad_norm": 0.03338836878538132, "learning_rate": 8.179958622078765e-06, "loss": 0.0016, "step": 43090 }, { "epoch": 0.7052278491368731, "grad_norm": 0.09353293478488922, "learning_rate": 8.178856583406569e-06, "loss": 0.0026, "step": 43100 }, { "epoch": 0.705391475087949, "grad_norm": 0.07116588205099106, "learning_rate": 8.177754285477646e-06, "loss": 0.0026, "step": 43110 }, { "epoch": 0.7055551010390247, "grad_norm": 0.11429449170827866, "learning_rate": 8.176651728381895e-06, "loss": 0.0025, "step": 43120 }, { "epoch": 0.7057187269901006, "grad_norm": 0.03536340966820717, "learning_rate": 8.175548912209239e-06, "loss": 0.0027, "step": 43130 }, { "epoch": 0.7058823529411765, "grad_norm": 0.09896332770586014, "learning_rate": 8.174445837049614e-06, "loss": 0.0017, "step": 43140 }, { "epoch": 0.7060459788922523, "grad_norm": 0.045147284865379333, "learning_rate": 8.173342502992989e-06, "loss": 0.0027, "step": 43150 }, { "epoch": 0.7062096048433282, "grad_norm": 0.15183088183403015, "learning_rate": 8.172238910129346e-06, "loss": 0.0023, "step": 43160 }, { "epoch": 0.706373230794404, "grad_norm": 0.09464211761951447, "learning_rate": 8.171135058548692e-06, "loss": 0.0023, "step": 43170 }, { "epoch": 0.7065368567454798, "grad_norm": 0.026473268866539, "learning_rate": 8.170030948341049e-06, "loss": 0.0039, "step": 43180 }, { "epoch": 0.7067004826965557, "grad_norm": 0.04579450935125351, "learning_rate": 8.168926579596469e-06, "loss": 0.0022, "step": 43190 }, { "epoch": 0.7068641086476315, "grad_norm": 0.06653797626495361, "learning_rate": 8.167821952405017e-06, "loss": 0.0041, "step": 43200 }, { "epoch": 0.7070277345987074, "grad_norm": 0.16703353822231293, "learning_rate": 8.166717066856786e-06, "loss": 0.0041, "step": 43210 }, { "epoch": 0.7071913605497832, "grad_norm": 0.10521818697452545, "learning_rate": 8.165611923041884e-06, "loss": 0.0041, "step": 43220 }, { "epoch": 0.707354986500859, "grad_norm": 0.0518835224211216, "learning_rate": 8.164506521050446e-06, "loss": 0.0078, "step": 43230 }, { "epoch": 0.7075186124519349, "grad_norm": 0.06331518292427063, "learning_rate": 8.163400860972621e-06, "loss": 0.0025, "step": 43240 }, { "epoch": 0.7076822384030107, "grad_norm": 0.07451155036687851, "learning_rate": 8.162294942898586e-06, "loss": 0.0016, "step": 43250 }, { "epoch": 0.7078458643540866, "grad_norm": 0.02100847102701664, "learning_rate": 8.161188766918533e-06, "loss": 0.0029, "step": 43260 }, { "epoch": 0.7080094903051624, "grad_norm": 0.06605824828147888, "learning_rate": 8.160082333122679e-06, "loss": 0.0021, "step": 43270 }, { "epoch": 0.7081731162562382, "grad_norm": 0.03543270379304886, "learning_rate": 8.158975641601263e-06, "loss": 0.002, "step": 43280 }, { "epoch": 0.7083367422073141, "grad_norm": 0.007369784638285637, "learning_rate": 8.157868692444541e-06, "loss": 0.0019, "step": 43290 }, { "epoch": 0.7085003681583899, "grad_norm": 0.34923169016838074, "learning_rate": 8.156761485742791e-06, "loss": 0.0019, "step": 43300 }, { "epoch": 0.7086639941094658, "grad_norm": 0.13776707649230957, "learning_rate": 8.155654021586316e-06, "loss": 0.0036, "step": 43310 }, { "epoch": 0.7088276200605416, "grad_norm": 0.1732506901025772, "learning_rate": 8.154546300065435e-06, "loss": 0.002, "step": 43320 }, { "epoch": 0.7089912460116174, "grad_norm": 0.1082187071442604, "learning_rate": 8.15343832127049e-06, "loss": 0.003, "step": 43330 }, { "epoch": 0.7091548719626933, "grad_norm": 0.11107086390256882, "learning_rate": 8.152330085291846e-06, "loss": 0.0054, "step": 43340 }, { "epoch": 0.7093184979137691, "grad_norm": 0.10254182666540146, "learning_rate": 8.151221592219885e-06, "loss": 0.005, "step": 43350 }, { "epoch": 0.709482123864845, "grad_norm": 0.026903679594397545, "learning_rate": 8.150112842145011e-06, "loss": 0.0023, "step": 43360 }, { "epoch": 0.7096457498159208, "grad_norm": 0.017902657389640808, "learning_rate": 8.149003835157651e-06, "loss": 0.0022, "step": 43370 }, { "epoch": 0.7098093757669967, "grad_norm": 0.04412378370761871, "learning_rate": 8.147894571348254e-06, "loss": 0.0023, "step": 43380 }, { "epoch": 0.7099730017180725, "grad_norm": 0.18464715778827667, "learning_rate": 8.146785050807285e-06, "loss": 0.0026, "step": 43390 }, { "epoch": 0.7101366276691483, "grad_norm": 0.10496652126312256, "learning_rate": 8.145675273625232e-06, "loss": 0.0038, "step": 43400 }, { "epoch": 0.7103002536202242, "grad_norm": 0.13076041638851166, "learning_rate": 8.144565239892608e-06, "loss": 0.0043, "step": 43410 }, { "epoch": 0.7104638795713, "grad_norm": 0.25232309103012085, "learning_rate": 8.143454949699942e-06, "loss": 0.0025, "step": 43420 }, { "epoch": 0.7106275055223759, "grad_norm": 0.037370793521404266, "learning_rate": 8.142344403137785e-06, "loss": 0.0036, "step": 43430 }, { "epoch": 0.7107911314734517, "grad_norm": 0.039053935557603836, "learning_rate": 8.14123360029671e-06, "loss": 0.0017, "step": 43440 }, { "epoch": 0.7109547574245275, "grad_norm": 0.007159300148487091, "learning_rate": 8.140122541267308e-06, "loss": 0.0019, "step": 43450 }, { "epoch": 0.7111183833756034, "grad_norm": 0.14263097941875458, "learning_rate": 8.139011226140197e-06, "loss": 0.0022, "step": 43460 }, { "epoch": 0.7112820093266792, "grad_norm": 0.1862632930278778, "learning_rate": 8.13789965500601e-06, "loss": 0.002, "step": 43470 }, { "epoch": 0.7114456352777551, "grad_norm": 0.2040887027978897, "learning_rate": 8.136787827955401e-06, "loss": 0.0043, "step": 43480 }, { "epoch": 0.711609261228831, "grad_norm": 0.077354297041893, "learning_rate": 8.135675745079053e-06, "loss": 0.0027, "step": 43490 }, { "epoch": 0.7117728871799067, "grad_norm": 0.13910935819149017, "learning_rate": 8.134563406467656e-06, "loss": 0.0016, "step": 43500 }, { "epoch": 0.7119365131309826, "grad_norm": 0.13739068806171417, "learning_rate": 8.133450812211934e-06, "loss": 0.0018, "step": 43510 }, { "epoch": 0.7121001390820584, "grad_norm": 0.10940311849117279, "learning_rate": 8.132337962402626e-06, "loss": 0.0034, "step": 43520 }, { "epoch": 0.7122637650331343, "grad_norm": 0.03299413621425629, "learning_rate": 8.131224857130488e-06, "loss": 0.0025, "step": 43530 }, { "epoch": 0.71242739098421, "grad_norm": 0.031098751351237297, "learning_rate": 8.130111496486306e-06, "loss": 0.0026, "step": 43540 }, { "epoch": 0.7125910169352859, "grad_norm": 0.09025661647319794, "learning_rate": 8.128997880560878e-06, "loss": 0.002, "step": 43550 }, { "epoch": 0.7127546428863618, "grad_norm": 0.12498236447572708, "learning_rate": 8.12788400944503e-06, "loss": 0.0018, "step": 43560 }, { "epoch": 0.7129182688374376, "grad_norm": 0.04543954133987427, "learning_rate": 8.126769883229604e-06, "loss": 0.0027, "step": 43570 }, { "epoch": 0.7130818947885135, "grad_norm": 0.23174482583999634, "learning_rate": 8.125655502005465e-06, "loss": 0.0023, "step": 43580 }, { "epoch": 0.7132455207395892, "grad_norm": 0.08633371442556381, "learning_rate": 8.124540865863497e-06, "loss": 0.0027, "step": 43590 }, { "epoch": 0.7134091466906651, "grad_norm": 0.08456216752529144, "learning_rate": 8.123425974894608e-06, "loss": 0.0027, "step": 43600 }, { "epoch": 0.713572772641741, "grad_norm": 0.07421570271253586, "learning_rate": 8.122310829189721e-06, "loss": 0.0044, "step": 43610 }, { "epoch": 0.7137363985928168, "grad_norm": 0.14526040852069855, "learning_rate": 8.121195428839787e-06, "loss": 0.0027, "step": 43620 }, { "epoch": 0.7139000245438927, "grad_norm": 0.052335113286972046, "learning_rate": 8.120079773935774e-06, "loss": 0.0041, "step": 43630 }, { "epoch": 0.7140636504949684, "grad_norm": 0.03402649611234665, "learning_rate": 8.118963864568669e-06, "loss": 0.0025, "step": 43640 }, { "epoch": 0.7142272764460443, "grad_norm": 0.015778332948684692, "learning_rate": 8.117847700829483e-06, "loss": 0.003, "step": 43650 }, { "epoch": 0.7143909023971202, "grad_norm": 0.13444769382476807, "learning_rate": 8.116731282809245e-06, "loss": 0.0025, "step": 43660 }, { "epoch": 0.714554528348196, "grad_norm": 0.11536898463964462, "learning_rate": 8.11561461059901e-06, "loss": 0.0023, "step": 43670 }, { "epoch": 0.7147181542992719, "grad_norm": 0.023170851171016693, "learning_rate": 8.114497684289846e-06, "loss": 0.0021, "step": 43680 }, { "epoch": 0.7148817802503477, "grad_norm": 0.026733066886663437, "learning_rate": 8.113380503972848e-06, "loss": 0.0017, "step": 43690 }, { "epoch": 0.7150454062014235, "grad_norm": 0.04442456364631653, "learning_rate": 8.112263069739129e-06, "loss": 0.0032, "step": 43700 }, { "epoch": 0.7152090321524994, "grad_norm": 0.04640334099531174, "learning_rate": 8.111145381679821e-06, "loss": 0.0016, "step": 43710 }, { "epoch": 0.7153726581035752, "grad_norm": 0.009622717276215553, "learning_rate": 8.110027439886084e-06, "loss": 0.0024, "step": 43720 }, { "epoch": 0.7155362840546511, "grad_norm": 0.01147310808300972, "learning_rate": 8.108909244449088e-06, "loss": 0.0021, "step": 43730 }, { "epoch": 0.7156999100057269, "grad_norm": 0.08082813769578934, "learning_rate": 8.107790795460032e-06, "loss": 0.0033, "step": 43740 }, { "epoch": 0.7158635359568027, "grad_norm": 0.13127946853637695, "learning_rate": 8.10667209301013e-06, "loss": 0.002, "step": 43750 }, { "epoch": 0.7160271619078786, "grad_norm": 0.11432065814733505, "learning_rate": 8.105553137190625e-06, "loss": 0.0024, "step": 43760 }, { "epoch": 0.7161907878589544, "grad_norm": 0.07971400767564774, "learning_rate": 8.104433928092772e-06, "loss": 0.0013, "step": 43770 }, { "epoch": 0.7163544138100303, "grad_norm": 0.1732354462146759, "learning_rate": 8.103314465807847e-06, "loss": 0.0031, "step": 43780 }, { "epoch": 0.7165180397611061, "grad_norm": 0.09525655955076218, "learning_rate": 8.102194750427156e-06, "loss": 0.002, "step": 43790 }, { "epoch": 0.716681665712182, "grad_norm": 0.07279553264379501, "learning_rate": 8.101074782042015e-06, "loss": 0.0036, "step": 43800 }, { "epoch": 0.7168452916632578, "grad_norm": 0.11666157096624374, "learning_rate": 8.099954560743764e-06, "loss": 0.003, "step": 43810 }, { "epoch": 0.7170089176143336, "grad_norm": 0.025846201926469803, "learning_rate": 8.098834086623768e-06, "loss": 0.0023, "step": 43820 }, { "epoch": 0.7171725435654095, "grad_norm": 0.072555772960186, "learning_rate": 8.097713359773405e-06, "loss": 0.0029, "step": 43830 }, { "epoch": 0.7173361695164853, "grad_norm": 0.18477746844291687, "learning_rate": 8.096592380284083e-06, "loss": 0.0038, "step": 43840 }, { "epoch": 0.7174997954675612, "grad_norm": 0.19694846868515015, "learning_rate": 8.095471148247218e-06, "loss": 0.0018, "step": 43850 }, { "epoch": 0.717663421418637, "grad_norm": 0.13902299106121063, "learning_rate": 8.094349663754259e-06, "loss": 0.0019, "step": 43860 }, { "epoch": 0.7178270473697128, "grad_norm": 0.044256895780563354, "learning_rate": 8.093227926896671e-06, "loss": 0.0018, "step": 43870 }, { "epoch": 0.7179906733207887, "grad_norm": 0.33997824788093567, "learning_rate": 8.092105937765934e-06, "loss": 0.0025, "step": 43880 }, { "epoch": 0.7181542992718645, "grad_norm": 0.0224887914955616, "learning_rate": 8.09098369645356e-06, "loss": 0.0032, "step": 43890 }, { "epoch": 0.7183179252229404, "grad_norm": 0.0422448106110096, "learning_rate": 8.089861203051072e-06, "loss": 0.0026, "step": 43900 }, { "epoch": 0.7184815511740162, "grad_norm": 0.0016009770333766937, "learning_rate": 8.088738457650016e-06, "loss": 0.0017, "step": 43910 }, { "epoch": 0.718645177125092, "grad_norm": 0.04724181070923805, "learning_rate": 8.08761546034196e-06, "loss": 0.0015, "step": 43920 }, { "epoch": 0.7188088030761679, "grad_norm": 0.09206213057041168, "learning_rate": 8.086492211218493e-06, "loss": 0.0036, "step": 43930 }, { "epoch": 0.7189724290272437, "grad_norm": 0.029400011524558067, "learning_rate": 8.085368710371221e-06, "loss": 0.001, "step": 43940 }, { "epoch": 0.7191360549783196, "grad_norm": 0.04279040917754173, "learning_rate": 8.084244957891776e-06, "loss": 0.0019, "step": 43950 }, { "epoch": 0.7192996809293954, "grad_norm": 0.025080056861042976, "learning_rate": 8.083120953871805e-06, "loss": 0.0015, "step": 43960 }, { "epoch": 0.7194633068804712, "grad_norm": 0.10105162858963013, "learning_rate": 8.081996698402979e-06, "loss": 0.0026, "step": 43970 }, { "epoch": 0.7196269328315471, "grad_norm": 0.08337251842021942, "learning_rate": 8.080872191576987e-06, "loss": 0.0024, "step": 43980 }, { "epoch": 0.7197905587826229, "grad_norm": 0.0339948907494545, "learning_rate": 8.079747433485542e-06, "loss": 0.0011, "step": 43990 }, { "epoch": 0.7199541847336988, "grad_norm": 0.20203080773353577, "learning_rate": 8.078622424220374e-06, "loss": 0.0069, "step": 44000 }, { "epoch": 0.7201178106847747, "grad_norm": 0.11097287386655807, "learning_rate": 8.077497163873237e-06, "loss": 0.0019, "step": 44010 }, { "epoch": 0.7202814366358504, "grad_norm": 0.017145076766610146, "learning_rate": 8.0763716525359e-06, "loss": 0.002, "step": 44020 }, { "epoch": 0.7204450625869263, "grad_norm": 0.04417567327618599, "learning_rate": 8.07524589030016e-06, "loss": 0.0104, "step": 44030 }, { "epoch": 0.7206086885380021, "grad_norm": 0.07366751879453659, "learning_rate": 8.074119877257824e-06, "loss": 0.0021, "step": 44040 }, { "epoch": 0.720772314489078, "grad_norm": 0.035887643694877625, "learning_rate": 8.072993613500734e-06, "loss": 0.0028, "step": 44050 }, { "epoch": 0.7209359404401539, "grad_norm": 0.023754918947815895, "learning_rate": 8.071867099120739e-06, "loss": 0.0016, "step": 44060 }, { "epoch": 0.7210995663912296, "grad_norm": 0.04827277734875679, "learning_rate": 8.070740334209712e-06, "loss": 0.0032, "step": 44070 }, { "epoch": 0.7212631923423055, "grad_norm": 0.07769101113080978, "learning_rate": 8.069613318859555e-06, "loss": 0.0028, "step": 44080 }, { "epoch": 0.7214268182933813, "grad_norm": 0.07465600967407227, "learning_rate": 8.068486053162177e-06, "loss": 0.0017, "step": 44090 }, { "epoch": 0.7215904442444572, "grad_norm": 0.07614905387163162, "learning_rate": 8.067358537209517e-06, "loss": 0.003, "step": 44100 }, { "epoch": 0.7217540701955331, "grad_norm": 0.05258912220597267, "learning_rate": 8.066230771093531e-06, "loss": 0.0026, "step": 44110 }, { "epoch": 0.7219176961466088, "grad_norm": 0.04659995064139366, "learning_rate": 8.065102754906195e-06, "loss": 0.0027, "step": 44120 }, { "epoch": 0.7220813220976847, "grad_norm": 0.022946927696466446, "learning_rate": 8.063974488739506e-06, "loss": 0.0019, "step": 44130 }, { "epoch": 0.7222449480487605, "grad_norm": 0.0068640816025435925, "learning_rate": 8.062845972685484e-06, "loss": 0.0022, "step": 44140 }, { "epoch": 0.7224085739998364, "grad_norm": 0.04052237421274185, "learning_rate": 8.061717206836163e-06, "loss": 0.0023, "step": 44150 }, { "epoch": 0.7225721999509123, "grad_norm": 0.16379417479038239, "learning_rate": 8.060588191283607e-06, "loss": 0.0037, "step": 44160 }, { "epoch": 0.722735825901988, "grad_norm": 0.10318569093942642, "learning_rate": 8.059458926119888e-06, "loss": 0.0028, "step": 44170 }, { "epoch": 0.7228994518530639, "grad_norm": 0.019126171246170998, "learning_rate": 8.05832941143711e-06, "loss": 0.0015, "step": 44180 }, { "epoch": 0.7230630778041397, "grad_norm": 0.02773362398147583, "learning_rate": 8.05719964732739e-06, "loss": 0.0025, "step": 44190 }, { "epoch": 0.7232267037552156, "grad_norm": 0.05926528945565224, "learning_rate": 8.056069633882868e-06, "loss": 0.0021, "step": 44200 }, { "epoch": 0.7233903297062915, "grad_norm": 0.06562968343496323, "learning_rate": 8.054939371195704e-06, "loss": 0.0019, "step": 44210 }, { "epoch": 0.7235539556573672, "grad_norm": 0.0780167430639267, "learning_rate": 8.053808859358079e-06, "loss": 0.0016, "step": 44220 }, { "epoch": 0.7237175816084431, "grad_norm": 0.05056006833910942, "learning_rate": 8.052678098462193e-06, "loss": 0.0023, "step": 44230 }, { "epoch": 0.7238812075595189, "grad_norm": 0.032847411930561066, "learning_rate": 8.051547088600267e-06, "loss": 0.0025, "step": 44240 }, { "epoch": 0.7240448335105948, "grad_norm": 0.05597968399524689, "learning_rate": 8.050415829864544e-06, "loss": 0.003, "step": 44250 }, { "epoch": 0.7242084594616707, "grad_norm": 0.04562026262283325, "learning_rate": 8.049284322347284e-06, "loss": 0.0021, "step": 44260 }, { "epoch": 0.7243720854127464, "grad_norm": 0.045441530644893646, "learning_rate": 8.048152566140768e-06, "loss": 0.0022, "step": 44270 }, { "epoch": 0.7245357113638223, "grad_norm": 0.05194681137800217, "learning_rate": 8.0470205613373e-06, "loss": 0.0021, "step": 44280 }, { "epoch": 0.7246993373148981, "grad_norm": 0.15032348036766052, "learning_rate": 8.045888308029204e-06, "loss": 0.0021, "step": 44290 }, { "epoch": 0.724862963265974, "grad_norm": 0.08662296831607819, "learning_rate": 8.044755806308818e-06, "loss": 0.0014, "step": 44300 }, { "epoch": 0.7250265892170499, "grad_norm": 0.06516364216804504, "learning_rate": 8.043623056268509e-06, "loss": 0.0021, "step": 44310 }, { "epoch": 0.7251902151681257, "grad_norm": 0.398365318775177, "learning_rate": 8.042490058000658e-06, "loss": 0.002, "step": 44320 }, { "epoch": 0.7253538411192015, "grad_norm": 0.04737105593085289, "learning_rate": 8.04135681159767e-06, "loss": 0.0024, "step": 44330 }, { "epoch": 0.7255174670702773, "grad_norm": 0.08958578109741211, "learning_rate": 8.040223317151967e-06, "loss": 0.0015, "step": 44340 }, { "epoch": 0.7256810930213532, "grad_norm": 0.10113321244716644, "learning_rate": 8.039089574755996e-06, "loss": 0.002, "step": 44350 }, { "epoch": 0.7258447189724291, "grad_norm": 0.0900040715932846, "learning_rate": 8.037955584502217e-06, "loss": 0.0016, "step": 44360 }, { "epoch": 0.7260083449235049, "grad_norm": 0.07053272426128387, "learning_rate": 8.036821346483116e-06, "loss": 0.002, "step": 44370 }, { "epoch": 0.7261719708745807, "grad_norm": 0.032537445425987244, "learning_rate": 8.0356868607912e-06, "loss": 0.0012, "step": 44380 }, { "epoch": 0.7263355968256565, "grad_norm": 0.15229865908622742, "learning_rate": 8.034552127518994e-06, "loss": 0.0028, "step": 44390 }, { "epoch": 0.7264992227767324, "grad_norm": 0.14430421590805054, "learning_rate": 8.033417146759037e-06, "loss": 0.0016, "step": 44400 }, { "epoch": 0.7266628487278082, "grad_norm": 0.07929670810699463, "learning_rate": 8.0322819186039e-06, "loss": 0.0038, "step": 44410 }, { "epoch": 0.7268264746788841, "grad_norm": 0.0590423084795475, "learning_rate": 8.031146443146165e-06, "loss": 0.002, "step": 44420 }, { "epoch": 0.72699010062996, "grad_norm": 0.15701057016849518, "learning_rate": 8.030010720478439e-06, "loss": 0.0031, "step": 44430 }, { "epoch": 0.7271537265810357, "grad_norm": 0.07895903289318085, "learning_rate": 8.02887475069335e-06, "loss": 0.002, "step": 44440 }, { "epoch": 0.7273173525321116, "grad_norm": 0.021112807095050812, "learning_rate": 8.027738533883539e-06, "loss": 0.0028, "step": 44450 }, { "epoch": 0.7274809784831874, "grad_norm": 0.1281396895647049, "learning_rate": 8.026602070141676e-06, "loss": 0.0021, "step": 44460 }, { "epoch": 0.7276446044342633, "grad_norm": 0.0907074436545372, "learning_rate": 8.025465359560445e-06, "loss": 0.0044, "step": 44470 }, { "epoch": 0.7278082303853391, "grad_norm": 0.11169345676898956, "learning_rate": 8.024328402232552e-06, "loss": 0.0014, "step": 44480 }, { "epoch": 0.7279718563364149, "grad_norm": 0.10047292709350586, "learning_rate": 8.023191198250724e-06, "loss": 0.0019, "step": 44490 }, { "epoch": 0.7281354822874908, "grad_norm": 0.09887635707855225, "learning_rate": 8.02205374770771e-06, "loss": 0.0019, "step": 44500 }, { "epoch": 0.7282991082385666, "grad_norm": 0.07256154716014862, "learning_rate": 8.020916050696272e-06, "loss": 0.0025, "step": 44510 }, { "epoch": 0.7284627341896425, "grad_norm": 0.08774542808532715, "learning_rate": 8.0197781073092e-06, "loss": 0.003, "step": 44520 }, { "epoch": 0.7286263601407184, "grad_norm": 0.03413913771510124, "learning_rate": 8.0186399176393e-06, "loss": 0.0017, "step": 44530 }, { "epoch": 0.7287899860917941, "grad_norm": 0.1782178431749344, "learning_rate": 8.017501481779398e-06, "loss": 0.0045, "step": 44540 }, { "epoch": 0.72895361204287, "grad_norm": 0.1473647803068161, "learning_rate": 8.016362799822342e-06, "loss": 0.0022, "step": 44550 }, { "epoch": 0.7291172379939458, "grad_norm": 0.056445784866809845, "learning_rate": 8.015223871860998e-06, "loss": 0.0028, "step": 44560 }, { "epoch": 0.7292808639450217, "grad_norm": 0.16442395746707916, "learning_rate": 8.014084697988254e-06, "loss": 0.0029, "step": 44570 }, { "epoch": 0.7294444898960976, "grad_norm": 0.053557731211185455, "learning_rate": 8.012945278297016e-06, "loss": 0.003, "step": 44580 }, { "epoch": 0.7296081158471733, "grad_norm": 0.14761383831501007, "learning_rate": 8.011805612880212e-06, "loss": 0.0031, "step": 44590 }, { "epoch": 0.7297717417982492, "grad_norm": 0.062305767089128494, "learning_rate": 8.01066570183079e-06, "loss": 0.0028, "step": 44600 }, { "epoch": 0.729935367749325, "grad_norm": 0.1019849106669426, "learning_rate": 8.009525545241717e-06, "loss": 0.0024, "step": 44610 }, { "epoch": 0.7300989937004009, "grad_norm": 0.06322908401489258, "learning_rate": 8.008385143205978e-06, "loss": 0.0028, "step": 44620 }, { "epoch": 0.7302626196514768, "grad_norm": 0.016959216445684433, "learning_rate": 8.007244495816586e-06, "loss": 0.0016, "step": 44630 }, { "epoch": 0.7304262456025525, "grad_norm": 0.04674536734819412, "learning_rate": 8.00610360316656e-06, "loss": 0.0018, "step": 44640 }, { "epoch": 0.7305898715536284, "grad_norm": 0.0854129046201706, "learning_rate": 8.004962465348956e-06, "loss": 0.0041, "step": 44650 }, { "epoch": 0.7307534975047042, "grad_norm": 0.06493894010782242, "learning_rate": 8.003821082456835e-06, "loss": 0.0018, "step": 44660 }, { "epoch": 0.7309171234557801, "grad_norm": 0.13204319775104523, "learning_rate": 8.002679454583285e-06, "loss": 0.0026, "step": 44670 }, { "epoch": 0.731080749406856, "grad_norm": 0.09227117896080017, "learning_rate": 8.001537581821416e-06, "loss": 0.0019, "step": 44680 }, { "epoch": 0.7312443753579317, "grad_norm": 0.131661519408226, "learning_rate": 8.000395464264354e-06, "loss": 0.0018, "step": 44690 }, { "epoch": 0.7314080013090076, "grad_norm": 0.3169766068458557, "learning_rate": 7.999253102005245e-06, "loss": 0.004, "step": 44700 }, { "epoch": 0.7315716272600834, "grad_norm": 0.10875862091779709, "learning_rate": 7.99811049513726e-06, "loss": 0.0017, "step": 44710 }, { "epoch": 0.7317352532111593, "grad_norm": 0.06453468650579453, "learning_rate": 7.996967643753581e-06, "loss": 0.0017, "step": 44720 }, { "epoch": 0.7318988791622352, "grad_norm": 0.16648490726947784, "learning_rate": 7.995824547947419e-06, "loss": 0.0027, "step": 44730 }, { "epoch": 0.732062505113311, "grad_norm": 0.10406239330768585, "learning_rate": 7.994681207811998e-06, "loss": 0.0016, "step": 44740 }, { "epoch": 0.7322261310643868, "grad_norm": 0.06713445484638214, "learning_rate": 7.993537623440568e-06, "loss": 0.0032, "step": 44750 }, { "epoch": 0.7323897570154626, "grad_norm": 0.05412828549742699, "learning_rate": 7.992393794926393e-06, "loss": 0.0021, "step": 44760 }, { "epoch": 0.7325533829665385, "grad_norm": 0.0850588008761406, "learning_rate": 7.991249722362762e-06, "loss": 0.0029, "step": 44770 }, { "epoch": 0.7327170089176144, "grad_norm": 0.05673776566982269, "learning_rate": 7.99010540584298e-06, "loss": 0.0021, "step": 44780 }, { "epoch": 0.7328806348686902, "grad_norm": 0.06593542546033859, "learning_rate": 7.988960845460375e-06, "loss": 0.0019, "step": 44790 }, { "epoch": 0.733044260819766, "grad_norm": 0.10725630074739456, "learning_rate": 7.987816041308293e-06, "loss": 0.0017, "step": 44800 }, { "epoch": 0.7332078867708418, "grad_norm": 0.06610086560249329, "learning_rate": 7.9866709934801e-06, "loss": 0.0012, "step": 44810 }, { "epoch": 0.7333715127219177, "grad_norm": 0.20274235308170319, "learning_rate": 7.985525702069183e-06, "loss": 0.0017, "step": 44820 }, { "epoch": 0.7335351386729936, "grad_norm": 0.12632769346237183, "learning_rate": 7.984380167168947e-06, "loss": 0.0031, "step": 44830 }, { "epoch": 0.7336987646240694, "grad_norm": 0.13647542893886566, "learning_rate": 7.983234388872816e-06, "loss": 0.0023, "step": 44840 }, { "epoch": 0.7338623905751452, "grad_norm": 0.10609569400548935, "learning_rate": 7.98208836727424e-06, "loss": 0.0028, "step": 44850 }, { "epoch": 0.734026016526221, "grad_norm": 0.029697943478822708, "learning_rate": 7.980942102466682e-06, "loss": 0.0021, "step": 44860 }, { "epoch": 0.7341896424772969, "grad_norm": 0.03464086726307869, "learning_rate": 7.97979559454363e-06, "loss": 0.0021, "step": 44870 }, { "epoch": 0.7343532684283728, "grad_norm": 0.2556789815425873, "learning_rate": 7.978648843598586e-06, "loss": 0.0045, "step": 44880 }, { "epoch": 0.7345168943794486, "grad_norm": 0.034051842987537384, "learning_rate": 7.977501849725076e-06, "loss": 0.003, "step": 44890 }, { "epoch": 0.7346805203305244, "grad_norm": 0.06016731634736061, "learning_rate": 7.976354613016646e-06, "loss": 0.0019, "step": 44900 }, { "epoch": 0.7348441462816002, "grad_norm": 0.07642786204814911, "learning_rate": 7.97520713356686e-06, "loss": 0.0023, "step": 44910 }, { "epoch": 0.7350077722326761, "grad_norm": 0.10233766585588455, "learning_rate": 7.974059411469304e-06, "loss": 0.0031, "step": 44920 }, { "epoch": 0.735171398183752, "grad_norm": 0.03389649838209152, "learning_rate": 7.972911446817579e-06, "loss": 0.0033, "step": 44930 }, { "epoch": 0.7353350241348278, "grad_norm": 0.12942197918891907, "learning_rate": 7.971763239705312e-06, "loss": 0.0024, "step": 44940 }, { "epoch": 0.7354986500859036, "grad_norm": 0.024554140865802765, "learning_rate": 7.970614790226147e-06, "loss": 0.0015, "step": 44950 }, { "epoch": 0.7356622760369794, "grad_norm": 0.012263045646250248, "learning_rate": 7.969466098473746e-06, "loss": 0.0014, "step": 44960 }, { "epoch": 0.7358259019880553, "grad_norm": 0.06102617084980011, "learning_rate": 7.968317164541792e-06, "loss": 0.0029, "step": 44970 }, { "epoch": 0.7359895279391312, "grad_norm": 0.06320632994174957, "learning_rate": 7.96716798852399e-06, "loss": 0.0023, "step": 44980 }, { "epoch": 0.736153153890207, "grad_norm": 0.07558856904506683, "learning_rate": 7.966018570514063e-06, "loss": 0.0016, "step": 44990 }, { "epoch": 0.7363167798412829, "grad_norm": 0.0770316794514656, "learning_rate": 7.964868910605751e-06, "loss": 0.0023, "step": 45000 }, { "epoch": 0.7364804057923586, "grad_norm": 0.06283899396657944, "learning_rate": 7.963719008892818e-06, "loss": 0.0038, "step": 45010 }, { "epoch": 0.7366440317434345, "grad_norm": 0.05875033512711525, "learning_rate": 7.962568865469048e-06, "loss": 0.003, "step": 45020 }, { "epoch": 0.7368076576945104, "grad_norm": 0.11941047012805939, "learning_rate": 7.961418480428239e-06, "loss": 0.0027, "step": 45030 }, { "epoch": 0.7369712836455862, "grad_norm": 0.02768949791789055, "learning_rate": 7.960267853864216e-06, "loss": 0.0017, "step": 45040 }, { "epoch": 0.7371349095966621, "grad_norm": 0.04157964885234833, "learning_rate": 7.959116985870816e-06, "loss": 0.0024, "step": 45050 }, { "epoch": 0.7372985355477378, "grad_norm": 0.06325796991586685, "learning_rate": 7.957965876541905e-06, "loss": 0.0017, "step": 45060 }, { "epoch": 0.7374621614988137, "grad_norm": 0.04996092990040779, "learning_rate": 7.95681452597136e-06, "loss": 0.0022, "step": 45070 }, { "epoch": 0.7376257874498896, "grad_norm": 0.08422041684389114, "learning_rate": 7.955662934253082e-06, "loss": 0.0034, "step": 45080 }, { "epoch": 0.7377894134009654, "grad_norm": 0.1696813404560089, "learning_rate": 7.954511101480991e-06, "loss": 0.0035, "step": 45090 }, { "epoch": 0.7379530393520413, "grad_norm": 0.24613326787948608, "learning_rate": 7.953359027749028e-06, "loss": 0.0029, "step": 45100 }, { "epoch": 0.738116665303117, "grad_norm": 0.13812978565692902, "learning_rate": 7.95220671315115e-06, "loss": 0.0022, "step": 45110 }, { "epoch": 0.7382802912541929, "grad_norm": 0.056641675531864166, "learning_rate": 7.951054157781335e-06, "loss": 0.0031, "step": 45120 }, { "epoch": 0.7384439172052688, "grad_norm": 0.22468949854373932, "learning_rate": 7.949901361733585e-06, "loss": 0.0035, "step": 45130 }, { "epoch": 0.7386075431563446, "grad_norm": 0.06773383170366287, "learning_rate": 7.948748325101916e-06, "loss": 0.0018, "step": 45140 }, { "epoch": 0.7387711691074205, "grad_norm": 0.10979906469583511, "learning_rate": 7.947595047980367e-06, "loss": 0.0012, "step": 45150 }, { "epoch": 0.7389347950584962, "grad_norm": 0.06032257899641991, "learning_rate": 7.946441530462994e-06, "loss": 0.0011, "step": 45160 }, { "epoch": 0.7390984210095721, "grad_norm": 0.019600093364715576, "learning_rate": 7.945287772643872e-06, "loss": 0.0024, "step": 45170 }, { "epoch": 0.739262046960648, "grad_norm": 0.08960779756307602, "learning_rate": 7.944133774617105e-06, "loss": 0.0024, "step": 45180 }, { "epoch": 0.7394256729117238, "grad_norm": 0.10188505053520203, "learning_rate": 7.9429795364768e-06, "loss": 0.0018, "step": 45190 }, { "epoch": 0.7395892988627997, "grad_norm": 0.07792667299509048, "learning_rate": 7.941825058317098e-06, "loss": 0.003, "step": 45200 }, { "epoch": 0.7397529248138754, "grad_norm": 0.11480510234832764, "learning_rate": 7.940670340232154e-06, "loss": 0.005, "step": 45210 }, { "epoch": 0.7399165507649513, "grad_norm": 0.07940959930419922, "learning_rate": 7.93951538231614e-06, "loss": 0.001, "step": 45220 }, { "epoch": 0.7400801767160272, "grad_norm": 0.05966443195939064, "learning_rate": 7.938360184663254e-06, "loss": 0.002, "step": 45230 }, { "epoch": 0.740243802667103, "grad_norm": 0.10636087507009506, "learning_rate": 7.93720474736771e-06, "loss": 0.0024, "step": 45240 }, { "epoch": 0.7404074286181789, "grad_norm": 0.17561720311641693, "learning_rate": 7.936049070523739e-06, "loss": 0.0021, "step": 45250 }, { "epoch": 0.7405710545692546, "grad_norm": 0.044909875839948654, "learning_rate": 7.934893154225594e-06, "loss": 0.0025, "step": 45260 }, { "epoch": 0.7407346805203305, "grad_norm": 0.09109053015708923, "learning_rate": 7.93373699856755e-06, "loss": 0.0025, "step": 45270 }, { "epoch": 0.7408983064714064, "grad_norm": 0.05211983621120453, "learning_rate": 7.932580603643896e-06, "loss": 0.0013, "step": 45280 }, { "epoch": 0.7410619324224822, "grad_norm": 0.09957781434059143, "learning_rate": 7.931423969548948e-06, "loss": 0.0029, "step": 45290 }, { "epoch": 0.7412255583735581, "grad_norm": 0.06035226210951805, "learning_rate": 7.930267096377032e-06, "loss": 0.0012, "step": 45300 }, { "epoch": 0.7413891843246339, "grad_norm": 0.15249688923358917, "learning_rate": 7.929109984222503e-06, "loss": 0.0044, "step": 45310 }, { "epoch": 0.7415528102757097, "grad_norm": 0.16975709795951843, "learning_rate": 7.927952633179728e-06, "loss": 0.0036, "step": 45320 }, { "epoch": 0.7417164362267855, "grad_norm": 0.028939656913280487, "learning_rate": 7.926795043343099e-06, "loss": 0.0038, "step": 45330 }, { "epoch": 0.7418800621778614, "grad_norm": 0.0719069316983223, "learning_rate": 7.925637214807026e-06, "loss": 0.0019, "step": 45340 }, { "epoch": 0.7420436881289373, "grad_norm": 0.024133216589689255, "learning_rate": 7.924479147665931e-06, "loss": 0.002, "step": 45350 }, { "epoch": 0.7422073140800131, "grad_norm": 0.00805605947971344, "learning_rate": 7.92332084201427e-06, "loss": 0.0015, "step": 45360 }, { "epoch": 0.7423709400310889, "grad_norm": 0.06430293619632721, "learning_rate": 7.922162297946507e-06, "loss": 0.0033, "step": 45370 }, { "epoch": 0.7425345659821647, "grad_norm": 0.12935978174209595, "learning_rate": 7.92100351555713e-06, "loss": 0.0024, "step": 45380 }, { "epoch": 0.7426981919332406, "grad_norm": 0.1596582531929016, "learning_rate": 7.91984449494064e-06, "loss": 0.0022, "step": 45390 }, { "epoch": 0.7428618178843165, "grad_norm": 0.14239265024662018, "learning_rate": 7.91868523619157e-06, "loss": 0.003, "step": 45400 }, { "epoch": 0.7430254438353923, "grad_norm": 0.051771629601716995, "learning_rate": 7.917525739404464e-06, "loss": 0.0013, "step": 45410 }, { "epoch": 0.7431890697864681, "grad_norm": 0.0413055345416069, "learning_rate": 7.916366004673882e-06, "loss": 0.0015, "step": 45420 }, { "epoch": 0.7433526957375439, "grad_norm": 0.10837849974632263, "learning_rate": 7.915206032094412e-06, "loss": 0.0029, "step": 45430 }, { "epoch": 0.7435163216886198, "grad_norm": 0.12710237503051758, "learning_rate": 7.914045821760658e-06, "loss": 0.0024, "step": 45440 }, { "epoch": 0.7436799476396957, "grad_norm": 0.03616471216082573, "learning_rate": 7.912885373767238e-06, "loss": 0.0012, "step": 45450 }, { "epoch": 0.7438435735907715, "grad_norm": 0.17294131219387054, "learning_rate": 7.911724688208801e-06, "loss": 0.0016, "step": 45460 }, { "epoch": 0.7440071995418474, "grad_norm": 0.03460134193301201, "learning_rate": 7.910563765180002e-06, "loss": 0.001, "step": 45470 }, { "epoch": 0.7441708254929231, "grad_norm": 0.11427845805883408, "learning_rate": 7.909402604775527e-06, "loss": 0.0022, "step": 45480 }, { "epoch": 0.744334451443999, "grad_norm": 0.1683957427740097, "learning_rate": 7.908241207090073e-06, "loss": 0.0024, "step": 45490 }, { "epoch": 0.7444980773950749, "grad_norm": 0.08027928322553635, "learning_rate": 7.907079572218361e-06, "loss": 0.0034, "step": 45500 }, { "epoch": 0.7446617033461507, "grad_norm": 0.004896610975265503, "learning_rate": 7.90591770025513e-06, "loss": 0.0017, "step": 45510 }, { "epoch": 0.7448253292972266, "grad_norm": 0.06502117961645126, "learning_rate": 7.90475559129514e-06, "loss": 0.0016, "step": 45520 }, { "epoch": 0.7449889552483023, "grad_norm": 0.07437731325626373, "learning_rate": 7.903593245433162e-06, "loss": 0.0014, "step": 45530 }, { "epoch": 0.7451525811993782, "grad_norm": 0.012837845832109451, "learning_rate": 7.902430662764002e-06, "loss": 0.0011, "step": 45540 }, { "epoch": 0.7453162071504541, "grad_norm": 0.02074851468205452, "learning_rate": 7.901267843382472e-06, "loss": 0.0019, "step": 45550 }, { "epoch": 0.7454798331015299, "grad_norm": 0.0744505301117897, "learning_rate": 7.900104787383407e-06, "loss": 0.0017, "step": 45560 }, { "epoch": 0.7456434590526058, "grad_norm": 0.07938183844089508, "learning_rate": 7.898941494861661e-06, "loss": 0.003, "step": 45570 }, { "epoch": 0.7458070850036815, "grad_norm": 0.05233943834900856, "learning_rate": 7.89777796591211e-06, "loss": 0.002, "step": 45580 }, { "epoch": 0.7459707109547574, "grad_norm": 0.007412207778543234, "learning_rate": 7.896614200629648e-06, "loss": 0.0046, "step": 45590 }, { "epoch": 0.7461343369058333, "grad_norm": 0.08828859031200409, "learning_rate": 7.895450199109186e-06, "loss": 0.0015, "step": 45600 }, { "epoch": 0.7462979628569091, "grad_norm": 0.048798076808452606, "learning_rate": 7.89428596144566e-06, "loss": 0.0024, "step": 45610 }, { "epoch": 0.746461588807985, "grad_norm": 0.21522916853427887, "learning_rate": 7.893121487734015e-06, "loss": 0.003, "step": 45620 }, { "epoch": 0.7466252147590607, "grad_norm": 0.047461673617362976, "learning_rate": 7.891956778069227e-06, "loss": 0.002, "step": 45630 }, { "epoch": 0.7467888407101366, "grad_norm": 0.05639554187655449, "learning_rate": 7.890791832546281e-06, "loss": 0.0032, "step": 45640 }, { "epoch": 0.7469524666612125, "grad_norm": 0.31747567653656006, "learning_rate": 7.88962665126019e-06, "loss": 0.0038, "step": 45650 }, { "epoch": 0.7471160926122883, "grad_norm": 0.023616468533873558, "learning_rate": 7.88846123430598e-06, "loss": 0.0015, "step": 45660 }, { "epoch": 0.7472797185633642, "grad_norm": 0.1407870650291443, "learning_rate": 7.8872955817787e-06, "loss": 0.0017, "step": 45670 }, { "epoch": 0.74744334451444, "grad_norm": 0.07562875747680664, "learning_rate": 7.886129693773416e-06, "loss": 0.0018, "step": 45680 }, { "epoch": 0.7476069704655158, "grad_norm": 0.013998042792081833, "learning_rate": 7.884963570385214e-06, "loss": 0.0013, "step": 45690 }, { "epoch": 0.7477705964165917, "grad_norm": 0.04817408323287964, "learning_rate": 7.883797211709196e-06, "loss": 0.0012, "step": 45700 }, { "epoch": 0.7479342223676675, "grad_norm": 0.19806192815303802, "learning_rate": 7.88263061784049e-06, "loss": 0.0033, "step": 45710 }, { "epoch": 0.7480978483187434, "grad_norm": 0.10142921656370163, "learning_rate": 7.88146378887424e-06, "loss": 0.0022, "step": 45720 }, { "epoch": 0.7482614742698191, "grad_norm": 0.06292014569044113, "learning_rate": 7.880296724905605e-06, "loss": 0.0036, "step": 45730 }, { "epoch": 0.748425100220895, "grad_norm": 0.10621743649244308, "learning_rate": 7.879129426029771e-06, "loss": 0.0027, "step": 45740 }, { "epoch": 0.7485887261719709, "grad_norm": 0.06463120877742767, "learning_rate": 7.877961892341934e-06, "loss": 0.0031, "step": 45750 }, { "epoch": 0.7487523521230467, "grad_norm": 0.09279613196849823, "learning_rate": 7.876794123937318e-06, "loss": 0.0023, "step": 45760 }, { "epoch": 0.7489159780741226, "grad_norm": 0.09475941210985184, "learning_rate": 7.875626120911162e-06, "loss": 0.0029, "step": 45770 }, { "epoch": 0.7490796040251984, "grad_norm": 0.11217482388019562, "learning_rate": 7.874457883358722e-06, "loss": 0.0017, "step": 45780 }, { "epoch": 0.7492432299762742, "grad_norm": 0.0697907954454422, "learning_rate": 7.873289411375276e-06, "loss": 0.0015, "step": 45790 }, { "epoch": 0.7494068559273501, "grad_norm": 0.1602737009525299, "learning_rate": 7.872120705056124e-06, "loss": 0.0035, "step": 45800 }, { "epoch": 0.7495704818784259, "grad_norm": 0.09871604293584824, "learning_rate": 7.870951764496575e-06, "loss": 0.0022, "step": 45810 }, { "epoch": 0.7497341078295018, "grad_norm": 0.04293878749012947, "learning_rate": 7.869782589791971e-06, "loss": 0.0013, "step": 45820 }, { "epoch": 0.7498977337805776, "grad_norm": 0.04196852818131447, "learning_rate": 7.868613181037663e-06, "loss": 0.0013, "step": 45830 }, { "epoch": 0.7500613597316534, "grad_norm": 0.07524515688419342, "learning_rate": 7.867443538329023e-06, "loss": 0.003, "step": 45840 }, { "epoch": 0.7502249856827293, "grad_norm": 0.07245465368032455, "learning_rate": 7.866273661761445e-06, "loss": 0.0018, "step": 45850 }, { "epoch": 0.7503886116338051, "grad_norm": 0.06912210583686829, "learning_rate": 7.865103551430338e-06, "loss": 0.0019, "step": 45860 }, { "epoch": 0.750552237584881, "grad_norm": 0.04246789216995239, "learning_rate": 7.863933207431133e-06, "loss": 0.0023, "step": 45870 }, { "epoch": 0.7507158635359568, "grad_norm": 0.078510582447052, "learning_rate": 7.86276262985928e-06, "loss": 0.0035, "step": 45880 }, { "epoch": 0.7508794894870326, "grad_norm": 0.049804095178842545, "learning_rate": 7.861591818810246e-06, "loss": 0.0024, "step": 45890 }, { "epoch": 0.7510431154381085, "grad_norm": 0.040483035147190094, "learning_rate": 7.86042077437952e-06, "loss": 0.003, "step": 45900 }, { "epoch": 0.7512067413891843, "grad_norm": 0.09779264032840729, "learning_rate": 7.859249496662607e-06, "loss": 0.0057, "step": 45910 }, { "epoch": 0.7513703673402602, "grad_norm": 0.2889154851436615, "learning_rate": 7.858077985755034e-06, "loss": 0.006, "step": 45920 }, { "epoch": 0.751533993291336, "grad_norm": 0.22923921048641205, "learning_rate": 7.856906241752345e-06, "loss": 0.003, "step": 45930 }, { "epoch": 0.7516976192424119, "grad_norm": 0.04952012747526169, "learning_rate": 7.855734264750102e-06, "loss": 0.0035, "step": 45940 }, { "epoch": 0.7518612451934877, "grad_norm": 0.3482204079627991, "learning_rate": 7.854562054843888e-06, "loss": 0.0019, "step": 45950 }, { "epoch": 0.7520248711445635, "grad_norm": 0.08798300474882126, "learning_rate": 7.853389612129305e-06, "loss": 0.0037, "step": 45960 }, { "epoch": 0.7521884970956394, "grad_norm": 0.05743544176220894, "learning_rate": 7.852216936701972e-06, "loss": 0.002, "step": 45970 }, { "epoch": 0.7523521230467152, "grad_norm": 0.09211505204439163, "learning_rate": 7.851044028657532e-06, "loss": 0.0028, "step": 45980 }, { "epoch": 0.7525157489977911, "grad_norm": 0.04254554584622383, "learning_rate": 7.849870888091642e-06, "loss": 0.0013, "step": 45990 }, { "epoch": 0.7526793749488669, "grad_norm": 0.0790761262178421, "learning_rate": 7.848697515099976e-06, "loss": 0.0016, "step": 46000 }, { "epoch": 0.7528430008999427, "grad_norm": 0.06166630983352661, "learning_rate": 7.847523909778234e-06, "loss": 0.0023, "step": 46010 }, { "epoch": 0.7530066268510186, "grad_norm": 0.0754413977265358, "learning_rate": 7.84635007222213e-06, "loss": 0.002, "step": 46020 }, { "epoch": 0.7531702528020944, "grad_norm": 0.09435712546110153, "learning_rate": 7.845176002527399e-06, "loss": 0.0025, "step": 46030 }, { "epoch": 0.7533338787531703, "grad_norm": 0.06301447004079819, "learning_rate": 7.844001700789791e-06, "loss": 0.0014, "step": 46040 }, { "epoch": 0.7534975047042461, "grad_norm": 0.05977576971054077, "learning_rate": 7.842827167105083e-06, "loss": 0.003, "step": 46050 }, { "epoch": 0.7536611306553219, "grad_norm": 0.06579329073429108, "learning_rate": 7.841652401569062e-06, "loss": 0.0025, "step": 46060 }, { "epoch": 0.7538247566063978, "grad_norm": 0.05518899857997894, "learning_rate": 7.84047740427754e-06, "loss": 0.0021, "step": 46070 }, { "epoch": 0.7539883825574736, "grad_norm": 0.15784424543380737, "learning_rate": 7.839302175326344e-06, "loss": 0.002, "step": 46080 }, { "epoch": 0.7541520085085495, "grad_norm": 0.055536169558763504, "learning_rate": 7.838126714811323e-06, "loss": 0.0018, "step": 46090 }, { "epoch": 0.7543156344596254, "grad_norm": 0.09416177868843079, "learning_rate": 7.836951022828343e-06, "loss": 0.0022, "step": 46100 }, { "epoch": 0.7544792604107011, "grad_norm": 0.05534076318144798, "learning_rate": 7.83577509947329e-06, "loss": 0.0017, "step": 46110 }, { "epoch": 0.754642886361777, "grad_norm": 0.09154891222715378, "learning_rate": 7.834598944842068e-06, "loss": 0.0034, "step": 46120 }, { "epoch": 0.7548065123128528, "grad_norm": 0.05945158377289772, "learning_rate": 7.833422559030602e-06, "loss": 0.0024, "step": 46130 }, { "epoch": 0.7549701382639287, "grad_norm": 0.07742059230804443, "learning_rate": 7.83224594213483e-06, "loss": 0.0024, "step": 46140 }, { "epoch": 0.7551337642150046, "grad_norm": 0.07154424488544464, "learning_rate": 7.831069094250715e-06, "loss": 0.0023, "step": 46150 }, { "epoch": 0.7552973901660803, "grad_norm": 0.15334156155586243, "learning_rate": 7.829892015474237e-06, "loss": 0.0016, "step": 46160 }, { "epoch": 0.7554610161171562, "grad_norm": 0.058418042957782745, "learning_rate": 7.828714705901395e-06, "loss": 0.0022, "step": 46170 }, { "epoch": 0.755624642068232, "grad_norm": 0.06077789515256882, "learning_rate": 7.827537165628206e-06, "loss": 0.0017, "step": 46180 }, { "epoch": 0.7557882680193079, "grad_norm": 0.08221083134412766, "learning_rate": 7.826359394750706e-06, "loss": 0.0011, "step": 46190 }, { "epoch": 0.7559518939703836, "grad_norm": 0.03951855003833771, "learning_rate": 7.825181393364949e-06, "loss": 0.0026, "step": 46200 }, { "epoch": 0.7561155199214595, "grad_norm": 0.09651871770620346, "learning_rate": 7.82400316156701e-06, "loss": 0.0015, "step": 46210 }, { "epoch": 0.7562791458725354, "grad_norm": 0.0519305020570755, "learning_rate": 7.82282469945298e-06, "loss": 0.0013, "step": 46220 }, { "epoch": 0.7564427718236112, "grad_norm": 0.049298834055662155, "learning_rate": 7.821646007118974e-06, "loss": 0.0018, "step": 46230 }, { "epoch": 0.7566063977746871, "grad_norm": 0.058265797793865204, "learning_rate": 7.820467084661118e-06, "loss": 0.0027, "step": 46240 }, { "epoch": 0.7567700237257629, "grad_norm": 0.12728208303451538, "learning_rate": 7.819287932175563e-06, "loss": 0.0033, "step": 46250 }, { "epoch": 0.7569336496768387, "grad_norm": 0.05667201802134514, "learning_rate": 7.818108549758477e-06, "loss": 0.0028, "step": 46260 }, { "epoch": 0.7570972756279146, "grad_norm": 0.06329859048128128, "learning_rate": 7.816928937506045e-06, "loss": 0.0048, "step": 46270 }, { "epoch": 0.7572609015789904, "grad_norm": 0.039452340453863144, "learning_rate": 7.815749095514474e-06, "loss": 0.002, "step": 46280 }, { "epoch": 0.7574245275300663, "grad_norm": 0.028742201626300812, "learning_rate": 7.814569023879985e-06, "loss": 0.0024, "step": 46290 }, { "epoch": 0.7575881534811421, "grad_norm": 0.23006078600883484, "learning_rate": 7.813388722698823e-06, "loss": 0.0031, "step": 46300 }, { "epoch": 0.7577517794322179, "grad_norm": 0.0632338598370552, "learning_rate": 7.81220819206725e-06, "loss": 0.0028, "step": 46310 }, { "epoch": 0.7579154053832938, "grad_norm": 0.03981895372271538, "learning_rate": 7.811027432081543e-06, "loss": 0.0052, "step": 46320 }, { "epoch": 0.7580790313343696, "grad_norm": 0.08559157699346542, "learning_rate": 7.809846442838003e-06, "loss": 0.0017, "step": 46330 }, { "epoch": 0.7582426572854455, "grad_norm": 0.0455181747674942, "learning_rate": 7.808665224432948e-06, "loss": 0.0027, "step": 46340 }, { "epoch": 0.7584062832365213, "grad_norm": 0.05995987728238106, "learning_rate": 7.807483776962712e-06, "loss": 0.0025, "step": 46350 }, { "epoch": 0.7585699091875971, "grad_norm": 0.033405058085918427, "learning_rate": 7.806302100523653e-06, "loss": 0.0021, "step": 46360 }, { "epoch": 0.758733535138673, "grad_norm": 0.06432349979877472, "learning_rate": 7.80512019521214e-06, "loss": 0.0015, "step": 46370 }, { "epoch": 0.7588971610897488, "grad_norm": 0.04573766142129898, "learning_rate": 7.80393806112457e-06, "loss": 0.0014, "step": 46380 }, { "epoch": 0.7590607870408247, "grad_norm": 0.03326824679970741, "learning_rate": 7.80275569835735e-06, "loss": 0.0013, "step": 46390 }, { "epoch": 0.7592244129919005, "grad_norm": 0.05499792471528053, "learning_rate": 7.801573107006912e-06, "loss": 0.0024, "step": 46400 }, { "epoch": 0.7593880389429764, "grad_norm": 0.09602683037519455, "learning_rate": 7.8003902871697e-06, "loss": 0.0032, "step": 46410 }, { "epoch": 0.7595516648940522, "grad_norm": 0.018890516832470894, "learning_rate": 7.799207238942188e-06, "loss": 0.0024, "step": 46420 }, { "epoch": 0.759715290845128, "grad_norm": 0.1350695937871933, "learning_rate": 7.798023962420856e-06, "loss": 0.0035, "step": 46430 }, { "epoch": 0.7598789167962039, "grad_norm": 0.08974120020866394, "learning_rate": 7.79684045770221e-06, "loss": 0.0021, "step": 46440 }, { "epoch": 0.7600425427472797, "grad_norm": 0.052961576730012894, "learning_rate": 7.79565672488277e-06, "loss": 0.0041, "step": 46450 }, { "epoch": 0.7602061686983556, "grad_norm": 0.0438653938472271, "learning_rate": 7.794472764059082e-06, "loss": 0.0022, "step": 46460 }, { "epoch": 0.7603697946494314, "grad_norm": 0.0634981170296669, "learning_rate": 7.7932885753277e-06, "loss": 0.0023, "step": 46470 }, { "epoch": 0.7605334206005072, "grad_norm": 0.1865743100643158, "learning_rate": 7.792104158785208e-06, "loss": 0.0034, "step": 46480 }, { "epoch": 0.7606970465515831, "grad_norm": 0.07153315842151642, "learning_rate": 7.7909195145282e-06, "loss": 0.0029, "step": 46490 }, { "epoch": 0.7608606725026589, "grad_norm": 0.07196561247110367, "learning_rate": 7.789734642653291e-06, "loss": 0.0017, "step": 46500 }, { "epoch": 0.7610242984537348, "grad_norm": 0.04044812172651291, "learning_rate": 7.788549543257116e-06, "loss": 0.0022, "step": 46510 }, { "epoch": 0.7611879244048106, "grad_norm": 0.07842089235782623, "learning_rate": 7.787364216436326e-06, "loss": 0.0017, "step": 46520 }, { "epoch": 0.7613515503558864, "grad_norm": 0.19229631125926971, "learning_rate": 7.786178662287596e-06, "loss": 0.0041, "step": 46530 }, { "epoch": 0.7615151763069623, "grad_norm": 0.07726782560348511, "learning_rate": 7.784992880907613e-06, "loss": 0.0028, "step": 46540 }, { "epoch": 0.7616788022580381, "grad_norm": 0.04832502454519272, "learning_rate": 7.783806872393086e-06, "loss": 0.0015, "step": 46550 }, { "epoch": 0.761842428209114, "grad_norm": 0.0372101254761219, "learning_rate": 7.782620636840743e-06, "loss": 0.0023, "step": 46560 }, { "epoch": 0.7620060541601898, "grad_norm": 0.10387596487998962, "learning_rate": 7.781434174347327e-06, "loss": 0.0028, "step": 46570 }, { "epoch": 0.7621696801112656, "grad_norm": 0.04422289505600929, "learning_rate": 7.780247485009604e-06, "loss": 0.0015, "step": 46580 }, { "epoch": 0.7623333060623415, "grad_norm": 0.028675392270088196, "learning_rate": 7.779060568924355e-06, "loss": 0.0025, "step": 46590 }, { "epoch": 0.7624969320134173, "grad_norm": 0.06437677145004272, "learning_rate": 7.77787342618838e-06, "loss": 0.0022, "step": 46600 }, { "epoch": 0.7626605579644932, "grad_norm": 0.20517979562282562, "learning_rate": 7.776686056898501e-06, "loss": 0.0054, "step": 46610 }, { "epoch": 0.762824183915569, "grad_norm": 0.09521616995334625, "learning_rate": 7.775498461151553e-06, "loss": 0.0037, "step": 46620 }, { "epoch": 0.7629878098666448, "grad_norm": 0.11395671963691711, "learning_rate": 7.774310639044395e-06, "loss": 0.002, "step": 46630 }, { "epoch": 0.7631514358177207, "grad_norm": 0.21778123080730438, "learning_rate": 7.7731225906739e-06, "loss": 0.0025, "step": 46640 }, { "epoch": 0.7633150617687965, "grad_norm": 0.15097777545452118, "learning_rate": 7.77193431613696e-06, "loss": 0.0015, "step": 46650 }, { "epoch": 0.7634786877198724, "grad_norm": 0.032192833721637726, "learning_rate": 7.77074581553049e-06, "loss": 0.0024, "step": 46660 }, { "epoch": 0.7636423136709483, "grad_norm": 0.1414719969034195, "learning_rate": 7.769557088951419e-06, "loss": 0.0016, "step": 46670 }, { "epoch": 0.763805939622024, "grad_norm": 0.08889006078243256, "learning_rate": 7.76836813649669e-06, "loss": 0.0026, "step": 46680 }, { "epoch": 0.7639695655730999, "grad_norm": 0.06315800547599792, "learning_rate": 7.76717895826328e-06, "loss": 0.0018, "step": 46690 }, { "epoch": 0.7641331915241757, "grad_norm": 0.05180824175477028, "learning_rate": 7.765989554348166e-06, "loss": 0.0022, "step": 46700 }, { "epoch": 0.7642968174752516, "grad_norm": 0.08959269523620605, "learning_rate": 7.764799924848354e-06, "loss": 0.0024, "step": 46710 }, { "epoch": 0.7644604434263275, "grad_norm": 0.2625131905078888, "learning_rate": 7.763610069860869e-06, "loss": 0.0027, "step": 46720 }, { "epoch": 0.7646240693774032, "grad_norm": 0.04443061724305153, "learning_rate": 7.762419989482748e-06, "loss": 0.0021, "step": 46730 }, { "epoch": 0.7647876953284791, "grad_norm": 0.07516305148601532, "learning_rate": 7.76122968381105e-06, "loss": 0.0028, "step": 46740 }, { "epoch": 0.7649513212795549, "grad_norm": 0.02677128277719021, "learning_rate": 7.760039152942856e-06, "loss": 0.002, "step": 46750 }, { "epoch": 0.7651149472306308, "grad_norm": 0.08414687216281891, "learning_rate": 7.758848396975258e-06, "loss": 0.0022, "step": 46760 }, { "epoch": 0.7652785731817067, "grad_norm": 0.10644156485795975, "learning_rate": 7.757657416005373e-06, "loss": 0.003, "step": 46770 }, { "epoch": 0.7654421991327824, "grad_norm": 0.20370633900165558, "learning_rate": 7.756466210130329e-06, "loss": 0.0038, "step": 46780 }, { "epoch": 0.7656058250838583, "grad_norm": 0.18757426738739014, "learning_rate": 7.75527477944728e-06, "loss": 0.0029, "step": 46790 }, { "epoch": 0.7657694510349341, "grad_norm": 0.2388020008802414, "learning_rate": 7.754083124053394e-06, "loss": 0.002, "step": 46800 }, { "epoch": 0.76593307698601, "grad_norm": 0.2437874674797058, "learning_rate": 7.752891244045859e-06, "loss": 0.0017, "step": 46810 }, { "epoch": 0.7660967029370859, "grad_norm": 0.04364819452166557, "learning_rate": 7.75169913952188e-06, "loss": 0.0038, "step": 46820 }, { "epoch": 0.7662603288881616, "grad_norm": 0.1228971779346466, "learning_rate": 7.750506810578682e-06, "loss": 0.002, "step": 46830 }, { "epoch": 0.7664239548392375, "grad_norm": 0.09557469934225082, "learning_rate": 7.749314257313506e-06, "loss": 0.004, "step": 46840 }, { "epoch": 0.7665875807903133, "grad_norm": 0.0702197328209877, "learning_rate": 7.748121479823614e-06, "loss": 0.0014, "step": 46850 }, { "epoch": 0.7667512067413892, "grad_norm": 0.06272779405117035, "learning_rate": 7.746928478206283e-06, "loss": 0.0018, "step": 46860 }, { "epoch": 0.7669148326924651, "grad_norm": 0.09017270058393478, "learning_rate": 7.745735252558811e-06, "loss": 0.0025, "step": 46870 }, { "epoch": 0.7670784586435409, "grad_norm": 0.10913238674402237, "learning_rate": 7.744541802978514e-06, "loss": 0.0021, "step": 46880 }, { "epoch": 0.7672420845946167, "grad_norm": 0.1820211559534073, "learning_rate": 7.743348129562724e-06, "loss": 0.002, "step": 46890 }, { "epoch": 0.7674057105456925, "grad_norm": 0.12953536212444305, "learning_rate": 7.742154232408796e-06, "loss": 0.0028, "step": 46900 }, { "epoch": 0.7675693364967684, "grad_norm": 0.03876791149377823, "learning_rate": 7.740960111614097e-06, "loss": 0.0026, "step": 46910 }, { "epoch": 0.7677329624478443, "grad_norm": 0.09419603645801544, "learning_rate": 7.73976576727602e-06, "loss": 0.002, "step": 46920 }, { "epoch": 0.76789658839892, "grad_norm": 0.08967334777116776, "learning_rate": 7.738571199491965e-06, "loss": 0.0027, "step": 46930 }, { "epoch": 0.7680602143499959, "grad_norm": 0.11530376225709915, "learning_rate": 7.737376408359362e-06, "loss": 0.0036, "step": 46940 }, { "epoch": 0.7682238403010717, "grad_norm": 0.02665570192039013, "learning_rate": 7.736181393975653e-06, "loss": 0.0029, "step": 46950 }, { "epoch": 0.7683874662521476, "grad_norm": 0.08262048661708832, "learning_rate": 7.734986156438296e-06, "loss": 0.0037, "step": 46960 }, { "epoch": 0.7685510922032235, "grad_norm": 0.06585193425416946, "learning_rate": 7.733790695844776e-06, "loss": 0.0017, "step": 46970 }, { "epoch": 0.7687147181542993, "grad_norm": 0.06354914605617523, "learning_rate": 7.732595012292587e-06, "loss": 0.0015, "step": 46980 }, { "epoch": 0.7688783441053751, "grad_norm": 0.20126089453697205, "learning_rate": 7.731399105879246e-06, "loss": 0.0016, "step": 46990 }, { "epoch": 0.7690419700564509, "grad_norm": 0.07253695279359818, "learning_rate": 7.730202976702288e-06, "loss": 0.0028, "step": 47000 }, { "epoch": 0.7692055960075268, "grad_norm": 0.025956712663173676, "learning_rate": 7.729006624859263e-06, "loss": 0.0018, "step": 47010 }, { "epoch": 0.7693692219586027, "grad_norm": 0.018495842814445496, "learning_rate": 7.727810050447742e-06, "loss": 0.0015, "step": 47020 }, { "epoch": 0.7695328479096785, "grad_norm": 0.07379436492919922, "learning_rate": 7.726613253565315e-06, "loss": 0.0031, "step": 47030 }, { "epoch": 0.7696964738607543, "grad_norm": 0.02228795364499092, "learning_rate": 7.725416234309589e-06, "loss": 0.0021, "step": 47040 }, { "epoch": 0.7698600998118301, "grad_norm": 0.2120303362607956, "learning_rate": 7.724218992778185e-06, "loss": 0.0063, "step": 47050 }, { "epoch": 0.770023725762906, "grad_norm": 0.050658971071243286, "learning_rate": 7.72302152906875e-06, "loss": 0.0016, "step": 47060 }, { "epoch": 0.7701873517139818, "grad_norm": 0.03249619901180267, "learning_rate": 7.721823843278944e-06, "loss": 0.0034, "step": 47070 }, { "epoch": 0.7703509776650577, "grad_norm": 0.031035101041197777, "learning_rate": 7.720625935506445e-06, "loss": 0.0015, "step": 47080 }, { "epoch": 0.7705146036161336, "grad_norm": 0.12945032119750977, "learning_rate": 7.719427805848952e-06, "loss": 0.0022, "step": 47090 }, { "epoch": 0.7706782295672093, "grad_norm": 0.04619685932993889, "learning_rate": 7.718229454404178e-06, "loss": 0.0027, "step": 47100 }, { "epoch": 0.7708418555182852, "grad_norm": 0.01806194894015789, "learning_rate": 7.717030881269858e-06, "loss": 0.0021, "step": 47110 }, { "epoch": 0.771005481469361, "grad_norm": 0.06549072265625, "learning_rate": 7.715832086543744e-06, "loss": 0.0017, "step": 47120 }, { "epoch": 0.7711691074204369, "grad_norm": 0.021185988560318947, "learning_rate": 7.714633070323607e-06, "loss": 0.0041, "step": 47130 }, { "epoch": 0.7713327333715128, "grad_norm": 0.17615261673927307, "learning_rate": 7.71343383270723e-06, "loss": 0.0024, "step": 47140 }, { "epoch": 0.7714963593225885, "grad_norm": 0.07218621671199799, "learning_rate": 7.712234373792423e-06, "loss": 0.0027, "step": 47150 }, { "epoch": 0.7716599852736644, "grad_norm": 0.10249637812376022, "learning_rate": 7.711034693677008e-06, "loss": 0.002, "step": 47160 }, { "epoch": 0.7718236112247402, "grad_norm": 0.061467766761779785, "learning_rate": 7.709834792458826e-06, "loss": 0.0016, "step": 47170 }, { "epoch": 0.7719872371758161, "grad_norm": 0.08059611171483994, "learning_rate": 7.70863467023574e-06, "loss": 0.0017, "step": 47180 }, { "epoch": 0.772150863126892, "grad_norm": 0.20557264983654022, "learning_rate": 7.707434327105625e-06, "loss": 0.0031, "step": 47190 }, { "epoch": 0.7723144890779677, "grad_norm": 0.11062387377023697, "learning_rate": 7.706233763166377e-06, "loss": 0.0033, "step": 47200 }, { "epoch": 0.7724781150290436, "grad_norm": 0.05701605603098869, "learning_rate": 7.70503297851591e-06, "loss": 0.0016, "step": 47210 }, { "epoch": 0.7726417409801194, "grad_norm": 0.06543687731027603, "learning_rate": 7.703831973252158e-06, "loss": 0.0024, "step": 47220 }, { "epoch": 0.7728053669311953, "grad_norm": 0.4352649748325348, "learning_rate": 7.70263074747307e-06, "loss": 0.0029, "step": 47230 }, { "epoch": 0.7729689928822712, "grad_norm": 0.3466147780418396, "learning_rate": 7.701429301276612e-06, "loss": 0.0023, "step": 47240 }, { "epoch": 0.7731326188333469, "grad_norm": 0.1285238116979599, "learning_rate": 7.700227634760773e-06, "loss": 0.0027, "step": 47250 }, { "epoch": 0.7732962447844228, "grad_norm": 0.10287115722894669, "learning_rate": 7.699025748023553e-06, "loss": 0.0012, "step": 47260 }, { "epoch": 0.7734598707354986, "grad_norm": 0.1444256603717804, "learning_rate": 7.697823641162978e-06, "loss": 0.0039, "step": 47270 }, { "epoch": 0.7736234966865745, "grad_norm": 0.06911500543355942, "learning_rate": 7.696621314277083e-06, "loss": 0.002, "step": 47280 }, { "epoch": 0.7737871226376504, "grad_norm": 0.032964158803224564, "learning_rate": 7.695418767463931e-06, "loss": 0.0031, "step": 47290 }, { "epoch": 0.7739507485887261, "grad_norm": 0.014431367628276348, "learning_rate": 7.694216000821592e-06, "loss": 0.0022, "step": 47300 }, { "epoch": 0.774114374539802, "grad_norm": 0.04793722182512283, "learning_rate": 7.693013014448166e-06, "loss": 0.0015, "step": 47310 }, { "epoch": 0.7742780004908778, "grad_norm": 0.10975118726491928, "learning_rate": 7.691809808441758e-06, "loss": 0.0019, "step": 47320 }, { "epoch": 0.7744416264419537, "grad_norm": 0.12805026769638062, "learning_rate": 7.6906063829005e-06, "loss": 0.0023, "step": 47330 }, { "epoch": 0.7746052523930296, "grad_norm": 0.04912872612476349, "learning_rate": 7.689402737922542e-06, "loss": 0.0024, "step": 47340 }, { "epoch": 0.7747688783441053, "grad_norm": 0.06140722706913948, "learning_rate": 7.688198873606046e-06, "loss": 0.0018, "step": 47350 }, { "epoch": 0.7749325042951812, "grad_norm": 0.09005114436149597, "learning_rate": 7.686994790049197e-06, "loss": 0.0018, "step": 47360 }, { "epoch": 0.775096130246257, "grad_norm": 0.14339587092399597, "learning_rate": 7.685790487350194e-06, "loss": 0.002, "step": 47370 }, { "epoch": 0.7752597561973329, "grad_norm": 0.16206015646457672, "learning_rate": 7.684585965607255e-06, "loss": 0.0017, "step": 47380 }, { "epoch": 0.7754233821484088, "grad_norm": 0.058691803365945816, "learning_rate": 7.68338122491862e-06, "loss": 0.0015, "step": 47390 }, { "epoch": 0.7755870080994846, "grad_norm": 0.04804212599992752, "learning_rate": 7.682176265382541e-06, "loss": 0.0016, "step": 47400 }, { "epoch": 0.7757506340505604, "grad_norm": 0.04845782741904259, "learning_rate": 7.680971087097293e-06, "loss": 0.0033, "step": 47410 }, { "epoch": 0.7759142600016362, "grad_norm": 0.10536957532167435, "learning_rate": 7.679765690161165e-06, "loss": 0.0017, "step": 47420 }, { "epoch": 0.7760778859527121, "grad_norm": 0.06472755968570709, "learning_rate": 7.678560074672461e-06, "loss": 0.0017, "step": 47430 }, { "epoch": 0.776241511903788, "grad_norm": 0.19850881397724152, "learning_rate": 7.677354240729514e-06, "loss": 0.0034, "step": 47440 }, { "epoch": 0.7764051378548638, "grad_norm": 0.07329036295413971, "learning_rate": 7.676148188430664e-06, "loss": 0.0019, "step": 47450 }, { "epoch": 0.7765687638059396, "grad_norm": 0.12139184772968292, "learning_rate": 7.67494191787427e-06, "loss": 0.0037, "step": 47460 }, { "epoch": 0.7767323897570154, "grad_norm": 0.04895130172371864, "learning_rate": 7.673735429158717e-06, "loss": 0.0035, "step": 47470 }, { "epoch": 0.7768960157080913, "grad_norm": 0.1295814961194992, "learning_rate": 7.672528722382398e-06, "loss": 0.0018, "step": 47480 }, { "epoch": 0.7770596416591672, "grad_norm": 0.11648079752922058, "learning_rate": 7.67132179764373e-06, "loss": 0.0032, "step": 47490 }, { "epoch": 0.777223267610243, "grad_norm": 0.13318321108818054, "learning_rate": 7.670114655041144e-06, "loss": 0.0016, "step": 47500 }, { "epoch": 0.7773868935613188, "grad_norm": 0.024613745510578156, "learning_rate": 7.668907294673092e-06, "loss": 0.0019, "step": 47510 }, { "epoch": 0.7775505195123946, "grad_norm": 0.07957175374031067, "learning_rate": 7.66769971663804e-06, "loss": 0.0017, "step": 47520 }, { "epoch": 0.7777141454634705, "grad_norm": 0.07746037095785141, "learning_rate": 7.666491921034478e-06, "loss": 0.0025, "step": 47530 }, { "epoch": 0.7778777714145464, "grad_norm": 0.15120545029640198, "learning_rate": 7.665283907960906e-06, "loss": 0.0045, "step": 47540 }, { "epoch": 0.7780413973656222, "grad_norm": 0.04857835918664932, "learning_rate": 7.664075677515845e-06, "loss": 0.0019, "step": 47550 }, { "epoch": 0.778205023316698, "grad_norm": 0.07643720507621765, "learning_rate": 7.662867229797837e-06, "loss": 0.0024, "step": 47560 }, { "epoch": 0.7783686492677738, "grad_norm": 0.07395889610052109, "learning_rate": 7.661658564905437e-06, "loss": 0.0024, "step": 47570 }, { "epoch": 0.7785322752188497, "grad_norm": 0.06251849979162216, "learning_rate": 7.660449682937222e-06, "loss": 0.0029, "step": 47580 }, { "epoch": 0.7786959011699256, "grad_norm": 0.08747665584087372, "learning_rate": 7.65924058399178e-06, "loss": 0.0027, "step": 47590 }, { "epoch": 0.7788595271210014, "grad_norm": 0.0925385132431984, "learning_rate": 7.658031268167724e-06, "loss": 0.0014, "step": 47600 }, { "epoch": 0.7790231530720773, "grad_norm": 0.04613550752401352, "learning_rate": 7.656821735563683e-06, "loss": 0.0013, "step": 47610 }, { "epoch": 0.779186779023153, "grad_norm": 0.1335994005203247, "learning_rate": 7.655611986278298e-06, "loss": 0.0028, "step": 47620 }, { "epoch": 0.7793504049742289, "grad_norm": 0.09227462857961655, "learning_rate": 7.654402020410236e-06, "loss": 0.0016, "step": 47630 }, { "epoch": 0.7795140309253048, "grad_norm": 0.10981418937444687, "learning_rate": 7.653191838058176e-06, "loss": 0.002, "step": 47640 }, { "epoch": 0.7796776568763806, "grad_norm": 0.4968116879463196, "learning_rate": 7.651981439320816e-06, "loss": 0.0022, "step": 47650 }, { "epoch": 0.7798412828274565, "grad_norm": 0.12973618507385254, "learning_rate": 7.650770824296873e-06, "loss": 0.0019, "step": 47660 }, { "epoch": 0.7800049087785322, "grad_norm": 0.07794830203056335, "learning_rate": 7.649559993085083e-06, "loss": 0.0023, "step": 47670 }, { "epoch": 0.7801685347296081, "grad_norm": 0.03202817216515541, "learning_rate": 7.648348945784193e-06, "loss": 0.0034, "step": 47680 }, { "epoch": 0.780332160680684, "grad_norm": 0.09393087774515152, "learning_rate": 7.647137682492972e-06, "loss": 0.0024, "step": 47690 }, { "epoch": 0.7804957866317598, "grad_norm": 0.1074301078915596, "learning_rate": 7.645926203310208e-06, "loss": 0.0017, "step": 47700 }, { "epoch": 0.7806594125828357, "grad_norm": 0.15869303047657013, "learning_rate": 7.644714508334705e-06, "loss": 0.0036, "step": 47710 }, { "epoch": 0.7808230385339114, "grad_norm": 0.06684724241495132, "learning_rate": 7.643502597665285e-06, "loss": 0.0027, "step": 47720 }, { "epoch": 0.7809866644849873, "grad_norm": 0.09461407363414764, "learning_rate": 7.642290471400788e-06, "loss": 0.0027, "step": 47730 }, { "epoch": 0.7811502904360632, "grad_norm": 0.0623343363404274, "learning_rate": 7.64107812964007e-06, "loss": 0.0041, "step": 47740 }, { "epoch": 0.781313916387139, "grad_norm": 0.036193910986185074, "learning_rate": 7.639865572482004e-06, "loss": 0.0011, "step": 47750 }, { "epoch": 0.7814775423382149, "grad_norm": 0.07433672249317169, "learning_rate": 7.638652800025484e-06, "loss": 0.0027, "step": 47760 }, { "epoch": 0.7816411682892906, "grad_norm": 0.038148537278175354, "learning_rate": 7.63743981236942e-06, "loss": 0.0032, "step": 47770 }, { "epoch": 0.7818047942403665, "grad_norm": 0.1260686218738556, "learning_rate": 7.636226609612739e-06, "loss": 0.0049, "step": 47780 }, { "epoch": 0.7819684201914424, "grad_norm": 0.059843964874744415, "learning_rate": 7.635013191854383e-06, "loss": 0.0014, "step": 47790 }, { "epoch": 0.7821320461425182, "grad_norm": 0.14283470809459686, "learning_rate": 7.633799559193317e-06, "loss": 0.0038, "step": 47800 }, { "epoch": 0.7822956720935941, "grad_norm": 0.14687062799930573, "learning_rate": 7.63258571172852e-06, "loss": 0.0037, "step": 47810 }, { "epoch": 0.7824592980446698, "grad_norm": 0.025399206206202507, "learning_rate": 7.631371649558988e-06, "loss": 0.003, "step": 47820 }, { "epoch": 0.7826229239957457, "grad_norm": 0.057816196233034134, "learning_rate": 7.630157372783738e-06, "loss": 0.0011, "step": 47830 }, { "epoch": 0.7827865499468216, "grad_norm": 0.06773620843887329, "learning_rate": 7.628942881501802e-06, "loss": 0.0026, "step": 47840 }, { "epoch": 0.7829501758978974, "grad_norm": 0.039248026907444, "learning_rate": 7.627728175812228e-06, "loss": 0.002, "step": 47850 }, { "epoch": 0.7831138018489733, "grad_norm": 0.07992345094680786, "learning_rate": 7.626513255814085e-06, "loss": 0.0021, "step": 47860 }, { "epoch": 0.783277427800049, "grad_norm": 0.11569804698228836, "learning_rate": 7.625298121606457e-06, "loss": 0.0029, "step": 47870 }, { "epoch": 0.7834410537511249, "grad_norm": 0.05941597372293472, "learning_rate": 7.624082773288446e-06, "loss": 0.0025, "step": 47880 }, { "epoch": 0.7836046797022008, "grad_norm": 0.19366200268268585, "learning_rate": 7.622867210959171e-06, "loss": 0.0014, "step": 47890 }, { "epoch": 0.7837683056532766, "grad_norm": 0.09540849924087524, "learning_rate": 7.6216514347177715e-06, "loss": 0.0023, "step": 47900 }, { "epoch": 0.7839319316043525, "grad_norm": 0.0654233768582344, "learning_rate": 7.6204354446634e-06, "loss": 0.0022, "step": 47910 }, { "epoch": 0.7840955575554283, "grad_norm": 0.029068201780319214, "learning_rate": 7.6192192408952284e-06, "loss": 0.0018, "step": 47920 }, { "epoch": 0.7842591835065041, "grad_norm": 0.036160003393888474, "learning_rate": 7.618002823512447e-06, "loss": 0.0023, "step": 47930 }, { "epoch": 0.7844228094575799, "grad_norm": 0.05089482292532921, "learning_rate": 7.616786192614264e-06, "loss": 0.0057, "step": 47940 }, { "epoch": 0.7845864354086558, "grad_norm": 0.06993527710437775, "learning_rate": 7.6155693482999e-06, "loss": 0.0035, "step": 47950 }, { "epoch": 0.7847500613597317, "grad_norm": 0.021343685686588287, "learning_rate": 7.614352290668601e-06, "loss": 0.0018, "step": 47960 }, { "epoch": 0.7849136873108075, "grad_norm": 0.04830536991357803, "learning_rate": 7.613135019819623e-06, "loss": 0.0029, "step": 47970 }, { "epoch": 0.7850773132618833, "grad_norm": 0.24141362309455872, "learning_rate": 7.6119175358522436e-06, "loss": 0.0022, "step": 47980 }, { "epoch": 0.7852409392129591, "grad_norm": 0.0946425348520279, "learning_rate": 7.610699838865756e-06, "loss": 0.0027, "step": 47990 }, { "epoch": 0.785404565164035, "grad_norm": 0.08040356636047363, "learning_rate": 7.609481928959473e-06, "loss": 0.003, "step": 48000 }, { "epoch": 0.7855681911151109, "grad_norm": 0.1269335299730301, "learning_rate": 7.6082638062327205e-06, "loss": 0.0023, "step": 48010 }, { "epoch": 0.7857318170661867, "grad_norm": 0.04364291951060295, "learning_rate": 7.607045470784847e-06, "loss": 0.002, "step": 48020 }, { "epoch": 0.7858954430172626, "grad_norm": 0.21546050906181335, "learning_rate": 7.605826922715216e-06, "loss": 0.0026, "step": 48030 }, { "epoch": 0.7860590689683383, "grad_norm": 0.06948831677436829, "learning_rate": 7.604608162123205e-06, "loss": 0.0031, "step": 48040 }, { "epoch": 0.7862226949194142, "grad_norm": 0.045591115951538086, "learning_rate": 7.603389189108215e-06, "loss": 0.0016, "step": 48050 }, { "epoch": 0.7863863208704901, "grad_norm": 0.03939118608832359, "learning_rate": 7.60217000376966e-06, "loss": 0.0011, "step": 48060 }, { "epoch": 0.7865499468215659, "grad_norm": 0.1619071215391159, "learning_rate": 7.600950606206971e-06, "loss": 0.0027, "step": 48070 }, { "epoch": 0.7867135727726418, "grad_norm": 0.06290711462497711, "learning_rate": 7.5997309965196035e-06, "loss": 0.002, "step": 48080 }, { "epoch": 0.7868771987237175, "grad_norm": 0.07672736048698425, "learning_rate": 7.5985111748070185e-06, "loss": 0.0016, "step": 48090 }, { "epoch": 0.7870408246747934, "grad_norm": 0.07655113190412521, "learning_rate": 7.597291141168704e-06, "loss": 0.002, "step": 48100 }, { "epoch": 0.7872044506258693, "grad_norm": 0.08286347985267639, "learning_rate": 7.59607089570416e-06, "loss": 0.0014, "step": 48110 }, { "epoch": 0.7873680765769451, "grad_norm": 0.04497053474187851, "learning_rate": 7.594850438512906e-06, "loss": 0.0024, "step": 48120 }, { "epoch": 0.787531702528021, "grad_norm": 0.0022733251098543406, "learning_rate": 7.593629769694479e-06, "loss": 0.0016, "step": 48130 }, { "epoch": 0.7876953284790967, "grad_norm": 0.15311624109745026, "learning_rate": 7.592408889348433e-06, "loss": 0.0018, "step": 48140 }, { "epoch": 0.7878589544301726, "grad_norm": 0.05552238970994949, "learning_rate": 7.591187797574337e-06, "loss": 0.0021, "step": 48150 }, { "epoch": 0.7880225803812485, "grad_norm": 0.09556196630001068, "learning_rate": 7.589966494471781e-06, "loss": 0.0019, "step": 48160 }, { "epoch": 0.7881862063323243, "grad_norm": 0.1586340218782425, "learning_rate": 7.58874498014037e-06, "loss": 0.0025, "step": 48170 }, { "epoch": 0.7883498322834002, "grad_norm": 0.0378156341612339, "learning_rate": 7.587523254679725e-06, "loss": 0.0024, "step": 48180 }, { "epoch": 0.7885134582344759, "grad_norm": 0.04207262396812439, "learning_rate": 7.5863013181894885e-06, "loss": 0.002, "step": 48190 }, { "epoch": 0.7886770841855518, "grad_norm": 0.02107994630932808, "learning_rate": 7.5850791707693135e-06, "loss": 0.0038, "step": 48200 }, { "epoch": 0.7888407101366277, "grad_norm": 0.08851795643568039, "learning_rate": 7.58385681251888e-06, "loss": 0.0027, "step": 48210 }, { "epoch": 0.7890043360877035, "grad_norm": 0.053407587110996246, "learning_rate": 7.582634243537872e-06, "loss": 0.0012, "step": 48220 }, { "epoch": 0.7891679620387794, "grad_norm": 0.014489405788481236, "learning_rate": 7.581411463926004e-06, "loss": 0.0025, "step": 48230 }, { "epoch": 0.7893315879898551, "grad_norm": 0.11791495978832245, "learning_rate": 7.580188473782999e-06, "loss": 0.0023, "step": 48240 }, { "epoch": 0.789495213940931, "grad_norm": 0.0511234886944294, "learning_rate": 7.578965273208601e-06, "loss": 0.0021, "step": 48250 }, { "epoch": 0.7896588398920069, "grad_norm": 0.010010137222707272, "learning_rate": 7.57774186230257e-06, "loss": 0.0036, "step": 48260 }, { "epoch": 0.7898224658430827, "grad_norm": 0.06014297530055046, "learning_rate": 7.576518241164683e-06, "loss": 0.0028, "step": 48270 }, { "epoch": 0.7899860917941586, "grad_norm": 0.06189087778329849, "learning_rate": 7.575294409894733e-06, "loss": 0.0019, "step": 48280 }, { "epoch": 0.7901497177452343, "grad_norm": 0.07456030696630478, "learning_rate": 7.574070368592534e-06, "loss": 0.0017, "step": 48290 }, { "epoch": 0.7903133436963102, "grad_norm": 0.19125349819660187, "learning_rate": 7.572846117357914e-06, "loss": 0.0035, "step": 48300 }, { "epoch": 0.7904769696473861, "grad_norm": 0.011991233564913273, "learning_rate": 7.571621656290717e-06, "loss": 0.0016, "step": 48310 }, { "epoch": 0.7906405955984619, "grad_norm": 0.1326286643743515, "learning_rate": 7.570396985490808e-06, "loss": 0.0032, "step": 48320 }, { "epoch": 0.7908042215495378, "grad_norm": 0.035251643508672714, "learning_rate": 7.569172105058064e-06, "loss": 0.0023, "step": 48330 }, { "epoch": 0.7909678475006136, "grad_norm": 0.03234274312853813, "learning_rate": 7.5679470150923876e-06, "loss": 0.0025, "step": 48340 }, { "epoch": 0.7911314734516894, "grad_norm": 0.08079449087381363, "learning_rate": 7.566721715693688e-06, "loss": 0.0036, "step": 48350 }, { "epoch": 0.7912950994027653, "grad_norm": 0.06091868504881859, "learning_rate": 7.565496206961897e-06, "loss": 0.0031, "step": 48360 }, { "epoch": 0.7914587253538411, "grad_norm": 0.05930716544389725, "learning_rate": 7.564270488996966e-06, "loss": 0.0036, "step": 48370 }, { "epoch": 0.791622351304917, "grad_norm": 0.1460629254579544, "learning_rate": 7.5630445618988566e-06, "loss": 0.003, "step": 48380 }, { "epoch": 0.7917859772559928, "grad_norm": 0.055982090532779694, "learning_rate": 7.561818425767553e-06, "loss": 0.0029, "step": 48390 }, { "epoch": 0.7919496032070686, "grad_norm": 0.020283877849578857, "learning_rate": 7.560592080703055e-06, "loss": 0.0014, "step": 48400 }, { "epoch": 0.7921132291581445, "grad_norm": 0.06365669518709183, "learning_rate": 7.55936552680538e-06, "loss": 0.002, "step": 48410 }, { "epoch": 0.7922768551092203, "grad_norm": 0.17802797257900238, "learning_rate": 7.558138764174558e-06, "loss": 0.0018, "step": 48420 }, { "epoch": 0.7924404810602962, "grad_norm": 0.1036376878619194, "learning_rate": 7.556911792910644e-06, "loss": 0.0026, "step": 48430 }, { "epoch": 0.792604107011372, "grad_norm": 0.18839232623577118, "learning_rate": 7.555684613113703e-06, "loss": 0.0022, "step": 48440 }, { "epoch": 0.7927677329624478, "grad_norm": 0.37500232458114624, "learning_rate": 7.55445722488382e-06, "loss": 0.0013, "step": 48450 }, { "epoch": 0.7929313589135237, "grad_norm": 0.06019440293312073, "learning_rate": 7.553229628321097e-06, "loss": 0.0013, "step": 48460 }, { "epoch": 0.7930949848645995, "grad_norm": 0.07467138767242432, "learning_rate": 7.552001823525652e-06, "loss": 0.0017, "step": 48470 }, { "epoch": 0.7932586108156754, "grad_norm": 0.2778966724872589, "learning_rate": 7.55077381059762e-06, "loss": 0.0018, "step": 48480 }, { "epoch": 0.7934222367667512, "grad_norm": 0.04924745112657547, "learning_rate": 7.549545589637156e-06, "loss": 0.0027, "step": 48490 }, { "epoch": 0.793585862717827, "grad_norm": 0.10963691025972366, "learning_rate": 7.548317160744427e-06, "loss": 0.0028, "step": 48500 }, { "epoch": 0.7937494886689029, "grad_norm": 0.09508652240037918, "learning_rate": 7.547088524019622e-06, "loss": 0.0023, "step": 48510 }, { "epoch": 0.7939131146199787, "grad_norm": 0.3775133192539215, "learning_rate": 7.545859679562942e-06, "loss": 0.0056, "step": 48520 }, { "epoch": 0.7940767405710546, "grad_norm": 0.19377544522285461, "learning_rate": 7.544630627474609e-06, "loss": 0.0013, "step": 48530 }, { "epoch": 0.7942403665221304, "grad_norm": 0.09341082721948624, "learning_rate": 7.543401367854859e-06, "loss": 0.0025, "step": 48540 }, { "epoch": 0.7944039924732063, "grad_norm": 0.10973850637674332, "learning_rate": 7.542171900803945e-06, "loss": 0.0036, "step": 48550 }, { "epoch": 0.7945676184242821, "grad_norm": 0.08497956395149231, "learning_rate": 7.540942226422143e-06, "loss": 0.0027, "step": 48560 }, { "epoch": 0.7947312443753579, "grad_norm": 0.07113735377788544, "learning_rate": 7.5397123448097354e-06, "loss": 0.0028, "step": 48570 }, { "epoch": 0.7948948703264338, "grad_norm": 0.13081003725528717, "learning_rate": 7.538482256067032e-06, "loss": 0.002, "step": 48580 }, { "epoch": 0.7950584962775096, "grad_norm": 0.026676848530769348, "learning_rate": 7.537251960294352e-06, "loss": 0.0016, "step": 48590 }, { "epoch": 0.7952221222285855, "grad_norm": 0.13389082252979279, "learning_rate": 7.536021457592033e-06, "loss": 0.0016, "step": 48600 }, { "epoch": 0.7953857481796613, "grad_norm": 0.01036846823990345, "learning_rate": 7.534790748060434e-06, "loss": 0.0019, "step": 48610 }, { "epoch": 0.7955493741307371, "grad_norm": 0.04020111635327339, "learning_rate": 7.533559831799925e-06, "loss": 0.0021, "step": 48620 }, { "epoch": 0.795713000081813, "grad_norm": 0.18633858859539032, "learning_rate": 7.532328708910897e-06, "loss": 0.0021, "step": 48630 }, { "epoch": 0.7958766260328888, "grad_norm": 0.18273411691188812, "learning_rate": 7.531097379493752e-06, "loss": 0.0019, "step": 48640 }, { "epoch": 0.7960402519839647, "grad_norm": 0.164573535323143, "learning_rate": 7.52986584364892e-06, "loss": 0.0027, "step": 48650 }, { "epoch": 0.7962038779350405, "grad_norm": 0.05422735959291458, "learning_rate": 7.528634101476835e-06, "loss": 0.0028, "step": 48660 }, { "epoch": 0.7963675038861163, "grad_norm": 0.09189562499523163, "learning_rate": 7.5274021530779565e-06, "loss": 0.0022, "step": 48670 }, { "epoch": 0.7965311298371922, "grad_norm": 0.01544390432536602, "learning_rate": 7.526169998552757e-06, "loss": 0.0031, "step": 48680 }, { "epoch": 0.796694755788268, "grad_norm": 0.021614916622638702, "learning_rate": 7.524937638001728e-06, "loss": 0.002, "step": 48690 }, { "epoch": 0.7968583817393439, "grad_norm": 0.2687177062034607, "learning_rate": 7.5237050715253755e-06, "loss": 0.0045, "step": 48700 }, { "epoch": 0.7970220076904198, "grad_norm": 0.2881016433238983, "learning_rate": 7.522472299224224e-06, "loss": 0.0075, "step": 48710 }, { "epoch": 0.7971856336414955, "grad_norm": 0.07347449660301208, "learning_rate": 7.521239321198813e-06, "loss": 0.0032, "step": 48720 }, { "epoch": 0.7973492595925714, "grad_norm": 0.09159993380308151, "learning_rate": 7.520006137549702e-06, "loss": 0.0025, "step": 48730 }, { "epoch": 0.7975128855436472, "grad_norm": 0.06818033754825592, "learning_rate": 7.518772748377463e-06, "loss": 0.0025, "step": 48740 }, { "epoch": 0.7976765114947231, "grad_norm": 0.07374434918165207, "learning_rate": 7.5175391537826894e-06, "loss": 0.0021, "step": 48750 }, { "epoch": 0.797840137445799, "grad_norm": 0.08558545261621475, "learning_rate": 7.516305353865988e-06, "loss": 0.002, "step": 48760 }, { "epoch": 0.7980037633968747, "grad_norm": 0.019095228984951973, "learning_rate": 7.5150713487279826e-06, "loss": 0.0019, "step": 48770 }, { "epoch": 0.7981673893479506, "grad_norm": 0.2626863420009613, "learning_rate": 7.513837138469315e-06, "loss": 0.0031, "step": 48780 }, { "epoch": 0.7983310152990264, "grad_norm": 0.11407846212387085, "learning_rate": 7.512602723190643e-06, "loss": 0.0024, "step": 48790 }, { "epoch": 0.7984946412501023, "grad_norm": 0.032676588743925095, "learning_rate": 7.5113681029926425e-06, "loss": 0.002, "step": 48800 }, { "epoch": 0.798658267201178, "grad_norm": 0.0805685818195343, "learning_rate": 7.510133277976002e-06, "loss": 0.0021, "step": 48810 }, { "epoch": 0.7988218931522539, "grad_norm": 0.1352633535861969, "learning_rate": 7.508898248241433e-06, "loss": 0.0015, "step": 48820 }, { "epoch": 0.7989855191033298, "grad_norm": 0.05259109288454056, "learning_rate": 7.50766301388966e-06, "loss": 0.0046, "step": 48830 }, { "epoch": 0.7991491450544056, "grad_norm": 0.14377066493034363, "learning_rate": 7.506427575021422e-06, "loss": 0.0037, "step": 48840 }, { "epoch": 0.7993127710054815, "grad_norm": 0.08536365628242493, "learning_rate": 7.505191931737479e-06, "loss": 0.002, "step": 48850 }, { "epoch": 0.7994763969565573, "grad_norm": 0.12085624039173126, "learning_rate": 7.503956084138604e-06, "loss": 0.0027, "step": 48860 }, { "epoch": 0.7996400229076331, "grad_norm": 0.025957763195037842, "learning_rate": 7.502720032325592e-06, "loss": 0.0021, "step": 48870 }, { "epoch": 0.799803648858709, "grad_norm": 0.03195950761437416, "learning_rate": 7.501483776399248e-06, "loss": 0.0018, "step": 48880 }, { "epoch": 0.7999672748097848, "grad_norm": 0.059326086193323135, "learning_rate": 7.500247316460399e-06, "loss": 0.0024, "step": 48890 }, { "epoch": 0.8001309007608607, "grad_norm": 0.04250849783420563, "learning_rate": 7.499010652609884e-06, "loss": 0.0022, "step": 48900 }, { "epoch": 0.8002945267119365, "grad_norm": 0.1613055318593979, "learning_rate": 7.497773784948562e-06, "loss": 0.0022, "step": 48910 }, { "epoch": 0.8004581526630123, "grad_norm": 0.05244581773877144, "learning_rate": 7.4965367135773095e-06, "loss": 0.003, "step": 48920 }, { "epoch": 0.8006217786140882, "grad_norm": 0.049232061952352524, "learning_rate": 7.495299438597017e-06, "loss": 0.002, "step": 48930 }, { "epoch": 0.800785404565164, "grad_norm": 0.18946042656898499, "learning_rate": 7.494061960108591e-06, "loss": 0.0026, "step": 48940 }, { "epoch": 0.8009490305162399, "grad_norm": 0.033381387591362, "learning_rate": 7.4928242782129575e-06, "loss": 0.0009, "step": 48950 }, { "epoch": 0.8011126564673157, "grad_norm": 0.11923344433307648, "learning_rate": 7.491586393011058e-06, "loss": 0.0025, "step": 48960 }, { "epoch": 0.8012762824183915, "grad_norm": 0.30180180072784424, "learning_rate": 7.490348304603848e-06, "loss": 0.0043, "step": 48970 }, { "epoch": 0.8014399083694674, "grad_norm": 0.01668470911681652, "learning_rate": 7.489110013092304e-06, "loss": 0.0012, "step": 48980 }, { "epoch": 0.8016035343205432, "grad_norm": 0.014078144915401936, "learning_rate": 7.487871518577417e-06, "loss": 0.0018, "step": 48990 }, { "epoch": 0.8017671602716191, "grad_norm": 0.08132600039243698, "learning_rate": 7.486632821160192e-06, "loss": 0.0019, "step": 49000 }, { "epoch": 0.8019307862226949, "grad_norm": 0.02935425005853176, "learning_rate": 7.485393920941654e-06, "loss": 0.0036, "step": 49010 }, { "epoch": 0.8020944121737708, "grad_norm": 0.029493147507309914, "learning_rate": 7.484154818022844e-06, "loss": 0.0021, "step": 49020 }, { "epoch": 0.8022580381248466, "grad_norm": 0.03932320699095726, "learning_rate": 7.48291551250482e-06, "loss": 0.0043, "step": 49030 }, { "epoch": 0.8024216640759224, "grad_norm": 0.12742534279823303, "learning_rate": 7.481676004488654e-06, "loss": 0.0031, "step": 49040 }, { "epoch": 0.8025852900269983, "grad_norm": 0.06556879729032516, "learning_rate": 7.480436294075437e-06, "loss": 0.0028, "step": 49050 }, { "epoch": 0.8027489159780741, "grad_norm": 0.09390657395124435, "learning_rate": 7.479196381366274e-06, "loss": 0.0025, "step": 49060 }, { "epoch": 0.80291254192915, "grad_norm": 0.13028788566589355, "learning_rate": 7.477956266462289e-06, "loss": 0.0018, "step": 49070 }, { "epoch": 0.8030761678802258, "grad_norm": 0.20747381448745728, "learning_rate": 7.476715949464621e-06, "loss": 0.0021, "step": 49080 }, { "epoch": 0.8032397938313016, "grad_norm": 0.0537344254553318, "learning_rate": 7.475475430474428e-06, "loss": 0.0016, "step": 49090 }, { "epoch": 0.8034034197823775, "grad_norm": 0.16066353023052216, "learning_rate": 7.4742347095928815e-06, "loss": 0.0031, "step": 49100 }, { "epoch": 0.8035670457334533, "grad_norm": 0.22176072001457214, "learning_rate": 7.47299378692117e-06, "loss": 0.0013, "step": 49110 }, { "epoch": 0.8037306716845292, "grad_norm": 0.06017155572772026, "learning_rate": 7.4717526625604984e-06, "loss": 0.0024, "step": 49120 }, { "epoch": 0.803894297635605, "grad_norm": 0.03117782063782215, "learning_rate": 7.470511336612089e-06, "loss": 0.0033, "step": 49130 }, { "epoch": 0.8040579235866808, "grad_norm": 0.1709616333246231, "learning_rate": 7.4692698091771805e-06, "loss": 0.0016, "step": 49140 }, { "epoch": 0.8042215495377567, "grad_norm": 0.027601182460784912, "learning_rate": 7.468028080357028e-06, "loss": 0.0026, "step": 49150 }, { "epoch": 0.8043851754888325, "grad_norm": 0.07321617752313614, "learning_rate": 7.466786150252903e-06, "loss": 0.0025, "step": 49160 }, { "epoch": 0.8045488014399084, "grad_norm": 0.0664752945303917, "learning_rate": 7.465544018966091e-06, "loss": 0.0039, "step": 49170 }, { "epoch": 0.8047124273909843, "grad_norm": 0.046098992228507996, "learning_rate": 7.464301686597898e-06, "loss": 0.0026, "step": 49180 }, { "epoch": 0.80487605334206, "grad_norm": 0.04111822694540024, "learning_rate": 7.463059153249644e-06, "loss": 0.004, "step": 49190 }, { "epoch": 0.8050396792931359, "grad_norm": 0.06876882910728455, "learning_rate": 7.461816419022664e-06, "loss": 0.0022, "step": 49200 }, { "epoch": 0.8052033052442117, "grad_norm": 0.11711051315069199, "learning_rate": 7.460573484018314e-06, "loss": 0.0019, "step": 49210 }, { "epoch": 0.8053669311952876, "grad_norm": 0.13310858607292175, "learning_rate": 7.459330348337963e-06, "loss": 0.0024, "step": 49220 }, { "epoch": 0.8055305571463635, "grad_norm": 0.3619876205921173, "learning_rate": 7.458087012082995e-06, "loss": 0.0023, "step": 49230 }, { "epoch": 0.8056941830974392, "grad_norm": 0.08216134458780289, "learning_rate": 7.456843475354813e-06, "loss": 0.0021, "step": 49240 }, { "epoch": 0.8058578090485151, "grad_norm": 0.2117079347372055, "learning_rate": 7.455599738254837e-06, "loss": 0.0046, "step": 49250 }, { "epoch": 0.8060214349995909, "grad_norm": 0.02750161848962307, "learning_rate": 7.4543558008845005e-06, "loss": 0.0026, "step": 49260 }, { "epoch": 0.8061850609506668, "grad_norm": 0.053104374557733536, "learning_rate": 7.453111663345255e-06, "loss": 0.001, "step": 49270 }, { "epoch": 0.8063486869017427, "grad_norm": 0.029090924188494682, "learning_rate": 7.451867325738568e-06, "loss": 0.0015, "step": 49280 }, { "epoch": 0.8065123128528184, "grad_norm": 0.11263518780469894, "learning_rate": 7.450622788165926e-06, "loss": 0.0019, "step": 49290 }, { "epoch": 0.8066759388038943, "grad_norm": 0.07891736924648285, "learning_rate": 7.449378050728826e-06, "loss": 0.0026, "step": 49300 }, { "epoch": 0.8068395647549701, "grad_norm": 0.22481808066368103, "learning_rate": 7.448133113528785e-06, "loss": 0.0046, "step": 49310 }, { "epoch": 0.807003190706046, "grad_norm": 0.02819298394024372, "learning_rate": 7.446887976667338e-06, "loss": 0.0026, "step": 49320 }, { "epoch": 0.8071668166571219, "grad_norm": 0.12364399433135986, "learning_rate": 7.445642640246032e-06, "loss": 0.0014, "step": 49330 }, { "epoch": 0.8073304426081976, "grad_norm": 0.030073098838329315, "learning_rate": 7.444397104366432e-06, "loss": 0.0016, "step": 49340 }, { "epoch": 0.8074940685592735, "grad_norm": 0.13526415824890137, "learning_rate": 7.4431513691301215e-06, "loss": 0.002, "step": 49350 }, { "epoch": 0.8076576945103493, "grad_norm": 0.1513245701789856, "learning_rate": 7.441905434638697e-06, "loss": 0.0026, "step": 49360 }, { "epoch": 0.8078213204614252, "grad_norm": 0.05508732423186302, "learning_rate": 7.4406593009937746e-06, "loss": 0.0018, "step": 49370 }, { "epoch": 0.8079849464125011, "grad_norm": 0.008226108737289906, "learning_rate": 7.4394129682969815e-06, "loss": 0.0013, "step": 49380 }, { "epoch": 0.8081485723635768, "grad_norm": 0.01117747463285923, "learning_rate": 7.438166436649968e-06, "loss": 0.0019, "step": 49390 }, { "epoch": 0.8083121983146527, "grad_norm": 0.03988465294241905, "learning_rate": 7.4369197061543955e-06, "loss": 0.0024, "step": 49400 }, { "epoch": 0.8084758242657285, "grad_norm": 0.13381509482860565, "learning_rate": 7.435672776911942e-06, "loss": 0.0024, "step": 49410 }, { "epoch": 0.8086394502168044, "grad_norm": 0.13291525840759277, "learning_rate": 7.434425649024304e-06, "loss": 0.0023, "step": 49420 }, { "epoch": 0.8088030761678803, "grad_norm": 0.0682954490184784, "learning_rate": 7.4331783225931934e-06, "loss": 0.003, "step": 49430 }, { "epoch": 0.808966702118956, "grad_norm": 0.06010191887617111, "learning_rate": 7.431930797720336e-06, "loss": 0.0008, "step": 49440 }, { "epoch": 0.8091303280700319, "grad_norm": 0.07638275623321533, "learning_rate": 7.430683074507478e-06, "loss": 0.0022, "step": 49450 }, { "epoch": 0.8092939540211077, "grad_norm": 0.10926320403814316, "learning_rate": 7.429435153056377e-06, "loss": 0.0022, "step": 49460 }, { "epoch": 0.8094575799721836, "grad_norm": 0.08077213913202286, "learning_rate": 7.428187033468811e-06, "loss": 0.0015, "step": 49470 }, { "epoch": 0.8096212059232595, "grad_norm": 0.05735640600323677, "learning_rate": 7.426938715846572e-06, "loss": 0.0014, "step": 49480 }, { "epoch": 0.8097848318743353, "grad_norm": 0.045037779957056046, "learning_rate": 7.425690200291469e-06, "loss": 0.0016, "step": 49490 }, { "epoch": 0.8099484578254111, "grad_norm": 0.06679581105709076, "learning_rate": 7.424441486905326e-06, "loss": 0.0029, "step": 49500 }, { "epoch": 0.8101120837764869, "grad_norm": 0.07620979100465775, "learning_rate": 7.423192575789984e-06, "loss": 0.0023, "step": 49510 }, { "epoch": 0.8102757097275628, "grad_norm": 0.05226852372288704, "learning_rate": 7.4219434670473e-06, "loss": 0.0018, "step": 49520 }, { "epoch": 0.8104393356786387, "grad_norm": 0.02041042409837246, "learning_rate": 7.4206941607791474e-06, "loss": 0.003, "step": 49530 }, { "epoch": 0.8106029616297145, "grad_norm": 0.12250716239213943, "learning_rate": 7.419444657087413e-06, "loss": 0.0035, "step": 49540 }, { "epoch": 0.8107665875807903, "grad_norm": 0.13922275602817535, "learning_rate": 7.418194956074007e-06, "loss": 0.0019, "step": 49550 }, { "epoch": 0.8109302135318661, "grad_norm": 0.051301538944244385, "learning_rate": 7.416945057840847e-06, "loss": 0.0022, "step": 49560 }, { "epoch": 0.811093839482942, "grad_norm": 0.06858295202255249, "learning_rate": 7.41569496248987e-06, "loss": 0.0018, "step": 49570 }, { "epoch": 0.8112574654340179, "grad_norm": 0.035342417657375336, "learning_rate": 7.414444670123031e-06, "loss": 0.0021, "step": 49580 }, { "epoch": 0.8114210913850937, "grad_norm": 0.07623059302568436, "learning_rate": 7.413194180842299e-06, "loss": 0.0032, "step": 49590 }, { "epoch": 0.8115847173361695, "grad_norm": 0.028298957273364067, "learning_rate": 7.41194349474966e-06, "loss": 0.0013, "step": 49600 }, { "epoch": 0.8117483432872453, "grad_norm": 0.020288070663809776, "learning_rate": 7.410692611947117e-06, "loss": 0.002, "step": 49610 }, { "epoch": 0.8119119692383212, "grad_norm": 0.024038251489400864, "learning_rate": 7.409441532536686e-06, "loss": 0.0028, "step": 49620 }, { "epoch": 0.8120755951893971, "grad_norm": 0.030338119715452194, "learning_rate": 7.4081902566204015e-06, "loss": 0.002, "step": 49630 }, { "epoch": 0.8122392211404729, "grad_norm": 0.01666412688791752, "learning_rate": 7.406938784300312e-06, "loss": 0.0027, "step": 49640 }, { "epoch": 0.8124028470915488, "grad_norm": 0.15088599920272827, "learning_rate": 7.4056871156784844e-06, "loss": 0.0024, "step": 49650 }, { "epoch": 0.8125664730426245, "grad_norm": 0.03537040948867798, "learning_rate": 7.404435250857002e-06, "loss": 0.0026, "step": 49660 }, { "epoch": 0.8127300989937004, "grad_norm": 0.1229194775223732, "learning_rate": 7.403183189937959e-06, "loss": 0.0017, "step": 49670 }, { "epoch": 0.8128937249447763, "grad_norm": 0.1257980316877365, "learning_rate": 7.4019309330234735e-06, "loss": 0.0021, "step": 49680 }, { "epoch": 0.8130573508958521, "grad_norm": 0.07160530984401703, "learning_rate": 7.4006784802156715e-06, "loss": 0.0013, "step": 49690 }, { "epoch": 0.813220976846928, "grad_norm": 0.08276446163654327, "learning_rate": 7.399425831616701e-06, "loss": 0.002, "step": 49700 }, { "epoch": 0.8133846027980037, "grad_norm": 0.03314686939120293, "learning_rate": 7.3981729873287234e-06, "loss": 0.0022, "step": 49710 }, { "epoch": 0.8135482287490796, "grad_norm": 0.21276918053627014, "learning_rate": 7.396919947453916e-06, "loss": 0.0021, "step": 49720 }, { "epoch": 0.8137118547001554, "grad_norm": 0.14992153644561768, "learning_rate": 7.3956667120944735e-06, "loss": 0.0033, "step": 49730 }, { "epoch": 0.8138754806512313, "grad_norm": 0.0671754702925682, "learning_rate": 7.394413281352604e-06, "loss": 0.0021, "step": 49740 }, { "epoch": 0.8140391066023072, "grad_norm": 0.09255821257829666, "learning_rate": 7.393159655330534e-06, "loss": 0.0017, "step": 49750 }, { "epoch": 0.8142027325533829, "grad_norm": 0.08014623820781708, "learning_rate": 7.391905834130504e-06, "loss": 0.0028, "step": 49760 }, { "epoch": 0.8143663585044588, "grad_norm": 0.09309299290180206, "learning_rate": 7.3906518178547725e-06, "loss": 0.0012, "step": 49770 }, { "epoch": 0.8145299844555346, "grad_norm": 0.12776319682598114, "learning_rate": 7.389397606605612e-06, "loss": 0.003, "step": 49780 }, { "epoch": 0.8146936104066105, "grad_norm": 0.07657567411661148, "learning_rate": 7.388143200485314e-06, "loss": 0.0028, "step": 49790 }, { "epoch": 0.8148572363576864, "grad_norm": 0.05055200308561325, "learning_rate": 7.38688859959618e-06, "loss": 0.0017, "step": 49800 }, { "epoch": 0.8150208623087621, "grad_norm": 0.007001059129834175, "learning_rate": 7.385633804040534e-06, "loss": 0.0018, "step": 49810 }, { "epoch": 0.815184488259838, "grad_norm": 0.03933168202638626, "learning_rate": 7.3843788139207105e-06, "loss": 0.0041, "step": 49820 }, { "epoch": 0.8153481142109138, "grad_norm": 0.059958551079034805, "learning_rate": 7.383123629339064e-06, "loss": 0.0025, "step": 49830 }, { "epoch": 0.8155117401619897, "grad_norm": 0.048081789165735245, "learning_rate": 7.3818682503979626e-06, "loss": 0.0017, "step": 49840 }, { "epoch": 0.8156753661130656, "grad_norm": 0.06314675509929657, "learning_rate": 7.38061267719979e-06, "loss": 0.002, "step": 49850 }, { "epoch": 0.8158389920641413, "grad_norm": 0.11797534674406052, "learning_rate": 7.379356909846946e-06, "loss": 0.005, "step": 49860 }, { "epoch": 0.8160026180152172, "grad_norm": 0.04023521766066551, "learning_rate": 7.37810094844185e-06, "loss": 0.002, "step": 49870 }, { "epoch": 0.816166243966293, "grad_norm": 0.06246769800782204, "learning_rate": 7.3768447930869306e-06, "loss": 0.0023, "step": 49880 }, { "epoch": 0.8163298699173689, "grad_norm": 0.07203426212072372, "learning_rate": 7.375588443884636e-06, "loss": 0.0026, "step": 49890 }, { "epoch": 0.8164934958684448, "grad_norm": 0.0768526941537857, "learning_rate": 7.37433190093743e-06, "loss": 0.003, "step": 49900 }, { "epoch": 0.8166571218195205, "grad_norm": 0.027911782264709473, "learning_rate": 7.373075164347794e-06, "loss": 0.0034, "step": 49910 }, { "epoch": 0.8168207477705964, "grad_norm": 0.03574071452021599, "learning_rate": 7.3718182342182196e-06, "loss": 0.0023, "step": 49920 }, { "epoch": 0.8169843737216722, "grad_norm": 0.0208068136125803, "learning_rate": 7.370561110651221e-06, "loss": 0.0016, "step": 49930 }, { "epoch": 0.8171479996727481, "grad_norm": 0.055411193519830704, "learning_rate": 7.369303793749323e-06, "loss": 0.0016, "step": 49940 }, { "epoch": 0.817311625623824, "grad_norm": 0.032277438789606094, "learning_rate": 7.368046283615069e-06, "loss": 0.0028, "step": 49950 }, { "epoch": 0.8174752515748998, "grad_norm": 0.052767179906368256, "learning_rate": 7.366788580351016e-06, "loss": 0.0019, "step": 49960 }, { "epoch": 0.8176388775259756, "grad_norm": 0.020436806604266167, "learning_rate": 7.3655306840597395e-06, "loss": 0.0017, "step": 49970 }, { "epoch": 0.8178025034770514, "grad_norm": 0.07657230645418167, "learning_rate": 7.364272594843829e-06, "loss": 0.0015, "step": 49980 }, { "epoch": 0.8179661294281273, "grad_norm": 0.016491534188389778, "learning_rate": 7.36301431280589e-06, "loss": 0.0027, "step": 49990 }, { "epoch": 0.8181297553792032, "grad_norm": 0.024747731164097786, "learning_rate": 7.361755838048542e-06, "loss": 0.0017, "step": 50000 }, { "epoch": 0.818293381330279, "grad_norm": 0.12155485153198242, "learning_rate": 7.3604971706744235e-06, "loss": 0.0031, "step": 50010 }, { "epoch": 0.8184570072813548, "grad_norm": 0.08477406948804855, "learning_rate": 7.359238310786187e-06, "loss": 0.0027, "step": 50020 }, { "epoch": 0.8186206332324306, "grad_norm": 0.11422862857580185, "learning_rate": 7.357979258486501e-06, "loss": 0.0025, "step": 50030 }, { "epoch": 0.8187842591835065, "grad_norm": 0.14531439542770386, "learning_rate": 7.3567200138780495e-06, "loss": 0.0032, "step": 50040 }, { "epoch": 0.8189478851345824, "grad_norm": 0.10129663348197937, "learning_rate": 7.3554605770635315e-06, "loss": 0.0022, "step": 50050 }, { "epoch": 0.8191115110856582, "grad_norm": 0.01215177308768034, "learning_rate": 7.354200948145662e-06, "loss": 0.0021, "step": 50060 }, { "epoch": 0.819275137036734, "grad_norm": 0.03285469114780426, "learning_rate": 7.352941127227174e-06, "loss": 0.0019, "step": 50070 }, { "epoch": 0.8194387629878098, "grad_norm": 0.04930134490132332, "learning_rate": 7.351681114410814e-06, "loss": 0.0016, "step": 50080 }, { "epoch": 0.8196023889388857, "grad_norm": 0.08564666658639908, "learning_rate": 7.35042090979934e-06, "loss": 0.0027, "step": 50090 }, { "epoch": 0.8197660148899616, "grad_norm": 0.10493818670511246, "learning_rate": 7.349160513495537e-06, "loss": 0.003, "step": 50100 }, { "epoch": 0.8199296408410374, "grad_norm": 0.07953081279993057, "learning_rate": 7.347899925602193e-06, "loss": 0.0027, "step": 50110 }, { "epoch": 0.8200932667921133, "grad_norm": 0.03999391198158264, "learning_rate": 7.346639146222121e-06, "loss": 0.0017, "step": 50120 }, { "epoch": 0.820256892743189, "grad_norm": 0.09898420423269272, "learning_rate": 7.3453781754581425e-06, "loss": 0.0036, "step": 50130 }, { "epoch": 0.8204205186942649, "grad_norm": 0.11206120997667313, "learning_rate": 7.3441170134131e-06, "loss": 0.0026, "step": 50140 }, { "epoch": 0.8205841446453408, "grad_norm": 0.0876530334353447, "learning_rate": 7.342855660189849e-06, "loss": 0.0022, "step": 50150 }, { "epoch": 0.8207477705964166, "grad_norm": 0.05326462909579277, "learning_rate": 7.3415941158912625e-06, "loss": 0.0019, "step": 50160 }, { "epoch": 0.8209113965474925, "grad_norm": 0.06661269813776016, "learning_rate": 7.340332380620226e-06, "loss": 0.0022, "step": 50170 }, { "epoch": 0.8210750224985682, "grad_norm": 0.06276343762874603, "learning_rate": 7.339070454479645e-06, "loss": 0.0028, "step": 50180 }, { "epoch": 0.8212386484496441, "grad_norm": 0.09733522683382034, "learning_rate": 7.337808337572434e-06, "loss": 0.0018, "step": 50190 }, { "epoch": 0.82140227440072, "grad_norm": 0.05775713920593262, "learning_rate": 7.336546030001529e-06, "loss": 0.0032, "step": 50200 }, { "epoch": 0.8215659003517958, "grad_norm": 0.06083736941218376, "learning_rate": 7.335283531869879e-06, "loss": 0.0018, "step": 50210 }, { "epoch": 0.8217295263028717, "grad_norm": 0.09311966598033905, "learning_rate": 7.334020843280451e-06, "loss": 0.002, "step": 50220 }, { "epoch": 0.8218931522539474, "grad_norm": 0.029629850760102272, "learning_rate": 7.332757964336222e-06, "loss": 0.0022, "step": 50230 }, { "epoch": 0.8220567782050233, "grad_norm": 0.05260154604911804, "learning_rate": 7.331494895140192e-06, "loss": 0.0021, "step": 50240 }, { "epoch": 0.8222204041560992, "grad_norm": 0.11988123506307602, "learning_rate": 7.330231635795369e-06, "loss": 0.0029, "step": 50250 }, { "epoch": 0.822384030107175, "grad_norm": 0.07858241349458694, "learning_rate": 7.3289681864047835e-06, "loss": 0.0017, "step": 50260 }, { "epoch": 0.8225476560582509, "grad_norm": 0.028480354696512222, "learning_rate": 7.327704547071476e-06, "loss": 0.0018, "step": 50270 }, { "epoch": 0.8227112820093266, "grad_norm": 0.08678663522005081, "learning_rate": 7.3264407178985055e-06, "loss": 0.0023, "step": 50280 }, { "epoch": 0.8228749079604025, "grad_norm": 0.01689848117530346, "learning_rate": 7.325176698988945e-06, "loss": 0.0015, "step": 50290 }, { "epoch": 0.8230385339114784, "grad_norm": 0.02860642597079277, "learning_rate": 7.323912490445884e-06, "loss": 0.0022, "step": 50300 }, { "epoch": 0.8232021598625542, "grad_norm": 0.682255208492279, "learning_rate": 7.322648092372426e-06, "loss": 0.0018, "step": 50310 }, { "epoch": 0.8233657858136301, "grad_norm": 0.11716106534004211, "learning_rate": 7.321383504871692e-06, "loss": 0.0031, "step": 50320 }, { "epoch": 0.8235294117647058, "grad_norm": 0.05334879085421562, "learning_rate": 7.320118728046818e-06, "loss": 0.0022, "step": 50330 }, { "epoch": 0.8236930377157817, "grad_norm": 0.13172274827957153, "learning_rate": 7.318853762000955e-06, "loss": 0.0015, "step": 50340 }, { "epoch": 0.8238566636668576, "grad_norm": 0.04253697395324707, "learning_rate": 7.317588606837267e-06, "loss": 0.0048, "step": 50350 }, { "epoch": 0.8240202896179334, "grad_norm": 0.16102904081344604, "learning_rate": 7.316323262658939e-06, "loss": 0.0021, "step": 50360 }, { "epoch": 0.8241839155690093, "grad_norm": 0.10712534934282303, "learning_rate": 7.3150577295691646e-06, "loss": 0.0014, "step": 50370 }, { "epoch": 0.824347541520085, "grad_norm": 0.11200733482837677, "learning_rate": 7.31379200767116e-06, "loss": 0.0009, "step": 50380 }, { "epoch": 0.8245111674711609, "grad_norm": 0.03960917890071869, "learning_rate": 7.312526097068151e-06, "loss": 0.0018, "step": 50390 }, { "epoch": 0.8246747934222368, "grad_norm": 0.0998501107096672, "learning_rate": 7.31125999786338e-06, "loss": 0.002, "step": 50400 }, { "epoch": 0.8248384193733126, "grad_norm": 0.17833255231380463, "learning_rate": 7.309993710160109e-06, "loss": 0.0024, "step": 50410 }, { "epoch": 0.8250020453243885, "grad_norm": 0.05349275469779968, "learning_rate": 7.30872723406161e-06, "loss": 0.0024, "step": 50420 }, { "epoch": 0.8251656712754643, "grad_norm": 0.04850715398788452, "learning_rate": 7.307460569671172e-06, "loss": 0.0022, "step": 50430 }, { "epoch": 0.8253292972265401, "grad_norm": 0.10100100934505463, "learning_rate": 7.306193717092101e-06, "loss": 0.0022, "step": 50440 }, { "epoch": 0.825492923177616, "grad_norm": 0.04609709605574608, "learning_rate": 7.304926676427717e-06, "loss": 0.0013, "step": 50450 }, { "epoch": 0.8256565491286918, "grad_norm": 0.05325142666697502, "learning_rate": 7.303659447781355e-06, "loss": 0.0016, "step": 50460 }, { "epoch": 0.8258201750797677, "grad_norm": 0.0937347561120987, "learning_rate": 7.3023920312563665e-06, "loss": 0.0037, "step": 50470 }, { "epoch": 0.8259838010308435, "grad_norm": 0.09343674033880234, "learning_rate": 7.301124426956117e-06, "loss": 0.0021, "step": 50480 }, { "epoch": 0.8261474269819193, "grad_norm": 0.21492396295070648, "learning_rate": 7.299856634983988e-06, "loss": 0.0024, "step": 50490 }, { "epoch": 0.8263110529329952, "grad_norm": 0.01718193292617798, "learning_rate": 7.298588655443377e-06, "loss": 0.0032, "step": 50500 }, { "epoch": 0.826474678884071, "grad_norm": 0.12893222272396088, "learning_rate": 7.297320488437697e-06, "loss": 0.003, "step": 50510 }, { "epoch": 0.8266383048351469, "grad_norm": 0.02382524125277996, "learning_rate": 7.296052134070373e-06, "loss": 0.0016, "step": 50520 }, { "epoch": 0.8268019307862227, "grad_norm": 0.1249120831489563, "learning_rate": 7.294783592444849e-06, "loss": 0.0023, "step": 50530 }, { "epoch": 0.8269655567372985, "grad_norm": 0.09398402273654938, "learning_rate": 7.293514863664581e-06, "loss": 0.0024, "step": 50540 }, { "epoch": 0.8271291826883744, "grad_norm": 0.03904254361987114, "learning_rate": 7.292245947833047e-06, "loss": 0.0019, "step": 50550 }, { "epoch": 0.8272928086394502, "grad_norm": 0.05136486515402794, "learning_rate": 7.29097684505373e-06, "loss": 0.0026, "step": 50560 }, { "epoch": 0.8274564345905261, "grad_norm": 0.06342675536870956, "learning_rate": 7.289707555430136e-06, "loss": 0.0016, "step": 50570 }, { "epoch": 0.8276200605416019, "grad_norm": 0.07060058414936066, "learning_rate": 7.288438079065786e-06, "loss": 0.0021, "step": 50580 }, { "epoch": 0.8277836864926778, "grad_norm": 0.05490008369088173, "learning_rate": 7.287168416064211e-06, "loss": 0.0019, "step": 50590 }, { "epoch": 0.8279473124437535, "grad_norm": 0.02857094258069992, "learning_rate": 7.285898566528962e-06, "loss": 0.0014, "step": 50600 }, { "epoch": 0.8281109383948294, "grad_norm": 0.04800453782081604, "learning_rate": 7.284628530563602e-06, "loss": 0.0014, "step": 50610 }, { "epoch": 0.8282745643459053, "grad_norm": 0.07711932808160782, "learning_rate": 7.283358308271713e-06, "loss": 0.0023, "step": 50620 }, { "epoch": 0.8284381902969811, "grad_norm": 0.013450018130242825, "learning_rate": 7.282087899756889e-06, "loss": 0.0013, "step": 50630 }, { "epoch": 0.828601816248057, "grad_norm": 0.08079660683870316, "learning_rate": 7.28081730512274e-06, "loss": 0.0022, "step": 50640 }, { "epoch": 0.8287654421991327, "grad_norm": 0.29463911056518555, "learning_rate": 7.279546524472893e-06, "loss": 0.0021, "step": 50650 }, { "epoch": 0.8289290681502086, "grad_norm": 0.11921440809965134, "learning_rate": 7.278275557910985e-06, "loss": 0.0031, "step": 50660 }, { "epoch": 0.8290926941012845, "grad_norm": 0.045350778847932816, "learning_rate": 7.277004405540675e-06, "loss": 0.0018, "step": 50670 }, { "epoch": 0.8292563200523603, "grad_norm": 0.05217655003070831, "learning_rate": 7.275733067465634e-06, "loss": 0.0021, "step": 50680 }, { "epoch": 0.8294199460034362, "grad_norm": 0.1516101360321045, "learning_rate": 7.2744615437895435e-06, "loss": 0.0017, "step": 50690 }, { "epoch": 0.8295835719545119, "grad_norm": 0.16141226887702942, "learning_rate": 7.27318983461611e-06, "loss": 0.0029, "step": 50700 }, { "epoch": 0.8297471979055878, "grad_norm": 0.042797550559043884, "learning_rate": 7.271917940049048e-06, "loss": 0.0017, "step": 50710 }, { "epoch": 0.8299108238566637, "grad_norm": 0.06384925544261932, "learning_rate": 7.270645860192087e-06, "loss": 0.0026, "step": 50720 }, { "epoch": 0.8300744498077395, "grad_norm": 0.018085643649101257, "learning_rate": 7.269373595148976e-06, "loss": 0.0016, "step": 50730 }, { "epoch": 0.8302380757588154, "grad_norm": 0.05272732675075531, "learning_rate": 7.2681011450234765e-06, "loss": 0.0018, "step": 50740 }, { "epoch": 0.8304017017098911, "grad_norm": 0.18431979417800903, "learning_rate": 7.266828509919362e-06, "loss": 0.0024, "step": 50750 }, { "epoch": 0.830565327660967, "grad_norm": 0.0356517918407917, "learning_rate": 7.2655556899404285e-06, "loss": 0.0023, "step": 50760 }, { "epoch": 0.8307289536120429, "grad_norm": 0.3630867600440979, "learning_rate": 7.2642826851904814e-06, "loss": 0.0024, "step": 50770 }, { "epoch": 0.8308925795631187, "grad_norm": 0.2252519577741623, "learning_rate": 7.263009495773341e-06, "loss": 0.0021, "step": 50780 }, { "epoch": 0.8310562055141946, "grad_norm": 0.08125129342079163, "learning_rate": 7.261736121792846e-06, "loss": 0.0025, "step": 50790 }, { "epoch": 0.8312198314652703, "grad_norm": 0.21658673882484436, "learning_rate": 7.260462563352848e-06, "loss": 0.003, "step": 50800 }, { "epoch": 0.8313834574163462, "grad_norm": 0.007952956482768059, "learning_rate": 7.259188820557213e-06, "loss": 0.0051, "step": 50810 }, { "epoch": 0.8315470833674221, "grad_norm": 0.03570942580699921, "learning_rate": 7.257914893509827e-06, "loss": 0.0029, "step": 50820 }, { "epoch": 0.8317107093184979, "grad_norm": 0.08199523389339447, "learning_rate": 7.256640782314581e-06, "loss": 0.0012, "step": 50830 }, { "epoch": 0.8318743352695738, "grad_norm": 0.08071230351924896, "learning_rate": 7.255366487075394e-06, "loss": 0.0032, "step": 50840 }, { "epoch": 0.8320379612206495, "grad_norm": 0.47581747174263, "learning_rate": 7.254092007896187e-06, "loss": 0.0027, "step": 50850 }, { "epoch": 0.8322015871717254, "grad_norm": 0.09428759664297104, "learning_rate": 7.252817344880907e-06, "loss": 0.0019, "step": 50860 }, { "epoch": 0.8323652131228013, "grad_norm": 0.08285456150770187, "learning_rate": 7.251542498133508e-06, "loss": 0.0032, "step": 50870 }, { "epoch": 0.8325288390738771, "grad_norm": 0.0900750681757927, "learning_rate": 7.250267467757963e-06, "loss": 0.0023, "step": 50880 }, { "epoch": 0.832692465024953, "grad_norm": 0.05965256318449974, "learning_rate": 7.2489922538582605e-06, "loss": 0.0021, "step": 50890 }, { "epoch": 0.8328560909760288, "grad_norm": 0.05914861708879471, "learning_rate": 7.247716856538399e-06, "loss": 0.0021, "step": 50900 }, { "epoch": 0.8330197169271046, "grad_norm": 0.06369437277317047, "learning_rate": 7.246441275902401e-06, "loss": 0.0031, "step": 50910 }, { "epoch": 0.8331833428781805, "grad_norm": 0.06461750715970993, "learning_rate": 7.245165512054294e-06, "loss": 0.0017, "step": 50920 }, { "epoch": 0.8333469688292563, "grad_norm": 0.15263749659061432, "learning_rate": 7.243889565098127e-06, "loss": 0.0022, "step": 50930 }, { "epoch": 0.8335105947803322, "grad_norm": 0.036695919930934906, "learning_rate": 7.242613435137961e-06, "loss": 0.0028, "step": 50940 }, { "epoch": 0.833674220731408, "grad_norm": 0.18260805308818817, "learning_rate": 7.241337122277874e-06, "loss": 0.0026, "step": 50950 }, { "epoch": 0.8338378466824838, "grad_norm": 0.08658963441848755, "learning_rate": 7.2400606266219564e-06, "loss": 0.0022, "step": 50960 }, { "epoch": 0.8340014726335597, "grad_norm": 0.012939597479999065, "learning_rate": 7.238783948274315e-06, "loss": 0.0026, "step": 50970 }, { "epoch": 0.8341650985846355, "grad_norm": 0.11531227827072144, "learning_rate": 7.237507087339073e-06, "loss": 0.0018, "step": 50980 }, { "epoch": 0.8343287245357114, "grad_norm": 0.15803728997707367, "learning_rate": 7.236230043920365e-06, "loss": 0.0039, "step": 50990 }, { "epoch": 0.8344923504867872, "grad_norm": 0.025625668466091156, "learning_rate": 7.234952818122343e-06, "loss": 0.0021, "step": 51000 }, { "epoch": 0.834655976437863, "grad_norm": 0.13776859641075134, "learning_rate": 7.233675410049172e-06, "loss": 0.0028, "step": 51010 }, { "epoch": 0.8348196023889389, "grad_norm": 0.19835378229618073, "learning_rate": 7.232397819805035e-06, "loss": 0.0042, "step": 51020 }, { "epoch": 0.8349832283400147, "grad_norm": 0.06411415338516235, "learning_rate": 7.231120047494128e-06, "loss": 0.003, "step": 51030 }, { "epoch": 0.8351468542910906, "grad_norm": 0.08993500471115112, "learning_rate": 7.2298420932206605e-06, "loss": 0.0015, "step": 51040 }, { "epoch": 0.8353104802421664, "grad_norm": 0.011704706586897373, "learning_rate": 7.228563957088858e-06, "loss": 0.0014, "step": 51050 }, { "epoch": 0.8354741061932422, "grad_norm": 0.0824776142835617, "learning_rate": 7.227285639202961e-06, "loss": 0.0024, "step": 51060 }, { "epoch": 0.8356377321443181, "grad_norm": 0.05209881812334061, "learning_rate": 7.226007139667226e-06, "loss": 0.0028, "step": 51070 }, { "epoch": 0.8358013580953939, "grad_norm": 0.08226223289966583, "learning_rate": 7.224728458585921e-06, "loss": 0.0017, "step": 51080 }, { "epoch": 0.8359649840464698, "grad_norm": 0.013612093403935432, "learning_rate": 7.223449596063334e-06, "loss": 0.0022, "step": 51090 }, { "epoch": 0.8361286099975456, "grad_norm": 0.027713894844055176, "learning_rate": 7.222170552203761e-06, "loss": 0.0014, "step": 51100 }, { "epoch": 0.8362922359486215, "grad_norm": 0.19508448243141174, "learning_rate": 7.220891327111519e-06, "loss": 0.0032, "step": 51110 }, { "epoch": 0.8364558618996973, "grad_norm": 0.0737922340631485, "learning_rate": 7.219611920890937e-06, "loss": 0.0017, "step": 51120 }, { "epoch": 0.8366194878507731, "grad_norm": 0.12655296921730042, "learning_rate": 7.2183323336463576e-06, "loss": 0.0027, "step": 51130 }, { "epoch": 0.836783113801849, "grad_norm": 0.10299160331487656, "learning_rate": 7.217052565482141e-06, "loss": 0.0026, "step": 51140 }, { "epoch": 0.8369467397529248, "grad_norm": 0.04209122806787491, "learning_rate": 7.21577261650266e-06, "loss": 0.0024, "step": 51150 }, { "epoch": 0.8371103657040007, "grad_norm": 0.0948806181550026, "learning_rate": 7.214492486812303e-06, "loss": 0.0012, "step": 51160 }, { "epoch": 0.8372739916550765, "grad_norm": 0.07744203507900238, "learning_rate": 7.213212176515474e-06, "loss": 0.0024, "step": 51170 }, { "epoch": 0.8374376176061523, "grad_norm": 0.15388254821300507, "learning_rate": 7.211931685716589e-06, "loss": 0.0033, "step": 51180 }, { "epoch": 0.8376012435572282, "grad_norm": 0.04585542902350426, "learning_rate": 7.2106510145200814e-06, "loss": 0.0031, "step": 51190 }, { "epoch": 0.837764869508304, "grad_norm": 0.04853685572743416, "learning_rate": 7.209370163030398e-06, "loss": 0.0022, "step": 51200 }, { "epoch": 0.8379284954593799, "grad_norm": 0.11210136860609055, "learning_rate": 7.208089131352002e-06, "loss": 0.0024, "step": 51210 }, { "epoch": 0.8380921214104557, "grad_norm": 0.111039899289608, "learning_rate": 7.2068079195893696e-06, "loss": 0.002, "step": 51220 }, { "epoch": 0.8382557473615315, "grad_norm": 0.034838687628507614, "learning_rate": 7.20552652784699e-06, "loss": 0.0021, "step": 51230 }, { "epoch": 0.8384193733126074, "grad_norm": 2.0237979888916016, "learning_rate": 7.20424495622937e-06, "loss": 0.0032, "step": 51240 }, { "epoch": 0.8385829992636832, "grad_norm": 0.04892745986580849, "learning_rate": 7.202963204841033e-06, "loss": 0.0021, "step": 51250 }, { "epoch": 0.8387466252147591, "grad_norm": 0.12003655731678009, "learning_rate": 7.201681273786511e-06, "loss": 0.0028, "step": 51260 }, { "epoch": 0.838910251165835, "grad_norm": 0.13234943151474, "learning_rate": 7.2003991631703554e-06, "loss": 0.0029, "step": 51270 }, { "epoch": 0.8390738771169107, "grad_norm": 0.12959690392017365, "learning_rate": 7.199116873097129e-06, "loss": 0.0016, "step": 51280 }, { "epoch": 0.8392375030679866, "grad_norm": 0.03561769425868988, "learning_rate": 7.1978344036714145e-06, "loss": 0.0024, "step": 51290 }, { "epoch": 0.8394011290190624, "grad_norm": 0.0820692628622055, "learning_rate": 7.196551754997802e-06, "loss": 0.0019, "step": 51300 }, { "epoch": 0.8395647549701383, "grad_norm": 0.22885608673095703, "learning_rate": 7.1952689271809025e-06, "loss": 0.0027, "step": 51310 }, { "epoch": 0.8397283809212142, "grad_norm": 0.0791151151061058, "learning_rate": 7.193985920325339e-06, "loss": 0.0018, "step": 51320 }, { "epoch": 0.8398920068722899, "grad_norm": 0.12285812199115753, "learning_rate": 7.192702734535747e-06, "loss": 0.0015, "step": 51330 }, { "epoch": 0.8400556328233658, "grad_norm": 0.09805543720722198, "learning_rate": 7.191419369916781e-06, "loss": 0.0022, "step": 51340 }, { "epoch": 0.8402192587744416, "grad_norm": 0.09020490199327469, "learning_rate": 7.190135826573107e-06, "loss": 0.0029, "step": 51350 }, { "epoch": 0.8403828847255175, "grad_norm": 0.06600624322891235, "learning_rate": 7.188852104609406e-06, "loss": 0.0019, "step": 51360 }, { "epoch": 0.8405465106765934, "grad_norm": 0.11946147680282593, "learning_rate": 7.187568204130375e-06, "loss": 0.0015, "step": 51370 }, { "epoch": 0.8407101366276691, "grad_norm": 0.07183331251144409, "learning_rate": 7.186284125240724e-06, "loss": 0.0025, "step": 51380 }, { "epoch": 0.840873762578745, "grad_norm": 0.13309527933597565, "learning_rate": 7.184999868045178e-06, "loss": 0.0015, "step": 51390 }, { "epoch": 0.8410373885298208, "grad_norm": 0.004618125036358833, "learning_rate": 7.183715432648477e-06, "loss": 0.002, "step": 51400 }, { "epoch": 0.8412010144808967, "grad_norm": 0.03110748715698719, "learning_rate": 7.182430819155375e-06, "loss": 0.0015, "step": 51410 }, { "epoch": 0.8413646404319726, "grad_norm": 0.048091642558574677, "learning_rate": 7.18114602767064e-06, "loss": 0.0018, "step": 51420 }, { "epoch": 0.8415282663830483, "grad_norm": 0.1743410974740982, "learning_rate": 7.1798610582990565e-06, "loss": 0.003, "step": 51430 }, { "epoch": 0.8416918923341242, "grad_norm": 0.04619912430644035, "learning_rate": 7.178575911145421e-06, "loss": 0.0017, "step": 51440 }, { "epoch": 0.8418555182852, "grad_norm": 0.03747618570923805, "learning_rate": 7.177290586314547e-06, "loss": 0.0015, "step": 51450 }, { "epoch": 0.8420191442362759, "grad_norm": 0.07370032370090485, "learning_rate": 7.17600508391126e-06, "loss": 0.0019, "step": 51460 }, { "epoch": 0.8421827701873517, "grad_norm": 0.037222664803266525, "learning_rate": 7.174719404040402e-06, "loss": 0.0027, "step": 51470 }, { "epoch": 0.8423463961384275, "grad_norm": 0.03174416348338127, "learning_rate": 7.1734335468068295e-06, "loss": 0.0015, "step": 51480 }, { "epoch": 0.8425100220895034, "grad_norm": 0.12124619632959366, "learning_rate": 7.172147512315411e-06, "loss": 0.0013, "step": 51490 }, { "epoch": 0.8426736480405792, "grad_norm": 0.08187685906887054, "learning_rate": 7.170861300671031e-06, "loss": 0.0016, "step": 51500 }, { "epoch": 0.8428372739916551, "grad_norm": 0.032854389399290085, "learning_rate": 7.16957491197859e-06, "loss": 0.0025, "step": 51510 }, { "epoch": 0.8430008999427309, "grad_norm": 0.12143535166978836, "learning_rate": 7.1682883463429995e-06, "loss": 0.0016, "step": 51520 }, { "epoch": 0.8431645258938067, "grad_norm": 0.03570203855633736, "learning_rate": 7.167001603869191e-06, "loss": 0.0014, "step": 51530 }, { "epoch": 0.8433281518448826, "grad_norm": 0.10940305143594742, "learning_rate": 7.165714684662102e-06, "loss": 0.0034, "step": 51540 }, { "epoch": 0.8434917777959584, "grad_norm": 0.04765204340219498, "learning_rate": 7.164427588826692e-06, "loss": 0.0015, "step": 51550 }, { "epoch": 0.8436554037470343, "grad_norm": 0.13376373052597046, "learning_rate": 7.1631403164679326e-06, "loss": 0.002, "step": 51560 }, { "epoch": 0.8438190296981101, "grad_norm": 0.017260512337088585, "learning_rate": 7.161852867690808e-06, "loss": 0.0018, "step": 51570 }, { "epoch": 0.843982655649186, "grad_norm": 0.15488842129707336, "learning_rate": 7.160565242600319e-06, "loss": 0.0022, "step": 51580 }, { "epoch": 0.8441462816002618, "grad_norm": 0.11717179417610168, "learning_rate": 7.159277441301481e-06, "loss": 0.0022, "step": 51590 }, { "epoch": 0.8443099075513376, "grad_norm": 0.027377715334296227, "learning_rate": 7.15798946389932e-06, "loss": 0.0028, "step": 51600 }, { "epoch": 0.8444735335024135, "grad_norm": 0.02526942454278469, "learning_rate": 7.15670131049888e-06, "loss": 0.0012, "step": 51610 }, { "epoch": 0.8446371594534893, "grad_norm": 0.13770677149295807, "learning_rate": 7.15541298120522e-06, "loss": 0.0032, "step": 51620 }, { "epoch": 0.8448007854045652, "grad_norm": 0.11904923617839813, "learning_rate": 7.154124476123409e-06, "loss": 0.0016, "step": 51630 }, { "epoch": 0.844964411355641, "grad_norm": 0.022507907822728157, "learning_rate": 7.152835795358537e-06, "loss": 0.0023, "step": 51640 }, { "epoch": 0.8451280373067168, "grad_norm": 0.02419656701385975, "learning_rate": 7.151546939015701e-06, "loss": 0.003, "step": 51650 }, { "epoch": 0.8452916632577927, "grad_norm": 0.04865049198269844, "learning_rate": 7.1502579072000165e-06, "loss": 0.0023, "step": 51660 }, { "epoch": 0.8454552892088685, "grad_norm": 0.16077959537506104, "learning_rate": 7.148968700016614e-06, "loss": 0.0054, "step": 51670 }, { "epoch": 0.8456189151599444, "grad_norm": 0.06158830597996712, "learning_rate": 7.147679317570635e-06, "loss": 0.002, "step": 51680 }, { "epoch": 0.8457825411110202, "grad_norm": 0.04843762516975403, "learning_rate": 7.146389759967237e-06, "loss": 0.0028, "step": 51690 }, { "epoch": 0.845946167062096, "grad_norm": 0.046450987458229065, "learning_rate": 7.145100027311594e-06, "loss": 0.0022, "step": 51700 }, { "epoch": 0.8461097930131719, "grad_norm": 0.031733885407447815, "learning_rate": 7.14381011970889e-06, "loss": 0.0022, "step": 51710 }, { "epoch": 0.8462734189642477, "grad_norm": 0.05632362514734268, "learning_rate": 7.142520037264328e-06, "loss": 0.0015, "step": 51720 }, { "epoch": 0.8464370449153236, "grad_norm": 0.0710706114768982, "learning_rate": 7.1412297800831206e-06, "loss": 0.0016, "step": 51730 }, { "epoch": 0.8466006708663995, "grad_norm": 0.14027522504329681, "learning_rate": 7.1399393482704975e-06, "loss": 0.003, "step": 51740 }, { "epoch": 0.8467642968174752, "grad_norm": 0.06512275338172913, "learning_rate": 7.138648741931702e-06, "loss": 0.0018, "step": 51750 }, { "epoch": 0.8469279227685511, "grad_norm": 0.09841345995664597, "learning_rate": 7.13735796117199e-06, "loss": 0.0031, "step": 51760 }, { "epoch": 0.8470915487196269, "grad_norm": 0.13326981663703918, "learning_rate": 7.136067006096636e-06, "loss": 0.0019, "step": 51770 }, { "epoch": 0.8472551746707028, "grad_norm": 0.027064789086580276, "learning_rate": 7.134775876810924e-06, "loss": 0.0033, "step": 51780 }, { "epoch": 0.8474188006217787, "grad_norm": 0.03956165164709091, "learning_rate": 7.133484573420155e-06, "loss": 0.0015, "step": 51790 }, { "epoch": 0.8475824265728544, "grad_norm": 0.1570480316877365, "learning_rate": 7.132193096029644e-06, "loss": 0.0027, "step": 51800 }, { "epoch": 0.8477460525239303, "grad_norm": 0.07073867321014404, "learning_rate": 7.130901444744717e-06, "loss": 0.0029, "step": 51810 }, { "epoch": 0.8479096784750061, "grad_norm": 0.09911315143108368, "learning_rate": 7.1296096196707185e-06, "loss": 0.0026, "step": 51820 }, { "epoch": 0.848073304426082, "grad_norm": 0.0867316722869873, "learning_rate": 7.128317620913004e-06, "loss": 0.0042, "step": 51830 }, { "epoch": 0.8482369303771579, "grad_norm": 0.05428260192275047, "learning_rate": 7.127025448576948e-06, "loss": 0.0016, "step": 51840 }, { "epoch": 0.8484005563282336, "grad_norm": 0.18079212307929993, "learning_rate": 7.12573310276793e-06, "loss": 0.0022, "step": 51850 }, { "epoch": 0.8485641822793095, "grad_norm": 0.06461817771196365, "learning_rate": 7.124440583591355e-06, "loss": 0.0022, "step": 51860 }, { "epoch": 0.8487278082303853, "grad_norm": 0.204007089138031, "learning_rate": 7.123147891152632e-06, "loss": 0.0022, "step": 51870 }, { "epoch": 0.8488914341814612, "grad_norm": 0.2365105152130127, "learning_rate": 7.121855025557192e-06, "loss": 0.0023, "step": 51880 }, { "epoch": 0.8490550601325371, "grad_norm": 0.038268886506557465, "learning_rate": 7.120561986910475e-06, "loss": 0.0016, "step": 51890 }, { "epoch": 0.8492186860836128, "grad_norm": 0.07389094680547714, "learning_rate": 7.119268775317936e-06, "loss": 0.003, "step": 51900 }, { "epoch": 0.8493823120346887, "grad_norm": 0.008556121960282326, "learning_rate": 7.117975390885048e-06, "loss": 0.0012, "step": 51910 }, { "epoch": 0.8495459379857645, "grad_norm": 0.1838950216770172, "learning_rate": 7.116681833717292e-06, "loss": 0.0035, "step": 51920 }, { "epoch": 0.8497095639368404, "grad_norm": 0.04003998264670372, "learning_rate": 7.115388103920167e-06, "loss": 0.0022, "step": 51930 }, { "epoch": 0.8498731898879163, "grad_norm": 0.039213523268699646, "learning_rate": 7.114094201599185e-06, "loss": 0.002, "step": 51940 }, { "epoch": 0.850036815838992, "grad_norm": 0.11654011160135269, "learning_rate": 7.112800126859874e-06, "loss": 0.0023, "step": 51950 }, { "epoch": 0.8502004417900679, "grad_norm": 0.1593422293663025, "learning_rate": 7.1115058798077715e-06, "loss": 0.0074, "step": 51960 }, { "epoch": 0.8503640677411437, "grad_norm": 0.05370429903268814, "learning_rate": 7.110211460548435e-06, "loss": 0.0016, "step": 51970 }, { "epoch": 0.8505276936922196, "grad_norm": 0.09903787076473236, "learning_rate": 7.10891686918743e-06, "loss": 0.0023, "step": 51980 }, { "epoch": 0.8506913196432955, "grad_norm": 0.1487654596567154, "learning_rate": 7.107622105830342e-06, "loss": 0.0025, "step": 51990 }, { "epoch": 0.8508549455943712, "grad_norm": 0.029519138857722282, "learning_rate": 7.106327170582764e-06, "loss": 0.0017, "step": 52000 }, { "epoch": 0.8510185715454471, "grad_norm": 0.016456831246614456, "learning_rate": 7.105032063550309e-06, "loss": 0.0016, "step": 52010 }, { "epoch": 0.8511821974965229, "grad_norm": 0.02960352972149849, "learning_rate": 7.1037367848386015e-06, "loss": 0.0032, "step": 52020 }, { "epoch": 0.8513458234475988, "grad_norm": 0.059133417904376984, "learning_rate": 7.102441334553279e-06, "loss": 0.0018, "step": 52030 }, { "epoch": 0.8515094493986747, "grad_norm": 0.091533362865448, "learning_rate": 7.101145712799994e-06, "loss": 0.0025, "step": 52040 }, { "epoch": 0.8516730753497505, "grad_norm": 0.04424267262220383, "learning_rate": 7.0998499196844125e-06, "loss": 0.0034, "step": 52050 }, { "epoch": 0.8518367013008263, "grad_norm": 0.023693222552537918, "learning_rate": 7.098553955312217e-06, "loss": 0.0014, "step": 52060 }, { "epoch": 0.8520003272519021, "grad_norm": 0.06740313768386841, "learning_rate": 7.0972578197891e-06, "loss": 0.0015, "step": 52070 }, { "epoch": 0.852163953202978, "grad_norm": 0.037906866520643234, "learning_rate": 7.095961513220772e-06, "loss": 0.0013, "step": 52080 }, { "epoch": 0.8523275791540539, "grad_norm": 0.03597046807408333, "learning_rate": 7.094665035712954e-06, "loss": 0.0015, "step": 52090 }, { "epoch": 0.8524912051051297, "grad_norm": 0.12819840013980865, "learning_rate": 7.093368387371382e-06, "loss": 0.0014, "step": 52100 }, { "epoch": 0.8526548310562055, "grad_norm": 0.23417770862579346, "learning_rate": 7.092071568301806e-06, "loss": 0.0016, "step": 52110 }, { "epoch": 0.8528184570072813, "grad_norm": 0.01937873661518097, "learning_rate": 7.0907745786099915e-06, "loss": 0.0022, "step": 52120 }, { "epoch": 0.8529820829583572, "grad_norm": 0.04607968032360077, "learning_rate": 7.089477418401716e-06, "loss": 0.0024, "step": 52130 }, { "epoch": 0.8531457089094331, "grad_norm": 0.008417102508246899, "learning_rate": 7.088180087782771e-06, "loss": 0.0018, "step": 52140 }, { "epoch": 0.8533093348605089, "grad_norm": 0.057662345468997955, "learning_rate": 7.086882586858962e-06, "loss": 0.0022, "step": 52150 }, { "epoch": 0.8534729608115847, "grad_norm": 0.05995975434780121, "learning_rate": 7.08558491573611e-06, "loss": 0.0019, "step": 52160 }, { "epoch": 0.8536365867626605, "grad_norm": 0.02329769730567932, "learning_rate": 7.084287074520047e-06, "loss": 0.0014, "step": 52170 }, { "epoch": 0.8538002127137364, "grad_norm": 0.09681403636932373, "learning_rate": 7.082989063316623e-06, "loss": 0.0014, "step": 52180 }, { "epoch": 0.8539638386648123, "grad_norm": 0.07927804440259933, "learning_rate": 7.0816908822316975e-06, "loss": 0.0011, "step": 52190 }, { "epoch": 0.8541274646158881, "grad_norm": 0.027724415063858032, "learning_rate": 7.080392531371145e-06, "loss": 0.0037, "step": 52200 }, { "epoch": 0.854291090566964, "grad_norm": 0.04706178605556488, "learning_rate": 7.079094010840857e-06, "loss": 0.0014, "step": 52210 }, { "epoch": 0.8544547165180397, "grad_norm": 0.11963524669408798, "learning_rate": 7.077795320746735e-06, "loss": 0.003, "step": 52220 }, { "epoch": 0.8546183424691156, "grad_norm": 0.054164644330739975, "learning_rate": 7.076496461194696e-06, "loss": 0.0029, "step": 52230 }, { "epoch": 0.8547819684201915, "grad_norm": 0.011991985142230988, "learning_rate": 7.07519743229067e-06, "loss": 0.0013, "step": 52240 }, { "epoch": 0.8549455943712673, "grad_norm": 0.08985766023397446, "learning_rate": 7.073898234140602e-06, "loss": 0.0031, "step": 52250 }, { "epoch": 0.8551092203223432, "grad_norm": 0.18537844717502594, "learning_rate": 7.0725988668504505e-06, "loss": 0.0022, "step": 52260 }, { "epoch": 0.8552728462734189, "grad_norm": 0.10014890134334564, "learning_rate": 7.0712993305261865e-06, "loss": 0.0016, "step": 52270 }, { "epoch": 0.8554364722244948, "grad_norm": 0.08260821551084518, "learning_rate": 7.069999625273796e-06, "loss": 0.0017, "step": 52280 }, { "epoch": 0.8556000981755707, "grad_norm": 0.04796145111322403, "learning_rate": 7.068699751199279e-06, "loss": 0.0018, "step": 52290 }, { "epoch": 0.8557637241266465, "grad_norm": 0.007061548065394163, "learning_rate": 7.067399708408649e-06, "loss": 0.0034, "step": 52300 }, { "epoch": 0.8559273500777224, "grad_norm": 0.07918252050876617, "learning_rate": 7.066099497007932e-06, "loss": 0.0013, "step": 52310 }, { "epoch": 0.8560909760287981, "grad_norm": 0.061982057988643646, "learning_rate": 7.06479911710317e-06, "loss": 0.0015, "step": 52320 }, { "epoch": 0.856254601979874, "grad_norm": 0.09538498520851135, "learning_rate": 7.0634985688004174e-06, "loss": 0.0016, "step": 52330 }, { "epoch": 0.8564182279309498, "grad_norm": 0.1483614444732666, "learning_rate": 7.062197852205742e-06, "loss": 0.0018, "step": 52340 }, { "epoch": 0.8565818538820257, "grad_norm": 0.08636424690485, "learning_rate": 7.060896967425226e-06, "loss": 0.0017, "step": 52350 }, { "epoch": 0.8567454798331016, "grad_norm": 0.15219436585903168, "learning_rate": 7.059595914564965e-06, "loss": 0.0019, "step": 52360 }, { "epoch": 0.8569091057841773, "grad_norm": 0.03879012539982796, "learning_rate": 7.058294693731068e-06, "loss": 0.002, "step": 52370 }, { "epoch": 0.8570727317352532, "grad_norm": 0.04547334089875221, "learning_rate": 7.05699330502966e-06, "loss": 0.0031, "step": 52380 }, { "epoch": 0.857236357686329, "grad_norm": 0.053423453122377396, "learning_rate": 7.055691748566875e-06, "loss": 0.0017, "step": 52390 }, { "epoch": 0.8573999836374049, "grad_norm": 0.043873924762010574, "learning_rate": 7.0543900244488665e-06, "loss": 0.0021, "step": 52400 }, { "epoch": 0.8575636095884808, "grad_norm": 0.025887412950396538, "learning_rate": 7.053088132781795e-06, "loss": 0.0019, "step": 52410 }, { "epoch": 0.8577272355395565, "grad_norm": 0.08187703788280487, "learning_rate": 7.051786073671843e-06, "loss": 0.003, "step": 52420 }, { "epoch": 0.8578908614906324, "grad_norm": 0.023876162245869637, "learning_rate": 7.050483847225199e-06, "loss": 0.0023, "step": 52430 }, { "epoch": 0.8580544874417082, "grad_norm": 0.036614932119846344, "learning_rate": 7.049181453548068e-06, "loss": 0.0026, "step": 52440 }, { "epoch": 0.8582181133927841, "grad_norm": 0.08037271350622177, "learning_rate": 7.047878892746671e-06, "loss": 0.0022, "step": 52450 }, { "epoch": 0.85838173934386, "grad_norm": 0.07627733051776886, "learning_rate": 7.046576164927239e-06, "loss": 0.0017, "step": 52460 }, { "epoch": 0.8585453652949357, "grad_norm": 0.20880989730358124, "learning_rate": 7.045273270196017e-06, "loss": 0.004, "step": 52470 }, { "epoch": 0.8587089912460116, "grad_norm": 0.05583646893501282, "learning_rate": 7.043970208659265e-06, "loss": 0.0017, "step": 52480 }, { "epoch": 0.8588726171970874, "grad_norm": 0.11493480950593948, "learning_rate": 7.0426669804232586e-06, "loss": 0.002, "step": 52490 }, { "epoch": 0.8590362431481633, "grad_norm": 0.1680634617805481, "learning_rate": 7.041363585594282e-06, "loss": 0.0023, "step": 52500 }, { "epoch": 0.8591998690992392, "grad_norm": 0.07949693500995636, "learning_rate": 7.040060024278636e-06, "loss": 0.0032, "step": 52510 }, { "epoch": 0.859363495050315, "grad_norm": 0.04821572080254555, "learning_rate": 7.038756296582638e-06, "loss": 0.0025, "step": 52520 }, { "epoch": 0.8595271210013908, "grad_norm": 0.14623624086380005, "learning_rate": 7.03745240261261e-06, "loss": 0.0031, "step": 52530 }, { "epoch": 0.8596907469524666, "grad_norm": 0.07858574390411377, "learning_rate": 7.036148342474898e-06, "loss": 0.0015, "step": 52540 }, { "epoch": 0.8598543729035425, "grad_norm": 0.04424387961626053, "learning_rate": 7.034844116275853e-06, "loss": 0.0026, "step": 52550 }, { "epoch": 0.8600179988546184, "grad_norm": 0.009820517152547836, "learning_rate": 7.033539724121846e-06, "loss": 0.0017, "step": 52560 }, { "epoch": 0.8601816248056942, "grad_norm": 0.07594398409128189, "learning_rate": 7.032235166119258e-06, "loss": 0.0023, "step": 52570 }, { "epoch": 0.86034525075677, "grad_norm": 0.17292647063732147, "learning_rate": 7.030930442374484e-06, "loss": 0.0024, "step": 52580 }, { "epoch": 0.8605088767078458, "grad_norm": 0.05539826303720474, "learning_rate": 7.029625552993933e-06, "loss": 0.0021, "step": 52590 }, { "epoch": 0.8606725026589217, "grad_norm": 0.1398976594209671, "learning_rate": 7.028320498084027e-06, "loss": 0.0019, "step": 52600 }, { "epoch": 0.8608361286099976, "grad_norm": 0.03826690465211868, "learning_rate": 7.027015277751201e-06, "loss": 0.0014, "step": 52610 }, { "epoch": 0.8609997545610734, "grad_norm": 0.07009593397378922, "learning_rate": 7.025709892101908e-06, "loss": 0.0059, "step": 52620 }, { "epoch": 0.8611633805121492, "grad_norm": 0.013434285297989845, "learning_rate": 7.024404341242606e-06, "loss": 0.0026, "step": 52630 }, { "epoch": 0.861327006463225, "grad_norm": 0.14003880321979523, "learning_rate": 7.023098625279774e-06, "loss": 0.0028, "step": 52640 }, { "epoch": 0.8614906324143009, "grad_norm": 0.04105537757277489, "learning_rate": 7.021792744319902e-06, "loss": 0.0033, "step": 52650 }, { "epoch": 0.8616542583653768, "grad_norm": 0.06372685730457306, "learning_rate": 7.020486698469492e-06, "loss": 0.0028, "step": 52660 }, { "epoch": 0.8618178843164526, "grad_norm": 0.07221833616495132, "learning_rate": 7.019180487835062e-06, "loss": 0.0022, "step": 52670 }, { "epoch": 0.8619815102675285, "grad_norm": 0.06876964867115021, "learning_rate": 7.01787411252314e-06, "loss": 0.0014, "step": 52680 }, { "epoch": 0.8621451362186042, "grad_norm": 0.04022214189171791, "learning_rate": 7.016567572640272e-06, "loss": 0.0019, "step": 52690 }, { "epoch": 0.8623087621696801, "grad_norm": 0.11424021422863007, "learning_rate": 7.015260868293013e-06, "loss": 0.0017, "step": 52700 }, { "epoch": 0.862472388120756, "grad_norm": 0.08551132678985596, "learning_rate": 7.013953999587935e-06, "loss": 0.0018, "step": 52710 }, { "epoch": 0.8626360140718318, "grad_norm": 0.08215713500976562, "learning_rate": 7.01264696663162e-06, "loss": 0.0022, "step": 52720 }, { "epoch": 0.8627996400229077, "grad_norm": 0.020387010648846626, "learning_rate": 7.011339769530665e-06, "loss": 0.002, "step": 52730 }, { "epoch": 0.8629632659739834, "grad_norm": 0.08078915625810623, "learning_rate": 7.010032408391683e-06, "loss": 0.0018, "step": 52740 }, { "epoch": 0.8631268919250593, "grad_norm": 0.8305329084396362, "learning_rate": 7.008724883321297e-06, "loss": 0.0026, "step": 52750 }, { "epoch": 0.8632905178761352, "grad_norm": 0.05274273455142975, "learning_rate": 7.007417194426143e-06, "loss": 0.0019, "step": 52760 }, { "epoch": 0.863454143827211, "grad_norm": 0.038838643580675125, "learning_rate": 7.0061093418128726e-06, "loss": 0.0031, "step": 52770 }, { "epoch": 0.8636177697782869, "grad_norm": 0.07990029454231262, "learning_rate": 7.00480132558815e-06, "loss": 0.0017, "step": 52780 }, { "epoch": 0.8637813957293626, "grad_norm": 0.009780745953321457, "learning_rate": 7.003493145858651e-06, "loss": 0.0035, "step": 52790 }, { "epoch": 0.8639450216804385, "grad_norm": 0.04110005125403404, "learning_rate": 7.002184802731069e-06, "loss": 0.0019, "step": 52800 }, { "epoch": 0.8641086476315144, "grad_norm": 0.12480364739894867, "learning_rate": 7.000876296312104e-06, "loss": 0.0035, "step": 52810 }, { "epoch": 0.8642722735825902, "grad_norm": 0.04220016673207283, "learning_rate": 6.999567626708479e-06, "loss": 0.0026, "step": 52820 }, { "epoch": 0.8644358995336661, "grad_norm": 0.05488692969083786, "learning_rate": 6.998258794026919e-06, "loss": 0.0063, "step": 52830 }, { "epoch": 0.8645995254847418, "grad_norm": 0.0398465059697628, "learning_rate": 6.996949798374172e-06, "loss": 0.0022, "step": 52840 }, { "epoch": 0.8647631514358177, "grad_norm": 0.007414890918880701, "learning_rate": 6.995640639856993e-06, "loss": 0.0024, "step": 52850 }, { "epoch": 0.8649267773868936, "grad_norm": 0.06604649871587753, "learning_rate": 6.9943313185821535e-06, "loss": 0.0015, "step": 52860 }, { "epoch": 0.8650904033379694, "grad_norm": 0.04440661892294884, "learning_rate": 6.993021834656437e-06, "loss": 0.0025, "step": 52870 }, { "epoch": 0.8652540292890453, "grad_norm": 0.09880305081605911, "learning_rate": 6.99171218818664e-06, "loss": 0.0023, "step": 52880 }, { "epoch": 0.865417655240121, "grad_norm": 0.04403670132160187, "learning_rate": 6.990402379279573e-06, "loss": 0.0015, "step": 52890 }, { "epoch": 0.8655812811911969, "grad_norm": 0.06780622899532318, "learning_rate": 6.98909240804206e-06, "loss": 0.0018, "step": 52900 }, { "epoch": 0.8657449071422728, "grad_norm": 0.09713041037321091, "learning_rate": 6.9877822745809385e-06, "loss": 0.0023, "step": 52910 }, { "epoch": 0.8659085330933486, "grad_norm": 0.09721215069293976, "learning_rate": 6.986471979003058e-06, "loss": 0.0016, "step": 52920 }, { "epoch": 0.8660721590444245, "grad_norm": 0.13830965757369995, "learning_rate": 6.98516152141528e-06, "loss": 0.002, "step": 52930 }, { "epoch": 0.8662357849955002, "grad_norm": 0.03229491412639618, "learning_rate": 6.983850901924484e-06, "loss": 0.0012, "step": 52940 }, { "epoch": 0.8663994109465761, "grad_norm": 0.20107528567314148, "learning_rate": 6.982540120637558e-06, "loss": 0.0029, "step": 52950 }, { "epoch": 0.866563036897652, "grad_norm": 0.09906178712844849, "learning_rate": 6.981229177661403e-06, "loss": 0.002, "step": 52960 }, { "epoch": 0.8667266628487278, "grad_norm": 0.05163905769586563, "learning_rate": 6.979918073102938e-06, "loss": 0.0021, "step": 52970 }, { "epoch": 0.8668902887998037, "grad_norm": 0.07864928245544434, "learning_rate": 6.978606807069092e-06, "loss": 0.0029, "step": 52980 }, { "epoch": 0.8670539147508795, "grad_norm": 0.21290278434753418, "learning_rate": 6.977295379666807e-06, "loss": 0.0026, "step": 52990 }, { "epoch": 0.8672175407019553, "grad_norm": 0.08762305229902267, "learning_rate": 6.975983791003037e-06, "loss": 0.004, "step": 53000 }, { "epoch": 0.8673811666530312, "grad_norm": 0.10097048431634903, "learning_rate": 6.974672041184752e-06, "loss": 0.0032, "step": 53010 }, { "epoch": 0.867544792604107, "grad_norm": 0.03274586424231529, "learning_rate": 6.973360130318933e-06, "loss": 0.0032, "step": 53020 }, { "epoch": 0.8677084185551829, "grad_norm": 0.003902699565514922, "learning_rate": 6.972048058512577e-06, "loss": 0.001, "step": 53030 }, { "epoch": 0.8678720445062587, "grad_norm": 0.5004389882087708, "learning_rate": 6.970735825872691e-06, "loss": 0.0023, "step": 53040 }, { "epoch": 0.8680356704573345, "grad_norm": 0.08635661005973816, "learning_rate": 6.9694234325062945e-06, "loss": 0.0026, "step": 53050 }, { "epoch": 0.8681992964084104, "grad_norm": 0.087120920419693, "learning_rate": 6.968110878520426e-06, "loss": 0.0012, "step": 53060 }, { "epoch": 0.8683629223594862, "grad_norm": 0.06626240909099579, "learning_rate": 6.966798164022128e-06, "loss": 0.0023, "step": 53070 }, { "epoch": 0.8685265483105621, "grad_norm": 0.038560375571250916, "learning_rate": 6.965485289118464e-06, "loss": 0.002, "step": 53080 }, { "epoch": 0.8686901742616379, "grad_norm": 0.01193628553301096, "learning_rate": 6.964172253916507e-06, "loss": 0.0031, "step": 53090 }, { "epoch": 0.8688538002127137, "grad_norm": 0.26060912013053894, "learning_rate": 6.962859058523344e-06, "loss": 0.0024, "step": 53100 }, { "epoch": 0.8690174261637896, "grad_norm": 0.10607622563838959, "learning_rate": 6.961545703046073e-06, "loss": 0.0022, "step": 53110 }, { "epoch": 0.8691810521148654, "grad_norm": 0.03295673429965973, "learning_rate": 6.96023218759181e-06, "loss": 0.0018, "step": 53120 }, { "epoch": 0.8693446780659413, "grad_norm": 0.04429740831255913, "learning_rate": 6.958918512267678e-06, "loss": 0.0023, "step": 53130 }, { "epoch": 0.8695083040170171, "grad_norm": 0.05577025189995766, "learning_rate": 6.957604677180818e-06, "loss": 0.0015, "step": 53140 }, { "epoch": 0.869671929968093, "grad_norm": 0.10671288520097733, "learning_rate": 6.95629068243838e-06, "loss": 0.0019, "step": 53150 }, { "epoch": 0.8698355559191688, "grad_norm": 0.10934911668300629, "learning_rate": 6.954976528147531e-06, "loss": 0.004, "step": 53160 }, { "epoch": 0.8699991818702446, "grad_norm": 0.07432562857866287, "learning_rate": 6.953662214415446e-06, "loss": 0.0023, "step": 53170 }, { "epoch": 0.8701628078213205, "grad_norm": 0.2075672596693039, "learning_rate": 6.952347741349319e-06, "loss": 0.0018, "step": 53180 }, { "epoch": 0.8703264337723963, "grad_norm": 0.1452101469039917, "learning_rate": 6.951033109056353e-06, "loss": 0.0021, "step": 53190 }, { "epoch": 0.8704900597234722, "grad_norm": 0.06605714559555054, "learning_rate": 6.949718317643764e-06, "loss": 0.0025, "step": 53200 }, { "epoch": 0.8706536856745479, "grad_norm": 0.09846769273281097, "learning_rate": 6.948403367218781e-06, "loss": 0.0033, "step": 53210 }, { "epoch": 0.8708173116256238, "grad_norm": 0.05244271457195282, "learning_rate": 6.94708825788865e-06, "loss": 0.0012, "step": 53220 }, { "epoch": 0.8709809375766997, "grad_norm": 0.06133155897259712, "learning_rate": 6.945772989760626e-06, "loss": 0.0055, "step": 53230 }, { "epoch": 0.8711445635277755, "grad_norm": 0.1320413202047348, "learning_rate": 6.944457562941975e-06, "loss": 0.0027, "step": 53240 }, { "epoch": 0.8713081894788514, "grad_norm": 0.006265302188694477, "learning_rate": 6.943141977539982e-06, "loss": 0.0009, "step": 53250 }, { "epoch": 0.8714718154299271, "grad_norm": 0.11734145134687424, "learning_rate": 6.941826233661941e-06, "loss": 0.0018, "step": 53260 }, { "epoch": 0.871635441381003, "grad_norm": 0.04843049868941307, "learning_rate": 6.940510331415158e-06, "loss": 0.0014, "step": 53270 }, { "epoch": 0.8717990673320789, "grad_norm": 0.040032610297203064, "learning_rate": 6.939194270906955e-06, "loss": 0.002, "step": 53280 }, { "epoch": 0.8719626932831547, "grad_norm": 0.06429308652877808, "learning_rate": 6.937878052244664e-06, "loss": 0.0022, "step": 53290 }, { "epoch": 0.8721263192342306, "grad_norm": 0.043519239872694016, "learning_rate": 6.936561675535634e-06, "loss": 0.0024, "step": 53300 }, { "epoch": 0.8722899451853063, "grad_norm": 0.0638105645775795, "learning_rate": 6.935245140887221e-06, "loss": 0.0027, "step": 53310 }, { "epoch": 0.8724535711363822, "grad_norm": 0.17934434115886688, "learning_rate": 6.9339284484068e-06, "loss": 0.0026, "step": 53320 }, { "epoch": 0.8726171970874581, "grad_norm": 0.06698743999004364, "learning_rate": 6.932611598201754e-06, "loss": 0.0036, "step": 53330 }, { "epoch": 0.8727808230385339, "grad_norm": 0.0858529731631279, "learning_rate": 6.93129459037948e-06, "loss": 0.0025, "step": 53340 }, { "epoch": 0.8729444489896098, "grad_norm": 0.07484275102615356, "learning_rate": 6.929977425047392e-06, "loss": 0.0021, "step": 53350 }, { "epoch": 0.8731080749406855, "grad_norm": 0.24180133640766144, "learning_rate": 6.928660102312911e-06, "loss": 0.0012, "step": 53360 }, { "epoch": 0.8732717008917614, "grad_norm": 0.0876980647444725, "learning_rate": 6.9273426222834735e-06, "loss": 0.0027, "step": 53370 }, { "epoch": 0.8734353268428373, "grad_norm": 0.062733493745327, "learning_rate": 6.92602498506653e-06, "loss": 0.0024, "step": 53380 }, { "epoch": 0.8735989527939131, "grad_norm": 0.12802116572856903, "learning_rate": 6.92470719076954e-06, "loss": 0.0063, "step": 53390 }, { "epoch": 0.873762578744989, "grad_norm": 0.03013026900589466, "learning_rate": 6.923389239499982e-06, "loss": 0.0017, "step": 53400 }, { "epoch": 0.8739262046960647, "grad_norm": 0.07950850576162338, "learning_rate": 6.92207113136534e-06, "loss": 0.0017, "step": 53410 }, { "epoch": 0.8740898306471406, "grad_norm": 0.040680043399333954, "learning_rate": 6.920752866473116e-06, "loss": 0.0015, "step": 53420 }, { "epoch": 0.8742534565982165, "grad_norm": 0.0779644325375557, "learning_rate": 6.919434444930823e-06, "loss": 0.0014, "step": 53430 }, { "epoch": 0.8744170825492923, "grad_norm": 0.08873041719198227, "learning_rate": 6.918115866845988e-06, "loss": 0.0028, "step": 53440 }, { "epoch": 0.8745807085003682, "grad_norm": 0.08217376470565796, "learning_rate": 6.916797132326148e-06, "loss": 0.0016, "step": 53450 }, { "epoch": 0.874744334451444, "grad_norm": 0.12217187136411667, "learning_rate": 6.915478241478857e-06, "loss": 0.0015, "step": 53460 }, { "epoch": 0.8749079604025198, "grad_norm": 0.11413371562957764, "learning_rate": 6.914159194411676e-06, "loss": 0.0038, "step": 53470 }, { "epoch": 0.8750715863535957, "grad_norm": 0.08643936365842819, "learning_rate": 6.9128399912321845e-06, "loss": 0.0032, "step": 53480 }, { "epoch": 0.8752352123046715, "grad_norm": 0.05037185177206993, "learning_rate": 6.911520632047972e-06, "loss": 0.0012, "step": 53490 }, { "epoch": 0.8753988382557474, "grad_norm": 0.06223201006650925, "learning_rate": 6.91020111696664e-06, "loss": 0.0023, "step": 53500 }, { "epoch": 0.8755624642068232, "grad_norm": 0.06319757550954819, "learning_rate": 6.9088814460958035e-06, "loss": 0.0024, "step": 53510 }, { "epoch": 0.875726090157899, "grad_norm": 0.0652029812335968, "learning_rate": 6.907561619543091e-06, "loss": 0.0017, "step": 53520 }, { "epoch": 0.8758897161089749, "grad_norm": 0.27320727705955505, "learning_rate": 6.906241637416142e-06, "loss": 0.0019, "step": 53530 }, { "epoch": 0.8760533420600507, "grad_norm": 0.062421705573797226, "learning_rate": 6.904921499822611e-06, "loss": 0.003, "step": 53540 }, { "epoch": 0.8762169680111266, "grad_norm": 0.0823979303240776, "learning_rate": 6.903601206870164e-06, "loss": 0.0018, "step": 53550 }, { "epoch": 0.8763805939622024, "grad_norm": 0.13977567851543427, "learning_rate": 6.90228075866648e-06, "loss": 0.0024, "step": 53560 }, { "epoch": 0.8765442199132782, "grad_norm": 0.0352754220366478, "learning_rate": 6.900960155319248e-06, "loss": 0.0009, "step": 53570 }, { "epoch": 0.8767078458643541, "grad_norm": 0.057808853685855865, "learning_rate": 6.899639396936173e-06, "loss": 0.0012, "step": 53580 }, { "epoch": 0.8768714718154299, "grad_norm": 0.17973849177360535, "learning_rate": 6.898318483624973e-06, "loss": 0.0013, "step": 53590 }, { "epoch": 0.8770350977665058, "grad_norm": 0.12151037156581879, "learning_rate": 6.896997415493376e-06, "loss": 0.0022, "step": 53600 }, { "epoch": 0.8771987237175816, "grad_norm": 0.025110622867941856, "learning_rate": 6.895676192649124e-06, "loss": 0.0022, "step": 53610 }, { "epoch": 0.8773623496686574, "grad_norm": 0.18021991848945618, "learning_rate": 6.894354815199971e-06, "loss": 0.0016, "step": 53620 }, { "epoch": 0.8775259756197333, "grad_norm": 0.03200940042734146, "learning_rate": 6.893033283253685e-06, "loss": 0.0021, "step": 53630 }, { "epoch": 0.8776896015708091, "grad_norm": 0.05682295188307762, "learning_rate": 6.8917115969180445e-06, "loss": 0.0018, "step": 53640 }, { "epoch": 0.877853227521885, "grad_norm": 0.1284276396036148, "learning_rate": 6.890389756300842e-06, "loss": 0.0013, "step": 53650 }, { "epoch": 0.8780168534729608, "grad_norm": 0.026704085990786552, "learning_rate": 6.8890677615098834e-06, "loss": 0.0018, "step": 53660 }, { "epoch": 0.8781804794240367, "grad_norm": 0.24410900473594666, "learning_rate": 6.887745612652985e-06, "loss": 0.003, "step": 53670 }, { "epoch": 0.8783441053751125, "grad_norm": 0.009228730574250221, "learning_rate": 6.886423309837978e-06, "loss": 0.003, "step": 53680 }, { "epoch": 0.8785077313261883, "grad_norm": 0.08482401072978973, "learning_rate": 6.885100853172702e-06, "loss": 0.0025, "step": 53690 }, { "epoch": 0.8786713572772642, "grad_norm": 0.0654645711183548, "learning_rate": 6.883778242765016e-06, "loss": 0.0016, "step": 53700 }, { "epoch": 0.87883498322834, "grad_norm": 0.08374740928411484, "learning_rate": 6.8824554787227845e-06, "loss": 0.0024, "step": 53710 }, { "epoch": 0.8789986091794159, "grad_norm": 0.029770608991384506, "learning_rate": 6.881132561153889e-06, "loss": 0.0011, "step": 53720 }, { "epoch": 0.8791622351304917, "grad_norm": 0.02724733017385006, "learning_rate": 6.879809490166223e-06, "loss": 0.0013, "step": 53730 }, { "epoch": 0.8793258610815675, "grad_norm": 0.010225177742540836, "learning_rate": 6.8784862658676895e-06, "loss": 0.0029, "step": 53740 }, { "epoch": 0.8794894870326434, "grad_norm": 0.17977429926395416, "learning_rate": 6.877162888366208e-06, "loss": 0.0022, "step": 53750 }, { "epoch": 0.8796531129837192, "grad_norm": 0.12213508784770966, "learning_rate": 6.875839357769707e-06, "loss": 0.0034, "step": 53760 }, { "epoch": 0.8798167389347951, "grad_norm": 0.026230571791529655, "learning_rate": 6.874515674186131e-06, "loss": 0.0024, "step": 53770 }, { "epoch": 0.879980364885871, "grad_norm": 0.02563793957233429, "learning_rate": 6.873191837723434e-06, "loss": 0.0017, "step": 53780 }, { "epoch": 0.8801439908369467, "grad_norm": 0.15670177340507507, "learning_rate": 6.871867848489584e-06, "loss": 0.0031, "step": 53790 }, { "epoch": 0.8803076167880226, "grad_norm": 0.0036613966803997755, "learning_rate": 6.870543706592561e-06, "loss": 0.0009, "step": 53800 }, { "epoch": 0.8804712427390984, "grad_norm": 0.0243604127317667, "learning_rate": 6.869219412140359e-06, "loss": 0.002, "step": 53810 }, { "epoch": 0.8806348686901743, "grad_norm": 0.1405079960823059, "learning_rate": 6.8678949652409785e-06, "loss": 0.0031, "step": 53820 }, { "epoch": 0.8807984946412502, "grad_norm": 0.029803363606333733, "learning_rate": 6.866570366002442e-06, "loss": 0.0021, "step": 53830 }, { "epoch": 0.8809621205923259, "grad_norm": 0.08115305006504059, "learning_rate": 6.8652456145327765e-06, "loss": 0.0032, "step": 53840 }, { "epoch": 0.8811257465434018, "grad_norm": 0.05830787867307663, "learning_rate": 6.863920710940023e-06, "loss": 0.0009, "step": 53850 }, { "epoch": 0.8812893724944776, "grad_norm": 0.032207537442445755, "learning_rate": 6.8625956553322395e-06, "loss": 0.0013, "step": 53860 }, { "epoch": 0.8814529984455535, "grad_norm": 0.05618473142385483, "learning_rate": 6.861270447817492e-06, "loss": 0.0015, "step": 53870 }, { "epoch": 0.8816166243966294, "grad_norm": 0.07736112177371979, "learning_rate": 6.859945088503858e-06, "loss": 0.0017, "step": 53880 }, { "epoch": 0.8817802503477051, "grad_norm": 0.06020403280854225, "learning_rate": 6.858619577499431e-06, "loss": 0.0022, "step": 53890 }, { "epoch": 0.881943876298781, "grad_norm": 0.05019012466073036, "learning_rate": 6.857293914912316e-06, "loss": 0.0017, "step": 53900 }, { "epoch": 0.8821075022498568, "grad_norm": 0.029793351888656616, "learning_rate": 6.855968100850626e-06, "loss": 0.0023, "step": 53910 }, { "epoch": 0.8822711282009327, "grad_norm": 0.028437718749046326, "learning_rate": 6.8546421354224946e-06, "loss": 0.0012, "step": 53920 }, { "epoch": 0.8824347541520086, "grad_norm": 0.03040366806089878, "learning_rate": 6.853316018736059e-06, "loss": 0.0024, "step": 53930 }, { "epoch": 0.8825983801030843, "grad_norm": 0.11271120607852936, "learning_rate": 6.8519897508994745e-06, "loss": 0.0036, "step": 53940 }, { "epoch": 0.8827620060541602, "grad_norm": 0.026811890304088593, "learning_rate": 6.850663332020908e-06, "loss": 0.0015, "step": 53950 }, { "epoch": 0.882925632005236, "grad_norm": 0.11508350074291229, "learning_rate": 6.849336762208536e-06, "loss": 0.0019, "step": 53960 }, { "epoch": 0.8830892579563119, "grad_norm": 0.022951634600758553, "learning_rate": 6.848010041570551e-06, "loss": 0.0017, "step": 53970 }, { "epoch": 0.8832528839073878, "grad_norm": 0.10866470634937286, "learning_rate": 6.846683170215153e-06, "loss": 0.0025, "step": 53980 }, { "epoch": 0.8834165098584635, "grad_norm": 0.17455822229385376, "learning_rate": 6.8453561482505594e-06, "loss": 0.0013, "step": 53990 }, { "epoch": 0.8835801358095394, "grad_norm": 0.01109520997852087, "learning_rate": 6.844028975784995e-06, "loss": 0.0014, "step": 54000 }, { "epoch": 0.8837437617606152, "grad_norm": 0.06662735342979431, "learning_rate": 6.842701652926703e-06, "loss": 0.0021, "step": 54010 }, { "epoch": 0.8839073877116911, "grad_norm": 0.1067625880241394, "learning_rate": 6.841374179783934e-06, "loss": 0.0012, "step": 54020 }, { "epoch": 0.884071013662767, "grad_norm": 0.03916068375110626, "learning_rate": 6.840046556464951e-06, "loss": 0.0015, "step": 54030 }, { "epoch": 0.8842346396138427, "grad_norm": 0.03386598825454712, "learning_rate": 6.838718783078031e-06, "loss": 0.0029, "step": 54040 }, { "epoch": 0.8843982655649186, "grad_norm": 0.02871730737388134, "learning_rate": 6.837390859731463e-06, "loss": 0.003, "step": 54050 }, { "epoch": 0.8845618915159944, "grad_norm": 0.10410573333501816, "learning_rate": 6.8360627865335486e-06, "loss": 0.0016, "step": 54060 }, { "epoch": 0.8847255174670703, "grad_norm": 0.11157534271478653, "learning_rate": 6.834734563592599e-06, "loss": 0.0028, "step": 54070 }, { "epoch": 0.8848891434181462, "grad_norm": 0.11567296087741852, "learning_rate": 6.833406191016941e-06, "loss": 0.002, "step": 54080 }, { "epoch": 0.885052769369222, "grad_norm": 0.10017170011997223, "learning_rate": 6.8320776689149126e-06, "loss": 0.0018, "step": 54090 }, { "epoch": 0.8852163953202978, "grad_norm": 0.11726928502321243, "learning_rate": 6.8307489973948625e-06, "loss": 0.0036, "step": 54100 }, { "epoch": 0.8853800212713736, "grad_norm": 0.04573062062263489, "learning_rate": 6.829420176565153e-06, "loss": 0.0012, "step": 54110 }, { "epoch": 0.8855436472224495, "grad_norm": 0.06171920523047447, "learning_rate": 6.828091206534157e-06, "loss": 0.0022, "step": 54120 }, { "epoch": 0.8857072731735253, "grad_norm": 0.08566936105489731, "learning_rate": 6.826762087410264e-06, "loss": 0.0029, "step": 54130 }, { "epoch": 0.8858708991246012, "grad_norm": 0.0958702340722084, "learning_rate": 6.8254328193018695e-06, "loss": 0.0012, "step": 54140 }, { "epoch": 0.886034525075677, "grad_norm": 0.24597111344337463, "learning_rate": 6.824103402317384e-06, "loss": 0.0039, "step": 54150 }, { "epoch": 0.8861981510267528, "grad_norm": 0.16293805837631226, "learning_rate": 6.8227738365652315e-06, "loss": 0.0014, "step": 54160 }, { "epoch": 0.8863617769778287, "grad_norm": 0.22481563687324524, "learning_rate": 6.821444122153846e-06, "loss": 0.0022, "step": 54170 }, { "epoch": 0.8865254029289045, "grad_norm": 0.02314728870987892, "learning_rate": 6.820114259191675e-06, "loss": 0.0021, "step": 54180 }, { "epoch": 0.8866890288799804, "grad_norm": 0.08620315045118332, "learning_rate": 6.818784247787179e-06, "loss": 0.0032, "step": 54190 }, { "epoch": 0.8868526548310562, "grad_norm": 0.05162770301103592, "learning_rate": 6.817454088048827e-06, "loss": 0.0026, "step": 54200 }, { "epoch": 0.887016280782132, "grad_norm": 0.02676309645175934, "learning_rate": 6.816123780085103e-06, "loss": 0.0025, "step": 54210 }, { "epoch": 0.8871799067332079, "grad_norm": 0.020230954512953758, "learning_rate": 6.814793324004503e-06, "loss": 0.0021, "step": 54220 }, { "epoch": 0.8873435326842837, "grad_norm": 0.05676576495170593, "learning_rate": 6.813462719915533e-06, "loss": 0.0014, "step": 54230 }, { "epoch": 0.8875071586353596, "grad_norm": 0.07944746315479279, "learning_rate": 6.812131967926714e-06, "loss": 0.0017, "step": 54240 }, { "epoch": 0.8876707845864354, "grad_norm": 0.010663527064025402, "learning_rate": 6.8108010681465775e-06, "loss": 0.0017, "step": 54250 }, { "epoch": 0.8878344105375112, "grad_norm": 0.15500694513320923, "learning_rate": 6.809470020683666e-06, "loss": 0.0029, "step": 54260 }, { "epoch": 0.8879980364885871, "grad_norm": 0.02343682385981083, "learning_rate": 6.808138825646537e-06, "loss": 0.0027, "step": 54270 }, { "epoch": 0.8881616624396629, "grad_norm": 0.14620138704776764, "learning_rate": 6.806807483143756e-06, "loss": 0.0021, "step": 54280 }, { "epoch": 0.8883252883907388, "grad_norm": 0.11204607784748077, "learning_rate": 6.805475993283904e-06, "loss": 0.0031, "step": 54290 }, { "epoch": 0.8884889143418147, "grad_norm": 0.09329970180988312, "learning_rate": 6.804144356175574e-06, "loss": 0.0019, "step": 54300 }, { "epoch": 0.8886525402928904, "grad_norm": 0.05720141530036926, "learning_rate": 6.8028125719273675e-06, "loss": 0.0011, "step": 54310 }, { "epoch": 0.8888161662439663, "grad_norm": 0.07613986730575562, "learning_rate": 6.8014806406479015e-06, "loss": 0.0031, "step": 54320 }, { "epoch": 0.8889797921950421, "grad_norm": 0.3952399492263794, "learning_rate": 6.800148562445804e-06, "loss": 0.0019, "step": 54330 }, { "epoch": 0.889143418146118, "grad_norm": 0.007523656357079744, "learning_rate": 6.7988163374297145e-06, "loss": 0.0018, "step": 54340 }, { "epoch": 0.8893070440971939, "grad_norm": 0.1378215104341507, "learning_rate": 6.797483965708284e-06, "loss": 0.0021, "step": 54350 }, { "epoch": 0.8894706700482696, "grad_norm": 0.0389818400144577, "learning_rate": 6.796151447390177e-06, "loss": 0.0022, "step": 54360 }, { "epoch": 0.8896342959993455, "grad_norm": 0.1969531923532486, "learning_rate": 6.794818782584069e-06, "loss": 0.0031, "step": 54370 }, { "epoch": 0.8897979219504213, "grad_norm": 0.023818299174308777, "learning_rate": 6.793485971398648e-06, "loss": 0.0022, "step": 54380 }, { "epoch": 0.8899615479014972, "grad_norm": 0.23318390548229218, "learning_rate": 6.792153013942613e-06, "loss": 0.0039, "step": 54390 }, { "epoch": 0.8901251738525731, "grad_norm": 0.040484681725502014, "learning_rate": 6.790819910324675e-06, "loss": 0.0015, "step": 54400 }, { "epoch": 0.8902887998036488, "grad_norm": 0.05555105581879616, "learning_rate": 6.789486660653558e-06, "loss": 0.0021, "step": 54410 }, { "epoch": 0.8904524257547247, "grad_norm": 0.025553874671459198, "learning_rate": 6.788153265037998e-06, "loss": 0.0034, "step": 54420 }, { "epoch": 0.8906160517058005, "grad_norm": 0.035305339843034744, "learning_rate": 6.78681972358674e-06, "loss": 0.002, "step": 54430 }, { "epoch": 0.8907796776568764, "grad_norm": 0.00880409125238657, "learning_rate": 6.785486036408546e-06, "loss": 0.0026, "step": 54440 }, { "epoch": 0.8909433036079523, "grad_norm": 0.05229269713163376, "learning_rate": 6.784152203612188e-06, "loss": 0.0009, "step": 54450 }, { "epoch": 0.891106929559028, "grad_norm": 0.12757587432861328, "learning_rate": 6.782818225306443e-06, "loss": 0.0029, "step": 54460 }, { "epoch": 0.8912705555101039, "grad_norm": 0.002629539230838418, "learning_rate": 6.781484101600111e-06, "loss": 0.0019, "step": 54470 }, { "epoch": 0.8914341814611797, "grad_norm": 0.08326783776283264, "learning_rate": 6.780149832601996e-06, "loss": 0.004, "step": 54480 }, { "epoch": 0.8915978074122556, "grad_norm": 0.04776367172598839, "learning_rate": 6.778815418420916e-06, "loss": 0.0025, "step": 54490 }, { "epoch": 0.8917614333633315, "grad_norm": 0.08596014231443405, "learning_rate": 6.777480859165703e-06, "loss": 0.0042, "step": 54500 }, { "epoch": 0.8919250593144072, "grad_norm": 0.1295817494392395, "learning_rate": 6.7761461549452e-06, "loss": 0.0017, "step": 54510 }, { "epoch": 0.8920886852654831, "grad_norm": 0.06088557094335556, "learning_rate": 6.774811305868259e-06, "loss": 0.0016, "step": 54520 }, { "epoch": 0.8922523112165589, "grad_norm": 0.08484260737895966, "learning_rate": 6.773476312043748e-06, "loss": 0.0019, "step": 54530 }, { "epoch": 0.8924159371676348, "grad_norm": 0.09183470159769058, "learning_rate": 6.7721411735805395e-06, "loss": 0.0017, "step": 54540 }, { "epoch": 0.8925795631187107, "grad_norm": 0.10159572958946228, "learning_rate": 6.770805890587529e-06, "loss": 0.0036, "step": 54550 }, { "epoch": 0.8927431890697864, "grad_norm": 0.08508113771677017, "learning_rate": 6.769470463173613e-06, "loss": 0.0028, "step": 54560 }, { "epoch": 0.8929068150208623, "grad_norm": 0.07915453612804413, "learning_rate": 6.768134891447708e-06, "loss": 0.0017, "step": 54570 }, { "epoch": 0.8930704409719381, "grad_norm": 0.13049060106277466, "learning_rate": 6.766799175518736e-06, "loss": 0.0015, "step": 54580 }, { "epoch": 0.893234066923014, "grad_norm": 0.07821011543273926, "learning_rate": 6.7654633154956336e-06, "loss": 0.002, "step": 54590 }, { "epoch": 0.8933976928740899, "grad_norm": 0.07520847767591476, "learning_rate": 6.76412731148735e-06, "loss": 0.0016, "step": 54600 }, { "epoch": 0.8935613188251657, "grad_norm": 0.07387443631887436, "learning_rate": 6.762791163602846e-06, "loss": 0.0024, "step": 54610 }, { "epoch": 0.8937249447762415, "grad_norm": 0.041292138397693634, "learning_rate": 6.761454871951092e-06, "loss": 0.0023, "step": 54620 }, { "epoch": 0.8938885707273173, "grad_norm": 0.06723076850175858, "learning_rate": 6.760118436641072e-06, "loss": 0.0016, "step": 54630 }, { "epoch": 0.8940521966783932, "grad_norm": 0.011046124622225761, "learning_rate": 6.75878185778178e-06, "loss": 0.0017, "step": 54640 }, { "epoch": 0.8942158226294691, "grad_norm": 0.07001950591802597, "learning_rate": 6.757445135482223e-06, "loss": 0.0038, "step": 54650 }, { "epoch": 0.8943794485805449, "grad_norm": 0.15871793031692505, "learning_rate": 6.756108269851421e-06, "loss": 0.002, "step": 54660 }, { "epoch": 0.8945430745316207, "grad_norm": 0.2555104196071625, "learning_rate": 6.754771260998404e-06, "loss": 0.0019, "step": 54670 }, { "epoch": 0.8947067004826965, "grad_norm": 0.07030268013477325, "learning_rate": 6.753434109032212e-06, "loss": 0.0012, "step": 54680 }, { "epoch": 0.8948703264337724, "grad_norm": 0.02897796221077442, "learning_rate": 6.7520968140619e-06, "loss": 0.0014, "step": 54690 }, { "epoch": 0.8950339523848483, "grad_norm": 0.06385869532823563, "learning_rate": 6.750759376196534e-06, "loss": 0.0026, "step": 54700 }, { "epoch": 0.8951975783359241, "grad_norm": 0.05134237930178642, "learning_rate": 6.749421795545188e-06, "loss": 0.0032, "step": 54710 }, { "epoch": 0.895361204287, "grad_norm": 0.15613682568073273, "learning_rate": 6.7480840722169536e-06, "loss": 0.0025, "step": 54720 }, { "epoch": 0.8955248302380757, "grad_norm": 0.0398799404501915, "learning_rate": 6.74674620632093e-06, "loss": 0.0024, "step": 54730 }, { "epoch": 0.8956884561891516, "grad_norm": 0.037842899560928345, "learning_rate": 6.745408197966228e-06, "loss": 0.0016, "step": 54740 }, { "epoch": 0.8958520821402275, "grad_norm": 0.04701778665184975, "learning_rate": 6.744070047261973e-06, "loss": 0.0019, "step": 54750 }, { "epoch": 0.8960157080913033, "grad_norm": 0.02147512137889862, "learning_rate": 6.742731754317297e-06, "loss": 0.0022, "step": 54760 }, { "epoch": 0.8961793340423791, "grad_norm": 0.01278098113834858, "learning_rate": 6.74139331924135e-06, "loss": 0.0016, "step": 54770 }, { "epoch": 0.8963429599934549, "grad_norm": 0.044319625943899155, "learning_rate": 6.740054742143288e-06, "loss": 0.0026, "step": 54780 }, { "epoch": 0.8965065859445308, "grad_norm": 0.16306254267692566, "learning_rate": 6.738716023132281e-06, "loss": 0.0021, "step": 54790 }, { "epoch": 0.8966702118956067, "grad_norm": 0.07865049690008163, "learning_rate": 6.737377162317511e-06, "loss": 0.0023, "step": 54800 }, { "epoch": 0.8968338378466825, "grad_norm": 0.05578213185071945, "learning_rate": 6.7360381598081715e-06, "loss": 0.0023, "step": 54810 }, { "epoch": 0.8969974637977584, "grad_norm": 0.021231284365057945, "learning_rate": 6.7346990157134664e-06, "loss": 0.0017, "step": 54820 }, { "epoch": 0.8971610897488341, "grad_norm": 0.03647147864103317, "learning_rate": 6.73335973014261e-06, "loss": 0.0017, "step": 54830 }, { "epoch": 0.89732471569991, "grad_norm": 0.0962071418762207, "learning_rate": 6.732020303204832e-06, "loss": 0.0018, "step": 54840 }, { "epoch": 0.8974883416509859, "grad_norm": 0.2508019804954529, "learning_rate": 6.730680735009371e-06, "loss": 0.0036, "step": 54850 }, { "epoch": 0.8976519676020617, "grad_norm": 0.05875832587480545, "learning_rate": 6.729341025665477e-06, "loss": 0.0039, "step": 54860 }, { "epoch": 0.8978155935531376, "grad_norm": 0.04013267159461975, "learning_rate": 6.728001175282414e-06, "loss": 0.0019, "step": 54870 }, { "epoch": 0.8979792195042133, "grad_norm": 0.1119929701089859, "learning_rate": 6.726661183969453e-06, "loss": 0.0035, "step": 54880 }, { "epoch": 0.8981428454552892, "grad_norm": 0.10151662677526474, "learning_rate": 6.72532105183588e-06, "loss": 0.0014, "step": 54890 }, { "epoch": 0.8983064714063651, "grad_norm": 0.09523914009332657, "learning_rate": 6.723980778990994e-06, "loss": 0.0013, "step": 54900 }, { "epoch": 0.8984700973574409, "grad_norm": 0.027653254568576813, "learning_rate": 6.7226403655441e-06, "loss": 0.0019, "step": 54910 }, { "epoch": 0.8986337233085168, "grad_norm": 0.0768464058637619, "learning_rate": 6.721299811604518e-06, "loss": 0.0016, "step": 54920 }, { "epoch": 0.8987973492595925, "grad_norm": 0.11579815298318863, "learning_rate": 6.7199591172815816e-06, "loss": 0.003, "step": 54930 }, { "epoch": 0.8989609752106684, "grad_norm": 0.11288614571094513, "learning_rate": 6.71861828268463e-06, "loss": 0.0021, "step": 54940 }, { "epoch": 0.8991246011617443, "grad_norm": 0.11000853031873703, "learning_rate": 6.717277307923019e-06, "loss": 0.0026, "step": 54950 }, { "epoch": 0.8992882271128201, "grad_norm": 0.08903923630714417, "learning_rate": 6.715936193106113e-06, "loss": 0.0011, "step": 54960 }, { "epoch": 0.899451853063896, "grad_norm": 0.06517019122838974, "learning_rate": 6.71459493834329e-06, "loss": 0.0024, "step": 54970 }, { "epoch": 0.8996154790149717, "grad_norm": 0.16023556888103485, "learning_rate": 6.713253543743936e-06, "loss": 0.0019, "step": 54980 }, { "epoch": 0.8997791049660476, "grad_norm": 0.027420731261372566, "learning_rate": 6.711912009417454e-06, "loss": 0.0027, "step": 54990 }, { "epoch": 0.8999427309171234, "grad_norm": 0.10732796788215637, "learning_rate": 6.710570335473252e-06, "loss": 0.002, "step": 55000 }, { "epoch": 0.9001063568681993, "grad_norm": 0.04320235177874565, "learning_rate": 6.709228522020752e-06, "loss": 0.0024, "step": 55010 }, { "epoch": 0.9002699828192752, "grad_norm": 0.03951322287321091, "learning_rate": 6.707886569169391e-06, "loss": 0.0036, "step": 55020 }, { "epoch": 0.900433608770351, "grad_norm": 0.05487572029232979, "learning_rate": 6.7065444770286115e-06, "loss": 0.0021, "step": 55030 }, { "epoch": 0.9005972347214268, "grad_norm": 0.031976018100976944, "learning_rate": 6.70520224570787e-06, "loss": 0.0018, "step": 55040 }, { "epoch": 0.9007608606725026, "grad_norm": 0.09151197224855423, "learning_rate": 6.703859875316635e-06, "loss": 0.0021, "step": 55050 }, { "epoch": 0.9009244866235785, "grad_norm": 0.10370881855487823, "learning_rate": 6.702517365964386e-06, "loss": 0.0027, "step": 55060 }, { "epoch": 0.9010881125746544, "grad_norm": 0.05246930569410324, "learning_rate": 6.7011747177606135e-06, "loss": 0.002, "step": 55070 }, { "epoch": 0.9012517385257302, "grad_norm": 0.05457548052072525, "learning_rate": 6.699831930814819e-06, "loss": 0.0048, "step": 55080 }, { "epoch": 0.901415364476806, "grad_norm": 0.04049844294786453, "learning_rate": 6.698489005236515e-06, "loss": 0.0022, "step": 55090 }, { "epoch": 0.9015789904278818, "grad_norm": 0.060036513954401016, "learning_rate": 6.697145941135227e-06, "loss": 0.0023, "step": 55100 }, { "epoch": 0.9017426163789577, "grad_norm": 0.042780354619026184, "learning_rate": 6.69580273862049e-06, "loss": 0.0021, "step": 55110 }, { "epoch": 0.9019062423300336, "grad_norm": 0.10763350874185562, "learning_rate": 6.694459397801851e-06, "loss": 0.0031, "step": 55120 }, { "epoch": 0.9020698682811094, "grad_norm": 0.06933766603469849, "learning_rate": 6.693115918788869e-06, "loss": 0.0024, "step": 55130 }, { "epoch": 0.9022334942321852, "grad_norm": 0.0266238022595644, "learning_rate": 6.691772301691113e-06, "loss": 0.002, "step": 55140 }, { "epoch": 0.902397120183261, "grad_norm": 0.06552164256572723, "learning_rate": 6.6904285466181625e-06, "loss": 0.0018, "step": 55150 }, { "epoch": 0.9025607461343369, "grad_norm": 0.06240471452474594, "learning_rate": 6.689084653679613e-06, "loss": 0.0026, "step": 55160 }, { "epoch": 0.9027243720854128, "grad_norm": 0.03741876035928726, "learning_rate": 6.687740622985065e-06, "loss": 0.0017, "step": 55170 }, { "epoch": 0.9028879980364886, "grad_norm": 0.02448151633143425, "learning_rate": 6.686396454644134e-06, "loss": 0.002, "step": 55180 }, { "epoch": 0.9030516239875644, "grad_norm": 0.022847145795822144, "learning_rate": 6.6850521487664465e-06, "loss": 0.0024, "step": 55190 }, { "epoch": 0.9032152499386402, "grad_norm": 0.10724387317895889, "learning_rate": 6.6837077054616376e-06, "loss": 0.0023, "step": 55200 }, { "epoch": 0.9033788758897161, "grad_norm": 0.055166326463222504, "learning_rate": 6.682363124839357e-06, "loss": 0.0015, "step": 55210 }, { "epoch": 0.903542501840792, "grad_norm": 0.06569929420948029, "learning_rate": 6.681018407009265e-06, "loss": 0.0023, "step": 55220 }, { "epoch": 0.9037061277918678, "grad_norm": 0.26221126317977905, "learning_rate": 6.679673552081029e-06, "loss": 0.003, "step": 55230 }, { "epoch": 0.9038697537429436, "grad_norm": 0.04133813828229904, "learning_rate": 6.678328560164336e-06, "loss": 0.0016, "step": 55240 }, { "epoch": 0.9040333796940194, "grad_norm": 0.05080864578485489, "learning_rate": 6.676983431368873e-06, "loss": 0.0026, "step": 55250 }, { "epoch": 0.9041970056450953, "grad_norm": 0.1001199409365654, "learning_rate": 6.675638165804348e-06, "loss": 0.0031, "step": 55260 }, { "epoch": 0.9043606315961712, "grad_norm": 0.09347344934940338, "learning_rate": 6.674292763580474e-06, "loss": 0.0024, "step": 55270 }, { "epoch": 0.904524257547247, "grad_norm": 0.04194813594222069, "learning_rate": 6.6729472248069804e-06, "loss": 0.0016, "step": 55280 }, { "epoch": 0.9046878834983229, "grad_norm": 0.06681618839502335, "learning_rate": 6.671601549593603e-06, "loss": 0.0012, "step": 55290 }, { "epoch": 0.9048515094493986, "grad_norm": 0.08751630783081055, "learning_rate": 6.670255738050089e-06, "loss": 0.0015, "step": 55300 }, { "epoch": 0.9050151354004745, "grad_norm": 0.09668885171413422, "learning_rate": 6.6689097902862e-06, "loss": 0.003, "step": 55310 }, { "epoch": 0.9051787613515504, "grad_norm": 0.009523888118565083, "learning_rate": 6.667563706411707e-06, "loss": 0.0024, "step": 55320 }, { "epoch": 0.9053423873026262, "grad_norm": 0.07577191293239594, "learning_rate": 6.666217486536393e-06, "loss": 0.0019, "step": 55330 }, { "epoch": 0.9055060132537021, "grad_norm": 0.28219079971313477, "learning_rate": 6.664871130770047e-06, "loss": 0.0028, "step": 55340 }, { "epoch": 0.9056696392047778, "grad_norm": 0.08842485398054123, "learning_rate": 6.663524639222478e-06, "loss": 0.0015, "step": 55350 }, { "epoch": 0.9058332651558537, "grad_norm": 0.03614573925733566, "learning_rate": 6.662178012003498e-06, "loss": 0.0011, "step": 55360 }, { "epoch": 0.9059968911069296, "grad_norm": 0.008397082798182964, "learning_rate": 6.660831249222936e-06, "loss": 0.0009, "step": 55370 }, { "epoch": 0.9061605170580054, "grad_norm": 0.05506163835525513, "learning_rate": 6.659484350990627e-06, "loss": 0.0028, "step": 55380 }, { "epoch": 0.9063241430090813, "grad_norm": 0.5430269241333008, "learning_rate": 6.65813731741642e-06, "loss": 0.0021, "step": 55390 }, { "epoch": 0.906487768960157, "grad_norm": 0.10015229135751724, "learning_rate": 6.656790148610176e-06, "loss": 0.0028, "step": 55400 }, { "epoch": 0.9066513949112329, "grad_norm": 0.04523171856999397, "learning_rate": 6.655442844681763e-06, "loss": 0.0017, "step": 55410 }, { "epoch": 0.9068150208623088, "grad_norm": 0.16789762675762177, "learning_rate": 6.654095405741067e-06, "loss": 0.0011, "step": 55420 }, { "epoch": 0.9069786468133846, "grad_norm": 0.03918357193470001, "learning_rate": 6.652747831897975e-06, "loss": 0.0043, "step": 55430 }, { "epoch": 0.9071422727644605, "grad_norm": 0.09510177373886108, "learning_rate": 6.651400123262392e-06, "loss": 0.0022, "step": 55440 }, { "epoch": 0.9073058987155362, "grad_norm": 0.08188805729150772, "learning_rate": 6.6500522799442345e-06, "loss": 0.0022, "step": 55450 }, { "epoch": 0.9074695246666121, "grad_norm": 0.03387482091784477, "learning_rate": 6.648704302053428e-06, "loss": 0.0016, "step": 55460 }, { "epoch": 0.907633150617688, "grad_norm": 0.03617274388670921, "learning_rate": 6.647356189699907e-06, "loss": 0.0015, "step": 55470 }, { "epoch": 0.9077967765687638, "grad_norm": 0.11838515847921371, "learning_rate": 6.646007942993619e-06, "loss": 0.0018, "step": 55480 }, { "epoch": 0.9079604025198397, "grad_norm": 0.0534416139125824, "learning_rate": 6.644659562044526e-06, "loss": 0.0015, "step": 55490 }, { "epoch": 0.9081240284709154, "grad_norm": 0.009499077685177326, "learning_rate": 6.643311046962593e-06, "loss": 0.0022, "step": 55500 }, { "epoch": 0.9082876544219913, "grad_norm": 0.047519501298666, "learning_rate": 6.641962397857802e-06, "loss": 0.0032, "step": 55510 }, { "epoch": 0.9084512803730672, "grad_norm": 0.16934987902641296, "learning_rate": 6.640613614840143e-06, "loss": 0.0017, "step": 55520 }, { "epoch": 0.908614906324143, "grad_norm": 0.13198237121105194, "learning_rate": 6.639264698019622e-06, "loss": 0.0027, "step": 55530 }, { "epoch": 0.9087785322752189, "grad_norm": 0.21142633259296417, "learning_rate": 6.637915647506248e-06, "loss": 0.0032, "step": 55540 }, { "epoch": 0.9089421582262946, "grad_norm": 0.04463517293334007, "learning_rate": 6.636566463410047e-06, "loss": 0.0016, "step": 55550 }, { "epoch": 0.9091057841773705, "grad_norm": 0.06253872066736221, "learning_rate": 6.635217145841053e-06, "loss": 0.0022, "step": 55560 }, { "epoch": 0.9092694101284464, "grad_norm": 0.057266756892204285, "learning_rate": 6.633867694909313e-06, "loss": 0.0014, "step": 55570 }, { "epoch": 0.9094330360795222, "grad_norm": 0.10594163089990616, "learning_rate": 6.632518110724882e-06, "loss": 0.0019, "step": 55580 }, { "epoch": 0.9095966620305981, "grad_norm": 0.027606617659330368, "learning_rate": 6.631168393397827e-06, "loss": 0.0014, "step": 55590 }, { "epoch": 0.9097602879816739, "grad_norm": 0.05981414392590523, "learning_rate": 6.629818543038229e-06, "loss": 0.003, "step": 55600 }, { "epoch": 0.9099239139327497, "grad_norm": 0.04365190863609314, "learning_rate": 6.628468559756175e-06, "loss": 0.0021, "step": 55610 }, { "epoch": 0.9100875398838256, "grad_norm": 0.05930043011903763, "learning_rate": 6.627118443661766e-06, "loss": 0.0016, "step": 55620 }, { "epoch": 0.9102511658349014, "grad_norm": 0.0032276480924338102, "learning_rate": 6.6257681948651135e-06, "loss": 0.0021, "step": 55630 }, { "epoch": 0.9104147917859773, "grad_norm": 0.02424374409019947, "learning_rate": 6.624417813476338e-06, "loss": 0.0014, "step": 55640 }, { "epoch": 0.9105784177370531, "grad_norm": 0.06509653478860855, "learning_rate": 6.623067299605572e-06, "loss": 0.002, "step": 55650 }, { "epoch": 0.9107420436881289, "grad_norm": 0.019142763689160347, "learning_rate": 6.621716653362959e-06, "loss": 0.0022, "step": 55660 }, { "epoch": 0.9109056696392048, "grad_norm": 0.02811865508556366, "learning_rate": 6.620365874858654e-06, "loss": 0.0017, "step": 55670 }, { "epoch": 0.9110692955902806, "grad_norm": 0.07260759174823761, "learning_rate": 6.6190149642028205e-06, "loss": 0.0021, "step": 55680 }, { "epoch": 0.9112329215413565, "grad_norm": 0.055015791207551956, "learning_rate": 6.617663921505635e-06, "loss": 0.0031, "step": 55690 }, { "epoch": 0.9113965474924323, "grad_norm": 0.012044398114085197, "learning_rate": 6.616312746877285e-06, "loss": 0.0011, "step": 55700 }, { "epoch": 0.9115601734435081, "grad_norm": 0.022058870643377304, "learning_rate": 6.614961440427965e-06, "loss": 0.0011, "step": 55710 }, { "epoch": 0.911723799394584, "grad_norm": 0.023880165070295334, "learning_rate": 6.613610002267885e-06, "loss": 0.001, "step": 55720 }, { "epoch": 0.9118874253456598, "grad_norm": 0.07042147219181061, "learning_rate": 6.612258432507264e-06, "loss": 0.0013, "step": 55730 }, { "epoch": 0.9120510512967357, "grad_norm": 0.11518635600805283, "learning_rate": 6.6109067312563304e-06, "loss": 0.0027, "step": 55740 }, { "epoch": 0.9122146772478115, "grad_norm": 0.033243972808122635, "learning_rate": 6.609554898625324e-06, "loss": 0.001, "step": 55750 }, { "epoch": 0.9123783031988874, "grad_norm": 0.11612246185541153, "learning_rate": 6.608202934724496e-06, "loss": 0.0021, "step": 55760 }, { "epoch": 0.9125419291499632, "grad_norm": 0.06864006817340851, "learning_rate": 6.606850839664109e-06, "loss": 0.001, "step": 55770 }, { "epoch": 0.912705555101039, "grad_norm": 0.09203996509313583, "learning_rate": 6.605498613554433e-06, "loss": 0.0015, "step": 55780 }, { "epoch": 0.9128691810521149, "grad_norm": 0.026516444981098175, "learning_rate": 6.604146256505755e-06, "loss": 0.0012, "step": 55790 }, { "epoch": 0.9130328070031907, "grad_norm": 0.09326178580522537, "learning_rate": 6.602793768628364e-06, "loss": 0.0019, "step": 55800 }, { "epoch": 0.9131964329542666, "grad_norm": 0.038885004818439484, "learning_rate": 6.601441150032566e-06, "loss": 0.0029, "step": 55810 }, { "epoch": 0.9133600589053424, "grad_norm": 0.03841819614171982, "learning_rate": 6.600088400828678e-06, "loss": 0.0025, "step": 55820 }, { "epoch": 0.9135236848564182, "grad_norm": 0.08481840044260025, "learning_rate": 6.598735521127023e-06, "loss": 0.0019, "step": 55830 }, { "epoch": 0.9136873108074941, "grad_norm": 0.16896317899227142, "learning_rate": 6.597382511037939e-06, "loss": 0.0019, "step": 55840 }, { "epoch": 0.9138509367585699, "grad_norm": 0.09156767278909683, "learning_rate": 6.596029370671771e-06, "loss": 0.0027, "step": 55850 }, { "epoch": 0.9140145627096458, "grad_norm": 0.05977252125740051, "learning_rate": 6.594676100138879e-06, "loss": 0.0021, "step": 55860 }, { "epoch": 0.9141781886607215, "grad_norm": 0.3450954258441925, "learning_rate": 6.593322699549629e-06, "loss": 0.0017, "step": 55870 }, { "epoch": 0.9143418146117974, "grad_norm": 0.06577891856431961, "learning_rate": 6.591969169014401e-06, "loss": 0.0026, "step": 55880 }, { "epoch": 0.9145054405628733, "grad_norm": 0.11398031562566757, "learning_rate": 6.590615508643584e-06, "loss": 0.0018, "step": 55890 }, { "epoch": 0.9146690665139491, "grad_norm": 0.04285018518567085, "learning_rate": 6.589261718547577e-06, "loss": 0.0018, "step": 55900 }, { "epoch": 0.914832692465025, "grad_norm": 0.01052926480770111, "learning_rate": 6.587907798836792e-06, "loss": 0.0023, "step": 55910 }, { "epoch": 0.9149963184161007, "grad_norm": 0.06749926507472992, "learning_rate": 6.586553749621651e-06, "loss": 0.0017, "step": 55920 }, { "epoch": 0.9151599443671766, "grad_norm": 0.051221270114183426, "learning_rate": 6.585199571012581e-06, "loss": 0.0015, "step": 55930 }, { "epoch": 0.9153235703182525, "grad_norm": 0.06960927695035934, "learning_rate": 6.5838452631200304e-06, "loss": 0.001, "step": 55940 }, { "epoch": 0.9154871962693283, "grad_norm": 0.05837035924196243, "learning_rate": 6.582490826054446e-06, "loss": 0.0017, "step": 55950 }, { "epoch": 0.9156508222204042, "grad_norm": 0.05886893719434738, "learning_rate": 6.5811362599262965e-06, "loss": 0.0036, "step": 55960 }, { "epoch": 0.91581444817148, "grad_norm": 0.11389671266078949, "learning_rate": 6.579781564846052e-06, "loss": 0.0027, "step": 55970 }, { "epoch": 0.9159780741225558, "grad_norm": 0.06820515543222427, "learning_rate": 6.578426740924197e-06, "loss": 0.0034, "step": 55980 }, { "epoch": 0.9161417000736317, "grad_norm": 0.21691784262657166, "learning_rate": 6.577071788271229e-06, "loss": 0.0027, "step": 55990 }, { "epoch": 0.9163053260247075, "grad_norm": 0.11636079102754593, "learning_rate": 6.5757167069976505e-06, "loss": 0.0014, "step": 56000 }, { "epoch": 0.9164689519757834, "grad_norm": 0.24772250652313232, "learning_rate": 6.574361497213978e-06, "loss": 0.003, "step": 56010 }, { "epoch": 0.9166325779268591, "grad_norm": 0.02042141556739807, "learning_rate": 6.573006159030739e-06, "loss": 0.0027, "step": 56020 }, { "epoch": 0.916796203877935, "grad_norm": 0.0673416405916214, "learning_rate": 6.571650692558469e-06, "loss": 0.0015, "step": 56030 }, { "epoch": 0.9169598298290109, "grad_norm": 0.08954409509897232, "learning_rate": 6.570295097907718e-06, "loss": 0.0041, "step": 56040 }, { "epoch": 0.9171234557800867, "grad_norm": 0.09719007462263107, "learning_rate": 6.568939375189038e-06, "loss": 0.0023, "step": 56050 }, { "epoch": 0.9172870817311626, "grad_norm": 0.09046126902103424, "learning_rate": 6.5675835245130025e-06, "loss": 0.0024, "step": 56060 }, { "epoch": 0.9174507076822384, "grad_norm": 0.06140618771314621, "learning_rate": 6.566227545990189e-06, "loss": 0.0026, "step": 56070 }, { "epoch": 0.9176143336333142, "grad_norm": 0.0764274075627327, "learning_rate": 6.564871439731184e-06, "loss": 0.0018, "step": 56080 }, { "epoch": 0.9177779595843901, "grad_norm": 0.02905842289328575, "learning_rate": 6.563515205846589e-06, "loss": 0.0021, "step": 56090 }, { "epoch": 0.9179415855354659, "grad_norm": 0.04256400838494301, "learning_rate": 6.562158844447013e-06, "loss": 0.0017, "step": 56100 }, { "epoch": 0.9181052114865418, "grad_norm": 0.08738549798727036, "learning_rate": 6.560802355643077e-06, "loss": 0.0033, "step": 56110 }, { "epoch": 0.9182688374376176, "grad_norm": 0.03181087225675583, "learning_rate": 6.5594457395454115e-06, "loss": 0.0016, "step": 56120 }, { "epoch": 0.9184324633886934, "grad_norm": 0.027590539306402206, "learning_rate": 6.558088996264656e-06, "loss": 0.0031, "step": 56130 }, { "epoch": 0.9185960893397693, "grad_norm": 0.07463917881250381, "learning_rate": 6.556732125911463e-06, "loss": 0.0017, "step": 56140 }, { "epoch": 0.9187597152908451, "grad_norm": 0.013279673643410206, "learning_rate": 6.555375128596495e-06, "loss": 0.0012, "step": 56150 }, { "epoch": 0.918923341241921, "grad_norm": 0.09867870807647705, "learning_rate": 6.554018004430424e-06, "loss": 0.0017, "step": 56160 }, { "epoch": 0.9190869671929968, "grad_norm": 0.22824370861053467, "learning_rate": 6.552660753523931e-06, "loss": 0.0014, "step": 56170 }, { "epoch": 0.9192505931440726, "grad_norm": 0.033258065581321716, "learning_rate": 6.55130337598771e-06, "loss": 0.0025, "step": 56180 }, { "epoch": 0.9194142190951485, "grad_norm": 0.033072132617235184, "learning_rate": 6.549945871932463e-06, "loss": 0.001, "step": 56190 }, { "epoch": 0.9195778450462243, "grad_norm": 0.05221541225910187, "learning_rate": 6.548588241468904e-06, "loss": 0.0015, "step": 56200 }, { "epoch": 0.9197414709973002, "grad_norm": 0.024098750203847885, "learning_rate": 6.547230484707758e-06, "loss": 0.0022, "step": 56210 }, { "epoch": 0.919905096948376, "grad_norm": 0.04554083198308945, "learning_rate": 6.545872601759756e-06, "loss": 0.003, "step": 56220 }, { "epoch": 0.9200687228994519, "grad_norm": 0.19266685843467712, "learning_rate": 6.544514592735645e-06, "loss": 0.0017, "step": 56230 }, { "epoch": 0.9202323488505277, "grad_norm": 0.02726871706545353, "learning_rate": 6.5431564577461795e-06, "loss": 0.0017, "step": 56240 }, { "epoch": 0.9203959748016035, "grad_norm": 0.05316530168056488, "learning_rate": 6.541798196902123e-06, "loss": 0.0018, "step": 56250 }, { "epoch": 0.9205596007526794, "grad_norm": 0.03866003081202507, "learning_rate": 6.5404398103142495e-06, "loss": 0.0013, "step": 56260 }, { "epoch": 0.9207232267037552, "grad_norm": 0.2395082712173462, "learning_rate": 6.5390812980933485e-06, "loss": 0.0031, "step": 56270 }, { "epoch": 0.9208868526548311, "grad_norm": 0.038763076066970825, "learning_rate": 6.537722660350212e-06, "loss": 0.002, "step": 56280 }, { "epoch": 0.9210504786059069, "grad_norm": 0.023509008809924126, "learning_rate": 6.536363897195648e-06, "loss": 0.0018, "step": 56290 }, { "epoch": 0.9212141045569827, "grad_norm": 0.10502973943948746, "learning_rate": 6.535005008740472e-06, "loss": 0.0019, "step": 56300 }, { "epoch": 0.9213777305080586, "grad_norm": 0.05079428851604462, "learning_rate": 6.533645995095508e-06, "loss": 0.0015, "step": 56310 }, { "epoch": 0.9215413564591344, "grad_norm": 0.09697245061397552, "learning_rate": 6.532286856371596e-06, "loss": 0.0026, "step": 56320 }, { "epoch": 0.9217049824102103, "grad_norm": 0.04611137881875038, "learning_rate": 6.530927592679581e-06, "loss": 0.0011, "step": 56330 }, { "epoch": 0.9218686083612861, "grad_norm": 0.11595282703638077, "learning_rate": 6.52956820413032e-06, "loss": 0.0019, "step": 56340 }, { "epoch": 0.9220322343123619, "grad_norm": 0.20963560044765472, "learning_rate": 6.528208690834681e-06, "loss": 0.0019, "step": 56350 }, { "epoch": 0.9221958602634378, "grad_norm": 0.14181086421012878, "learning_rate": 6.52684905290354e-06, "loss": 0.002, "step": 56360 }, { "epoch": 0.9223594862145136, "grad_norm": 0.019518647342920303, "learning_rate": 6.525489290447785e-06, "loss": 0.003, "step": 56370 }, { "epoch": 0.9225231121655895, "grad_norm": 0.022483259439468384, "learning_rate": 6.524129403578314e-06, "loss": 0.0023, "step": 56380 }, { "epoch": 0.9226867381166654, "grad_norm": 0.053641557693481445, "learning_rate": 6.522769392406035e-06, "loss": 0.0021, "step": 56390 }, { "epoch": 0.9228503640677411, "grad_norm": 0.07724954187870026, "learning_rate": 6.521409257041864e-06, "loss": 0.0021, "step": 56400 }, { "epoch": 0.923013990018817, "grad_norm": 0.09131883084774017, "learning_rate": 6.520048997596732e-06, "loss": 0.0014, "step": 56410 }, { "epoch": 0.9231776159698928, "grad_norm": 0.0458979569375515, "learning_rate": 6.518688614181575e-06, "loss": 0.0015, "step": 56420 }, { "epoch": 0.9233412419209687, "grad_norm": 0.06478177011013031, "learning_rate": 6.517328106907339e-06, "loss": 0.0011, "step": 56430 }, { "epoch": 0.9235048678720446, "grad_norm": 0.1232982873916626, "learning_rate": 6.5159674758849875e-06, "loss": 0.0026, "step": 56440 }, { "epoch": 0.9236684938231203, "grad_norm": 0.054083600640296936, "learning_rate": 6.514606721225485e-06, "loss": 0.0012, "step": 56450 }, { "epoch": 0.9238321197741962, "grad_norm": 0.06533703953027725, "learning_rate": 6.513245843039812e-06, "loss": 0.0016, "step": 56460 }, { "epoch": 0.923995745725272, "grad_norm": 0.019338658079504967, "learning_rate": 6.511884841438958e-06, "loss": 0.002, "step": 56470 }, { "epoch": 0.9241593716763479, "grad_norm": 0.08700487017631531, "learning_rate": 6.5105237165339186e-06, "loss": 0.0031, "step": 56480 }, { "epoch": 0.9243229976274238, "grad_norm": 0.0781564712524414, "learning_rate": 6.509162468435704e-06, "loss": 0.0028, "step": 56490 }, { "epoch": 0.9244866235784995, "grad_norm": 0.0034663050901144743, "learning_rate": 6.507801097255334e-06, "loss": 0.002, "step": 56500 }, { "epoch": 0.9246502495295754, "grad_norm": 0.007611203473061323, "learning_rate": 6.506439603103836e-06, "loss": 0.0014, "step": 56510 }, { "epoch": 0.9248138754806512, "grad_norm": 0.06650002300739288, "learning_rate": 6.50507798609225e-06, "loss": 0.0016, "step": 56520 }, { "epoch": 0.9249775014317271, "grad_norm": 0.07986923307180405, "learning_rate": 6.503716246331623e-06, "loss": 0.002, "step": 56530 }, { "epoch": 0.925141127382803, "grad_norm": 0.14060962200164795, "learning_rate": 6.502354383933015e-06, "loss": 0.0017, "step": 56540 }, { "epoch": 0.9253047533338787, "grad_norm": 0.012925864197313786, "learning_rate": 6.5009923990074956e-06, "loss": 0.0025, "step": 56550 }, { "epoch": 0.9254683792849546, "grad_norm": 0.02445130981504917, "learning_rate": 6.499630291666143e-06, "loss": 0.0013, "step": 56560 }, { "epoch": 0.9256320052360304, "grad_norm": 0.1041211262345314, "learning_rate": 6.4982680620200455e-06, "loss": 0.0021, "step": 56570 }, { "epoch": 0.9257956311871063, "grad_norm": 0.12634573876857758, "learning_rate": 6.496905710180304e-06, "loss": 0.0026, "step": 56580 }, { "epoch": 0.9259592571381822, "grad_norm": 0.05812188982963562, "learning_rate": 6.495543236258024e-06, "loss": 0.0024, "step": 56590 }, { "epoch": 0.9261228830892579, "grad_norm": 0.24218805134296417, "learning_rate": 6.494180640364326e-06, "loss": 0.0032, "step": 56600 }, { "epoch": 0.9262865090403338, "grad_norm": 0.036598656326532364, "learning_rate": 6.492817922610339e-06, "loss": 0.0028, "step": 56610 }, { "epoch": 0.9264501349914096, "grad_norm": 0.12396228313446045, "learning_rate": 6.491455083107201e-06, "loss": 0.002, "step": 56620 }, { "epoch": 0.9266137609424855, "grad_norm": 0.19481991231441498, "learning_rate": 6.490092121966061e-06, "loss": 0.0017, "step": 56630 }, { "epoch": 0.9267773868935614, "grad_norm": 0.0038094609044492245, "learning_rate": 6.488729039298077e-06, "loss": 0.0025, "step": 56640 }, { "epoch": 0.9269410128446371, "grad_norm": 0.08035171031951904, "learning_rate": 6.4873658352144185e-06, "loss": 0.003, "step": 56650 }, { "epoch": 0.927104638795713, "grad_norm": 0.08920537680387497, "learning_rate": 6.486002509826261e-06, "loss": 0.0016, "step": 56660 }, { "epoch": 0.9272682647467888, "grad_norm": 0.04345780983567238, "learning_rate": 6.484639063244797e-06, "loss": 0.0012, "step": 56670 }, { "epoch": 0.9274318906978647, "grad_norm": 0.05059542506933212, "learning_rate": 6.4832754955812204e-06, "loss": 0.002, "step": 56680 }, { "epoch": 0.9275955166489406, "grad_norm": 0.020313138142228127, "learning_rate": 6.481911806946743e-06, "loss": 0.0012, "step": 56690 }, { "epoch": 0.9277591426000164, "grad_norm": 0.05528176575899124, "learning_rate": 6.4805479974525786e-06, "loss": 0.0019, "step": 56700 }, { "epoch": 0.9279227685510922, "grad_norm": 0.06339520961046219, "learning_rate": 6.479184067209958e-06, "loss": 0.0019, "step": 56710 }, { "epoch": 0.928086394502168, "grad_norm": 0.08688001334667206, "learning_rate": 6.477820016330117e-06, "loss": 0.0029, "step": 56720 }, { "epoch": 0.9282500204532439, "grad_norm": 0.05946914106607437, "learning_rate": 6.476455844924303e-06, "loss": 0.0033, "step": 56730 }, { "epoch": 0.9284136464043197, "grad_norm": 0.11161893606185913, "learning_rate": 6.475091553103774e-06, "loss": 0.0017, "step": 56740 }, { "epoch": 0.9285772723553956, "grad_norm": 0.16973428428173065, "learning_rate": 6.473727140979798e-06, "loss": 0.0019, "step": 56750 }, { "epoch": 0.9287408983064714, "grad_norm": 0.046578556299209595, "learning_rate": 6.4723626086636486e-06, "loss": 0.002, "step": 56760 }, { "epoch": 0.9289045242575472, "grad_norm": 0.13174575567245483, "learning_rate": 6.470997956266614e-06, "loss": 0.0032, "step": 56770 }, { "epoch": 0.9290681502086231, "grad_norm": 0.03182876110076904, "learning_rate": 6.469633183899992e-06, "loss": 0.0013, "step": 56780 }, { "epoch": 0.9292317761596989, "grad_norm": 0.028548920527100563, "learning_rate": 6.468268291675086e-06, "loss": 0.0021, "step": 56790 }, { "epoch": 0.9293954021107748, "grad_norm": 0.13532426953315735, "learning_rate": 6.466903279703215e-06, "loss": 0.0019, "step": 56800 }, { "epoch": 0.9295590280618506, "grad_norm": 0.11298929154872894, "learning_rate": 6.465538148095704e-06, "loss": 0.0023, "step": 56810 }, { "epoch": 0.9297226540129264, "grad_norm": 0.12641289830207825, "learning_rate": 6.464172896963886e-06, "loss": 0.0019, "step": 56820 }, { "epoch": 0.9298862799640023, "grad_norm": 0.0678074061870575, "learning_rate": 6.462807526419109e-06, "loss": 0.0018, "step": 56830 }, { "epoch": 0.9300499059150781, "grad_norm": 0.1123092770576477, "learning_rate": 6.461442036572727e-06, "loss": 0.0023, "step": 56840 }, { "epoch": 0.930213531866154, "grad_norm": 0.05744916945695877, "learning_rate": 6.460076427536105e-06, "loss": 0.0018, "step": 56850 }, { "epoch": 0.9303771578172298, "grad_norm": 0.12219595909118652, "learning_rate": 6.4587106994206176e-06, "loss": 0.0029, "step": 56860 }, { "epoch": 0.9305407837683056, "grad_norm": 0.08028897643089294, "learning_rate": 6.457344852337648e-06, "loss": 0.0013, "step": 56870 }, { "epoch": 0.9307044097193815, "grad_norm": 0.007140877656638622, "learning_rate": 6.45597888639859e-06, "loss": 0.0015, "step": 56880 }, { "epoch": 0.9308680356704573, "grad_norm": 0.05165205895900726, "learning_rate": 6.454612801714848e-06, "loss": 0.0023, "step": 56890 }, { "epoch": 0.9310316616215332, "grad_norm": 0.027849525213241577, "learning_rate": 6.4532465983978354e-06, "loss": 0.0008, "step": 56900 }, { "epoch": 0.931195287572609, "grad_norm": 0.032669514417648315, "learning_rate": 6.451880276558974e-06, "loss": 0.0029, "step": 56910 }, { "epoch": 0.9313589135236848, "grad_norm": 0.04178975522518158, "learning_rate": 6.450513836309697e-06, "loss": 0.0028, "step": 56920 }, { "epoch": 0.9315225394747607, "grad_norm": 0.07078851014375687, "learning_rate": 6.449147277761447e-06, "loss": 0.0014, "step": 56930 }, { "epoch": 0.9316861654258365, "grad_norm": 0.15249699354171753, "learning_rate": 6.447780601025676e-06, "loss": 0.0025, "step": 56940 }, { "epoch": 0.9318497913769124, "grad_norm": 0.13929148018360138, "learning_rate": 6.446413806213845e-06, "loss": 0.0019, "step": 56950 }, { "epoch": 0.9320134173279883, "grad_norm": 0.08149644732475281, "learning_rate": 6.445046893437423e-06, "loss": 0.002, "step": 56960 }, { "epoch": 0.932177043279064, "grad_norm": 0.036492589861154556, "learning_rate": 6.443679862807895e-06, "loss": 0.0021, "step": 56970 }, { "epoch": 0.9323406692301399, "grad_norm": 0.06172330677509308, "learning_rate": 6.442312714436748e-06, "loss": 0.0008, "step": 56980 }, { "epoch": 0.9325042951812157, "grad_norm": 0.03780542314052582, "learning_rate": 6.4409454484354835e-06, "loss": 0.0023, "step": 56990 }, { "epoch": 0.9326679211322916, "grad_norm": 0.0671754777431488, "learning_rate": 6.4395780649156115e-06, "loss": 0.0021, "step": 57000 }, { "epoch": 0.9328315470833675, "grad_norm": 0.02179705537855625, "learning_rate": 6.438210563988649e-06, "loss": 0.0025, "step": 57010 }, { "epoch": 0.9329951730344432, "grad_norm": 0.03471015393733978, "learning_rate": 6.436842945766127e-06, "loss": 0.0019, "step": 57020 }, { "epoch": 0.9331587989855191, "grad_norm": 0.2868771255016327, "learning_rate": 6.435475210359583e-06, "loss": 0.0019, "step": 57030 }, { "epoch": 0.9333224249365949, "grad_norm": 0.006691084709018469, "learning_rate": 6.434107357880565e-06, "loss": 0.0011, "step": 57040 }, { "epoch": 0.9334860508876708, "grad_norm": 0.0503176786005497, "learning_rate": 6.4327393884406295e-06, "loss": 0.0036, "step": 57050 }, { "epoch": 0.9336496768387467, "grad_norm": 0.10211300849914551, "learning_rate": 6.431371302151344e-06, "loss": 0.0034, "step": 57060 }, { "epoch": 0.9338133027898224, "grad_norm": 0.18614116311073303, "learning_rate": 6.430003099124285e-06, "loss": 0.0021, "step": 57070 }, { "epoch": 0.9339769287408983, "grad_norm": 0.12661704421043396, "learning_rate": 6.428634779471039e-06, "loss": 0.002, "step": 57080 }, { "epoch": 0.9341405546919741, "grad_norm": 0.0122816301882267, "learning_rate": 6.4272663433032e-06, "loss": 0.0025, "step": 57090 }, { "epoch": 0.93430418064305, "grad_norm": 0.0325094610452652, "learning_rate": 6.4258977907323764e-06, "loss": 0.0018, "step": 57100 }, { "epoch": 0.9344678065941259, "grad_norm": 0.0916358008980751, "learning_rate": 6.424529121870179e-06, "loss": 0.0028, "step": 57110 }, { "epoch": 0.9346314325452016, "grad_norm": 0.05081067234277725, "learning_rate": 6.423160336828232e-06, "loss": 0.0026, "step": 57120 }, { "epoch": 0.9347950584962775, "grad_norm": 0.05515988543629646, "learning_rate": 6.421791435718171e-06, "loss": 0.0022, "step": 57130 }, { "epoch": 0.9349586844473533, "grad_norm": 0.03500219061970711, "learning_rate": 6.420422418651637e-06, "loss": 0.0016, "step": 57140 }, { "epoch": 0.9351223103984292, "grad_norm": 0.10497685521841049, "learning_rate": 6.419053285740285e-06, "loss": 0.0019, "step": 57150 }, { "epoch": 0.9352859363495051, "grad_norm": 0.14834196865558624, "learning_rate": 6.417684037095774e-06, "loss": 0.0026, "step": 57160 }, { "epoch": 0.9354495623005809, "grad_norm": 0.0800432413816452, "learning_rate": 6.416314672829775e-06, "loss": 0.0016, "step": 57170 }, { "epoch": 0.9356131882516567, "grad_norm": 0.021429333835840225, "learning_rate": 6.414945193053972e-06, "loss": 0.0014, "step": 57180 }, { "epoch": 0.9357768142027325, "grad_norm": 0.08037056773900986, "learning_rate": 6.413575597880052e-06, "loss": 0.0013, "step": 57190 }, { "epoch": 0.9359404401538084, "grad_norm": 0.0472366102039814, "learning_rate": 6.412205887419716e-06, "loss": 0.0017, "step": 57200 }, { "epoch": 0.9361040661048843, "grad_norm": 0.019507095217704773, "learning_rate": 6.4108360617846735e-06, "loss": 0.002, "step": 57210 }, { "epoch": 0.93626769205596, "grad_norm": 0.019027838483452797, "learning_rate": 6.40946612108664e-06, "loss": 0.0015, "step": 57220 }, { "epoch": 0.9364313180070359, "grad_norm": 0.06351927667856216, "learning_rate": 6.408096065437346e-06, "loss": 0.0015, "step": 57230 }, { "epoch": 0.9365949439581117, "grad_norm": 0.05936245620250702, "learning_rate": 6.406725894948528e-06, "loss": 0.0019, "step": 57240 }, { "epoch": 0.9367585699091876, "grad_norm": 0.044367656111717224, "learning_rate": 6.405355609731931e-06, "loss": 0.002, "step": 57250 }, { "epoch": 0.9369221958602635, "grad_norm": 0.09417359530925751, "learning_rate": 6.403985209899313e-06, "loss": 0.0027, "step": 57260 }, { "epoch": 0.9370858218113393, "grad_norm": 0.2645261585712433, "learning_rate": 6.402614695562437e-06, "loss": 0.0037, "step": 57270 }, { "epoch": 0.9372494477624151, "grad_norm": 0.13693548738956451, "learning_rate": 6.40124406683308e-06, "loss": 0.0019, "step": 57280 }, { "epoch": 0.9374130737134909, "grad_norm": 0.11871176958084106, "learning_rate": 6.399873323823022e-06, "loss": 0.0014, "step": 57290 }, { "epoch": 0.9375766996645668, "grad_norm": 0.17951926589012146, "learning_rate": 6.398502466644061e-06, "loss": 0.0015, "step": 57300 }, { "epoch": 0.9377403256156427, "grad_norm": 0.0491395965218544, "learning_rate": 6.397131495407997e-06, "loss": 0.0024, "step": 57310 }, { "epoch": 0.9379039515667185, "grad_norm": 0.2597217559814453, "learning_rate": 6.395760410226641e-06, "loss": 0.002, "step": 57320 }, { "epoch": 0.9380675775177943, "grad_norm": 0.13409483432769775, "learning_rate": 6.394389211211813e-06, "loss": 0.0014, "step": 57330 }, { "epoch": 0.9382312034688701, "grad_norm": 0.06022542715072632, "learning_rate": 6.393017898475346e-06, "loss": 0.0028, "step": 57340 }, { "epoch": 0.938394829419946, "grad_norm": 0.03732301667332649, "learning_rate": 6.39164647212908e-06, "loss": 0.0018, "step": 57350 }, { "epoch": 0.9385584553710219, "grad_norm": 0.026547512039542198, "learning_rate": 6.390274932284861e-06, "loss": 0.002, "step": 57360 }, { "epoch": 0.9387220813220977, "grad_norm": 0.10671032965183258, "learning_rate": 6.38890327905455e-06, "loss": 0.0016, "step": 57370 }, { "epoch": 0.9388857072731736, "grad_norm": 0.08442399650812149, "learning_rate": 6.387531512550013e-06, "loss": 0.0024, "step": 57380 }, { "epoch": 0.9390493332242493, "grad_norm": 0.008403069339692593, "learning_rate": 6.386159632883125e-06, "loss": 0.0016, "step": 57390 }, { "epoch": 0.9392129591753252, "grad_norm": 0.03750469908118248, "learning_rate": 6.384787640165775e-06, "loss": 0.0016, "step": 57400 }, { "epoch": 0.9393765851264011, "grad_norm": 0.09813922643661499, "learning_rate": 6.383415534509856e-06, "loss": 0.002, "step": 57410 }, { "epoch": 0.9395402110774769, "grad_norm": 0.2458689659833908, "learning_rate": 6.382043316027272e-06, "loss": 0.0019, "step": 57420 }, { "epoch": 0.9397038370285528, "grad_norm": 0.14486080408096313, "learning_rate": 6.380670984829939e-06, "loss": 0.0021, "step": 57430 }, { "epoch": 0.9398674629796285, "grad_norm": 0.14126110076904297, "learning_rate": 6.379298541029777e-06, "loss": 0.0024, "step": 57440 }, { "epoch": 0.9400310889307044, "grad_norm": 0.09587261080741882, "learning_rate": 6.377925984738718e-06, "loss": 0.0028, "step": 57450 }, { "epoch": 0.9401947148817803, "grad_norm": 0.06749685853719711, "learning_rate": 6.376553316068705e-06, "loss": 0.0018, "step": 57460 }, { "epoch": 0.9403583408328561, "grad_norm": 0.0586259700357914, "learning_rate": 6.375180535131686e-06, "loss": 0.0053, "step": 57470 }, { "epoch": 0.940521966783932, "grad_norm": 0.1960633546113968, "learning_rate": 6.373807642039622e-06, "loss": 0.0033, "step": 57480 }, { "epoch": 0.9406855927350077, "grad_norm": 0.05132591351866722, "learning_rate": 6.372434636904481e-06, "loss": 0.0012, "step": 57490 }, { "epoch": 0.9408492186860836, "grad_norm": 0.05858321860432625, "learning_rate": 6.37106151983824e-06, "loss": 0.0022, "step": 57500 }, { "epoch": 0.9410128446371595, "grad_norm": 0.1998930275440216, "learning_rate": 6.3696882909528865e-06, "loss": 0.0016, "step": 57510 }, { "epoch": 0.9411764705882353, "grad_norm": 0.05957844853401184, "learning_rate": 6.368314950360416e-06, "loss": 0.0019, "step": 57520 }, { "epoch": 0.9413400965393112, "grad_norm": 0.06080023944377899, "learning_rate": 6.366941498172833e-06, "loss": 0.0011, "step": 57530 }, { "epoch": 0.9415037224903869, "grad_norm": 0.04228433221578598, "learning_rate": 6.365567934502153e-06, "loss": 0.0038, "step": 57540 }, { "epoch": 0.9416673484414628, "grad_norm": 0.03258278965950012, "learning_rate": 6.364194259460397e-06, "loss": 0.0021, "step": 57550 }, { "epoch": 0.9418309743925387, "grad_norm": 0.016062675043940544, "learning_rate": 6.3628204731596e-06, "loss": 0.0023, "step": 57560 }, { "epoch": 0.9419946003436145, "grad_norm": 0.12362527847290039, "learning_rate": 6.361446575711801e-06, "loss": 0.0025, "step": 57570 }, { "epoch": 0.9421582262946904, "grad_norm": 0.0918710008263588, "learning_rate": 6.360072567229052e-06, "loss": 0.0023, "step": 57580 }, { "epoch": 0.9423218522457661, "grad_norm": 0.031730104237794876, "learning_rate": 6.358698447823411e-06, "loss": 0.0016, "step": 57590 }, { "epoch": 0.942485478196842, "grad_norm": 0.04843221232295036, "learning_rate": 6.357324217606948e-06, "loss": 0.002, "step": 57600 }, { "epoch": 0.9426491041479178, "grad_norm": 0.031060557812452316, "learning_rate": 6.355949876691739e-06, "loss": 0.0007, "step": 57610 }, { "epoch": 0.9428127300989937, "grad_norm": 0.1420125663280487, "learning_rate": 6.354575425189873e-06, "loss": 0.0023, "step": 57620 }, { "epoch": 0.9429763560500696, "grad_norm": 0.04344609007239342, "learning_rate": 6.353200863213442e-06, "loss": 0.0022, "step": 57630 }, { "epoch": 0.9431399820011453, "grad_norm": 0.11959046870470047, "learning_rate": 6.351826190874554e-06, "loss": 0.0018, "step": 57640 }, { "epoch": 0.9433036079522212, "grad_norm": 0.05591941624879837, "learning_rate": 6.350451408285321e-06, "loss": 0.0019, "step": 57650 }, { "epoch": 0.943467233903297, "grad_norm": 0.047392845153808594, "learning_rate": 6.349076515557865e-06, "loss": 0.0023, "step": 57660 }, { "epoch": 0.9436308598543729, "grad_norm": 0.16122494637966156, "learning_rate": 6.3477015128043186e-06, "loss": 0.0019, "step": 57670 }, { "epoch": 0.9437944858054488, "grad_norm": 0.0406617671251297, "learning_rate": 6.346326400136822e-06, "loss": 0.0096, "step": 57680 }, { "epoch": 0.9439581117565246, "grad_norm": 0.03396273031830788, "learning_rate": 6.344951177667525e-06, "loss": 0.0017, "step": 57690 }, { "epoch": 0.9441217377076004, "grad_norm": 0.05563920736312866, "learning_rate": 6.3435758455085875e-06, "loss": 0.0016, "step": 57700 }, { "epoch": 0.9442853636586762, "grad_norm": 0.11514732241630554, "learning_rate": 6.342200403772173e-06, "loss": 0.0023, "step": 57710 }, { "epoch": 0.9444489896097521, "grad_norm": 0.03952418640255928, "learning_rate": 6.340824852570461e-06, "loss": 0.0036, "step": 57720 }, { "epoch": 0.944612615560828, "grad_norm": 0.048269156366586685, "learning_rate": 6.339449192015636e-06, "loss": 0.0015, "step": 57730 }, { "epoch": 0.9447762415119038, "grad_norm": 0.03346279263496399, "learning_rate": 6.338073422219891e-06, "loss": 0.0016, "step": 57740 }, { "epoch": 0.9449398674629796, "grad_norm": 0.11679522693157196, "learning_rate": 6.336697543295432e-06, "loss": 0.0021, "step": 57750 }, { "epoch": 0.9451034934140554, "grad_norm": 0.10253500938415527, "learning_rate": 6.3353215553544686e-06, "loss": 0.0031, "step": 57760 }, { "epoch": 0.9452671193651313, "grad_norm": 0.08026383817195892, "learning_rate": 6.333945458509222e-06, "loss": 0.0028, "step": 57770 }, { "epoch": 0.9454307453162072, "grad_norm": 0.05896885693073273, "learning_rate": 6.332569252871923e-06, "loss": 0.0017, "step": 57780 }, { "epoch": 0.945594371267283, "grad_norm": 0.026631180197000504, "learning_rate": 6.331192938554809e-06, "loss": 0.0014, "step": 57790 }, { "epoch": 0.9457579972183588, "grad_norm": 0.03806743025779724, "learning_rate": 6.329816515670127e-06, "loss": 0.0013, "step": 57800 }, { "epoch": 0.9459216231694346, "grad_norm": 0.1733967363834381, "learning_rate": 6.328439984330136e-06, "loss": 0.003, "step": 57810 }, { "epoch": 0.9460852491205105, "grad_norm": 0.18561020493507385, "learning_rate": 6.327063344647098e-06, "loss": 0.0014, "step": 57820 }, { "epoch": 0.9462488750715864, "grad_norm": 0.07884349673986435, "learning_rate": 6.32568659673329e-06, "loss": 0.0021, "step": 57830 }, { "epoch": 0.9464125010226622, "grad_norm": 0.11453360319137573, "learning_rate": 6.324309740700993e-06, "loss": 0.0023, "step": 57840 }, { "epoch": 0.946576126973738, "grad_norm": 0.032564714550971985, "learning_rate": 6.3229327766624996e-06, "loss": 0.002, "step": 57850 }, { "epoch": 0.9467397529248138, "grad_norm": 0.050830595195293427, "learning_rate": 6.321555704730109e-06, "loss": 0.0023, "step": 57860 }, { "epoch": 0.9469033788758897, "grad_norm": 0.04336218163371086, "learning_rate": 6.320178525016133e-06, "loss": 0.0022, "step": 57870 }, { "epoch": 0.9470670048269656, "grad_norm": 0.1325337290763855, "learning_rate": 6.318801237632887e-06, "loss": 0.0033, "step": 57880 }, { "epoch": 0.9472306307780414, "grad_norm": 0.06420790404081345, "learning_rate": 6.317423842692699e-06, "loss": 0.0017, "step": 57890 }, { "epoch": 0.9473942567291173, "grad_norm": 0.05658198520541191, "learning_rate": 6.316046340307905e-06, "loss": 0.0012, "step": 57900 }, { "epoch": 0.947557882680193, "grad_norm": 0.04964936152100563, "learning_rate": 6.314668730590849e-06, "loss": 0.0037, "step": 57910 }, { "epoch": 0.9477215086312689, "grad_norm": 0.03604147210717201, "learning_rate": 6.313291013653884e-06, "loss": 0.0023, "step": 57920 }, { "epoch": 0.9478851345823448, "grad_norm": 0.05656878650188446, "learning_rate": 6.311913189609372e-06, "loss": 0.0019, "step": 57930 }, { "epoch": 0.9480487605334206, "grad_norm": 0.009907550178468227, "learning_rate": 6.3105352585696845e-06, "loss": 0.0008, "step": 57940 }, { "epoch": 0.9482123864844965, "grad_norm": 0.12604115903377533, "learning_rate": 6.3091572206472e-06, "loss": 0.0018, "step": 57950 }, { "epoch": 0.9483760124355722, "grad_norm": 0.04691174626350403, "learning_rate": 6.307779075954307e-06, "loss": 0.0023, "step": 57960 }, { "epoch": 0.9485396383866481, "grad_norm": 0.03298064321279526, "learning_rate": 6.3064008246034e-06, "loss": 0.0012, "step": 57970 }, { "epoch": 0.948703264337724, "grad_norm": 0.08867722749710083, "learning_rate": 6.305022466706889e-06, "loss": 0.0027, "step": 57980 }, { "epoch": 0.9488668902887998, "grad_norm": 0.09201445430517197, "learning_rate": 6.303644002377185e-06, "loss": 0.0015, "step": 57990 }, { "epoch": 0.9490305162398757, "grad_norm": 0.21433325111865997, "learning_rate": 6.3022654317267105e-06, "loss": 0.0016, "step": 58000 }, { "epoch": 0.9491941421909514, "grad_norm": 0.26147589087486267, "learning_rate": 6.300886754867899e-06, "loss": 0.0018, "step": 58010 }, { "epoch": 0.9493577681420273, "grad_norm": 0.10692520439624786, "learning_rate": 6.29950797191319e-06, "loss": 0.0021, "step": 58020 }, { "epoch": 0.9495213940931032, "grad_norm": 0.0820641741156578, "learning_rate": 6.298129082975031e-06, "loss": 0.0013, "step": 58030 }, { "epoch": 0.949685020044179, "grad_norm": 0.15649184584617615, "learning_rate": 6.296750088165882e-06, "loss": 0.0022, "step": 58040 }, { "epoch": 0.9498486459952549, "grad_norm": 0.10773397237062454, "learning_rate": 6.295370987598206e-06, "loss": 0.001, "step": 58050 }, { "epoch": 0.9500122719463306, "grad_norm": 0.011783221736550331, "learning_rate": 6.293991781384481e-06, "loss": 0.0011, "step": 58060 }, { "epoch": 0.9501758978974065, "grad_norm": 0.023752499371767044, "learning_rate": 6.292612469637189e-06, "loss": 0.0011, "step": 58070 }, { "epoch": 0.9503395238484824, "grad_norm": 0.03099977970123291, "learning_rate": 6.291233052468822e-06, "loss": 0.0024, "step": 58080 }, { "epoch": 0.9505031497995582, "grad_norm": 0.039513569325208664, "learning_rate": 6.28985352999188e-06, "loss": 0.0011, "step": 58090 }, { "epoch": 0.9506667757506341, "grad_norm": 0.09049548953771591, "learning_rate": 6.288473902318871e-06, "loss": 0.0039, "step": 58100 }, { "epoch": 0.9508304017017098, "grad_norm": 0.07914090901613235, "learning_rate": 6.287094169562315e-06, "loss": 0.0013, "step": 58110 }, { "epoch": 0.9509940276527857, "grad_norm": 0.12448439747095108, "learning_rate": 6.285714331834739e-06, "loss": 0.0018, "step": 58120 }, { "epoch": 0.9511576536038616, "grad_norm": 0.05316713824868202, "learning_rate": 6.2843343892486756e-06, "loss": 0.0024, "step": 58130 }, { "epoch": 0.9513212795549374, "grad_norm": 0.039714016020298004, "learning_rate": 6.2829543419166685e-06, "loss": 0.0021, "step": 58140 }, { "epoch": 0.9514849055060133, "grad_norm": 0.042398519814014435, "learning_rate": 6.281574189951271e-06, "loss": 0.0019, "step": 58150 }, { "epoch": 0.951648531457089, "grad_norm": 0.030234944075345993, "learning_rate": 6.280193933465042e-06, "loss": 0.0014, "step": 58160 }, { "epoch": 0.9518121574081649, "grad_norm": 0.07874390482902527, "learning_rate": 6.2788135725705525e-06, "loss": 0.0013, "step": 58170 }, { "epoch": 0.9519757833592408, "grad_norm": 0.0872984528541565, "learning_rate": 6.277433107380378e-06, "loss": 0.0021, "step": 58180 }, { "epoch": 0.9521394093103166, "grad_norm": 0.10335346311330795, "learning_rate": 6.276052538007107e-06, "loss": 0.0017, "step": 58190 }, { "epoch": 0.9523030352613925, "grad_norm": 0.0530158132314682, "learning_rate": 6.274671864563331e-06, "loss": 0.0017, "step": 58200 }, { "epoch": 0.9524666612124683, "grad_norm": 0.08727231621742249, "learning_rate": 6.273291087161655e-06, "loss": 0.0016, "step": 58210 }, { "epoch": 0.9526302871635441, "grad_norm": 0.005082531366497278, "learning_rate": 6.271910205914689e-06, "loss": 0.0018, "step": 58220 }, { "epoch": 0.95279391311462, "grad_norm": 0.0704679936170578, "learning_rate": 6.270529220935056e-06, "loss": 0.0013, "step": 58230 }, { "epoch": 0.9529575390656958, "grad_norm": 0.09169528633356094, "learning_rate": 6.2691481323353805e-06, "loss": 0.0019, "step": 58240 }, { "epoch": 0.9531211650167717, "grad_norm": 0.1520804464817047, "learning_rate": 6.267766940228303e-06, "loss": 0.0017, "step": 58250 }, { "epoch": 0.9532847909678475, "grad_norm": 0.08106236904859543, "learning_rate": 6.266385644726466e-06, "loss": 0.0022, "step": 58260 }, { "epoch": 0.9534484169189233, "grad_norm": 0.04998185858130455, "learning_rate": 6.265004245942525e-06, "loss": 0.0022, "step": 58270 }, { "epoch": 0.9536120428699992, "grad_norm": 0.04886684566736221, "learning_rate": 6.263622743989142e-06, "loss": 0.0013, "step": 58280 }, { "epoch": 0.953775668821075, "grad_norm": 0.06148363649845123, "learning_rate": 6.262241138978986e-06, "loss": 0.0014, "step": 58290 }, { "epoch": 0.9539392947721509, "grad_norm": 0.06408237665891647, "learning_rate": 6.260859431024738e-06, "loss": 0.0021, "step": 58300 }, { "epoch": 0.9541029207232267, "grad_norm": 0.05922083184123039, "learning_rate": 6.259477620239085e-06, "loss": 0.0016, "step": 58310 }, { "epoch": 0.9542665466743026, "grad_norm": 0.17053700983524323, "learning_rate": 6.258095706734721e-06, "loss": 0.002, "step": 58320 }, { "epoch": 0.9544301726253784, "grad_norm": 0.06367158889770508, "learning_rate": 6.256713690624353e-06, "loss": 0.0029, "step": 58330 }, { "epoch": 0.9545937985764542, "grad_norm": 0.03940455615520477, "learning_rate": 6.255331572020692e-06, "loss": 0.0025, "step": 58340 }, { "epoch": 0.9547574245275301, "grad_norm": 0.09376281499862671, "learning_rate": 6.253949351036459e-06, "loss": 0.0031, "step": 58350 }, { "epoch": 0.9549210504786059, "grad_norm": 0.04461251199245453, "learning_rate": 6.252567027784382e-06, "loss": 0.0031, "step": 58360 }, { "epoch": 0.9550846764296818, "grad_norm": 0.09415064007043839, "learning_rate": 6.251184602377202e-06, "loss": 0.0023, "step": 58370 }, { "epoch": 0.9552483023807576, "grad_norm": 0.007300138007849455, "learning_rate": 6.24980207492766e-06, "loss": 0.0031, "step": 58380 }, { "epoch": 0.9554119283318334, "grad_norm": 0.06237104535102844, "learning_rate": 6.248419445548516e-06, "loss": 0.0024, "step": 58390 }, { "epoch": 0.9555755542829093, "grad_norm": 0.06175771728157997, "learning_rate": 6.247036714352528e-06, "loss": 0.0018, "step": 58400 }, { "epoch": 0.9557391802339851, "grad_norm": 0.050690412521362305, "learning_rate": 6.245653881452468e-06, "loss": 0.001, "step": 58410 }, { "epoch": 0.955902806185061, "grad_norm": 0.01614823006093502, "learning_rate": 6.244270946961116e-06, "loss": 0.0025, "step": 58420 }, { "epoch": 0.9560664321361368, "grad_norm": 0.038604676723480225, "learning_rate": 6.2428879109912585e-06, "loss": 0.0024, "step": 58430 }, { "epoch": 0.9562300580872126, "grad_norm": 0.04415985941886902, "learning_rate": 6.241504773655692e-06, "loss": 0.0015, "step": 58440 }, { "epoch": 0.9563936840382885, "grad_norm": 0.027101153507828712, "learning_rate": 6.240121535067219e-06, "loss": 0.0022, "step": 58450 }, { "epoch": 0.9565573099893643, "grad_norm": 0.027097007259726524, "learning_rate": 6.238738195338655e-06, "loss": 0.0014, "step": 58460 }, { "epoch": 0.9567209359404402, "grad_norm": 0.04853229597210884, "learning_rate": 6.237354754582817e-06, "loss": 0.0023, "step": 58470 }, { "epoch": 0.9568845618915159, "grad_norm": 0.023506224155426025, "learning_rate": 6.235971212912535e-06, "loss": 0.0025, "step": 58480 }, { "epoch": 0.9570481878425918, "grad_norm": 0.1510210931301117, "learning_rate": 6.234587570440647e-06, "loss": 0.002, "step": 58490 }, { "epoch": 0.9572118137936677, "grad_norm": 0.06374149024486542, "learning_rate": 6.2332038272799955e-06, "loss": 0.0019, "step": 58500 }, { "epoch": 0.9573754397447435, "grad_norm": 0.032993730157613754, "learning_rate": 6.231819983543436e-06, "loss": 0.0021, "step": 58510 }, { "epoch": 0.9575390656958194, "grad_norm": 0.11612825840711594, "learning_rate": 6.2304360393438315e-06, "loss": 0.0018, "step": 58520 }, { "epoch": 0.9577026916468951, "grad_norm": 0.0704694539308548, "learning_rate": 6.229051994794047e-06, "loss": 0.0018, "step": 58530 }, { "epoch": 0.957866317597971, "grad_norm": 0.049204569309949875, "learning_rate": 6.227667850006967e-06, "loss": 0.0014, "step": 58540 }, { "epoch": 0.9580299435490469, "grad_norm": 0.00743493577465415, "learning_rate": 6.226283605095471e-06, "loss": 0.0027, "step": 58550 }, { "epoch": 0.9581935695001227, "grad_norm": 0.026673590764403343, "learning_rate": 6.224899260172458e-06, "loss": 0.0015, "step": 58560 }, { "epoch": 0.9583571954511986, "grad_norm": 0.017874035984277725, "learning_rate": 6.223514815350827e-06, "loss": 0.0019, "step": 58570 }, { "epoch": 0.9585208214022743, "grad_norm": 0.10189425945281982, "learning_rate": 6.222130270743492e-06, "loss": 0.0031, "step": 58580 }, { "epoch": 0.9586844473533502, "grad_norm": 0.025408370420336723, "learning_rate": 6.22074562646337e-06, "loss": 0.001, "step": 58590 }, { "epoch": 0.9588480733044261, "grad_norm": 0.03035455197095871, "learning_rate": 6.219360882623388e-06, "loss": 0.0012, "step": 58600 }, { "epoch": 0.9590116992555019, "grad_norm": 0.05683242529630661, "learning_rate": 6.217976039336481e-06, "loss": 0.0016, "step": 58610 }, { "epoch": 0.9591753252065778, "grad_norm": 0.029838312417268753, "learning_rate": 6.216591096715592e-06, "loss": 0.0016, "step": 58620 }, { "epoch": 0.9593389511576536, "grad_norm": 0.0384819433093071, "learning_rate": 6.215206054873672e-06, "loss": 0.0022, "step": 58630 }, { "epoch": 0.9595025771087294, "grad_norm": 0.03671186789870262, "learning_rate": 6.213820913923681e-06, "loss": 0.0012, "step": 58640 }, { "epoch": 0.9596662030598053, "grad_norm": 0.12970297038555145, "learning_rate": 6.212435673978587e-06, "loss": 0.0019, "step": 58650 }, { "epoch": 0.9598298290108811, "grad_norm": 0.12293124198913574, "learning_rate": 6.211050335151363e-06, "loss": 0.0028, "step": 58660 }, { "epoch": 0.959993454961957, "grad_norm": 0.04402632266283035, "learning_rate": 6.209664897554995e-06, "loss": 0.002, "step": 58670 }, { "epoch": 0.9601570809130328, "grad_norm": 0.11219951510429382, "learning_rate": 6.2082793613024716e-06, "loss": 0.0028, "step": 58680 }, { "epoch": 0.9603207068641086, "grad_norm": 0.05862412974238396, "learning_rate": 6.206893726506796e-06, "loss": 0.0018, "step": 58690 }, { "epoch": 0.9604843328151845, "grad_norm": 0.06713079661130905, "learning_rate": 6.205507993280975e-06, "loss": 0.0017, "step": 58700 }, { "epoch": 0.9606479587662603, "grad_norm": 0.11677181720733643, "learning_rate": 6.204122161738022e-06, "loss": 0.0012, "step": 58710 }, { "epoch": 0.9608115847173362, "grad_norm": 0.04015107825398445, "learning_rate": 6.202736231990965e-06, "loss": 0.0026, "step": 58720 }, { "epoch": 0.960975210668412, "grad_norm": 0.16898278892040253, "learning_rate": 6.201350204152831e-06, "loss": 0.0015, "step": 58730 }, { "epoch": 0.9611388366194878, "grad_norm": 0.06074877455830574, "learning_rate": 6.199964078336661e-06, "loss": 0.0019, "step": 58740 }, { "epoch": 0.9613024625705637, "grad_norm": 0.03581656515598297, "learning_rate": 6.198577854655504e-06, "loss": 0.0013, "step": 58750 }, { "epoch": 0.9614660885216395, "grad_norm": 0.032737743109464645, "learning_rate": 6.197191533222415e-06, "loss": 0.0026, "step": 58760 }, { "epoch": 0.9616297144727154, "grad_norm": 0.012240628711879253, "learning_rate": 6.195805114150458e-06, "loss": 0.0017, "step": 58770 }, { "epoch": 0.9617933404237912, "grad_norm": 0.08106410503387451, "learning_rate": 6.194418597552705e-06, "loss": 0.0019, "step": 58780 }, { "epoch": 0.961956966374867, "grad_norm": 0.05172346532344818, "learning_rate": 6.1930319835422336e-06, "loss": 0.0018, "step": 58790 }, { "epoch": 0.9621205923259429, "grad_norm": 0.055812761187553406, "learning_rate": 6.191645272232134e-06, "loss": 0.002, "step": 58800 }, { "epoch": 0.9622842182770187, "grad_norm": 0.03036658465862274, "learning_rate": 6.190258463735499e-06, "loss": 0.0016, "step": 58810 }, { "epoch": 0.9624478442280946, "grad_norm": 0.08152364939451218, "learning_rate": 6.1888715581654345e-06, "loss": 0.0014, "step": 58820 }, { "epoch": 0.9626114701791704, "grad_norm": 0.0864526778459549, "learning_rate": 6.187484555635049e-06, "loss": 0.0018, "step": 58830 }, { "epoch": 0.9627750961302463, "grad_norm": 0.07235458493232727, "learning_rate": 6.186097456257465e-06, "loss": 0.0019, "step": 58840 }, { "epoch": 0.9629387220813221, "grad_norm": 0.04425173997879028, "learning_rate": 6.184710260145807e-06, "loss": 0.0032, "step": 58850 }, { "epoch": 0.9631023480323979, "grad_norm": 0.12765848636627197, "learning_rate": 6.183322967413212e-06, "loss": 0.0025, "step": 58860 }, { "epoch": 0.9632659739834738, "grad_norm": 0.04378824308514595, "learning_rate": 6.181935578172821e-06, "loss": 0.0014, "step": 58870 }, { "epoch": 0.9634295999345496, "grad_norm": 0.06265013664960861, "learning_rate": 6.180548092537786e-06, "loss": 0.0025, "step": 58880 }, { "epoch": 0.9635932258856255, "grad_norm": 0.016524959355592728, "learning_rate": 6.179160510621264e-06, "loss": 0.0019, "step": 58890 }, { "epoch": 0.9637568518367013, "grad_norm": 0.11609126627445221, "learning_rate": 6.177772832536423e-06, "loss": 0.0013, "step": 58900 }, { "epoch": 0.9639204777877771, "grad_norm": 0.09382468461990356, "learning_rate": 6.1763850583964365e-06, "loss": 0.0017, "step": 58910 }, { "epoch": 0.964084103738853, "grad_norm": 0.04451765492558479, "learning_rate": 6.174997188314489e-06, "loss": 0.0012, "step": 58920 }, { "epoch": 0.9642477296899288, "grad_norm": 0.010108184069395065, "learning_rate": 6.173609222403767e-06, "loss": 0.0028, "step": 58930 }, { "epoch": 0.9644113556410047, "grad_norm": 0.02133953385055065, "learning_rate": 6.172221160777469e-06, "loss": 0.002, "step": 58940 }, { "epoch": 0.9645749815920805, "grad_norm": 0.049128394573926926, "learning_rate": 6.170833003548803e-06, "loss": 0.0017, "step": 58950 }, { "epoch": 0.9647386075431563, "grad_norm": 0.08993761986494064, "learning_rate": 6.1694447508309805e-06, "loss": 0.0018, "step": 58960 }, { "epoch": 0.9649022334942322, "grad_norm": 0.13894130289554596, "learning_rate": 6.168056402737222e-06, "loss": 0.0035, "step": 58970 }, { "epoch": 0.965065859445308, "grad_norm": 0.1430618315935135, "learning_rate": 6.166667959380759e-06, "loss": 0.0019, "step": 58980 }, { "epoch": 0.9652294853963839, "grad_norm": 0.014461737126111984, "learning_rate": 6.1652794208748245e-06, "loss": 0.0024, "step": 58990 }, { "epoch": 0.9653931113474598, "grad_norm": 0.05005745589733124, "learning_rate": 6.163890787332667e-06, "loss": 0.002, "step": 59000 }, { "epoch": 0.9655567372985355, "grad_norm": 0.09706447273492813, "learning_rate": 6.162502058867536e-06, "loss": 0.002, "step": 59010 }, { "epoch": 0.9657203632496114, "grad_norm": 0.11365757882595062, "learning_rate": 6.161113235592692e-06, "loss": 0.0019, "step": 59020 }, { "epoch": 0.9658839892006872, "grad_norm": 0.042573362588882446, "learning_rate": 6.1597243176214025e-06, "loss": 0.0013, "step": 59030 }, { "epoch": 0.9660476151517631, "grad_norm": 0.04736004024744034, "learning_rate": 6.158335305066945e-06, "loss": 0.003, "step": 59040 }, { "epoch": 0.966211241102839, "grad_norm": 0.13747192919254303, "learning_rate": 6.1569461980426e-06, "loss": 0.0019, "step": 59050 }, { "epoch": 0.9663748670539147, "grad_norm": 0.03255385905504227, "learning_rate": 6.155556996661659e-06, "loss": 0.0015, "step": 59060 }, { "epoch": 0.9665384930049906, "grad_norm": 0.0700443759560585, "learning_rate": 6.154167701037421e-06, "loss": 0.002, "step": 59070 }, { "epoch": 0.9667021189560664, "grad_norm": 0.0969579815864563, "learning_rate": 6.152778311283193e-06, "loss": 0.0016, "step": 59080 }, { "epoch": 0.9668657449071423, "grad_norm": 0.052093978971242905, "learning_rate": 6.151388827512287e-06, "loss": 0.0017, "step": 59090 }, { "epoch": 0.9670293708582182, "grad_norm": 0.06126511096954346, "learning_rate": 6.149999249838025e-06, "loss": 0.002, "step": 59100 }, { "epoch": 0.9671929968092939, "grad_norm": 0.09167136996984482, "learning_rate": 6.1486095783737386e-06, "loss": 0.0009, "step": 59110 }, { "epoch": 0.9673566227603698, "grad_norm": 0.041624169796705246, "learning_rate": 6.147219813232762e-06, "loss": 0.0029, "step": 59120 }, { "epoch": 0.9675202487114456, "grad_norm": 0.15207301080226898, "learning_rate": 6.145829954528441e-06, "loss": 0.0012, "step": 59130 }, { "epoch": 0.9676838746625215, "grad_norm": 0.053871866315603256, "learning_rate": 6.144440002374127e-06, "loss": 0.0015, "step": 59140 }, { "epoch": 0.9678475006135974, "grad_norm": 0.030999070033431053, "learning_rate": 6.14304995688318e-06, "loss": 0.0019, "step": 59150 }, { "epoch": 0.9680111265646731, "grad_norm": 0.07165121287107468, "learning_rate": 6.1416598181689675e-06, "loss": 0.0018, "step": 59160 }, { "epoch": 0.968174752515749, "grad_norm": 0.03951803222298622, "learning_rate": 6.140269586344864e-06, "loss": 0.0015, "step": 59170 }, { "epoch": 0.9683383784668248, "grad_norm": 0.04415475204586983, "learning_rate": 6.138879261524254e-06, "loss": 0.0015, "step": 59180 }, { "epoch": 0.9685020044179007, "grad_norm": 0.034847572445869446, "learning_rate": 6.137488843820526e-06, "loss": 0.0018, "step": 59190 }, { "epoch": 0.9686656303689766, "grad_norm": 0.04615645483136177, "learning_rate": 6.136098333347077e-06, "loss": 0.0024, "step": 59200 }, { "epoch": 0.9688292563200523, "grad_norm": 0.05982517451047897, "learning_rate": 6.1347077302173145e-06, "loss": 0.0026, "step": 59210 }, { "epoch": 0.9689928822711282, "grad_norm": 0.09822215884923935, "learning_rate": 6.133317034544649e-06, "loss": 0.0029, "step": 59220 }, { "epoch": 0.969156508222204, "grad_norm": 0.21471615135669708, "learning_rate": 6.131926246442502e-06, "loss": 0.003, "step": 59230 }, { "epoch": 0.9693201341732799, "grad_norm": 0.07293573021888733, "learning_rate": 6.130535366024302e-06, "loss": 0.0013, "step": 59240 }, { "epoch": 0.9694837601243558, "grad_norm": 0.03750929981470108, "learning_rate": 6.129144393403483e-06, "loss": 0.0013, "step": 59250 }, { "epoch": 0.9696473860754315, "grad_norm": 0.3493478298187256, "learning_rate": 6.1277533286934906e-06, "loss": 0.0031, "step": 59260 }, { "epoch": 0.9698110120265074, "grad_norm": 0.13050806522369385, "learning_rate": 6.126362172007772e-06, "loss": 0.0017, "step": 59270 }, { "epoch": 0.9699746379775832, "grad_norm": 0.021530838683247566, "learning_rate": 6.1249709234597884e-06, "loss": 0.0051, "step": 59280 }, { "epoch": 0.9701382639286591, "grad_norm": 0.12952467799186707, "learning_rate": 6.123579583163003e-06, "loss": 0.0017, "step": 59290 }, { "epoch": 0.970301889879735, "grad_norm": 0.2073030173778534, "learning_rate": 6.12218815123089e-06, "loss": 0.0012, "step": 59300 }, { "epoch": 0.9704655158308108, "grad_norm": 0.047026898711919785, "learning_rate": 6.120796627776928e-06, "loss": 0.002, "step": 59310 }, { "epoch": 0.9706291417818866, "grad_norm": 0.11334921419620514, "learning_rate": 6.119405012914608e-06, "loss": 0.002, "step": 59320 }, { "epoch": 0.9707927677329624, "grad_norm": 0.08938440680503845, "learning_rate": 6.118013306757423e-06, "loss": 0.0033, "step": 59330 }, { "epoch": 0.9709563936840383, "grad_norm": 0.1321195811033249, "learning_rate": 6.1166215094188764e-06, "loss": 0.0019, "step": 59340 }, { "epoch": 0.9711200196351142, "grad_norm": 0.057198166847229004, "learning_rate": 6.115229621012479e-06, "loss": 0.0024, "step": 59350 }, { "epoch": 0.97128364558619, "grad_norm": 0.09630824625492096, "learning_rate": 6.113837641651749e-06, "loss": 0.0032, "step": 59360 }, { "epoch": 0.9714472715372658, "grad_norm": 0.022952301427721977, "learning_rate": 6.1124455714502085e-06, "loss": 0.0011, "step": 59370 }, { "epoch": 0.9716108974883416, "grad_norm": 0.03261130303144455, "learning_rate": 6.111053410521394e-06, "loss": 0.0019, "step": 59380 }, { "epoch": 0.9717745234394175, "grad_norm": 0.2276962548494339, "learning_rate": 6.1096611589788415e-06, "loss": 0.0019, "step": 59390 }, { "epoch": 0.9719381493904933, "grad_norm": 0.04965461418032646, "learning_rate": 6.108268816936102e-06, "loss": 0.0019, "step": 59400 }, { "epoch": 0.9721017753415692, "grad_norm": 0.07307393848896027, "learning_rate": 6.106876384506727e-06, "loss": 0.0029, "step": 59410 }, { "epoch": 0.972265401292645, "grad_norm": 0.054034411907196045, "learning_rate": 6.10548386180428e-06, "loss": 0.0025, "step": 59420 }, { "epoch": 0.9724290272437208, "grad_norm": 0.09574251621961594, "learning_rate": 6.104091248942331e-06, "loss": 0.0019, "step": 59430 }, { "epoch": 0.9725926531947967, "grad_norm": 0.08367104083299637, "learning_rate": 6.102698546034456e-06, "loss": 0.0018, "step": 59440 }, { "epoch": 0.9727562791458725, "grad_norm": 0.035694271326065063, "learning_rate": 6.10130575319424e-06, "loss": 0.0024, "step": 59450 }, { "epoch": 0.9729199050969484, "grad_norm": 0.01515640877187252, "learning_rate": 6.0999128705352724e-06, "loss": 0.0013, "step": 59460 }, { "epoch": 0.9730835310480243, "grad_norm": 0.22956176102161407, "learning_rate": 6.098519898171155e-06, "loss": 0.0025, "step": 59470 }, { "epoch": 0.9732471569991, "grad_norm": 0.09979180991649628, "learning_rate": 6.097126836215491e-06, "loss": 0.0019, "step": 59480 }, { "epoch": 0.9734107829501759, "grad_norm": 0.10009083896875381, "learning_rate": 6.095733684781895e-06, "loss": 0.0025, "step": 59490 }, { "epoch": 0.9735744089012517, "grad_norm": 0.048967428505420685, "learning_rate": 6.0943404439839885e-06, "loss": 0.002, "step": 59500 }, { "epoch": 0.9737380348523276, "grad_norm": 0.04773823171854019, "learning_rate": 6.092947113935397e-06, "loss": 0.0021, "step": 59510 }, { "epoch": 0.9739016608034035, "grad_norm": 0.04499290511012077, "learning_rate": 6.091553694749759e-06, "loss": 0.0022, "step": 59520 }, { "epoch": 0.9740652867544792, "grad_norm": 0.07160546630620956, "learning_rate": 6.0901601865407144e-06, "loss": 0.0023, "step": 59530 }, { "epoch": 0.9742289127055551, "grad_norm": 0.05784238129854202, "learning_rate": 6.088766589421915e-06, "loss": 0.0021, "step": 59540 }, { "epoch": 0.9743925386566309, "grad_norm": 0.06750132888555527, "learning_rate": 6.087372903507016e-06, "loss": 0.0021, "step": 59550 }, { "epoch": 0.9745561646077068, "grad_norm": 0.019278204068541527, "learning_rate": 6.085979128909684e-06, "loss": 0.0019, "step": 59560 }, { "epoch": 0.9747197905587827, "grad_norm": 0.15544544160366058, "learning_rate": 6.084585265743588e-06, "loss": 0.0028, "step": 59570 }, { "epoch": 0.9748834165098584, "grad_norm": 0.07146583497524261, "learning_rate": 6.083191314122407e-06, "loss": 0.0021, "step": 59580 }, { "epoch": 0.9750470424609343, "grad_norm": 0.06504635512828827, "learning_rate": 6.081797274159828e-06, "loss": 0.0016, "step": 59590 }, { "epoch": 0.9752106684120101, "grad_norm": 0.09232314676046371, "learning_rate": 6.080403145969545e-06, "loss": 0.0012, "step": 59600 }, { "epoch": 0.975374294363086, "grad_norm": 0.04990586265921593, "learning_rate": 6.079008929665257e-06, "loss": 0.0019, "step": 59610 }, { "epoch": 0.9755379203141619, "grad_norm": 0.2490033656358719, "learning_rate": 6.077614625360672e-06, "loss": 0.0037, "step": 59620 }, { "epoch": 0.9757015462652376, "grad_norm": 0.18469533324241638, "learning_rate": 6.076220233169504e-06, "loss": 0.002, "step": 59630 }, { "epoch": 0.9758651722163135, "grad_norm": 0.06195841729640961, "learning_rate": 6.074825753205475e-06, "loss": 0.0013, "step": 59640 }, { "epoch": 0.9760287981673893, "grad_norm": 0.08911745995283127, "learning_rate": 6.073431185582315e-06, "loss": 0.002, "step": 59650 }, { "epoch": 0.9761924241184652, "grad_norm": 0.06178278848528862, "learning_rate": 6.072036530413759e-06, "loss": 0.0023, "step": 59660 }, { "epoch": 0.9763560500695411, "grad_norm": 0.007030330132693052, "learning_rate": 6.070641787813552e-06, "loss": 0.0016, "step": 59670 }, { "epoch": 0.9765196760206168, "grad_norm": 0.05066562816500664, "learning_rate": 6.0692469578954445e-06, "loss": 0.0015, "step": 59680 }, { "epoch": 0.9766833019716927, "grad_norm": 0.060841407626867294, "learning_rate": 6.067852040773191e-06, "loss": 0.0038, "step": 59690 }, { "epoch": 0.9768469279227685, "grad_norm": 0.05238354951143265, "learning_rate": 6.0664570365605595e-06, "loss": 0.0029, "step": 59700 }, { "epoch": 0.9770105538738444, "grad_norm": 0.009691527113318443, "learning_rate": 6.065061945371319e-06, "loss": 0.0016, "step": 59710 }, { "epoch": 0.9771741798249203, "grad_norm": 0.0825946256518364, "learning_rate": 6.0636667673192504e-06, "loss": 0.0028, "step": 59720 }, { "epoch": 0.977337805775996, "grad_norm": 0.03873920440673828, "learning_rate": 6.06227150251814e-06, "loss": 0.0017, "step": 59730 }, { "epoch": 0.9775014317270719, "grad_norm": 0.06351128965616226, "learning_rate": 6.06087615108178e-06, "loss": 0.0013, "step": 59740 }, { "epoch": 0.9776650576781477, "grad_norm": 0.006233478896319866, "learning_rate": 6.059480713123968e-06, "loss": 0.0022, "step": 59750 }, { "epoch": 0.9778286836292236, "grad_norm": 0.1972426325082779, "learning_rate": 6.058085188758517e-06, "loss": 0.0021, "step": 59760 }, { "epoch": 0.9779923095802995, "grad_norm": 0.012261101976037025, "learning_rate": 6.056689578099236e-06, "loss": 0.0015, "step": 59770 }, { "epoch": 0.9781559355313753, "grad_norm": 0.12176188081502914, "learning_rate": 6.055293881259948e-06, "loss": 0.0019, "step": 59780 }, { "epoch": 0.9783195614824511, "grad_norm": 0.13167357444763184, "learning_rate": 6.053898098354483e-06, "loss": 0.0024, "step": 59790 }, { "epoch": 0.9784831874335269, "grad_norm": 0.12744878232479095, "learning_rate": 6.052502229496674e-06, "loss": 0.0011, "step": 59800 }, { "epoch": 0.9786468133846028, "grad_norm": 0.027661819010972977, "learning_rate": 6.051106274800363e-06, "loss": 0.002, "step": 59810 }, { "epoch": 0.9788104393356787, "grad_norm": 0.03930159658193588, "learning_rate": 6.0497102343794025e-06, "loss": 0.0021, "step": 59820 }, { "epoch": 0.9789740652867545, "grad_norm": 0.016959572210907936, "learning_rate": 6.0483141083476445e-06, "loss": 0.0025, "step": 59830 }, { "epoch": 0.9791376912378303, "grad_norm": 0.0502483956515789, "learning_rate": 6.046917896818956e-06, "loss": 0.0027, "step": 59840 }, { "epoch": 0.9793013171889061, "grad_norm": 0.0883905366063118, "learning_rate": 6.045521599907206e-06, "loss": 0.0023, "step": 59850 }, { "epoch": 0.979464943139982, "grad_norm": 0.44710573554039, "learning_rate": 6.0441252177262695e-06, "loss": 0.0019, "step": 59860 }, { "epoch": 0.9796285690910579, "grad_norm": 0.04729650542140007, "learning_rate": 6.042728750390034e-06, "loss": 0.0014, "step": 59870 }, { "epoch": 0.9797921950421337, "grad_norm": 0.050038114190101624, "learning_rate": 6.04133219801239e-06, "loss": 0.0014, "step": 59880 }, { "epoch": 0.9799558209932095, "grad_norm": 0.06400980800390244, "learning_rate": 6.039935560707234e-06, "loss": 0.0025, "step": 59890 }, { "epoch": 0.9801194469442853, "grad_norm": 0.0482458621263504, "learning_rate": 6.038538838588472e-06, "loss": 0.0017, "step": 59900 }, { "epoch": 0.9802830728953612, "grad_norm": 0.07127697020769119, "learning_rate": 6.0371420317700155e-06, "loss": 0.0019, "step": 59910 }, { "epoch": 0.9804466988464371, "grad_norm": 0.029114123433828354, "learning_rate": 6.035745140365784e-06, "loss": 0.0016, "step": 59920 }, { "epoch": 0.9806103247975129, "grad_norm": 0.047244396060705185, "learning_rate": 6.0343481644897026e-06, "loss": 0.0018, "step": 59930 }, { "epoch": 0.9807739507485888, "grad_norm": 0.004442477133125067, "learning_rate": 6.032951104255704e-06, "loss": 0.0021, "step": 59940 }, { "epoch": 0.9809375766996645, "grad_norm": 0.054098594933748245, "learning_rate": 6.031553959777727e-06, "loss": 0.0014, "step": 59950 }, { "epoch": 0.9811012026507404, "grad_norm": 0.04171818122267723, "learning_rate": 6.030156731169719e-06, "loss": 0.0018, "step": 59960 }, { "epoch": 0.9812648286018163, "grad_norm": 0.09094149619340897, "learning_rate": 6.028759418545635e-06, "loss": 0.0016, "step": 59970 }, { "epoch": 0.9814284545528921, "grad_norm": 0.035672642290592194, "learning_rate": 6.027362022019431e-06, "loss": 0.0016, "step": 59980 }, { "epoch": 0.981592080503968, "grad_norm": 0.14837729930877686, "learning_rate": 6.025964541705076e-06, "loss": 0.0031, "step": 59990 }, { "epoch": 0.9817557064550437, "grad_norm": 0.09441840648651123, "learning_rate": 6.024566977716544e-06, "loss": 0.0032, "step": 60000 }, { "epoch": 0.9819193324061196, "grad_norm": 0.2889818549156189, "learning_rate": 6.023169330167815e-06, "loss": 0.0028, "step": 60010 }, { "epoch": 0.9820829583571955, "grad_norm": 0.04635530337691307, "learning_rate": 6.0217715991728766e-06, "loss": 0.0021, "step": 60020 }, { "epoch": 0.9822465843082713, "grad_norm": 0.04362974688410759, "learning_rate": 6.020373784845723e-06, "loss": 0.0021, "step": 60030 }, { "epoch": 0.9824102102593472, "grad_norm": 0.0717148631811142, "learning_rate": 6.018975887300356e-06, "loss": 0.002, "step": 60040 }, { "epoch": 0.9825738362104229, "grad_norm": 0.026522452011704445, "learning_rate": 6.01757790665078e-06, "loss": 0.0015, "step": 60050 }, { "epoch": 0.9827374621614988, "grad_norm": 0.03239431604743004, "learning_rate": 6.016179843011014e-06, "loss": 0.0023, "step": 60060 }, { "epoch": 0.9829010881125747, "grad_norm": 0.05888697877526283, "learning_rate": 6.0147816964950764e-06, "loss": 0.0023, "step": 60070 }, { "epoch": 0.9830647140636505, "grad_norm": 0.03198877349495888, "learning_rate": 6.0133834672169964e-06, "loss": 0.0012, "step": 60080 }, { "epoch": 0.9832283400147264, "grad_norm": 0.05001695826649666, "learning_rate": 6.011985155290809e-06, "loss": 0.0034, "step": 60090 }, { "epoch": 0.9833919659658021, "grad_norm": 0.029652073979377747, "learning_rate": 6.0105867608305555e-06, "loss": 0.0018, "step": 60100 }, { "epoch": 0.983555591916878, "grad_norm": 0.10867547243833542, "learning_rate": 6.0091882839502845e-06, "loss": 0.0021, "step": 60110 }, { "epoch": 0.9837192178679539, "grad_norm": 0.04657530039548874, "learning_rate": 6.007789724764049e-06, "loss": 0.0026, "step": 60120 }, { "epoch": 0.9838828438190297, "grad_norm": 0.05494051054120064, "learning_rate": 6.006391083385914e-06, "loss": 0.0032, "step": 60130 }, { "epoch": 0.9840464697701056, "grad_norm": 0.038255564868450165, "learning_rate": 6.004992359929946e-06, "loss": 0.002, "step": 60140 }, { "epoch": 0.9842100957211813, "grad_norm": 0.029380599036812782, "learning_rate": 6.00359355451022e-06, "loss": 0.0008, "step": 60150 }, { "epoch": 0.9843737216722572, "grad_norm": 0.28330400586128235, "learning_rate": 6.002194667240818e-06, "loss": 0.0033, "step": 60160 }, { "epoch": 0.9845373476233331, "grad_norm": 0.2177828699350357, "learning_rate": 6.000795698235828e-06, "loss": 0.0029, "step": 60170 }, { "epoch": 0.9847009735744089, "grad_norm": 0.0665067657828331, "learning_rate": 5.999396647609348e-06, "loss": 0.0014, "step": 60180 }, { "epoch": 0.9848645995254848, "grad_norm": 0.05530639365315437, "learning_rate": 5.997997515475476e-06, "loss": 0.0015, "step": 60190 }, { "epoch": 0.9850282254765605, "grad_norm": 0.0063982997089624405, "learning_rate": 5.996598301948322e-06, "loss": 0.0013, "step": 60200 }, { "epoch": 0.9851918514276364, "grad_norm": 0.01960398256778717, "learning_rate": 5.995199007142002e-06, "loss": 0.0009, "step": 60210 }, { "epoch": 0.9853554773787123, "grad_norm": 0.10860680043697357, "learning_rate": 5.993799631170637e-06, "loss": 0.001, "step": 60220 }, { "epoch": 0.9855191033297881, "grad_norm": 0.033449865877628326, "learning_rate": 5.992400174148355e-06, "loss": 0.0021, "step": 60230 }, { "epoch": 0.985682729280864, "grad_norm": 0.12303087115287781, "learning_rate": 5.991000636189292e-06, "loss": 0.002, "step": 60240 }, { "epoch": 0.9858463552319398, "grad_norm": 0.05814871937036514, "learning_rate": 5.989601017407587e-06, "loss": 0.0016, "step": 60250 }, { "epoch": 0.9860099811830156, "grad_norm": 0.05798044055700302, "learning_rate": 5.988201317917392e-06, "loss": 0.0019, "step": 60260 }, { "epoch": 0.9861736071340914, "grad_norm": 0.05535168945789337, "learning_rate": 5.986801537832858e-06, "loss": 0.0016, "step": 60270 }, { "epoch": 0.9863372330851673, "grad_norm": 0.13906854391098022, "learning_rate": 5.985401677268148e-06, "loss": 0.0013, "step": 60280 }, { "epoch": 0.9865008590362432, "grad_norm": 0.13225620985031128, "learning_rate": 5.98400173633743e-06, "loss": 0.0034, "step": 60290 }, { "epoch": 0.986664484987319, "grad_norm": 0.08708301186561584, "learning_rate": 5.982601715154879e-06, "loss": 0.0022, "step": 60300 }, { "epoch": 0.9868281109383948, "grad_norm": 0.0183541439473629, "learning_rate": 5.981201613834675e-06, "loss": 0.0022, "step": 60310 }, { "epoch": 0.9869917368894706, "grad_norm": 0.014991115778684616, "learning_rate": 5.9798014324910055e-06, "loss": 0.0028, "step": 60320 }, { "epoch": 0.9871553628405465, "grad_norm": 0.05471270531415939, "learning_rate": 5.9784011712380666e-06, "loss": 0.0016, "step": 60330 }, { "epoch": 0.9873189887916224, "grad_norm": 0.06115105748176575, "learning_rate": 5.977000830190055e-06, "loss": 0.0015, "step": 60340 }, { "epoch": 0.9874826147426982, "grad_norm": 0.02637716569006443, "learning_rate": 5.97560040946118e-06, "loss": 0.0018, "step": 60350 }, { "epoch": 0.987646240693774, "grad_norm": 0.1435128152370453, "learning_rate": 5.974199909165654e-06, "loss": 0.0047, "step": 60360 }, { "epoch": 0.9878098666448498, "grad_norm": 0.08010214567184448, "learning_rate": 5.972799329417699e-06, "loss": 0.0017, "step": 60370 }, { "epoch": 0.9879734925959257, "grad_norm": 0.04892215505242348, "learning_rate": 5.971398670331541e-06, "loss": 0.0018, "step": 60380 }, { "epoch": 0.9881371185470016, "grad_norm": 0.02184041030704975, "learning_rate": 5.969997932021411e-06, "loss": 0.0015, "step": 60390 }, { "epoch": 0.9883007444980774, "grad_norm": 0.07250533252954483, "learning_rate": 5.968597114601551e-06, "loss": 0.0016, "step": 60400 }, { "epoch": 0.9884643704491533, "grad_norm": 0.03990749269723892, "learning_rate": 5.967196218186204e-06, "loss": 0.0021, "step": 60410 }, { "epoch": 0.988627996400229, "grad_norm": 0.07440242916345596, "learning_rate": 5.965795242889626e-06, "loss": 0.0018, "step": 60420 }, { "epoch": 0.9887916223513049, "grad_norm": 0.016071777790784836, "learning_rate": 5.964394188826074e-06, "loss": 0.0013, "step": 60430 }, { "epoch": 0.9889552483023808, "grad_norm": 0.049649856984615326, "learning_rate": 5.962993056109812e-06, "loss": 0.0018, "step": 60440 }, { "epoch": 0.9891188742534566, "grad_norm": 0.028748657554388046, "learning_rate": 5.961591844855112e-06, "loss": 0.0017, "step": 60450 }, { "epoch": 0.9892825002045325, "grad_norm": 0.06174496188759804, "learning_rate": 5.960190555176255e-06, "loss": 0.0015, "step": 60460 }, { "epoch": 0.9894461261556082, "grad_norm": 0.10030088573694229, "learning_rate": 5.9587891871875215e-06, "loss": 0.0017, "step": 60470 }, { "epoch": 0.9896097521066841, "grad_norm": 0.061132289469242096, "learning_rate": 5.957387741003205e-06, "loss": 0.0008, "step": 60480 }, { "epoch": 0.98977337805776, "grad_norm": 0.04618103802204132, "learning_rate": 5.955986216737602e-06, "loss": 0.0025, "step": 60490 }, { "epoch": 0.9899370040088358, "grad_norm": 0.1212206780910492, "learning_rate": 5.954584614505015e-06, "loss": 0.002, "step": 60500 }, { "epoch": 0.9901006299599117, "grad_norm": 0.06300706416368484, "learning_rate": 5.953182934419753e-06, "loss": 0.0014, "step": 60510 }, { "epoch": 0.9902642559109874, "grad_norm": 0.07991975545883179, "learning_rate": 5.9517811765961365e-06, "loss": 0.0017, "step": 60520 }, { "epoch": 0.9904278818620633, "grad_norm": 0.015466952696442604, "learning_rate": 5.950379341148484e-06, "loss": 0.0021, "step": 60530 }, { "epoch": 0.9905915078131392, "grad_norm": 0.010112815536558628, "learning_rate": 5.948977428191126e-06, "loss": 0.0017, "step": 60540 }, { "epoch": 0.990755133764215, "grad_norm": 0.24883177876472473, "learning_rate": 5.9475754378383985e-06, "loss": 0.0024, "step": 60550 }, { "epoch": 0.9909187597152909, "grad_norm": 0.03571660444140434, "learning_rate": 5.946173370204642e-06, "loss": 0.0017, "step": 60560 }, { "epoch": 0.9910823856663666, "grad_norm": 0.06003880500793457, "learning_rate": 5.944771225404204e-06, "loss": 0.002, "step": 60570 }, { "epoch": 0.9912460116174425, "grad_norm": 0.06538006663322449, "learning_rate": 5.943369003551439e-06, "loss": 0.0015, "step": 60580 }, { "epoch": 0.9914096375685184, "grad_norm": 0.03284836187958717, "learning_rate": 5.941966704760709e-06, "loss": 0.0024, "step": 60590 }, { "epoch": 0.9915732635195942, "grad_norm": 0.13619862496852875, "learning_rate": 5.9405643291463775e-06, "loss": 0.0033, "step": 60600 }, { "epoch": 0.9917368894706701, "grad_norm": 0.014600131660699844, "learning_rate": 5.939161876822821e-06, "loss": 0.0022, "step": 60610 }, { "epoch": 0.9919005154217458, "grad_norm": 0.06094701215624809, "learning_rate": 5.937759347904417e-06, "loss": 0.0017, "step": 60620 }, { "epoch": 0.9920641413728217, "grad_norm": 0.12479595839977264, "learning_rate": 5.93635674250555e-06, "loss": 0.0019, "step": 60630 }, { "epoch": 0.9922277673238976, "grad_norm": 0.0912921279668808, "learning_rate": 5.934954060740614e-06, "loss": 0.0022, "step": 60640 }, { "epoch": 0.9923913932749734, "grad_norm": 0.05441417545080185, "learning_rate": 5.9335513027240065e-06, "loss": 0.0016, "step": 60650 }, { "epoch": 0.9925550192260493, "grad_norm": 0.09336307644844055, "learning_rate": 5.932148468570129e-06, "loss": 0.0037, "step": 60660 }, { "epoch": 0.992718645177125, "grad_norm": 0.0738450437784195, "learning_rate": 5.930745558393396e-06, "loss": 0.0016, "step": 60670 }, { "epoch": 0.9928822711282009, "grad_norm": 0.01890706829726696, "learning_rate": 5.92934257230822e-06, "loss": 0.0011, "step": 60680 }, { "epoch": 0.9930458970792768, "grad_norm": 0.05509449169039726, "learning_rate": 5.927939510429026e-06, "loss": 0.0029, "step": 60690 }, { "epoch": 0.9932095230303526, "grad_norm": 0.15380942821502686, "learning_rate": 5.926536372870243e-06, "loss": 0.002, "step": 60700 }, { "epoch": 0.9933731489814285, "grad_norm": 0.03707823529839516, "learning_rate": 5.925133159746305e-06, "loss": 0.0024, "step": 60710 }, { "epoch": 0.9935367749325043, "grad_norm": 0.25121182203292847, "learning_rate": 5.9237298711716565e-06, "loss": 0.0017, "step": 60720 }, { "epoch": 0.9937004008835801, "grad_norm": 0.047686394304037094, "learning_rate": 5.92232650726074e-06, "loss": 0.0023, "step": 60730 }, { "epoch": 0.993864026834656, "grad_norm": 0.07264384627342224, "learning_rate": 5.920923068128013e-06, "loss": 0.0041, "step": 60740 }, { "epoch": 0.9940276527857318, "grad_norm": 0.21187007427215576, "learning_rate": 5.919519553887933e-06, "loss": 0.0021, "step": 60750 }, { "epoch": 0.9941912787368077, "grad_norm": 0.08301796764135361, "learning_rate": 5.9181159646549666e-06, "loss": 0.0027, "step": 60760 }, { "epoch": 0.9943549046878835, "grad_norm": 0.04313843324780464, "learning_rate": 5.916712300543586e-06, "loss": 0.0022, "step": 60770 }, { "epoch": 0.9945185306389593, "grad_norm": 0.08518585562705994, "learning_rate": 5.915308561668269e-06, "loss": 0.001, "step": 60780 }, { "epoch": 0.9946821565900352, "grad_norm": 0.023523874580860138, "learning_rate": 5.913904748143501e-06, "loss": 0.0036, "step": 60790 }, { "epoch": 0.994845782541111, "grad_norm": 0.14175419509410858, "learning_rate": 5.91250086008377e-06, "loss": 0.0025, "step": 60800 }, { "epoch": 0.9950094084921869, "grad_norm": 0.16313813626766205, "learning_rate": 5.911096897603573e-06, "loss": 0.0025, "step": 60810 }, { "epoch": 0.9951730344432627, "grad_norm": 0.021661773324012756, "learning_rate": 5.909692860817413e-06, "loss": 0.0009, "step": 60820 }, { "epoch": 0.9953366603943385, "grad_norm": 0.1231006383895874, "learning_rate": 5.908288749839799e-06, "loss": 0.0026, "step": 60830 }, { "epoch": 0.9955002863454144, "grad_norm": 0.027026686817407608, "learning_rate": 5.906884564785245e-06, "loss": 0.0014, "step": 60840 }, { "epoch": 0.9956639122964902, "grad_norm": 0.026886874809861183, "learning_rate": 5.90548030576827e-06, "loss": 0.0018, "step": 60850 }, { "epoch": 0.9958275382475661, "grad_norm": 0.07238604873418808, "learning_rate": 5.9040759729034034e-06, "loss": 0.0031, "step": 60860 }, { "epoch": 0.9959911641986419, "grad_norm": 0.08987830579280853, "learning_rate": 5.902671566305177e-06, "loss": 0.0019, "step": 60870 }, { "epoch": 0.9961547901497178, "grad_norm": 0.09531639516353607, "learning_rate": 5.901267086088127e-06, "loss": 0.0016, "step": 60880 }, { "epoch": 0.9963184161007936, "grad_norm": 0.013638807460665703, "learning_rate": 5.899862532366801e-06, "loss": 0.0011, "step": 60890 }, { "epoch": 0.9964820420518694, "grad_norm": 0.24744060635566711, "learning_rate": 5.8984579052557485e-06, "loss": 0.0014, "step": 60900 }, { "epoch": 0.9966456680029453, "grad_norm": 0.008690830320119858, "learning_rate": 5.897053204869526e-06, "loss": 0.0025, "step": 60910 }, { "epoch": 0.9968092939540211, "grad_norm": 0.016616791486740112, "learning_rate": 5.895648431322696e-06, "loss": 0.0016, "step": 60920 }, { "epoch": 0.996972919905097, "grad_norm": 0.08524341136217117, "learning_rate": 5.894243584729827e-06, "loss": 0.0029, "step": 60930 }, { "epoch": 0.9971365458561728, "grad_norm": 0.07097870856523514, "learning_rate": 5.892838665205493e-06, "loss": 0.0021, "step": 60940 }, { "epoch": 0.9973001718072486, "grad_norm": 0.0369216650724411, "learning_rate": 5.891433672864276e-06, "loss": 0.002, "step": 60950 }, { "epoch": 0.9974637977583245, "grad_norm": 0.06325913220643997, "learning_rate": 5.890028607820761e-06, "loss": 0.0036, "step": 60960 }, { "epoch": 0.9976274237094003, "grad_norm": 0.128708153963089, "learning_rate": 5.888623470189542e-06, "loss": 0.0011, "step": 60970 }, { "epoch": 0.9977910496604762, "grad_norm": 0.08021515607833862, "learning_rate": 5.887218260085216e-06, "loss": 0.0024, "step": 60980 }, { "epoch": 0.997954675611552, "grad_norm": 0.052398569881916046, "learning_rate": 5.885812977622386e-06, "loss": 0.0034, "step": 60990 }, { "epoch": 0.9981183015626278, "grad_norm": 0.12271636724472046, "learning_rate": 5.8844076229156645e-06, "loss": 0.0013, "step": 61000 }, { "epoch": 0.9981183015626278, "eval_loss": 0.0012876364635303617, "eval_runtime": 5.3865, "eval_samples_per_second": 37.13, "eval_steps_per_second": 9.282, "step": 61000 }, { "epoch": 0.9982819275137037, "grad_norm": 0.06839025020599365, "learning_rate": 5.883002196079665e-06, "loss": 0.0014, "step": 61010 }, { "epoch": 0.9984455534647795, "grad_norm": 0.04630940407514572, "learning_rate": 5.881596697229011e-06, "loss": 0.0017, "step": 61020 }, { "epoch": 0.9986091794158554, "grad_norm": 0.0540347583591938, "learning_rate": 5.880191126478332e-06, "loss": 0.0014, "step": 61030 }, { "epoch": 0.9987728053669312, "grad_norm": 0.020785707980394363, "learning_rate": 5.8787854839422576e-06, "loss": 0.0013, "step": 61040 }, { "epoch": 0.998936431318007, "grad_norm": 0.05198941007256508, "learning_rate": 5.877379769735429e-06, "loss": 0.0012, "step": 61050 }, { "epoch": 0.9991000572690829, "grad_norm": 0.008295542560517788, "learning_rate": 5.8759739839724916e-06, "loss": 0.0008, "step": 61060 }, { "epoch": 0.9992636832201587, "grad_norm": 0.13869978487491608, "learning_rate": 5.8745681267680965e-06, "loss": 0.0011, "step": 61070 }, { "epoch": 0.9994273091712346, "grad_norm": 0.03864474222064018, "learning_rate": 5.873162198236901e-06, "loss": 0.0019, "step": 61080 }, { "epoch": 0.9995909351223105, "grad_norm": 0.051644012331962585, "learning_rate": 5.871756198493567e-06, "loss": 0.002, "step": 61090 }, { "epoch": 0.9997545610733862, "grad_norm": 0.029904983937740326, "learning_rate": 5.870350127652763e-06, "loss": 0.001, "step": 61100 }, { "epoch": 0.9999181870244621, "grad_norm": 0.03249731659889221, "learning_rate": 5.868943985829163e-06, "loss": 0.0015, "step": 61110 }, { "epoch": 1.000081812975538, "grad_norm": 0.059132978320121765, "learning_rate": 5.86753777313745e-06, "loss": 0.0014, "step": 61120 }, { "epoch": 1.0002454389266138, "grad_norm": 0.10480040311813354, "learning_rate": 5.866131489692306e-06, "loss": 0.0018, "step": 61130 }, { "epoch": 1.0004090648776895, "grad_norm": 0.08471012115478516, "learning_rate": 5.864725135608426e-06, "loss": 0.0015, "step": 61140 }, { "epoch": 1.0005726908287655, "grad_norm": 0.026128077879548073, "learning_rate": 5.863318711000505e-06, "loss": 0.0036, "step": 61150 }, { "epoch": 1.0007363167798413, "grad_norm": 0.0371905192732811, "learning_rate": 5.861912215983247e-06, "loss": 0.0007, "step": 61160 }, { "epoch": 1.000899942730917, "grad_norm": 0.051652416586875916, "learning_rate": 5.860505650671362e-06, "loss": 0.0015, "step": 61170 }, { "epoch": 1.0010635686819929, "grad_norm": 0.05548330768942833, "learning_rate": 5.859099015179563e-06, "loss": 0.0012, "step": 61180 }, { "epoch": 1.0012271946330689, "grad_norm": 0.028802618384361267, "learning_rate": 5.8576923096225715e-06, "loss": 0.0012, "step": 61190 }, { "epoch": 1.0013908205841446, "grad_norm": 0.028903771191835403, "learning_rate": 5.856285534115114e-06, "loss": 0.0021, "step": 61200 }, { "epoch": 1.0015544465352204, "grad_norm": 0.21797987818717957, "learning_rate": 5.854878688771921e-06, "loss": 0.0026, "step": 61210 }, { "epoch": 1.0017180724862964, "grad_norm": 0.09580961614847183, "learning_rate": 5.853471773707731e-06, "loss": 0.0013, "step": 61220 }, { "epoch": 1.0018816984373722, "grad_norm": 0.05054014176130295, "learning_rate": 5.852064789037286e-06, "loss": 0.0007, "step": 61230 }, { "epoch": 1.002045324388448, "grad_norm": 0.04053964093327522, "learning_rate": 5.8506577348753365e-06, "loss": 0.0019, "step": 61240 }, { "epoch": 1.002208950339524, "grad_norm": 0.07920017838478088, "learning_rate": 5.849250611336637e-06, "loss": 0.0025, "step": 61250 }, { "epoch": 1.0023725762905997, "grad_norm": 0.03208623826503754, "learning_rate": 5.847843418535946e-06, "loss": 0.0007, "step": 61260 }, { "epoch": 1.0025362022416755, "grad_norm": 0.0023011798039078712, "learning_rate": 5.846436156588031e-06, "loss": 0.0008, "step": 61270 }, { "epoch": 1.0026998281927513, "grad_norm": 0.027438288554549217, "learning_rate": 5.845028825607663e-06, "loss": 0.0018, "step": 61280 }, { "epoch": 1.0028634541438273, "grad_norm": 0.017761297523975372, "learning_rate": 5.843621425709618e-06, "loss": 0.0018, "step": 61290 }, { "epoch": 1.003027080094903, "grad_norm": 0.12389422208070755, "learning_rate": 5.84221395700868e-06, "loss": 0.0026, "step": 61300 }, { "epoch": 1.0031907060459788, "grad_norm": 0.030440891161561012, "learning_rate": 5.840806419619638e-06, "loss": 0.0009, "step": 61310 }, { "epoch": 1.0033543319970548, "grad_norm": 0.035298582166433334, "learning_rate": 5.839398813657284e-06, "loss": 0.0012, "step": 61320 }, { "epoch": 1.0035179579481306, "grad_norm": 0.06256187707185745, "learning_rate": 5.83799113923642e-06, "loss": 0.0015, "step": 61330 }, { "epoch": 1.0036815838992064, "grad_norm": 0.05947040766477585, "learning_rate": 5.836583396471849e-06, "loss": 0.002, "step": 61340 }, { "epoch": 1.0038452098502824, "grad_norm": 0.022738659754395485, "learning_rate": 5.835175585478383e-06, "loss": 0.0026, "step": 61350 }, { "epoch": 1.0040088358013581, "grad_norm": 0.07788850367069244, "learning_rate": 5.8337677063708364e-06, "loss": 0.0017, "step": 61360 }, { "epoch": 1.004172461752434, "grad_norm": 0.04353642091155052, "learning_rate": 5.832359759264034e-06, "loss": 0.0018, "step": 61370 }, { "epoch": 1.0043360877035097, "grad_norm": 0.07055719196796417, "learning_rate": 5.830951744272801e-06, "loss": 0.0015, "step": 61380 }, { "epoch": 1.0044997136545857, "grad_norm": 0.022255435585975647, "learning_rate": 5.829543661511972e-06, "loss": 0.0039, "step": 61390 }, { "epoch": 1.0046633396056615, "grad_norm": 0.05845420062541962, "learning_rate": 5.828135511096382e-06, "loss": 0.0018, "step": 61400 }, { "epoch": 1.0048269655567372, "grad_norm": 0.12708251178264618, "learning_rate": 5.826727293140879e-06, "loss": 0.0015, "step": 61410 }, { "epoch": 1.0049905915078132, "grad_norm": 0.03990639001131058, "learning_rate": 5.82531900776031e-06, "loss": 0.0014, "step": 61420 }, { "epoch": 1.005154217458889, "grad_norm": 0.09444354474544525, "learning_rate": 5.823910655069531e-06, "loss": 0.0019, "step": 61430 }, { "epoch": 1.0053178434099648, "grad_norm": 0.07281923294067383, "learning_rate": 5.822502235183402e-06, "loss": 0.0018, "step": 61440 }, { "epoch": 1.0054814693610408, "grad_norm": 0.02620009332895279, "learning_rate": 5.821093748216789e-06, "loss": 0.003, "step": 61450 }, { "epoch": 1.0056450953121165, "grad_norm": 0.11269406229257584, "learning_rate": 5.819685194284563e-06, "loss": 0.0015, "step": 61460 }, { "epoch": 1.0058087212631923, "grad_norm": 0.21514850854873657, "learning_rate": 5.818276573501602e-06, "loss": 0.0015, "step": 61470 }, { "epoch": 1.005972347214268, "grad_norm": 0.16029570996761322, "learning_rate": 5.816867885982787e-06, "loss": 0.0011, "step": 61480 }, { "epoch": 1.006135973165344, "grad_norm": 0.12328983843326569, "learning_rate": 5.815459131843008e-06, "loss": 0.0012, "step": 61490 }, { "epoch": 1.0062995991164199, "grad_norm": 0.2211940586566925, "learning_rate": 5.814050311197156e-06, "loss": 0.0028, "step": 61500 }, { "epoch": 1.0064632250674956, "grad_norm": 0.04661479964852333, "learning_rate": 5.81264142416013e-06, "loss": 0.002, "step": 61510 }, { "epoch": 1.0066268510185716, "grad_norm": 0.04160916805267334, "learning_rate": 5.811232470846834e-06, "loss": 0.0014, "step": 61520 }, { "epoch": 1.0067904769696474, "grad_norm": 0.05628017336130142, "learning_rate": 5.809823451372178e-06, "loss": 0.0011, "step": 61530 }, { "epoch": 1.0069541029207232, "grad_norm": 0.00382031942717731, "learning_rate": 5.808414365851076e-06, "loss": 0.0018, "step": 61540 }, { "epoch": 1.007117728871799, "grad_norm": 0.01784886233508587, "learning_rate": 5.80700521439845e-06, "loss": 0.0008, "step": 61550 }, { "epoch": 1.007281354822875, "grad_norm": 0.013976208865642548, "learning_rate": 5.805595997129223e-06, "loss": 0.001, "step": 61560 }, { "epoch": 1.0074449807739507, "grad_norm": 0.04486384615302086, "learning_rate": 5.804186714158328e-06, "loss": 0.002, "step": 61570 }, { "epoch": 1.0076086067250265, "grad_norm": 0.21445812284946442, "learning_rate": 5.802777365600701e-06, "loss": 0.0017, "step": 61580 }, { "epoch": 1.0077722326761025, "grad_norm": 0.05751577764749527, "learning_rate": 5.801367951571283e-06, "loss": 0.0024, "step": 61590 }, { "epoch": 1.0079358586271783, "grad_norm": 0.03802460804581642, "learning_rate": 5.799958472185021e-06, "loss": 0.0009, "step": 61600 }, { "epoch": 1.008099484578254, "grad_norm": 0.023952007293701172, "learning_rate": 5.798548927556869e-06, "loss": 0.0015, "step": 61610 }, { "epoch": 1.00826311052933, "grad_norm": 0.042315080761909485, "learning_rate": 5.797139317801782e-06, "loss": 0.002, "step": 61620 }, { "epoch": 1.0084267364804058, "grad_norm": 0.02831646241247654, "learning_rate": 5.795729643034725e-06, "loss": 0.001, "step": 61630 }, { "epoch": 1.0085903624314816, "grad_norm": 0.029023466631770134, "learning_rate": 5.7943199033706655e-06, "loss": 0.0009, "step": 61640 }, { "epoch": 1.0087539883825574, "grad_norm": 0.16637486219406128, "learning_rate": 5.792910098924577e-06, "loss": 0.0033, "step": 61650 }, { "epoch": 1.0089176143336334, "grad_norm": 0.06188393011689186, "learning_rate": 5.791500229811438e-06, "loss": 0.0012, "step": 61660 }, { "epoch": 1.0090812402847091, "grad_norm": 0.01722871884703636, "learning_rate": 5.7900902961462345e-06, "loss": 0.0024, "step": 61670 }, { "epoch": 1.009244866235785, "grad_norm": 0.051801424473524094, "learning_rate": 5.788680298043954e-06, "loss": 0.0012, "step": 61680 }, { "epoch": 1.009408492186861, "grad_norm": 0.025072526186704636, "learning_rate": 5.787270235619593e-06, "loss": 0.0014, "step": 61690 }, { "epoch": 1.0095721181379367, "grad_norm": 0.042015112936496735, "learning_rate": 5.785860108988148e-06, "loss": 0.002, "step": 61700 }, { "epoch": 1.0097357440890125, "grad_norm": 0.04066444933414459, "learning_rate": 5.7844499182646276e-06, "loss": 0.0016, "step": 61710 }, { "epoch": 1.0098993700400885, "grad_norm": 0.060358818620443344, "learning_rate": 5.783039663564041e-06, "loss": 0.0013, "step": 61720 }, { "epoch": 1.0100629959911642, "grad_norm": 0.08476940542459488, "learning_rate": 5.781629345001402e-06, "loss": 0.0022, "step": 61730 }, { "epoch": 1.01022662194224, "grad_norm": 0.05279752239584923, "learning_rate": 5.780218962691734e-06, "loss": 0.0007, "step": 61740 }, { "epoch": 1.0103902478933158, "grad_norm": 0.061872582882642746, "learning_rate": 5.778808516750061e-06, "loss": 0.002, "step": 61750 }, { "epoch": 1.0105538738443918, "grad_norm": 0.04641154035925865, "learning_rate": 5.777398007291416e-06, "loss": 0.0011, "step": 61760 }, { "epoch": 1.0107174997954675, "grad_norm": 0.0397113673388958, "learning_rate": 5.775987434430834e-06, "loss": 0.0017, "step": 61770 }, { "epoch": 1.0108811257465433, "grad_norm": 0.02313457429409027, "learning_rate": 5.7745767982833576e-06, "loss": 0.0011, "step": 61780 }, { "epoch": 1.0110447516976193, "grad_norm": 0.04160955920815468, "learning_rate": 5.7731660989640324e-06, "loss": 0.0011, "step": 61790 }, { "epoch": 1.011208377648695, "grad_norm": 0.03348110616207123, "learning_rate": 5.7717553365879105e-06, "loss": 0.0008, "step": 61800 }, { "epoch": 1.0113720035997709, "grad_norm": 0.10487332940101624, "learning_rate": 5.770344511270049e-06, "loss": 0.0016, "step": 61810 }, { "epoch": 1.0115356295508469, "grad_norm": 0.06869300454854965, "learning_rate": 5.768933623125511e-06, "loss": 0.0006, "step": 61820 }, { "epoch": 1.0116992555019226, "grad_norm": 0.03205036744475365, "learning_rate": 5.767522672269362e-06, "loss": 0.0011, "step": 61830 }, { "epoch": 1.0118628814529984, "grad_norm": 0.08402548730373383, "learning_rate": 5.766111658816676e-06, "loss": 0.0013, "step": 61840 }, { "epoch": 1.0120265074040742, "grad_norm": 0.0778636708855629, "learning_rate": 5.7647005828825285e-06, "loss": 0.001, "step": 61850 }, { "epoch": 1.0121901333551502, "grad_norm": 0.09324698895215988, "learning_rate": 5.763289444582005e-06, "loss": 0.0015, "step": 61860 }, { "epoch": 1.012353759306226, "grad_norm": 0.027487050741910934, "learning_rate": 5.761878244030189e-06, "loss": 0.0015, "step": 61870 }, { "epoch": 1.0125173852573017, "grad_norm": 0.051138538867235184, "learning_rate": 5.7604669813421765e-06, "loss": 0.0021, "step": 61880 }, { "epoch": 1.0126810112083777, "grad_norm": 0.08442410081624985, "learning_rate": 5.7590556566330645e-06, "loss": 0.0015, "step": 61890 }, { "epoch": 1.0128446371594535, "grad_norm": 0.0564834401011467, "learning_rate": 5.757644270017956e-06, "loss": 0.002, "step": 61900 }, { "epoch": 1.0130082631105293, "grad_norm": 0.056608185172080994, "learning_rate": 5.756232821611958e-06, "loss": 0.0018, "step": 61910 }, { "epoch": 1.0131718890616053, "grad_norm": 0.26579341292381287, "learning_rate": 5.7548213115301845e-06, "loss": 0.0013, "step": 61920 }, { "epoch": 1.013335515012681, "grad_norm": 0.023259269073605537, "learning_rate": 5.753409739887753e-06, "loss": 0.0012, "step": 61930 }, { "epoch": 1.0134991409637568, "grad_norm": 0.08676396310329437, "learning_rate": 5.751998106799786e-06, "loss": 0.0028, "step": 61940 }, { "epoch": 1.0136627669148326, "grad_norm": 0.010002187453210354, "learning_rate": 5.750586412381413e-06, "loss": 0.0011, "step": 61950 }, { "epoch": 1.0138263928659086, "grad_norm": 0.23325270414352417, "learning_rate": 5.749174656747764e-06, "loss": 0.0016, "step": 61960 }, { "epoch": 1.0139900188169844, "grad_norm": 0.07708834856748581, "learning_rate": 5.7477628400139796e-06, "loss": 0.0014, "step": 61970 }, { "epoch": 1.0141536447680601, "grad_norm": 0.14373217523097992, "learning_rate": 5.746350962295203e-06, "loss": 0.001, "step": 61980 }, { "epoch": 1.0143172707191361, "grad_norm": 0.01857146807014942, "learning_rate": 5.74493902370658e-06, "loss": 0.0015, "step": 61990 }, { "epoch": 1.014480896670212, "grad_norm": 0.09846285730600357, "learning_rate": 5.743527024363266e-06, "loss": 0.0026, "step": 62000 }, { "epoch": 1.0146445226212877, "grad_norm": 0.04691923037171364, "learning_rate": 5.742114964380418e-06, "loss": 0.0009, "step": 62010 }, { "epoch": 1.0148081485723637, "grad_norm": 0.08016195893287659, "learning_rate": 5.740702843873197e-06, "loss": 0.0022, "step": 62020 }, { "epoch": 1.0149717745234395, "grad_norm": 0.15680751204490662, "learning_rate": 5.7392906629567725e-06, "loss": 0.0018, "step": 62030 }, { "epoch": 1.0151354004745152, "grad_norm": 0.08605265617370605, "learning_rate": 5.737878421746317e-06, "loss": 0.0014, "step": 62040 }, { "epoch": 1.015299026425591, "grad_norm": 0.04326837137341499, "learning_rate": 5.736466120357008e-06, "loss": 0.0012, "step": 62050 }, { "epoch": 1.015462652376667, "grad_norm": 0.08900121599435806, "learning_rate": 5.735053758904028e-06, "loss": 0.0019, "step": 62060 }, { "epoch": 1.0156262783277428, "grad_norm": 0.09054692089557648, "learning_rate": 5.733641337502563e-06, "loss": 0.0015, "step": 62070 }, { "epoch": 1.0157899042788185, "grad_norm": 0.0965246707201004, "learning_rate": 5.732228856267808e-06, "loss": 0.0013, "step": 62080 }, { "epoch": 1.0159535302298945, "grad_norm": 0.2699553370475769, "learning_rate": 5.730816315314958e-06, "loss": 0.001, "step": 62090 }, { "epoch": 1.0161171561809703, "grad_norm": 0.1329885870218277, "learning_rate": 5.729403714759216e-06, "loss": 0.0016, "step": 62100 }, { "epoch": 1.016280782132046, "grad_norm": 0.05228469893336296, "learning_rate": 5.727991054715789e-06, "loss": 0.0023, "step": 62110 }, { "epoch": 1.016444408083122, "grad_norm": 0.03670582175254822, "learning_rate": 5.726578335299887e-06, "loss": 0.0017, "step": 62120 }, { "epoch": 1.0166080340341979, "grad_norm": 0.08487051725387573, "learning_rate": 5.72516555662673e-06, "loss": 0.0011, "step": 62130 }, { "epoch": 1.0167716599852736, "grad_norm": 0.05869929492473602, "learning_rate": 5.723752718811536e-06, "loss": 0.002, "step": 62140 }, { "epoch": 1.0169352859363494, "grad_norm": 0.03334204852581024, "learning_rate": 5.722339821969534e-06, "loss": 0.0022, "step": 62150 }, { "epoch": 1.0170989118874254, "grad_norm": 0.028556037694215775, "learning_rate": 5.720926866215952e-06, "loss": 0.0007, "step": 62160 }, { "epoch": 1.0172625378385012, "grad_norm": 0.03512772172689438, "learning_rate": 5.719513851666028e-06, "loss": 0.0014, "step": 62170 }, { "epoch": 1.017426163789577, "grad_norm": 0.10795187950134277, "learning_rate": 5.718100778435004e-06, "loss": 0.0014, "step": 62180 }, { "epoch": 1.017589789740653, "grad_norm": 0.06372316181659698, "learning_rate": 5.716687646638122e-06, "loss": 0.0017, "step": 62190 }, { "epoch": 1.0177534156917287, "grad_norm": 0.0025340444408357143, "learning_rate": 5.7152744563906345e-06, "loss": 0.0019, "step": 62200 }, { "epoch": 1.0179170416428045, "grad_norm": 0.017589906230568886, "learning_rate": 5.713861207807796e-06, "loss": 0.0025, "step": 62210 }, { "epoch": 1.0180806675938805, "grad_norm": 0.05124212056398392, "learning_rate": 5.712447901004865e-06, "loss": 0.0016, "step": 62220 }, { "epoch": 1.0182442935449563, "grad_norm": 0.5123308897018433, "learning_rate": 5.711034536097109e-06, "loss": 0.0021, "step": 62230 }, { "epoch": 1.018407919496032, "grad_norm": 0.10106267035007477, "learning_rate": 5.709621113199795e-06, "loss": 0.0018, "step": 62240 }, { "epoch": 1.0185715454471078, "grad_norm": 0.18311373889446259, "learning_rate": 5.708207632428195e-06, "loss": 0.0013, "step": 62250 }, { "epoch": 1.0187351713981838, "grad_norm": 0.014049588702619076, "learning_rate": 5.706794093897593e-06, "loss": 0.0022, "step": 62260 }, { "epoch": 1.0188987973492596, "grad_norm": 0.22256989777088165, "learning_rate": 5.705380497723268e-06, "loss": 0.0015, "step": 62270 }, { "epoch": 1.0190624233003354, "grad_norm": 0.0713861808180809, "learning_rate": 5.7039668440205096e-06, "loss": 0.0015, "step": 62280 }, { "epoch": 1.0192260492514114, "grad_norm": 0.0895313248038292, "learning_rate": 5.702553132904611e-06, "loss": 0.0017, "step": 62290 }, { "epoch": 1.0193896752024871, "grad_norm": 0.09305835515260696, "learning_rate": 5.701139364490869e-06, "loss": 0.0019, "step": 62300 }, { "epoch": 1.019553301153563, "grad_norm": 0.007383860647678375, "learning_rate": 5.699725538894586e-06, "loss": 0.0012, "step": 62310 }, { "epoch": 1.019716927104639, "grad_norm": 0.03426968306303024, "learning_rate": 5.698311656231068e-06, "loss": 0.0024, "step": 62320 }, { "epoch": 1.0198805530557147, "grad_norm": 0.08224494010210037, "learning_rate": 5.696897716615629e-06, "loss": 0.0019, "step": 62330 }, { "epoch": 1.0200441790067905, "grad_norm": 0.007065152749419212, "learning_rate": 5.695483720163582e-06, "loss": 0.0026, "step": 62340 }, { "epoch": 1.0202078049578662, "grad_norm": 0.10706252604722977, "learning_rate": 5.694069666990249e-06, "loss": 0.0016, "step": 62350 }, { "epoch": 1.0203714309089422, "grad_norm": 0.05247305706143379, "learning_rate": 5.6926555572109554e-06, "loss": 0.0009, "step": 62360 }, { "epoch": 1.020535056860018, "grad_norm": 0.04206337779760361, "learning_rate": 5.691241390941031e-06, "loss": 0.0018, "step": 62370 }, { "epoch": 1.0206986828110938, "grad_norm": 0.017959145829081535, "learning_rate": 5.689827168295811e-06, "loss": 0.0022, "step": 62380 }, { "epoch": 1.0208623087621698, "grad_norm": 0.09433262050151825, "learning_rate": 5.688412889390633e-06, "loss": 0.0019, "step": 62390 }, { "epoch": 1.0210259347132455, "grad_norm": 0.03946654871106148, "learning_rate": 5.686998554340843e-06, "loss": 0.0027, "step": 62400 }, { "epoch": 1.0211895606643213, "grad_norm": 0.029379980638623238, "learning_rate": 5.685584163261788e-06, "loss": 0.0016, "step": 62410 }, { "epoch": 1.0213531866153973, "grad_norm": 0.014345060102641582, "learning_rate": 5.6841697162688216e-06, "loss": 0.0022, "step": 62420 }, { "epoch": 1.021516812566473, "grad_norm": 0.09723767638206482, "learning_rate": 5.6827552134773015e-06, "loss": 0.0034, "step": 62430 }, { "epoch": 1.0216804385175489, "grad_norm": 0.06897973269224167, "learning_rate": 5.681340655002588e-06, "loss": 0.0013, "step": 62440 }, { "epoch": 1.0218440644686246, "grad_norm": 0.10360734909772873, "learning_rate": 5.679926040960048e-06, "loss": 0.0024, "step": 62450 }, { "epoch": 1.0220076904197006, "grad_norm": 0.36921411752700806, "learning_rate": 5.678511371465056e-06, "loss": 0.0051, "step": 62460 }, { "epoch": 1.0221713163707764, "grad_norm": 0.15142609179019928, "learning_rate": 5.677096646632983e-06, "loss": 0.0018, "step": 62470 }, { "epoch": 1.0223349423218522, "grad_norm": 0.11854346096515656, "learning_rate": 5.675681866579211e-06, "loss": 0.0015, "step": 62480 }, { "epoch": 1.0224985682729282, "grad_norm": 0.009892048314213753, "learning_rate": 5.674267031419125e-06, "loss": 0.0016, "step": 62490 }, { "epoch": 1.022662194224004, "grad_norm": 0.05725774168968201, "learning_rate": 5.672852141268115e-06, "loss": 0.0011, "step": 62500 }, { "epoch": 1.0228258201750797, "grad_norm": 0.02648870460689068, "learning_rate": 5.671437196241572e-06, "loss": 0.0016, "step": 62510 }, { "epoch": 1.0229894461261555, "grad_norm": 0.03140348568558693, "learning_rate": 5.670022196454898e-06, "loss": 0.0011, "step": 62520 }, { "epoch": 1.0231530720772315, "grad_norm": 0.1427689641714096, "learning_rate": 5.668607142023491e-06, "loss": 0.0013, "step": 62530 }, { "epoch": 1.0233166980283073, "grad_norm": 0.25043416023254395, "learning_rate": 5.667192033062762e-06, "loss": 0.0017, "step": 62540 }, { "epoch": 1.023480323979383, "grad_norm": 0.025813426822423935, "learning_rate": 5.66577686968812e-06, "loss": 0.001, "step": 62550 }, { "epoch": 1.023643949930459, "grad_norm": 0.09358217567205429, "learning_rate": 5.664361652014981e-06, "loss": 0.0019, "step": 62560 }, { "epoch": 1.0238075758815348, "grad_norm": 0.12662622332572937, "learning_rate": 5.662946380158767e-06, "loss": 0.0012, "step": 62570 }, { "epoch": 1.0239712018326106, "grad_norm": 0.061406973749399185, "learning_rate": 5.661531054234901e-06, "loss": 0.0018, "step": 62580 }, { "epoch": 1.0241348277836866, "grad_norm": 0.030233286321163177, "learning_rate": 5.6601156743588135e-06, "loss": 0.0008, "step": 62590 }, { "epoch": 1.0242984537347624, "grad_norm": 0.024516692385077477, "learning_rate": 5.658700240645938e-06, "loss": 0.0017, "step": 62600 }, { "epoch": 1.0244620796858381, "grad_norm": 0.20729252696037292, "learning_rate": 5.657284753211712e-06, "loss": 0.0022, "step": 62610 }, { "epoch": 1.024625705636914, "grad_norm": 0.02973029948771, "learning_rate": 5.655869212171577e-06, "loss": 0.0009, "step": 62620 }, { "epoch": 1.02478933158799, "grad_norm": 0.05634833872318268, "learning_rate": 5.65445361764098e-06, "loss": 0.001, "step": 62630 }, { "epoch": 1.0249529575390657, "grad_norm": 0.02505139261484146, "learning_rate": 5.6530379697353736e-06, "loss": 0.0011, "step": 62640 }, { "epoch": 1.0251165834901415, "grad_norm": 0.05938884988427162, "learning_rate": 5.651622268570212e-06, "loss": 0.0015, "step": 62650 }, { "epoch": 1.0252802094412174, "grad_norm": 0.06533920019865036, "learning_rate": 5.650206514260957e-06, "loss": 0.0037, "step": 62660 }, { "epoch": 1.0254438353922932, "grad_norm": 0.00967687088996172, "learning_rate": 5.6487907069230685e-06, "loss": 0.0023, "step": 62670 }, { "epoch": 1.025607461343369, "grad_norm": 0.04941129311919212, "learning_rate": 5.647374846672018e-06, "loss": 0.0012, "step": 62680 }, { "epoch": 1.025771087294445, "grad_norm": 0.14057283103466034, "learning_rate": 5.645958933623277e-06, "loss": 0.0009, "step": 62690 }, { "epoch": 1.0259347132455208, "grad_norm": 0.07917727530002594, "learning_rate": 5.644542967892323e-06, "loss": 0.0023, "step": 62700 }, { "epoch": 1.0260983391965965, "grad_norm": 0.06364534795284271, "learning_rate": 5.643126949594639e-06, "loss": 0.0016, "step": 62710 }, { "epoch": 1.0262619651476723, "grad_norm": 0.03275671228766441, "learning_rate": 5.641710878845708e-06, "loss": 0.0011, "step": 62720 }, { "epoch": 1.0264255910987483, "grad_norm": 0.028730599209666252, "learning_rate": 5.640294755761021e-06, "loss": 0.0016, "step": 62730 }, { "epoch": 1.026589217049824, "grad_norm": 0.05976053699851036, "learning_rate": 5.638878580456072e-06, "loss": 0.0012, "step": 62740 }, { "epoch": 1.0267528430008999, "grad_norm": 0.03755538910627365, "learning_rate": 5.63746235304636e-06, "loss": 0.0011, "step": 62750 }, { "epoch": 1.0269164689519759, "grad_norm": 0.1466907262802124, "learning_rate": 5.636046073647388e-06, "loss": 0.0016, "step": 62760 }, { "epoch": 1.0270800949030516, "grad_norm": 0.06574656814336777, "learning_rate": 5.634629742374661e-06, "loss": 0.001, "step": 62770 }, { "epoch": 1.0272437208541274, "grad_norm": 0.009138481691479683, "learning_rate": 5.6332133593436925e-06, "loss": 0.0028, "step": 62780 }, { "epoch": 1.0274073468052034, "grad_norm": 0.007317844778299332, "learning_rate": 5.631796924669996e-06, "loss": 0.0005, "step": 62790 }, { "epoch": 1.0275709727562792, "grad_norm": 0.023709839209914207, "learning_rate": 5.630380438469092e-06, "loss": 0.0014, "step": 62800 }, { "epoch": 1.027734598707355, "grad_norm": 0.0037192576564848423, "learning_rate": 5.628963900856503e-06, "loss": 0.0026, "step": 62810 }, { "epoch": 1.0278982246584307, "grad_norm": 0.15787558257579803, "learning_rate": 5.62754731194776e-06, "loss": 0.0016, "step": 62820 }, { "epoch": 1.0280618506095067, "grad_norm": 0.005675437394529581, "learning_rate": 5.626130671858393e-06, "loss": 0.0013, "step": 62830 }, { "epoch": 1.0282254765605825, "grad_norm": 0.1373959630727768, "learning_rate": 5.624713980703939e-06, "loss": 0.0016, "step": 62840 }, { "epoch": 1.0283891025116583, "grad_norm": 0.04313086345791817, "learning_rate": 5.623297238599936e-06, "loss": 0.0011, "step": 62850 }, { "epoch": 1.0285527284627343, "grad_norm": 0.0606301911175251, "learning_rate": 5.621880445661934e-06, "loss": 0.0021, "step": 62860 }, { "epoch": 1.02871635441381, "grad_norm": 0.06886827200651169, "learning_rate": 5.620463602005476e-06, "loss": 0.0018, "step": 62870 }, { "epoch": 1.0288799803648858, "grad_norm": 0.04967603459954262, "learning_rate": 5.619046707746118e-06, "loss": 0.0007, "step": 62880 }, { "epoch": 1.0290436063159618, "grad_norm": 0.028564803302288055, "learning_rate": 5.617629762999419e-06, "loss": 0.0016, "step": 62890 }, { "epoch": 1.0292072322670376, "grad_norm": 0.011786828748881817, "learning_rate": 5.616212767880936e-06, "loss": 0.0015, "step": 62900 }, { "epoch": 1.0293708582181134, "grad_norm": 0.11557739973068237, "learning_rate": 5.614795722506236e-06, "loss": 0.0017, "step": 62910 }, { "epoch": 1.0295344841691891, "grad_norm": 0.05231388285756111, "learning_rate": 5.613378626990889e-06, "loss": 0.002, "step": 62920 }, { "epoch": 1.0296981101202651, "grad_norm": 0.08543913066387177, "learning_rate": 5.611961481450468e-06, "loss": 0.0017, "step": 62930 }, { "epoch": 1.029861736071341, "grad_norm": 0.06034789979457855, "learning_rate": 5.610544286000552e-06, "loss": 0.0012, "step": 62940 }, { "epoch": 1.0300253620224167, "grad_norm": 0.03104674071073532, "learning_rate": 5.60912704075672e-06, "loss": 0.0012, "step": 62950 }, { "epoch": 1.0301889879734927, "grad_norm": 0.043217677623033524, "learning_rate": 5.6077097458345585e-06, "loss": 0.0013, "step": 62960 }, { "epoch": 1.0303526139245685, "grad_norm": 0.010800025425851345, "learning_rate": 5.606292401349659e-06, "loss": 0.0013, "step": 62970 }, { "epoch": 1.0305162398756442, "grad_norm": 0.00527599174529314, "learning_rate": 5.604875007417614e-06, "loss": 0.0012, "step": 62980 }, { "epoch": 1.0306798658267202, "grad_norm": 0.04410775750875473, "learning_rate": 5.6034575641540215e-06, "loss": 0.0015, "step": 62990 }, { "epoch": 1.030843491777796, "grad_norm": 0.02884807623922825, "learning_rate": 5.602040071674481e-06, "loss": 0.0013, "step": 63000 }, { "epoch": 1.0310071177288718, "grad_norm": 0.03039473108947277, "learning_rate": 5.600622530094603e-06, "loss": 0.0011, "step": 63010 }, { "epoch": 1.0311707436799475, "grad_norm": 0.046265389770269394, "learning_rate": 5.599204939529994e-06, "loss": 0.0023, "step": 63020 }, { "epoch": 1.0313343696310235, "grad_norm": 0.10824576765298843, "learning_rate": 5.597787300096269e-06, "loss": 0.0011, "step": 63030 }, { "epoch": 1.0314979955820993, "grad_norm": 0.08840864896774292, "learning_rate": 5.5963696119090445e-06, "loss": 0.0017, "step": 63040 }, { "epoch": 1.031661621533175, "grad_norm": 0.05065814033150673, "learning_rate": 5.594951875083945e-06, "loss": 0.0023, "step": 63050 }, { "epoch": 1.031825247484251, "grad_norm": 0.06369905173778534, "learning_rate": 5.5935340897365945e-06, "loss": 0.0009, "step": 63060 }, { "epoch": 1.0319888734353269, "grad_norm": 0.026613572612404823, "learning_rate": 5.592116255982622e-06, "loss": 0.0012, "step": 63070 }, { "epoch": 1.0321524993864026, "grad_norm": 0.07370531558990479, "learning_rate": 5.590698373937663e-06, "loss": 0.0038, "step": 63080 }, { "epoch": 1.0323161253374786, "grad_norm": 0.2020587921142578, "learning_rate": 5.589280443717354e-06, "loss": 0.0017, "step": 63090 }, { "epoch": 1.0324797512885544, "grad_norm": 0.11497752368450165, "learning_rate": 5.587862465437338e-06, "loss": 0.0015, "step": 63100 }, { "epoch": 1.0326433772396302, "grad_norm": 0.021445196121931076, "learning_rate": 5.586444439213259e-06, "loss": 0.0013, "step": 63110 }, { "epoch": 1.032807003190706, "grad_norm": 0.06567702442407608, "learning_rate": 5.585026365160766e-06, "loss": 0.0015, "step": 63120 }, { "epoch": 1.032970629141782, "grad_norm": 0.05859753116965294, "learning_rate": 5.583608243395513e-06, "loss": 0.0014, "step": 63130 }, { "epoch": 1.0331342550928577, "grad_norm": 0.1056719496846199, "learning_rate": 5.582190074033158e-06, "loss": 0.0014, "step": 63140 }, { "epoch": 1.0332978810439335, "grad_norm": 0.050497256219387054, "learning_rate": 5.580771857189361e-06, "loss": 0.0014, "step": 63150 }, { "epoch": 1.0334615069950095, "grad_norm": 0.07371681183576584, "learning_rate": 5.579353592979787e-06, "loss": 0.0018, "step": 63160 }, { "epoch": 1.0336251329460853, "grad_norm": 0.012518026866018772, "learning_rate": 5.577935281520106e-06, "loss": 0.0009, "step": 63170 }, { "epoch": 1.033788758897161, "grad_norm": 0.08353175222873688, "learning_rate": 5.576516922925988e-06, "loss": 0.0015, "step": 63180 }, { "epoch": 1.033952384848237, "grad_norm": 0.09575547277927399, "learning_rate": 5.575098517313113e-06, "loss": 0.0016, "step": 63190 }, { "epoch": 1.0341160107993128, "grad_norm": 0.07352940738201141, "learning_rate": 5.57368006479716e-06, "loss": 0.0013, "step": 63200 }, { "epoch": 1.0342796367503886, "grad_norm": 0.030067091807723045, "learning_rate": 5.572261565493812e-06, "loss": 0.0013, "step": 63210 }, { "epoch": 1.0344432627014644, "grad_norm": 0.0376623272895813, "learning_rate": 5.570843019518757e-06, "loss": 0.0012, "step": 63220 }, { "epoch": 1.0346068886525404, "grad_norm": 0.07964199781417847, "learning_rate": 5.569424426987688e-06, "loss": 0.0017, "step": 63230 }, { "epoch": 1.0347705146036161, "grad_norm": 0.04377982020378113, "learning_rate": 5.5680057880163e-06, "loss": 0.0013, "step": 63240 }, { "epoch": 1.034934140554692, "grad_norm": 0.06022028997540474, "learning_rate": 5.5665871027202925e-06, "loss": 0.0013, "step": 63250 }, { "epoch": 1.035097766505768, "grad_norm": 0.042465586215257645, "learning_rate": 5.5651683712153685e-06, "loss": 0.0016, "step": 63260 }, { "epoch": 1.0352613924568437, "grad_norm": 0.1468539535999298, "learning_rate": 5.563749593617235e-06, "loss": 0.0015, "step": 63270 }, { "epoch": 1.0354250184079195, "grad_norm": 0.15182700753211975, "learning_rate": 5.5623307700416026e-06, "loss": 0.0017, "step": 63280 }, { "epoch": 1.0355886443589952, "grad_norm": 0.053585443645715714, "learning_rate": 5.560911900604187e-06, "loss": 0.002, "step": 63290 }, { "epoch": 1.0357522703100712, "grad_norm": 0.041000962257385254, "learning_rate": 5.5594929854207045e-06, "loss": 0.0012, "step": 63300 }, { "epoch": 1.035915896261147, "grad_norm": 0.05506960302591324, "learning_rate": 5.558074024606878e-06, "loss": 0.0006, "step": 63310 }, { "epoch": 1.0360795222122228, "grad_norm": 0.03488294407725334, "learning_rate": 5.556655018278432e-06, "loss": 0.0019, "step": 63320 }, { "epoch": 1.0362431481632988, "grad_norm": 0.12159097194671631, "learning_rate": 5.555235966551097e-06, "loss": 0.0015, "step": 63330 }, { "epoch": 1.0364067741143745, "grad_norm": 0.2699704170227051, "learning_rate": 5.5538168695406046e-06, "loss": 0.0016, "step": 63340 }, { "epoch": 1.0365704000654503, "grad_norm": 0.06754027307033539, "learning_rate": 5.552397727362694e-06, "loss": 0.0014, "step": 63350 }, { "epoch": 1.0367340260165263, "grad_norm": 0.09161163866519928, "learning_rate": 5.550978540133103e-06, "loss": 0.0019, "step": 63360 }, { "epoch": 1.036897651967602, "grad_norm": 0.0479351170361042, "learning_rate": 5.549559307967578e-06, "loss": 0.0018, "step": 63370 }, { "epoch": 1.0370612779186779, "grad_norm": 0.05759137496352196, "learning_rate": 5.5481400309818645e-06, "loss": 0.001, "step": 63380 }, { "epoch": 1.0372249038697539, "grad_norm": 0.12185215204954147, "learning_rate": 5.546720709291715e-06, "loss": 0.0017, "step": 63390 }, { "epoch": 1.0373885298208296, "grad_norm": 0.05699262022972107, "learning_rate": 5.5453013430128855e-06, "loss": 0.0017, "step": 63400 }, { "epoch": 1.0375521557719054, "grad_norm": 0.10873331129550934, "learning_rate": 5.543881932261134e-06, "loss": 0.0011, "step": 63410 }, { "epoch": 1.0377157817229812, "grad_norm": 0.11183402687311172, "learning_rate": 5.542462477152222e-06, "loss": 0.002, "step": 63420 }, { "epoch": 1.0378794076740572, "grad_norm": 0.011344654485583305, "learning_rate": 5.541042977801915e-06, "loss": 0.0013, "step": 63430 }, { "epoch": 1.038043033625133, "grad_norm": 0.0033544001635164022, "learning_rate": 5.539623434325984e-06, "loss": 0.0009, "step": 63440 }, { "epoch": 1.0382066595762087, "grad_norm": 0.03782197833061218, "learning_rate": 5.538203846840202e-06, "loss": 0.0015, "step": 63450 }, { "epoch": 1.0383702855272847, "grad_norm": 0.08692289143800735, "learning_rate": 5.5367842154603456e-06, "loss": 0.0015, "step": 63460 }, { "epoch": 1.0385339114783605, "grad_norm": 0.22961123287677765, "learning_rate": 5.535364540302195e-06, "loss": 0.0045, "step": 63470 }, { "epoch": 1.0386975374294363, "grad_norm": 0.07950034737586975, "learning_rate": 5.5339448214815335e-06, "loss": 0.001, "step": 63480 }, { "epoch": 1.038861163380512, "grad_norm": 0.024478962644934654, "learning_rate": 5.53252505911415e-06, "loss": 0.0015, "step": 63490 }, { "epoch": 1.039024789331588, "grad_norm": 0.06223538517951965, "learning_rate": 5.531105253315833e-06, "loss": 0.0031, "step": 63500 }, { "epoch": 1.0391884152826638, "grad_norm": 0.007910685613751411, "learning_rate": 5.52968540420238e-06, "loss": 0.0016, "step": 63510 }, { "epoch": 1.0393520412337396, "grad_norm": 0.08627380430698395, "learning_rate": 5.528265511889585e-06, "loss": 0.0009, "step": 63520 }, { "epoch": 1.0395156671848156, "grad_norm": 0.22251693904399872, "learning_rate": 5.526845576493255e-06, "loss": 0.0022, "step": 63530 }, { "epoch": 1.0396792931358914, "grad_norm": 0.05365417152643204, "learning_rate": 5.525425598129191e-06, "loss": 0.0018, "step": 63540 }, { "epoch": 1.0398429190869671, "grad_norm": 0.05744471400976181, "learning_rate": 5.524005576913203e-06, "loss": 0.0009, "step": 63550 }, { "epoch": 1.0400065450380431, "grad_norm": 0.08375339210033417, "learning_rate": 5.522585512961103e-06, "loss": 0.0046, "step": 63560 }, { "epoch": 1.040170170989119, "grad_norm": 0.04766619950532913, "learning_rate": 5.521165406388707e-06, "loss": 0.0005, "step": 63570 }, { "epoch": 1.0403337969401947, "grad_norm": 0.04508169740438461, "learning_rate": 5.519745257311833e-06, "loss": 0.004, "step": 63580 }, { "epoch": 1.0404974228912705, "grad_norm": 0.0260345246642828, "learning_rate": 5.518325065846304e-06, "loss": 0.0019, "step": 63590 }, { "epoch": 1.0406610488423464, "grad_norm": 0.05305233970284462, "learning_rate": 5.516904832107947e-06, "loss": 0.0015, "step": 63600 }, { "epoch": 1.0408246747934222, "grad_norm": 0.010698510333895683, "learning_rate": 5.5154845562125894e-06, "loss": 0.0008, "step": 63610 }, { "epoch": 1.040988300744498, "grad_norm": 0.2244066596031189, "learning_rate": 5.514064238276067e-06, "loss": 0.0008, "step": 63620 }, { "epoch": 1.041151926695574, "grad_norm": 0.04117923602461815, "learning_rate": 5.512643878414213e-06, "loss": 0.0013, "step": 63630 }, { "epoch": 1.0413155526466498, "grad_norm": 0.05810079723596573, "learning_rate": 5.511223476742868e-06, "loss": 0.002, "step": 63640 }, { "epoch": 1.0414791785977255, "grad_norm": 0.01418503001332283, "learning_rate": 5.509803033377876e-06, "loss": 0.0022, "step": 63650 }, { "epoch": 1.0416428045488015, "grad_norm": 0.11075062304735184, "learning_rate": 5.508382548435084e-06, "loss": 0.0011, "step": 63660 }, { "epoch": 1.0418064304998773, "grad_norm": 0.032910291105508804, "learning_rate": 5.5069620220303395e-06, "loss": 0.0029, "step": 63670 }, { "epoch": 1.041970056450953, "grad_norm": 0.01531054824590683, "learning_rate": 5.505541454279498e-06, "loss": 0.0009, "step": 63680 }, { "epoch": 1.0421336824020289, "grad_norm": 0.02331588603556156, "learning_rate": 5.504120845298416e-06, "loss": 0.0023, "step": 63690 }, { "epoch": 1.0422973083531049, "grad_norm": 0.07314090430736542, "learning_rate": 5.502700195202952e-06, "loss": 0.0013, "step": 63700 }, { "epoch": 1.0424609343041806, "grad_norm": 0.08812125027179718, "learning_rate": 5.501279504108972e-06, "loss": 0.0026, "step": 63710 }, { "epoch": 1.0426245602552564, "grad_norm": 0.04878391698002815, "learning_rate": 5.49985877213234e-06, "loss": 0.0019, "step": 63720 }, { "epoch": 1.0427881862063324, "grad_norm": 0.0049498737789690495, "learning_rate": 5.498437999388927e-06, "loss": 0.0014, "step": 63730 }, { "epoch": 1.0429518121574082, "grad_norm": 0.00988365150988102, "learning_rate": 5.497017185994607e-06, "loss": 0.0018, "step": 63740 }, { "epoch": 1.043115438108484, "grad_norm": 0.02225433476269245, "learning_rate": 5.4955963320652565e-06, "loss": 0.002, "step": 63750 }, { "epoch": 1.04327906405956, "grad_norm": 0.016612669453024864, "learning_rate": 5.4941754377167555e-06, "loss": 0.0015, "step": 63760 }, { "epoch": 1.0434426900106357, "grad_norm": 0.16139495372772217, "learning_rate": 5.492754503064987e-06, "loss": 0.0027, "step": 63770 }, { "epoch": 1.0436063159617115, "grad_norm": 0.017139604315161705, "learning_rate": 5.4913335282258375e-06, "loss": 0.002, "step": 63780 }, { "epoch": 1.0437699419127873, "grad_norm": 0.07805650681257248, "learning_rate": 5.489912513315197e-06, "loss": 0.0042, "step": 63790 }, { "epoch": 1.0439335678638633, "grad_norm": 0.06536448001861572, "learning_rate": 5.488491458448958e-06, "loss": 0.001, "step": 63800 }, { "epoch": 1.044097193814939, "grad_norm": 0.027559954673051834, "learning_rate": 5.487070363743019e-06, "loss": 0.0015, "step": 63810 }, { "epoch": 1.0442608197660148, "grad_norm": 0.049371443688869476, "learning_rate": 5.485649229313277e-06, "loss": 0.0008, "step": 63820 }, { "epoch": 1.0444244457170908, "grad_norm": 0.05635242909193039, "learning_rate": 5.484228055275638e-06, "loss": 0.0017, "step": 63830 }, { "epoch": 1.0445880716681666, "grad_norm": 0.02139674313366413, "learning_rate": 5.482806841746005e-06, "loss": 0.0008, "step": 63840 }, { "epoch": 1.0447516976192424, "grad_norm": 0.03042641654610634, "learning_rate": 5.4813855888402876e-06, "loss": 0.0012, "step": 63850 }, { "epoch": 1.0449153235703184, "grad_norm": 0.09243849664926529, "learning_rate": 5.479964296674402e-06, "loss": 0.0017, "step": 63860 }, { "epoch": 1.0450789495213941, "grad_norm": 0.037908848375082016, "learning_rate": 5.47854296536426e-06, "loss": 0.0017, "step": 63870 }, { "epoch": 1.04524257547247, "grad_norm": 0.04545162618160248, "learning_rate": 5.4771215950257804e-06, "loss": 0.002, "step": 63880 }, { "epoch": 1.0454062014235457, "grad_norm": 0.013800349086523056, "learning_rate": 5.4757001857748895e-06, "loss": 0.0014, "step": 63890 }, { "epoch": 1.0455698273746217, "grad_norm": 0.054980698972940445, "learning_rate": 5.474278737727508e-06, "loss": 0.0013, "step": 63900 }, { "epoch": 1.0457334533256974, "grad_norm": 0.13138845562934875, "learning_rate": 5.472857250999567e-06, "loss": 0.0022, "step": 63910 }, { "epoch": 1.0458970792767732, "grad_norm": 0.02803453616797924, "learning_rate": 5.471435725706997e-06, "loss": 0.0012, "step": 63920 }, { "epoch": 1.0460607052278492, "grad_norm": 0.08206098526716232, "learning_rate": 5.470014161965735e-06, "loss": 0.002, "step": 63930 }, { "epoch": 1.046224331178925, "grad_norm": 0.038597412407398224, "learning_rate": 5.468592559891714e-06, "loss": 0.0037, "step": 63940 }, { "epoch": 1.0463879571300008, "grad_norm": 0.06735596060752869, "learning_rate": 5.467170919600882e-06, "loss": 0.0023, "step": 63950 }, { "epoch": 1.0465515830810768, "grad_norm": 0.08162075281143188, "learning_rate": 5.465749241209176e-06, "loss": 0.0023, "step": 63960 }, { "epoch": 1.0467152090321525, "grad_norm": 0.024629533290863037, "learning_rate": 5.464327524832548e-06, "loss": 0.0012, "step": 63970 }, { "epoch": 1.0468788349832283, "grad_norm": 0.05165449529886246, "learning_rate": 5.462905770586946e-06, "loss": 0.0015, "step": 63980 }, { "epoch": 1.047042460934304, "grad_norm": 0.03659361973404884, "learning_rate": 5.461483978588325e-06, "loss": 0.002, "step": 63990 }, { "epoch": 1.04720608688538, "grad_norm": 0.027760332450270653, "learning_rate": 5.460062148952641e-06, "loss": 0.0018, "step": 64000 }, { "epoch": 1.0473697128364559, "grad_norm": 0.06101588159799576, "learning_rate": 5.4586402817958514e-06, "loss": 0.0011, "step": 64010 }, { "epoch": 1.0475333387875316, "grad_norm": 0.054217416793107986, "learning_rate": 5.457218377233923e-06, "loss": 0.0019, "step": 64020 }, { "epoch": 1.0476969647386076, "grad_norm": 0.039044298231601715, "learning_rate": 5.4557964353828185e-06, "loss": 0.0008, "step": 64030 }, { "epoch": 1.0478605906896834, "grad_norm": 0.08019902557134628, "learning_rate": 5.454374456358508e-06, "loss": 0.0013, "step": 64040 }, { "epoch": 1.0480242166407592, "grad_norm": 0.05514813959598541, "learning_rate": 5.4529524402769615e-06, "loss": 0.001, "step": 64050 }, { "epoch": 1.048187842591835, "grad_norm": 0.049595434218645096, "learning_rate": 5.451530387254157e-06, "loss": 0.002, "step": 64060 }, { "epoch": 1.048351468542911, "grad_norm": 0.03588667884469032, "learning_rate": 5.450108297406069e-06, "loss": 0.0012, "step": 64070 }, { "epoch": 1.0485150944939867, "grad_norm": 0.09109651297330856, "learning_rate": 5.448686170848679e-06, "loss": 0.0016, "step": 64080 }, { "epoch": 1.0486787204450625, "grad_norm": 0.0289768036454916, "learning_rate": 5.447264007697973e-06, "loss": 0.0007, "step": 64090 }, { "epoch": 1.0488423463961385, "grad_norm": 0.08702289313077927, "learning_rate": 5.445841808069936e-06, "loss": 0.0029, "step": 64100 }, { "epoch": 1.0490059723472143, "grad_norm": 0.03332599997520447, "learning_rate": 5.444419572080557e-06, "loss": 0.0028, "step": 64110 }, { "epoch": 1.04916959829829, "grad_norm": 0.06822742521762848, "learning_rate": 5.442997299845832e-06, "loss": 0.0012, "step": 64120 }, { "epoch": 1.049333224249366, "grad_norm": 0.03312753140926361, "learning_rate": 5.441574991481752e-06, "loss": 0.0008, "step": 64130 }, { "epoch": 1.0494968502004418, "grad_norm": 0.03403471037745476, "learning_rate": 5.440152647104319e-06, "loss": 0.0012, "step": 64140 }, { "epoch": 1.0496604761515176, "grad_norm": 0.07530993968248367, "learning_rate": 5.438730266829533e-06, "loss": 0.0016, "step": 64150 }, { "epoch": 1.0498241021025936, "grad_norm": 0.042741235345602036, "learning_rate": 5.4373078507734e-06, "loss": 0.0009, "step": 64160 }, { "epoch": 1.0499877280536694, "grad_norm": 0.03204313665628433, "learning_rate": 5.435885399051926e-06, "loss": 0.0016, "step": 64170 }, { "epoch": 1.0501513540047451, "grad_norm": 0.04970650374889374, "learning_rate": 5.434462911781122e-06, "loss": 0.0019, "step": 64180 }, { "epoch": 1.050314979955821, "grad_norm": 0.10810843110084534, "learning_rate": 5.433040389077001e-06, "loss": 0.0014, "step": 64190 }, { "epoch": 1.050478605906897, "grad_norm": 0.009705748409032822, "learning_rate": 5.43161783105558e-06, "loss": 0.0019, "step": 64200 }, { "epoch": 1.0506422318579727, "grad_norm": 0.1793215125799179, "learning_rate": 5.430195237832876e-06, "loss": 0.0019, "step": 64210 }, { "epoch": 1.0508058578090484, "grad_norm": 0.1737505942583084, "learning_rate": 5.428772609524913e-06, "loss": 0.002, "step": 64220 }, { "epoch": 1.0509694837601244, "grad_norm": 0.05152524635195732, "learning_rate": 5.427349946247714e-06, "loss": 0.001, "step": 64230 }, { "epoch": 1.0511331097112002, "grad_norm": 0.034666758030653, "learning_rate": 5.425927248117308e-06, "loss": 0.0013, "step": 64240 }, { "epoch": 1.051296735662276, "grad_norm": 0.05017232149839401, "learning_rate": 5.424504515249725e-06, "loss": 0.0013, "step": 64250 }, { "epoch": 1.0514603616133518, "grad_norm": 0.06550250202417374, "learning_rate": 5.423081747760996e-06, "loss": 0.0016, "step": 64260 }, { "epoch": 1.0516239875644278, "grad_norm": 0.10068517923355103, "learning_rate": 5.421658945767163e-06, "loss": 0.0017, "step": 64270 }, { "epoch": 1.0517876135155035, "grad_norm": 0.056817881762981415, "learning_rate": 5.420236109384258e-06, "loss": 0.0014, "step": 64280 }, { "epoch": 1.0519512394665793, "grad_norm": 0.052699845284223557, "learning_rate": 5.418813238728327e-06, "loss": 0.0015, "step": 64290 }, { "epoch": 1.0521148654176553, "grad_norm": 0.13146892189979553, "learning_rate": 5.417390333915412e-06, "loss": 0.0019, "step": 64300 }, { "epoch": 1.052278491368731, "grad_norm": 0.04212433099746704, "learning_rate": 5.415967395061562e-06, "loss": 0.0018, "step": 64310 }, { "epoch": 1.0524421173198069, "grad_norm": 0.031882937997579575, "learning_rate": 5.414544422282826e-06, "loss": 0.001, "step": 64320 }, { "epoch": 1.0526057432708829, "grad_norm": 0.010787849314510822, "learning_rate": 5.413121415695258e-06, "loss": 0.0019, "step": 64330 }, { "epoch": 1.0527693692219586, "grad_norm": 0.04263931140303612, "learning_rate": 5.411698375414913e-06, "loss": 0.001, "step": 64340 }, { "epoch": 1.0529329951730344, "grad_norm": 0.0920867994427681, "learning_rate": 5.410275301557849e-06, "loss": 0.0016, "step": 64350 }, { "epoch": 1.0530966211241102, "grad_norm": 0.12725453078746796, "learning_rate": 5.408852194240127e-06, "loss": 0.0013, "step": 64360 }, { "epoch": 1.0532602470751862, "grad_norm": 0.014359544031322002, "learning_rate": 5.407429053577811e-06, "loss": 0.0014, "step": 64370 }, { "epoch": 1.053423873026262, "grad_norm": 0.06531564146280289, "learning_rate": 5.406005879686968e-06, "loss": 0.0008, "step": 64380 }, { "epoch": 1.0535874989773377, "grad_norm": 0.0077211507596075535, "learning_rate": 5.404582672683667e-06, "loss": 0.0014, "step": 64390 }, { "epoch": 1.0537511249284137, "grad_norm": 0.1500065177679062, "learning_rate": 5.403159432683979e-06, "loss": 0.0014, "step": 64400 }, { "epoch": 1.0539147508794895, "grad_norm": 0.06877102702856064, "learning_rate": 5.40173615980398e-06, "loss": 0.0018, "step": 64410 }, { "epoch": 1.0540783768305653, "grad_norm": 0.06654974818229675, "learning_rate": 5.400312854159746e-06, "loss": 0.0018, "step": 64420 }, { "epoch": 1.0542420027816413, "grad_norm": 0.0903286561369896, "learning_rate": 5.398889515867358e-06, "loss": 0.0013, "step": 64430 }, { "epoch": 1.054405628732717, "grad_norm": 0.044767577201128006, "learning_rate": 5.397466145042898e-06, "loss": 0.0013, "step": 64440 }, { "epoch": 1.0545692546837928, "grad_norm": 0.04485725238919258, "learning_rate": 5.3960427418024515e-06, "loss": 0.0015, "step": 64450 }, { "epoch": 1.0547328806348686, "grad_norm": 0.10228991508483887, "learning_rate": 5.394619306262106e-06, "loss": 0.0014, "step": 64460 }, { "epoch": 1.0548965065859446, "grad_norm": 0.029804689809679985, "learning_rate": 5.393195838537954e-06, "loss": 0.0015, "step": 64470 }, { "epoch": 1.0550601325370204, "grad_norm": 0.02489466592669487, "learning_rate": 5.391772338746086e-06, "loss": 0.0016, "step": 64480 }, { "epoch": 1.0552237584880961, "grad_norm": 0.05016709491610527, "learning_rate": 5.390348807002599e-06, "loss": 0.0008, "step": 64490 }, { "epoch": 1.0553873844391721, "grad_norm": 0.07112365961074829, "learning_rate": 5.388925243423591e-06, "loss": 0.001, "step": 64500 }, { "epoch": 1.055551010390248, "grad_norm": 0.04656639322638512, "learning_rate": 5.387501648125165e-06, "loss": 0.0011, "step": 64510 }, { "epoch": 1.0557146363413237, "grad_norm": 0.12064625322818756, "learning_rate": 5.386078021223422e-06, "loss": 0.0028, "step": 64520 }, { "epoch": 1.0558782622923997, "grad_norm": 0.05928665027022362, "learning_rate": 5.384654362834469e-06, "loss": 0.0017, "step": 64530 }, { "epoch": 1.0560418882434754, "grad_norm": 0.06275463104248047, "learning_rate": 5.3832306730744146e-06, "loss": 0.0012, "step": 64540 }, { "epoch": 1.0562055141945512, "grad_norm": 0.07323262840509415, "learning_rate": 5.381806952059371e-06, "loss": 0.0015, "step": 64550 }, { "epoch": 1.056369140145627, "grad_norm": 0.0842098593711853, "learning_rate": 5.380383199905451e-06, "loss": 0.0016, "step": 64560 }, { "epoch": 1.056532766096703, "grad_norm": 0.09260202199220657, "learning_rate": 5.378959416728772e-06, "loss": 0.0021, "step": 64570 }, { "epoch": 1.0566963920477788, "grad_norm": 0.07597813755273819, "learning_rate": 5.377535602645452e-06, "loss": 0.0018, "step": 64580 }, { "epoch": 1.0568600179988545, "grad_norm": 0.10727567970752716, "learning_rate": 5.3761117577716126e-06, "loss": 0.0016, "step": 64590 }, { "epoch": 1.0570236439499305, "grad_norm": 0.04645923525094986, "learning_rate": 5.374687882223378e-06, "loss": 0.0011, "step": 64600 }, { "epoch": 1.0571872699010063, "grad_norm": 0.07954151183366776, "learning_rate": 5.373263976116875e-06, "loss": 0.0012, "step": 64610 }, { "epoch": 1.057350895852082, "grad_norm": 0.09331540018320084, "learning_rate": 5.371840039568231e-06, "loss": 0.0015, "step": 64620 }, { "epoch": 1.057514521803158, "grad_norm": 0.028710681945085526, "learning_rate": 5.3704160726935795e-06, "loss": 0.0019, "step": 64630 }, { "epoch": 1.0576781477542339, "grad_norm": 0.0997256487607956, "learning_rate": 5.368992075609052e-06, "loss": 0.0015, "step": 64640 }, { "epoch": 1.0578417737053096, "grad_norm": 0.05940896272659302, "learning_rate": 5.367568048430787e-06, "loss": 0.0015, "step": 64650 }, { "epoch": 1.0580053996563854, "grad_norm": 0.06382996588945389, "learning_rate": 5.3661439912749225e-06, "loss": 0.0014, "step": 64660 }, { "epoch": 1.0581690256074614, "grad_norm": 0.06846608966588974, "learning_rate": 5.3647199042576e-06, "loss": 0.0011, "step": 64670 }, { "epoch": 1.0583326515585372, "grad_norm": 0.03451616317033768, "learning_rate": 5.363295787494963e-06, "loss": 0.0019, "step": 64680 }, { "epoch": 1.058496277509613, "grad_norm": 0.1504979431629181, "learning_rate": 5.361871641103158e-06, "loss": 0.002, "step": 64690 }, { "epoch": 1.058659903460689, "grad_norm": 0.0033024942968040705, "learning_rate": 5.360447465198332e-06, "loss": 0.0013, "step": 64700 }, { "epoch": 1.0588235294117647, "grad_norm": 0.03498594090342522, "learning_rate": 5.359023259896638e-06, "loss": 0.001, "step": 64710 }, { "epoch": 1.0589871553628405, "grad_norm": 0.04379526153206825, "learning_rate": 5.357599025314228e-06, "loss": 0.0013, "step": 64720 }, { "epoch": 1.0591507813139165, "grad_norm": 0.07929637283086777, "learning_rate": 5.356174761567259e-06, "loss": 0.0009, "step": 64730 }, { "epoch": 1.0593144072649923, "grad_norm": 0.08806032687425613, "learning_rate": 5.354750468771886e-06, "loss": 0.001, "step": 64740 }, { "epoch": 1.059478033216068, "grad_norm": 0.05688506364822388, "learning_rate": 5.3533261470442746e-06, "loss": 0.0019, "step": 64750 }, { "epoch": 1.0596416591671438, "grad_norm": 0.01541043072938919, "learning_rate": 5.351901796500583e-06, "loss": 0.0015, "step": 64760 }, { "epoch": 1.0598052851182198, "grad_norm": 0.04236844554543495, "learning_rate": 5.350477417256978e-06, "loss": 0.0014, "step": 64770 }, { "epoch": 1.0599689110692956, "grad_norm": 0.0074849361553788185, "learning_rate": 5.349053009429629e-06, "loss": 0.0019, "step": 64780 }, { "epoch": 1.0601325370203714, "grad_norm": 0.08951664716005325, "learning_rate": 5.3476285731347024e-06, "loss": 0.0013, "step": 64790 }, { "epoch": 1.0602961629714474, "grad_norm": 0.012588960118591785, "learning_rate": 5.346204108488373e-06, "loss": 0.0015, "step": 64800 }, { "epoch": 1.0604597889225231, "grad_norm": 0.08123738318681717, "learning_rate": 5.344779615606815e-06, "loss": 0.002, "step": 64810 }, { "epoch": 1.060623414873599, "grad_norm": 0.004824167117476463, "learning_rate": 5.343355094606203e-06, "loss": 0.0028, "step": 64820 }, { "epoch": 1.060787040824675, "grad_norm": 0.09063207358121872, "learning_rate": 5.341930545602718e-06, "loss": 0.0013, "step": 64830 }, { "epoch": 1.0609506667757507, "grad_norm": 0.13622954487800598, "learning_rate": 5.3405059687125415e-06, "loss": 0.0023, "step": 64840 }, { "epoch": 1.0611142927268264, "grad_norm": 0.03379924222826958, "learning_rate": 5.339081364051857e-06, "loss": 0.0021, "step": 64850 }, { "epoch": 1.0612779186779022, "grad_norm": 0.03544510528445244, "learning_rate": 5.337656731736851e-06, "loss": 0.0019, "step": 64860 }, { "epoch": 1.0614415446289782, "grad_norm": 0.04377711936831474, "learning_rate": 5.336232071883709e-06, "loss": 0.0012, "step": 64870 }, { "epoch": 1.061605170580054, "grad_norm": 0.04801655933260918, "learning_rate": 5.3348073846086235e-06, "loss": 0.0009, "step": 64880 }, { "epoch": 1.0617687965311298, "grad_norm": 0.08826633542776108, "learning_rate": 5.3333826700277866e-06, "loss": 0.002, "step": 64890 }, { "epoch": 1.0619324224822058, "grad_norm": 0.04082094132900238, "learning_rate": 5.3319579282573945e-06, "loss": 0.0014, "step": 64900 }, { "epoch": 1.0620960484332815, "grad_norm": 0.280418336391449, "learning_rate": 5.330533159413642e-06, "loss": 0.0014, "step": 64910 }, { "epoch": 1.0622596743843573, "grad_norm": 0.038144439458847046, "learning_rate": 5.32910836361273e-06, "loss": 0.0013, "step": 64920 }, { "epoch": 1.0624233003354333, "grad_norm": 0.07828328013420105, "learning_rate": 5.3276835409708605e-06, "loss": 0.001, "step": 64930 }, { "epoch": 1.062586926286509, "grad_norm": 0.028683684766292572, "learning_rate": 5.326258691604235e-06, "loss": 0.0013, "step": 64940 }, { "epoch": 1.0627505522375849, "grad_norm": 0.07371421158313751, "learning_rate": 5.3248338156290615e-06, "loss": 0.0027, "step": 64950 }, { "epoch": 1.0629141781886606, "grad_norm": 0.03749288618564606, "learning_rate": 5.323408913161546e-06, "loss": 0.0015, "step": 64960 }, { "epoch": 1.0630778041397366, "grad_norm": 0.06767037510871887, "learning_rate": 5.321983984317901e-06, "loss": 0.0016, "step": 64970 }, { "epoch": 1.0632414300908124, "grad_norm": 0.018412822857499123, "learning_rate": 5.320559029214338e-06, "loss": 0.0015, "step": 64980 }, { "epoch": 1.0634050560418882, "grad_norm": 0.07942145317792892, "learning_rate": 5.319134047967072e-06, "loss": 0.0014, "step": 64990 }, { "epoch": 1.0635686819929642, "grad_norm": 0.12868405878543854, "learning_rate": 5.317709040692316e-06, "loss": 0.0016, "step": 65000 }, { "epoch": 1.06373230794404, "grad_norm": 0.05411757901310921, "learning_rate": 5.316284007506294e-06, "loss": 0.0019, "step": 65010 }, { "epoch": 1.0638959338951157, "grad_norm": 0.05384686216711998, "learning_rate": 5.314858948525223e-06, "loss": 0.0019, "step": 65020 }, { "epoch": 1.0640595598461915, "grad_norm": 0.017097443342208862, "learning_rate": 5.313433863865328e-06, "loss": 0.0018, "step": 65030 }, { "epoch": 1.0642231857972675, "grad_norm": 0.0339205339550972, "learning_rate": 5.312008753642834e-06, "loss": 0.003, "step": 65040 }, { "epoch": 1.0643868117483433, "grad_norm": 0.044893499463796616, "learning_rate": 5.310583617973966e-06, "loss": 0.0012, "step": 65050 }, { "epoch": 1.064550437699419, "grad_norm": 0.044282782822847366, "learning_rate": 5.309158456974955e-06, "loss": 0.0012, "step": 65060 }, { "epoch": 1.064714063650495, "grad_norm": 0.03950934112071991, "learning_rate": 5.307733270762033e-06, "loss": 0.0013, "step": 65070 }, { "epoch": 1.0648776896015708, "grad_norm": 0.0735439881682396, "learning_rate": 5.306308059451433e-06, "loss": 0.0016, "step": 65080 }, { "epoch": 1.0650413155526466, "grad_norm": 0.014367000199854374, "learning_rate": 5.304882823159389e-06, "loss": 0.0008, "step": 65090 }, { "epoch": 1.0652049415037226, "grad_norm": 0.12071038782596588, "learning_rate": 5.303457562002139e-06, "loss": 0.0019, "step": 65100 }, { "epoch": 1.0653685674547984, "grad_norm": 0.07662677019834518, "learning_rate": 5.302032276095923e-06, "loss": 0.0009, "step": 65110 }, { "epoch": 1.0655321934058741, "grad_norm": 0.03474804386496544, "learning_rate": 5.300606965556983e-06, "loss": 0.0028, "step": 65120 }, { "epoch": 1.0656958193569501, "grad_norm": 0.07875080406665802, "learning_rate": 5.29918163050156e-06, "loss": 0.0009, "step": 65130 }, { "epoch": 1.065859445308026, "grad_norm": 0.062419354915618896, "learning_rate": 5.297756271045902e-06, "loss": 0.0018, "step": 65140 }, { "epoch": 1.0660230712591017, "grad_norm": 0.024496397003531456, "learning_rate": 5.296330887306256e-06, "loss": 0.0017, "step": 65150 }, { "epoch": 1.0661866972101774, "grad_norm": 0.07439788430929184, "learning_rate": 5.2949054793988695e-06, "loss": 0.0015, "step": 65160 }, { "epoch": 1.0663503231612534, "grad_norm": 0.020498117431998253, "learning_rate": 5.2934800474399976e-06, "loss": 0.0011, "step": 65170 }, { "epoch": 1.0665139491123292, "grad_norm": 0.12638750672340393, "learning_rate": 5.29205459154589e-06, "loss": 0.0026, "step": 65180 }, { "epoch": 1.066677575063405, "grad_norm": 0.09402655810117722, "learning_rate": 5.290629111832804e-06, "loss": 0.002, "step": 65190 }, { "epoch": 1.066841201014481, "grad_norm": 0.02262144722044468, "learning_rate": 5.2892036084169965e-06, "loss": 0.0015, "step": 65200 }, { "epoch": 1.0670048269655568, "grad_norm": 0.14120560884475708, "learning_rate": 5.287778081414726e-06, "loss": 0.0009, "step": 65210 }, { "epoch": 1.0671684529166325, "grad_norm": 0.03046940267086029, "learning_rate": 5.286352530942255e-06, "loss": 0.0016, "step": 65220 }, { "epoch": 1.0673320788677083, "grad_norm": 0.05819351598620415, "learning_rate": 5.284926957115846e-06, "loss": 0.0016, "step": 65230 }, { "epoch": 1.0674957048187843, "grad_norm": 0.13080142438411713, "learning_rate": 5.283501360051764e-06, "loss": 0.0011, "step": 65240 }, { "epoch": 1.06765933076986, "grad_norm": 0.008926796726882458, "learning_rate": 5.282075739866275e-06, "loss": 0.0023, "step": 65250 }, { "epoch": 1.0678229567209359, "grad_norm": 0.09713287651538849, "learning_rate": 5.280650096675648e-06, "loss": 0.002, "step": 65260 }, { "epoch": 1.0679865826720119, "grad_norm": 0.06894666701555252, "learning_rate": 5.279224430596155e-06, "loss": 0.0014, "step": 65270 }, { "epoch": 1.0681502086230876, "grad_norm": 0.1052623763680458, "learning_rate": 5.277798741744066e-06, "loss": 0.002, "step": 65280 }, { "epoch": 1.0683138345741634, "grad_norm": 0.10642183572053909, "learning_rate": 5.276373030235658e-06, "loss": 0.0018, "step": 65290 }, { "epoch": 1.0684774605252394, "grad_norm": 0.11881567537784576, "learning_rate": 5.274947296187205e-06, "loss": 0.0023, "step": 65300 }, { "epoch": 1.0686410864763152, "grad_norm": 0.09369634091854095, "learning_rate": 5.273521539714989e-06, "loss": 0.002, "step": 65310 }, { "epoch": 1.068804712427391, "grad_norm": 0.07297854870557785, "learning_rate": 5.272095760935284e-06, "loss": 0.0018, "step": 65320 }, { "epoch": 1.068968338378467, "grad_norm": 0.2970001697540283, "learning_rate": 5.270669959964374e-06, "loss": 0.0043, "step": 65330 }, { "epoch": 1.0691319643295427, "grad_norm": 0.07514619827270508, "learning_rate": 5.269244136918544e-06, "loss": 0.0024, "step": 65340 }, { "epoch": 1.0692955902806185, "grad_norm": 0.0616283044219017, "learning_rate": 5.267818291914078e-06, "loss": 0.0012, "step": 65350 }, { "epoch": 1.0694592162316943, "grad_norm": 0.027152765542268753, "learning_rate": 5.266392425067264e-06, "loss": 0.0012, "step": 65360 }, { "epoch": 1.0696228421827703, "grad_norm": 0.04551069438457489, "learning_rate": 5.2649665364943896e-06, "loss": 0.0013, "step": 65370 }, { "epoch": 1.069786468133846, "grad_norm": 0.055360324680805206, "learning_rate": 5.2635406263117465e-06, "loss": 0.0015, "step": 65380 }, { "epoch": 1.0699500940849218, "grad_norm": 0.04829278588294983, "learning_rate": 5.262114694635625e-06, "loss": 0.0018, "step": 65390 }, { "epoch": 1.0701137200359978, "grad_norm": 0.016820231452584267, "learning_rate": 5.2606887415823225e-06, "loss": 0.0016, "step": 65400 }, { "epoch": 1.0702773459870736, "grad_norm": 0.05498622730374336, "learning_rate": 5.259262767268132e-06, "loss": 0.0016, "step": 65410 }, { "epoch": 1.0704409719381494, "grad_norm": 0.023811891674995422, "learning_rate": 5.257836771809352e-06, "loss": 0.0029, "step": 65420 }, { "epoch": 1.0706045978892251, "grad_norm": 0.06769300252199173, "learning_rate": 5.2564107553222845e-06, "loss": 0.002, "step": 65430 }, { "epoch": 1.0707682238403011, "grad_norm": 0.06867410242557526, "learning_rate": 5.254984717923226e-06, "loss": 0.0016, "step": 65440 }, { "epoch": 1.070931849791377, "grad_norm": 0.015183850191533566, "learning_rate": 5.253558659728482e-06, "loss": 0.001, "step": 65450 }, { "epoch": 1.0710954757424527, "grad_norm": 0.05597613379359245, "learning_rate": 5.2521325808543565e-06, "loss": 0.0016, "step": 65460 }, { "epoch": 1.0712591016935287, "grad_norm": 0.10989191383123398, "learning_rate": 5.250706481417156e-06, "loss": 0.0024, "step": 65470 }, { "epoch": 1.0714227276446044, "grad_norm": 0.0691683441400528, "learning_rate": 5.249280361533187e-06, "loss": 0.0014, "step": 65480 }, { "epoch": 1.0715863535956802, "grad_norm": 0.026200102642178535, "learning_rate": 5.24785422131876e-06, "loss": 0.0011, "step": 65490 }, { "epoch": 1.0717499795467562, "grad_norm": 0.033837590366601944, "learning_rate": 5.246428060890186e-06, "loss": 0.0019, "step": 65500 }, { "epoch": 1.071913605497832, "grad_norm": 0.01459808275103569, "learning_rate": 5.2450018803637784e-06, "loss": 0.0018, "step": 65510 }, { "epoch": 1.0720772314489078, "grad_norm": 0.14792582392692566, "learning_rate": 5.24357567985585e-06, "loss": 0.0014, "step": 65520 }, { "epoch": 1.0722408573999835, "grad_norm": 0.04514022544026375, "learning_rate": 5.242149459482718e-06, "loss": 0.0011, "step": 65530 }, { "epoch": 1.0724044833510595, "grad_norm": 0.0576908253133297, "learning_rate": 5.240723219360701e-06, "loss": 0.0015, "step": 65540 }, { "epoch": 1.0725681093021353, "grad_norm": 0.2102101743221283, "learning_rate": 5.239296959606116e-06, "loss": 0.0027, "step": 65550 }, { "epoch": 1.072731735253211, "grad_norm": 0.06453171372413635, "learning_rate": 5.237870680335286e-06, "loss": 0.002, "step": 65560 }, { "epoch": 1.072895361204287, "grad_norm": 0.009780087508261204, "learning_rate": 5.236444381664532e-06, "loss": 0.0016, "step": 65570 }, { "epoch": 1.0730589871553629, "grad_norm": 0.028725354000926018, "learning_rate": 5.23501806371018e-06, "loss": 0.0007, "step": 65580 }, { "epoch": 1.0732226131064386, "grad_norm": 0.1053251177072525, "learning_rate": 5.233591726588552e-06, "loss": 0.0016, "step": 65590 }, { "epoch": 1.0733862390575146, "grad_norm": 0.023830266669392586, "learning_rate": 5.232165370415979e-06, "loss": 0.0011, "step": 65600 }, { "epoch": 1.0735498650085904, "grad_norm": 0.050636470317840576, "learning_rate": 5.230738995308788e-06, "loss": 0.001, "step": 65610 }, { "epoch": 1.0737134909596662, "grad_norm": 0.16297264397144318, "learning_rate": 5.229312601383311e-06, "loss": 0.0019, "step": 65620 }, { "epoch": 1.073877116910742, "grad_norm": 0.049096617847681046, "learning_rate": 5.227886188755878e-06, "loss": 0.0021, "step": 65630 }, { "epoch": 1.074040742861818, "grad_norm": 0.062334369868040085, "learning_rate": 5.226459757542822e-06, "loss": 0.0029, "step": 65640 }, { "epoch": 1.0742043688128937, "grad_norm": 0.07538007944822311, "learning_rate": 5.2250333078604785e-06, "loss": 0.0018, "step": 65650 }, { "epoch": 1.0743679947639695, "grad_norm": 0.030422566458582878, "learning_rate": 5.223606839825184e-06, "loss": 0.0014, "step": 65660 }, { "epoch": 1.0745316207150455, "grad_norm": 0.1259501874446869, "learning_rate": 5.222180353553277e-06, "loss": 0.0018, "step": 65670 }, { "epoch": 1.0746952466661213, "grad_norm": 0.08239496499300003, "learning_rate": 5.220753849161095e-06, "loss": 0.0019, "step": 65680 }, { "epoch": 1.074858872617197, "grad_norm": 0.1408461332321167, "learning_rate": 5.219327326764981e-06, "loss": 0.0022, "step": 65690 }, { "epoch": 1.075022498568273, "grad_norm": 0.10267409682273865, "learning_rate": 5.217900786481277e-06, "loss": 0.0019, "step": 65700 }, { "epoch": 1.0751861245193488, "grad_norm": 0.05968786031007767, "learning_rate": 5.2164742284263244e-06, "loss": 0.0011, "step": 65710 }, { "epoch": 1.0753497504704246, "grad_norm": 0.02846963331103325, "learning_rate": 5.215047652716473e-06, "loss": 0.0015, "step": 65720 }, { "epoch": 1.0755133764215004, "grad_norm": 0.05503297969698906, "learning_rate": 5.213621059468065e-06, "loss": 0.0015, "step": 65730 }, { "epoch": 1.0756770023725764, "grad_norm": 0.01726103201508522, "learning_rate": 5.212194448797449e-06, "loss": 0.0011, "step": 65740 }, { "epoch": 1.0758406283236521, "grad_norm": 0.09336838126182556, "learning_rate": 5.210767820820978e-06, "loss": 0.001, "step": 65750 }, { "epoch": 1.076004254274728, "grad_norm": 0.0753527358174324, "learning_rate": 5.209341175655e-06, "loss": 0.0008, "step": 65760 }, { "epoch": 1.076167880225804, "grad_norm": 0.06671423465013504, "learning_rate": 5.207914513415867e-06, "loss": 0.0011, "step": 65770 }, { "epoch": 1.0763315061768797, "grad_norm": 0.04025735333561897, "learning_rate": 5.206487834219934e-06, "loss": 0.0016, "step": 65780 }, { "epoch": 1.0764951321279554, "grad_norm": 0.045490920543670654, "learning_rate": 5.205061138183557e-06, "loss": 0.0015, "step": 65790 }, { "epoch": 1.0766587580790312, "grad_norm": 0.012613928876817226, "learning_rate": 5.20363442542309e-06, "loss": 0.0018, "step": 65800 }, { "epoch": 1.0768223840301072, "grad_norm": 0.0372697077691555, "learning_rate": 5.202207696054893e-06, "loss": 0.0012, "step": 65810 }, { "epoch": 1.076986009981183, "grad_norm": 0.17838460206985474, "learning_rate": 5.200780950195323e-06, "loss": 0.0031, "step": 65820 }, { "epoch": 1.0771496359322588, "grad_norm": 0.07225412875413895, "learning_rate": 5.1993541879607425e-06, "loss": 0.0026, "step": 65830 }, { "epoch": 1.0773132618833348, "grad_norm": 0.021484797820448875, "learning_rate": 5.197927409467513e-06, "loss": 0.0018, "step": 65840 }, { "epoch": 1.0774768878344105, "grad_norm": 0.08153106272220612, "learning_rate": 5.196500614831997e-06, "loss": 0.0015, "step": 65850 }, { "epoch": 1.0776405137854863, "grad_norm": 0.0051868511363863945, "learning_rate": 5.195073804170559e-06, "loss": 0.001, "step": 65860 }, { "epoch": 1.0778041397365623, "grad_norm": 0.027387188747525215, "learning_rate": 5.193646977599566e-06, "loss": 0.0007, "step": 65870 }, { "epoch": 1.077967765687638, "grad_norm": 0.04696065932512283, "learning_rate": 5.1922201352353844e-06, "loss": 0.0022, "step": 65880 }, { "epoch": 1.0781313916387139, "grad_norm": 0.06859666109085083, "learning_rate": 5.190793277194383e-06, "loss": 0.0014, "step": 65890 }, { "epoch": 1.0782950175897899, "grad_norm": 0.08227351307868958, "learning_rate": 5.189366403592929e-06, "loss": 0.0021, "step": 65900 }, { "epoch": 1.0784586435408656, "grad_norm": 0.062292248010635376, "learning_rate": 5.1879395145473975e-06, "loss": 0.0013, "step": 65910 }, { "epoch": 1.0786222694919414, "grad_norm": 0.06601244956254959, "learning_rate": 5.186512610174157e-06, "loss": 0.0018, "step": 65920 }, { "epoch": 1.0787858954430172, "grad_norm": 0.08488211035728455, "learning_rate": 5.185085690589584e-06, "loss": 0.0015, "step": 65930 }, { "epoch": 1.0789495213940932, "grad_norm": 0.05413906276226044, "learning_rate": 5.183658755910052e-06, "loss": 0.0024, "step": 65940 }, { "epoch": 1.079113147345169, "grad_norm": 0.020831607282161713, "learning_rate": 5.182231806251936e-06, "loss": 0.0016, "step": 65950 }, { "epoch": 1.0792767732962447, "grad_norm": 0.13814033567905426, "learning_rate": 5.180804841731613e-06, "loss": 0.0039, "step": 65960 }, { "epoch": 1.0794403992473207, "grad_norm": 0.049561165273189545, "learning_rate": 5.179377862465464e-06, "loss": 0.0011, "step": 65970 }, { "epoch": 1.0796040251983965, "grad_norm": 0.033354684710502625, "learning_rate": 5.1779508685698654e-06, "loss": 0.0008, "step": 65980 }, { "epoch": 1.0797676511494723, "grad_norm": 0.06770694255828857, "learning_rate": 5.1765238601612e-06, "loss": 0.002, "step": 65990 }, { "epoch": 1.079931277100548, "grad_norm": 0.01733277551829815, "learning_rate": 5.17509683735585e-06, "loss": 0.0012, "step": 66000 }, { "epoch": 1.080094903051624, "grad_norm": 0.04519897326827049, "learning_rate": 5.173669800270196e-06, "loss": 0.0009, "step": 66010 }, { "epoch": 1.0802585290026998, "grad_norm": 0.07872622460126877, "learning_rate": 5.1722427490206254e-06, "loss": 0.0018, "step": 66020 }, { "epoch": 1.0804221549537756, "grad_norm": 0.047858960926532745, "learning_rate": 5.170815683723521e-06, "loss": 0.0015, "step": 66030 }, { "epoch": 1.0805857809048516, "grad_norm": 0.13418707251548767, "learning_rate": 5.169388604495271e-06, "loss": 0.0015, "step": 66040 }, { "epoch": 1.0807494068559274, "grad_norm": 0.03831182047724724, "learning_rate": 5.167961511452263e-06, "loss": 0.0013, "step": 66050 }, { "epoch": 1.0809130328070031, "grad_norm": 0.12964409589767456, "learning_rate": 5.166534404710885e-06, "loss": 0.0007, "step": 66060 }, { "epoch": 1.0810766587580791, "grad_norm": 0.0464429073035717, "learning_rate": 5.165107284387528e-06, "loss": 0.0017, "step": 66070 }, { "epoch": 1.081240284709155, "grad_norm": 0.06879943609237671, "learning_rate": 5.163680150598583e-06, "loss": 0.0018, "step": 66080 }, { "epoch": 1.0814039106602307, "grad_norm": 0.18898960947990417, "learning_rate": 5.162253003460442e-06, "loss": 0.0029, "step": 66090 }, { "epoch": 1.0815675366113067, "grad_norm": 0.035316988825798035, "learning_rate": 5.160825843089496e-06, "loss": 0.0014, "step": 66100 }, { "epoch": 1.0817311625623824, "grad_norm": 0.04327777773141861, "learning_rate": 5.159398669602143e-06, "loss": 0.0011, "step": 66110 }, { "epoch": 1.0818947885134582, "grad_norm": 0.1721632033586502, "learning_rate": 5.157971483114778e-06, "loss": 0.0039, "step": 66120 }, { "epoch": 1.082058414464534, "grad_norm": 0.15825077891349792, "learning_rate": 5.156544283743794e-06, "loss": 0.0012, "step": 66130 }, { "epoch": 1.08222204041561, "grad_norm": 0.14909835159778595, "learning_rate": 5.155117071605592e-06, "loss": 0.0013, "step": 66140 }, { "epoch": 1.0823856663666858, "grad_norm": 0.06051696464419365, "learning_rate": 5.1536898468165695e-06, "loss": 0.0015, "step": 66150 }, { "epoch": 1.0825492923177615, "grad_norm": 0.17413592338562012, "learning_rate": 5.1522626094931265e-06, "loss": 0.0027, "step": 66160 }, { "epoch": 1.0827129182688375, "grad_norm": 0.01725386269390583, "learning_rate": 5.150835359751661e-06, "loss": 0.0013, "step": 66170 }, { "epoch": 1.0828765442199133, "grad_norm": 0.05856165662407875, "learning_rate": 5.149408097708578e-06, "loss": 0.0026, "step": 66180 }, { "epoch": 1.083040170170989, "grad_norm": 0.029762886464595795, "learning_rate": 5.14798082348028e-06, "loss": 0.0016, "step": 66190 }, { "epoch": 1.0832037961220649, "grad_norm": 0.12964269518852234, "learning_rate": 5.146553537183168e-06, "loss": 0.0016, "step": 66200 }, { "epoch": 1.0833674220731409, "grad_norm": 0.06808414310216904, "learning_rate": 5.145126238933649e-06, "loss": 0.0017, "step": 66210 }, { "epoch": 1.0835310480242166, "grad_norm": 0.02311863750219345, "learning_rate": 5.143698928848126e-06, "loss": 0.0013, "step": 66220 }, { "epoch": 1.0836946739752924, "grad_norm": 0.11196508258581161, "learning_rate": 5.14227160704301e-06, "loss": 0.0014, "step": 66230 }, { "epoch": 1.0838582999263684, "grad_norm": 0.09211152791976929, "learning_rate": 5.140844273634704e-06, "loss": 0.001, "step": 66240 }, { "epoch": 1.0840219258774442, "grad_norm": 0.008989924564957619, "learning_rate": 5.139416928739619e-06, "loss": 0.0012, "step": 66250 }, { "epoch": 1.08418555182852, "grad_norm": 0.10878390073776245, "learning_rate": 5.137989572474163e-06, "loss": 0.0015, "step": 66260 }, { "epoch": 1.084349177779596, "grad_norm": 0.028733471408486366, "learning_rate": 5.136562204954748e-06, "loss": 0.001, "step": 66270 }, { "epoch": 1.0845128037306717, "grad_norm": 0.06863290071487427, "learning_rate": 5.135134826297783e-06, "loss": 0.0011, "step": 66280 }, { "epoch": 1.0846764296817475, "grad_norm": 0.0035480589140206575, "learning_rate": 5.1337074366196825e-06, "loss": 0.0019, "step": 66290 }, { "epoch": 1.0848400556328233, "grad_norm": 0.08949305862188339, "learning_rate": 5.132280036036858e-06, "loss": 0.0016, "step": 66300 }, { "epoch": 1.0850036815838993, "grad_norm": 0.08759809285402298, "learning_rate": 5.130852624665723e-06, "loss": 0.0014, "step": 66310 }, { "epoch": 1.085167307534975, "grad_norm": 0.15438930690288544, "learning_rate": 5.129425202622693e-06, "loss": 0.004, "step": 66320 }, { "epoch": 1.0853309334860508, "grad_norm": 0.020953025668859482, "learning_rate": 5.1279977700241845e-06, "loss": 0.001, "step": 66330 }, { "epoch": 1.0854945594371268, "grad_norm": 0.038243480026721954, "learning_rate": 5.126570326986613e-06, "loss": 0.0016, "step": 66340 }, { "epoch": 1.0856581853882026, "grad_norm": 0.056142378598451614, "learning_rate": 5.125142873626396e-06, "loss": 0.0012, "step": 66350 }, { "epoch": 1.0858218113392784, "grad_norm": 0.10768869519233704, "learning_rate": 5.1237154100599525e-06, "loss": 0.0012, "step": 66360 }, { "epoch": 1.0859854372903543, "grad_norm": 0.043584391474723816, "learning_rate": 5.122287936403701e-06, "loss": 0.0024, "step": 66370 }, { "epoch": 1.0861490632414301, "grad_norm": 0.2161857634782791, "learning_rate": 5.120860452774062e-06, "loss": 0.001, "step": 66380 }, { "epoch": 1.086312689192506, "grad_norm": 0.12796850502490997, "learning_rate": 5.119432959287456e-06, "loss": 0.0016, "step": 66390 }, { "epoch": 1.0864763151435817, "grad_norm": 0.07119136303663254, "learning_rate": 5.118005456060303e-06, "loss": 0.0012, "step": 66400 }, { "epoch": 1.0866399410946577, "grad_norm": 0.07732295244932175, "learning_rate": 5.116577943209027e-06, "loss": 0.0028, "step": 66410 }, { "epoch": 1.0868035670457334, "grad_norm": 0.04027533903717995, "learning_rate": 5.11515042085005e-06, "loss": 0.0024, "step": 66420 }, { "epoch": 1.0869671929968092, "grad_norm": 0.043051790446043015, "learning_rate": 5.113722889099797e-06, "loss": 0.0014, "step": 66430 }, { "epoch": 1.0871308189478852, "grad_norm": 0.0551178902387619, "learning_rate": 5.112295348074692e-06, "loss": 0.0014, "step": 66440 }, { "epoch": 1.087294444898961, "grad_norm": 0.033098142594099045, "learning_rate": 5.1108677978911604e-06, "loss": 0.0018, "step": 66450 }, { "epoch": 1.0874580708500368, "grad_norm": 0.002978342352434993, "learning_rate": 5.109440238665628e-06, "loss": 0.0021, "step": 66460 }, { "epoch": 1.0876216968011128, "grad_norm": 0.07392004132270813, "learning_rate": 5.108012670514523e-06, "loss": 0.0019, "step": 66470 }, { "epoch": 1.0877853227521885, "grad_norm": 0.024156300351023674, "learning_rate": 5.106585093554272e-06, "loss": 0.0021, "step": 66480 }, { "epoch": 1.0879489487032643, "grad_norm": 0.05626894533634186, "learning_rate": 5.105157507901302e-06, "loss": 0.0016, "step": 66490 }, { "epoch": 1.08811257465434, "grad_norm": 0.050983961671590805, "learning_rate": 5.103729913672046e-06, "loss": 0.0014, "step": 66500 }, { "epoch": 1.088276200605416, "grad_norm": 0.13798587024211884, "learning_rate": 5.102302310982929e-06, "loss": 0.0021, "step": 66510 }, { "epoch": 1.0884398265564919, "grad_norm": 0.0390687994658947, "learning_rate": 5.100874699950384e-06, "loss": 0.0011, "step": 66520 }, { "epoch": 1.0886034525075676, "grad_norm": 0.03659537807106972, "learning_rate": 5.099447080690842e-06, "loss": 0.0014, "step": 66530 }, { "epoch": 1.0887670784586436, "grad_norm": 0.03784247860312462, "learning_rate": 5.0980194533207336e-06, "loss": 0.0014, "step": 66540 }, { "epoch": 1.0889307044097194, "grad_norm": 0.1008988469839096, "learning_rate": 5.096591817956493e-06, "loss": 0.0009, "step": 66550 }, { "epoch": 1.0890943303607952, "grad_norm": 0.08490591496229172, "learning_rate": 5.095164174714553e-06, "loss": 0.0015, "step": 66560 }, { "epoch": 1.089257956311871, "grad_norm": 0.021908758208155632, "learning_rate": 5.0937365237113455e-06, "loss": 0.0014, "step": 66570 }, { "epoch": 1.089421582262947, "grad_norm": 0.12063805013895035, "learning_rate": 5.092308865063307e-06, "loss": 0.0014, "step": 66580 }, { "epoch": 1.0895852082140227, "grad_norm": 0.06550191342830658, "learning_rate": 5.09088119888687e-06, "loss": 0.0013, "step": 66590 }, { "epoch": 1.0897488341650985, "grad_norm": 0.06678319722414017, "learning_rate": 5.089453525298474e-06, "loss": 0.0009, "step": 66600 }, { "epoch": 1.0899124601161745, "grad_norm": 0.03586084023118019, "learning_rate": 5.088025844414551e-06, "loss": 0.0018, "step": 66610 }, { "epoch": 1.0900760860672503, "grad_norm": 0.18501341342926025, "learning_rate": 5.086598156351541e-06, "loss": 0.0015, "step": 66620 }, { "epoch": 1.090239712018326, "grad_norm": 0.04540174454450607, "learning_rate": 5.085170461225879e-06, "loss": 0.002, "step": 66630 }, { "epoch": 1.090403337969402, "grad_norm": 0.13158740103244781, "learning_rate": 5.083742759154003e-06, "loss": 0.0012, "step": 66640 }, { "epoch": 1.0905669639204778, "grad_norm": 0.01592317968606949, "learning_rate": 5.082315050252355e-06, "loss": 0.0007, "step": 66650 }, { "epoch": 1.0907305898715536, "grad_norm": 0.05693323165178299, "learning_rate": 5.0808873346373685e-06, "loss": 0.0011, "step": 66660 }, { "epoch": 1.0908942158226296, "grad_norm": 0.043752219527959824, "learning_rate": 5.079459612425488e-06, "loss": 0.0017, "step": 66670 }, { "epoch": 1.0910578417737054, "grad_norm": 0.004869421944022179, "learning_rate": 5.078031883733152e-06, "loss": 0.0015, "step": 66680 }, { "epoch": 1.0912214677247811, "grad_norm": 0.04623570293188095, "learning_rate": 5.0766041486768005e-06, "loss": 0.0012, "step": 66690 }, { "epoch": 1.091385093675857, "grad_norm": 0.04098409041762352, "learning_rate": 5.075176407372875e-06, "loss": 0.0018, "step": 66700 }, { "epoch": 1.091548719626933, "grad_norm": 0.05187565088272095, "learning_rate": 5.073748659937819e-06, "loss": 0.0013, "step": 66710 }, { "epoch": 1.0917123455780087, "grad_norm": 0.05179828405380249, "learning_rate": 5.0723209064880705e-06, "loss": 0.0014, "step": 66720 }, { "epoch": 1.0918759715290844, "grad_norm": 0.0177445225417614, "learning_rate": 5.070893147140077e-06, "loss": 0.0011, "step": 66730 }, { "epoch": 1.0920395974801604, "grad_norm": 0.10295765101909637, "learning_rate": 5.069465382010279e-06, "loss": 0.0047, "step": 66740 }, { "epoch": 1.0922032234312362, "grad_norm": 0.030104324221611023, "learning_rate": 5.068037611215121e-06, "loss": 0.0013, "step": 66750 }, { "epoch": 1.092366849382312, "grad_norm": 0.09781965613365173, "learning_rate": 5.066609834871047e-06, "loss": 0.0018, "step": 66760 }, { "epoch": 1.0925304753333878, "grad_norm": 0.056559912860393524, "learning_rate": 5.0651820530945e-06, "loss": 0.0035, "step": 66770 }, { "epoch": 1.0926941012844638, "grad_norm": 0.02647298388183117, "learning_rate": 5.063754266001929e-06, "loss": 0.0009, "step": 66780 }, { "epoch": 1.0928577272355395, "grad_norm": 0.060371797531843185, "learning_rate": 5.062326473709775e-06, "loss": 0.0028, "step": 66790 }, { "epoch": 1.0930213531866153, "grad_norm": 0.1852189004421234, "learning_rate": 5.060898676334487e-06, "loss": 0.0021, "step": 66800 }, { "epoch": 1.0931849791376913, "grad_norm": 0.038718532770872116, "learning_rate": 5.05947087399251e-06, "loss": 0.0012, "step": 66810 }, { "epoch": 1.093348605088767, "grad_norm": 0.10918682813644409, "learning_rate": 5.058043066800291e-06, "loss": 0.0011, "step": 66820 }, { "epoch": 1.0935122310398429, "grad_norm": 0.03159303590655327, "learning_rate": 5.0566152548742766e-06, "loss": 0.0013, "step": 66830 }, { "epoch": 1.0936758569909188, "grad_norm": 0.009500847198069096, "learning_rate": 5.0551874383309145e-06, "loss": 0.0015, "step": 66840 }, { "epoch": 1.0938394829419946, "grad_norm": 0.020893540233373642, "learning_rate": 5.053759617286652e-06, "loss": 0.0007, "step": 66850 }, { "epoch": 1.0940031088930704, "grad_norm": 0.0421232245862484, "learning_rate": 5.05233179185794e-06, "loss": 0.0014, "step": 66860 }, { "epoch": 1.0941667348441464, "grad_norm": 0.10335648059844971, "learning_rate": 5.050903962161223e-06, "loss": 0.0018, "step": 66870 }, { "epoch": 1.0943303607952222, "grad_norm": 0.12847822904586792, "learning_rate": 5.049476128312954e-06, "loss": 0.0027, "step": 66880 }, { "epoch": 1.094493986746298, "grad_norm": 0.17413492500782013, "learning_rate": 5.048048290429579e-06, "loss": 0.0017, "step": 66890 }, { "epoch": 1.0946576126973737, "grad_norm": 0.011131856590509415, "learning_rate": 5.0466204486275495e-06, "loss": 0.002, "step": 66900 }, { "epoch": 1.0948212386484497, "grad_norm": 0.03642764315009117, "learning_rate": 5.0451926030233145e-06, "loss": 0.0014, "step": 66910 }, { "epoch": 1.0949848645995255, "grad_norm": 0.16153410077095032, "learning_rate": 5.043764753733326e-06, "loss": 0.0017, "step": 66920 }, { "epoch": 1.0951484905506013, "grad_norm": 0.056975413113832474, "learning_rate": 5.042336900874031e-06, "loss": 0.001, "step": 66930 }, { "epoch": 1.0953121165016773, "grad_norm": 0.06315185129642487, "learning_rate": 5.040909044561882e-06, "loss": 0.0013, "step": 66940 }, { "epoch": 1.095475742452753, "grad_norm": 0.038827188313007355, "learning_rate": 5.039481184913333e-06, "loss": 0.0008, "step": 66950 }, { "epoch": 1.0956393684038288, "grad_norm": 0.04742514714598656, "learning_rate": 5.038053322044832e-06, "loss": 0.0026, "step": 66960 }, { "epoch": 1.0958029943549046, "grad_norm": 0.03633485734462738, "learning_rate": 5.03662545607283e-06, "loss": 0.0015, "step": 66970 }, { "epoch": 1.0959666203059806, "grad_norm": 0.0665416270494461, "learning_rate": 5.0351975871137814e-06, "loss": 0.0013, "step": 66980 }, { "epoch": 1.0961302462570564, "grad_norm": 0.2053467482328415, "learning_rate": 5.033769715284137e-06, "loss": 0.0014, "step": 66990 }, { "epoch": 1.0962938722081321, "grad_norm": 0.07712331414222717, "learning_rate": 5.03234184070035e-06, "loss": 0.0018, "step": 67000 }, { "epoch": 1.0964574981592081, "grad_norm": 0.07719994336366653, "learning_rate": 5.030913963478873e-06, "loss": 0.0019, "step": 67010 }, { "epoch": 1.096621124110284, "grad_norm": 0.23459912836551666, "learning_rate": 5.029486083736157e-06, "loss": 0.0018, "step": 67020 }, { "epoch": 1.0967847500613597, "grad_norm": 0.05724671855568886, "learning_rate": 5.028058201588657e-06, "loss": 0.001, "step": 67030 }, { "epoch": 1.0969483760124357, "grad_norm": 0.010152517817914486, "learning_rate": 5.026630317152826e-06, "loss": 0.0017, "step": 67040 }, { "epoch": 1.0971120019635114, "grad_norm": 0.03183060884475708, "learning_rate": 5.025202430545116e-06, "loss": 0.0013, "step": 67050 }, { "epoch": 1.0972756279145872, "grad_norm": 0.059750061482191086, "learning_rate": 5.023774541881983e-06, "loss": 0.0015, "step": 67060 }, { "epoch": 1.0974392538656632, "grad_norm": 0.03474615141749382, "learning_rate": 5.022346651279878e-06, "loss": 0.0009, "step": 67070 }, { "epoch": 1.097602879816739, "grad_norm": 0.03113647550344467, "learning_rate": 5.020918758855257e-06, "loss": 0.0033, "step": 67080 }, { "epoch": 1.0977665057678148, "grad_norm": 0.0761863961815834, "learning_rate": 5.019490864724572e-06, "loss": 0.0021, "step": 67090 }, { "epoch": 1.0979301317188905, "grad_norm": 0.08832691609859467, "learning_rate": 5.018062969004279e-06, "loss": 0.0011, "step": 67100 }, { "epoch": 1.0980937576699665, "grad_norm": 0.05399898812174797, "learning_rate": 5.016635071810832e-06, "loss": 0.0016, "step": 67110 }, { "epoch": 1.0982573836210423, "grad_norm": 0.026567207649350166, "learning_rate": 5.015207173260684e-06, "loss": 0.0008, "step": 67120 }, { "epoch": 1.098421009572118, "grad_norm": 0.03639587387442589, "learning_rate": 5.013779273470292e-06, "loss": 0.0022, "step": 67130 }, { "epoch": 1.098584635523194, "grad_norm": 0.08234965801239014, "learning_rate": 5.012351372556109e-06, "loss": 0.0016, "step": 67140 }, { "epoch": 1.0987482614742698, "grad_norm": 0.12885858118534088, "learning_rate": 5.01092347063459e-06, "loss": 0.002, "step": 67150 }, { "epoch": 1.0989118874253456, "grad_norm": 0.07344863563776016, "learning_rate": 5.009495567822189e-06, "loss": 0.0008, "step": 67160 }, { "epoch": 1.0990755133764214, "grad_norm": 0.062437355518341064, "learning_rate": 5.008067664235363e-06, "loss": 0.001, "step": 67170 }, { "epoch": 1.0992391393274974, "grad_norm": 0.05675366148352623, "learning_rate": 5.006639759990566e-06, "loss": 0.0012, "step": 67180 }, { "epoch": 1.0994027652785732, "grad_norm": 0.05875685438513756, "learning_rate": 5.005211855204251e-06, "loss": 0.0009, "step": 67190 }, { "epoch": 1.099566391229649, "grad_norm": 0.043574266135692596, "learning_rate": 5.003783949992876e-06, "loss": 0.0019, "step": 67200 }, { "epoch": 1.099730017180725, "grad_norm": 0.0449739508330822, "learning_rate": 5.002356044472896e-06, "loss": 0.0008, "step": 67210 }, { "epoch": 1.0998936431318007, "grad_norm": 0.014705897308886051, "learning_rate": 5.0009281387607635e-06, "loss": 0.005, "step": 67220 }, { "epoch": 1.1000572690828765, "grad_norm": 0.01937803439795971, "learning_rate": 4.999500232972936e-06, "loss": 0.0019, "step": 67230 }, { "epoch": 1.1002208950339525, "grad_norm": 0.04800884798169136, "learning_rate": 4.998072327225868e-06, "loss": 0.0017, "step": 67240 }, { "epoch": 1.1003845209850283, "grad_norm": 0.032485850155353546, "learning_rate": 4.996644421636014e-06, "loss": 0.0019, "step": 67250 }, { "epoch": 1.100548146936104, "grad_norm": 0.08788475394248962, "learning_rate": 4.99521651631983e-06, "loss": 0.0021, "step": 67260 }, { "epoch": 1.1007117728871798, "grad_norm": 0.05829519405961037, "learning_rate": 4.993788611393769e-06, "loss": 0.0018, "step": 67270 }, { "epoch": 1.1008753988382558, "grad_norm": 0.03683392331004143, "learning_rate": 4.992360706974289e-06, "loss": 0.0015, "step": 67280 }, { "epoch": 1.1010390247893316, "grad_norm": 0.037919510155916214, "learning_rate": 4.9909328031778435e-06, "loss": 0.0016, "step": 67290 }, { "epoch": 1.1012026507404074, "grad_norm": 0.05159285292029381, "learning_rate": 4.9895049001208875e-06, "loss": 0.0016, "step": 67300 }, { "epoch": 1.1013662766914833, "grad_norm": 0.03023102693259716, "learning_rate": 4.988076997919877e-06, "loss": 0.0007, "step": 67310 }, { "epoch": 1.1015299026425591, "grad_norm": 0.04483753815293312, "learning_rate": 4.986649096691265e-06, "loss": 0.001, "step": 67320 }, { "epoch": 1.101693528593635, "grad_norm": 0.07059621810913086, "learning_rate": 4.985221196551508e-06, "loss": 0.0012, "step": 67330 }, { "epoch": 1.101857154544711, "grad_norm": 0.14924487471580505, "learning_rate": 4.9837932976170594e-06, "loss": 0.0029, "step": 67340 }, { "epoch": 1.1020207804957867, "grad_norm": 0.059832584112882614, "learning_rate": 4.982365400004374e-06, "loss": 0.001, "step": 67350 }, { "epoch": 1.1021844064468624, "grad_norm": 0.09202532470226288, "learning_rate": 4.980937503829907e-06, "loss": 0.0009, "step": 67360 }, { "epoch": 1.1023480323979382, "grad_norm": 0.09011992067098618, "learning_rate": 4.979509609210112e-06, "loss": 0.0015, "step": 67370 }, { "epoch": 1.1025116583490142, "grad_norm": 0.021511957049369812, "learning_rate": 4.978081716261445e-06, "loss": 0.001, "step": 67380 }, { "epoch": 1.10267528430009, "grad_norm": 0.006959167309105396, "learning_rate": 4.976653825100357e-06, "loss": 0.0018, "step": 67390 }, { "epoch": 1.1028389102511658, "grad_norm": 0.040239367634058, "learning_rate": 4.975225935843304e-06, "loss": 0.0016, "step": 67400 }, { "epoch": 1.1030025362022418, "grad_norm": 0.07228533923625946, "learning_rate": 4.97379804860674e-06, "loss": 0.0012, "step": 67410 }, { "epoch": 1.1031661621533175, "grad_norm": 0.02421848475933075, "learning_rate": 4.972370163507117e-06, "loss": 0.0009, "step": 67420 }, { "epoch": 1.1033297881043933, "grad_norm": 0.08054196089506149, "learning_rate": 4.97094228066089e-06, "loss": 0.0011, "step": 67430 }, { "epoch": 1.1034934140554693, "grad_norm": 0.0896577313542366, "learning_rate": 4.969514400184512e-06, "loss": 0.002, "step": 67440 }, { "epoch": 1.103657040006545, "grad_norm": 0.0888267308473587, "learning_rate": 4.9680865221944355e-06, "loss": 0.0014, "step": 67450 }, { "epoch": 1.1038206659576209, "grad_norm": 0.05953643098473549, "learning_rate": 4.9666586468071135e-06, "loss": 0.002, "step": 67460 }, { "epoch": 1.1039842919086966, "grad_norm": 0.06607147306203842, "learning_rate": 4.965230774138999e-06, "loss": 0.0011, "step": 67470 }, { "epoch": 1.1041479178597726, "grad_norm": 0.024466248229146004, "learning_rate": 4.963802904306545e-06, "loss": 0.0014, "step": 67480 }, { "epoch": 1.1043115438108484, "grad_norm": 0.04621598869562149, "learning_rate": 4.962375037426202e-06, "loss": 0.001, "step": 67490 }, { "epoch": 1.1044751697619242, "grad_norm": 0.03939751535654068, "learning_rate": 4.9609471736144234e-06, "loss": 0.0015, "step": 67500 }, { "epoch": 1.1046387957130002, "grad_norm": 0.24628588557243347, "learning_rate": 4.959519312987659e-06, "loss": 0.0015, "step": 67510 }, { "epoch": 1.104802421664076, "grad_norm": 0.07973644882440567, "learning_rate": 4.958091455662364e-06, "loss": 0.0012, "step": 67520 }, { "epoch": 1.1049660476151517, "grad_norm": 0.06189773604273796, "learning_rate": 4.956663601754987e-06, "loss": 0.0022, "step": 67530 }, { "epoch": 1.1051296735662275, "grad_norm": 0.04373147338628769, "learning_rate": 4.955235751381977e-06, "loss": 0.0024, "step": 67540 }, { "epoch": 1.1052932995173035, "grad_norm": 0.0825188085436821, "learning_rate": 4.9538079046597885e-06, "loss": 0.0027, "step": 67550 }, { "epoch": 1.1054569254683793, "grad_norm": 0.0289162490516901, "learning_rate": 4.952380061704871e-06, "loss": 0.0013, "step": 67560 }, { "epoch": 1.105620551419455, "grad_norm": 0.06399867683649063, "learning_rate": 4.950952222633672e-06, "loss": 0.0015, "step": 67570 }, { "epoch": 1.105784177370531, "grad_norm": 0.09018483757972717, "learning_rate": 4.9495243875626444e-06, "loss": 0.001, "step": 67580 }, { "epoch": 1.1059478033216068, "grad_norm": 0.08095432817935944, "learning_rate": 4.948096556608235e-06, "loss": 0.0015, "step": 67590 }, { "epoch": 1.1061114292726826, "grad_norm": 0.0099486093968153, "learning_rate": 4.9466687298868934e-06, "loss": 0.0018, "step": 67600 }, { "epoch": 1.1062750552237586, "grad_norm": 0.04577697440981865, "learning_rate": 4.945240907515069e-06, "loss": 0.0011, "step": 67610 }, { "epoch": 1.1064386811748343, "grad_norm": 0.08428248018026352, "learning_rate": 4.943813089609211e-06, "loss": 0.0037, "step": 67620 }, { "epoch": 1.1066023071259101, "grad_norm": 0.004601741675287485, "learning_rate": 4.942385276285765e-06, "loss": 0.0012, "step": 67630 }, { "epoch": 1.1067659330769861, "grad_norm": 0.055516455322504044, "learning_rate": 4.94095746766118e-06, "loss": 0.0013, "step": 67640 }, { "epoch": 1.106929559028062, "grad_norm": 0.06810334324836731, "learning_rate": 4.939529663851903e-06, "loss": 0.0016, "step": 67650 }, { "epoch": 1.1070931849791377, "grad_norm": 0.13163498044013977, "learning_rate": 4.938101864974381e-06, "loss": 0.0017, "step": 67660 }, { "epoch": 1.1072568109302134, "grad_norm": 0.04317409172654152, "learning_rate": 4.9366740711450605e-06, "loss": 0.0035, "step": 67670 }, { "epoch": 1.1074204368812894, "grad_norm": 0.10430760681629181, "learning_rate": 4.935246282480388e-06, "loss": 0.0014, "step": 67680 }, { "epoch": 1.1075840628323652, "grad_norm": 0.05480848252773285, "learning_rate": 4.933818499096807e-06, "loss": 0.0014, "step": 67690 }, { "epoch": 1.107747688783441, "grad_norm": 0.08361303806304932, "learning_rate": 4.932390721110765e-06, "loss": 0.0012, "step": 67700 }, { "epoch": 1.107911314734517, "grad_norm": 0.0881851464509964, "learning_rate": 4.930962948638705e-06, "loss": 0.0019, "step": 67710 }, { "epoch": 1.1080749406855928, "grad_norm": 0.06896450370550156, "learning_rate": 4.929535181797073e-06, "loss": 0.0021, "step": 67720 }, { "epoch": 1.1082385666366685, "grad_norm": 0.002867023227736354, "learning_rate": 4.92810742070231e-06, "loss": 0.0017, "step": 67730 }, { "epoch": 1.1084021925877443, "grad_norm": 0.07280313223600388, "learning_rate": 4.926679665470863e-06, "loss": 0.0021, "step": 67740 }, { "epoch": 1.1085658185388203, "grad_norm": 0.053313832730054855, "learning_rate": 4.925251916219173e-06, "loss": 0.0012, "step": 67750 }, { "epoch": 1.108729444489896, "grad_norm": 0.08107008785009384, "learning_rate": 4.923824173063681e-06, "loss": 0.0013, "step": 67760 }, { "epoch": 1.1088930704409719, "grad_norm": 0.07604475319385529, "learning_rate": 4.922396436120832e-06, "loss": 0.0018, "step": 67770 }, { "epoch": 1.1090566963920478, "grad_norm": 0.013636584393680096, "learning_rate": 4.920968705507065e-06, "loss": 0.0009, "step": 67780 }, { "epoch": 1.1092203223431236, "grad_norm": 0.030414769425988197, "learning_rate": 4.91954098133882e-06, "loss": 0.0028, "step": 67790 }, { "epoch": 1.1093839482941994, "grad_norm": 0.034453511238098145, "learning_rate": 4.918113263732541e-06, "loss": 0.0026, "step": 67800 }, { "epoch": 1.1095475742452754, "grad_norm": 0.11767400056123734, "learning_rate": 4.916685552804664e-06, "loss": 0.0017, "step": 67810 }, { "epoch": 1.1097112001963512, "grad_norm": 0.11816064268350601, "learning_rate": 4.915257848671631e-06, "loss": 0.0023, "step": 67820 }, { "epoch": 1.109874826147427, "grad_norm": 0.10383836925029755, "learning_rate": 4.913830151449879e-06, "loss": 0.0013, "step": 67830 }, { "epoch": 1.110038452098503, "grad_norm": 0.14100036025047302, "learning_rate": 4.912402461255847e-06, "loss": 0.0016, "step": 67840 }, { "epoch": 1.1102020780495787, "grad_norm": 0.13607031106948853, "learning_rate": 4.910974778205972e-06, "loss": 0.002, "step": 67850 }, { "epoch": 1.1103657040006545, "grad_norm": 0.058741047978401184, "learning_rate": 4.90954710241669e-06, "loss": 0.0014, "step": 67860 }, { "epoch": 1.1105293299517303, "grad_norm": 0.15582901239395142, "learning_rate": 4.908119434004441e-06, "loss": 0.0015, "step": 67870 }, { "epoch": 1.1106929559028063, "grad_norm": 0.06155945733189583, "learning_rate": 4.906691773085657e-06, "loss": 0.0014, "step": 67880 }, { "epoch": 1.110856581853882, "grad_norm": 0.09701423346996307, "learning_rate": 4.905264119776775e-06, "loss": 0.0007, "step": 67890 }, { "epoch": 1.1110202078049578, "grad_norm": 0.01467121671885252, "learning_rate": 4.903836474194229e-06, "loss": 0.0017, "step": 67900 }, { "epoch": 1.1111838337560338, "grad_norm": 0.032242126762866974, "learning_rate": 4.902408836454453e-06, "loss": 0.0011, "step": 67910 }, { "epoch": 1.1113474597071096, "grad_norm": 0.013647392392158508, "learning_rate": 4.90098120667388e-06, "loss": 0.001, "step": 67920 }, { "epoch": 1.1115110856581853, "grad_norm": 0.04842734709382057, "learning_rate": 4.899553584968943e-06, "loss": 0.0011, "step": 67930 }, { "epoch": 1.1116747116092611, "grad_norm": 0.060524847358465195, "learning_rate": 4.898125971456074e-06, "loss": 0.0017, "step": 67940 }, { "epoch": 1.1118383375603371, "grad_norm": 0.04990917816758156, "learning_rate": 4.896698366251703e-06, "loss": 0.0007, "step": 67950 }, { "epoch": 1.112001963511413, "grad_norm": 0.07572031766176224, "learning_rate": 4.895270769472263e-06, "loss": 0.0015, "step": 67960 }, { "epoch": 1.1121655894624887, "grad_norm": 0.09095306694507599, "learning_rate": 4.893843181234182e-06, "loss": 0.0021, "step": 67970 }, { "epoch": 1.1123292154135647, "grad_norm": 0.11911226063966751, "learning_rate": 4.892415601653891e-06, "loss": 0.0024, "step": 67980 }, { "epoch": 1.1124928413646404, "grad_norm": 0.07754993438720703, "learning_rate": 4.890988030847817e-06, "loss": 0.0017, "step": 67990 }, { "epoch": 1.1126564673157162, "grad_norm": 0.04118460789322853, "learning_rate": 4.8895604689323875e-06, "loss": 0.0012, "step": 68000 }, { "epoch": 1.1128200932667922, "grad_norm": 0.025269757956266403, "learning_rate": 4.888132916024031e-06, "loss": 0.0018, "step": 68010 }, { "epoch": 1.112983719217868, "grad_norm": 0.004718205891549587, "learning_rate": 4.886705372239174e-06, "loss": 0.003, "step": 68020 }, { "epoch": 1.1131473451689438, "grad_norm": 0.2105235904455185, "learning_rate": 4.8852778376942405e-06, "loss": 0.0027, "step": 68030 }, { "epoch": 1.1133109711200195, "grad_norm": 0.05768294632434845, "learning_rate": 4.883850312505656e-06, "loss": 0.0021, "step": 68040 }, { "epoch": 1.1134745970710955, "grad_norm": 0.07050131261348724, "learning_rate": 4.882422796789846e-06, "loss": 0.0016, "step": 68050 }, { "epoch": 1.1136382230221713, "grad_norm": 0.3542935252189636, "learning_rate": 4.880995290663231e-06, "loss": 0.0014, "step": 68060 }, { "epoch": 1.113801848973247, "grad_norm": 0.05983225256204605, "learning_rate": 4.879567794242237e-06, "loss": 0.002, "step": 68070 }, { "epoch": 1.113965474924323, "grad_norm": 0.08799834549427032, "learning_rate": 4.878140307643282e-06, "loss": 0.0015, "step": 68080 }, { "epoch": 1.1141291008753988, "grad_norm": 0.0234416201710701, "learning_rate": 4.876712830982791e-06, "loss": 0.0013, "step": 68090 }, { "epoch": 1.1142927268264746, "grad_norm": 0.1094837486743927, "learning_rate": 4.875285364377181e-06, "loss": 0.0014, "step": 68100 }, { "epoch": 1.1144563527775506, "grad_norm": 0.028089361265301704, "learning_rate": 4.873857907942872e-06, "loss": 0.0017, "step": 68110 }, { "epoch": 1.1146199787286264, "grad_norm": 0.08715226501226425, "learning_rate": 4.872430461796283e-06, "loss": 0.0013, "step": 68120 }, { "epoch": 1.1147836046797022, "grad_norm": 0.18056809902191162, "learning_rate": 4.8710030260538325e-06, "loss": 0.001, "step": 68130 }, { "epoch": 1.114947230630778, "grad_norm": 0.03907276317477226, "learning_rate": 4.869575600831936e-06, "loss": 0.0018, "step": 68140 }, { "epoch": 1.115110856581854, "grad_norm": 0.06399427354335785, "learning_rate": 4.8681481862470085e-06, "loss": 0.0018, "step": 68150 }, { "epoch": 1.1152744825329297, "grad_norm": 0.007164820097386837, "learning_rate": 4.866720782415467e-06, "loss": 0.0008, "step": 68160 }, { "epoch": 1.1154381084840055, "grad_norm": 0.02238679677248001, "learning_rate": 4.865293389453725e-06, "loss": 0.0018, "step": 68170 }, { "epoch": 1.1156017344350815, "grad_norm": 0.06743773072957993, "learning_rate": 4.863866007478197e-06, "loss": 0.0011, "step": 68180 }, { "epoch": 1.1157653603861573, "grad_norm": 0.10422379523515701, "learning_rate": 4.8624386366052925e-06, "loss": 0.0013, "step": 68190 }, { "epoch": 1.115928986337233, "grad_norm": 0.09807144105434418, "learning_rate": 4.861011276951426e-06, "loss": 0.0021, "step": 68200 }, { "epoch": 1.116092612288309, "grad_norm": 0.06299929320812225, "learning_rate": 4.859583928633007e-06, "loss": 0.001, "step": 68210 }, { "epoch": 1.1162562382393848, "grad_norm": 0.053699031472206116, "learning_rate": 4.8581565917664455e-06, "loss": 0.0012, "step": 68220 }, { "epoch": 1.1164198641904606, "grad_norm": 0.05500046908855438, "learning_rate": 4.856729266468149e-06, "loss": 0.0019, "step": 68230 }, { "epoch": 1.1165834901415364, "grad_norm": 0.23045317828655243, "learning_rate": 4.855301952854525e-06, "loss": 0.0015, "step": 68240 }, { "epoch": 1.1167471160926123, "grad_norm": 0.02562612295150757, "learning_rate": 4.853874651041983e-06, "loss": 0.001, "step": 68250 }, { "epoch": 1.1169107420436881, "grad_norm": 0.04910077154636383, "learning_rate": 4.852447361146926e-06, "loss": 0.0017, "step": 68260 }, { "epoch": 1.117074367994764, "grad_norm": 0.21876679360866547, "learning_rate": 4.851020083285761e-06, "loss": 0.0029, "step": 68270 }, { "epoch": 1.11723799394584, "grad_norm": 0.03991933539509773, "learning_rate": 4.84959281757489e-06, "loss": 0.0013, "step": 68280 }, { "epoch": 1.1174016198969157, "grad_norm": 0.04062545672059059, "learning_rate": 4.848165564130719e-06, "loss": 0.0012, "step": 68290 }, { "epoch": 1.1175652458479914, "grad_norm": 0.10241885483264923, "learning_rate": 4.846738323069647e-06, "loss": 0.0015, "step": 68300 }, { "epoch": 1.1177288717990674, "grad_norm": 0.017392035573720932, "learning_rate": 4.8453110945080764e-06, "loss": 0.0005, "step": 68310 }, { "epoch": 1.1178924977501432, "grad_norm": 0.08320208638906479, "learning_rate": 4.843883878562406e-06, "loss": 0.0022, "step": 68320 }, { "epoch": 1.118056123701219, "grad_norm": 0.031161842867732048, "learning_rate": 4.8424566753490355e-06, "loss": 0.0016, "step": 68330 }, { "epoch": 1.1182197496522948, "grad_norm": 0.04601942375302315, "learning_rate": 4.841029484984362e-06, "loss": 0.0014, "step": 68340 }, { "epoch": 1.1183833756033708, "grad_norm": 0.03770656883716583, "learning_rate": 4.839602307584783e-06, "loss": 0.0012, "step": 68350 }, { "epoch": 1.1185470015544465, "grad_norm": 0.005928488448262215, "learning_rate": 4.838175143266695e-06, "loss": 0.0022, "step": 68360 }, { "epoch": 1.1187106275055223, "grad_norm": 0.015430208295583725, "learning_rate": 4.836747992146491e-06, "loss": 0.0018, "step": 68370 }, { "epoch": 1.1188742534565983, "grad_norm": 0.0399409644305706, "learning_rate": 4.835320854340565e-06, "loss": 0.0012, "step": 68380 }, { "epoch": 1.119037879407674, "grad_norm": 0.04816249758005142, "learning_rate": 4.833893729965311e-06, "loss": 0.0014, "step": 68390 }, { "epoch": 1.1192015053587498, "grad_norm": 0.04776677489280701, "learning_rate": 4.832466619137119e-06, "loss": 0.0017, "step": 68400 }, { "epoch": 1.1193651313098258, "grad_norm": 0.05590014532208443, "learning_rate": 4.831039521972379e-06, "loss": 0.0015, "step": 68410 }, { "epoch": 1.1195287572609016, "grad_norm": 0.02409372478723526, "learning_rate": 4.829612438587481e-06, "loss": 0.0019, "step": 68420 }, { "epoch": 1.1196923832119774, "grad_norm": 0.03411567583680153, "learning_rate": 4.828185369098813e-06, "loss": 0.0015, "step": 68430 }, { "epoch": 1.1198560091630532, "grad_norm": 0.008275383152067661, "learning_rate": 4.826758313622761e-06, "loss": 0.0011, "step": 68440 }, { "epoch": 1.1200196351141292, "grad_norm": 0.5816841125488281, "learning_rate": 4.825331272275712e-06, "loss": 0.0012, "step": 68450 }, { "epoch": 1.120183261065205, "grad_norm": 0.1427685171365738, "learning_rate": 4.82390424517405e-06, "loss": 0.0016, "step": 68460 }, { "epoch": 1.1203468870162807, "grad_norm": 0.01641976833343506, "learning_rate": 4.822477232434158e-06, "loss": 0.001, "step": 68470 }, { "epoch": 1.1205105129673567, "grad_norm": 0.0024519511498510838, "learning_rate": 4.82105023417242e-06, "loss": 0.0014, "step": 68480 }, { "epoch": 1.1206741389184325, "grad_norm": 0.1062808409333229, "learning_rate": 4.819623250505216e-06, "loss": 0.0021, "step": 68490 }, { "epoch": 1.1208377648695083, "grad_norm": 0.06138540431857109, "learning_rate": 4.818196281548925e-06, "loss": 0.0014, "step": 68500 }, { "epoch": 1.121001390820584, "grad_norm": 0.05452272295951843, "learning_rate": 4.816769327419928e-06, "loss": 0.0013, "step": 68510 }, { "epoch": 1.12116501677166, "grad_norm": 0.056255191564559937, "learning_rate": 4.8153423882346005e-06, "loss": 0.0013, "step": 68520 }, { "epoch": 1.1213286427227358, "grad_norm": 0.00764923682436347, "learning_rate": 4.813915464109321e-06, "loss": 0.0021, "step": 68530 }, { "epoch": 1.1214922686738116, "grad_norm": 0.09291795641183853, "learning_rate": 4.812488555160461e-06, "loss": 0.0016, "step": 68540 }, { "epoch": 1.1216558946248876, "grad_norm": 0.06420211493968964, "learning_rate": 4.811061661504398e-06, "loss": 0.0015, "step": 68550 }, { "epoch": 1.1218195205759633, "grad_norm": 0.05445067211985588, "learning_rate": 4.809634783257502e-06, "loss": 0.002, "step": 68560 }, { "epoch": 1.1219831465270391, "grad_norm": 0.09114896506071091, "learning_rate": 4.808207920536146e-06, "loss": 0.0024, "step": 68570 }, { "epoch": 1.1221467724781151, "grad_norm": 0.06437831372022629, "learning_rate": 4.8067810734566996e-06, "loss": 0.0015, "step": 68580 }, { "epoch": 1.122310398429191, "grad_norm": 0.03637402132153511, "learning_rate": 4.805354242135531e-06, "loss": 0.0024, "step": 68590 }, { "epoch": 1.1224740243802667, "grad_norm": 0.027112627401947975, "learning_rate": 4.803927426689009e-06, "loss": 0.0023, "step": 68600 }, { "epoch": 1.1226376503313427, "grad_norm": 0.2992970049381256, "learning_rate": 4.802500627233498e-06, "loss": 0.0017, "step": 68610 }, { "epoch": 1.1228012762824184, "grad_norm": 0.010519352741539478, "learning_rate": 4.8010738438853635e-06, "loss": 0.0012, "step": 68620 }, { "epoch": 1.1229649022334942, "grad_norm": 0.004247406963258982, "learning_rate": 4.79964707676097e-06, "loss": 0.0015, "step": 68630 }, { "epoch": 1.12312852818457, "grad_norm": 0.02733306773006916, "learning_rate": 4.79822032597668e-06, "loss": 0.0019, "step": 68640 }, { "epoch": 1.123292154135646, "grad_norm": 0.06895174086093903, "learning_rate": 4.796793591648853e-06, "loss": 0.0019, "step": 68650 }, { "epoch": 1.1234557800867218, "grad_norm": 0.04173388332128525, "learning_rate": 4.79536687389385e-06, "loss": 0.0015, "step": 68660 }, { "epoch": 1.1236194060377975, "grad_norm": 0.032186008989810944, "learning_rate": 4.793940172828028e-06, "loss": 0.0012, "step": 68670 }, { "epoch": 1.1237830319888735, "grad_norm": 0.07761038094758987, "learning_rate": 4.792513488567743e-06, "loss": 0.0009, "step": 68680 }, { "epoch": 1.1239466579399493, "grad_norm": 0.14062942564487457, "learning_rate": 4.791086821229355e-06, "loss": 0.0017, "step": 68690 }, { "epoch": 1.124110283891025, "grad_norm": 0.04103397578001022, "learning_rate": 4.789660170929213e-06, "loss": 0.0011, "step": 68700 }, { "epoch": 1.1242739098421008, "grad_norm": 0.01348420511931181, "learning_rate": 4.788233537783672e-06, "loss": 0.0008, "step": 68710 }, { "epoch": 1.1244375357931768, "grad_norm": 0.011913171038031578, "learning_rate": 4.786806921909084e-06, "loss": 0.0012, "step": 68720 }, { "epoch": 1.1246011617442526, "grad_norm": 0.037176214158535004, "learning_rate": 4.785380323421797e-06, "loss": 0.0012, "step": 68730 }, { "epoch": 1.1247647876953284, "grad_norm": 0.013256115838885307, "learning_rate": 4.783953742438161e-06, "loss": 0.0014, "step": 68740 }, { "epoch": 1.1249284136464044, "grad_norm": 0.01954822614789009, "learning_rate": 4.782527179074523e-06, "loss": 0.0015, "step": 68750 }, { "epoch": 1.1250920395974802, "grad_norm": 0.05509534478187561, "learning_rate": 4.781100633447228e-06, "loss": 0.0019, "step": 68760 }, { "epoch": 1.125255665548556, "grad_norm": 0.009375140070915222, "learning_rate": 4.779674105672621e-06, "loss": 0.0016, "step": 68770 }, { "epoch": 1.125419291499632, "grad_norm": 0.06476373225450516, "learning_rate": 4.7782475958670435e-06, "loss": 0.0013, "step": 68780 }, { "epoch": 1.1255829174507077, "grad_norm": 0.015310265123844147, "learning_rate": 4.776821104146839e-06, "loss": 0.0014, "step": 68790 }, { "epoch": 1.1257465434017835, "grad_norm": 0.06258490681648254, "learning_rate": 4.7753946306283446e-06, "loss": 0.0026, "step": 68800 }, { "epoch": 1.1259101693528595, "grad_norm": 0.007618163712322712, "learning_rate": 4.773968175427901e-06, "loss": 0.001, "step": 68810 }, { "epoch": 1.1260737953039353, "grad_norm": 0.11507266759872437, "learning_rate": 4.772541738661844e-06, "loss": 0.0012, "step": 68820 }, { "epoch": 1.126237421255011, "grad_norm": 0.021891871467232704, "learning_rate": 4.771115320446508e-06, "loss": 0.0015, "step": 68830 }, { "epoch": 1.1264010472060868, "grad_norm": 0.041682783514261246, "learning_rate": 4.7696889208982275e-06, "loss": 0.0026, "step": 68840 }, { "epoch": 1.1265646731571628, "grad_norm": 0.016159404069185257, "learning_rate": 4.768262540133337e-06, "loss": 0.001, "step": 68850 }, { "epoch": 1.1267282991082386, "grad_norm": 0.07486572116613388, "learning_rate": 4.766836178268163e-06, "loss": 0.0016, "step": 68860 }, { "epoch": 1.1268919250593143, "grad_norm": 0.025512000545859337, "learning_rate": 4.765409835419039e-06, "loss": 0.0014, "step": 68870 }, { "epoch": 1.1270555510103903, "grad_norm": 0.1999233365058899, "learning_rate": 4.76398351170229e-06, "loss": 0.0018, "step": 68880 }, { "epoch": 1.1272191769614661, "grad_norm": 0.07580450177192688, "learning_rate": 4.762557207234242e-06, "loss": 0.0009, "step": 68890 }, { "epoch": 1.127382802912542, "grad_norm": 0.0494706891477108, "learning_rate": 4.761130922131221e-06, "loss": 0.0013, "step": 68900 }, { "epoch": 1.1275464288636177, "grad_norm": 0.06407617777585983, "learning_rate": 4.759704656509549e-06, "loss": 0.0006, "step": 68910 }, { "epoch": 1.1277100548146937, "grad_norm": 0.0746607854962349, "learning_rate": 4.758278410485547e-06, "loss": 0.001, "step": 68920 }, { "epoch": 1.1278736807657694, "grad_norm": 0.02848961390554905, "learning_rate": 4.756852184175537e-06, "loss": 0.0016, "step": 68930 }, { "epoch": 1.1280373067168452, "grad_norm": 0.04006713628768921, "learning_rate": 4.755425977695834e-06, "loss": 0.0026, "step": 68940 }, { "epoch": 1.1282009326679212, "grad_norm": 0.044217657297849655, "learning_rate": 4.753999791162757e-06, "loss": 0.0011, "step": 68950 }, { "epoch": 1.128364558618997, "grad_norm": 0.0506523996591568, "learning_rate": 4.75257362469262e-06, "loss": 0.0023, "step": 68960 }, { "epoch": 1.1285281845700728, "grad_norm": 0.05456908047199249, "learning_rate": 4.7511474784017365e-06, "loss": 0.0014, "step": 68970 }, { "epoch": 1.1286918105211488, "grad_norm": 0.2570742666721344, "learning_rate": 4.749721352406418e-06, "loss": 0.0023, "step": 68980 }, { "epoch": 1.1288554364722245, "grad_norm": 0.02927926741540432, "learning_rate": 4.7482952468229745e-06, "loss": 0.0014, "step": 68990 }, { "epoch": 1.1290190624233003, "grad_norm": 0.059277646243572235, "learning_rate": 4.746869161767714e-06, "loss": 0.001, "step": 69000 }, { "epoch": 1.1291826883743763, "grad_norm": 0.039500642567873, "learning_rate": 4.745443097356943e-06, "loss": 0.001, "step": 69010 }, { "epoch": 1.129346314325452, "grad_norm": 0.038523104041814804, "learning_rate": 4.744017053706967e-06, "loss": 0.0028, "step": 69020 }, { "epoch": 1.1295099402765278, "grad_norm": 0.14120125770568848, "learning_rate": 4.74259103093409e-06, "loss": 0.0029, "step": 69030 }, { "epoch": 1.1296735662276036, "grad_norm": 0.014275015331804752, "learning_rate": 4.741165029154612e-06, "loss": 0.0016, "step": 69040 }, { "epoch": 1.1298371921786796, "grad_norm": 0.031047916039824486, "learning_rate": 4.739739048484834e-06, "loss": 0.0013, "step": 69050 }, { "epoch": 1.1300008181297554, "grad_norm": 0.02242126315832138, "learning_rate": 4.7383130890410535e-06, "loss": 0.0016, "step": 69060 }, { "epoch": 1.1301644440808312, "grad_norm": 0.036595042794942856, "learning_rate": 4.736887150939568e-06, "loss": 0.0012, "step": 69070 }, { "epoch": 1.130328070031907, "grad_norm": 0.05457611009478569, "learning_rate": 4.73546123429667e-06, "loss": 0.0012, "step": 69080 }, { "epoch": 1.130491695982983, "grad_norm": 0.02000110037624836, "learning_rate": 4.734035339228655e-06, "loss": 0.0015, "step": 69090 }, { "epoch": 1.1306553219340587, "grad_norm": 0.046778496354818344, "learning_rate": 4.732609465851812e-06, "loss": 0.0017, "step": 69100 }, { "epoch": 1.1308189478851345, "grad_norm": 0.051911722868680954, "learning_rate": 4.731183614282431e-06, "loss": 0.0013, "step": 69110 }, { "epoch": 1.1309825738362105, "grad_norm": 0.07886985689401627, "learning_rate": 4.7297577846367996e-06, "loss": 0.0019, "step": 69120 }, { "epoch": 1.1311461997872863, "grad_norm": 0.06864001601934433, "learning_rate": 4.728331977031205e-06, "loss": 0.0014, "step": 69130 }, { "epoch": 1.131309825738362, "grad_norm": 0.016339551657438278, "learning_rate": 4.726906191581929e-06, "loss": 0.0015, "step": 69140 }, { "epoch": 1.131473451689438, "grad_norm": 0.07824890315532684, "learning_rate": 4.725480428405255e-06, "loss": 0.0014, "step": 69150 }, { "epoch": 1.1316370776405138, "grad_norm": 0.042329344898462296, "learning_rate": 4.724054687617464e-06, "loss": 0.001, "step": 69160 }, { "epoch": 1.1318007035915896, "grad_norm": 0.07561182230710983, "learning_rate": 4.722628969334833e-06, "loss": 0.0014, "step": 69170 }, { "epoch": 1.1319643295426656, "grad_norm": 0.05738131329417229, "learning_rate": 4.721203273673641e-06, "loss": 0.0015, "step": 69180 }, { "epoch": 1.1321279554937413, "grad_norm": 0.14139309525489807, "learning_rate": 4.7197776007501605e-06, "loss": 0.0016, "step": 69190 }, { "epoch": 1.1322915814448171, "grad_norm": 0.0735301598906517, "learning_rate": 4.7183519506806655e-06, "loss": 0.0008, "step": 69200 }, { "epoch": 1.132455207395893, "grad_norm": 0.03328855335712433, "learning_rate": 4.7169263235814275e-06, "loss": 0.0021, "step": 69210 }, { "epoch": 1.132618833346969, "grad_norm": 0.09768014401197433, "learning_rate": 4.715500719568715e-06, "loss": 0.0013, "step": 69220 }, { "epoch": 1.1327824592980447, "grad_norm": 0.031685058027505875, "learning_rate": 4.7140751387587955e-06, "loss": 0.0011, "step": 69230 }, { "epoch": 1.1329460852491204, "grad_norm": 0.057616978883743286, "learning_rate": 4.712649581267935e-06, "loss": 0.0014, "step": 69240 }, { "epoch": 1.1331097112001964, "grad_norm": 0.05230008810758591, "learning_rate": 4.711224047212397e-06, "loss": 0.0015, "step": 69250 }, { "epoch": 1.1332733371512722, "grad_norm": 0.07187466323375702, "learning_rate": 4.709798536708444e-06, "loss": 0.0012, "step": 69260 }, { "epoch": 1.133436963102348, "grad_norm": 0.12507106363773346, "learning_rate": 4.708373049872334e-06, "loss": 0.0024, "step": 69270 }, { "epoch": 1.1336005890534238, "grad_norm": 0.019464466720819473, "learning_rate": 4.706947586820327e-06, "loss": 0.0014, "step": 69280 }, { "epoch": 1.1337642150044998, "grad_norm": 0.05887354165315628, "learning_rate": 4.705522147668677e-06, "loss": 0.0008, "step": 69290 }, { "epoch": 1.1339278409555755, "grad_norm": 0.033978305757045746, "learning_rate": 4.704096732533638e-06, "loss": 0.0009, "step": 69300 }, { "epoch": 1.1340914669066513, "grad_norm": 0.034490495920181274, "learning_rate": 4.702671341531464e-06, "loss": 0.0013, "step": 69310 }, { "epoch": 1.1342550928577273, "grad_norm": 0.08320074528455734, "learning_rate": 4.701245974778403e-06, "loss": 0.0017, "step": 69320 }, { "epoch": 1.134418718808803, "grad_norm": 0.10139095783233643, "learning_rate": 4.699820632390705e-06, "loss": 0.0012, "step": 69330 }, { "epoch": 1.1345823447598788, "grad_norm": 0.027408484369516373, "learning_rate": 4.698395314484613e-06, "loss": 0.001, "step": 69340 }, { "epoch": 1.1347459707109548, "grad_norm": 0.02187284454703331, "learning_rate": 4.696970021176375e-06, "loss": 0.0013, "step": 69350 }, { "epoch": 1.1349095966620306, "grad_norm": 0.13004529476165771, "learning_rate": 4.69554475258223e-06, "loss": 0.0016, "step": 69360 }, { "epoch": 1.1350732226131064, "grad_norm": 0.04210156202316284, "learning_rate": 4.694119508818419e-06, "loss": 0.0012, "step": 69370 }, { "epoch": 1.1352368485641824, "grad_norm": 0.07349762320518494, "learning_rate": 4.692694290001181e-06, "loss": 0.0014, "step": 69380 }, { "epoch": 1.1354004745152582, "grad_norm": 0.03395136445760727, "learning_rate": 4.69126909624675e-06, "loss": 0.0022, "step": 69390 }, { "epoch": 1.135564100466334, "grad_norm": 0.038457222282886505, "learning_rate": 4.689843927671362e-06, "loss": 0.0016, "step": 69400 }, { "epoch": 1.1357277264174097, "grad_norm": 0.032105591148138046, "learning_rate": 4.688418784391247e-06, "loss": 0.0023, "step": 69410 }, { "epoch": 1.1358913523684857, "grad_norm": 0.0332166850566864, "learning_rate": 4.686993666522637e-06, "loss": 0.0011, "step": 69420 }, { "epoch": 1.1360549783195615, "grad_norm": 0.17723166942596436, "learning_rate": 4.685568574181758e-06, "loss": 0.0015, "step": 69430 }, { "epoch": 1.1362186042706373, "grad_norm": 0.057184819132089615, "learning_rate": 4.6841435074848376e-06, "loss": 0.001, "step": 69440 }, { "epoch": 1.1363822302217133, "grad_norm": 0.15394125878810883, "learning_rate": 4.682718466548096e-06, "loss": 0.0013, "step": 69450 }, { "epoch": 1.136545856172789, "grad_norm": 0.015407206490635872, "learning_rate": 4.6812934514877585e-06, "loss": 0.0026, "step": 69460 }, { "epoch": 1.1367094821238648, "grad_norm": 0.06906851381063461, "learning_rate": 4.679868462420042e-06, "loss": 0.0017, "step": 69470 }, { "epoch": 1.1368731080749406, "grad_norm": 0.036438170820474625, "learning_rate": 4.678443499461164e-06, "loss": 0.0013, "step": 69480 }, { "epoch": 1.1370367340260166, "grad_norm": 0.05990985035896301, "learning_rate": 4.677018562727341e-06, "loss": 0.0019, "step": 69490 }, { "epoch": 1.1372003599770923, "grad_norm": 0.06944531947374344, "learning_rate": 4.675593652334786e-06, "loss": 0.0014, "step": 69500 }, { "epoch": 1.1373639859281681, "grad_norm": 0.023260870948433876, "learning_rate": 4.674168768399708e-06, "loss": 0.0013, "step": 69510 }, { "epoch": 1.1375276118792441, "grad_norm": 0.11983196437358856, "learning_rate": 4.672743911038316e-06, "loss": 0.0019, "step": 69520 }, { "epoch": 1.13769123783032, "grad_norm": 0.005170944146811962, "learning_rate": 4.671319080366819e-06, "loss": 0.0018, "step": 69530 }, { "epoch": 1.1378548637813957, "grad_norm": 0.040983088314533234, "learning_rate": 4.669894276501418e-06, "loss": 0.0014, "step": 69540 }, { "epoch": 1.1380184897324717, "grad_norm": 0.012690916657447815, "learning_rate": 4.6684694995583165e-06, "loss": 0.0011, "step": 69550 }, { "epoch": 1.1381821156835474, "grad_norm": 0.10029805451631546, "learning_rate": 4.6670447496537154e-06, "loss": 0.0009, "step": 69560 }, { "epoch": 1.1383457416346232, "grad_norm": 0.031852830201387405, "learning_rate": 4.6656200269038115e-06, "loss": 0.0013, "step": 69570 }, { "epoch": 1.1385093675856992, "grad_norm": 0.014501173980534077, "learning_rate": 4.664195331424801e-06, "loss": 0.0013, "step": 69580 }, { "epoch": 1.138672993536775, "grad_norm": 0.047503355890512466, "learning_rate": 4.662770663332876e-06, "loss": 0.0019, "step": 69590 }, { "epoch": 1.1388366194878508, "grad_norm": 0.11540369689464569, "learning_rate": 4.661346022744229e-06, "loss": 0.0006, "step": 69600 }, { "epoch": 1.1390002454389265, "grad_norm": 0.015183909796178341, "learning_rate": 4.659921409775047e-06, "loss": 0.0011, "step": 69610 }, { "epoch": 1.1391638713900025, "grad_norm": 0.028284432366490364, "learning_rate": 4.658496824541518e-06, "loss": 0.0013, "step": 69620 }, { "epoch": 1.1393274973410783, "grad_norm": 0.03630579262971878, "learning_rate": 4.657072267159828e-06, "loss": 0.0014, "step": 69630 }, { "epoch": 1.139491123292154, "grad_norm": 0.01199339423328638, "learning_rate": 4.655647737746155e-06, "loss": 0.0014, "step": 69640 }, { "epoch": 1.13965474924323, "grad_norm": 0.046971119940280914, "learning_rate": 4.654223236416682e-06, "loss": 0.0023, "step": 69650 }, { "epoch": 1.1398183751943058, "grad_norm": 0.025801360607147217, "learning_rate": 4.652798763287585e-06, "loss": 0.0012, "step": 69660 }, { "epoch": 1.1399820011453816, "grad_norm": 0.03047754429280758, "learning_rate": 4.65137431847504e-06, "loss": 0.0014, "step": 69670 }, { "epoch": 1.1401456270964574, "grad_norm": 0.06464429944753647, "learning_rate": 4.6499499020952185e-06, "loss": 0.0023, "step": 69680 }, { "epoch": 1.1403092530475334, "grad_norm": 0.12241919338703156, "learning_rate": 4.648525514264293e-06, "loss": 0.002, "step": 69690 }, { "epoch": 1.1404728789986092, "grad_norm": 0.05516223981976509, "learning_rate": 4.64710115509843e-06, "loss": 0.0017, "step": 69700 }, { "epoch": 1.140636504949685, "grad_norm": 0.08677995204925537, "learning_rate": 4.645676824713797e-06, "loss": 0.0014, "step": 69710 }, { "epoch": 1.140800130900761, "grad_norm": 0.051214709877967834, "learning_rate": 4.644252523226556e-06, "loss": 0.0015, "step": 69720 }, { "epoch": 1.1409637568518367, "grad_norm": 0.044149212539196014, "learning_rate": 4.64282825075287e-06, "loss": 0.0012, "step": 69730 }, { "epoch": 1.1411273828029125, "grad_norm": 0.0165726225823164, "learning_rate": 4.641404007408895e-06, "loss": 0.0008, "step": 69740 }, { "epoch": 1.1412910087539885, "grad_norm": 0.04800814017653465, "learning_rate": 4.63997979331079e-06, "loss": 0.002, "step": 69750 }, { "epoch": 1.1414546347050643, "grad_norm": 0.055821534246206284, "learning_rate": 4.638555608574708e-06, "loss": 0.0014, "step": 69760 }, { "epoch": 1.14161826065614, "grad_norm": 0.1380537748336792, "learning_rate": 4.6371314533168e-06, "loss": 0.0015, "step": 69770 }, { "epoch": 1.141781886607216, "grad_norm": 0.018859386444091797, "learning_rate": 4.635707327653218e-06, "loss": 0.0013, "step": 69780 }, { "epoch": 1.1419455125582918, "grad_norm": 0.05863795429468155, "learning_rate": 4.6342832317001054e-06, "loss": 0.0015, "step": 69790 }, { "epoch": 1.1421091385093676, "grad_norm": 0.05261906236410141, "learning_rate": 4.632859165573609e-06, "loss": 0.0009, "step": 69800 }, { "epoch": 1.1422727644604433, "grad_norm": 0.024221068248152733, "learning_rate": 4.631435129389869e-06, "loss": 0.0016, "step": 69810 }, { "epoch": 1.1424363904115193, "grad_norm": 0.14689457416534424, "learning_rate": 4.630011123265028e-06, "loss": 0.0016, "step": 69820 }, { "epoch": 1.1426000163625951, "grad_norm": 0.11523108184337616, "learning_rate": 4.628587147315219e-06, "loss": 0.0013, "step": 69830 }, { "epoch": 1.142763642313671, "grad_norm": 0.04516945034265518, "learning_rate": 4.627163201656579e-06, "loss": 0.0013, "step": 69840 }, { "epoch": 1.1429272682647467, "grad_norm": 0.03280774876475334, "learning_rate": 4.62573928640524e-06, "loss": 0.0012, "step": 69850 }, { "epoch": 1.1430908942158227, "grad_norm": 0.11012667417526245, "learning_rate": 4.624315401677331e-06, "loss": 0.0014, "step": 69860 }, { "epoch": 1.1432545201668984, "grad_norm": 0.04875332489609718, "learning_rate": 4.6228915475889795e-06, "loss": 0.0021, "step": 69870 }, { "epoch": 1.1434181461179742, "grad_norm": 0.08149484544992447, "learning_rate": 4.621467724256311e-06, "loss": 0.0012, "step": 69880 }, { "epoch": 1.1435817720690502, "grad_norm": 0.04188698157668114, "learning_rate": 4.620043931795446e-06, "loss": 0.001, "step": 69890 }, { "epoch": 1.143745398020126, "grad_norm": 0.008410933427512646, "learning_rate": 4.618620170322506e-06, "loss": 0.0016, "step": 69900 }, { "epoch": 1.1439090239712018, "grad_norm": 0.12317752838134766, "learning_rate": 4.617196439953608e-06, "loss": 0.0024, "step": 69910 }, { "epoch": 1.1440726499222778, "grad_norm": 0.2302798479795456, "learning_rate": 4.615772740804866e-06, "loss": 0.0015, "step": 69920 }, { "epoch": 1.1442362758733535, "grad_norm": 0.03326624631881714, "learning_rate": 4.61434907299239e-06, "loss": 0.0026, "step": 69930 }, { "epoch": 1.1443999018244293, "grad_norm": 0.029295239597558975, "learning_rate": 4.612925436632293e-06, "loss": 0.0008, "step": 69940 }, { "epoch": 1.1445635277755053, "grad_norm": 0.06539978086948395, "learning_rate": 4.61150183184068e-06, "loss": 0.0011, "step": 69950 }, { "epoch": 1.144727153726581, "grad_norm": 0.038999684154987335, "learning_rate": 4.6100782587336566e-06, "loss": 0.0014, "step": 69960 }, { "epoch": 1.1448907796776568, "grad_norm": 0.10136260837316513, "learning_rate": 4.608654717427323e-06, "loss": 0.0023, "step": 69970 }, { "epoch": 1.1450544056287328, "grad_norm": 0.04178288206458092, "learning_rate": 4.607231208037779e-06, "loss": 0.0012, "step": 69980 }, { "epoch": 1.1452180315798086, "grad_norm": 0.04721640795469284, "learning_rate": 4.605807730681122e-06, "loss": 0.0016, "step": 69990 }, { "epoch": 1.1453816575308844, "grad_norm": 0.0385037362575531, "learning_rate": 4.604384285473445e-06, "loss": 0.0008, "step": 70000 }, { "epoch": 1.1455452834819602, "grad_norm": 0.09557905048131943, "learning_rate": 4.602960872530839e-06, "loss": 0.0013, "step": 70010 }, { "epoch": 1.1457089094330362, "grad_norm": 0.036662496626377106, "learning_rate": 4.601537491969394e-06, "loss": 0.0011, "step": 70020 }, { "epoch": 1.145872535384112, "grad_norm": 0.03240806236863136, "learning_rate": 4.600114143905196e-06, "loss": 0.001, "step": 70030 }, { "epoch": 1.1460361613351877, "grad_norm": 0.03840579465031624, "learning_rate": 4.598690828454327e-06, "loss": 0.0013, "step": 70040 }, { "epoch": 1.1461997872862635, "grad_norm": 0.002582667162641883, "learning_rate": 4.597267545732869e-06, "loss": 0.0012, "step": 70050 }, { "epoch": 1.1463634132373395, "grad_norm": 0.04660514369606972, "learning_rate": 4.5958442958569e-06, "loss": 0.0014, "step": 70060 }, { "epoch": 1.1465270391884153, "grad_norm": 0.17103545367717743, "learning_rate": 4.594421078942496e-06, "loss": 0.0015, "step": 70070 }, { "epoch": 1.146690665139491, "grad_norm": 0.11617986112833023, "learning_rate": 4.592997895105728e-06, "loss": 0.0032, "step": 70080 }, { "epoch": 1.146854291090567, "grad_norm": 0.017251331359148026, "learning_rate": 4.591574744462666e-06, "loss": 0.0023, "step": 70090 }, { "epoch": 1.1470179170416428, "grad_norm": 0.3622962236404419, "learning_rate": 4.59015162712938e-06, "loss": 0.0015, "step": 70100 }, { "epoch": 1.1471815429927186, "grad_norm": 0.01872352510690689, "learning_rate": 4.588728543221932e-06, "loss": 0.0016, "step": 70110 }, { "epoch": 1.1473451689437946, "grad_norm": 0.09341420233249664, "learning_rate": 4.587305492856385e-06, "loss": 0.0018, "step": 70120 }, { "epoch": 1.1475087948948703, "grad_norm": 0.05101931467652321, "learning_rate": 4.585882476148797e-06, "loss": 0.0009, "step": 70130 }, { "epoch": 1.1476724208459461, "grad_norm": 0.15150775015354156, "learning_rate": 4.584459493215228e-06, "loss": 0.0019, "step": 70140 }, { "epoch": 1.1478360467970221, "grad_norm": 0.06719735264778137, "learning_rate": 4.583036544171726e-06, "loss": 0.001, "step": 70150 }, { "epoch": 1.1479996727480979, "grad_norm": 0.042329829186201096, "learning_rate": 4.581613629134346e-06, "loss": 0.0015, "step": 70160 }, { "epoch": 1.1481632986991737, "grad_norm": 0.05940350517630577, "learning_rate": 4.580190748219135e-06, "loss": 0.001, "step": 70170 }, { "epoch": 1.1483269246502494, "grad_norm": 0.04276195168495178, "learning_rate": 4.578767901542138e-06, "loss": 0.0013, "step": 70180 }, { "epoch": 1.1484905506013254, "grad_norm": 0.07646219432353973, "learning_rate": 4.577345089219397e-06, "loss": 0.0015, "step": 70190 }, { "epoch": 1.1486541765524012, "grad_norm": 0.003967093303799629, "learning_rate": 4.575922311366954e-06, "loss": 0.0025, "step": 70200 }, { "epoch": 1.148817802503477, "grad_norm": 0.12254655361175537, "learning_rate": 4.574499568100843e-06, "loss": 0.0017, "step": 70210 }, { "epoch": 1.148981428454553, "grad_norm": 0.05390444025397301, "learning_rate": 4.5730768595371005e-06, "loss": 0.0031, "step": 70220 }, { "epoch": 1.1491450544056288, "grad_norm": 0.01958387717604637, "learning_rate": 4.571654185791757e-06, "loss": 0.0016, "step": 70230 }, { "epoch": 1.1493086803567045, "grad_norm": 0.014004549011588097, "learning_rate": 4.57023154698084e-06, "loss": 0.0016, "step": 70240 }, { "epoch": 1.1494723063077803, "grad_norm": 0.2683558762073517, "learning_rate": 4.568808943220376e-06, "loss": 0.0017, "step": 70250 }, { "epoch": 1.1496359322588563, "grad_norm": 0.0585932619869709, "learning_rate": 4.567386374626388e-06, "loss": 0.0018, "step": 70260 }, { "epoch": 1.149799558209932, "grad_norm": 0.038936033844947815, "learning_rate": 4.565963841314895e-06, "loss": 0.001, "step": 70270 }, { "epoch": 1.1499631841610078, "grad_norm": 0.04698233678936958, "learning_rate": 4.564541343401914e-06, "loss": 0.0014, "step": 70280 }, { "epoch": 1.1501268101120838, "grad_norm": 0.059814032167196274, "learning_rate": 4.563118881003461e-06, "loss": 0.002, "step": 70290 }, { "epoch": 1.1502904360631596, "grad_norm": 0.04966720938682556, "learning_rate": 4.561696454235544e-06, "loss": 0.002, "step": 70300 }, { "epoch": 1.1504540620142354, "grad_norm": 0.061718229204416275, "learning_rate": 4.560274063214174e-06, "loss": 0.0012, "step": 70310 }, { "epoch": 1.1506176879653114, "grad_norm": 0.010380145162343979, "learning_rate": 4.558851708055355e-06, "loss": 0.0011, "step": 70320 }, { "epoch": 1.1507813139163872, "grad_norm": 0.08529224991798401, "learning_rate": 4.557429388875089e-06, "loss": 0.0022, "step": 70330 }, { "epoch": 1.150944939867463, "grad_norm": 0.025229481980204582, "learning_rate": 4.556007105789377e-06, "loss": 0.0018, "step": 70340 }, { "epoch": 1.151108565818539, "grad_norm": 0.03809288144111633, "learning_rate": 4.554584858914215e-06, "loss": 0.0021, "step": 70350 }, { "epoch": 1.1512721917696147, "grad_norm": 0.03015618771314621, "learning_rate": 4.553162648365596e-06, "loss": 0.0015, "step": 70360 }, { "epoch": 1.1514358177206905, "grad_norm": 0.13260255753993988, "learning_rate": 4.5517404742595115e-06, "loss": 0.0013, "step": 70370 }, { "epoch": 1.1515994436717663, "grad_norm": 0.06396839767694473, "learning_rate": 4.550318336711949e-06, "loss": 0.0013, "step": 70380 }, { "epoch": 1.1517630696228423, "grad_norm": 0.06370850652456284, "learning_rate": 4.548896235838893e-06, "loss": 0.0006, "step": 70390 }, { "epoch": 1.151926695573918, "grad_norm": 0.0769275575876236, "learning_rate": 4.547474171756324e-06, "loss": 0.0014, "step": 70400 }, { "epoch": 1.1520903215249938, "grad_norm": 0.03137046843767166, "learning_rate": 4.546052144580224e-06, "loss": 0.0019, "step": 70410 }, { "epoch": 1.1522539474760698, "grad_norm": 0.10049285739660263, "learning_rate": 4.5446301544265645e-06, "loss": 0.0009, "step": 70420 }, { "epoch": 1.1524175734271456, "grad_norm": 0.06732705235481262, "learning_rate": 4.543208201411321e-06, "loss": 0.001, "step": 70430 }, { "epoch": 1.1525811993782213, "grad_norm": 0.06824111938476562, "learning_rate": 4.541786285650463e-06, "loss": 0.0018, "step": 70440 }, { "epoch": 1.1527448253292971, "grad_norm": 0.02567416988313198, "learning_rate": 4.540364407259957e-06, "loss": 0.0019, "step": 70450 }, { "epoch": 1.1529084512803731, "grad_norm": 0.11214583367109299, "learning_rate": 4.538942566355765e-06, "loss": 0.0014, "step": 70460 }, { "epoch": 1.1530720772314489, "grad_norm": 0.034722182899713516, "learning_rate": 4.53752076305385e-06, "loss": 0.0013, "step": 70470 }, { "epoch": 1.1532357031825247, "grad_norm": 1.3014166355133057, "learning_rate": 4.536098997470168e-06, "loss": 0.0009, "step": 70480 }, { "epoch": 1.1533993291336007, "grad_norm": 0.1458534449338913, "learning_rate": 4.534677269720672e-06, "loss": 0.001, "step": 70490 }, { "epoch": 1.1535629550846764, "grad_norm": 0.05178714916110039, "learning_rate": 4.533255579921315e-06, "loss": 0.0016, "step": 70500 }, { "epoch": 1.1537265810357522, "grad_norm": 0.0254295002669096, "learning_rate": 4.531833928188046e-06, "loss": 0.0011, "step": 70510 }, { "epoch": 1.1538902069868282, "grad_norm": 0.05432586371898651, "learning_rate": 4.530412314636808e-06, "loss": 0.0012, "step": 70520 }, { "epoch": 1.154053832937904, "grad_norm": 0.04637228697538376, "learning_rate": 4.528990739383544e-06, "loss": 0.0011, "step": 70530 }, { "epoch": 1.1542174588889798, "grad_norm": 0.03168310970067978, "learning_rate": 4.527569202544193e-06, "loss": 0.0015, "step": 70540 }, { "epoch": 1.1543810848400557, "grad_norm": 0.060829151421785355, "learning_rate": 4.526147704234691e-06, "loss": 0.0013, "step": 70550 }, { "epoch": 1.1545447107911315, "grad_norm": 0.06875636428594589, "learning_rate": 4.524726244570969e-06, "loss": 0.001, "step": 70560 }, { "epoch": 1.1547083367422073, "grad_norm": 0.05969448760151863, "learning_rate": 4.5233048236689584e-06, "loss": 0.0011, "step": 70570 }, { "epoch": 1.154871962693283, "grad_norm": 0.005917946342378855, "learning_rate": 4.521883441644583e-06, "loss": 0.0015, "step": 70580 }, { "epoch": 1.155035588644359, "grad_norm": 0.028081731870770454, "learning_rate": 4.520462098613769e-06, "loss": 0.0013, "step": 70590 }, { "epoch": 1.1551992145954348, "grad_norm": 0.039777081459760666, "learning_rate": 4.519040794692434e-06, "loss": 0.0018, "step": 70600 }, { "epoch": 1.1553628405465106, "grad_norm": 0.36060217022895813, "learning_rate": 4.517619529996496e-06, "loss": 0.0022, "step": 70610 }, { "epoch": 1.1555264664975866, "grad_norm": 0.1988147497177124, "learning_rate": 4.516198304641867e-06, "loss": 0.0019, "step": 70620 }, { "epoch": 1.1556900924486624, "grad_norm": 0.011198713444173336, "learning_rate": 4.5147771187444595e-06, "loss": 0.0018, "step": 70630 }, { "epoch": 1.1558537183997382, "grad_norm": 0.04681457206606865, "learning_rate": 4.513355972420178e-06, "loss": 0.0007, "step": 70640 }, { "epoch": 1.156017344350814, "grad_norm": 0.01267332024872303, "learning_rate": 4.511934865784929e-06, "loss": 0.0017, "step": 70650 }, { "epoch": 1.15618097030189, "grad_norm": 0.08585552126169205, "learning_rate": 4.510513798954611e-06, "loss": 0.0019, "step": 70660 }, { "epoch": 1.1563445962529657, "grad_norm": 0.03173115476965904, "learning_rate": 4.5090927720451225e-06, "loss": 0.0007, "step": 70670 }, { "epoch": 1.1565082222040415, "grad_norm": 0.0903201550245285, "learning_rate": 4.5076717851723565e-06, "loss": 0.0013, "step": 70680 }, { "epoch": 1.1566718481551175, "grad_norm": 0.0177631638944149, "learning_rate": 4.506250838452206e-06, "loss": 0.0014, "step": 70690 }, { "epoch": 1.1568354741061933, "grad_norm": 0.2032691240310669, "learning_rate": 4.504829932000556e-06, "loss": 0.0023, "step": 70700 }, { "epoch": 1.156999100057269, "grad_norm": 0.07930630445480347, "learning_rate": 4.503409065933292e-06, "loss": 0.0031, "step": 70710 }, { "epoch": 1.157162726008345, "grad_norm": 0.008853563107550144, "learning_rate": 4.501988240366296e-06, "loss": 0.0009, "step": 70720 }, { "epoch": 1.1573263519594208, "grad_norm": 0.20645996928215027, "learning_rate": 4.500567455415444e-06, "loss": 0.0017, "step": 70730 }, { "epoch": 1.1574899779104966, "grad_norm": 0.021436849609017372, "learning_rate": 4.499146711196611e-06, "loss": 0.0022, "step": 70740 }, { "epoch": 1.1576536038615726, "grad_norm": 0.06520280987024307, "learning_rate": 4.497726007825669e-06, "loss": 0.0013, "step": 70750 }, { "epoch": 1.1578172298126483, "grad_norm": 0.04408663138747215, "learning_rate": 4.496305345418485e-06, "loss": 0.0035, "step": 70760 }, { "epoch": 1.1579808557637241, "grad_norm": 0.05710349977016449, "learning_rate": 4.494884724090922e-06, "loss": 0.0013, "step": 70770 }, { "epoch": 1.1581444817148, "grad_norm": 0.13150498270988464, "learning_rate": 4.493464143958843e-06, "loss": 0.0017, "step": 70780 }, { "epoch": 1.1583081076658759, "grad_norm": 0.07247708737850189, "learning_rate": 4.492043605138108e-06, "loss": 0.0021, "step": 70790 }, { "epoch": 1.1584717336169517, "grad_norm": 0.11595527082681656, "learning_rate": 4.490623107744566e-06, "loss": 0.0016, "step": 70800 }, { "epoch": 1.1586353595680274, "grad_norm": 0.07763620465993881, "learning_rate": 4.489202651894069e-06, "loss": 0.0016, "step": 70810 }, { "epoch": 1.1587989855191032, "grad_norm": 0.06980343163013458, "learning_rate": 4.487782237702467e-06, "loss": 0.0008, "step": 70820 }, { "epoch": 1.1589626114701792, "grad_norm": 0.07394812256097794, "learning_rate": 4.4863618652856026e-06, "loss": 0.0009, "step": 70830 }, { "epoch": 1.159126237421255, "grad_norm": 0.06336156278848648, "learning_rate": 4.484941534759317e-06, "loss": 0.0009, "step": 70840 }, { "epoch": 1.1592898633723308, "grad_norm": 0.06244694069027901, "learning_rate": 4.4835212462394475e-06, "loss": 0.0015, "step": 70850 }, { "epoch": 1.1594534893234067, "grad_norm": 0.045786213129758835, "learning_rate": 4.482100999841828e-06, "loss": 0.0008, "step": 70860 }, { "epoch": 1.1596171152744825, "grad_norm": 0.07851941883563995, "learning_rate": 4.4806807956822885e-06, "loss": 0.0018, "step": 70870 }, { "epoch": 1.1597807412255583, "grad_norm": 0.09095162153244019, "learning_rate": 4.4792606338766565e-06, "loss": 0.0014, "step": 70880 }, { "epoch": 1.1599443671766343, "grad_norm": 0.005531649570912123, "learning_rate": 4.477840514540756e-06, "loss": 0.0014, "step": 70890 }, { "epoch": 1.16010799312771, "grad_norm": 0.03555836156010628, "learning_rate": 4.476420437790407e-06, "loss": 0.0019, "step": 70900 }, { "epoch": 1.1602716190787858, "grad_norm": 0.014247196726500988, "learning_rate": 4.475000403741424e-06, "loss": 0.0009, "step": 70910 }, { "epoch": 1.1604352450298618, "grad_norm": 0.03097921423614025, "learning_rate": 4.473580412509623e-06, "loss": 0.0014, "step": 70920 }, { "epoch": 1.1605988709809376, "grad_norm": 0.019966501742601395, "learning_rate": 4.472160464210814e-06, "loss": 0.0025, "step": 70930 }, { "epoch": 1.1607624969320134, "grad_norm": 0.0065721082501113415, "learning_rate": 4.470740558960799e-06, "loss": 0.0019, "step": 70940 }, { "epoch": 1.1609261228830892, "grad_norm": 0.03560645878314972, "learning_rate": 4.469320696875385e-06, "loss": 0.0013, "step": 70950 }, { "epoch": 1.1610897488341652, "grad_norm": 0.027652781456708908, "learning_rate": 4.467900878070369e-06, "loss": 0.0012, "step": 70960 }, { "epoch": 1.161253374785241, "grad_norm": 0.09856000542640686, "learning_rate": 4.466481102661546e-06, "loss": 0.0015, "step": 70970 }, { "epoch": 1.1614170007363167, "grad_norm": 0.03611590340733528, "learning_rate": 4.465061370764711e-06, "loss": 0.0012, "step": 70980 }, { "epoch": 1.1615806266873927, "grad_norm": 0.009698276408016682, "learning_rate": 4.463641682495648e-06, "loss": 0.0009, "step": 70990 }, { "epoch": 1.1617442526384685, "grad_norm": 0.07362033426761627, "learning_rate": 4.462222037970147e-06, "loss": 0.0012, "step": 71000 }, { "epoch": 1.1619078785895443, "grad_norm": 0.02714143507182598, "learning_rate": 4.460802437303986e-06, "loss": 0.0011, "step": 71010 }, { "epoch": 1.16207150454062, "grad_norm": 0.020419219508767128, "learning_rate": 4.459382880612943e-06, "loss": 0.0013, "step": 71020 }, { "epoch": 1.162235130491696, "grad_norm": 0.04754084348678589, "learning_rate": 4.457963368012794e-06, "loss": 0.0012, "step": 71030 }, { "epoch": 1.1623987564427718, "grad_norm": 0.06855122745037079, "learning_rate": 4.456543899619308e-06, "loss": 0.0009, "step": 71040 }, { "epoch": 1.1625623823938476, "grad_norm": 0.037121742963790894, "learning_rate": 4.455124475548253e-06, "loss": 0.0011, "step": 71050 }, { "epoch": 1.1627260083449236, "grad_norm": 0.0170463677495718, "learning_rate": 4.453705095915391e-06, "loss": 0.0007, "step": 71060 }, { "epoch": 1.1628896342959993, "grad_norm": 0.10404789447784424, "learning_rate": 4.452285760836484e-06, "loss": 0.0013, "step": 71070 }, { "epoch": 1.1630532602470751, "grad_norm": 0.027800675481557846, "learning_rate": 4.4508664704272855e-06, "loss": 0.0015, "step": 71080 }, { "epoch": 1.1632168861981511, "grad_norm": 0.05662749707698822, "learning_rate": 4.44944722480355e-06, "loss": 0.0015, "step": 71090 }, { "epoch": 1.1633805121492269, "grad_norm": 0.07210488617420197, "learning_rate": 4.448028024081026e-06, "loss": 0.0015, "step": 71100 }, { "epoch": 1.1635441381003027, "grad_norm": 0.04680858179926872, "learning_rate": 4.446608868375458e-06, "loss": 0.0017, "step": 71110 }, { "epoch": 1.1637077640513787, "grad_norm": 0.006319742649793625, "learning_rate": 4.4451897578025885e-06, "loss": 0.0027, "step": 71120 }, { "epoch": 1.1638713900024544, "grad_norm": 0.03585018590092659, "learning_rate": 4.443770692478154e-06, "loss": 0.0022, "step": 71130 }, { "epoch": 1.1640350159535302, "grad_norm": 0.06705167889595032, "learning_rate": 4.4423516725178895e-06, "loss": 0.0011, "step": 71140 }, { "epoch": 1.164198641904606, "grad_norm": 0.043585583567619324, "learning_rate": 4.440932698037525e-06, "loss": 0.0012, "step": 71150 }, { "epoch": 1.164362267855682, "grad_norm": 0.042774494737386703, "learning_rate": 4.439513769152788e-06, "loss": 0.0025, "step": 71160 }, { "epoch": 1.1645258938067578, "grad_norm": 0.08851239085197449, "learning_rate": 4.438094885979401e-06, "loss": 0.0009, "step": 71170 }, { "epoch": 1.1646895197578335, "grad_norm": 0.018701424822211266, "learning_rate": 4.436676048633083e-06, "loss": 0.001, "step": 71180 }, { "epoch": 1.1648531457089095, "grad_norm": 0.04496646672487259, "learning_rate": 4.43525725722955e-06, "loss": 0.0016, "step": 71190 }, { "epoch": 1.1650167716599853, "grad_norm": 0.044203028082847595, "learning_rate": 4.433838511884514e-06, "loss": 0.0018, "step": 71200 }, { "epoch": 1.165180397611061, "grad_norm": 0.2142196148633957, "learning_rate": 4.432419812713683e-06, "loss": 0.0014, "step": 71210 }, { "epoch": 1.1653440235621368, "grad_norm": 0.02357359044253826, "learning_rate": 4.4310011598327605e-06, "loss": 0.0007, "step": 71220 }, { "epoch": 1.1655076495132128, "grad_norm": 0.06358008831739426, "learning_rate": 4.4295825533574475e-06, "loss": 0.0025, "step": 71230 }, { "epoch": 1.1656712754642886, "grad_norm": 0.009030045010149479, "learning_rate": 4.428163993403441e-06, "loss": 0.0007, "step": 71240 }, { "epoch": 1.1658349014153644, "grad_norm": 0.12800511717796326, "learning_rate": 4.426745480086435e-06, "loss": 0.0011, "step": 71250 }, { "epoch": 1.1659985273664404, "grad_norm": 0.018625780940055847, "learning_rate": 4.425327013522116e-06, "loss": 0.0016, "step": 71260 }, { "epoch": 1.1661621533175162, "grad_norm": 0.04585704952478409, "learning_rate": 4.423908593826169e-06, "loss": 0.002, "step": 71270 }, { "epoch": 1.166325779268592, "grad_norm": 0.07561267167329788, "learning_rate": 4.422490221114279e-06, "loss": 0.0005, "step": 71280 }, { "epoch": 1.166489405219668, "grad_norm": 0.04992244020104408, "learning_rate": 4.421071895502121e-06, "loss": 0.001, "step": 71290 }, { "epoch": 1.1666530311707437, "grad_norm": 0.0051212003454566, "learning_rate": 4.419653617105369e-06, "loss": 0.0006, "step": 71300 }, { "epoch": 1.1668166571218195, "grad_norm": 0.06859225779771805, "learning_rate": 4.418235386039695e-06, "loss": 0.0017, "step": 71310 }, { "epoch": 1.1669802830728955, "grad_norm": 0.07315724343061447, "learning_rate": 4.416817202420762e-06, "loss": 0.0009, "step": 71320 }, { "epoch": 1.1671439090239712, "grad_norm": 0.027722079306840897, "learning_rate": 4.415399066364235e-06, "loss": 0.0014, "step": 71330 }, { "epoch": 1.167307534975047, "grad_norm": 0.08444797247648239, "learning_rate": 4.4139809779857705e-06, "loss": 0.0012, "step": 71340 }, { "epoch": 1.1674711609261228, "grad_norm": 0.08554187417030334, "learning_rate": 4.412562937401024e-06, "loss": 0.0008, "step": 71350 }, { "epoch": 1.1676347868771988, "grad_norm": 0.04930735379457474, "learning_rate": 4.411144944725645e-06, "loss": 0.002, "step": 71360 }, { "epoch": 1.1677984128282746, "grad_norm": 0.04503967985510826, "learning_rate": 4.409727000075281e-06, "loss": 0.0013, "step": 71370 }, { "epoch": 1.1679620387793503, "grad_norm": 0.05321275442838669, "learning_rate": 4.408309103565575e-06, "loss": 0.0011, "step": 71380 }, { "epoch": 1.1681256647304263, "grad_norm": 0.007587939966470003, "learning_rate": 4.406891255312166e-06, "loss": 0.0005, "step": 71390 }, { "epoch": 1.1682892906815021, "grad_norm": 0.013766760006546974, "learning_rate": 4.405473455430687e-06, "loss": 0.0009, "step": 71400 }, { "epoch": 1.1684529166325779, "grad_norm": 0.07259071618318558, "learning_rate": 4.404055704036771e-06, "loss": 0.0014, "step": 71410 }, { "epoch": 1.1686165425836537, "grad_norm": 0.061089012771844864, "learning_rate": 4.402638001246044e-06, "loss": 0.0022, "step": 71420 }, { "epoch": 1.1687801685347297, "grad_norm": 0.05355614051222801, "learning_rate": 4.401220347174129e-06, "loss": 0.0012, "step": 71430 }, { "epoch": 1.1689437944858054, "grad_norm": 0.07587563991546631, "learning_rate": 4.399802741936646e-06, "loss": 0.0022, "step": 71440 }, { "epoch": 1.1691074204368812, "grad_norm": 0.010608148761093616, "learning_rate": 4.3983851856492084e-06, "loss": 0.0007, "step": 71450 }, { "epoch": 1.1692710463879572, "grad_norm": 0.07015317678451538, "learning_rate": 4.396967678427428e-06, "loss": 0.0018, "step": 71460 }, { "epoch": 1.169434672339033, "grad_norm": 0.050035253167152405, "learning_rate": 4.395550220386913e-06, "loss": 0.0022, "step": 71470 }, { "epoch": 1.1695982982901088, "grad_norm": 0.04335442930459976, "learning_rate": 4.394132811643266e-06, "loss": 0.0008, "step": 71480 }, { "epoch": 1.1697619242411847, "grad_norm": 0.05794442817568779, "learning_rate": 4.392715452312084e-06, "loss": 0.0015, "step": 71490 }, { "epoch": 1.1699255501922605, "grad_norm": 0.05498574301600456, "learning_rate": 4.391298142508964e-06, "loss": 0.0014, "step": 71500 }, { "epoch": 1.1700891761433363, "grad_norm": 0.0739671140909195, "learning_rate": 4.389880882349497e-06, "loss": 0.0015, "step": 71510 }, { "epoch": 1.1702528020944123, "grad_norm": 0.12752996385097504, "learning_rate": 4.3884636719492694e-06, "loss": 0.0045, "step": 71520 }, { "epoch": 1.170416428045488, "grad_norm": 0.11580020189285278, "learning_rate": 4.387046511423863e-06, "loss": 0.0012, "step": 71530 }, { "epoch": 1.1705800539965638, "grad_norm": 0.09024364501237869, "learning_rate": 4.385629400888859e-06, "loss": 0.0022, "step": 71540 }, { "epoch": 1.1707436799476396, "grad_norm": 0.09970027208328247, "learning_rate": 4.384212340459831e-06, "loss": 0.0012, "step": 71550 }, { "epoch": 1.1709073058987156, "grad_norm": 0.07124566286802292, "learning_rate": 4.3827953302523485e-06, "loss": 0.0013, "step": 71560 }, { "epoch": 1.1710709318497914, "grad_norm": 0.05069902911782265, "learning_rate": 4.38137837038198e-06, "loss": 0.0013, "step": 71570 }, { "epoch": 1.1712345578008672, "grad_norm": 0.037889789789915085, "learning_rate": 4.379961460964287e-06, "loss": 0.0018, "step": 71580 }, { "epoch": 1.171398183751943, "grad_norm": 0.023270778357982635, "learning_rate": 4.378544602114826e-06, "loss": 0.0023, "step": 71590 }, { "epoch": 1.171561809703019, "grad_norm": 0.05484585836529732, "learning_rate": 4.377127793949154e-06, "loss": 0.0009, "step": 71600 }, { "epoch": 1.1717254356540947, "grad_norm": 0.05102138593792915, "learning_rate": 4.375711036582819e-06, "loss": 0.0022, "step": 71610 }, { "epoch": 1.1718890616051705, "grad_norm": 0.04460024833679199, "learning_rate": 4.374294330131369e-06, "loss": 0.0012, "step": 71620 }, { "epoch": 1.1720526875562465, "grad_norm": 0.15503236651420593, "learning_rate": 4.372877674710344e-06, "loss": 0.0017, "step": 71630 }, { "epoch": 1.1722163135073222, "grad_norm": 0.10285632312297821, "learning_rate": 4.371461070435283e-06, "loss": 0.0037, "step": 71640 }, { "epoch": 1.172379939458398, "grad_norm": 0.04496202617883682, "learning_rate": 4.3700445174217175e-06, "loss": 0.0014, "step": 71650 }, { "epoch": 1.172543565409474, "grad_norm": 0.015057187527418137, "learning_rate": 4.368628015785178e-06, "loss": 0.0009, "step": 71660 }, { "epoch": 1.1727071913605498, "grad_norm": 0.08583702892065048, "learning_rate": 4.367211565641189e-06, "loss": 0.0012, "step": 71670 }, { "epoch": 1.1728708173116256, "grad_norm": 0.15659911930561066, "learning_rate": 4.365795167105273e-06, "loss": 0.001, "step": 71680 }, { "epoch": 1.1730344432627016, "grad_norm": 0.011225015856325626, "learning_rate": 4.3643788202929446e-06, "loss": 0.0013, "step": 71690 }, { "epoch": 1.1731980692137773, "grad_norm": 0.03144923597574234, "learning_rate": 4.3629625253197176e-06, "loss": 0.0017, "step": 71700 }, { "epoch": 1.1733616951648531, "grad_norm": 0.14824244379997253, "learning_rate": 4.361546282301099e-06, "loss": 0.0019, "step": 71710 }, { "epoch": 1.173525321115929, "grad_norm": 0.02394001930952072, "learning_rate": 4.360130091352594e-06, "loss": 0.0013, "step": 71720 }, { "epoch": 1.1736889470670049, "grad_norm": 0.03789530694484711, "learning_rate": 4.358713952589702e-06, "loss": 0.0008, "step": 71730 }, { "epoch": 1.1738525730180807, "grad_norm": 0.04036388546228409, "learning_rate": 4.357297866127917e-06, "loss": 0.0017, "step": 71740 }, { "epoch": 1.1740161989691564, "grad_norm": 0.0778418779373169, "learning_rate": 4.3558818320827325e-06, "loss": 0.0012, "step": 71750 }, { "epoch": 1.1741798249202324, "grad_norm": 0.056500453501939774, "learning_rate": 4.354465850569634e-06, "loss": 0.0013, "step": 71760 }, { "epoch": 1.1743434508713082, "grad_norm": 0.054508715867996216, "learning_rate": 4.3530499217041036e-06, "loss": 0.002, "step": 71770 }, { "epoch": 1.174507076822384, "grad_norm": 0.10001111775636673, "learning_rate": 4.351634045601621e-06, "loss": 0.0011, "step": 71780 }, { "epoch": 1.1746707027734598, "grad_norm": 0.040598511695861816, "learning_rate": 4.35021822237766e-06, "loss": 0.0019, "step": 71790 }, { "epoch": 1.1748343287245357, "grad_norm": 0.06470203399658203, "learning_rate": 4.348802452147689e-06, "loss": 0.0017, "step": 71800 }, { "epoch": 1.1749979546756115, "grad_norm": 0.07201012223958969, "learning_rate": 4.347386735027176e-06, "loss": 0.0023, "step": 71810 }, { "epoch": 1.1751615806266873, "grad_norm": 0.03699260577559471, "learning_rate": 4.34597107113158e-06, "loss": 0.001, "step": 71820 }, { "epoch": 1.1753252065777633, "grad_norm": 0.002159158466383815, "learning_rate": 4.344555460576358e-06, "loss": 0.0029, "step": 71830 }, { "epoch": 1.175488832528839, "grad_norm": 0.05194035544991493, "learning_rate": 4.343139903476963e-06, "loss": 0.0018, "step": 71840 }, { "epoch": 1.1756524584799148, "grad_norm": 0.08302514255046844, "learning_rate": 4.341724399948842e-06, "loss": 0.0014, "step": 71850 }, { "epoch": 1.1758160844309908, "grad_norm": 0.005879908334463835, "learning_rate": 4.34030895010744e-06, "loss": 0.0014, "step": 71860 }, { "epoch": 1.1759797103820666, "grad_norm": 0.03511945903301239, "learning_rate": 4.338893554068195e-06, "loss": 0.0021, "step": 71870 }, { "epoch": 1.1761433363331424, "grad_norm": 0.12251469492912292, "learning_rate": 4.337478211946543e-06, "loss": 0.0024, "step": 71880 }, { "epoch": 1.1763069622842184, "grad_norm": 0.1464211642742157, "learning_rate": 4.336062923857914e-06, "loss": 0.0024, "step": 71890 }, { "epoch": 1.1764705882352942, "grad_norm": 0.05445133149623871, "learning_rate": 4.334647689917734e-06, "loss": 0.0023, "step": 71900 }, { "epoch": 1.17663421418637, "grad_norm": 0.06458507478237152, "learning_rate": 4.333232510241424e-06, "loss": 0.0014, "step": 71910 }, { "epoch": 1.1767978401374457, "grad_norm": 0.016902439296245575, "learning_rate": 4.331817384944402e-06, "loss": 0.0019, "step": 71920 }, { "epoch": 1.1769614660885217, "grad_norm": 0.02908494882285595, "learning_rate": 4.330402314142081e-06, "loss": 0.0014, "step": 71930 }, { "epoch": 1.1771250920395975, "grad_norm": 0.002099070930853486, "learning_rate": 4.328987297949869e-06, "loss": 0.0018, "step": 71940 }, { "epoch": 1.1772887179906733, "grad_norm": 0.01586686260998249, "learning_rate": 4.32757233648317e-06, "loss": 0.0008, "step": 71950 }, { "epoch": 1.1774523439417492, "grad_norm": 0.06389237195253372, "learning_rate": 4.3261574298573835e-06, "loss": 0.0016, "step": 71960 }, { "epoch": 1.177615969892825, "grad_norm": 0.04454488307237625, "learning_rate": 4.3247425781879045e-06, "loss": 0.0019, "step": 71970 }, { "epoch": 1.1777795958439008, "grad_norm": 0.08135736733675003, "learning_rate": 4.323327781590123e-06, "loss": 0.0009, "step": 71980 }, { "epoch": 1.1779432217949766, "grad_norm": 0.05804780498147011, "learning_rate": 4.321913040179426e-06, "loss": 0.0015, "step": 71990 }, { "epoch": 1.1781068477460526, "grad_norm": 0.04606780409812927, "learning_rate": 4.320498354071193e-06, "loss": 0.0016, "step": 72000 }, { "epoch": 1.1782704736971283, "grad_norm": 0.03280048072338104, "learning_rate": 4.319083723380804e-06, "loss": 0.0019, "step": 72010 }, { "epoch": 1.1784340996482041, "grad_norm": 0.03805740550160408, "learning_rate": 4.317669148223629e-06, "loss": 0.0015, "step": 72020 }, { "epoch": 1.17859772559928, "grad_norm": 0.046157293021678925, "learning_rate": 4.316254628715038e-06, "loss": 0.0011, "step": 72030 }, { "epoch": 1.1787613515503559, "grad_norm": 0.0754895955324173, "learning_rate": 4.314840164970392e-06, "loss": 0.0015, "step": 72040 }, { "epoch": 1.1789249775014317, "grad_norm": 0.0036848357412964106, "learning_rate": 4.313425757105051e-06, "loss": 0.0013, "step": 72050 }, { "epoch": 1.1790886034525077, "grad_norm": 0.04650893062353134, "learning_rate": 4.31201140523437e-06, "loss": 0.0019, "step": 72060 }, { "epoch": 1.1792522294035834, "grad_norm": 0.04566042125225067, "learning_rate": 4.3105971094736975e-06, "loss": 0.0015, "step": 72070 }, { "epoch": 1.1794158553546592, "grad_norm": 0.1227894052863121, "learning_rate": 4.309182869938379e-06, "loss": 0.0029, "step": 72080 }, { "epoch": 1.1795794813057352, "grad_norm": 0.04693392664194107, "learning_rate": 4.307768686743756e-06, "loss": 0.0014, "step": 72090 }, { "epoch": 1.179743107256811, "grad_norm": 0.11026965826749802, "learning_rate": 4.306354560005163e-06, "loss": 0.0018, "step": 72100 }, { "epoch": 1.1799067332078867, "grad_norm": 0.046785108745098114, "learning_rate": 4.304940489837932e-06, "loss": 0.0012, "step": 72110 }, { "epoch": 1.1800703591589625, "grad_norm": 0.07196402549743652, "learning_rate": 4.303526476357391e-06, "loss": 0.0022, "step": 72120 }, { "epoch": 1.1802339851100385, "grad_norm": 0.023471105843782425, "learning_rate": 4.30211251967886e-06, "loss": 0.0012, "step": 72130 }, { "epoch": 1.1803976110611143, "grad_norm": 0.0903412401676178, "learning_rate": 4.300698619917658e-06, "loss": 0.0008, "step": 72140 }, { "epoch": 1.18056123701219, "grad_norm": 0.01290425006300211, "learning_rate": 4.2992847771890965e-06, "loss": 0.001, "step": 72150 }, { "epoch": 1.180724862963266, "grad_norm": 0.06439687311649323, "learning_rate": 4.297870991608485e-06, "loss": 0.0027, "step": 72160 }, { "epoch": 1.1808884889143418, "grad_norm": 0.030832646414637566, "learning_rate": 4.296457263291127e-06, "loss": 0.0012, "step": 72170 }, { "epoch": 1.1810521148654176, "grad_norm": 0.018244683742523193, "learning_rate": 4.295043592352321e-06, "loss": 0.0011, "step": 72180 }, { "epoch": 1.1812157408164934, "grad_norm": 0.0346740260720253, "learning_rate": 4.29362997890736e-06, "loss": 0.001, "step": 72190 }, { "epoch": 1.1813793667675694, "grad_norm": 0.032389428466558456, "learning_rate": 4.2922164230715355e-06, "loss": 0.0017, "step": 72200 }, { "epoch": 1.1815429927186452, "grad_norm": 0.016945354640483856, "learning_rate": 4.290802924960132e-06, "loss": 0.0014, "step": 72210 }, { "epoch": 1.181706618669721, "grad_norm": 0.030116165056824684, "learning_rate": 4.289389484688429e-06, "loss": 0.0016, "step": 72220 }, { "epoch": 1.181870244620797, "grad_norm": 0.028042813763022423, "learning_rate": 4.287976102371701e-06, "loss": 0.0013, "step": 72230 }, { "epoch": 1.1820338705718727, "grad_norm": 0.028691904619336128, "learning_rate": 4.28656277812522e-06, "loss": 0.0022, "step": 72240 }, { "epoch": 1.1821974965229485, "grad_norm": 0.0035471832379698753, "learning_rate": 4.285149512064252e-06, "loss": 0.0008, "step": 72250 }, { "epoch": 1.1823611224740245, "grad_norm": 0.011386027559638023, "learning_rate": 4.283736304304057e-06, "loss": 0.0017, "step": 72260 }, { "epoch": 1.1825247484251002, "grad_norm": 0.12985564768314362, "learning_rate": 4.282323154959892e-06, "loss": 0.0019, "step": 72270 }, { "epoch": 1.182688374376176, "grad_norm": 0.06605116277933121, "learning_rate": 4.280910064147009e-06, "loss": 0.0012, "step": 72280 }, { "epoch": 1.182852000327252, "grad_norm": 0.031029747799038887, "learning_rate": 4.279497031980654e-06, "loss": 0.0007, "step": 72290 }, { "epoch": 1.1830156262783278, "grad_norm": 0.04837345704436302, "learning_rate": 4.278084058576071e-06, "loss": 0.0015, "step": 72300 }, { "epoch": 1.1831792522294036, "grad_norm": 0.04557085409760475, "learning_rate": 4.276671144048495e-06, "loss": 0.0008, "step": 72310 }, { "epoch": 1.1833428781804793, "grad_norm": 0.05936750769615173, "learning_rate": 4.27525828851316e-06, "loss": 0.0012, "step": 72320 }, { "epoch": 1.1835065041315553, "grad_norm": 0.08688812702894211, "learning_rate": 4.273845492085293e-06, "loss": 0.0019, "step": 72330 }, { "epoch": 1.1836701300826311, "grad_norm": 0.024858104065060616, "learning_rate": 4.272432754880117e-06, "loss": 0.0012, "step": 72340 }, { "epoch": 1.1838337560337069, "grad_norm": 0.08039912581443787, "learning_rate": 4.27102007701285e-06, "loss": 0.0013, "step": 72350 }, { "epoch": 1.1839973819847829, "grad_norm": 0.04867817834019661, "learning_rate": 4.269607458598705e-06, "loss": 0.0012, "step": 72360 }, { "epoch": 1.1841610079358587, "grad_norm": 0.020656369626522064, "learning_rate": 4.268194899752891e-06, "loss": 0.0015, "step": 72370 }, { "epoch": 1.1843246338869344, "grad_norm": 0.05521941930055618, "learning_rate": 4.2667824005906116e-06, "loss": 0.0007, "step": 72380 }, { "epoch": 1.1844882598380102, "grad_norm": 0.05653200298547745, "learning_rate": 4.265369961227065e-06, "loss": 0.0013, "step": 72390 }, { "epoch": 1.1846518857890862, "grad_norm": 0.0017546155722811818, "learning_rate": 4.2639575817774445e-06, "loss": 0.0012, "step": 72400 }, { "epoch": 1.184815511740162, "grad_norm": 0.10608597844839096, "learning_rate": 4.262545262356939e-06, "loss": 0.0018, "step": 72410 }, { "epoch": 1.1849791376912377, "grad_norm": 0.07191050797700882, "learning_rate": 4.261133003080733e-06, "loss": 0.0019, "step": 72420 }, { "epoch": 1.1851427636423137, "grad_norm": 0.030409125611186028, "learning_rate": 4.259720804064007e-06, "loss": 0.0005, "step": 72430 }, { "epoch": 1.1853063895933895, "grad_norm": 0.04548092558979988, "learning_rate": 4.258308665421932e-06, "loss": 0.0012, "step": 72440 }, { "epoch": 1.1854700155444653, "grad_norm": 0.037514809519052505, "learning_rate": 4.256896587269679e-06, "loss": 0.0024, "step": 72450 }, { "epoch": 1.1856336414955413, "grad_norm": 0.04791383445262909, "learning_rate": 4.255484569722412e-06, "loss": 0.0012, "step": 72460 }, { "epoch": 1.185797267446617, "grad_norm": 0.03941550850868225, "learning_rate": 4.254072612895291e-06, "loss": 0.001, "step": 72470 }, { "epoch": 1.1859608933976928, "grad_norm": 0.11777466535568237, "learning_rate": 4.252660716903469e-06, "loss": 0.0017, "step": 72480 }, { "epoch": 1.1861245193487688, "grad_norm": 0.07658804953098297, "learning_rate": 4.251248881862096e-06, "loss": 0.0011, "step": 72490 }, { "epoch": 1.1862881452998446, "grad_norm": 0.0373791866004467, "learning_rate": 4.249837107886318e-06, "loss": 0.0012, "step": 72500 }, { "epoch": 1.1864517712509204, "grad_norm": 0.059319544583559036, "learning_rate": 4.248425395091273e-06, "loss": 0.001, "step": 72510 }, { "epoch": 1.1866153972019962, "grad_norm": 0.01449304074048996, "learning_rate": 4.247013743592095e-06, "loss": 0.0009, "step": 72520 }, { "epoch": 1.1867790231530722, "grad_norm": 0.08761975914239883, "learning_rate": 4.245602153503915e-06, "loss": 0.0011, "step": 72530 }, { "epoch": 1.186942649104148, "grad_norm": 0.038129281252622604, "learning_rate": 4.244190624941857e-06, "loss": 0.0009, "step": 72540 }, { "epoch": 1.1871062750552237, "grad_norm": 0.03196340054273605, "learning_rate": 4.24277915802104e-06, "loss": 0.0017, "step": 72550 }, { "epoch": 1.1872699010062995, "grad_norm": 0.043143380433321, "learning_rate": 4.241367752856578e-06, "loss": 0.0014, "step": 72560 }, { "epoch": 1.1874335269573755, "grad_norm": 0.05428246408700943, "learning_rate": 4.23995640956358e-06, "loss": 0.001, "step": 72570 }, { "epoch": 1.1875971529084512, "grad_norm": 0.014837591908872128, "learning_rate": 4.238545128257154e-06, "loss": 0.002, "step": 72580 }, { "epoch": 1.187760778859527, "grad_norm": 0.024181470274925232, "learning_rate": 4.237133909052394e-06, "loss": 0.002, "step": 72590 }, { "epoch": 1.187924404810603, "grad_norm": 0.04305800795555115, "learning_rate": 4.235722752064398e-06, "loss": 0.0009, "step": 72600 }, { "epoch": 1.1880880307616788, "grad_norm": 0.14382657408714294, "learning_rate": 4.2343116574082535e-06, "loss": 0.0018, "step": 72610 }, { "epoch": 1.1882516567127546, "grad_norm": 0.05452917516231537, "learning_rate": 4.232900625199046e-06, "loss": 0.0011, "step": 72620 }, { "epoch": 1.1884152826638306, "grad_norm": 0.002851971657946706, "learning_rate": 4.2314896555518525e-06, "loss": 0.0009, "step": 72630 }, { "epoch": 1.1885789086149063, "grad_norm": 0.028746698051691055, "learning_rate": 4.230078748581749e-06, "loss": 0.0013, "step": 72640 }, { "epoch": 1.1887425345659821, "grad_norm": 0.08779220283031464, "learning_rate": 4.228667904403803e-06, "loss": 0.0019, "step": 72650 }, { "epoch": 1.188906160517058, "grad_norm": 0.002911148825660348, "learning_rate": 4.227257123133078e-06, "loss": 0.0014, "step": 72660 }, { "epoch": 1.1890697864681339, "grad_norm": 0.037185221910476685, "learning_rate": 4.225846404884633e-06, "loss": 0.0012, "step": 72670 }, { "epoch": 1.1892334124192097, "grad_norm": 0.02794252336025238, "learning_rate": 4.224435749773522e-06, "loss": 0.0015, "step": 72680 }, { "epoch": 1.1893970383702854, "grad_norm": 0.09844185411930084, "learning_rate": 4.223025157914792e-06, "loss": 0.0016, "step": 72690 }, { "epoch": 1.1895606643213614, "grad_norm": 0.16257110238075256, "learning_rate": 4.221614629423487e-06, "loss": 0.0013, "step": 72700 }, { "epoch": 1.1897242902724372, "grad_norm": 0.04554278403520584, "learning_rate": 4.220204164414644e-06, "loss": 0.0014, "step": 72710 }, { "epoch": 1.189887916223513, "grad_norm": 0.17031581699848175, "learning_rate": 4.218793763003296e-06, "loss": 0.0018, "step": 72720 }, { "epoch": 1.190051542174589, "grad_norm": 0.03764116391539574, "learning_rate": 4.217383425304472e-06, "loss": 0.0015, "step": 72730 }, { "epoch": 1.1902151681256647, "grad_norm": 0.08119495958089828, "learning_rate": 4.215973151433193e-06, "loss": 0.001, "step": 72740 }, { "epoch": 1.1903787940767405, "grad_norm": 0.040655434131622314, "learning_rate": 4.2145629415044754e-06, "loss": 0.0027, "step": 72750 }, { "epoch": 1.1905424200278163, "grad_norm": 0.021082930266857147, "learning_rate": 4.213152795633332e-06, "loss": 0.0011, "step": 72760 }, { "epoch": 1.1907060459788923, "grad_norm": 0.09340421855449677, "learning_rate": 4.2117427139347696e-06, "loss": 0.0013, "step": 72770 }, { "epoch": 1.190869671929968, "grad_norm": 0.0315910279750824, "learning_rate": 4.21033269652379e-06, "loss": 0.0016, "step": 72780 }, { "epoch": 1.1910332978810438, "grad_norm": 0.06637918949127197, "learning_rate": 4.208922743515389e-06, "loss": 0.0009, "step": 72790 }, { "epoch": 1.1911969238321198, "grad_norm": 0.024217359721660614, "learning_rate": 4.207512855024557e-06, "loss": 0.0028, "step": 72800 }, { "epoch": 1.1913605497831956, "grad_norm": 0.24659863114356995, "learning_rate": 4.206103031166281e-06, "loss": 0.0012, "step": 72810 }, { "epoch": 1.1915241757342714, "grad_norm": 0.028071578592061996, "learning_rate": 4.20469327205554e-06, "loss": 0.0008, "step": 72820 }, { "epoch": 1.1916878016853474, "grad_norm": 0.016061635687947273, "learning_rate": 4.20328357780731e-06, "loss": 0.0014, "step": 72830 }, { "epoch": 1.1918514276364232, "grad_norm": 0.022765960544347763, "learning_rate": 4.201873948536561e-06, "loss": 0.001, "step": 72840 }, { "epoch": 1.192015053587499, "grad_norm": 0.05193815752863884, "learning_rate": 4.200464384358257e-06, "loss": 0.0009, "step": 72850 }, { "epoch": 1.192178679538575, "grad_norm": 0.04159295931458473, "learning_rate": 4.199054885387359e-06, "loss": 0.0011, "step": 72860 }, { "epoch": 1.1923423054896507, "grad_norm": 0.07398375868797302, "learning_rate": 4.197645451738819e-06, "loss": 0.001, "step": 72870 }, { "epoch": 1.1925059314407265, "grad_norm": 0.07873750478029251, "learning_rate": 4.196236083527585e-06, "loss": 0.0006, "step": 72880 }, { "epoch": 1.1926695573918022, "grad_norm": 0.15952168405056, "learning_rate": 4.194826780868602e-06, "loss": 0.0018, "step": 72890 }, { "epoch": 1.1928331833428782, "grad_norm": 0.05892939120531082, "learning_rate": 4.193417543876806e-06, "loss": 0.0014, "step": 72900 }, { "epoch": 1.192996809293954, "grad_norm": 0.08886957168579102, "learning_rate": 4.192008372667133e-06, "loss": 0.0011, "step": 72910 }, { "epoch": 1.1931604352450298, "grad_norm": 0.0091168861836195, "learning_rate": 4.190599267354507e-06, "loss": 0.0008, "step": 72920 }, { "epoch": 1.1933240611961058, "grad_norm": 0.06435015052556992, "learning_rate": 4.189190228053851e-06, "loss": 0.0015, "step": 72930 }, { "epoch": 1.1934876871471816, "grad_norm": 0.06907084584236145, "learning_rate": 4.1877812548800816e-06, "loss": 0.001, "step": 72940 }, { "epoch": 1.1936513130982573, "grad_norm": 0.027643613517284393, "learning_rate": 4.186372347948109e-06, "loss": 0.0036, "step": 72950 }, { "epoch": 1.1938149390493331, "grad_norm": 0.09798567742109299, "learning_rate": 4.18496350737284e-06, "loss": 0.0012, "step": 72960 }, { "epoch": 1.193978565000409, "grad_norm": 0.026236360892653465, "learning_rate": 4.1835547332691744e-06, "loss": 0.0011, "step": 72970 }, { "epoch": 1.1941421909514849, "grad_norm": 0.044981054961681366, "learning_rate": 4.182146025752007e-06, "loss": 0.0009, "step": 72980 }, { "epoch": 1.1943058169025607, "grad_norm": 0.05679431930184364, "learning_rate": 4.180737384936227e-06, "loss": 0.0011, "step": 72990 }, { "epoch": 1.1944694428536367, "grad_norm": 0.059831857681274414, "learning_rate": 4.1793288109367185e-06, "loss": 0.0016, "step": 73000 }, { "epoch": 1.1946330688047124, "grad_norm": 0.08455739170312881, "learning_rate": 4.17792030386836e-06, "loss": 0.0013, "step": 73010 }, { "epoch": 1.1947966947557882, "grad_norm": 0.03891890496015549, "learning_rate": 4.176511863846024e-06, "loss": 0.0026, "step": 73020 }, { "epoch": 1.1949603207068642, "grad_norm": 0.10676612704992294, "learning_rate": 4.17510349098458e-06, "loss": 0.0014, "step": 73030 }, { "epoch": 1.19512394665794, "grad_norm": 0.046449173241853714, "learning_rate": 4.1736951853988875e-06, "loss": 0.0013, "step": 73040 }, { "epoch": 1.1952875726090157, "grad_norm": 0.0191277377307415, "learning_rate": 4.1722869472038055e-06, "loss": 0.0012, "step": 73050 }, { "epoch": 1.1954511985600917, "grad_norm": 0.0341968759894371, "learning_rate": 4.170878776514183e-06, "loss": 0.0017, "step": 73060 }, { "epoch": 1.1956148245111675, "grad_norm": 0.08055173605680466, "learning_rate": 4.169470673444867e-06, "loss": 0.001, "step": 73070 }, { "epoch": 1.1957784504622433, "grad_norm": 0.016361836344003677, "learning_rate": 4.168062638110697e-06, "loss": 0.0005, "step": 73080 }, { "epoch": 1.195942076413319, "grad_norm": 0.05318368598818779, "learning_rate": 4.1666546706265074e-06, "loss": 0.001, "step": 73090 }, { "epoch": 1.196105702364395, "grad_norm": 0.05574622005224228, "learning_rate": 4.165246771107128e-06, "loss": 0.002, "step": 73100 }, { "epoch": 1.1962693283154708, "grad_norm": 0.02988249436020851, "learning_rate": 4.163838939667382e-06, "loss": 0.0009, "step": 73110 }, { "epoch": 1.1964329542665466, "grad_norm": 0.01302997674793005, "learning_rate": 4.162431176422087e-06, "loss": 0.0014, "step": 73120 }, { "epoch": 1.1965965802176226, "grad_norm": 0.07631079852581024, "learning_rate": 4.161023481486056e-06, "loss": 0.0022, "step": 73130 }, { "epoch": 1.1967602061686984, "grad_norm": 0.04220232740044594, "learning_rate": 4.159615854974095e-06, "loss": 0.0011, "step": 73140 }, { "epoch": 1.1969238321197742, "grad_norm": 0.05432287976145744, "learning_rate": 4.158208297001006e-06, "loss": 0.0014, "step": 73150 }, { "epoch": 1.19708745807085, "grad_norm": 0.00919813010841608, "learning_rate": 4.1568008076815835e-06, "loss": 0.0018, "step": 73160 }, { "epoch": 1.197251084021926, "grad_norm": 0.09239612519741058, "learning_rate": 4.155393387130618e-06, "loss": 0.0015, "step": 73170 }, { "epoch": 1.1974147099730017, "grad_norm": 0.01925583928823471, "learning_rate": 4.1539860354628965e-06, "loss": 0.0015, "step": 73180 }, { "epoch": 1.1975783359240775, "grad_norm": 0.0316462516784668, "learning_rate": 4.1525787527931945e-06, "loss": 0.0012, "step": 73190 }, { "epoch": 1.1977419618751535, "grad_norm": 0.06983600556850433, "learning_rate": 4.151171539236286e-06, "loss": 0.0022, "step": 73200 }, { "epoch": 1.1979055878262292, "grad_norm": 0.07768736034631729, "learning_rate": 4.149764394906938e-06, "loss": 0.0016, "step": 73210 }, { "epoch": 1.198069213777305, "grad_norm": 0.05781316012144089, "learning_rate": 4.148357319919915e-06, "loss": 0.0015, "step": 73220 }, { "epoch": 1.198232839728381, "grad_norm": 0.06035643815994263, "learning_rate": 4.146950314389969e-06, "loss": 0.001, "step": 73230 }, { "epoch": 1.1983964656794568, "grad_norm": 0.0749223455786705, "learning_rate": 4.145543378431855e-06, "loss": 0.0015, "step": 73240 }, { "epoch": 1.1985600916305326, "grad_norm": 0.0970311164855957, "learning_rate": 4.144136512160315e-06, "loss": 0.0014, "step": 73250 }, { "epoch": 1.1987237175816086, "grad_norm": 0.03978846222162247, "learning_rate": 4.142729715690089e-06, "loss": 0.0009, "step": 73260 }, { "epoch": 1.1988873435326843, "grad_norm": 0.08357556164264679, "learning_rate": 4.141322989135912e-06, "loss": 0.001, "step": 73270 }, { "epoch": 1.19905096948376, "grad_norm": 0.04062269255518913, "learning_rate": 4.139916332612509e-06, "loss": 0.0009, "step": 73280 }, { "epoch": 1.1992145954348359, "grad_norm": 0.1443643867969513, "learning_rate": 4.138509746234604e-06, "loss": 0.0015, "step": 73290 }, { "epoch": 1.1993782213859119, "grad_norm": 0.049388084560632706, "learning_rate": 4.137103230116914e-06, "loss": 0.002, "step": 73300 }, { "epoch": 1.1995418473369877, "grad_norm": 0.06220998987555504, "learning_rate": 4.135696784374148e-06, "loss": 0.0012, "step": 73310 }, { "epoch": 1.1997054732880634, "grad_norm": 0.045358214527368546, "learning_rate": 4.134290409121012e-06, "loss": 0.0012, "step": 73320 }, { "epoch": 1.1998690992391392, "grad_norm": 0.11870374530553818, "learning_rate": 4.1328841044722046e-06, "loss": 0.0019, "step": 73330 }, { "epoch": 1.2000327251902152, "grad_norm": 0.019955415278673172, "learning_rate": 4.131477870542419e-06, "loss": 0.0017, "step": 73340 }, { "epoch": 1.200196351141291, "grad_norm": 0.03544195368885994, "learning_rate": 4.130071707446344e-06, "loss": 0.0011, "step": 73350 }, { "epoch": 1.2003599770923667, "grad_norm": 0.047319281846284866, "learning_rate": 4.128665615298661e-06, "loss": 0.0021, "step": 73360 }, { "epoch": 1.2005236030434427, "grad_norm": 0.043404094874858856, "learning_rate": 4.127259594214044e-06, "loss": 0.001, "step": 73370 }, { "epoch": 1.2006872289945185, "grad_norm": 0.028605856001377106, "learning_rate": 4.125853644307167e-06, "loss": 0.0022, "step": 73380 }, { "epoch": 1.2008508549455943, "grad_norm": 0.0625309944152832, "learning_rate": 4.124447765692693e-06, "loss": 0.0012, "step": 73390 }, { "epoch": 1.2010144808966703, "grad_norm": 0.18719732761383057, "learning_rate": 4.12304195848528e-06, "loss": 0.0012, "step": 73400 }, { "epoch": 1.201178106847746, "grad_norm": 0.04038171097636223, "learning_rate": 4.1216362227995796e-06, "loss": 0.0012, "step": 73410 }, { "epoch": 1.2013417327988218, "grad_norm": 0.08253055065870285, "learning_rate": 4.1202305587502425e-06, "loss": 0.006, "step": 73420 }, { "epoch": 1.2015053587498978, "grad_norm": 0.05317462980747223, "learning_rate": 4.118824966451906e-06, "loss": 0.0017, "step": 73430 }, { "epoch": 1.2016689847009736, "grad_norm": 0.08571598678827286, "learning_rate": 4.117419446019208e-06, "loss": 0.0012, "step": 73440 }, { "epoch": 1.2018326106520494, "grad_norm": 0.04198795557022095, "learning_rate": 4.116013997566778e-06, "loss": 0.0009, "step": 73450 }, { "epoch": 1.2019962366031254, "grad_norm": 0.044771261513233185, "learning_rate": 4.114608621209238e-06, "loss": 0.0009, "step": 73460 }, { "epoch": 1.2021598625542012, "grad_norm": 0.03170878067612648, "learning_rate": 4.1132033170612065e-06, "loss": 0.0014, "step": 73470 }, { "epoch": 1.202323488505277, "grad_norm": 0.051643356680870056, "learning_rate": 4.111798085237295e-06, "loss": 0.0016, "step": 73480 }, { "epoch": 1.2024871144563527, "grad_norm": 0.061087582260370255, "learning_rate": 4.11039292585211e-06, "loss": 0.003, "step": 73490 }, { "epoch": 1.2026507404074287, "grad_norm": 0.1084022969007492, "learning_rate": 4.108987839020252e-06, "loss": 0.0016, "step": 73500 }, { "epoch": 1.2028143663585045, "grad_norm": 0.026638222858309746, "learning_rate": 4.1075828248563145e-06, "loss": 0.0009, "step": 73510 }, { "epoch": 1.2029779923095802, "grad_norm": 0.027507686987519264, "learning_rate": 4.106177883474885e-06, "loss": 0.0013, "step": 73520 }, { "epoch": 1.203141618260656, "grad_norm": 0.045355118811130524, "learning_rate": 4.104773014990546e-06, "loss": 0.0011, "step": 73530 }, { "epoch": 1.203305244211732, "grad_norm": 0.06505275517702103, "learning_rate": 4.103368219517874e-06, "loss": 0.0012, "step": 73540 }, { "epoch": 1.2034688701628078, "grad_norm": 0.0592808797955513, "learning_rate": 4.101963497171439e-06, "loss": 0.0023, "step": 73550 }, { "epoch": 1.2036324961138836, "grad_norm": 0.03584162890911102, "learning_rate": 4.100558848065807e-06, "loss": 0.0008, "step": 73560 }, { "epoch": 1.2037961220649596, "grad_norm": 0.03921978920698166, "learning_rate": 4.099154272315535e-06, "loss": 0.0019, "step": 73570 }, { "epoch": 1.2039597480160353, "grad_norm": 0.015199974179267883, "learning_rate": 4.097749770035175e-06, "loss": 0.0013, "step": 73580 }, { "epoch": 1.204123373967111, "grad_norm": 0.0520598441362381, "learning_rate": 4.096345341339274e-06, "loss": 0.0016, "step": 73590 }, { "epoch": 1.204286999918187, "grad_norm": 0.07999885827302933, "learning_rate": 4.094940986342373e-06, "loss": 0.0013, "step": 73600 }, { "epoch": 1.2044506258692629, "grad_norm": 0.10286489129066467, "learning_rate": 4.093536705159005e-06, "loss": 0.0011, "step": 73610 }, { "epoch": 1.2046142518203387, "grad_norm": 0.047496747225522995, "learning_rate": 4.0921324979037e-06, "loss": 0.0014, "step": 73620 }, { "epoch": 1.2047778777714147, "grad_norm": 0.02437729761004448, "learning_rate": 4.0907283646909795e-06, "loss": 0.001, "step": 73630 }, { "epoch": 1.2049415037224904, "grad_norm": 0.004412082489579916, "learning_rate": 4.08932430563536e-06, "loss": 0.0021, "step": 73640 }, { "epoch": 1.2051051296735662, "grad_norm": 0.03516425937414169, "learning_rate": 4.087920320851351e-06, "loss": 0.0003, "step": 73650 }, { "epoch": 1.205268755624642, "grad_norm": 0.04848313704133034, "learning_rate": 4.086516410453458e-06, "loss": 0.0016, "step": 73660 }, { "epoch": 1.205432381575718, "grad_norm": 0.0047675808891654015, "learning_rate": 4.085112574556179e-06, "loss": 0.0012, "step": 73670 }, { "epoch": 1.2055960075267937, "grad_norm": 0.11030463129281998, "learning_rate": 4.083708813274005e-06, "loss": 0.0016, "step": 73680 }, { "epoch": 1.2057596334778695, "grad_norm": 0.10973118990659714, "learning_rate": 4.082305126721424e-06, "loss": 0.0025, "step": 73690 }, { "epoch": 1.2059232594289455, "grad_norm": 0.01860835775732994, "learning_rate": 4.080901515012914e-06, "loss": 0.0017, "step": 73700 }, { "epoch": 1.2060868853800213, "grad_norm": 0.0034170267172157764, "learning_rate": 4.079497978262948e-06, "loss": 0.0009, "step": 73710 }, { "epoch": 1.206250511331097, "grad_norm": 0.08002614229917526, "learning_rate": 4.078094516585997e-06, "loss": 0.002, "step": 73720 }, { "epoch": 1.2064141372821728, "grad_norm": 0.08197759091854095, "learning_rate": 4.0766911300965195e-06, "loss": 0.0016, "step": 73730 }, { "epoch": 1.2065777632332488, "grad_norm": 0.004951344802975655, "learning_rate": 4.0752878189089725e-06, "loss": 0.0011, "step": 73740 }, { "epoch": 1.2067413891843246, "grad_norm": 0.0046267276629805565, "learning_rate": 4.073884583137805e-06, "loss": 0.001, "step": 73750 }, { "epoch": 1.2069050151354004, "grad_norm": 0.05993367359042168, "learning_rate": 4.0724814228974595e-06, "loss": 0.0009, "step": 73760 }, { "epoch": 1.2070686410864764, "grad_norm": 0.06475260108709335, "learning_rate": 4.071078338302374e-06, "loss": 0.0011, "step": 73770 }, { "epoch": 1.2072322670375522, "grad_norm": 0.04632718488574028, "learning_rate": 4.0696753294669785e-06, "loss": 0.0008, "step": 73780 }, { "epoch": 1.207395892988628, "grad_norm": 0.07304283231496811, "learning_rate": 4.068272396505697e-06, "loss": 0.0014, "step": 73790 }, { "epoch": 1.207559518939704, "grad_norm": 0.10290410369634628, "learning_rate": 4.06686953953295e-06, "loss": 0.0006, "step": 73800 }, { "epoch": 1.2077231448907797, "grad_norm": 0.021572886034846306, "learning_rate": 4.065466758663148e-06, "loss": 0.0012, "step": 73810 }, { "epoch": 1.2078867708418555, "grad_norm": 0.0545496791601181, "learning_rate": 4.064064054010699e-06, "loss": 0.0013, "step": 73820 }, { "epoch": 1.2080503967929315, "grad_norm": 0.10006240010261536, "learning_rate": 4.062661425690001e-06, "loss": 0.0017, "step": 73830 }, { "epoch": 1.2082140227440072, "grad_norm": 0.05202379450201988, "learning_rate": 4.061258873815447e-06, "loss": 0.0019, "step": 73840 }, { "epoch": 1.208377648695083, "grad_norm": 0.04262712225317955, "learning_rate": 4.059856398501426e-06, "loss": 0.0012, "step": 73850 }, { "epoch": 1.2085412746461588, "grad_norm": 0.053624287247657776, "learning_rate": 4.0584539998623175e-06, "loss": 0.0008, "step": 73860 }, { "epoch": 1.2087049005972348, "grad_norm": 0.0672333836555481, "learning_rate": 4.057051678012499e-06, "loss": 0.001, "step": 73870 }, { "epoch": 1.2088685265483106, "grad_norm": 0.023106999695301056, "learning_rate": 4.055649433066336e-06, "loss": 0.001, "step": 73880 }, { "epoch": 1.2090321524993863, "grad_norm": 0.021868957206606865, "learning_rate": 4.0542472651381925e-06, "loss": 0.0017, "step": 73890 }, { "epoch": 1.2091957784504623, "grad_norm": 0.008650953881442547, "learning_rate": 4.052845174342424e-06, "loss": 0.0007, "step": 73900 }, { "epoch": 1.209359404401538, "grad_norm": 0.14851944148540497, "learning_rate": 4.051443160793382e-06, "loss": 0.0026, "step": 73910 }, { "epoch": 1.2095230303526139, "grad_norm": 0.13709430396556854, "learning_rate": 4.050041224605408e-06, "loss": 0.0014, "step": 73920 }, { "epoch": 1.2096866563036897, "grad_norm": 0.016379734501242638, "learning_rate": 4.048639365892839e-06, "loss": 0.0016, "step": 73930 }, { "epoch": 1.2098502822547657, "grad_norm": 0.02267172932624817, "learning_rate": 4.047237584770007e-06, "loss": 0.0013, "step": 73940 }, { "epoch": 1.2100139082058414, "grad_norm": 0.03803670033812523, "learning_rate": 4.045835881351235e-06, "loss": 0.0009, "step": 73950 }, { "epoch": 1.2101775341569172, "grad_norm": 0.06610969454050064, "learning_rate": 4.044434255750844e-06, "loss": 0.0015, "step": 73960 }, { "epoch": 1.2103411601079932, "grad_norm": 0.028342286124825478, "learning_rate": 4.043032708083143e-06, "loss": 0.001, "step": 73970 }, { "epoch": 1.210504786059069, "grad_norm": 0.04162374511361122, "learning_rate": 4.041631238462438e-06, "loss": 0.0023, "step": 73980 }, { "epoch": 1.2106684120101447, "grad_norm": 0.03267696127295494, "learning_rate": 4.040229847003029e-06, "loss": 0.0021, "step": 73990 }, { "epoch": 1.2108320379612207, "grad_norm": 0.026449577882885933, "learning_rate": 4.038828533819209e-06, "loss": 0.0011, "step": 74000 }, { "epoch": 1.2109956639122965, "grad_norm": 0.02258426509797573, "learning_rate": 4.0374272990252625e-06, "loss": 0.0011, "step": 74010 }, { "epoch": 1.2111592898633723, "grad_norm": 0.0716397762298584, "learning_rate": 4.03602614273547e-06, "loss": 0.0027, "step": 74020 }, { "epoch": 1.2113229158144483, "grad_norm": 0.05017632246017456, "learning_rate": 4.034625065064108e-06, "loss": 0.001, "step": 74030 }, { "epoch": 1.211486541765524, "grad_norm": 0.05698603391647339, "learning_rate": 4.033224066125439e-06, "loss": 0.0013, "step": 74040 }, { "epoch": 1.2116501677165998, "grad_norm": 0.06374849379062653, "learning_rate": 4.031823146033727e-06, "loss": 0.0012, "step": 74050 }, { "epoch": 1.2118137936676756, "grad_norm": 0.13887757062911987, "learning_rate": 4.030422304903225e-06, "loss": 0.0016, "step": 74060 }, { "epoch": 1.2119774196187516, "grad_norm": 0.18006059527397156, "learning_rate": 4.02902154284818e-06, "loss": 0.0013, "step": 74070 }, { "epoch": 1.2121410455698274, "grad_norm": 0.10183005779981613, "learning_rate": 4.027620859982836e-06, "loss": 0.0009, "step": 74080 }, { "epoch": 1.2123046715209032, "grad_norm": 0.07122647762298584, "learning_rate": 4.0262202564214255e-06, "loss": 0.0024, "step": 74090 }, { "epoch": 1.2124682974719792, "grad_norm": 0.08233967423439026, "learning_rate": 4.024819732278178e-06, "loss": 0.0014, "step": 74100 }, { "epoch": 1.212631923423055, "grad_norm": 0.03426395356655121, "learning_rate": 4.023419287667315e-06, "loss": 0.0011, "step": 74110 }, { "epoch": 1.2127955493741307, "grad_norm": 0.11198526620864868, "learning_rate": 4.022018922703052e-06, "loss": 0.0013, "step": 74120 }, { "epoch": 1.2129591753252065, "grad_norm": 0.004733164329081774, "learning_rate": 4.020618637499599e-06, "loss": 0.0013, "step": 74130 }, { "epoch": 1.2131228012762825, "grad_norm": 0.07090175151824951, "learning_rate": 4.019218432171158e-06, "loss": 0.0008, "step": 74140 }, { "epoch": 1.2132864272273582, "grad_norm": 0.11436349153518677, "learning_rate": 4.017818306831925e-06, "loss": 0.0011, "step": 74150 }, { "epoch": 1.213450053178434, "grad_norm": 0.027770675718784332, "learning_rate": 4.016418261596089e-06, "loss": 0.0011, "step": 74160 }, { "epoch": 1.21361367912951, "grad_norm": 0.007805570028722286, "learning_rate": 4.015018296577832e-06, "loss": 0.003, "step": 74170 }, { "epoch": 1.2137773050805858, "grad_norm": 0.04702167958021164, "learning_rate": 4.013618411891333e-06, "loss": 0.0007, "step": 74180 }, { "epoch": 1.2139409310316616, "grad_norm": 0.046294666826725006, "learning_rate": 4.012218607650759e-06, "loss": 0.001, "step": 74190 }, { "epoch": 1.2141045569827376, "grad_norm": 0.018465016037225723, "learning_rate": 4.010818883970275e-06, "loss": 0.0009, "step": 74200 }, { "epoch": 1.2142681829338133, "grad_norm": 0.0883779227733612, "learning_rate": 4.0094192409640374e-06, "loss": 0.0013, "step": 74210 }, { "epoch": 1.214431808884889, "grad_norm": 0.04356410354375839, "learning_rate": 4.008019678746197e-06, "loss": 0.0014, "step": 74220 }, { "epoch": 1.214595434835965, "grad_norm": 0.023246828466653824, "learning_rate": 4.006620197430896e-06, "loss": 0.001, "step": 74230 }, { "epoch": 1.2147590607870409, "grad_norm": 0.03040974959731102, "learning_rate": 4.005220797132271e-06, "loss": 0.0038, "step": 74240 }, { "epoch": 1.2149226867381167, "grad_norm": 0.023721804842352867, "learning_rate": 4.003821477964455e-06, "loss": 0.002, "step": 74250 }, { "epoch": 1.2150863126891924, "grad_norm": 0.06594086438417435, "learning_rate": 4.002422240041569e-06, "loss": 0.0014, "step": 74260 }, { "epoch": 1.2152499386402684, "grad_norm": 0.05709246173501015, "learning_rate": 4.001023083477731e-06, "loss": 0.0014, "step": 74270 }, { "epoch": 1.2154135645913442, "grad_norm": 0.053758423775434494, "learning_rate": 3.999624008387052e-06, "loss": 0.0012, "step": 74280 }, { "epoch": 1.21557719054242, "grad_norm": 0.2136073261499405, "learning_rate": 3.9982250148836345e-06, "loss": 0.0017, "step": 74290 }, { "epoch": 1.2157408164934957, "grad_norm": 0.0475122295320034, "learning_rate": 3.996826103081577e-06, "loss": 0.0009, "step": 74300 }, { "epoch": 1.2159044424445717, "grad_norm": 0.0780680775642395, "learning_rate": 3.995427273094969e-06, "loss": 0.0014, "step": 74310 }, { "epoch": 1.2160680683956475, "grad_norm": 0.029846929013729095, "learning_rate": 3.994028525037896e-06, "loss": 0.0015, "step": 74320 }, { "epoch": 1.2162316943467233, "grad_norm": 0.06027283892035484, "learning_rate": 3.992629859024433e-06, "loss": 0.0028, "step": 74330 }, { "epoch": 1.2163953202977993, "grad_norm": 0.0844481885433197, "learning_rate": 3.991231275168651e-06, "loss": 0.0012, "step": 74340 }, { "epoch": 1.216558946248875, "grad_norm": 0.036458928138017654, "learning_rate": 3.989832773584614e-06, "loss": 0.0009, "step": 74350 }, { "epoch": 1.2167225721999508, "grad_norm": 0.09017597883939743, "learning_rate": 3.9884343543863805e-06, "loss": 0.001, "step": 74360 }, { "epoch": 1.2168861981510268, "grad_norm": 0.037239089608192444, "learning_rate": 3.987036017687998e-06, "loss": 0.0011, "step": 74370 }, { "epoch": 1.2170498241021026, "grad_norm": 0.02048400789499283, "learning_rate": 3.985637763603512e-06, "loss": 0.0011, "step": 74380 }, { "epoch": 1.2172134500531784, "grad_norm": 0.15330049395561218, "learning_rate": 3.984239592246959e-06, "loss": 0.0012, "step": 74390 }, { "epoch": 1.2173770760042544, "grad_norm": 0.049092765897512436, "learning_rate": 3.982841503732369e-06, "loss": 0.001, "step": 74400 }, { "epoch": 1.2175407019553302, "grad_norm": 0.019493570551276207, "learning_rate": 3.981443498173764e-06, "loss": 0.0011, "step": 74410 }, { "epoch": 1.217704327906406, "grad_norm": 0.10030219703912735, "learning_rate": 3.980045575685163e-06, "loss": 0.0016, "step": 74420 }, { "epoch": 1.2178679538574817, "grad_norm": 0.07201150804758072, "learning_rate": 3.978647736380575e-06, "loss": 0.0008, "step": 74430 }, { "epoch": 1.2180315798085577, "grad_norm": 0.09272464364767075, "learning_rate": 3.977249980374002e-06, "loss": 0.0012, "step": 74440 }, { "epoch": 1.2181952057596335, "grad_norm": 0.20647422969341278, "learning_rate": 3.975852307779441e-06, "loss": 0.002, "step": 74450 }, { "epoch": 1.2183588317107092, "grad_norm": 0.07151030004024506, "learning_rate": 3.974454718710882e-06, "loss": 0.0012, "step": 74460 }, { "epoch": 1.2185224576617852, "grad_norm": 0.01113701332360506, "learning_rate": 3.973057213282307e-06, "loss": 0.0013, "step": 74470 }, { "epoch": 1.218686083612861, "grad_norm": 0.013661486096680164, "learning_rate": 3.971659791607691e-06, "loss": 0.0015, "step": 74480 }, { "epoch": 1.2188497095639368, "grad_norm": 0.021350817754864693, "learning_rate": 3.9702624538010035e-06, "loss": 0.0016, "step": 74490 }, { "epoch": 1.2190133355150126, "grad_norm": 0.03898026421666145, "learning_rate": 3.968865199976207e-06, "loss": 0.0024, "step": 74500 }, { "epoch": 1.2191769614660886, "grad_norm": 0.041934218257665634, "learning_rate": 3.9674680302472565e-06, "loss": 0.0013, "step": 74510 }, { "epoch": 1.2193405874171643, "grad_norm": 0.045785367488861084, "learning_rate": 3.966070944728101e-06, "loss": 0.001, "step": 74520 }, { "epoch": 1.21950421336824, "grad_norm": 0.07368072867393494, "learning_rate": 3.964673943532682e-06, "loss": 0.0022, "step": 74530 }, { "epoch": 1.219667839319316, "grad_norm": 0.0951850563287735, "learning_rate": 3.963277026774932e-06, "loss": 0.0008, "step": 74540 }, { "epoch": 1.2198314652703919, "grad_norm": 0.03721340000629425, "learning_rate": 3.961880194568781e-06, "loss": 0.0011, "step": 74550 }, { "epoch": 1.2199950912214677, "grad_norm": 0.019539376720786095, "learning_rate": 3.96048344702815e-06, "loss": 0.0011, "step": 74560 }, { "epoch": 1.2201587171725436, "grad_norm": 0.0532948337495327, "learning_rate": 3.959086784266952e-06, "loss": 0.0011, "step": 74570 }, { "epoch": 1.2203223431236194, "grad_norm": 0.09206970036029816, "learning_rate": 3.9576902063990945e-06, "loss": 0.0021, "step": 74580 }, { "epoch": 1.2204859690746952, "grad_norm": 0.050228238105773926, "learning_rate": 3.956293713538477e-06, "loss": 0.0021, "step": 74590 }, { "epoch": 1.2206495950257712, "grad_norm": 0.03838194161653519, "learning_rate": 3.954897305798993e-06, "loss": 0.0013, "step": 74600 }, { "epoch": 1.220813220976847, "grad_norm": 0.05389568209648132, "learning_rate": 3.9535009832945305e-06, "loss": 0.0011, "step": 74610 }, { "epoch": 1.2209768469279227, "grad_norm": 0.07620038837194443, "learning_rate": 3.952104746138966e-06, "loss": 0.0023, "step": 74620 }, { "epoch": 1.2211404728789985, "grad_norm": 0.031721144914627075, "learning_rate": 3.9507085944461735e-06, "loss": 0.0015, "step": 74630 }, { "epoch": 1.2213040988300745, "grad_norm": 0.06377547979354858, "learning_rate": 3.949312528330019e-06, "loss": 0.0013, "step": 74640 }, { "epoch": 1.2214677247811503, "grad_norm": 0.06723318994045258, "learning_rate": 3.947916547904358e-06, "loss": 0.0017, "step": 74650 }, { "epoch": 1.221631350732226, "grad_norm": 0.08901673555374146, "learning_rate": 3.946520653283045e-06, "loss": 0.0013, "step": 74660 }, { "epoch": 1.221794976683302, "grad_norm": 0.05046418681740761, "learning_rate": 3.9451248445799224e-06, "loss": 0.0007, "step": 74670 }, { "epoch": 1.2219586026343778, "grad_norm": 0.12032615393400192, "learning_rate": 3.94372912190883e-06, "loss": 0.0023, "step": 74680 }, { "epoch": 1.2221222285854536, "grad_norm": 0.07041841000318527, "learning_rate": 3.9423334853835956e-06, "loss": 0.0017, "step": 74690 }, { "epoch": 1.2222858545365294, "grad_norm": 0.029427969828248024, "learning_rate": 3.940937935118044e-06, "loss": 0.0006, "step": 74700 }, { "epoch": 1.2224494804876054, "grad_norm": 0.04333318769931793, "learning_rate": 3.9395424712259915e-06, "loss": 0.002, "step": 74710 }, { "epoch": 1.2226131064386812, "grad_norm": 0.06791647523641586, "learning_rate": 3.938147093821246e-06, "loss": 0.0011, "step": 74720 }, { "epoch": 1.222776732389757, "grad_norm": 0.0302668958902359, "learning_rate": 3.936751803017612e-06, "loss": 0.0006, "step": 74730 }, { "epoch": 1.222940358340833, "grad_norm": 0.05871063843369484, "learning_rate": 3.9353565989288825e-06, "loss": 0.0015, "step": 74740 }, { "epoch": 1.2231039842919087, "grad_norm": 0.0647435411810875, "learning_rate": 3.933961481668847e-06, "loss": 0.0015, "step": 74750 }, { "epoch": 1.2232676102429845, "grad_norm": 0.08770201355218887, "learning_rate": 3.932566451351286e-06, "loss": 0.0033, "step": 74760 }, { "epoch": 1.2234312361940605, "grad_norm": 0.07383829355239868, "learning_rate": 3.931171508089974e-06, "loss": 0.0018, "step": 74770 }, { "epoch": 1.2235948621451362, "grad_norm": 0.0903746709227562, "learning_rate": 3.9297766519986766e-06, "loss": 0.0012, "step": 74780 }, { "epoch": 1.223758488096212, "grad_norm": 0.017470426857471466, "learning_rate": 3.928381883191155e-06, "loss": 0.0016, "step": 74790 }, { "epoch": 1.223922114047288, "grad_norm": 0.02390780858695507, "learning_rate": 3.926987201781162e-06, "loss": 0.0009, "step": 74800 }, { "epoch": 1.2240857399983638, "grad_norm": 0.05151120573282242, "learning_rate": 3.92559260788244e-06, "loss": 0.0009, "step": 74810 }, { "epoch": 1.2242493659494396, "grad_norm": 0.041096292436122894, "learning_rate": 3.924198101608731e-06, "loss": 0.001, "step": 74820 }, { "epoch": 1.2244129919005153, "grad_norm": 0.03882312774658203, "learning_rate": 3.922803683073763e-06, "loss": 0.0009, "step": 74830 }, { "epoch": 1.2245766178515913, "grad_norm": 0.004259459674358368, "learning_rate": 3.921409352391262e-06, "loss": 0.0007, "step": 74840 }, { "epoch": 1.224740243802667, "grad_norm": 0.1282317042350769, "learning_rate": 3.920015109674946e-06, "loss": 0.0012, "step": 74850 }, { "epoch": 1.2249038697537429, "grad_norm": 0.05232157185673714, "learning_rate": 3.918620955038522e-06, "loss": 0.0009, "step": 74860 }, { "epoch": 1.2250674957048189, "grad_norm": 0.01994086243212223, "learning_rate": 3.917226888595693e-06, "loss": 0.001, "step": 74870 }, { "epoch": 1.2252311216558947, "grad_norm": 0.04924019053578377, "learning_rate": 3.915832910460157e-06, "loss": 0.0015, "step": 74880 }, { "epoch": 1.2253947476069704, "grad_norm": 0.21405909955501556, "learning_rate": 3.914439020745599e-06, "loss": 0.002, "step": 74890 }, { "epoch": 1.2255583735580462, "grad_norm": 0.05332926660776138, "learning_rate": 3.9130452195657006e-06, "loss": 0.0012, "step": 74900 }, { "epoch": 1.2257219995091222, "grad_norm": 0.029374608770012856, "learning_rate": 3.911651507034137e-06, "loss": 0.0014, "step": 74910 }, { "epoch": 1.225885625460198, "grad_norm": 0.045646782964468, "learning_rate": 3.9102578832645736e-06, "loss": 0.0016, "step": 74920 }, { "epoch": 1.2260492514112737, "grad_norm": 0.05251171067357063, "learning_rate": 3.908864348370669e-06, "loss": 0.0013, "step": 74930 }, { "epoch": 1.2262128773623497, "grad_norm": 0.006718936376273632, "learning_rate": 3.9074709024660765e-06, "loss": 0.0015, "step": 74940 }, { "epoch": 1.2263765033134255, "grad_norm": 0.018619919195771217, "learning_rate": 3.906077545664439e-06, "loss": 0.0006, "step": 74950 }, { "epoch": 1.2265401292645013, "grad_norm": 0.08224703371524811, "learning_rate": 3.904684278079396e-06, "loss": 0.0014, "step": 74960 }, { "epoch": 1.2267037552155773, "grad_norm": 0.009832433424890041, "learning_rate": 3.903291099824576e-06, "loss": 0.0007, "step": 74970 }, { "epoch": 1.226867381166653, "grad_norm": 0.08725149184465408, "learning_rate": 3.901898011013603e-06, "loss": 0.0018, "step": 74980 }, { "epoch": 1.2270310071177288, "grad_norm": 0.27727824449539185, "learning_rate": 3.900505011760092e-06, "loss": 0.0024, "step": 74990 }, { "epoch": 1.2271946330688048, "grad_norm": 0.045055750757455826, "learning_rate": 3.899112102177651e-06, "loss": 0.0015, "step": 75000 }, { "epoch": 1.2273582590198806, "grad_norm": 0.060741279274225235, "learning_rate": 3.897719282379883e-06, "loss": 0.001, "step": 75010 }, { "epoch": 1.2275218849709564, "grad_norm": 0.08668284863233566, "learning_rate": 3.896326552480379e-06, "loss": 0.0014, "step": 75020 }, { "epoch": 1.2276855109220322, "grad_norm": 0.05349934101104736, "learning_rate": 3.894933912592726e-06, "loss": 0.0013, "step": 75030 }, { "epoch": 1.2278491368731081, "grad_norm": 0.06193646416068077, "learning_rate": 3.893541362830503e-06, "loss": 0.0021, "step": 75040 }, { "epoch": 1.228012762824184, "grad_norm": 0.015309491194784641, "learning_rate": 3.892148903307283e-06, "loss": 0.001, "step": 75050 }, { "epoch": 1.2281763887752597, "grad_norm": 0.029573263600468636, "learning_rate": 3.890756534136629e-06, "loss": 0.0017, "step": 75060 }, { "epoch": 1.2283400147263357, "grad_norm": 0.05194779112935066, "learning_rate": 3.889364255432098e-06, "loss": 0.0011, "step": 75070 }, { "epoch": 1.2285036406774115, "grad_norm": 0.06401064991950989, "learning_rate": 3.8879720673072395e-06, "loss": 0.0011, "step": 75080 }, { "epoch": 1.2286672666284872, "grad_norm": 0.03716651722788811, "learning_rate": 3.886579969875595e-06, "loss": 0.001, "step": 75090 }, { "epoch": 1.228830892579563, "grad_norm": 0.04947042465209961, "learning_rate": 3.885187963250701e-06, "loss": 0.0012, "step": 75100 }, { "epoch": 1.228994518530639, "grad_norm": 0.06774580478668213, "learning_rate": 3.8837960475460836e-06, "loss": 0.0009, "step": 75110 }, { "epoch": 1.2291581444817148, "grad_norm": 0.015685144811868668, "learning_rate": 3.882404222875264e-06, "loss": 0.0014, "step": 75120 }, { "epoch": 1.2293217704327906, "grad_norm": 0.04278433322906494, "learning_rate": 3.881012489351752e-06, "loss": 0.0023, "step": 75130 }, { "epoch": 1.2294853963838666, "grad_norm": 0.0625486895442009, "learning_rate": 3.879620847089055e-06, "loss": 0.0015, "step": 75140 }, { "epoch": 1.2296490223349423, "grad_norm": 0.027868643403053284, "learning_rate": 3.87822929620067e-06, "loss": 0.0009, "step": 75150 }, { "epoch": 1.229812648286018, "grad_norm": 0.017256226390600204, "learning_rate": 3.876837836800086e-06, "loss": 0.0019, "step": 75160 }, { "epoch": 1.229976274237094, "grad_norm": 0.030246959999203682, "learning_rate": 3.875446469000788e-06, "loss": 0.0009, "step": 75170 }, { "epoch": 1.2301399001881699, "grad_norm": 0.09192854166030884, "learning_rate": 3.874055192916248e-06, "loss": 0.001, "step": 75180 }, { "epoch": 1.2303035261392457, "grad_norm": 0.027013687416911125, "learning_rate": 3.872664008659938e-06, "loss": 0.0008, "step": 75190 }, { "epoch": 1.2304671520903216, "grad_norm": 0.11414414644241333, "learning_rate": 3.871272916345314e-06, "loss": 0.0016, "step": 75200 }, { "epoch": 1.2306307780413974, "grad_norm": 0.23190844058990479, "learning_rate": 3.869881916085832e-06, "loss": 0.001, "step": 75210 }, { "epoch": 1.2307944039924732, "grad_norm": 0.01144721731543541, "learning_rate": 3.868491007994935e-06, "loss": 0.0014, "step": 75220 }, { "epoch": 1.230958029943549, "grad_norm": 0.028903638944029808, "learning_rate": 3.8671001921860614e-06, "loss": 0.0013, "step": 75230 }, { "epoch": 1.231121655894625, "grad_norm": 0.004566123243421316, "learning_rate": 3.8657094687726424e-06, "loss": 0.0009, "step": 75240 }, { "epoch": 1.2312852818457007, "grad_norm": 0.08521173149347305, "learning_rate": 3.8643188378681e-06, "loss": 0.0008, "step": 75250 }, { "epoch": 1.2314489077967765, "grad_norm": 0.07526182383298874, "learning_rate": 3.86292829958585e-06, "loss": 0.0021, "step": 75260 }, { "epoch": 1.2316125337478523, "grad_norm": 0.04024568572640419, "learning_rate": 3.861537854039298e-06, "loss": 0.0019, "step": 75270 }, { "epoch": 1.2317761596989283, "grad_norm": 0.10666966438293457, "learning_rate": 3.8601475013418455e-06, "loss": 0.0014, "step": 75280 }, { "epoch": 1.231939785650004, "grad_norm": 0.06438332796096802, "learning_rate": 3.858757241606884e-06, "loss": 0.002, "step": 75290 }, { "epoch": 1.2321034116010798, "grad_norm": 0.036823879927396774, "learning_rate": 3.8573670749478005e-06, "loss": 0.0013, "step": 75300 }, { "epoch": 1.2322670375521558, "grad_norm": 0.03285127878189087, "learning_rate": 3.85597700147797e-06, "loss": 0.0008, "step": 75310 }, { "epoch": 1.2324306635032316, "grad_norm": 0.06669161468744278, "learning_rate": 3.8545870213107626e-06, "loss": 0.0026, "step": 75320 }, { "epoch": 1.2325942894543074, "grad_norm": 0.04048169404268265, "learning_rate": 3.853197134559541e-06, "loss": 0.0013, "step": 75330 }, { "epoch": 1.2327579154053834, "grad_norm": 0.03150138258934021, "learning_rate": 3.85180734133766e-06, "loss": 0.0011, "step": 75340 }, { "epoch": 1.2329215413564591, "grad_norm": 0.12060286104679108, "learning_rate": 3.850417641758466e-06, "loss": 0.0014, "step": 75350 }, { "epoch": 1.233085167307535, "grad_norm": 0.04195263609290123, "learning_rate": 3.849028035935296e-06, "loss": 0.001, "step": 75360 }, { "epoch": 1.233248793258611, "grad_norm": 0.04212228208780289, "learning_rate": 3.847638523981485e-06, "loss": 0.001, "step": 75370 }, { "epoch": 1.2334124192096867, "grad_norm": 0.06619065254926682, "learning_rate": 3.846249106010355e-06, "loss": 0.001, "step": 75380 }, { "epoch": 1.2335760451607625, "grad_norm": 0.041517145931720734, "learning_rate": 3.844859782135222e-06, "loss": 0.0059, "step": 75390 }, { "epoch": 1.2337396711118382, "grad_norm": 0.10518371313810349, "learning_rate": 3.843470552469396e-06, "loss": 0.0012, "step": 75400 }, { "epoch": 1.2339032970629142, "grad_norm": 0.02846491150557995, "learning_rate": 3.842081417126176e-06, "loss": 0.001, "step": 75410 }, { "epoch": 1.23406692301399, "grad_norm": 0.03170047700405121, "learning_rate": 3.840692376218856e-06, "loss": 0.001, "step": 75420 }, { "epoch": 1.2342305489650658, "grad_norm": 0.042040154337882996, "learning_rate": 3.839303429860722e-06, "loss": 0.001, "step": 75430 }, { "epoch": 1.2343941749161418, "grad_norm": 0.0888720378279686, "learning_rate": 3.837914578165053e-06, "loss": 0.0009, "step": 75440 }, { "epoch": 1.2345578008672176, "grad_norm": 0.00809149257838726, "learning_rate": 3.836525821245115e-06, "loss": 0.0011, "step": 75450 }, { "epoch": 1.2347214268182933, "grad_norm": 0.003200968960300088, "learning_rate": 3.835137159214173e-06, "loss": 0.001, "step": 75460 }, { "epoch": 1.234885052769369, "grad_norm": 0.25250300765037537, "learning_rate": 3.833748592185481e-06, "loss": 0.0023, "step": 75470 }, { "epoch": 1.235048678720445, "grad_norm": 0.22332283854484558, "learning_rate": 3.832360120272286e-06, "loss": 0.0019, "step": 75480 }, { "epoch": 1.2352123046715209, "grad_norm": 0.015924707055091858, "learning_rate": 3.830971743587827e-06, "loss": 0.0014, "step": 75490 }, { "epoch": 1.2353759306225967, "grad_norm": 0.027634384110569954, "learning_rate": 3.829583462245335e-06, "loss": 0.0015, "step": 75500 }, { "epoch": 1.2355395565736726, "grad_norm": 0.09749562293291092, "learning_rate": 3.8281952763580335e-06, "loss": 0.0017, "step": 75510 }, { "epoch": 1.2357031825247484, "grad_norm": 0.03955467790365219, "learning_rate": 3.826807186039139e-06, "loss": 0.0017, "step": 75520 }, { "epoch": 1.2358668084758242, "grad_norm": 0.049399811774492264, "learning_rate": 3.825419191401858e-06, "loss": 0.001, "step": 75530 }, { "epoch": 1.2360304344269002, "grad_norm": 0.03131980076432228, "learning_rate": 3.824031292559392e-06, "loss": 0.0009, "step": 75540 }, { "epoch": 1.236194060377976, "grad_norm": 0.06004263088107109, "learning_rate": 3.8226434896249334e-06, "loss": 0.0011, "step": 75550 }, { "epoch": 1.2363576863290517, "grad_norm": 0.2504695951938629, "learning_rate": 3.821255782711665e-06, "loss": 0.0012, "step": 75560 }, { "epoch": 1.2365213122801277, "grad_norm": 0.05799531564116478, "learning_rate": 3.819868171932765e-06, "loss": 0.0009, "step": 75570 }, { "epoch": 1.2366849382312035, "grad_norm": 0.047677669674158096, "learning_rate": 3.8184806574014025e-06, "loss": 0.0018, "step": 75580 }, { "epoch": 1.2368485641822793, "grad_norm": 0.034939464181661606, "learning_rate": 3.817093239230737e-06, "loss": 0.0011, "step": 75590 }, { "epoch": 1.237012190133355, "grad_norm": 0.04398292303085327, "learning_rate": 3.815705917533922e-06, "loss": 0.0013, "step": 75600 }, { "epoch": 1.237175816084431, "grad_norm": 0.0009588264510966837, "learning_rate": 3.814318692424103e-06, "loss": 0.0008, "step": 75610 }, { "epoch": 1.2373394420355068, "grad_norm": 0.06344334036111832, "learning_rate": 3.8129315640144183e-06, "loss": 0.001, "step": 75620 }, { "epoch": 1.2375030679865826, "grad_norm": 0.03541975095868111, "learning_rate": 3.8115445324179955e-06, "loss": 0.0013, "step": 75630 }, { "epoch": 1.2376666939376586, "grad_norm": 0.018198177218437195, "learning_rate": 3.810157597747958e-06, "loss": 0.0008, "step": 75640 }, { "epoch": 1.2378303198887344, "grad_norm": 0.036116812378168106, "learning_rate": 3.808770760117418e-06, "loss": 0.0013, "step": 75650 }, { "epoch": 1.2379939458398102, "grad_norm": 0.2331334501504898, "learning_rate": 3.807384019639482e-06, "loss": 0.0032, "step": 75660 }, { "epoch": 1.238157571790886, "grad_norm": 0.04998109117150307, "learning_rate": 3.805997376427248e-06, "loss": 0.0006, "step": 75670 }, { "epoch": 1.238321197741962, "grad_norm": 0.026762284338474274, "learning_rate": 3.8046108305938057e-06, "loss": 0.001, "step": 75680 }, { "epoch": 1.2384848236930377, "grad_norm": 0.08631635457277298, "learning_rate": 3.8032243822522362e-06, "loss": 0.0011, "step": 75690 }, { "epoch": 1.2386484496441135, "grad_norm": 0.10220127552747726, "learning_rate": 3.801838031515615e-06, "loss": 0.0018, "step": 75700 }, { "epoch": 1.2388120755951895, "grad_norm": 0.040783800184726715, "learning_rate": 3.8004517784970072e-06, "loss": 0.0013, "step": 75710 }, { "epoch": 1.2389757015462652, "grad_norm": 0.03257445991039276, "learning_rate": 3.7990656233094712e-06, "loss": 0.0008, "step": 75720 }, { "epoch": 1.239139327497341, "grad_norm": 0.06778103858232498, "learning_rate": 3.7976795660660574e-06, "loss": 0.0014, "step": 75730 }, { "epoch": 1.239302953448417, "grad_norm": 0.07347385585308075, "learning_rate": 3.7962936068798075e-06, "loss": 0.0012, "step": 75740 }, { "epoch": 1.2394665793994928, "grad_norm": 0.012228734791278839, "learning_rate": 3.794907745863755e-06, "loss": 0.001, "step": 75750 }, { "epoch": 1.2396302053505686, "grad_norm": 0.07704233378171921, "learning_rate": 3.793521983130927e-06, "loss": 0.0016, "step": 75760 }, { "epoch": 1.2397938313016446, "grad_norm": 0.05287954583764076, "learning_rate": 3.7921363187943427e-06, "loss": 0.0017, "step": 75770 }, { "epoch": 1.2399574572527203, "grad_norm": 0.042877621948719025, "learning_rate": 3.7907507529670095e-06, "loss": 0.0017, "step": 75780 }, { "epoch": 1.240121083203796, "grad_norm": 0.1314309537410736, "learning_rate": 3.78936528576193e-06, "loss": 0.0016, "step": 75790 }, { "epoch": 1.2402847091548719, "grad_norm": 0.06001598387956619, "learning_rate": 3.7879799172920996e-06, "loss": 0.0008, "step": 75800 }, { "epoch": 1.2404483351059479, "grad_norm": 0.165829598903656, "learning_rate": 3.7865946476705033e-06, "loss": 0.0016, "step": 75810 }, { "epoch": 1.2406119610570236, "grad_norm": 0.049714647233486176, "learning_rate": 3.785209477010119e-06, "loss": 0.0008, "step": 75820 }, { "epoch": 1.2407755870080994, "grad_norm": 0.049130432307720184, "learning_rate": 3.783824405423917e-06, "loss": 0.0014, "step": 75830 }, { "epoch": 1.2409392129591754, "grad_norm": 0.05671097710728645, "learning_rate": 3.782439433024859e-06, "loss": 0.001, "step": 75840 }, { "epoch": 1.2411028389102512, "grad_norm": 0.03538745269179344, "learning_rate": 3.7810545599258973e-06, "loss": 0.0011, "step": 75850 }, { "epoch": 1.241266464861327, "grad_norm": 0.07837069034576416, "learning_rate": 3.779669786239979e-06, "loss": 0.0009, "step": 75860 }, { "epoch": 1.2414300908124027, "grad_norm": 0.07940542697906494, "learning_rate": 3.7782851120800413e-06, "loss": 0.0012, "step": 75870 }, { "epoch": 1.2415937167634787, "grad_norm": 0.10000597685575485, "learning_rate": 3.776900537559013e-06, "loss": 0.0039, "step": 75880 }, { "epoch": 1.2417573427145545, "grad_norm": 0.07211638242006302, "learning_rate": 3.7755160627898156e-06, "loss": 0.0017, "step": 75890 }, { "epoch": 1.2419209686656303, "grad_norm": 0.07236737757921219, "learning_rate": 3.774131687885362e-06, "loss": 0.0027, "step": 75900 }, { "epoch": 1.2420845946167063, "grad_norm": 0.18286970257759094, "learning_rate": 3.7727474129585575e-06, "loss": 0.0016, "step": 75910 }, { "epoch": 1.242248220567782, "grad_norm": 0.05893344804644585, "learning_rate": 3.7713632381222974e-06, "loss": 0.0014, "step": 75920 }, { "epoch": 1.2424118465188578, "grad_norm": 0.05910464748740196, "learning_rate": 3.769979163489472e-06, "loss": 0.0011, "step": 75930 }, { "epoch": 1.2425754724699338, "grad_norm": 0.027881009504199028, "learning_rate": 3.7685951891729616e-06, "loss": 0.0014, "step": 75940 }, { "epoch": 1.2427390984210096, "grad_norm": 0.02975933998823166, "learning_rate": 3.7672113152856383e-06, "loss": 0.0054, "step": 75950 }, { "epoch": 1.2429027243720854, "grad_norm": 0.026923248544335365, "learning_rate": 3.7658275419403644e-06, "loss": 0.001, "step": 75960 }, { "epoch": 1.2430663503231614, "grad_norm": 0.04315062239766121, "learning_rate": 3.7644438692499985e-06, "loss": 0.001, "step": 75970 }, { "epoch": 1.2432299762742371, "grad_norm": 0.044196173548698425, "learning_rate": 3.763060297327386e-06, "loss": 0.0022, "step": 75980 }, { "epoch": 1.243393602225313, "grad_norm": 0.1643618941307068, "learning_rate": 3.7616768262853674e-06, "loss": 0.0017, "step": 75990 }, { "epoch": 1.2435572281763887, "grad_norm": 0.2178935706615448, "learning_rate": 3.7602934562367743e-06, "loss": 0.0011, "step": 76000 }, { "epoch": 1.2437208541274647, "grad_norm": 0.00882326066493988, "learning_rate": 3.758910187294429e-06, "loss": 0.0011, "step": 76010 }, { "epoch": 1.2438844800785405, "grad_norm": 0.0069951810874044895, "learning_rate": 3.7575270195711453e-06, "loss": 0.0013, "step": 76020 }, { "epoch": 1.2440481060296162, "grad_norm": 0.014804904349148273, "learning_rate": 3.7561439531797315e-06, "loss": 0.0011, "step": 76030 }, { "epoch": 1.244211731980692, "grad_norm": 0.010005536489188671, "learning_rate": 3.754760988232985e-06, "loss": 0.0015, "step": 76040 }, { "epoch": 1.244375357931768, "grad_norm": 0.03244250267744064, "learning_rate": 3.753378124843695e-06, "loss": 0.0018, "step": 76050 }, { "epoch": 1.2445389838828438, "grad_norm": 0.04032203182578087, "learning_rate": 3.7519953631246443e-06, "loss": 0.0012, "step": 76060 }, { "epoch": 1.2447026098339196, "grad_norm": 0.02284880168735981, "learning_rate": 3.750612703188605e-06, "loss": 0.0009, "step": 76070 }, { "epoch": 1.2448662357849956, "grad_norm": 0.0598733015358448, "learning_rate": 3.749230145148344e-06, "loss": 0.0013, "step": 76080 }, { "epoch": 1.2450298617360713, "grad_norm": 0.10431163012981415, "learning_rate": 3.747847689116617e-06, "loss": 0.0011, "step": 76090 }, { "epoch": 1.245193487687147, "grad_norm": 0.0969981849193573, "learning_rate": 3.7464653352061714e-06, "loss": 0.0013, "step": 76100 }, { "epoch": 1.245357113638223, "grad_norm": 0.07172807306051254, "learning_rate": 3.745083083529748e-06, "loss": 0.0019, "step": 76110 }, { "epoch": 1.2455207395892989, "grad_norm": 0.0021600837353616953, "learning_rate": 3.743700934200079e-06, "loss": 0.001, "step": 76120 }, { "epoch": 1.2456843655403746, "grad_norm": 0.06893586367368698, "learning_rate": 3.7423188873298877e-06, "loss": 0.0009, "step": 76130 }, { "epoch": 1.2458479914914506, "grad_norm": 0.03157917037606239, "learning_rate": 3.7409369430318886e-06, "loss": 0.0008, "step": 76140 }, { "epoch": 1.2460116174425264, "grad_norm": 0.046790771186351776, "learning_rate": 3.739555101418789e-06, "loss": 0.0009, "step": 76150 }, { "epoch": 1.2461752433936022, "grad_norm": 0.03875652700662613, "learning_rate": 3.7381733626032867e-06, "loss": 0.0006, "step": 76160 }, { "epoch": 1.2463388693446782, "grad_norm": 0.076677106320858, "learning_rate": 3.736791726698072e-06, "loss": 0.0015, "step": 76170 }, { "epoch": 1.246502495295754, "grad_norm": 0.05655670166015625, "learning_rate": 3.735410193815826e-06, "loss": 0.0018, "step": 76180 }, { "epoch": 1.2466661212468297, "grad_norm": 0.08222516626119614, "learning_rate": 3.7340287640692224e-06, "loss": 0.0024, "step": 76190 }, { "epoch": 1.2468297471979055, "grad_norm": 0.09467457234859467, "learning_rate": 3.7326474375709253e-06, "loss": 0.0015, "step": 76200 }, { "epoch": 1.2469933731489815, "grad_norm": 0.06342025846242905, "learning_rate": 3.7312662144335913e-06, "loss": 0.0011, "step": 76210 }, { "epoch": 1.2471569991000573, "grad_norm": 0.06348420679569244, "learning_rate": 3.7298850947698682e-06, "loss": 0.0009, "step": 76220 }, { "epoch": 1.247320625051133, "grad_norm": 0.08698870986700058, "learning_rate": 3.728504078692396e-06, "loss": 0.001, "step": 76230 }, { "epoch": 1.2474842510022088, "grad_norm": 0.029028309509158134, "learning_rate": 3.727123166313805e-06, "loss": 0.0006, "step": 76240 }, { "epoch": 1.2476478769532848, "grad_norm": 0.15747520327568054, "learning_rate": 3.725742357746718e-06, "loss": 0.0022, "step": 76250 }, { "epoch": 1.2478115029043606, "grad_norm": 0.09941805154085159, "learning_rate": 3.724361653103748e-06, "loss": 0.0013, "step": 76260 }, { "epoch": 1.2479751288554364, "grad_norm": 0.006171914748847485, "learning_rate": 3.722981052497502e-06, "loss": 0.0021, "step": 76270 }, { "epoch": 1.2481387548065124, "grad_norm": 0.05656810477375984, "learning_rate": 3.7216005560405767e-06, "loss": 0.0009, "step": 76280 }, { "epoch": 1.2483023807575881, "grad_norm": 0.030218174681067467, "learning_rate": 3.7202201638455603e-06, "loss": 0.0013, "step": 76290 }, { "epoch": 1.248466006708664, "grad_norm": 0.028989270329475403, "learning_rate": 3.7188398760250333e-06, "loss": 0.0016, "step": 76300 }, { "epoch": 1.24862963265974, "grad_norm": 0.06538151949644089, "learning_rate": 3.7174596926915676e-06, "loss": 0.0018, "step": 76310 }, { "epoch": 1.2487932586108157, "grad_norm": 0.029546581208705902, "learning_rate": 3.716079613957725e-06, "loss": 0.0021, "step": 76320 }, { "epoch": 1.2489568845618915, "grad_norm": 0.031979162245988846, "learning_rate": 3.7146996399360615e-06, "loss": 0.0013, "step": 76330 }, { "epoch": 1.2491205105129675, "grad_norm": 0.049099151045084, "learning_rate": 3.713319770739122e-06, "loss": 0.0013, "step": 76340 }, { "epoch": 1.2492841364640432, "grad_norm": 0.022830074653029442, "learning_rate": 3.711940006479444e-06, "loss": 0.0013, "step": 76350 }, { "epoch": 1.249447762415119, "grad_norm": 0.03473230451345444, "learning_rate": 3.710560347269557e-06, "loss": 0.0011, "step": 76360 }, { "epoch": 1.2496113883661948, "grad_norm": 0.022678585723042488, "learning_rate": 3.7091807932219816e-06, "loss": 0.0013, "step": 76370 }, { "epoch": 1.2497750143172708, "grad_norm": 0.12722554802894592, "learning_rate": 3.707801344449228e-06, "loss": 0.0029, "step": 76380 }, { "epoch": 1.2499386402683466, "grad_norm": 0.0551285557448864, "learning_rate": 3.706422001063801e-06, "loss": 0.001, "step": 76390 }, { "epoch": 1.2501022662194223, "grad_norm": 0.10762631893157959, "learning_rate": 3.7050427631781936e-06, "loss": 0.0011, "step": 76400 }, { "epoch": 1.250265892170498, "grad_norm": 0.12900607287883759, "learning_rate": 3.7036636309048944e-06, "loss": 0.0009, "step": 76410 }, { "epoch": 1.250429518121574, "grad_norm": 0.11554361134767532, "learning_rate": 3.7022846043563774e-06, "loss": 0.0007, "step": 76420 }, { "epoch": 1.2505931440726499, "grad_norm": 0.038834348320961, "learning_rate": 3.7009056836451142e-06, "loss": 0.0008, "step": 76430 }, { "epoch": 1.2507567700237257, "grad_norm": 0.051226451992988586, "learning_rate": 3.6995268688835625e-06, "loss": 0.0019, "step": 76440 }, { "epoch": 1.2509203959748016, "grad_norm": 0.030855748802423477, "learning_rate": 3.6981481601841746e-06, "loss": 0.0011, "step": 76450 }, { "epoch": 1.2510840219258774, "grad_norm": 0.045061372220516205, "learning_rate": 3.696769557659394e-06, "loss": 0.0006, "step": 76460 }, { "epoch": 1.2512476478769532, "grad_norm": 0.04351581633090973, "learning_rate": 3.6953910614216537e-06, "loss": 0.0008, "step": 76470 }, { "epoch": 1.2514112738280292, "grad_norm": 0.10761336982250214, "learning_rate": 3.69401267158338e-06, "loss": 0.0013, "step": 76480 }, { "epoch": 1.251574899779105, "grad_norm": 0.048454973846673965, "learning_rate": 3.692634388256989e-06, "loss": 0.0013, "step": 76490 }, { "epoch": 1.2517385257301807, "grad_norm": 0.026084132492542267, "learning_rate": 3.6912562115548906e-06, "loss": 0.0008, "step": 76500 }, { "epoch": 1.2519021516812567, "grad_norm": 0.0019574235193431377, "learning_rate": 3.689878141589482e-06, "loss": 0.0006, "step": 76510 }, { "epoch": 1.2520657776323325, "grad_norm": 0.054881978780031204, "learning_rate": 3.6885001784731547e-06, "loss": 0.001, "step": 76520 }, { "epoch": 1.2522294035834083, "grad_norm": 0.15914474427700043, "learning_rate": 3.6871223223182905e-06, "loss": 0.0016, "step": 76530 }, { "epoch": 1.2523930295344843, "grad_norm": 0.07124340534210205, "learning_rate": 3.685744573237263e-06, "loss": 0.0015, "step": 76540 }, { "epoch": 1.25255665548556, "grad_norm": 0.08131648600101471, "learning_rate": 3.6843669313424375e-06, "loss": 0.0011, "step": 76550 }, { "epoch": 1.2527202814366358, "grad_norm": 0.08639384061098099, "learning_rate": 3.6829893967461685e-06, "loss": 0.0012, "step": 76560 }, { "epoch": 1.2528839073877118, "grad_norm": 0.05704488977789879, "learning_rate": 3.6816119695608033e-06, "loss": 0.0014, "step": 76570 }, { "epoch": 1.2530475333387876, "grad_norm": 0.06507814675569534, "learning_rate": 3.6802346498986797e-06, "loss": 0.0009, "step": 76580 }, { "epoch": 1.2532111592898634, "grad_norm": 0.12731674313545227, "learning_rate": 3.6788574378721285e-06, "loss": 0.0011, "step": 76590 }, { "epoch": 1.2533747852409391, "grad_norm": 0.14688092470169067, "learning_rate": 3.6774803335934695e-06, "loss": 0.0016, "step": 76600 }, { "epoch": 1.253538411192015, "grad_norm": 0.10999850183725357, "learning_rate": 3.6761033371750156e-06, "loss": 0.0013, "step": 76610 }, { "epoch": 1.253702037143091, "grad_norm": 0.08178862929344177, "learning_rate": 3.674726448729068e-06, "loss": 0.0018, "step": 76620 }, { "epoch": 1.2538656630941667, "grad_norm": 0.09021533280611038, "learning_rate": 3.673349668367923e-06, "loss": 0.0013, "step": 76630 }, { "epoch": 1.2540292890452425, "grad_norm": 0.11633835732936859, "learning_rate": 3.671972996203865e-06, "loss": 0.0019, "step": 76640 }, { "epoch": 1.2541929149963185, "grad_norm": 0.06238851323723793, "learning_rate": 3.670596432349172e-06, "loss": 0.0014, "step": 76650 }, { "epoch": 1.2543565409473942, "grad_norm": 0.008073466829955578, "learning_rate": 3.66921997691611e-06, "loss": 0.0012, "step": 76660 }, { "epoch": 1.25452016689847, "grad_norm": 0.06268932670354843, "learning_rate": 3.6678436300169397e-06, "loss": 0.0014, "step": 76670 }, { "epoch": 1.254683792849546, "grad_norm": 0.1938357949256897, "learning_rate": 3.6664673917639106e-06, "loss": 0.0019, "step": 76680 }, { "epoch": 1.2548474188006218, "grad_norm": 0.04001988098025322, "learning_rate": 3.665091262269263e-06, "loss": 0.001, "step": 76690 }, { "epoch": 1.2550110447516976, "grad_norm": 0.12498009949922562, "learning_rate": 3.6637152416452306e-06, "loss": 0.0008, "step": 76700 }, { "epoch": 1.2551746707027736, "grad_norm": 0.028389887884259224, "learning_rate": 3.662339330004037e-06, "loss": 0.001, "step": 76710 }, { "epoch": 1.2553382966538493, "grad_norm": 0.03279648721218109, "learning_rate": 3.6609635274578964e-06, "loss": 0.0026, "step": 76720 }, { "epoch": 1.255501922604925, "grad_norm": 0.06896865367889404, "learning_rate": 3.659587834119016e-06, "loss": 0.0015, "step": 76730 }, { "epoch": 1.255665548556001, "grad_norm": 1.2143607139587402, "learning_rate": 3.6582122500995896e-06, "loss": 0.0013, "step": 76740 }, { "epoch": 1.2558291745070769, "grad_norm": 0.04025647044181824, "learning_rate": 3.6568367755118078e-06, "loss": 0.0018, "step": 76750 }, { "epoch": 1.2559928004581526, "grad_norm": 0.0472763255238533, "learning_rate": 3.655461410467849e-06, "loss": 0.0014, "step": 76760 }, { "epoch": 1.2561564264092284, "grad_norm": 0.03411184623837471, "learning_rate": 3.6540861550798817e-06, "loss": 0.0019, "step": 76770 }, { "epoch": 1.2563200523603044, "grad_norm": 0.0291668139398098, "learning_rate": 3.652711009460069e-06, "loss": 0.0008, "step": 76780 }, { "epoch": 1.2564836783113802, "grad_norm": 0.0225231796503067, "learning_rate": 3.651335973720563e-06, "loss": 0.0013, "step": 76790 }, { "epoch": 1.256647304262456, "grad_norm": 0.06507483869791031, "learning_rate": 3.649961047973506e-06, "loss": 0.0014, "step": 76800 }, { "epoch": 1.2568109302135317, "grad_norm": 0.005823655519634485, "learning_rate": 3.648586232331033e-06, "loss": 0.0012, "step": 76810 }, { "epoch": 1.2569745561646077, "grad_norm": 0.03773728013038635, "learning_rate": 3.6472115269052687e-06, "loss": 0.0008, "step": 76820 }, { "epoch": 1.2571381821156835, "grad_norm": 0.020107142627239227, "learning_rate": 3.6458369318083296e-06, "loss": 0.0008, "step": 76830 }, { "epoch": 1.2573018080667593, "grad_norm": 0.02851218543946743, "learning_rate": 3.644462447152323e-06, "loss": 0.0015, "step": 76840 }, { "epoch": 1.2574654340178353, "grad_norm": 0.024750562384724617, "learning_rate": 3.643088073049348e-06, "loss": 0.0017, "step": 76850 }, { "epoch": 1.257629059968911, "grad_norm": 0.06721001863479614, "learning_rate": 3.6417138096114924e-06, "loss": 0.0028, "step": 76860 }, { "epoch": 1.2577926859199868, "grad_norm": 0.030058342963457108, "learning_rate": 3.6403396569508377e-06, "loss": 0.0011, "step": 76870 }, { "epoch": 1.2579563118710628, "grad_norm": 0.049991823732852936, "learning_rate": 3.6389656151794538e-06, "loss": 0.0015, "step": 76880 }, { "epoch": 1.2581199378221386, "grad_norm": 0.03598054498434067, "learning_rate": 3.637591684409404e-06, "loss": 0.0016, "step": 76890 }, { "epoch": 1.2582835637732144, "grad_norm": 0.08404979854822159, "learning_rate": 3.6362178647527413e-06, "loss": 0.0018, "step": 76900 }, { "epoch": 1.2584471897242904, "grad_norm": 0.09314731508493423, "learning_rate": 3.63484415632151e-06, "loss": 0.0012, "step": 76910 }, { "epoch": 1.2586108156753661, "grad_norm": 0.017280584201216698, "learning_rate": 3.6334705592277443e-06, "loss": 0.0007, "step": 76920 }, { "epoch": 1.258774441626442, "grad_norm": 0.059732090681791306, "learning_rate": 3.6320970735834704e-06, "loss": 0.0017, "step": 76930 }, { "epoch": 1.258938067577518, "grad_norm": 0.049863774329423904, "learning_rate": 3.6307236995007052e-06, "loss": 0.0015, "step": 76940 }, { "epoch": 1.2591016935285937, "grad_norm": 0.052244868129491806, "learning_rate": 3.6293504370914563e-06, "loss": 0.0018, "step": 76950 }, { "epoch": 1.2592653194796695, "grad_norm": 0.026911437511444092, "learning_rate": 3.627977286467722e-06, "loss": 0.0008, "step": 76960 }, { "epoch": 1.2594289454307452, "grad_norm": 0.06201561912894249, "learning_rate": 3.626604247741493e-06, "loss": 0.0013, "step": 76970 }, { "epoch": 1.2595925713818212, "grad_norm": 0.12437136471271515, "learning_rate": 3.6252313210247482e-06, "loss": 0.001, "step": 76980 }, { "epoch": 1.259756197332897, "grad_norm": 0.03702268376946449, "learning_rate": 3.6238585064294603e-06, "loss": 0.0032, "step": 76990 }, { "epoch": 1.2599198232839728, "grad_norm": 0.07538307458162308, "learning_rate": 3.6224858040675896e-06, "loss": 0.0022, "step": 77000 }, { "epoch": 1.2600834492350486, "grad_norm": 0.04808036610484123, "learning_rate": 3.6211132140510907e-06, "loss": 0.0009, "step": 77010 }, { "epoch": 1.2602470751861246, "grad_norm": 0.133243590593338, "learning_rate": 3.619740736491907e-06, "loss": 0.0014, "step": 77020 }, { "epoch": 1.2604107011372003, "grad_norm": 0.08440541476011276, "learning_rate": 3.6183683715019717e-06, "loss": 0.0015, "step": 77030 }, { "epoch": 1.260574327088276, "grad_norm": 0.026749959215521812, "learning_rate": 3.616996119193212e-06, "loss": 0.0012, "step": 77040 }, { "epoch": 1.260737953039352, "grad_norm": 0.042579926550388336, "learning_rate": 3.615623979677543e-06, "loss": 0.0016, "step": 77050 }, { "epoch": 1.2609015789904279, "grad_norm": 0.03619217872619629, "learning_rate": 3.614251953066874e-06, "loss": 0.0018, "step": 77060 }, { "epoch": 1.2610652049415036, "grad_norm": 0.05326395109295845, "learning_rate": 3.612880039473099e-06, "loss": 0.0011, "step": 77070 }, { "epoch": 1.2612288308925796, "grad_norm": 0.03684147074818611, "learning_rate": 3.611508239008109e-06, "loss": 0.0012, "step": 77080 }, { "epoch": 1.2613924568436554, "grad_norm": 0.0037807838525623083, "learning_rate": 3.6101365517837826e-06, "loss": 0.0009, "step": 77090 }, { "epoch": 1.2615560827947312, "grad_norm": 0.018600158393383026, "learning_rate": 3.6087649779119915e-06, "loss": 0.0007, "step": 77100 }, { "epoch": 1.2617197087458072, "grad_norm": 0.13016009330749512, "learning_rate": 3.6073935175045945e-06, "loss": 0.002, "step": 77110 }, { "epoch": 1.261883334696883, "grad_norm": 0.09645784646272659, "learning_rate": 3.6060221706734446e-06, "loss": 0.0014, "step": 77120 }, { "epoch": 1.2620469606479587, "grad_norm": 0.03739799186587334, "learning_rate": 3.604650937530383e-06, "loss": 0.0017, "step": 77130 }, { "epoch": 1.2622105865990347, "grad_norm": 0.03929201886057854, "learning_rate": 3.603279818187244e-06, "loss": 0.0018, "step": 77140 }, { "epoch": 1.2623742125501105, "grad_norm": 0.034221045672893524, "learning_rate": 3.601908812755851e-06, "loss": 0.0013, "step": 77150 }, { "epoch": 1.2625378385011863, "grad_norm": 0.16852647066116333, "learning_rate": 3.6005379213480186e-06, "loss": 0.0015, "step": 77160 }, { "epoch": 1.262701464452262, "grad_norm": 0.027231376618146896, "learning_rate": 3.5991671440755516e-06, "loss": 0.0009, "step": 77170 }, { "epoch": 1.2628650904033378, "grad_norm": 0.03230728209018707, "learning_rate": 3.5977964810502463e-06, "loss": 0.0014, "step": 77180 }, { "epoch": 1.2630287163544138, "grad_norm": 0.04916096478700638, "learning_rate": 3.5964259323838898e-06, "loss": 0.0008, "step": 77190 }, { "epoch": 1.2631923423054896, "grad_norm": 0.08450891822576523, "learning_rate": 3.5950554981882584e-06, "loss": 0.0029, "step": 77200 }, { "epoch": 1.2633559682565654, "grad_norm": 0.10119549930095673, "learning_rate": 3.5936851785751204e-06, "loss": 0.0013, "step": 77210 }, { "epoch": 1.2635195942076414, "grad_norm": 0.03982265666127205, "learning_rate": 3.5923149736562353e-06, "loss": 0.0015, "step": 77220 }, { "epoch": 1.2636832201587171, "grad_norm": 0.05707027018070221, "learning_rate": 3.5909448835433514e-06, "loss": 0.0023, "step": 77230 }, { "epoch": 1.263846846109793, "grad_norm": 0.028453057631850243, "learning_rate": 3.589574908348208e-06, "loss": 0.0015, "step": 77240 }, { "epoch": 1.264010472060869, "grad_norm": 0.05292585864663124, "learning_rate": 3.588205048182537e-06, "loss": 0.0008, "step": 77250 }, { "epoch": 1.2641740980119447, "grad_norm": 0.15501683950424194, "learning_rate": 3.586835303158059e-06, "loss": 0.0024, "step": 77260 }, { "epoch": 1.2643377239630205, "grad_norm": 0.05271110683679581, "learning_rate": 3.5854656733864856e-06, "loss": 0.0015, "step": 77270 }, { "epoch": 1.2645013499140965, "grad_norm": 0.02560976892709732, "learning_rate": 3.5840961589795186e-06, "loss": 0.0006, "step": 77280 }, { "epoch": 1.2646649758651722, "grad_norm": 0.028053337708115578, "learning_rate": 3.5827267600488523e-06, "loss": 0.0008, "step": 77290 }, { "epoch": 1.264828601816248, "grad_norm": 0.17916865646839142, "learning_rate": 3.5813574767061693e-06, "loss": 0.0059, "step": 77300 }, { "epoch": 1.264992227767324, "grad_norm": 0.04611418768763542, "learning_rate": 3.5799883090631436e-06, "loss": 0.001, "step": 77310 }, { "epoch": 1.2651558537183998, "grad_norm": 0.005622072611004114, "learning_rate": 3.5786192572314405e-06, "loss": 0.0012, "step": 77320 }, { "epoch": 1.2653194796694756, "grad_norm": 0.11270934343338013, "learning_rate": 3.577250321322715e-06, "loss": 0.001, "step": 77330 }, { "epoch": 1.2654831056205516, "grad_norm": 0.08596876263618469, "learning_rate": 3.575881501448612e-06, "loss": 0.0009, "step": 77340 }, { "epoch": 1.2656467315716273, "grad_norm": 0.03830437362194061, "learning_rate": 3.5745127977207687e-06, "loss": 0.0009, "step": 77350 }, { "epoch": 1.265810357522703, "grad_norm": 0.05910711735486984, "learning_rate": 3.5731442102508118e-06, "loss": 0.0012, "step": 77360 }, { "epoch": 1.2659739834737789, "grad_norm": 0.047395166009664536, "learning_rate": 3.571775739150359e-06, "loss": 0.0019, "step": 77370 }, { "epoch": 1.2661376094248546, "grad_norm": 0.013198910281062126, "learning_rate": 3.5704073845310183e-06, "loss": 0.0027, "step": 77380 }, { "epoch": 1.2663012353759306, "grad_norm": 0.004581432323902845, "learning_rate": 3.569039146504386e-06, "loss": 0.0015, "step": 77390 }, { "epoch": 1.2664648613270064, "grad_norm": 0.04696310684084892, "learning_rate": 3.5676710251820524e-06, "loss": 0.0013, "step": 77400 }, { "epoch": 1.2666284872780822, "grad_norm": 0.1709856241941452, "learning_rate": 3.5663030206755973e-06, "loss": 0.0017, "step": 77410 }, { "epoch": 1.2667921132291582, "grad_norm": 0.04741336405277252, "learning_rate": 3.56493513309659e-06, "loss": 0.0005, "step": 77420 }, { "epoch": 1.266955739180234, "grad_norm": 0.025012029334902763, "learning_rate": 3.5635673625565905e-06, "loss": 0.0015, "step": 77430 }, { "epoch": 1.2671193651313097, "grad_norm": 0.060076531022787094, "learning_rate": 3.5621997091671493e-06, "loss": 0.0012, "step": 77440 }, { "epoch": 1.2672829910823857, "grad_norm": 0.039286285638809204, "learning_rate": 3.560832173039809e-06, "loss": 0.0015, "step": 77450 }, { "epoch": 1.2674466170334615, "grad_norm": 0.038472436368465424, "learning_rate": 3.5594647542860996e-06, "loss": 0.0009, "step": 77460 }, { "epoch": 1.2676102429845373, "grad_norm": 0.1349521428346634, "learning_rate": 3.558097453017543e-06, "loss": 0.0008, "step": 77470 }, { "epoch": 1.2677738689356133, "grad_norm": 0.05331789329648018, "learning_rate": 3.5567302693456536e-06, "loss": 0.0031, "step": 77480 }, { "epoch": 1.267937494886689, "grad_norm": 0.05710127204656601, "learning_rate": 3.5553632033819326e-06, "loss": 0.0017, "step": 77490 }, { "epoch": 1.2681011208377648, "grad_norm": 0.04289095476269722, "learning_rate": 3.5539962552378734e-06, "loss": 0.0023, "step": 77500 }, { "epoch": 1.2682647467888408, "grad_norm": 0.06787627935409546, "learning_rate": 3.55262942502496e-06, "loss": 0.0012, "step": 77510 }, { "epoch": 1.2684283727399166, "grad_norm": 0.03199947625398636, "learning_rate": 3.551262712854666e-06, "loss": 0.0006, "step": 77520 }, { "epoch": 1.2685919986909924, "grad_norm": 0.09511681646108627, "learning_rate": 3.549896118838457e-06, "loss": 0.004, "step": 77530 }, { "epoch": 1.2687556246420684, "grad_norm": 0.03120916709303856, "learning_rate": 3.5485296430877857e-06, "loss": 0.0012, "step": 77540 }, { "epoch": 1.2689192505931441, "grad_norm": 0.061602529138326645, "learning_rate": 3.547163285714099e-06, "loss": 0.001, "step": 77550 }, { "epoch": 1.26908287654422, "grad_norm": 0.12144287675619125, "learning_rate": 3.545797046828832e-06, "loss": 0.0023, "step": 77560 }, { "epoch": 1.2692465024952957, "grad_norm": 0.10730836540460587, "learning_rate": 3.5444309265434102e-06, "loss": 0.001, "step": 77570 }, { "epoch": 1.2694101284463715, "grad_norm": 0.014221975579857826, "learning_rate": 3.5430649249692495e-06, "loss": 0.0011, "step": 77580 }, { "epoch": 1.2695737543974475, "grad_norm": 0.06656935811042786, "learning_rate": 3.5416990422177565e-06, "loss": 0.0008, "step": 77590 }, { "epoch": 1.2697373803485232, "grad_norm": 0.03711162880063057, "learning_rate": 3.5403332784003285e-06, "loss": 0.0014, "step": 77600 }, { "epoch": 1.269901006299599, "grad_norm": 0.031001949682831764, "learning_rate": 3.5389676336283516e-06, "loss": 0.0007, "step": 77610 }, { "epoch": 1.270064632250675, "grad_norm": 0.01777828484773636, "learning_rate": 3.537602108013204e-06, "loss": 0.0006, "step": 77620 }, { "epoch": 1.2702282582017508, "grad_norm": 0.04528407007455826, "learning_rate": 3.5362367016662526e-06, "loss": 0.0014, "step": 77630 }, { "epoch": 1.2703918841528266, "grad_norm": 0.03722751513123512, "learning_rate": 3.534871414698856e-06, "loss": 0.0006, "step": 77640 }, { "epoch": 1.2705555101039026, "grad_norm": 0.10068801790475845, "learning_rate": 3.5335062472223614e-06, "loss": 0.0011, "step": 77650 }, { "epoch": 1.2707191360549783, "grad_norm": 0.009970646351575851, "learning_rate": 3.5321411993481086e-06, "loss": 0.0012, "step": 77660 }, { "epoch": 1.270882762006054, "grad_norm": 0.024275509640574455, "learning_rate": 3.530776271187425e-06, "loss": 0.0009, "step": 77670 }, { "epoch": 1.27104638795713, "grad_norm": 0.004173334687948227, "learning_rate": 3.5294114628516305e-06, "loss": 0.0006, "step": 77680 }, { "epoch": 1.2712100139082059, "grad_norm": 0.06451655179262161, "learning_rate": 3.528046774452033e-06, "loss": 0.0012, "step": 77690 }, { "epoch": 1.2713736398592816, "grad_norm": 0.07585131376981735, "learning_rate": 3.5266822060999343e-06, "loss": 0.002, "step": 77700 }, { "epoch": 1.2715372658103576, "grad_norm": 0.057093046605587006, "learning_rate": 3.525317757906621e-06, "loss": 0.0006, "step": 77710 }, { "epoch": 1.2717008917614334, "grad_norm": 0.03583693876862526, "learning_rate": 3.523953429983374e-06, "loss": 0.0015, "step": 77720 }, { "epoch": 1.2718645177125092, "grad_norm": 0.0703345388174057, "learning_rate": 3.522589222441463e-06, "loss": 0.0013, "step": 77730 }, { "epoch": 1.272028143663585, "grad_norm": 0.030982453376054764, "learning_rate": 3.521225135392149e-06, "loss": 0.001, "step": 77740 }, { "epoch": 1.272191769614661, "grad_norm": 0.0423283651471138, "learning_rate": 3.519861168946681e-06, "loss": 0.001, "step": 77750 }, { "epoch": 1.2723553955657367, "grad_norm": 0.08313736319541931, "learning_rate": 3.5184973232163013e-06, "loss": 0.0024, "step": 77760 }, { "epoch": 1.2725190215168125, "grad_norm": 0.04455333203077316, "learning_rate": 3.517133598312238e-06, "loss": 0.0006, "step": 77770 }, { "epoch": 1.2726826474678883, "grad_norm": 0.07627793401479721, "learning_rate": 3.5157699943457145e-06, "loss": 0.0007, "step": 77780 }, { "epoch": 1.2728462734189643, "grad_norm": 0.07501964271068573, "learning_rate": 3.5144065114279394e-06, "loss": 0.001, "step": 77790 }, { "epoch": 1.27300989937004, "grad_norm": 0.12820790708065033, "learning_rate": 3.5130431496701155e-06, "loss": 0.0014, "step": 77800 }, { "epoch": 1.2731735253211158, "grad_norm": 0.020283862948417664, "learning_rate": 3.511679909183433e-06, "loss": 0.0017, "step": 77810 }, { "epoch": 1.2733371512721918, "grad_norm": 0.03896934166550636, "learning_rate": 3.5103167900790737e-06, "loss": 0.0008, "step": 77820 }, { "epoch": 1.2735007772232676, "grad_norm": 0.01519716065376997, "learning_rate": 3.5089537924682083e-06, "loss": 0.0008, "step": 77830 }, { "epoch": 1.2736644031743434, "grad_norm": 0.025452304631471634, "learning_rate": 3.507590916461999e-06, "loss": 0.0021, "step": 77840 }, { "epoch": 1.2738280291254194, "grad_norm": 0.03845059499144554, "learning_rate": 3.5062281621715965e-06, "loss": 0.0009, "step": 77850 }, { "epoch": 1.2739916550764951, "grad_norm": 0.029472118243575096, "learning_rate": 3.5048655297081436e-06, "loss": 0.001, "step": 77860 }, { "epoch": 1.274155281027571, "grad_norm": 0.03153502568602562, "learning_rate": 3.5035030191827703e-06, "loss": 0.0007, "step": 77870 }, { "epoch": 1.274318906978647, "grad_norm": 0.04291800409555435, "learning_rate": 3.5021406307065996e-06, "loss": 0.0007, "step": 77880 }, { "epoch": 1.2744825329297227, "grad_norm": 0.0939502865076065, "learning_rate": 3.5007783643907434e-06, "loss": 0.0019, "step": 77890 }, { "epoch": 1.2746461588807985, "grad_norm": 0.02433999441564083, "learning_rate": 3.499416220346302e-06, "loss": 0.0017, "step": 77900 }, { "epoch": 1.2748097848318745, "grad_norm": 0.0496753565967083, "learning_rate": 3.4980541986843684e-06, "loss": 0.001, "step": 77910 }, { "epoch": 1.2749734107829502, "grad_norm": 0.0019044345244765282, "learning_rate": 3.496692299516025e-06, "loss": 0.0008, "step": 77920 }, { "epoch": 1.275137036734026, "grad_norm": 0.02745947055518627, "learning_rate": 3.4953305229523426e-06, "loss": 0.0008, "step": 77930 }, { "epoch": 1.2753006626851018, "grad_norm": 0.030799519270658493, "learning_rate": 3.4939688691043836e-06, "loss": 0.0011, "step": 77940 }, { "epoch": 1.2754642886361778, "grad_norm": 0.13371559977531433, "learning_rate": 3.4926073380831994e-06, "loss": 0.0016, "step": 77950 }, { "epoch": 1.2756279145872536, "grad_norm": 0.02949315495789051, "learning_rate": 3.4912459299998324e-06, "loss": 0.0016, "step": 77960 }, { "epoch": 1.2757915405383293, "grad_norm": 0.03466854616999626, "learning_rate": 3.489884644965314e-06, "loss": 0.0017, "step": 77970 }, { "epoch": 1.275955166489405, "grad_norm": 0.047134753316640854, "learning_rate": 3.4885234830906663e-06, "loss": 0.0007, "step": 77980 }, { "epoch": 1.276118792440481, "grad_norm": 0.08462857455015182, "learning_rate": 3.4871624444869e-06, "loss": 0.0018, "step": 77990 }, { "epoch": 1.2762824183915569, "grad_norm": 0.013326887041330338, "learning_rate": 3.485801529265018e-06, "loss": 0.0008, "step": 78000 }, { "epoch": 1.2764460443426326, "grad_norm": 0.028164181858301163, "learning_rate": 3.484440737536012e-06, "loss": 0.001, "step": 78010 }, { "epoch": 1.2766096702937086, "grad_norm": 0.05022374913096428, "learning_rate": 3.483080069410864e-06, "loss": 0.0038, "step": 78020 }, { "epoch": 1.2767732962447844, "grad_norm": 0.02585049904882908, "learning_rate": 3.481719525000543e-06, "loss": 0.0009, "step": 78030 }, { "epoch": 1.2769369221958602, "grad_norm": 0.029151415452361107, "learning_rate": 3.4803591044160122e-06, "loss": 0.0011, "step": 78040 }, { "epoch": 1.2771005481469362, "grad_norm": 0.04458334669470787, "learning_rate": 3.478998807768223e-06, "loss": 0.0013, "step": 78050 }, { "epoch": 1.277264174098012, "grad_norm": 0.06082809716463089, "learning_rate": 3.4776386351681157e-06, "loss": 0.0009, "step": 78060 }, { "epoch": 1.2774278000490877, "grad_norm": 0.09732098877429962, "learning_rate": 3.476278586726622e-06, "loss": 0.0013, "step": 78070 }, { "epoch": 1.2775914260001637, "grad_norm": 0.08182229101657867, "learning_rate": 3.4749186625546627e-06, "loss": 0.0011, "step": 78080 }, { "epoch": 1.2777550519512395, "grad_norm": 0.007163154426962137, "learning_rate": 3.4735588627631476e-06, "loss": 0.001, "step": 78090 }, { "epoch": 1.2779186779023153, "grad_norm": 0.07749473303556442, "learning_rate": 3.472199187462979e-06, "loss": 0.0012, "step": 78100 }, { "epoch": 1.2780823038533913, "grad_norm": 0.07042301446199417, "learning_rate": 3.4708396367650466e-06, "loss": 0.0016, "step": 78110 }, { "epoch": 1.278245929804467, "grad_norm": 0.06348545849323273, "learning_rate": 3.4694802107802304e-06, "loss": 0.0024, "step": 78120 }, { "epoch": 1.2784095557555428, "grad_norm": 0.04068297892808914, "learning_rate": 3.468120909619402e-06, "loss": 0.0015, "step": 78130 }, { "epoch": 1.2785731817066186, "grad_norm": 0.009029011242091656, "learning_rate": 3.466761733393419e-06, "loss": 0.001, "step": 78140 }, { "epoch": 1.2787368076576944, "grad_norm": 0.05771932378411293, "learning_rate": 3.4654026822131335e-06, "loss": 0.001, "step": 78150 }, { "epoch": 1.2789004336087704, "grad_norm": 0.029097730293869972, "learning_rate": 3.4640437561893836e-06, "loss": 0.0012, "step": 78160 }, { "epoch": 1.2790640595598461, "grad_norm": 0.091275155544281, "learning_rate": 3.462684955432999e-06, "loss": 0.0015, "step": 78170 }, { "epoch": 1.279227685510922, "grad_norm": 0.060457684099674225, "learning_rate": 3.4613262800548e-06, "loss": 0.0008, "step": 78180 }, { "epoch": 1.279391311461998, "grad_norm": 0.07914213091135025, "learning_rate": 3.459967730165593e-06, "loss": 0.0009, "step": 78190 }, { "epoch": 1.2795549374130737, "grad_norm": 0.025470426306128502, "learning_rate": 3.4586093058761793e-06, "loss": 0.0008, "step": 78200 }, { "epoch": 1.2797185633641495, "grad_norm": 0.010108712129294872, "learning_rate": 3.4572510072973464e-06, "loss": 0.0006, "step": 78210 }, { "epoch": 1.2798821893152255, "grad_norm": 0.040509022772312164, "learning_rate": 3.4558928345398724e-06, "loss": 0.0008, "step": 78220 }, { "epoch": 1.2800458152663012, "grad_norm": 0.11198396235704422, "learning_rate": 3.454534787714525e-06, "loss": 0.0015, "step": 78230 }, { "epoch": 1.280209441217377, "grad_norm": 0.021078722551465034, "learning_rate": 3.453176866932062e-06, "loss": 0.0008, "step": 78240 }, { "epoch": 1.280373067168453, "grad_norm": 0.03657149150967598, "learning_rate": 3.4518190723032317e-06, "loss": 0.0012, "step": 78250 }, { "epoch": 1.2805366931195288, "grad_norm": 0.013931353576481342, "learning_rate": 3.4504614039387697e-06, "loss": 0.0007, "step": 78260 }, { "epoch": 1.2807003190706046, "grad_norm": 0.12084774672985077, "learning_rate": 3.4491038619494046e-06, "loss": 0.0012, "step": 78270 }, { "epoch": 1.2808639450216805, "grad_norm": 0.033430326730012894, "learning_rate": 3.447746446445851e-06, "loss": 0.0013, "step": 78280 }, { "epoch": 1.2810275709727563, "grad_norm": 0.05016624927520752, "learning_rate": 3.4463891575388164e-06, "loss": 0.0019, "step": 78290 }, { "epoch": 1.281191196923832, "grad_norm": 0.06589030474424362, "learning_rate": 3.4450319953389967e-06, "loss": 0.0011, "step": 78300 }, { "epoch": 1.281354822874908, "grad_norm": 0.07793193310499191, "learning_rate": 3.443674959957076e-06, "loss": 0.0063, "step": 78310 }, { "epoch": 1.2815184488259839, "grad_norm": 0.02572542615234852, "learning_rate": 3.4423180515037313e-06, "loss": 0.0013, "step": 78320 }, { "epoch": 1.2816820747770596, "grad_norm": 0.08930584043264389, "learning_rate": 3.440961270089626e-06, "loss": 0.0022, "step": 78330 }, { "epoch": 1.2818457007281354, "grad_norm": 0.04198780283331871, "learning_rate": 3.439604615825416e-06, "loss": 0.0012, "step": 78340 }, { "epoch": 1.2820093266792112, "grad_norm": 0.04921184852719307, "learning_rate": 3.438248088821745e-06, "loss": 0.0016, "step": 78350 }, { "epoch": 1.2821729526302872, "grad_norm": 0.04974973574280739, "learning_rate": 3.4368916891892457e-06, "loss": 0.001, "step": 78360 }, { "epoch": 1.282336578581363, "grad_norm": 0.024867065250873566, "learning_rate": 3.4355354170385413e-06, "loss": 0.0016, "step": 78370 }, { "epoch": 1.2825002045324387, "grad_norm": 0.005292154848575592, "learning_rate": 3.434179272480246e-06, "loss": 0.0007, "step": 78380 }, { "epoch": 1.2826638304835147, "grad_norm": 0.01609170250594616, "learning_rate": 3.432823255624962e-06, "loss": 0.0008, "step": 78390 }, { "epoch": 1.2828274564345905, "grad_norm": 0.009458620101213455, "learning_rate": 3.4314673665832814e-06, "loss": 0.0008, "step": 78400 }, { "epoch": 1.2829910823856663, "grad_norm": 0.02687659114599228, "learning_rate": 3.4301116054657856e-06, "loss": 0.0011, "step": 78410 }, { "epoch": 1.2831547083367423, "grad_norm": 0.09582116454839706, "learning_rate": 3.428755972383046e-06, "loss": 0.0007, "step": 78420 }, { "epoch": 1.283318334287818, "grad_norm": 0.05896228179335594, "learning_rate": 3.4274004674456233e-06, "loss": 0.0008, "step": 78430 }, { "epoch": 1.2834819602388938, "grad_norm": 0.013279764913022518, "learning_rate": 3.4260450907640684e-06, "loss": 0.001, "step": 78440 }, { "epoch": 1.2836455861899698, "grad_norm": 0.06440384685993195, "learning_rate": 3.4246898424489205e-06, "loss": 0.0016, "step": 78450 }, { "epoch": 1.2838092121410456, "grad_norm": 0.017549477517604828, "learning_rate": 3.4233347226107093e-06, "loss": 0.0013, "step": 78460 }, { "epoch": 1.2839728380921214, "grad_norm": 0.027202928438782692, "learning_rate": 3.4219797313599546e-06, "loss": 0.0024, "step": 78470 }, { "epoch": 1.2841364640431974, "grad_norm": 0.06153430417180061, "learning_rate": 3.420624868807163e-06, "loss": 0.0015, "step": 78480 }, { "epoch": 1.2843000899942731, "grad_norm": 0.04982449859380722, "learning_rate": 3.4192701350628344e-06, "loss": 0.001, "step": 78490 }, { "epoch": 1.284463715945349, "grad_norm": 0.05699589475989342, "learning_rate": 3.4179155302374557e-06, "loss": 0.0015, "step": 78500 }, { "epoch": 1.2846273418964247, "grad_norm": 0.030946504324674606, "learning_rate": 3.416561054441503e-06, "loss": 0.0008, "step": 78510 }, { "epoch": 1.2847909678475007, "grad_norm": 0.0365414060652256, "learning_rate": 3.4152067077854433e-06, "loss": 0.0026, "step": 78520 }, { "epoch": 1.2849545937985765, "grad_norm": 0.03943750262260437, "learning_rate": 3.413852490379733e-06, "loss": 0.0008, "step": 78530 }, { "epoch": 1.2851182197496522, "grad_norm": 0.03144293278455734, "learning_rate": 3.412498402334817e-06, "loss": 0.0016, "step": 78540 }, { "epoch": 1.285281845700728, "grad_norm": 0.05824761092662811, "learning_rate": 3.41114444376113e-06, "loss": 0.0012, "step": 78550 }, { "epoch": 1.285445471651804, "grad_norm": 0.04617036506533623, "learning_rate": 3.409790614769096e-06, "loss": 0.0008, "step": 78560 }, { "epoch": 1.2856090976028798, "grad_norm": 0.09552693367004395, "learning_rate": 3.40843691546913e-06, "loss": 0.0009, "step": 78570 }, { "epoch": 1.2857727235539556, "grad_norm": 0.05639279633760452, "learning_rate": 3.407083345971634e-06, "loss": 0.0014, "step": 78580 }, { "epoch": 1.2859363495050316, "grad_norm": 0.020176507532596588, "learning_rate": 3.405729906387001e-06, "loss": 0.0011, "step": 78590 }, { "epoch": 1.2860999754561073, "grad_norm": 0.052018191665410995, "learning_rate": 3.4043765968256117e-06, "loss": 0.0014, "step": 78600 }, { "epoch": 1.286263601407183, "grad_norm": 0.019295357167720795, "learning_rate": 3.4030234173978387e-06, "loss": 0.0014, "step": 78610 }, { "epoch": 1.286427227358259, "grad_norm": 0.041312236338853836, "learning_rate": 3.4016703682140427e-06, "loss": 0.0013, "step": 78620 }, { "epoch": 1.2865908533093349, "grad_norm": 0.02839718572795391, "learning_rate": 3.4003174493845737e-06, "loss": 0.0012, "step": 78630 }, { "epoch": 1.2867544792604106, "grad_norm": 0.04943479597568512, "learning_rate": 3.39896466101977e-06, "loss": 0.0016, "step": 78640 }, { "epoch": 1.2869181052114866, "grad_norm": 0.0356144942343235, "learning_rate": 3.397612003229962e-06, "loss": 0.0009, "step": 78650 }, { "epoch": 1.2870817311625624, "grad_norm": 0.03254229575395584, "learning_rate": 3.3962594761254676e-06, "loss": 0.0015, "step": 78660 }, { "epoch": 1.2872453571136382, "grad_norm": 0.06893648952245712, "learning_rate": 3.394907079816594e-06, "loss": 0.0014, "step": 78670 }, { "epoch": 1.2874089830647142, "grad_norm": 0.015596817247569561, "learning_rate": 3.3935548144136377e-06, "loss": 0.0024, "step": 78680 }, { "epoch": 1.28757260901579, "grad_norm": 0.07231036573648453, "learning_rate": 3.3922026800268847e-06, "loss": 0.002, "step": 78690 }, { "epoch": 1.2877362349668657, "grad_norm": 0.07444033771753311, "learning_rate": 3.3908506767666116e-06, "loss": 0.0014, "step": 78700 }, { "epoch": 1.2878998609179415, "grad_norm": 0.06506069004535675, "learning_rate": 3.389498804743083e-06, "loss": 0.0017, "step": 78710 }, { "epoch": 1.2880634868690175, "grad_norm": 0.28216955065727234, "learning_rate": 3.3881470640665516e-06, "loss": 0.0023, "step": 78720 }, { "epoch": 1.2882271128200933, "grad_norm": 0.1464124172925949, "learning_rate": 3.3867954548472614e-06, "loss": 0.0032, "step": 78730 }, { "epoch": 1.288390738771169, "grad_norm": 0.03206486627459526, "learning_rate": 3.3854439771954452e-06, "loss": 0.0015, "step": 78740 }, { "epoch": 1.2885543647222448, "grad_norm": 0.009534304961562157, "learning_rate": 3.384092631221325e-06, "loss": 0.0008, "step": 78750 }, { "epoch": 1.2887179906733208, "grad_norm": 0.03360547870397568, "learning_rate": 3.3827414170351125e-06, "loss": 0.0013, "step": 78760 }, { "epoch": 1.2888816166243966, "grad_norm": 0.01117360033094883, "learning_rate": 3.381390334747008e-06, "loss": 0.0008, "step": 78770 }, { "epoch": 1.2890452425754724, "grad_norm": 0.060954499989748, "learning_rate": 3.3800393844671996e-06, "loss": 0.0011, "step": 78780 }, { "epoch": 1.2892088685265484, "grad_norm": 0.0011138300178572536, "learning_rate": 3.378688566305869e-06, "loss": 0.0012, "step": 78790 }, { "epoch": 1.2893724944776241, "grad_norm": 0.09522448480129242, "learning_rate": 3.3773378803731816e-06, "loss": 0.0017, "step": 78800 }, { "epoch": 1.2895361204287, "grad_norm": 0.07202402502298355, "learning_rate": 3.375987326779297e-06, "loss": 0.0009, "step": 78810 }, { "epoch": 1.289699746379776, "grad_norm": 0.009073911234736443, "learning_rate": 3.3746369056343597e-06, "loss": 0.0015, "step": 78820 }, { "epoch": 1.2898633723308517, "grad_norm": 0.022142861038446426, "learning_rate": 3.3732866170485066e-06, "loss": 0.0018, "step": 78830 }, { "epoch": 1.2900269982819275, "grad_norm": 0.019666818901896477, "learning_rate": 3.371936461131862e-06, "loss": 0.0038, "step": 78840 }, { "epoch": 1.2901906242330035, "grad_norm": 0.105963334441185, "learning_rate": 3.370586437994541e-06, "loss": 0.0009, "step": 78850 }, { "epoch": 1.2903542501840792, "grad_norm": 0.0553797110915184, "learning_rate": 3.369236547746646e-06, "loss": 0.0013, "step": 78860 }, { "epoch": 1.290517876135155, "grad_norm": 0.061036694794893265, "learning_rate": 3.3678867904982702e-06, "loss": 0.0013, "step": 78870 }, { "epoch": 1.290681502086231, "grad_norm": 0.09497766196727753, "learning_rate": 3.366537166359495e-06, "loss": 0.0018, "step": 78880 }, { "epoch": 1.2908451280373068, "grad_norm": 0.09542693942785263, "learning_rate": 3.3651876754403905e-06, "loss": 0.0013, "step": 78890 }, { "epoch": 1.2910087539883826, "grad_norm": 0.08166547864675522, "learning_rate": 3.363838317851017e-06, "loss": 0.0013, "step": 78900 }, { "epoch": 1.2911723799394583, "grad_norm": 0.009052703157067299, "learning_rate": 3.3624890937014243e-06, "loss": 0.0009, "step": 78910 }, { "epoch": 1.2913360058905343, "grad_norm": 0.03741474077105522, "learning_rate": 3.3611400031016493e-06, "loss": 0.001, "step": 78920 }, { "epoch": 1.29149963184161, "grad_norm": 0.12131674587726593, "learning_rate": 3.35979104616172e-06, "loss": 0.0013, "step": 78930 }, { "epoch": 1.2916632577926859, "grad_norm": 0.049281589686870575, "learning_rate": 3.358442222991652e-06, "loss": 0.0015, "step": 78940 }, { "epoch": 1.2918268837437616, "grad_norm": 0.11477705836296082, "learning_rate": 3.3570935337014514e-06, "loss": 0.0016, "step": 78950 }, { "epoch": 1.2919905096948376, "grad_norm": 0.019706519320607185, "learning_rate": 3.355744978401113e-06, "loss": 0.0008, "step": 78960 }, { "epoch": 1.2921541356459134, "grad_norm": 0.0271159578114748, "learning_rate": 3.35439655720062e-06, "loss": 0.0009, "step": 78970 }, { "epoch": 1.2923177615969892, "grad_norm": 0.11278549581766129, "learning_rate": 3.353048270209944e-06, "loss": 0.0019, "step": 78980 }, { "epoch": 1.2924813875480652, "grad_norm": 0.04301435127854347, "learning_rate": 3.3517001175390483e-06, "loss": 0.0013, "step": 78990 }, { "epoch": 1.292645013499141, "grad_norm": 0.03350311517715454, "learning_rate": 3.3503520992978834e-06, "loss": 0.0008, "step": 79000 }, { "epoch": 1.2928086394502167, "grad_norm": 0.10418645292520523, "learning_rate": 3.3490042155963876e-06, "loss": 0.0012, "step": 79010 }, { "epoch": 1.2929722654012927, "grad_norm": 0.07796432822942734, "learning_rate": 3.3476564665444923e-06, "loss": 0.0008, "step": 79020 }, { "epoch": 1.2931358913523685, "grad_norm": 0.1125660315155983, "learning_rate": 3.3463088522521126e-06, "loss": 0.0019, "step": 79030 }, { "epoch": 1.2932995173034443, "grad_norm": 0.05894038453698158, "learning_rate": 3.344961372829157e-06, "loss": 0.0015, "step": 79040 }, { "epoch": 1.2934631432545203, "grad_norm": 0.011720987036824226, "learning_rate": 3.3436140283855212e-06, "loss": 0.0018, "step": 79050 }, { "epoch": 1.293626769205596, "grad_norm": 0.1287417858839035, "learning_rate": 3.3422668190310893e-06, "loss": 0.001, "step": 79060 }, { "epoch": 1.2937903951566718, "grad_norm": 0.12819166481494904, "learning_rate": 3.3409197448757356e-06, "loss": 0.0011, "step": 79070 }, { "epoch": 1.2939540211077478, "grad_norm": 0.1163904070854187, "learning_rate": 3.3395728060293226e-06, "loss": 0.0014, "step": 79080 }, { "epoch": 1.2941176470588236, "grad_norm": 0.06882356107234955, "learning_rate": 3.3382260026017027e-06, "loss": 0.0007, "step": 79090 }, { "epoch": 1.2942812730098994, "grad_norm": 0.04898802936077118, "learning_rate": 3.3368793347027163e-06, "loss": 0.0013, "step": 79100 }, { "epoch": 1.2944448989609751, "grad_norm": 0.0616704523563385, "learning_rate": 3.3355328024421926e-06, "loss": 0.0009, "step": 79110 }, { "epoch": 1.294608524912051, "grad_norm": 0.012394711375236511, "learning_rate": 3.33418640592995e-06, "loss": 0.0016, "step": 79120 }, { "epoch": 1.294772150863127, "grad_norm": 0.05662516877055168, "learning_rate": 3.3328401452757985e-06, "loss": 0.001, "step": 79130 }, { "epoch": 1.2949357768142027, "grad_norm": 0.10913223028182983, "learning_rate": 3.3314940205895312e-06, "loss": 0.0018, "step": 79140 }, { "epoch": 1.2950994027652785, "grad_norm": 0.1399538218975067, "learning_rate": 3.3301480319809353e-06, "loss": 0.0023, "step": 79150 }, { "epoch": 1.2952630287163545, "grad_norm": 0.05488545447587967, "learning_rate": 3.3288021795597835e-06, "loss": 0.0012, "step": 79160 }, { "epoch": 1.2954266546674302, "grad_norm": 0.010304092429578304, "learning_rate": 3.327456463435841e-06, "loss": 0.0007, "step": 79170 }, { "epoch": 1.295590280618506, "grad_norm": 0.04390418156981468, "learning_rate": 3.3261108837188582e-06, "loss": 0.0022, "step": 79180 }, { "epoch": 1.295753906569582, "grad_norm": 0.05978436768054962, "learning_rate": 3.3247654405185776e-06, "loss": 0.0016, "step": 79190 }, { "epoch": 1.2959175325206578, "grad_norm": 0.017376508563756943, "learning_rate": 3.323420133944727e-06, "loss": 0.0011, "step": 79200 }, { "epoch": 1.2960811584717336, "grad_norm": 0.06467296183109283, "learning_rate": 3.322074964107027e-06, "loss": 0.0016, "step": 79210 }, { "epoch": 1.2962447844228095, "grad_norm": 0.06061068922281265, "learning_rate": 3.320729931115183e-06, "loss": 0.0007, "step": 79220 }, { "epoch": 1.2964084103738853, "grad_norm": 0.02549733594059944, "learning_rate": 3.3193850350788935e-06, "loss": 0.0026, "step": 79230 }, { "epoch": 1.296572036324961, "grad_norm": 0.06459949165582657, "learning_rate": 3.3180402761078424e-06, "loss": 0.0011, "step": 79240 }, { "epoch": 1.296735662276037, "grad_norm": 0.039040639996528625, "learning_rate": 3.3166956543117035e-06, "loss": 0.0008, "step": 79250 }, { "epoch": 1.2968992882271129, "grad_norm": 0.12272796034812927, "learning_rate": 3.31535116980014e-06, "loss": 0.0026, "step": 79260 }, { "epoch": 1.2970629141781886, "grad_norm": 0.03812261298298836, "learning_rate": 3.3140068226828038e-06, "loss": 0.0008, "step": 79270 }, { "epoch": 1.2972265401292646, "grad_norm": 0.016286397352814674, "learning_rate": 3.312662613069334e-06, "loss": 0.0016, "step": 79280 }, { "epoch": 1.2973901660803404, "grad_norm": 0.07783732563257217, "learning_rate": 3.3113185410693616e-06, "loss": 0.0007, "step": 79290 }, { "epoch": 1.2975537920314162, "grad_norm": 0.02862142026424408, "learning_rate": 3.309974606792503e-06, "loss": 0.0011, "step": 79300 }, { "epoch": 1.297717417982492, "grad_norm": 0.1427776962518692, "learning_rate": 3.3086308103483657e-06, "loss": 0.0016, "step": 79310 }, { "epoch": 1.2978810439335677, "grad_norm": 0.02684645913541317, "learning_rate": 3.3072871518465456e-06, "loss": 0.002, "step": 79320 }, { "epoch": 1.2980446698846437, "grad_norm": 0.0626491904258728, "learning_rate": 3.3059436313966253e-06, "loss": 0.001, "step": 79330 }, { "epoch": 1.2982082958357195, "grad_norm": 0.05992826074361801, "learning_rate": 3.3046002491081793e-06, "loss": 0.0011, "step": 79340 }, { "epoch": 1.2983719217867953, "grad_norm": 0.028699785470962524, "learning_rate": 3.3032570050907685e-06, "loss": 0.0023, "step": 79350 }, { "epoch": 1.2985355477378713, "grad_norm": 0.04988691955804825, "learning_rate": 3.3019138994539434e-06, "loss": 0.001, "step": 79360 }, { "epoch": 1.298699173688947, "grad_norm": 0.09028506278991699, "learning_rate": 3.3005709323072436e-06, "loss": 0.0013, "step": 79370 }, { "epoch": 1.2988627996400228, "grad_norm": 0.1481347680091858, "learning_rate": 3.299228103760197e-06, "loss": 0.0017, "step": 79380 }, { "epoch": 1.2990264255910988, "grad_norm": 0.07268628478050232, "learning_rate": 3.2978854139223186e-06, "loss": 0.0014, "step": 79390 }, { "epoch": 1.2991900515421746, "grad_norm": 0.034383244812488556, "learning_rate": 3.296542862903116e-06, "loss": 0.0011, "step": 79400 }, { "epoch": 1.2993536774932504, "grad_norm": 0.19446270167827606, "learning_rate": 3.2952004508120805e-06, "loss": 0.0025, "step": 79410 }, { "epoch": 1.2995173034443264, "grad_norm": 0.060037847608327866, "learning_rate": 3.2938581777586977e-06, "loss": 0.0016, "step": 79420 }, { "epoch": 1.2996809293954021, "grad_norm": 0.029453665018081665, "learning_rate": 3.2925160438524354e-06, "loss": 0.0012, "step": 79430 }, { "epoch": 1.299844555346478, "grad_norm": 0.025623057037591934, "learning_rate": 3.291174049202756e-06, "loss": 0.0009, "step": 79440 }, { "epoch": 1.300008181297554, "grad_norm": 0.03462930768728256, "learning_rate": 3.289832193919108e-06, "loss": 0.0006, "step": 79450 }, { "epoch": 1.3001718072486297, "grad_norm": 0.006365691777318716, "learning_rate": 3.2884904781109273e-06, "loss": 0.0011, "step": 79460 }, { "epoch": 1.3003354331997055, "grad_norm": 0.10619057714939117, "learning_rate": 3.2871489018876402e-06, "loss": 0.0014, "step": 79470 }, { "epoch": 1.3004990591507812, "grad_norm": 0.11574172228574753, "learning_rate": 3.2858074653586602e-06, "loss": 0.001, "step": 79480 }, { "epoch": 1.3006626851018572, "grad_norm": 0.09558505564928055, "learning_rate": 3.284466168633392e-06, "loss": 0.0027, "step": 79490 }, { "epoch": 1.300826311052933, "grad_norm": 0.025329547002911568, "learning_rate": 3.2831250118212253e-06, "loss": 0.0016, "step": 79500 }, { "epoch": 1.3009899370040088, "grad_norm": 0.04660091549158096, "learning_rate": 3.2817839950315424e-06, "loss": 0.0008, "step": 79510 }, { "epoch": 1.3011535629550846, "grad_norm": 0.07537785172462463, "learning_rate": 3.280443118373711e-06, "loss": 0.0012, "step": 79520 }, { "epoch": 1.3013171889061605, "grad_norm": 0.09352541714906693, "learning_rate": 3.2791023819570877e-06, "loss": 0.0015, "step": 79530 }, { "epoch": 1.3014808148572363, "grad_norm": 0.021450523287057877, "learning_rate": 3.2777617858910193e-06, "loss": 0.0012, "step": 79540 }, { "epoch": 1.301644440808312, "grad_norm": 0.08590084314346313, "learning_rate": 3.2764213302848404e-06, "loss": 0.0013, "step": 79550 }, { "epoch": 1.301808066759388, "grad_norm": 0.03559460490942001, "learning_rate": 3.275081015247873e-06, "loss": 0.0004, "step": 79560 }, { "epoch": 1.3019716927104639, "grad_norm": 0.03023878112435341, "learning_rate": 3.27374084088943e-06, "loss": 0.0074, "step": 79570 }, { "epoch": 1.3021353186615396, "grad_norm": 0.0804395005106926, "learning_rate": 3.2724008073188104e-06, "loss": 0.0007, "step": 79580 }, { "epoch": 1.3022989446126156, "grad_norm": 0.013239249587059021, "learning_rate": 3.2710609146453034e-06, "loss": 0.0015, "step": 79590 }, { "epoch": 1.3024625705636914, "grad_norm": 0.13531453907489777, "learning_rate": 3.2697211629781855e-06, "loss": 0.0019, "step": 79600 }, { "epoch": 1.3026261965147672, "grad_norm": 0.029673507437109947, "learning_rate": 3.268381552426723e-06, "loss": 0.001, "step": 79610 }, { "epoch": 1.3027898224658432, "grad_norm": 0.028425974771380424, "learning_rate": 3.2670420831001694e-06, "loss": 0.0015, "step": 79620 }, { "epoch": 1.302953448416919, "grad_norm": 0.1271386593580246, "learning_rate": 3.2657027551077674e-06, "loss": 0.0011, "step": 79630 }, { "epoch": 1.3031170743679947, "grad_norm": 0.0557423010468483, "learning_rate": 3.2643635685587483e-06, "loss": 0.0009, "step": 79640 }, { "epoch": 1.3032807003190707, "grad_norm": 0.013881120830774307, "learning_rate": 3.2630245235623313e-06, "loss": 0.0009, "step": 79650 }, { "epoch": 1.3034443262701465, "grad_norm": 0.19356095790863037, "learning_rate": 3.2616856202277248e-06, "loss": 0.0017, "step": 79660 }, { "epoch": 1.3036079522212223, "grad_norm": 0.27862197160720825, "learning_rate": 3.260346858664124e-06, "loss": 0.002, "step": 79670 }, { "epoch": 1.303771578172298, "grad_norm": 0.0032116922084242105, "learning_rate": 3.2590082389807156e-06, "loss": 0.001, "step": 79680 }, { "epoch": 1.303935204123374, "grad_norm": 0.04921858012676239, "learning_rate": 3.2576697612866716e-06, "loss": 0.0008, "step": 79690 }, { "epoch": 1.3040988300744498, "grad_norm": 0.08543482422828674, "learning_rate": 3.256331425691154e-06, "loss": 0.0034, "step": 79700 }, { "epoch": 1.3042624560255256, "grad_norm": 0.05653172358870506, "learning_rate": 3.2549932323033117e-06, "loss": 0.0018, "step": 79710 }, { "epoch": 1.3044260819766014, "grad_norm": 0.024717243388295174, "learning_rate": 3.253655181232286e-06, "loss": 0.0007, "step": 79720 }, { "epoch": 1.3045897079276774, "grad_norm": 0.09563183784484863, "learning_rate": 3.252317272587201e-06, "loss": 0.0014, "step": 79730 }, { "epoch": 1.3047533338787531, "grad_norm": 0.03906206786632538, "learning_rate": 3.2509795064771738e-06, "loss": 0.0012, "step": 79740 }, { "epoch": 1.304916959829829, "grad_norm": 0.06482381373643875, "learning_rate": 3.249641883011307e-06, "loss": 0.0014, "step": 79750 }, { "epoch": 1.305080585780905, "grad_norm": 0.04216672107577324, "learning_rate": 3.248304402298693e-06, "loss": 0.0012, "step": 79760 }, { "epoch": 1.3052442117319807, "grad_norm": 0.045597292482852936, "learning_rate": 3.246967064448413e-06, "loss": 0.0018, "step": 79770 }, { "epoch": 1.3054078376830565, "grad_norm": 0.044614698737859726, "learning_rate": 3.2456298695695345e-06, "loss": 0.0017, "step": 79780 }, { "epoch": 1.3055714636341325, "grad_norm": 0.07309107482433319, "learning_rate": 3.2442928177711148e-06, "loss": 0.0015, "step": 79790 }, { "epoch": 1.3057350895852082, "grad_norm": 0.02369355596601963, "learning_rate": 3.2429559091621985e-06, "loss": 0.0015, "step": 79800 }, { "epoch": 1.305898715536284, "grad_norm": 0.05398254469037056, "learning_rate": 3.2416191438518218e-06, "loss": 0.0008, "step": 79810 }, { "epoch": 1.30606234148736, "grad_norm": 0.07313282787799835, "learning_rate": 3.240282521949004e-06, "loss": 0.0013, "step": 79820 }, { "epoch": 1.3062259674384358, "grad_norm": 0.03335161134600639, "learning_rate": 3.238946043562757e-06, "loss": 0.0009, "step": 79830 }, { "epoch": 1.3063895933895115, "grad_norm": 0.044305142015218735, "learning_rate": 3.23760970880208e-06, "loss": 0.0015, "step": 79840 }, { "epoch": 1.3065532193405875, "grad_norm": 0.018014226108789444, "learning_rate": 3.2362735177759574e-06, "loss": 0.0006, "step": 79850 }, { "epoch": 1.3067168452916633, "grad_norm": 0.18183022737503052, "learning_rate": 3.234937470593367e-06, "loss": 0.001, "step": 79860 }, { "epoch": 1.306880471242739, "grad_norm": 0.02412210963666439, "learning_rate": 3.2336015673632705e-06, "loss": 0.0019, "step": 79870 }, { "epoch": 1.3070440971938149, "grad_norm": 0.0383760929107666, "learning_rate": 3.232265808194621e-06, "loss": 0.0014, "step": 79880 }, { "epoch": 1.3072077231448906, "grad_norm": 0.030720427632331848, "learning_rate": 3.230930193196358e-06, "loss": 0.0022, "step": 79890 }, { "epoch": 1.3073713490959666, "grad_norm": 0.022045457735657692, "learning_rate": 3.2295947224774086e-06, "loss": 0.0012, "step": 79900 }, { "epoch": 1.3075349750470424, "grad_norm": 0.0737752765417099, "learning_rate": 3.2282593961466915e-06, "loss": 0.001, "step": 79910 }, { "epoch": 1.3076986009981182, "grad_norm": 0.11369406431913376, "learning_rate": 3.2269242143131086e-06, "loss": 0.0015, "step": 79920 }, { "epoch": 1.3078622269491942, "grad_norm": 0.1049065962433815, "learning_rate": 3.225589177085555e-06, "loss": 0.0009, "step": 79930 }, { "epoch": 1.30802585290027, "grad_norm": 0.030235277488827705, "learning_rate": 3.2242542845729106e-06, "loss": 0.0011, "step": 79940 }, { "epoch": 1.3081894788513457, "grad_norm": 0.007540002930909395, "learning_rate": 3.222919536884046e-06, "loss": 0.0012, "step": 79950 }, { "epoch": 1.3083531048024217, "grad_norm": 0.06803745776414871, "learning_rate": 3.2215849341278172e-06, "loss": 0.001, "step": 79960 }, { "epoch": 1.3085167307534975, "grad_norm": 0.018689248710870743, "learning_rate": 3.220250476413071e-06, "loss": 0.0008, "step": 79970 }, { "epoch": 1.3086803567045733, "grad_norm": 0.04351019114255905, "learning_rate": 3.21891616384864e-06, "loss": 0.0017, "step": 79980 }, { "epoch": 1.3088439826556493, "grad_norm": 0.03786642476916313, "learning_rate": 3.2175819965433473e-06, "loss": 0.0021, "step": 79990 }, { "epoch": 1.309007608606725, "grad_norm": 0.02765335887670517, "learning_rate": 3.2162479746060025e-06, "loss": 0.0014, "step": 80000 }, { "epoch": 1.3091712345578008, "grad_norm": 0.04041989520192146, "learning_rate": 3.2149140981454047e-06, "loss": 0.0033, "step": 80010 }, { "epoch": 1.3093348605088768, "grad_norm": 0.03959774971008301, "learning_rate": 3.213580367270339e-06, "loss": 0.0014, "step": 80020 }, { "epoch": 1.3094984864599526, "grad_norm": 0.0767705962061882, "learning_rate": 3.212246782089581e-06, "loss": 0.0011, "step": 80030 }, { "epoch": 1.3096621124110284, "grad_norm": 0.04189818352460861, "learning_rate": 3.2109133427118926e-06, "loss": 0.0017, "step": 80040 }, { "epoch": 1.3098257383621044, "grad_norm": 0.04493827000260353, "learning_rate": 3.209580049246025e-06, "loss": 0.0012, "step": 80050 }, { "epoch": 1.3099893643131801, "grad_norm": 0.04886334761977196, "learning_rate": 3.2082469018007174e-06, "loss": 0.0019, "step": 80060 }, { "epoch": 1.310152990264256, "grad_norm": 0.0646354928612709, "learning_rate": 3.206913900484696e-06, "loss": 0.0012, "step": 80070 }, { "epoch": 1.3103166162153317, "grad_norm": 0.13245901465415955, "learning_rate": 3.2055810454066766e-06, "loss": 0.0015, "step": 80080 }, { "epoch": 1.3104802421664075, "grad_norm": 0.07928046584129333, "learning_rate": 3.204248336675362e-06, "loss": 0.0011, "step": 80090 }, { "epoch": 1.3106438681174835, "grad_norm": 0.06279926747083664, "learning_rate": 3.2029157743994443e-06, "loss": 0.0009, "step": 80100 }, { "epoch": 1.3108074940685592, "grad_norm": 0.04123770445585251, "learning_rate": 3.2015833586876006e-06, "loss": 0.001, "step": 80110 }, { "epoch": 1.310971120019635, "grad_norm": 0.1268790364265442, "learning_rate": 3.2002510896484994e-06, "loss": 0.0011, "step": 80120 }, { "epoch": 1.311134745970711, "grad_norm": 0.031708262860774994, "learning_rate": 3.1989189673907963e-06, "loss": 0.0008, "step": 80130 }, { "epoch": 1.3112983719217868, "grad_norm": 0.07571687549352646, "learning_rate": 3.1975869920231345e-06, "loss": 0.0012, "step": 80140 }, { "epoch": 1.3114619978728626, "grad_norm": 0.0022230877075344324, "learning_rate": 3.1962551636541452e-06, "loss": 0.0007, "step": 80150 }, { "epoch": 1.3116256238239385, "grad_norm": 0.024761075153946877, "learning_rate": 3.194923482392448e-06, "loss": 0.0016, "step": 80160 }, { "epoch": 1.3117892497750143, "grad_norm": 0.07486965507268906, "learning_rate": 3.1935919483466497e-06, "loss": 0.0011, "step": 80170 }, { "epoch": 1.31195287572609, "grad_norm": 0.0521760955452919, "learning_rate": 3.1922605616253465e-06, "loss": 0.0009, "step": 80180 }, { "epoch": 1.312116501677166, "grad_norm": 0.009890205226838589, "learning_rate": 3.1909293223371217e-06, "loss": 0.0011, "step": 80190 }, { "epoch": 1.3122801276282419, "grad_norm": 0.027179060503840446, "learning_rate": 3.189598230590546e-06, "loss": 0.0017, "step": 80200 }, { "epoch": 1.3124437535793176, "grad_norm": 0.004196762572973967, "learning_rate": 3.1882672864941787e-06, "loss": 0.0011, "step": 80210 }, { "epoch": 1.3126073795303936, "grad_norm": 0.07651763409376144, "learning_rate": 3.186936490156568e-06, "loss": 0.0014, "step": 80220 }, { "epoch": 1.3127710054814694, "grad_norm": 0.0073889028280973434, "learning_rate": 3.185605841686249e-06, "loss": 0.0015, "step": 80230 }, { "epoch": 1.3129346314325452, "grad_norm": 0.06646845489740372, "learning_rate": 3.1842753411917437e-06, "loss": 0.0011, "step": 80240 }, { "epoch": 1.313098257383621, "grad_norm": 0.05607403442263603, "learning_rate": 3.1829449887815643e-06, "loss": 0.0017, "step": 80250 }, { "epoch": 1.313261883334697, "grad_norm": 0.0464121513068676, "learning_rate": 3.18161478456421e-06, "loss": 0.001, "step": 80260 }, { "epoch": 1.3134255092857727, "grad_norm": 0.022048866376280785, "learning_rate": 3.1802847286481665e-06, "loss": 0.0006, "step": 80270 }, { "epoch": 1.3135891352368485, "grad_norm": 0.05973271280527115, "learning_rate": 3.1789548211419092e-06, "loss": 0.0009, "step": 80280 }, { "epoch": 1.3137527611879243, "grad_norm": 0.0234842449426651, "learning_rate": 3.1776250621539007e-06, "loss": 0.0007, "step": 80290 }, { "epoch": 1.3139163871390003, "grad_norm": 0.06460271030664444, "learning_rate": 3.1762954517925925e-06, "loss": 0.0012, "step": 80300 }, { "epoch": 1.314080013090076, "grad_norm": 0.12546637654304504, "learning_rate": 3.1749659901664227e-06, "loss": 0.0012, "step": 80310 }, { "epoch": 1.3142436390411518, "grad_norm": 0.01690412312746048, "learning_rate": 3.173636677383817e-06, "loss": 0.0012, "step": 80320 }, { "epoch": 1.3144072649922278, "grad_norm": 0.04322877153754234, "learning_rate": 3.17230751355319e-06, "loss": 0.0011, "step": 80330 }, { "epoch": 1.3145708909433036, "grad_norm": 0.0616116039454937, "learning_rate": 3.170978498782944e-06, "loss": 0.0013, "step": 80340 }, { "epoch": 1.3147345168943794, "grad_norm": 0.008478286676108837, "learning_rate": 3.1696496331814684e-06, "loss": 0.0017, "step": 80350 }, { "epoch": 1.3148981428454554, "grad_norm": 0.04688458517193794, "learning_rate": 3.1683209168571417e-06, "loss": 0.0011, "step": 80360 }, { "epoch": 1.3150617687965311, "grad_norm": 0.10005426406860352, "learning_rate": 3.16699234991833e-06, "loss": 0.0017, "step": 80370 }, { "epoch": 1.315225394747607, "grad_norm": 0.028535056859254837, "learning_rate": 3.165663932473384e-06, "loss": 0.0018, "step": 80380 }, { "epoch": 1.315389020698683, "grad_norm": 0.05501551553606987, "learning_rate": 3.164335664630648e-06, "loss": 0.0017, "step": 80390 }, { "epoch": 1.3155526466497587, "grad_norm": 0.029538555070757866, "learning_rate": 3.163007546498449e-06, "loss": 0.0015, "step": 80400 }, { "epoch": 1.3157162726008345, "grad_norm": 0.061508920043706894, "learning_rate": 3.161679578185105e-06, "loss": 0.0021, "step": 80410 }, { "epoch": 1.3158798985519105, "grad_norm": 0.011630282737314701, "learning_rate": 3.160351759798921e-06, "loss": 0.0019, "step": 80420 }, { "epoch": 1.3160435245029862, "grad_norm": 0.06952890753746033, "learning_rate": 3.159024091448187e-06, "loss": 0.0007, "step": 80430 }, { "epoch": 1.316207150454062, "grad_norm": 0.04616463929414749, "learning_rate": 3.157696573241186e-06, "loss": 0.0011, "step": 80440 }, { "epoch": 1.3163707764051378, "grad_norm": 0.021005893126130104, "learning_rate": 3.1563692052861827e-06, "loss": 0.0011, "step": 80450 }, { "epoch": 1.3165344023562138, "grad_norm": 0.04831234738230705, "learning_rate": 3.155041987691435e-06, "loss": 0.0007, "step": 80460 }, { "epoch": 1.3166980283072895, "grad_norm": 0.005501890555024147, "learning_rate": 3.1537149205651864e-06, "loss": 0.0013, "step": 80470 }, { "epoch": 1.3168616542583653, "grad_norm": 0.06707277148962021, "learning_rate": 3.152388004015666e-06, "loss": 0.0011, "step": 80480 }, { "epoch": 1.317025280209441, "grad_norm": 0.061561886221170425, "learning_rate": 3.1510612381510954e-06, "loss": 0.0011, "step": 80490 }, { "epoch": 1.317188906160517, "grad_norm": 0.0730748325586319, "learning_rate": 3.1497346230796783e-06, "loss": 0.0016, "step": 80500 }, { "epoch": 1.3173525321115929, "grad_norm": 0.09601116925477982, "learning_rate": 3.1484081589096104e-06, "loss": 0.0014, "step": 80510 }, { "epoch": 1.3175161580626686, "grad_norm": 0.03992994502186775, "learning_rate": 3.1470818457490736e-06, "loss": 0.0006, "step": 80520 }, { "epoch": 1.3176797840137446, "grad_norm": 0.08607742190361023, "learning_rate": 3.1457556837062373e-06, "loss": 0.001, "step": 80530 }, { "epoch": 1.3178434099648204, "grad_norm": 0.039825137704610825, "learning_rate": 3.1444296728892588e-06, "loss": 0.0008, "step": 80540 }, { "epoch": 1.3180070359158962, "grad_norm": 0.0654398500919342, "learning_rate": 3.143103813406283e-06, "loss": 0.0012, "step": 80550 }, { "epoch": 1.3181706618669722, "grad_norm": 0.07151825726032257, "learning_rate": 3.141778105365443e-06, "loss": 0.0022, "step": 80560 }, { "epoch": 1.318334287818048, "grad_norm": 0.031865235418081284, "learning_rate": 3.1404525488748576e-06, "loss": 0.0008, "step": 80570 }, { "epoch": 1.3184979137691237, "grad_norm": 0.039745211601257324, "learning_rate": 3.139127144042636e-06, "loss": 0.0015, "step": 80580 }, { "epoch": 1.3186615397201997, "grad_norm": 0.049659788608551025, "learning_rate": 3.137801890976873e-06, "loss": 0.0018, "step": 80590 }, { "epoch": 1.3188251656712755, "grad_norm": 0.05730728432536125, "learning_rate": 3.1364767897856522e-06, "loss": 0.0008, "step": 80600 }, { "epoch": 1.3189887916223513, "grad_norm": 0.03901224955916405, "learning_rate": 3.1351518405770444e-06, "loss": 0.0005, "step": 80610 }, { "epoch": 1.3191524175734273, "grad_norm": 0.0531182698905468, "learning_rate": 3.133827043459108e-06, "loss": 0.0014, "step": 80620 }, { "epoch": 1.319316043524503, "grad_norm": 0.009435409680008888, "learning_rate": 3.1325023985398883e-06, "loss": 0.0007, "step": 80630 }, { "epoch": 1.3194796694755788, "grad_norm": 0.10078240185976028, "learning_rate": 3.131177905927419e-06, "loss": 0.0012, "step": 80640 }, { "epoch": 1.3196432954266546, "grad_norm": 0.04385104402899742, "learning_rate": 3.1298535657297213e-06, "loss": 0.001, "step": 80650 }, { "epoch": 1.3198069213777306, "grad_norm": 0.19951170682907104, "learning_rate": 3.128529378054804e-06, "loss": 0.0021, "step": 80660 }, { "epoch": 1.3199705473288064, "grad_norm": 0.03995213657617569, "learning_rate": 3.127205343010664e-06, "loss": 0.0018, "step": 80670 }, { "epoch": 1.3201341732798821, "grad_norm": 0.029720941558480263, "learning_rate": 3.1258814607052833e-06, "loss": 0.0018, "step": 80680 }, { "epoch": 1.320297799230958, "grad_norm": 0.07610039412975311, "learning_rate": 3.124557731246636e-06, "loss": 0.0014, "step": 80690 }, { "epoch": 1.320461425182034, "grad_norm": 0.018221447244286537, "learning_rate": 3.123234154742678e-06, "loss": 0.0016, "step": 80700 }, { "epoch": 1.3206250511331097, "grad_norm": 0.04896111413836479, "learning_rate": 3.1219107313013576e-06, "loss": 0.0009, "step": 80710 }, { "epoch": 1.3207886770841855, "grad_norm": 0.049727123230695724, "learning_rate": 3.1205874610306075e-06, "loss": 0.0012, "step": 80720 }, { "epoch": 1.3209523030352615, "grad_norm": 0.006221754476428032, "learning_rate": 3.119264344038351e-06, "loss": 0.0023, "step": 80730 }, { "epoch": 1.3211159289863372, "grad_norm": 0.08756019920110703, "learning_rate": 3.1179413804324955e-06, "loss": 0.0008, "step": 80740 }, { "epoch": 1.321279554937413, "grad_norm": 0.16489684581756592, "learning_rate": 3.116618570320937e-06, "loss": 0.0019, "step": 80750 }, { "epoch": 1.321443180888489, "grad_norm": 0.13592910766601562, "learning_rate": 3.11529591381156e-06, "loss": 0.003, "step": 80760 }, { "epoch": 1.3216068068395648, "grad_norm": 0.05379120260477066, "learning_rate": 3.1139734110122366e-06, "loss": 0.0006, "step": 80770 }, { "epoch": 1.3217704327906405, "grad_norm": 0.08801086992025375, "learning_rate": 3.112651062030825e-06, "loss": 0.0013, "step": 80780 }, { "epoch": 1.3219340587417165, "grad_norm": 0.08735466748476028, "learning_rate": 3.1113288669751705e-06, "loss": 0.0009, "step": 80790 }, { "epoch": 1.3220976846927923, "grad_norm": 0.06594033539295197, "learning_rate": 3.110006825953109e-06, "loss": 0.001, "step": 80800 }, { "epoch": 1.322261310643868, "grad_norm": 0.04945126920938492, "learning_rate": 3.1086849390724595e-06, "loss": 0.0011, "step": 80810 }, { "epoch": 1.322424936594944, "grad_norm": 0.032571833580732346, "learning_rate": 3.107363206441032e-06, "loss": 0.0015, "step": 80820 }, { "epoch": 1.3225885625460199, "grad_norm": 0.014670531265437603, "learning_rate": 3.1060416281666218e-06, "loss": 0.0011, "step": 80830 }, { "epoch": 1.3227521884970956, "grad_norm": 0.033089883625507355, "learning_rate": 3.104720204357013e-06, "loss": 0.0015, "step": 80840 }, { "epoch": 1.3229158144481714, "grad_norm": 0.014068972319364548, "learning_rate": 3.1033989351199757e-06, "loss": 0.0011, "step": 80850 }, { "epoch": 1.3230794403992472, "grad_norm": 0.09019894152879715, "learning_rate": 3.1020778205632684e-06, "loss": 0.0015, "step": 80860 }, { "epoch": 1.3232430663503232, "grad_norm": 0.0930638238787651, "learning_rate": 3.100756860794637e-06, "loss": 0.0007, "step": 80870 }, { "epoch": 1.323406692301399, "grad_norm": 0.04035981744527817, "learning_rate": 3.099436055921814e-06, "loss": 0.001, "step": 80880 }, { "epoch": 1.3235703182524747, "grad_norm": 0.057395558804273605, "learning_rate": 3.098115406052521e-06, "loss": 0.0013, "step": 80890 }, { "epoch": 1.3237339442035507, "grad_norm": 0.013174457475543022, "learning_rate": 3.0967949112944634e-06, "loss": 0.0009, "step": 80900 }, { "epoch": 1.3238975701546265, "grad_norm": 0.03129865229129791, "learning_rate": 3.095474571755338e-06, "loss": 0.0011, "step": 80910 }, { "epoch": 1.3240611961057023, "grad_norm": 0.13162872195243835, "learning_rate": 3.094154387542827e-06, "loss": 0.0007, "step": 80920 }, { "epoch": 1.3242248220567783, "grad_norm": 0.1469489187002182, "learning_rate": 3.0928343587646e-06, "loss": 0.001, "step": 80930 }, { "epoch": 1.324388448007854, "grad_norm": 0.019256900995969772, "learning_rate": 3.0915144855283134e-06, "loss": 0.0014, "step": 80940 }, { "epoch": 1.3245520739589298, "grad_norm": 0.09133001416921616, "learning_rate": 3.090194767941612e-06, "loss": 0.001, "step": 80950 }, { "epoch": 1.3247156999100058, "grad_norm": 0.08863049000501633, "learning_rate": 3.0888752061121284e-06, "loss": 0.0019, "step": 80960 }, { "epoch": 1.3248793258610816, "grad_norm": 0.04386939853429794, "learning_rate": 3.0875558001474804e-06, "loss": 0.0006, "step": 80970 }, { "epoch": 1.3250429518121574, "grad_norm": 0.055358823388814926, "learning_rate": 3.0862365501552747e-06, "loss": 0.0015, "step": 80980 }, { "epoch": 1.3252065777632334, "grad_norm": 0.07711359113454819, "learning_rate": 3.084917456243105e-06, "loss": 0.0009, "step": 80990 }, { "epoch": 1.3253702037143091, "grad_norm": 0.024266939610242844, "learning_rate": 3.0835985185185514e-06, "loss": 0.0011, "step": 81000 }, { "epoch": 1.325533829665385, "grad_norm": 0.03206505626440048, "learning_rate": 3.0822797370891835e-06, "loss": 0.0012, "step": 81010 }, { "epoch": 1.325697455616461, "grad_norm": 0.05650395154953003, "learning_rate": 3.0809611120625544e-06, "loss": 0.0026, "step": 81020 }, { "epoch": 1.3258610815675367, "grad_norm": 0.03850245475769043, "learning_rate": 3.079642643546208e-06, "loss": 0.0017, "step": 81030 }, { "epoch": 1.3260247075186125, "grad_norm": 0.12974824011325836, "learning_rate": 3.078324331647674e-06, "loss": 0.0031, "step": 81040 }, { "epoch": 1.3261883334696882, "grad_norm": 0.45154961943626404, "learning_rate": 3.07700617647447e-06, "loss": 0.0015, "step": 81050 }, { "epoch": 1.326351959420764, "grad_norm": 0.011137156747281551, "learning_rate": 3.0756881781341007e-06, "loss": 0.0007, "step": 81060 }, { "epoch": 1.32651558537184, "grad_norm": 0.006569497287273407, "learning_rate": 3.074370336734055e-06, "loss": 0.001, "step": 81070 }, { "epoch": 1.3266792113229158, "grad_norm": 0.1845833957195282, "learning_rate": 3.0730526523818133e-06, "loss": 0.0009, "step": 81080 }, { "epoch": 1.3268428372739915, "grad_norm": 0.0885968804359436, "learning_rate": 3.071735125184841e-06, "loss": 0.0018, "step": 81090 }, { "epoch": 1.3270064632250675, "grad_norm": 0.0576612763106823, "learning_rate": 3.0704177552505913e-06, "loss": 0.0024, "step": 81100 }, { "epoch": 1.3271700891761433, "grad_norm": 0.04037720337510109, "learning_rate": 3.069100542686505e-06, "loss": 0.001, "step": 81110 }, { "epoch": 1.327333715127219, "grad_norm": 0.018154360353946686, "learning_rate": 3.0677834876000085e-06, "loss": 0.0006, "step": 81120 }, { "epoch": 1.327497341078295, "grad_norm": 0.13168008625507355, "learning_rate": 3.066466590098517e-06, "loss": 0.001, "step": 81130 }, { "epoch": 1.3276609670293709, "grad_norm": 0.14789900183677673, "learning_rate": 3.065149850289432e-06, "loss": 0.0013, "step": 81140 }, { "epoch": 1.3278245929804466, "grad_norm": 0.05491626262664795, "learning_rate": 3.063833268280142e-06, "loss": 0.0006, "step": 81150 }, { "epoch": 1.3279882189315226, "grad_norm": 0.05457132309675217, "learning_rate": 3.0625168441780235e-06, "loss": 0.0009, "step": 81160 }, { "epoch": 1.3281518448825984, "grad_norm": 0.01319088600575924, "learning_rate": 3.061200578090439e-06, "loss": 0.0016, "step": 81170 }, { "epoch": 1.3283154708336742, "grad_norm": 0.11185579746961594, "learning_rate": 3.0598844701247395e-06, "loss": 0.0013, "step": 81180 }, { "epoch": 1.3284790967847502, "grad_norm": 0.0015857135877013206, "learning_rate": 3.0585685203882617e-06, "loss": 0.0011, "step": 81190 }, { "epoch": 1.328642722735826, "grad_norm": 0.052072227001190186, "learning_rate": 3.057252728988329e-06, "loss": 0.0009, "step": 81200 }, { "epoch": 1.3288063486869017, "grad_norm": 0.015007534995675087, "learning_rate": 3.0559370960322556e-06, "loss": 0.0013, "step": 81210 }, { "epoch": 1.3289699746379775, "grad_norm": 0.0428168885409832, "learning_rate": 3.054621621627337e-06, "loss": 0.001, "step": 81220 }, { "epoch": 1.3291336005890535, "grad_norm": 0.0176068302243948, "learning_rate": 3.053306305880861e-06, "loss": 0.0008, "step": 81230 }, { "epoch": 1.3292972265401293, "grad_norm": 0.03973715379834175, "learning_rate": 3.0519911489000985e-06, "loss": 0.0014, "step": 81240 }, { "epoch": 1.329460852491205, "grad_norm": 0.09750473499298096, "learning_rate": 3.050676150792311e-06, "loss": 0.0013, "step": 81250 }, { "epoch": 1.3296244784422808, "grad_norm": 0.006578003987669945, "learning_rate": 3.0493613116647446e-06, "loss": 0.0015, "step": 81260 }, { "epoch": 1.3297881043933568, "grad_norm": 0.019946426153182983, "learning_rate": 3.0480466316246327e-06, "loss": 0.001, "step": 81270 }, { "epoch": 1.3299517303444326, "grad_norm": 0.2481301724910736, "learning_rate": 3.046732110779197e-06, "loss": 0.0022, "step": 81280 }, { "epoch": 1.3301153562955084, "grad_norm": 0.07669579982757568, "learning_rate": 3.045417749235644e-06, "loss": 0.0026, "step": 81290 }, { "epoch": 1.3302789822465844, "grad_norm": 0.05859427899122238, "learning_rate": 3.0441035471011692e-06, "loss": 0.0012, "step": 81300 }, { "epoch": 1.3304426081976601, "grad_norm": 0.05415242165327072, "learning_rate": 3.042789504482955e-06, "loss": 0.001, "step": 81310 }, { "epoch": 1.330606234148736, "grad_norm": 0.04631884768605232, "learning_rate": 3.0414756214881696e-06, "loss": 0.0012, "step": 81320 }, { "epoch": 1.330769860099812, "grad_norm": 0.011107529513537884, "learning_rate": 3.040161898223969e-06, "loss": 0.0011, "step": 81330 }, { "epoch": 1.3309334860508877, "grad_norm": 0.015022678300738335, "learning_rate": 3.038848334797496e-06, "loss": 0.0009, "step": 81340 }, { "epoch": 1.3310971120019635, "grad_norm": 0.022628238424658775, "learning_rate": 3.037534931315881e-06, "loss": 0.0011, "step": 81350 }, { "epoch": 1.3312607379530395, "grad_norm": 0.0808190256357193, "learning_rate": 3.0362216878862394e-06, "loss": 0.0012, "step": 81360 }, { "epoch": 1.3314243639041152, "grad_norm": 0.04676836356520653, "learning_rate": 3.0349086046156763e-06, "loss": 0.0014, "step": 81370 }, { "epoch": 1.331587989855191, "grad_norm": 0.02933577261865139, "learning_rate": 3.033595681611281e-06, "loss": 0.0009, "step": 81380 }, { "epoch": 1.331751615806267, "grad_norm": 0.07370854169130325, "learning_rate": 3.032282918980133e-06, "loss": 0.0014, "step": 81390 }, { "epoch": 1.3319152417573428, "grad_norm": 0.06115110591053963, "learning_rate": 3.0309703168292937e-06, "loss": 0.0011, "step": 81400 }, { "epoch": 1.3320788677084185, "grad_norm": 0.03955576568841934, "learning_rate": 3.029657875265817e-06, "loss": 0.0015, "step": 81410 }, { "epoch": 1.3322424936594943, "grad_norm": 0.03216688707470894, "learning_rate": 3.0283455943967395e-06, "loss": 0.0009, "step": 81420 }, { "epoch": 1.3324061196105703, "grad_norm": 0.06082973629236221, "learning_rate": 3.0270334743290876e-06, "loss": 0.0007, "step": 81430 }, { "epoch": 1.332569745561646, "grad_norm": 0.03188791498541832, "learning_rate": 3.0257215151698728e-06, "loss": 0.0007, "step": 81440 }, { "epoch": 1.3327333715127219, "grad_norm": 0.0540880486369133, "learning_rate": 3.0244097170260943e-06, "loss": 0.0013, "step": 81450 }, { "epoch": 1.3328969974637976, "grad_norm": 0.01986372098326683, "learning_rate": 3.023098080004737e-06, "loss": 0.0008, "step": 81460 }, { "epoch": 1.3330606234148736, "grad_norm": 0.06811761856079102, "learning_rate": 3.0217866042127752e-06, "loss": 0.0014, "step": 81470 }, { "epoch": 1.3332242493659494, "grad_norm": 0.10229456424713135, "learning_rate": 3.0204752897571676e-06, "loss": 0.0015, "step": 81480 }, { "epoch": 1.3333878753170252, "grad_norm": 0.01309707760810852, "learning_rate": 3.0191641367448598e-06, "loss": 0.0009, "step": 81490 }, { "epoch": 1.3335515012681012, "grad_norm": 0.011736278422176838, "learning_rate": 3.0178531452827863e-06, "loss": 0.0014, "step": 81500 }, { "epoch": 1.333715127219177, "grad_norm": 0.08110702037811279, "learning_rate": 3.016542315477866e-06, "loss": 0.0012, "step": 81510 }, { "epoch": 1.3338787531702527, "grad_norm": 0.03364388272166252, "learning_rate": 3.015231647437006e-06, "loss": 0.0024, "step": 81520 }, { "epoch": 1.3340423791213287, "grad_norm": 0.022201761603355408, "learning_rate": 3.0139211412671e-06, "loss": 0.0015, "step": 81530 }, { "epoch": 1.3342060050724045, "grad_norm": 0.0016203754348680377, "learning_rate": 3.012610797075029e-06, "loss": 0.0013, "step": 81540 }, { "epoch": 1.3343696310234803, "grad_norm": 0.02517550066113472, "learning_rate": 3.0113006149676595e-06, "loss": 0.0013, "step": 81550 }, { "epoch": 1.3345332569745563, "grad_norm": 0.028854593634605408, "learning_rate": 3.009990595051846e-06, "loss": 0.0011, "step": 81560 }, { "epoch": 1.334696882925632, "grad_norm": 0.031180864199995995, "learning_rate": 3.0086807374344284e-06, "loss": 0.0019, "step": 81570 }, { "epoch": 1.3348605088767078, "grad_norm": 0.03380880504846573, "learning_rate": 3.0073710422222347e-06, "loss": 0.0012, "step": 81580 }, { "epoch": 1.3350241348277838, "grad_norm": 0.0834617167711258, "learning_rate": 3.00606150952208e-06, "loss": 0.0008, "step": 81590 }, { "epoch": 1.3351877607788596, "grad_norm": 0.10590919107198715, "learning_rate": 3.0047521394407643e-06, "loss": 0.0018, "step": 81600 }, { "epoch": 1.3353513867299354, "grad_norm": 0.09272348880767822, "learning_rate": 3.0034429320850754e-06, "loss": 0.0017, "step": 81610 }, { "epoch": 1.3355150126810111, "grad_norm": 0.032404445111751556, "learning_rate": 3.0021338875617885e-06, "loss": 0.0011, "step": 81620 }, { "epoch": 1.335678638632087, "grad_norm": 0.18106071650981903, "learning_rate": 3.0008250059776646e-06, "loss": 0.0017, "step": 81630 }, { "epoch": 1.335842264583163, "grad_norm": 0.023514172062277794, "learning_rate": 2.99951628743945e-06, "loss": 0.001, "step": 81640 }, { "epoch": 1.3360058905342387, "grad_norm": 0.05618051812052727, "learning_rate": 2.9982077320538828e-06, "loss": 0.0011, "step": 81650 }, { "epoch": 1.3361695164853145, "grad_norm": 0.04267505183815956, "learning_rate": 2.996899339927681e-06, "loss": 0.0025, "step": 81660 }, { "epoch": 1.3363331424363905, "grad_norm": 0.018733562901616096, "learning_rate": 2.995591111167554e-06, "loss": 0.001, "step": 81670 }, { "epoch": 1.3364967683874662, "grad_norm": 0.0518205501139164, "learning_rate": 2.9942830458801965e-06, "loss": 0.0007, "step": 81680 }, { "epoch": 1.336660394338542, "grad_norm": 0.042825303971767426, "learning_rate": 2.99297514417229e-06, "loss": 0.0018, "step": 81690 }, { "epoch": 1.336824020289618, "grad_norm": 0.026424622163176537, "learning_rate": 2.991667406150502e-06, "loss": 0.0008, "step": 81700 }, { "epoch": 1.3369876462406938, "grad_norm": 0.07906363904476166, "learning_rate": 2.9903598319214887e-06, "loss": 0.0013, "step": 81710 }, { "epoch": 1.3371512721917695, "grad_norm": 0.11585626751184464, "learning_rate": 2.989052421591889e-06, "loss": 0.0015, "step": 81720 }, { "epoch": 1.3373148981428455, "grad_norm": 0.09931634366512299, "learning_rate": 2.987745175268332e-06, "loss": 0.0012, "step": 81730 }, { "epoch": 1.3374785240939213, "grad_norm": 0.01809009164571762, "learning_rate": 2.9864380930574317e-06, "loss": 0.0023, "step": 81740 }, { "epoch": 1.337642150044997, "grad_norm": 0.0542956218123436, "learning_rate": 2.98513117506579e-06, "loss": 0.0012, "step": 81750 }, { "epoch": 1.337805775996073, "grad_norm": 0.07651602476835251, "learning_rate": 2.9838244213999945e-06, "loss": 0.0011, "step": 81760 }, { "epoch": 1.3379694019471489, "grad_norm": 0.03743157535791397, "learning_rate": 2.9825178321666204e-06, "loss": 0.0015, "step": 81770 }, { "epoch": 1.3381330278982246, "grad_norm": 0.006976444739848375, "learning_rate": 2.981211407472227e-06, "loss": 0.0009, "step": 81780 }, { "epoch": 1.3382966538493006, "grad_norm": 0.019666900858283043, "learning_rate": 2.979905147423363e-06, "loss": 0.0008, "step": 81790 }, { "epoch": 1.3384602798003764, "grad_norm": 0.010168484412133694, "learning_rate": 2.9785990521265617e-06, "loss": 0.0009, "step": 81800 }, { "epoch": 1.3386239057514522, "grad_norm": 0.08481744676828384, "learning_rate": 2.977293121688345e-06, "loss": 0.0007, "step": 81810 }, { "epoch": 1.338787531702528, "grad_norm": 0.07608356326818466, "learning_rate": 2.975987356215219e-06, "loss": 0.0021, "step": 81820 }, { "epoch": 1.3389511576536037, "grad_norm": 0.0253651924431324, "learning_rate": 2.974681755813678e-06, "loss": 0.0013, "step": 81830 }, { "epoch": 1.3391147836046797, "grad_norm": 0.05312076583504677, "learning_rate": 2.9733763205902022e-06, "loss": 0.0011, "step": 81840 }, { "epoch": 1.3392784095557555, "grad_norm": 0.03739145025610924, "learning_rate": 2.972071050651259e-06, "loss": 0.0004, "step": 81850 }, { "epoch": 1.3394420355068313, "grad_norm": 0.05163298174738884, "learning_rate": 2.970765946103301e-06, "loss": 0.0007, "step": 81860 }, { "epoch": 1.3396056614579073, "grad_norm": 0.007754177786409855, "learning_rate": 2.9694610070527687e-06, "loss": 0.0013, "step": 81870 }, { "epoch": 1.339769287408983, "grad_norm": 0.13393636047840118, "learning_rate": 2.968156233606088e-06, "loss": 0.001, "step": 81880 }, { "epoch": 1.3399329133600588, "grad_norm": 0.01707519218325615, "learning_rate": 2.9668516258696713e-06, "loss": 0.0015, "step": 81890 }, { "epoch": 1.3400965393111348, "grad_norm": 0.013989202678203583, "learning_rate": 2.9655471839499195e-06, "loss": 0.0006, "step": 81900 }, { "epoch": 1.3402601652622106, "grad_norm": 0.03790288418531418, "learning_rate": 2.964242907953217e-06, "loss": 0.0007, "step": 81910 }, { "epoch": 1.3404237912132864, "grad_norm": 0.0683358907699585, "learning_rate": 2.962938797985937e-06, "loss": 0.002, "step": 81920 }, { "epoch": 1.3405874171643624, "grad_norm": 0.2492551952600479, "learning_rate": 2.961634854154438e-06, "loss": 0.0016, "step": 81930 }, { "epoch": 1.3407510431154381, "grad_norm": 0.04444606229662895, "learning_rate": 2.960331076565065e-06, "loss": 0.0022, "step": 81940 }, { "epoch": 1.340914669066514, "grad_norm": 0.02913873828947544, "learning_rate": 2.9590274653241497e-06, "loss": 0.0015, "step": 81950 }, { "epoch": 1.34107829501759, "grad_norm": 0.037651486694812775, "learning_rate": 2.9577240205380107e-06, "loss": 0.0022, "step": 81960 }, { "epoch": 1.3412419209686657, "grad_norm": 0.17404231429100037, "learning_rate": 2.9564207423129522e-06, "loss": 0.0019, "step": 81970 }, { "epoch": 1.3414055469197415, "grad_norm": 0.08907478302717209, "learning_rate": 2.9551176307552642e-06, "loss": 0.0009, "step": 81980 }, { "epoch": 1.3415691728708175, "grad_norm": 0.006502117030322552, "learning_rate": 2.953814685971226e-06, "loss": 0.0016, "step": 81990 }, { "epoch": 1.3417327988218932, "grad_norm": 0.02640322409570217, "learning_rate": 2.9525119080670995e-06, "loss": 0.0018, "step": 82000 }, { "epoch": 1.341896424772969, "grad_norm": 0.09930183738470078, "learning_rate": 2.951209297149136e-06, "loss": 0.0012, "step": 82010 }, { "epoch": 1.3420600507240448, "grad_norm": 0.12355904281139374, "learning_rate": 2.9499068533235718e-06, "loss": 0.0017, "step": 82020 }, { "epoch": 1.3422236766751205, "grad_norm": 0.06289374828338623, "learning_rate": 2.948604576696631e-06, "loss": 0.0006, "step": 82030 }, { "epoch": 1.3423873026261965, "grad_norm": 0.08590333163738251, "learning_rate": 2.9473024673745197e-06, "loss": 0.002, "step": 82040 }, { "epoch": 1.3425509285772723, "grad_norm": 0.06022851541638374, "learning_rate": 2.9460005254634354e-06, "loss": 0.0018, "step": 82050 }, { "epoch": 1.342714554528348, "grad_norm": 0.011406956240534782, "learning_rate": 2.9446987510695602e-06, "loss": 0.0016, "step": 82060 }, { "epoch": 1.342878180479424, "grad_norm": 0.08705837279558182, "learning_rate": 2.9433971442990618e-06, "loss": 0.0013, "step": 82070 }, { "epoch": 1.3430418064304999, "grad_norm": 0.05391626060009003, "learning_rate": 2.9420957052580958e-06, "loss": 0.0009, "step": 82080 }, { "epoch": 1.3432054323815756, "grad_norm": 0.07751340419054031, "learning_rate": 2.9407944340528017e-06, "loss": 0.0034, "step": 82090 }, { "epoch": 1.3433690583326516, "grad_norm": 0.07951226830482483, "learning_rate": 2.939493330789308e-06, "loss": 0.0011, "step": 82100 }, { "epoch": 1.3435326842837274, "grad_norm": 0.04669647663831711, "learning_rate": 2.9381923955737277e-06, "loss": 0.0012, "step": 82110 }, { "epoch": 1.3436963102348032, "grad_norm": 0.012807745486497879, "learning_rate": 2.9368916285121605e-06, "loss": 0.0014, "step": 82120 }, { "epoch": 1.3438599361858792, "grad_norm": 0.05920378118753433, "learning_rate": 2.935591029710693e-06, "loss": 0.0017, "step": 82130 }, { "epoch": 1.344023562136955, "grad_norm": 0.03783365711569786, "learning_rate": 2.934290599275398e-06, "loss": 0.0007, "step": 82140 }, { "epoch": 1.3441871880880307, "grad_norm": 0.1284850537776947, "learning_rate": 2.9329903373123325e-06, "loss": 0.0023, "step": 82150 }, { "epoch": 1.3443508140391067, "grad_norm": 0.0661517083644867, "learning_rate": 2.931690243927543e-06, "loss": 0.0009, "step": 82160 }, { "epoch": 1.3445144399901825, "grad_norm": 0.06416383385658264, "learning_rate": 2.93039031922706e-06, "loss": 0.0016, "step": 82170 }, { "epoch": 1.3446780659412583, "grad_norm": 0.02821514382958412, "learning_rate": 2.9290905633169015e-06, "loss": 0.0009, "step": 82180 }, { "epoch": 1.344841691892334, "grad_norm": 0.016270047053694725, "learning_rate": 2.9277909763030705e-06, "loss": 0.0021, "step": 82190 }, { "epoch": 1.34500531784341, "grad_norm": 0.0017019853694364429, "learning_rate": 2.9264915582915576e-06, "loss": 0.0013, "step": 82200 }, { "epoch": 1.3451689437944858, "grad_norm": 0.006392058916389942, "learning_rate": 2.9251923093883383e-06, "loss": 0.001, "step": 82210 }, { "epoch": 1.3453325697455616, "grad_norm": 0.11702040582895279, "learning_rate": 2.9238932296993747e-06, "loss": 0.0012, "step": 82220 }, { "epoch": 1.3454961956966374, "grad_norm": 0.1232714056968689, "learning_rate": 2.922594319330616e-06, "loss": 0.0056, "step": 82230 }, { "epoch": 1.3456598216477134, "grad_norm": 0.08905120939016342, "learning_rate": 2.921295578387997e-06, "loss": 0.0014, "step": 82240 }, { "epoch": 1.3458234475987891, "grad_norm": 0.029471050947904587, "learning_rate": 2.919997006977438e-06, "loss": 0.0017, "step": 82250 }, { "epoch": 1.345987073549865, "grad_norm": 0.09401378780603409, "learning_rate": 2.9186986052048473e-06, "loss": 0.0012, "step": 82260 }, { "epoch": 1.346150699500941, "grad_norm": 0.05352969467639923, "learning_rate": 2.9174003731761157e-06, "loss": 0.0012, "step": 82270 }, { "epoch": 1.3463143254520167, "grad_norm": 0.06326586753129959, "learning_rate": 2.9161023109971255e-06, "loss": 0.0011, "step": 82280 }, { "epoch": 1.3464779514030925, "grad_norm": 0.05645281821489334, "learning_rate": 2.914804418773739e-06, "loss": 0.0007, "step": 82290 }, { "epoch": 1.3466415773541685, "grad_norm": 0.010373152792453766, "learning_rate": 2.9135066966118113e-06, "loss": 0.0017, "step": 82300 }, { "epoch": 1.3468052033052442, "grad_norm": 0.021385610103607178, "learning_rate": 2.912209144617177e-06, "loss": 0.0022, "step": 82310 }, { "epoch": 1.34696882925632, "grad_norm": 0.05037151277065277, "learning_rate": 2.9109117628956633e-06, "loss": 0.0015, "step": 82320 }, { "epoch": 1.347132455207396, "grad_norm": 0.05088755488395691, "learning_rate": 2.9096145515530765e-06, "loss": 0.0022, "step": 82330 }, { "epoch": 1.3472960811584718, "grad_norm": 0.060416366904973984, "learning_rate": 2.908317510695217e-06, "loss": 0.0014, "step": 82340 }, { "epoch": 1.3474597071095475, "grad_norm": 0.0354192741215229, "learning_rate": 2.907020640427863e-06, "loss": 0.0015, "step": 82350 }, { "epoch": 1.3476233330606235, "grad_norm": 0.04739399254322052, "learning_rate": 2.905723940856786e-06, "loss": 0.0009, "step": 82360 }, { "epoch": 1.3477869590116993, "grad_norm": 0.0759781152009964, "learning_rate": 2.9044274120877374e-06, "loss": 0.0014, "step": 82370 }, { "epoch": 1.347950584962775, "grad_norm": 0.04068463295698166, "learning_rate": 2.9031310542264614e-06, "loss": 0.001, "step": 82380 }, { "epoch": 1.3481142109138509, "grad_norm": 0.09307326376438141, "learning_rate": 2.9018348673786808e-06, "loss": 0.0021, "step": 82390 }, { "epoch": 1.3482778368649269, "grad_norm": 0.052611157298088074, "learning_rate": 2.900538851650111e-06, "loss": 0.0014, "step": 82400 }, { "epoch": 1.3484414628160026, "grad_norm": 0.037139199674129486, "learning_rate": 2.899243007146448e-06, "loss": 0.0009, "step": 82410 }, { "epoch": 1.3486050887670784, "grad_norm": 0.04838687554001808, "learning_rate": 2.89794733397338e-06, "loss": 0.001, "step": 82420 }, { "epoch": 1.3487687147181542, "grad_norm": 0.1465974897146225, "learning_rate": 2.8966518322365732e-06, "loss": 0.0018, "step": 82430 }, { "epoch": 1.3489323406692302, "grad_norm": 0.021042266860604286, "learning_rate": 2.895356502041689e-06, "loss": 0.0009, "step": 82440 }, { "epoch": 1.349095966620306, "grad_norm": 0.05505353957414627, "learning_rate": 2.894061343494366e-06, "loss": 0.0011, "step": 82450 }, { "epoch": 1.3492595925713817, "grad_norm": 0.15444590151309967, "learning_rate": 2.8927663567002366e-06, "loss": 0.002, "step": 82460 }, { "epoch": 1.3494232185224577, "grad_norm": 0.07559553533792496, "learning_rate": 2.891471541764912e-06, "loss": 0.0011, "step": 82470 }, { "epoch": 1.3495868444735335, "grad_norm": 0.04304005578160286, "learning_rate": 2.890176898793996e-06, "loss": 0.0008, "step": 82480 }, { "epoch": 1.3497504704246093, "grad_norm": 0.028131136670708656, "learning_rate": 2.888882427893073e-06, "loss": 0.0009, "step": 82490 }, { "epoch": 1.3499140963756853, "grad_norm": 0.043105218559503555, "learning_rate": 2.8875881291677177e-06, "loss": 0.0018, "step": 82500 }, { "epoch": 1.350077722326761, "grad_norm": 0.019842512905597687, "learning_rate": 2.8862940027234854e-06, "loss": 0.001, "step": 82510 }, { "epoch": 1.3502413482778368, "grad_norm": 0.05363038182258606, "learning_rate": 2.885000048665925e-06, "loss": 0.0016, "step": 82520 }, { "epoch": 1.3504049742289128, "grad_norm": 0.033374592661857605, "learning_rate": 2.8837062671005657e-06, "loss": 0.0022, "step": 82530 }, { "epoch": 1.3505686001799886, "grad_norm": 0.32421237230300903, "learning_rate": 2.882412658132919e-06, "loss": 0.0027, "step": 82540 }, { "epoch": 1.3507322261310644, "grad_norm": 0.03278248384594917, "learning_rate": 2.8811192218684946e-06, "loss": 0.001, "step": 82550 }, { "epoch": 1.3508958520821404, "grad_norm": 0.06711427867412567, "learning_rate": 2.879825958412776e-06, "loss": 0.0009, "step": 82560 }, { "epoch": 1.3510594780332161, "grad_norm": 0.017694778740406036, "learning_rate": 2.87853286787124e-06, "loss": 0.0015, "step": 82570 }, { "epoch": 1.351223103984292, "grad_norm": 0.023505406454205513, "learning_rate": 2.877239950349343e-06, "loss": 0.0016, "step": 82580 }, { "epoch": 1.3513867299353677, "grad_norm": 0.07301666587591171, "learning_rate": 2.8759472059525363e-06, "loss": 0.0011, "step": 82590 }, { "epoch": 1.3515503558864435, "grad_norm": 0.004277330823242664, "learning_rate": 2.874654634786246e-06, "loss": 0.0014, "step": 82600 }, { "epoch": 1.3517139818375195, "grad_norm": 0.03963760286569595, "learning_rate": 2.8733622369558954e-06, "loss": 0.0011, "step": 82610 }, { "epoch": 1.3518776077885952, "grad_norm": 0.006173059809952974, "learning_rate": 2.8720700125668828e-06, "loss": 0.0021, "step": 82620 }, { "epoch": 1.352041233739671, "grad_norm": 0.025558434426784515, "learning_rate": 2.8707779617246025e-06, "loss": 0.0006, "step": 82630 }, { "epoch": 1.352204859690747, "grad_norm": 0.025679195299744606, "learning_rate": 2.8694860845344254e-06, "loss": 0.0016, "step": 82640 }, { "epoch": 1.3523684856418228, "grad_norm": 0.08831024169921875, "learning_rate": 2.8681943811017153e-06, "loss": 0.0017, "step": 82650 }, { "epoch": 1.3525321115928985, "grad_norm": 0.06250100582838058, "learning_rate": 2.866902851531818e-06, "loss": 0.0013, "step": 82660 }, { "epoch": 1.3526957375439745, "grad_norm": 0.038387566804885864, "learning_rate": 2.8656114959300685e-06, "loss": 0.001, "step": 82670 }, { "epoch": 1.3528593634950503, "grad_norm": 0.10904587805271149, "learning_rate": 2.8643203144017812e-06, "loss": 0.0012, "step": 82680 }, { "epoch": 1.353022989446126, "grad_norm": 0.045857395976781845, "learning_rate": 2.8630293070522654e-06, "loss": 0.0036, "step": 82690 }, { "epoch": 1.353186615397202, "grad_norm": 0.0243227481842041, "learning_rate": 2.861738473986806e-06, "loss": 0.0027, "step": 82700 }, { "epoch": 1.3533502413482779, "grad_norm": 0.10378051549196243, "learning_rate": 2.860447815310684e-06, "loss": 0.0011, "step": 82710 }, { "epoch": 1.3535138672993536, "grad_norm": 0.01445746049284935, "learning_rate": 2.8591573311291564e-06, "loss": 0.0012, "step": 82720 }, { "epoch": 1.3536774932504296, "grad_norm": 0.028539516031742096, "learning_rate": 2.857867021547476e-06, "loss": 0.0023, "step": 82730 }, { "epoch": 1.3538411192015054, "grad_norm": 0.04224391281604767, "learning_rate": 2.8565768866708697e-06, "loss": 0.0011, "step": 82740 }, { "epoch": 1.3540047451525812, "grad_norm": 0.06839609891176224, "learning_rate": 2.8552869266045636e-06, "loss": 0.0016, "step": 82750 }, { "epoch": 1.3541683711036572, "grad_norm": 0.007490002084523439, "learning_rate": 2.8539971414537558e-06, "loss": 0.001, "step": 82760 }, { "epoch": 1.354331997054733, "grad_norm": 0.028109358623623848, "learning_rate": 2.8527075313236417e-06, "loss": 0.0008, "step": 82770 }, { "epoch": 1.3544956230058087, "grad_norm": 0.11394378542900085, "learning_rate": 2.8514180963193944e-06, "loss": 0.0011, "step": 82780 }, { "epoch": 1.3546592489568845, "grad_norm": 0.04138392582535744, "learning_rate": 2.8501288365461787e-06, "loss": 0.0008, "step": 82790 }, { "epoch": 1.3548228749079603, "grad_norm": 0.004836897365748882, "learning_rate": 2.848839752109138e-06, "loss": 0.0007, "step": 82800 }, { "epoch": 1.3549865008590363, "grad_norm": 0.04805408790707588, "learning_rate": 2.8475508431134115e-06, "loss": 0.0023, "step": 82810 }, { "epoch": 1.355150126810112, "grad_norm": 0.038444846868515015, "learning_rate": 2.8462621096641118e-06, "loss": 0.0015, "step": 82820 }, { "epoch": 1.3553137527611878, "grad_norm": 0.01575840823352337, "learning_rate": 2.8449735518663497e-06, "loss": 0.0023, "step": 82830 }, { "epoch": 1.3554773787122638, "grad_norm": 0.044413212686777115, "learning_rate": 2.843685169825212e-06, "loss": 0.0009, "step": 82840 }, { "epoch": 1.3556410046633396, "grad_norm": 0.004706390667706728, "learning_rate": 2.842396963645774e-06, "loss": 0.0011, "step": 82850 }, { "epoch": 1.3558046306144154, "grad_norm": 0.04555555433034897, "learning_rate": 2.8411089334330997e-06, "loss": 0.0011, "step": 82860 }, { "epoch": 1.3559682565654914, "grad_norm": 0.027418551966547966, "learning_rate": 2.8398210792922355e-06, "loss": 0.0008, "step": 82870 }, { "epoch": 1.3561318825165671, "grad_norm": 0.05459018051624298, "learning_rate": 2.838533401328215e-06, "loss": 0.0009, "step": 82880 }, { "epoch": 1.356295508467643, "grad_norm": 0.0671369731426239, "learning_rate": 2.837245899646055e-06, "loss": 0.0012, "step": 82890 }, { "epoch": 1.356459134418719, "grad_norm": 0.06007509306073189, "learning_rate": 2.835958574350762e-06, "loss": 0.0016, "step": 82900 }, { "epoch": 1.3566227603697947, "grad_norm": 0.04670794680714607, "learning_rate": 2.8346714255473245e-06, "loss": 0.002, "step": 82910 }, { "epoch": 1.3567863863208705, "grad_norm": 0.03598122298717499, "learning_rate": 2.8333844533407195e-06, "loss": 0.0011, "step": 82920 }, { "epoch": 1.3569500122719464, "grad_norm": 0.053446996957063675, "learning_rate": 2.8320976578359057e-06, "loss": 0.0011, "step": 82930 }, { "epoch": 1.3571136382230222, "grad_norm": 0.04230347275733948, "learning_rate": 2.830811039137833e-06, "loss": 0.0012, "step": 82940 }, { "epoch": 1.357277264174098, "grad_norm": 0.041250068694353104, "learning_rate": 2.829524597351429e-06, "loss": 0.001, "step": 82950 }, { "epoch": 1.3574408901251738, "grad_norm": 0.04978470876812935, "learning_rate": 2.8282383325816165e-06, "loss": 0.0013, "step": 82960 }, { "epoch": 1.3576045160762498, "grad_norm": 0.15342646837234497, "learning_rate": 2.8269522449332954e-06, "loss": 0.0012, "step": 82970 }, { "epoch": 1.3577681420273255, "grad_norm": 0.22620487213134766, "learning_rate": 2.8256663345113567e-06, "loss": 0.0012, "step": 82980 }, { "epoch": 1.3579317679784013, "grad_norm": 0.06654641777276993, "learning_rate": 2.8243806014206732e-06, "loss": 0.0016, "step": 82990 }, { "epoch": 1.358095393929477, "grad_norm": 0.16387392580509186, "learning_rate": 2.8230950457661064e-06, "loss": 0.003, "step": 83000 }, { "epoch": 1.358259019880553, "grad_norm": 0.026688173413276672, "learning_rate": 2.8218096676525002e-06, "loss": 0.0009, "step": 83010 }, { "epoch": 1.3584226458316289, "grad_norm": 0.028287360444664955, "learning_rate": 2.8205244671846886e-06, "loss": 0.0015, "step": 83020 }, { "epoch": 1.3585862717827046, "grad_norm": 0.08470385521650314, "learning_rate": 2.8192394444674842e-06, "loss": 0.0007, "step": 83030 }, { "epoch": 1.3587498977337806, "grad_norm": 0.007314716000109911, "learning_rate": 2.817954599605693e-06, "loss": 0.001, "step": 83040 }, { "epoch": 1.3589135236848564, "grad_norm": 0.04179174825549126, "learning_rate": 2.816669932704098e-06, "loss": 0.0014, "step": 83050 }, { "epoch": 1.3590771496359322, "grad_norm": 0.04235430434346199, "learning_rate": 2.8153854438674778e-06, "loss": 0.0011, "step": 83060 }, { "epoch": 1.3592407755870082, "grad_norm": 0.04849351570010185, "learning_rate": 2.8141011332005864e-06, "loss": 0.0023, "step": 83070 }, { "epoch": 1.359404401538084, "grad_norm": 0.07125011086463928, "learning_rate": 2.8128170008081705e-06, "loss": 0.0012, "step": 83080 }, { "epoch": 1.3595680274891597, "grad_norm": 0.024520372971892357, "learning_rate": 2.8115330467949577e-06, "loss": 0.001, "step": 83090 }, { "epoch": 1.3597316534402357, "grad_norm": 0.07137442380189896, "learning_rate": 2.810249271265665e-06, "loss": 0.001, "step": 83100 }, { "epoch": 1.3598952793913115, "grad_norm": 0.11436280608177185, "learning_rate": 2.8089656743249895e-06, "loss": 0.0012, "step": 83110 }, { "epoch": 1.3600589053423873, "grad_norm": 0.03243430703878403, "learning_rate": 2.807682256077622e-06, "loss": 0.0016, "step": 83120 }, { "epoch": 1.3602225312934633, "grad_norm": 0.027852749451994896, "learning_rate": 2.806399016628227e-06, "loss": 0.0014, "step": 83130 }, { "epoch": 1.360386157244539, "grad_norm": 0.08546815067529678, "learning_rate": 2.805115956081469e-06, "loss": 0.0017, "step": 83140 }, { "epoch": 1.3605497831956148, "grad_norm": 0.08746512979269028, "learning_rate": 2.803833074541983e-06, "loss": 0.0009, "step": 83150 }, { "epoch": 1.3607134091466906, "grad_norm": 0.0853603258728981, "learning_rate": 2.802550372114401e-06, "loss": 0.0017, "step": 83160 }, { "epoch": 1.3608770350977666, "grad_norm": 0.031326375901699066, "learning_rate": 2.8012678489033352e-06, "loss": 0.0006, "step": 83170 }, { "epoch": 1.3610406610488424, "grad_norm": 0.012908640317618847, "learning_rate": 2.79998550501338e-06, "loss": 0.0012, "step": 83180 }, { "epoch": 1.3612042869999181, "grad_norm": 0.006007511168718338, "learning_rate": 2.798703340549124e-06, "loss": 0.001, "step": 83190 }, { "epoch": 1.361367912950994, "grad_norm": 0.0022917778696864843, "learning_rate": 2.797421355615133e-06, "loss": 0.0011, "step": 83200 }, { "epoch": 1.36153153890207, "grad_norm": 0.027564259245991707, "learning_rate": 2.796139550315964e-06, "loss": 0.002, "step": 83210 }, { "epoch": 1.3616951648531457, "grad_norm": 0.012744871899485588, "learning_rate": 2.7948579247561526e-06, "loss": 0.0017, "step": 83220 }, { "epoch": 1.3618587908042215, "grad_norm": 0.01554274931550026, "learning_rate": 2.7935764790402287e-06, "loss": 0.0007, "step": 83230 }, { "epoch": 1.3620224167552974, "grad_norm": 0.07746699452400208, "learning_rate": 2.7922952132726983e-06, "loss": 0.001, "step": 83240 }, { "epoch": 1.3621860427063732, "grad_norm": 0.10139969736337662, "learning_rate": 2.791014127558061e-06, "loss": 0.0019, "step": 83250 }, { "epoch": 1.362349668657449, "grad_norm": 0.09875639528036118, "learning_rate": 2.789733222000794e-06, "loss": 0.004, "step": 83260 }, { "epoch": 1.362513294608525, "grad_norm": 0.014498264528810978, "learning_rate": 2.7884524967053672e-06, "loss": 0.0014, "step": 83270 }, { "epoch": 1.3626769205596008, "grad_norm": 0.009169657714664936, "learning_rate": 2.7871719517762284e-06, "loss": 0.0011, "step": 83280 }, { "epoch": 1.3628405465106765, "grad_norm": 0.011819208972156048, "learning_rate": 2.785891587317818e-06, "loss": 0.001, "step": 83290 }, { "epoch": 1.3630041724617525, "grad_norm": 0.04689594730734825, "learning_rate": 2.7846114034345562e-06, "loss": 0.0014, "step": 83300 }, { "epoch": 1.3631677984128283, "grad_norm": 0.044827960431575775, "learning_rate": 2.783331400230852e-06, "loss": 0.0015, "step": 83310 }, { "epoch": 1.363331424363904, "grad_norm": 0.07038811594247818, "learning_rate": 2.7820515778110946e-06, "loss": 0.0011, "step": 83320 }, { "epoch": 1.36349505031498, "grad_norm": 0.0676996037364006, "learning_rate": 2.780771936279667e-06, "loss": 0.0021, "step": 83330 }, { "epoch": 1.3636586762660559, "grad_norm": 0.06889384984970093, "learning_rate": 2.7794924757409282e-06, "loss": 0.0015, "step": 83340 }, { "epoch": 1.3638223022171316, "grad_norm": 0.05268380045890808, "learning_rate": 2.778213196299229e-06, "loss": 0.0024, "step": 83350 }, { "epoch": 1.3639859281682074, "grad_norm": 0.0043445006012916565, "learning_rate": 2.776934098058901e-06, "loss": 0.0009, "step": 83360 }, { "epoch": 1.3641495541192832, "grad_norm": 0.09102828800678253, "learning_rate": 2.7756551811242673e-06, "loss": 0.0009, "step": 83370 }, { "epoch": 1.3643131800703592, "grad_norm": 0.03233233466744423, "learning_rate": 2.7743764455996257e-06, "loss": 0.001, "step": 83380 }, { "epoch": 1.364476806021435, "grad_norm": 0.08766317367553711, "learning_rate": 2.773097891589272e-06, "loss": 0.0008, "step": 83390 }, { "epoch": 1.3646404319725107, "grad_norm": 0.015489128418266773, "learning_rate": 2.771819519197475e-06, "loss": 0.001, "step": 83400 }, { "epoch": 1.3648040579235867, "grad_norm": 0.04057205468416214, "learning_rate": 2.7705413285285e-06, "loss": 0.0015, "step": 83410 }, { "epoch": 1.3649676838746625, "grad_norm": 0.02744865044951439, "learning_rate": 2.7692633196865858e-06, "loss": 0.0007, "step": 83420 }, { "epoch": 1.3651313098257383, "grad_norm": 0.0014298424357548356, "learning_rate": 2.7679854927759685e-06, "loss": 0.0014, "step": 83430 }, { "epoch": 1.3652949357768143, "grad_norm": 0.08127010613679886, "learning_rate": 2.766707847900858e-06, "loss": 0.0011, "step": 83440 }, { "epoch": 1.36545856172789, "grad_norm": 0.00600480567663908, "learning_rate": 2.7654303851654597e-06, "loss": 0.001, "step": 83450 }, { "epoch": 1.3656221876789658, "grad_norm": 0.009183033369481564, "learning_rate": 2.764153104673954e-06, "loss": 0.0012, "step": 83460 }, { "epoch": 1.3657858136300418, "grad_norm": 0.1788499504327774, "learning_rate": 2.762876006530516e-06, "loss": 0.0009, "step": 83470 }, { "epoch": 1.3659494395811176, "grad_norm": 0.03937594220042229, "learning_rate": 2.7615990908393e-06, "loss": 0.0014, "step": 83480 }, { "epoch": 1.3661130655321934, "grad_norm": 0.07301946729421616, "learning_rate": 2.7603223577044436e-06, "loss": 0.0011, "step": 83490 }, { "epoch": 1.3662766914832694, "grad_norm": 0.0333046093583107, "learning_rate": 2.7590458072300787e-06, "loss": 0.0009, "step": 83500 }, { "epoch": 1.3664403174343451, "grad_norm": 0.03378937765955925, "learning_rate": 2.757769439520311e-06, "loss": 0.0013, "step": 83510 }, { "epoch": 1.366603943385421, "grad_norm": 0.02556074783205986, "learning_rate": 2.7564932546792413e-06, "loss": 0.0016, "step": 83520 }, { "epoch": 1.366767569336497, "grad_norm": 0.03481189161539078, "learning_rate": 2.7552172528109458e-06, "loss": 0.0014, "step": 83530 }, { "epoch": 1.3669311952875727, "grad_norm": 0.030714863911271095, "learning_rate": 2.7539414340194963e-06, "loss": 0.0017, "step": 83540 }, { "epoch": 1.3670948212386484, "grad_norm": 0.1253112554550171, "learning_rate": 2.75266579840894e-06, "loss": 0.0015, "step": 83550 }, { "epoch": 1.3672584471897242, "grad_norm": 0.07900375872850418, "learning_rate": 2.751390346083317e-06, "loss": 0.002, "step": 83560 }, { "epoch": 1.3674220731408, "grad_norm": 0.048464708030223846, "learning_rate": 2.7501150771466444e-06, "loss": 0.0011, "step": 83570 }, { "epoch": 1.367585699091876, "grad_norm": 0.02625541016459465, "learning_rate": 2.748839991702934e-06, "loss": 0.0011, "step": 83580 }, { "epoch": 1.3677493250429518, "grad_norm": 0.0070399693213403225, "learning_rate": 2.7475650898561725e-06, "loss": 0.0006, "step": 83590 }, { "epoch": 1.3679129509940275, "grad_norm": 0.13729895651340485, "learning_rate": 2.7462903717103407e-06, "loss": 0.0017, "step": 83600 }, { "epoch": 1.3680765769451035, "grad_norm": 0.03516119718551636, "learning_rate": 2.7450158373693963e-06, "loss": 0.001, "step": 83610 }, { "epoch": 1.3682402028961793, "grad_norm": 0.025441166013479233, "learning_rate": 2.7437414869372903e-06, "loss": 0.0006, "step": 83620 }, { "epoch": 1.368403828847255, "grad_norm": 0.01052811462432146, "learning_rate": 2.7424673205179497e-06, "loss": 0.0011, "step": 83630 }, { "epoch": 1.368567454798331, "grad_norm": 0.0376594215631485, "learning_rate": 2.7411933382152966e-06, "loss": 0.002, "step": 83640 }, { "epoch": 1.3687310807494069, "grad_norm": 0.04426310956478119, "learning_rate": 2.739919540133227e-06, "loss": 0.0011, "step": 83650 }, { "epoch": 1.3688947067004826, "grad_norm": 0.04762754216790199, "learning_rate": 2.7386459263756327e-06, "loss": 0.0008, "step": 83660 }, { "epoch": 1.3690583326515586, "grad_norm": 0.13647077977657318, "learning_rate": 2.7373724970463805e-06, "loss": 0.0012, "step": 83670 }, { "epoch": 1.3692219586026344, "grad_norm": 0.017382225021719933, "learning_rate": 2.736099252249331e-06, "loss": 0.0014, "step": 83680 }, { "epoch": 1.3693855845537102, "grad_norm": 0.014112511649727821, "learning_rate": 2.734826192088323e-06, "loss": 0.0012, "step": 83690 }, { "epoch": 1.3695492105047862, "grad_norm": 0.13683795928955078, "learning_rate": 2.733553316667186e-06, "loss": 0.0012, "step": 83700 }, { "epoch": 1.369712836455862, "grad_norm": 0.06252920627593994, "learning_rate": 2.732280626089726e-06, "loss": 0.0008, "step": 83710 }, { "epoch": 1.3698764624069377, "grad_norm": 0.015655100345611572, "learning_rate": 2.731008120459746e-06, "loss": 0.0014, "step": 83720 }, { "epoch": 1.3700400883580137, "grad_norm": 0.021501947194337845, "learning_rate": 2.7297357998810212e-06, "loss": 0.0013, "step": 83730 }, { "epoch": 1.3702037143090895, "grad_norm": 0.04292135313153267, "learning_rate": 2.7284636644573225e-06, "loss": 0.001, "step": 83740 }, { "epoch": 1.3703673402601653, "grad_norm": 0.022099150344729424, "learning_rate": 2.7271917142923976e-06, "loss": 0.0017, "step": 83750 }, { "epoch": 1.370530966211241, "grad_norm": 0.031449537724256516, "learning_rate": 2.7259199494899847e-06, "loss": 0.0009, "step": 83760 }, { "epoch": 1.3706945921623168, "grad_norm": 0.07820586860179901, "learning_rate": 2.724648370153802e-06, "loss": 0.002, "step": 83770 }, { "epoch": 1.3708582181133928, "grad_norm": 0.08235494792461395, "learning_rate": 2.7233769763875595e-06, "loss": 0.0021, "step": 83780 }, { "epoch": 1.3710218440644686, "grad_norm": 0.026190664619207382, "learning_rate": 2.7221057682949427e-06, "loss": 0.0021, "step": 83790 }, { "epoch": 1.3711854700155444, "grad_norm": 0.04493686929345131, "learning_rate": 2.7208347459796304e-06, "loss": 0.0016, "step": 83800 }, { "epoch": 1.3713490959666204, "grad_norm": 0.16385577619075775, "learning_rate": 2.719563909545283e-06, "loss": 0.0033, "step": 83810 }, { "epoch": 1.3715127219176961, "grad_norm": 0.0782640352845192, "learning_rate": 2.718293259095542e-06, "loss": 0.0011, "step": 83820 }, { "epoch": 1.371676347868772, "grad_norm": 0.02138345316052437, "learning_rate": 2.7170227947340416e-06, "loss": 0.001, "step": 83830 }, { "epoch": 1.371839973819848, "grad_norm": 0.04858659952878952, "learning_rate": 2.715752516564393e-06, "loss": 0.0012, "step": 83840 }, { "epoch": 1.3720035997709237, "grad_norm": 0.054707422852516174, "learning_rate": 2.714482424690199e-06, "loss": 0.0007, "step": 83850 }, { "epoch": 1.3721672257219995, "grad_norm": 0.06697214394807816, "learning_rate": 2.713212519215041e-06, "loss": 0.0013, "step": 83860 }, { "epoch": 1.3723308516730754, "grad_norm": 0.021314896643161774, "learning_rate": 2.711942800242492e-06, "loss": 0.0006, "step": 83870 }, { "epoch": 1.3724944776241512, "grad_norm": 0.13214902579784393, "learning_rate": 2.7106732678761004e-06, "loss": 0.0012, "step": 83880 }, { "epoch": 1.372658103575227, "grad_norm": 0.04471101239323616, "learning_rate": 2.7094039222194106e-06, "loss": 0.0017, "step": 83890 }, { "epoch": 1.372821729526303, "grad_norm": 0.05017504096031189, "learning_rate": 2.708134763375942e-06, "loss": 0.001, "step": 83900 }, { "epoch": 1.3729853554773788, "grad_norm": 0.018335238099098206, "learning_rate": 2.7068657914492057e-06, "loss": 0.001, "step": 83910 }, { "epoch": 1.3731489814284545, "grad_norm": 0.05811738595366478, "learning_rate": 2.7055970065426922e-06, "loss": 0.0023, "step": 83920 }, { "epoch": 1.3733126073795303, "grad_norm": 0.03068411909043789, "learning_rate": 2.7043284087598824e-06, "loss": 0.0015, "step": 83930 }, { "epoch": 1.3734762333306063, "grad_norm": 0.031612154096364975, "learning_rate": 2.7030599982042357e-06, "loss": 0.003, "step": 83940 }, { "epoch": 1.373639859281682, "grad_norm": 0.05159648135304451, "learning_rate": 2.7017917749792016e-06, "loss": 0.001, "step": 83950 }, { "epoch": 1.3738034852327579, "grad_norm": 0.03812577575445175, "learning_rate": 2.70052373918821e-06, "loss": 0.0015, "step": 83960 }, { "epoch": 1.3739671111838336, "grad_norm": 0.03210095688700676, "learning_rate": 2.69925589093468e-06, "loss": 0.0011, "step": 83970 }, { "epoch": 1.3741307371349096, "grad_norm": 0.0571649968624115, "learning_rate": 2.69798823032201e-06, "loss": 0.0008, "step": 83980 }, { "epoch": 1.3742943630859854, "grad_norm": 0.10776115208864212, "learning_rate": 2.69672075745359e-06, "loss": 0.0012, "step": 83990 }, { "epoch": 1.3744579890370612, "grad_norm": 0.06793531775474548, "learning_rate": 2.6954534724327865e-06, "loss": 0.0013, "step": 84000 }, { "epoch": 1.3746216149881372, "grad_norm": 0.02788708172738552, "learning_rate": 2.6941863753629593e-06, "loss": 0.0013, "step": 84010 }, { "epoch": 1.374785240939213, "grad_norm": 0.08497747033834457, "learning_rate": 2.6929194663474438e-06, "loss": 0.0006, "step": 84020 }, { "epoch": 1.3749488668902887, "grad_norm": 0.027750637382268906, "learning_rate": 2.691652745489569e-06, "loss": 0.0008, "step": 84030 }, { "epoch": 1.3751124928413647, "grad_norm": 0.07567192614078522, "learning_rate": 2.6903862128926415e-06, "loss": 0.0028, "step": 84040 }, { "epoch": 1.3752761187924405, "grad_norm": 0.004019399639219046, "learning_rate": 2.6891198686599584e-06, "loss": 0.0019, "step": 84050 }, { "epoch": 1.3754397447435163, "grad_norm": 0.04956422001123428, "learning_rate": 2.687853712894794e-06, "loss": 0.0014, "step": 84060 }, { "epoch": 1.3756033706945923, "grad_norm": 0.09158650785684586, "learning_rate": 2.6865877457004164e-06, "loss": 0.001, "step": 84070 }, { "epoch": 1.375766996645668, "grad_norm": 0.12129585444927216, "learning_rate": 2.685321967180069e-06, "loss": 0.0016, "step": 84080 }, { "epoch": 1.3759306225967438, "grad_norm": 0.08762133121490479, "learning_rate": 2.684056377436989e-06, "loss": 0.0013, "step": 84090 }, { "epoch": 1.3760942485478198, "grad_norm": 0.07286391407251358, "learning_rate": 2.68279097657439e-06, "loss": 0.0023, "step": 84100 }, { "epoch": 1.3762578744988956, "grad_norm": 0.05570758134126663, "learning_rate": 2.6815257646954762e-06, "loss": 0.0011, "step": 84110 }, { "epoch": 1.3764215004499714, "grad_norm": 0.039847783744335175, "learning_rate": 2.680260741903431e-06, "loss": 0.001, "step": 84120 }, { "epoch": 1.3765851264010471, "grad_norm": 0.16149753332138062, "learning_rate": 2.6789959083014295e-06, "loss": 0.0018, "step": 84130 }, { "epoch": 1.3767487523521231, "grad_norm": 0.12277252227067947, "learning_rate": 2.677731263992625e-06, "loss": 0.0013, "step": 84140 }, { "epoch": 1.376912378303199, "grad_norm": 0.043828777968883514, "learning_rate": 2.6764668090801555e-06, "loss": 0.0016, "step": 84150 }, { "epoch": 1.3770760042542747, "grad_norm": 0.18554279208183289, "learning_rate": 2.6752025436671497e-06, "loss": 0.0028, "step": 84160 }, { "epoch": 1.3772396302053505, "grad_norm": 0.09850435703992844, "learning_rate": 2.6739384678567127e-06, "loss": 0.0014, "step": 84170 }, { "epoch": 1.3774032561564264, "grad_norm": 0.04618208110332489, "learning_rate": 2.6726745817519428e-06, "loss": 0.0014, "step": 84180 }, { "epoch": 1.3775668821075022, "grad_norm": 0.028582289814949036, "learning_rate": 2.671410885455913e-06, "loss": 0.0007, "step": 84190 }, { "epoch": 1.377730508058578, "grad_norm": 0.06658206135034561, "learning_rate": 2.6701473790716913e-06, "loss": 0.0012, "step": 84200 }, { "epoch": 1.377894134009654, "grad_norm": 0.010035544633865356, "learning_rate": 2.6688840627023203e-06, "loss": 0.0012, "step": 84210 }, { "epoch": 1.3780577599607298, "grad_norm": 0.1793397217988968, "learning_rate": 2.6676209364508365e-06, "loss": 0.0023, "step": 84220 }, { "epoch": 1.3782213859118055, "grad_norm": 0.05178650841116905, "learning_rate": 2.6663580004202517e-06, "loss": 0.0008, "step": 84230 }, { "epoch": 1.3783850118628815, "grad_norm": 0.02752397023141384, "learning_rate": 2.66509525471357e-06, "loss": 0.0007, "step": 84240 }, { "epoch": 1.3785486378139573, "grad_norm": 0.007300386670976877, "learning_rate": 2.6638326994337736e-06, "loss": 0.0005, "step": 84250 }, { "epoch": 1.378712263765033, "grad_norm": 0.09950890392065048, "learning_rate": 2.6625703346838363e-06, "loss": 0.0012, "step": 84260 }, { "epoch": 1.378875889716109, "grad_norm": 0.03508450463414192, "learning_rate": 2.6613081605667068e-06, "loss": 0.0006, "step": 84270 }, { "epoch": 1.3790395156671849, "grad_norm": 0.034476395696401596, "learning_rate": 2.660046177185329e-06, "loss": 0.0011, "step": 84280 }, { "epoch": 1.3792031416182606, "grad_norm": 0.05864248052239418, "learning_rate": 2.658784384642623e-06, "loss": 0.0011, "step": 84290 }, { "epoch": 1.3793667675693366, "grad_norm": 0.05984555557370186, "learning_rate": 2.6575227830414984e-06, "loss": 0.001, "step": 84300 }, { "epoch": 1.3795303935204124, "grad_norm": 0.02164645865559578, "learning_rate": 2.6562613724848437e-06, "loss": 0.0012, "step": 84310 }, { "epoch": 1.3796940194714882, "grad_norm": 0.04665246233344078, "learning_rate": 2.6550001530755393e-06, "loss": 0.0009, "step": 84320 }, { "epoch": 1.379857645422564, "grad_norm": 0.04419153928756714, "learning_rate": 2.6537391249164424e-06, "loss": 0.0009, "step": 84330 }, { "epoch": 1.3800212713736397, "grad_norm": 0.049217589199543, "learning_rate": 2.6524782881104015e-06, "loss": 0.0015, "step": 84340 }, { "epoch": 1.3801848973247157, "grad_norm": 0.07900845259428024, "learning_rate": 2.651217642760242e-06, "loss": 0.002, "step": 84350 }, { "epoch": 1.3803485232757915, "grad_norm": 0.026565400883555412, "learning_rate": 2.6499571889687835e-06, "loss": 0.0009, "step": 84360 }, { "epoch": 1.3805121492268673, "grad_norm": 0.005271803587675095, "learning_rate": 2.6486969268388186e-06, "loss": 0.0008, "step": 84370 }, { "epoch": 1.3806757751779433, "grad_norm": 0.07838542014360428, "learning_rate": 2.647436856473134e-06, "loss": 0.0012, "step": 84380 }, { "epoch": 1.380839401129019, "grad_norm": 0.07470891624689102, "learning_rate": 2.646176977974494e-06, "loss": 0.001, "step": 84390 }, { "epoch": 1.3810030270800948, "grad_norm": 0.01837877556681633, "learning_rate": 2.6449172914456524e-06, "loss": 0.001, "step": 84400 }, { "epoch": 1.3811666530311708, "grad_norm": 0.04809350147843361, "learning_rate": 2.643657796989343e-06, "loss": 0.0013, "step": 84410 }, { "epoch": 1.3813302789822466, "grad_norm": 0.06591973453760147, "learning_rate": 2.6423984947082874e-06, "loss": 0.0021, "step": 84420 }, { "epoch": 1.3814939049333224, "grad_norm": 0.08016178011894226, "learning_rate": 2.641139384705188e-06, "loss": 0.0029, "step": 84430 }, { "epoch": 1.3816575308843984, "grad_norm": 0.14264336228370667, "learning_rate": 2.6398804670827372e-06, "loss": 0.0018, "step": 84440 }, { "epoch": 1.3818211568354741, "grad_norm": 0.03798792511224747, "learning_rate": 2.6386217419436057e-06, "loss": 0.0015, "step": 84450 }, { "epoch": 1.38198478278655, "grad_norm": 0.05401057004928589, "learning_rate": 2.6373632093904477e-06, "loss": 0.0012, "step": 84460 }, { "epoch": 1.382148408737626, "grad_norm": 0.054623693227767944, "learning_rate": 2.636104869525911e-06, "loss": 0.002, "step": 84470 }, { "epoch": 1.3823120346887017, "grad_norm": 0.04498565196990967, "learning_rate": 2.6348467224526163e-06, "loss": 0.0007, "step": 84480 }, { "epoch": 1.3824756606397774, "grad_norm": 0.010171068832278252, "learning_rate": 2.6335887682731774e-06, "loss": 0.001, "step": 84490 }, { "epoch": 1.3826392865908534, "grad_norm": 0.04460660740733147, "learning_rate": 2.6323310070901863e-06, "loss": 0.0008, "step": 84500 }, { "epoch": 1.3828029125419292, "grad_norm": 0.024205192923545837, "learning_rate": 2.6310734390062244e-06, "loss": 0.001, "step": 84510 }, { "epoch": 1.382966538493005, "grad_norm": 0.0299739558249712, "learning_rate": 2.6298160641238507e-06, "loss": 0.0009, "step": 84520 }, { "epoch": 1.3831301644440808, "grad_norm": 0.06419893354177475, "learning_rate": 2.6285588825456165e-06, "loss": 0.0013, "step": 84530 }, { "epoch": 1.3832937903951565, "grad_norm": 0.060932599008083344, "learning_rate": 2.6273018943740503e-06, "loss": 0.0014, "step": 84540 }, { "epoch": 1.3834574163462325, "grad_norm": 0.037657011300325394, "learning_rate": 2.6260450997116705e-06, "loss": 0.0014, "step": 84550 }, { "epoch": 1.3836210422973083, "grad_norm": 0.02954825758934021, "learning_rate": 2.624788498660974e-06, "loss": 0.0013, "step": 84560 }, { "epoch": 1.383784668248384, "grad_norm": 0.1386798918247223, "learning_rate": 2.623532091324448e-06, "loss": 0.0012, "step": 84570 }, { "epoch": 1.38394829419946, "grad_norm": 0.008061363361775875, "learning_rate": 2.622275877804557e-06, "loss": 0.0007, "step": 84580 }, { "epoch": 1.3841119201505359, "grad_norm": 0.008589118719100952, "learning_rate": 2.621019858203758e-06, "loss": 0.0007, "step": 84590 }, { "epoch": 1.3842755461016116, "grad_norm": 0.0730748102068901, "learning_rate": 2.6197640326244834e-06, "loss": 0.0007, "step": 84600 }, { "epoch": 1.3844391720526876, "grad_norm": 0.030864287167787552, "learning_rate": 2.6185084011691574e-06, "loss": 0.001, "step": 84610 }, { "epoch": 1.3846027980037634, "grad_norm": 0.0010963203385472298, "learning_rate": 2.617252963940182e-06, "loss": 0.0013, "step": 84620 }, { "epoch": 1.3847664239548392, "grad_norm": 0.025900447741150856, "learning_rate": 2.61599772103995e-06, "loss": 0.0011, "step": 84630 }, { "epoch": 1.3849300499059152, "grad_norm": 0.04189382120966911, "learning_rate": 2.6147426725708315e-06, "loss": 0.0012, "step": 84640 }, { "epoch": 1.385093675856991, "grad_norm": 0.10469403862953186, "learning_rate": 2.6134878186351863e-06, "loss": 0.002, "step": 84650 }, { "epoch": 1.3852573018080667, "grad_norm": 0.013182587921619415, "learning_rate": 2.6122331593353537e-06, "loss": 0.0009, "step": 84660 }, { "epoch": 1.3854209277591427, "grad_norm": 0.05186162889003754, "learning_rate": 2.6109786947736626e-06, "loss": 0.0007, "step": 84670 }, { "epoch": 1.3855845537102185, "grad_norm": 0.07378797233104706, "learning_rate": 2.609724425052419e-06, "loss": 0.0014, "step": 84680 }, { "epoch": 1.3857481796612943, "grad_norm": 0.027884868904948235, "learning_rate": 2.6084703502739206e-06, "loss": 0.0008, "step": 84690 }, { "epoch": 1.38591180561237, "grad_norm": 0.054000888019800186, "learning_rate": 2.607216470540442e-06, "loss": 0.0033, "step": 84700 }, { "epoch": 1.386075431563446, "grad_norm": 0.07974854111671448, "learning_rate": 2.6059627859542492e-06, "loss": 0.0016, "step": 84710 }, { "epoch": 1.3862390575145218, "grad_norm": 0.12347791343927383, "learning_rate": 2.604709296617584e-06, "loss": 0.0017, "step": 84720 }, { "epoch": 1.3864026834655976, "grad_norm": 0.18000462651252747, "learning_rate": 2.6034560026326815e-06, "loss": 0.001, "step": 84730 }, { "epoch": 1.3865663094166734, "grad_norm": 0.0015927114291116595, "learning_rate": 2.6022029041017517e-06, "loss": 0.0008, "step": 84740 }, { "epoch": 1.3867299353677494, "grad_norm": 0.17558711767196655, "learning_rate": 2.600950001126996e-06, "loss": 0.0013, "step": 84750 }, { "epoch": 1.3868935613188251, "grad_norm": 0.045367199927568436, "learning_rate": 2.5996972938105945e-06, "loss": 0.0009, "step": 84760 }, { "epoch": 1.387057187269901, "grad_norm": 0.01811496540904045, "learning_rate": 2.5984447822547176e-06, "loss": 0.0008, "step": 84770 }, { "epoch": 1.387220813220977, "grad_norm": 0.11393507570028305, "learning_rate": 2.5971924665615133e-06, "loss": 0.0021, "step": 84780 }, { "epoch": 1.3873844391720527, "grad_norm": 0.06782524287700653, "learning_rate": 2.5959403468331145e-06, "loss": 0.0008, "step": 84790 }, { "epoch": 1.3875480651231284, "grad_norm": 0.16156239807605743, "learning_rate": 2.5946884231716435e-06, "loss": 0.0011, "step": 84800 }, { "epoch": 1.3877116910742044, "grad_norm": 0.06091412901878357, "learning_rate": 2.593436695679199e-06, "loss": 0.0007, "step": 84810 }, { "epoch": 1.3878753170252802, "grad_norm": 0.05284532532095909, "learning_rate": 2.592185164457872e-06, "loss": 0.0011, "step": 84820 }, { "epoch": 1.388038942976356, "grad_norm": 0.006466014310717583, "learning_rate": 2.5909338296097285e-06, "loss": 0.0008, "step": 84830 }, { "epoch": 1.388202568927432, "grad_norm": 0.008024314418435097, "learning_rate": 2.5896826912368282e-06, "loss": 0.002, "step": 84840 }, { "epoch": 1.3883661948785078, "grad_norm": 0.019224250689148903, "learning_rate": 2.588431749441205e-06, "loss": 0.0005, "step": 84850 }, { "epoch": 1.3885298208295835, "grad_norm": 0.003075191518291831, "learning_rate": 2.5871810043248847e-06, "loss": 0.0007, "step": 84860 }, { "epoch": 1.3886934467806595, "grad_norm": 0.05049090087413788, "learning_rate": 2.5859304559898712e-06, "loss": 0.0029, "step": 84870 }, { "epoch": 1.3888570727317353, "grad_norm": 0.01819625310599804, "learning_rate": 2.5846801045381576e-06, "loss": 0.0007, "step": 84880 }, { "epoch": 1.389020698682811, "grad_norm": 0.024031417444348335, "learning_rate": 2.583429950071716e-06, "loss": 0.0014, "step": 84890 }, { "epoch": 1.3891843246338869, "grad_norm": 0.05024215951561928, "learning_rate": 2.5821799926925073e-06, "loss": 0.0009, "step": 84900 }, { "epoch": 1.3893479505849629, "grad_norm": 0.06953619420528412, "learning_rate": 2.58093023250247e-06, "loss": 0.0009, "step": 84910 }, { "epoch": 1.3895115765360386, "grad_norm": 0.04103608801960945, "learning_rate": 2.579680669603535e-06, "loss": 0.0007, "step": 84920 }, { "epoch": 1.3896752024871144, "grad_norm": 0.010032951831817627, "learning_rate": 2.5784313040976076e-06, "loss": 0.001, "step": 84930 }, { "epoch": 1.3898388284381902, "grad_norm": 0.01916610635817051, "learning_rate": 2.5771821360865867e-06, "loss": 0.0006, "step": 84940 }, { "epoch": 1.3900024543892662, "grad_norm": 0.03895369544625282, "learning_rate": 2.5759331656723452e-06, "loss": 0.0013, "step": 84950 }, { "epoch": 1.390166080340342, "grad_norm": 0.0250666756182909, "learning_rate": 2.574684392956749e-06, "loss": 0.0009, "step": 84960 }, { "epoch": 1.3903297062914177, "grad_norm": 0.04116955026984215, "learning_rate": 2.573435818041641e-06, "loss": 0.001, "step": 84970 }, { "epoch": 1.3904933322424937, "grad_norm": 0.06238459050655365, "learning_rate": 2.5721874410288527e-06, "loss": 0.001, "step": 84980 }, { "epoch": 1.3906569581935695, "grad_norm": 0.0622844360768795, "learning_rate": 2.5709392620201955e-06, "loss": 0.0012, "step": 84990 }, { "epoch": 1.3908205841446453, "grad_norm": 0.030324935913085938, "learning_rate": 2.569691281117469e-06, "loss": 0.0011, "step": 85000 }, { "epoch": 1.3909842100957213, "grad_norm": 0.06983377784490585, "learning_rate": 2.5684434984224516e-06, "loss": 0.002, "step": 85010 }, { "epoch": 1.391147836046797, "grad_norm": 0.011028064414858818, "learning_rate": 2.567195914036911e-06, "loss": 0.0007, "step": 85020 }, { "epoch": 1.3913114619978728, "grad_norm": 0.012050114572048187, "learning_rate": 2.565948528062593e-06, "loss": 0.0006, "step": 85030 }, { "epoch": 1.3914750879489488, "grad_norm": 0.20281486213207245, "learning_rate": 2.5647013406012333e-06, "loss": 0.002, "step": 85040 }, { "epoch": 1.3916387139000246, "grad_norm": 0.07458246499300003, "learning_rate": 2.563454351754545e-06, "loss": 0.0008, "step": 85050 }, { "epoch": 1.3918023398511004, "grad_norm": 0.02601015754044056, "learning_rate": 2.562207561624232e-06, "loss": 0.0009, "step": 85060 }, { "epoch": 1.3919659658021764, "grad_norm": 0.03659119829535484, "learning_rate": 2.5609609703119743e-06, "loss": 0.0007, "step": 85070 }, { "epoch": 1.3921295917532521, "grad_norm": 0.04428410157561302, "learning_rate": 2.5597145779194433e-06, "loss": 0.0007, "step": 85080 }, { "epoch": 1.392293217704328, "grad_norm": 0.057671818882226944, "learning_rate": 2.5584683845482868e-06, "loss": 0.0013, "step": 85090 }, { "epoch": 1.3924568436554037, "grad_norm": 0.05824888497591019, "learning_rate": 2.5572223903001435e-06, "loss": 0.0008, "step": 85100 }, { "epoch": 1.3926204696064794, "grad_norm": 0.008045400492846966, "learning_rate": 2.5559765952766313e-06, "loss": 0.0011, "step": 85110 }, { "epoch": 1.3927840955575554, "grad_norm": 0.05307982489466667, "learning_rate": 2.5547309995793512e-06, "loss": 0.0007, "step": 85120 }, { "epoch": 1.3929477215086312, "grad_norm": 0.04576743394136429, "learning_rate": 2.553485603309893e-06, "loss": 0.0015, "step": 85130 }, { "epoch": 1.393111347459707, "grad_norm": 0.05727805569767952, "learning_rate": 2.552240406569823e-06, "loss": 0.0007, "step": 85140 }, { "epoch": 1.393274973410783, "grad_norm": 0.057195328176021576, "learning_rate": 2.550995409460699e-06, "loss": 0.0011, "step": 85150 }, { "epoch": 1.3934385993618588, "grad_norm": 0.14627176523208618, "learning_rate": 2.5497506120840563e-06, "loss": 0.0013, "step": 85160 }, { "epoch": 1.3936022253129345, "grad_norm": 0.0019530359422788024, "learning_rate": 2.5485060145414186e-06, "loss": 0.0005, "step": 85170 }, { "epoch": 1.3937658512640105, "grad_norm": 0.05330021306872368, "learning_rate": 2.5472616169342878e-06, "loss": 0.0012, "step": 85180 }, { "epoch": 1.3939294772150863, "grad_norm": 0.004152234178036451, "learning_rate": 2.5460174193641564e-06, "loss": 0.0014, "step": 85190 }, { "epoch": 1.394093103166162, "grad_norm": 0.017515040934085846, "learning_rate": 2.5447734219324937e-06, "loss": 0.0013, "step": 85200 }, { "epoch": 1.394256729117238, "grad_norm": 0.003851355519145727, "learning_rate": 2.543529624740758e-06, "loss": 0.0008, "step": 85210 }, { "epoch": 1.3944203550683139, "grad_norm": 0.056642767041921616, "learning_rate": 2.5422860278903884e-06, "loss": 0.0019, "step": 85220 }, { "epoch": 1.3945839810193896, "grad_norm": 0.07989409565925598, "learning_rate": 2.5410426314828084e-06, "loss": 0.0009, "step": 85230 }, { "epoch": 1.3947476069704656, "grad_norm": 0.0885382816195488, "learning_rate": 2.5397994356194246e-06, "loss": 0.0008, "step": 85240 }, { "epoch": 1.3949112329215414, "grad_norm": 0.020241836085915565, "learning_rate": 2.5385564404016305e-06, "loss": 0.0005, "step": 85250 }, { "epoch": 1.3950748588726172, "grad_norm": 0.056566961109638214, "learning_rate": 2.537313645930796e-06, "loss": 0.0008, "step": 85260 }, { "epoch": 1.3952384848236932, "grad_norm": 0.013217160478234291, "learning_rate": 2.536071052308284e-06, "loss": 0.0007, "step": 85270 }, { "epoch": 1.395402110774769, "grad_norm": 0.006033493205904961, "learning_rate": 2.534828659635432e-06, "loss": 0.001, "step": 85280 }, { "epoch": 1.3955657367258447, "grad_norm": 0.0031652438919991255, "learning_rate": 2.5335864680135693e-06, "loss": 0.0009, "step": 85290 }, { "epoch": 1.3957293626769205, "grad_norm": 0.0006656902260147035, "learning_rate": 2.5323444775440007e-06, "loss": 0.001, "step": 85300 }, { "epoch": 1.3958929886279963, "grad_norm": 0.061206698417663574, "learning_rate": 2.531102688328023e-06, "loss": 0.0009, "step": 85310 }, { "epoch": 1.3960566145790723, "grad_norm": 0.00743982195854187, "learning_rate": 2.5298611004669083e-06, "loss": 0.001, "step": 85320 }, { "epoch": 1.396220240530148, "grad_norm": 0.03856433182954788, "learning_rate": 2.5286197140619193e-06, "loss": 0.001, "step": 85330 }, { "epoch": 1.3963838664812238, "grad_norm": 0.03750927746295929, "learning_rate": 2.5273785292142973e-06, "loss": 0.001, "step": 85340 }, { "epoch": 1.3965474924322998, "grad_norm": 0.009348989464342594, "learning_rate": 2.5261375460252707e-06, "loss": 0.0012, "step": 85350 }, { "epoch": 1.3967111183833756, "grad_norm": 0.018941443413496017, "learning_rate": 2.5248967645960476e-06, "loss": 0.0011, "step": 85360 }, { "epoch": 1.3968747443344514, "grad_norm": 0.0721622183918953, "learning_rate": 2.5236561850278253e-06, "loss": 0.0029, "step": 85370 }, { "epoch": 1.3970383702855274, "grad_norm": 0.04899194464087486, "learning_rate": 2.522415807421777e-06, "loss": 0.001, "step": 85380 }, { "epoch": 1.3972019962366031, "grad_norm": 0.043110739439725876, "learning_rate": 2.521175631879068e-06, "loss": 0.0007, "step": 85390 }, { "epoch": 1.397365622187679, "grad_norm": 0.03885679319500923, "learning_rate": 2.5199356585008393e-06, "loss": 0.0011, "step": 85400 }, { "epoch": 1.397529248138755, "grad_norm": 0.03648418188095093, "learning_rate": 2.5186958873882216e-06, "loss": 0.0005, "step": 85410 }, { "epoch": 1.3976928740898307, "grad_norm": 0.03852356970310211, "learning_rate": 2.517456318642326e-06, "loss": 0.0009, "step": 85420 }, { "epoch": 1.3978565000409064, "grad_norm": 0.032116807997226715, "learning_rate": 2.5162169523642443e-06, "loss": 0.0008, "step": 85430 }, { "epoch": 1.3980201259919824, "grad_norm": 0.01025864202529192, "learning_rate": 2.5149777886550597e-06, "loss": 0.0006, "step": 85440 }, { "epoch": 1.3981837519430582, "grad_norm": 0.018044443801045418, "learning_rate": 2.513738827615829e-06, "loss": 0.0016, "step": 85450 }, { "epoch": 1.398347377894134, "grad_norm": 0.04339112341403961, "learning_rate": 2.512500069347604e-06, "loss": 0.0009, "step": 85460 }, { "epoch": 1.39851100384521, "grad_norm": 0.02578670345246792, "learning_rate": 2.5112615139514073e-06, "loss": 0.0006, "step": 85470 }, { "epoch": 1.3986746297962858, "grad_norm": 0.02312774583697319, "learning_rate": 2.5100231615282556e-06, "loss": 0.001, "step": 85480 }, { "epoch": 1.3988382557473615, "grad_norm": 0.11079282313585281, "learning_rate": 2.5087850121791422e-06, "loss": 0.0018, "step": 85490 }, { "epoch": 1.3990018816984373, "grad_norm": 0.18890371918678284, "learning_rate": 2.5075470660050487e-06, "loss": 0.0014, "step": 85500 }, { "epoch": 1.399165507649513, "grad_norm": 0.0513865128159523, "learning_rate": 2.5063093231069346e-06, "loss": 0.0009, "step": 85510 }, { "epoch": 1.399329133600589, "grad_norm": 0.09298305958509445, "learning_rate": 2.5050717835857497e-06, "loss": 0.0011, "step": 85520 }, { "epoch": 1.3994927595516649, "grad_norm": 0.02303282916545868, "learning_rate": 2.50383444754242e-06, "loss": 0.0011, "step": 85530 }, { "epoch": 1.3996563855027406, "grad_norm": 0.10980135202407837, "learning_rate": 2.502597315077862e-06, "loss": 0.0014, "step": 85540 }, { "epoch": 1.3998200114538166, "grad_norm": 0.020838702097535133, "learning_rate": 2.501360386292968e-06, "loss": 0.0012, "step": 85550 }, { "epoch": 1.3999836374048924, "grad_norm": 0.014282509684562683, "learning_rate": 2.5001236612886216e-06, "loss": 0.0006, "step": 85560 }, { "epoch": 1.4001472633559682, "grad_norm": 0.017213759943842888, "learning_rate": 2.498887140165683e-06, "loss": 0.001, "step": 85570 }, { "epoch": 1.4003108893070442, "grad_norm": 0.060502659529447556, "learning_rate": 2.497650823025001e-06, "loss": 0.0012, "step": 85580 }, { "epoch": 1.40047451525812, "grad_norm": 0.05678383260965347, "learning_rate": 2.496414709967402e-06, "loss": 0.0011, "step": 85590 }, { "epoch": 1.4006381412091957, "grad_norm": 0.06617666035890579, "learning_rate": 2.495178801093703e-06, "loss": 0.0019, "step": 85600 }, { "epoch": 1.4008017671602717, "grad_norm": 0.05336611717939377, "learning_rate": 2.4939430965046974e-06, "loss": 0.0006, "step": 85610 }, { "epoch": 1.4009653931113475, "grad_norm": 0.07783443480730057, "learning_rate": 2.4927075963011684e-06, "loss": 0.0017, "step": 85620 }, { "epoch": 1.4011290190624233, "grad_norm": 0.09148307144641876, "learning_rate": 2.491472300583875e-06, "loss": 0.0009, "step": 85630 }, { "epoch": 1.4012926450134993, "grad_norm": 0.013781259767711163, "learning_rate": 2.490237209453569e-06, "loss": 0.0007, "step": 85640 }, { "epoch": 1.401456270964575, "grad_norm": 0.09493014216423035, "learning_rate": 2.4890023230109744e-06, "loss": 0.0018, "step": 85650 }, { "epoch": 1.4016198969156508, "grad_norm": 0.03304464742541313, "learning_rate": 2.487767641356809e-06, "loss": 0.0007, "step": 85660 }, { "epoch": 1.4017835228667266, "grad_norm": 0.1106976643204689, "learning_rate": 2.4865331645917663e-06, "loss": 0.0011, "step": 85670 }, { "epoch": 1.4019471488178026, "grad_norm": 0.037574999034404755, "learning_rate": 2.4852988928165283e-06, "loss": 0.0009, "step": 85680 }, { "epoch": 1.4021107747688784, "grad_norm": 0.12366770952939987, "learning_rate": 2.4840648261317553e-06, "loss": 0.0033, "step": 85690 }, { "epoch": 1.4022744007199541, "grad_norm": 0.029934987425804138, "learning_rate": 2.4828309646380964e-06, "loss": 0.0009, "step": 85700 }, { "epoch": 1.40243802667103, "grad_norm": 0.04335254430770874, "learning_rate": 2.481597308436179e-06, "loss": 0.0031, "step": 85710 }, { "epoch": 1.402601652622106, "grad_norm": 0.07084373384714127, "learning_rate": 2.480363857626618e-06, "loss": 0.0014, "step": 85720 }, { "epoch": 1.4027652785731817, "grad_norm": 0.006869551260024309, "learning_rate": 2.4791306123100066e-06, "loss": 0.0016, "step": 85730 }, { "epoch": 1.4029289045242574, "grad_norm": 0.033188678324222565, "learning_rate": 2.4778975725869275e-06, "loss": 0.0011, "step": 85740 }, { "epoch": 1.4030925304753334, "grad_norm": 0.05597894638776779, "learning_rate": 2.4766647385579416e-06, "loss": 0.0008, "step": 85750 }, { "epoch": 1.4032561564264092, "grad_norm": 0.08012067526578903, "learning_rate": 2.4754321103235923e-06, "loss": 0.0013, "step": 85760 }, { "epoch": 1.403419782377485, "grad_norm": 0.03605680912733078, "learning_rate": 2.4741996879844134e-06, "loss": 0.0007, "step": 85770 }, { "epoch": 1.403583408328561, "grad_norm": 0.032011374831199646, "learning_rate": 2.4729674716409114e-06, "loss": 0.0018, "step": 85780 }, { "epoch": 1.4037470342796368, "grad_norm": 0.05075502023100853, "learning_rate": 2.471735461393587e-06, "loss": 0.0013, "step": 85790 }, { "epoch": 1.4039106602307125, "grad_norm": 0.025442451238632202, "learning_rate": 2.4705036573429153e-06, "loss": 0.0008, "step": 85800 }, { "epoch": 1.4040742861817885, "grad_norm": 0.01608380116522312, "learning_rate": 2.4692720595893606e-06, "loss": 0.0023, "step": 85810 }, { "epoch": 1.4042379121328643, "grad_norm": 0.05921518802642822, "learning_rate": 2.468040668233364e-06, "loss": 0.0012, "step": 85820 }, { "epoch": 1.40440153808394, "grad_norm": 0.012568363919854164, "learning_rate": 2.4668094833753584e-06, "loss": 0.0007, "step": 85830 }, { "epoch": 1.404565164035016, "grad_norm": 0.0022954826708883047, "learning_rate": 2.465578505115751e-06, "loss": 0.0011, "step": 85840 }, { "epoch": 1.4047287899860919, "grad_norm": 0.1080038920044899, "learning_rate": 2.4643477335549397e-06, "loss": 0.0015, "step": 85850 }, { "epoch": 1.4048924159371676, "grad_norm": 0.011237172409892082, "learning_rate": 2.4631171687932982e-06, "loss": 0.001, "step": 85860 }, { "epoch": 1.4050560418882434, "grad_norm": 0.10426940768957138, "learning_rate": 2.461886810931191e-06, "loss": 0.0017, "step": 85870 }, { "epoch": 1.4052196678393194, "grad_norm": 0.0911511778831482, "learning_rate": 2.4606566600689586e-06, "loss": 0.0015, "step": 85880 }, { "epoch": 1.4053832937903952, "grad_norm": 0.0014476514188572764, "learning_rate": 2.459426716306931e-06, "loss": 0.0007, "step": 85890 }, { "epoch": 1.405546919741471, "grad_norm": 0.0435417965054512, "learning_rate": 2.4581969797454146e-06, "loss": 0.0014, "step": 85900 }, { "epoch": 1.4057105456925467, "grad_norm": 0.05716950446367264, "learning_rate": 2.456967450484707e-06, "loss": 0.0009, "step": 85910 }, { "epoch": 1.4058741716436227, "grad_norm": 0.007711363025009632, "learning_rate": 2.4557381286250793e-06, "loss": 0.0008, "step": 85920 }, { "epoch": 1.4060377975946985, "grad_norm": 0.054224394261837006, "learning_rate": 2.454509014266796e-06, "loss": 0.0009, "step": 85930 }, { "epoch": 1.4062014235457743, "grad_norm": 0.01425554696470499, "learning_rate": 2.453280107510095e-06, "loss": 0.0011, "step": 85940 }, { "epoch": 1.4063650494968503, "grad_norm": 0.05185844376683235, "learning_rate": 2.4520514084552054e-06, "loss": 0.001, "step": 85950 }, { "epoch": 1.406528675447926, "grad_norm": 0.040036190301179886, "learning_rate": 2.4508229172023324e-06, "loss": 0.0018, "step": 85960 }, { "epoch": 1.4066923013990018, "grad_norm": 0.026100879535079002, "learning_rate": 2.4495946338516706e-06, "loss": 0.0014, "step": 85970 }, { "epoch": 1.4068559273500778, "grad_norm": 0.04681260883808136, "learning_rate": 2.4483665585033923e-06, "loss": 0.0031, "step": 85980 }, { "epoch": 1.4070195533011536, "grad_norm": 0.07585979998111725, "learning_rate": 2.4471386912576574e-06, "loss": 0.0007, "step": 85990 }, { "epoch": 1.4071831792522294, "grad_norm": 0.09745154529809952, "learning_rate": 2.4459110322146034e-06, "loss": 0.0017, "step": 86000 }, { "epoch": 1.4073468052033054, "grad_norm": 0.06328898668289185, "learning_rate": 2.4446835814743576e-06, "loss": 0.0016, "step": 86010 }, { "epoch": 1.4075104311543811, "grad_norm": 0.03818634897470474, "learning_rate": 2.443456339137023e-06, "loss": 0.0009, "step": 86020 }, { "epoch": 1.407674057105457, "grad_norm": 0.0605267770588398, "learning_rate": 2.4422293053026936e-06, "loss": 0.0011, "step": 86030 }, { "epoch": 1.407837683056533, "grad_norm": 0.04194982722401619, "learning_rate": 2.4410024800714376e-06, "loss": 0.0014, "step": 86040 }, { "epoch": 1.4080013090076087, "grad_norm": 0.18008437752723694, "learning_rate": 2.439775863543315e-06, "loss": 0.0027, "step": 86050 }, { "epoch": 1.4081649349586844, "grad_norm": 0.050394292920827866, "learning_rate": 2.438549455818362e-06, "loss": 0.0011, "step": 86060 }, { "epoch": 1.4083285609097602, "grad_norm": 0.0009599836193956435, "learning_rate": 2.4373232569965987e-06, "loss": 0.001, "step": 86070 }, { "epoch": 1.408492186860836, "grad_norm": 0.011497218161821365, "learning_rate": 2.436097267178034e-06, "loss": 0.0006, "step": 86080 }, { "epoch": 1.408655812811912, "grad_norm": 0.026954131200909615, "learning_rate": 2.4348714864626505e-06, "loss": 0.0008, "step": 86090 }, { "epoch": 1.4088194387629878, "grad_norm": 0.040744245052337646, "learning_rate": 2.4336459149504233e-06, "loss": 0.0007, "step": 86100 }, { "epoch": 1.4089830647140635, "grad_norm": 0.09333061426877975, "learning_rate": 2.432420552741302e-06, "loss": 0.0013, "step": 86110 }, { "epoch": 1.4091466906651395, "grad_norm": 0.033017560839653015, "learning_rate": 2.4311953999352265e-06, "loss": 0.0008, "step": 86120 }, { "epoch": 1.4093103166162153, "grad_norm": 0.03325832262635231, "learning_rate": 2.4299704566321123e-06, "loss": 0.0012, "step": 86130 }, { "epoch": 1.409473942567291, "grad_norm": 0.023710599169135094, "learning_rate": 2.428745722931866e-06, "loss": 0.0013, "step": 86140 }, { "epoch": 1.409637568518367, "grad_norm": 0.1300840526819229, "learning_rate": 2.427521198934369e-06, "loss": 0.0014, "step": 86150 }, { "epoch": 1.4098011944694429, "grad_norm": 0.1166706532239914, "learning_rate": 2.426296884739491e-06, "loss": 0.0012, "step": 86160 }, { "epoch": 1.4099648204205186, "grad_norm": 0.06065169721841812, "learning_rate": 2.425072780447082e-06, "loss": 0.0007, "step": 86170 }, { "epoch": 1.4101284463715946, "grad_norm": 0.003930584527552128, "learning_rate": 2.423848886156978e-06, "loss": 0.0012, "step": 86180 }, { "epoch": 1.4102920723226704, "grad_norm": 0.036340244114398956, "learning_rate": 2.4226252019689923e-06, "loss": 0.0007, "step": 86190 }, { "epoch": 1.4104556982737462, "grad_norm": 0.05645986273884773, "learning_rate": 2.4214017279829273e-06, "loss": 0.0006, "step": 86200 }, { "epoch": 1.4106193242248222, "grad_norm": 0.029943697154521942, "learning_rate": 2.4201784642985625e-06, "loss": 0.0021, "step": 86210 }, { "epoch": 1.410782950175898, "grad_norm": 0.07911103218793869, "learning_rate": 2.418955411015667e-06, "loss": 0.0006, "step": 86220 }, { "epoch": 1.4109465761269737, "grad_norm": 0.036517757922410965, "learning_rate": 2.4177325682339854e-06, "loss": 0.0016, "step": 86230 }, { "epoch": 1.4111102020780497, "grad_norm": 0.007980478927493095, "learning_rate": 2.4165099360532505e-06, "loss": 0.0011, "step": 86240 }, { "epoch": 1.4112738280291255, "grad_norm": 0.04216919094324112, "learning_rate": 2.4152875145731747e-06, "loss": 0.0007, "step": 86250 }, { "epoch": 1.4114374539802013, "grad_norm": 0.033767424523830414, "learning_rate": 2.414065303893456e-06, "loss": 0.0015, "step": 86260 }, { "epoch": 1.411601079931277, "grad_norm": 0.019887957721948624, "learning_rate": 2.412843304113772e-06, "loss": 0.0011, "step": 86270 }, { "epoch": 1.4117647058823528, "grad_norm": 0.03932947292923927, "learning_rate": 2.411621515333788e-06, "loss": 0.0009, "step": 86280 }, { "epoch": 1.4119283318334288, "grad_norm": 0.054890550673007965, "learning_rate": 2.4103999376531445e-06, "loss": 0.0013, "step": 86290 }, { "epoch": 1.4120919577845046, "grad_norm": 0.025379173457622528, "learning_rate": 2.4091785711714734e-06, "loss": 0.0009, "step": 86300 }, { "epoch": 1.4122555837355804, "grad_norm": 0.022573180496692657, "learning_rate": 2.407957415988381e-06, "loss": 0.0015, "step": 86310 }, { "epoch": 1.4124192096866564, "grad_norm": 0.030358633026480675, "learning_rate": 2.4067364722034653e-06, "loss": 0.001, "step": 86320 }, { "epoch": 1.4125828356377321, "grad_norm": 0.05885162204504013, "learning_rate": 2.4055157399162975e-06, "loss": 0.0008, "step": 86330 }, { "epoch": 1.412746461588808, "grad_norm": 0.06898149102926254, "learning_rate": 2.4042952192264413e-06, "loss": 0.0012, "step": 86340 }, { "epoch": 1.412910087539884, "grad_norm": 0.023307858034968376, "learning_rate": 2.403074910233433e-06, "loss": 0.003, "step": 86350 }, { "epoch": 1.4130737134909597, "grad_norm": 0.03626040369272232, "learning_rate": 2.401854813036802e-06, "loss": 0.0023, "step": 86360 }, { "epoch": 1.4132373394420354, "grad_norm": 0.0758916512131691, "learning_rate": 2.400634927736051e-06, "loss": 0.0012, "step": 86370 }, { "epoch": 1.4134009653931114, "grad_norm": 0.04178851842880249, "learning_rate": 2.3994152544306725e-06, "loss": 0.0014, "step": 86380 }, { "epoch": 1.4135645913441872, "grad_norm": 0.04437820985913277, "learning_rate": 2.398195793220139e-06, "loss": 0.0014, "step": 86390 }, { "epoch": 1.413728217295263, "grad_norm": 0.03609314188361168, "learning_rate": 2.3969765442039017e-06, "loss": 0.001, "step": 86400 }, { "epoch": 1.413891843246339, "grad_norm": 0.02759036235511303, "learning_rate": 2.3957575074814037e-06, "loss": 0.0007, "step": 86410 }, { "epoch": 1.4140554691974148, "grad_norm": 0.019759638234972954, "learning_rate": 2.3945386831520606e-06, "loss": 0.0012, "step": 86420 }, { "epoch": 1.4142190951484905, "grad_norm": 0.074102982878685, "learning_rate": 2.3933200713152806e-06, "loss": 0.0012, "step": 86430 }, { "epoch": 1.4143827210995663, "grad_norm": 0.041933875530958176, "learning_rate": 2.3921016720704444e-06, "loss": 0.001, "step": 86440 }, { "epoch": 1.4145463470506423, "grad_norm": 0.050445713102817535, "learning_rate": 2.390883485516925e-06, "loss": 0.0026, "step": 86450 }, { "epoch": 1.414709973001718, "grad_norm": 0.044565193355083466, "learning_rate": 2.3896655117540695e-06, "loss": 0.0006, "step": 86460 }, { "epoch": 1.4148735989527939, "grad_norm": 0.07884562760591507, "learning_rate": 2.388447750881216e-06, "loss": 0.0009, "step": 86470 }, { "epoch": 1.4150372249038696, "grad_norm": 0.058861423283815384, "learning_rate": 2.3872302029976763e-06, "loss": 0.0012, "step": 86480 }, { "epoch": 1.4152008508549456, "grad_norm": 0.011417794972658157, "learning_rate": 2.386012868202754e-06, "loss": 0.0016, "step": 86490 }, { "epoch": 1.4153644768060214, "grad_norm": 0.060272276401519775, "learning_rate": 2.384795746595727e-06, "loss": 0.0016, "step": 86500 }, { "epoch": 1.4155281027570972, "grad_norm": 0.06133784353733063, "learning_rate": 2.383578838275863e-06, "loss": 0.0007, "step": 86510 }, { "epoch": 1.4156917287081732, "grad_norm": 0.04298656806349754, "learning_rate": 2.382362143342405e-06, "loss": 0.0008, "step": 86520 }, { "epoch": 1.415855354659249, "grad_norm": 0.06629273295402527, "learning_rate": 2.381145661894586e-06, "loss": 0.0012, "step": 86530 }, { "epoch": 1.4160189806103247, "grad_norm": 0.050635725259780884, "learning_rate": 2.379929394031616e-06, "loss": 0.0014, "step": 86540 }, { "epoch": 1.4161826065614007, "grad_norm": 0.02372150681912899, "learning_rate": 2.378713339852691e-06, "loss": 0.0011, "step": 86550 }, { "epoch": 1.4163462325124765, "grad_norm": 0.07492823898792267, "learning_rate": 2.3774974994569866e-06, "loss": 0.0021, "step": 86560 }, { "epoch": 1.4165098584635523, "grad_norm": 0.06238546967506409, "learning_rate": 2.3762818729436653e-06, "loss": 0.0013, "step": 86570 }, { "epoch": 1.4166734844146283, "grad_norm": 0.08350105583667755, "learning_rate": 2.375066460411866e-06, "loss": 0.0019, "step": 86580 }, { "epoch": 1.416837110365704, "grad_norm": 0.12237215042114258, "learning_rate": 2.373851261960717e-06, "loss": 0.0018, "step": 86590 }, { "epoch": 1.4170007363167798, "grad_norm": 0.025111624971032143, "learning_rate": 2.3726362776893223e-06, "loss": 0.0021, "step": 86600 }, { "epoch": 1.4171643622678558, "grad_norm": 0.02818678505718708, "learning_rate": 2.371421507696776e-06, "loss": 0.0008, "step": 86610 }, { "epoch": 1.4173279882189316, "grad_norm": 0.05422881245613098, "learning_rate": 2.3702069520821482e-06, "loss": 0.001, "step": 86620 }, { "epoch": 1.4174916141700074, "grad_norm": 0.04695925489068031, "learning_rate": 2.368992610944494e-06, "loss": 0.0007, "step": 86630 }, { "epoch": 1.4176552401210831, "grad_norm": 0.009760403074324131, "learning_rate": 2.367778484382849e-06, "loss": 0.0012, "step": 86640 }, { "epoch": 1.4178188660721591, "grad_norm": 0.020390138030052185, "learning_rate": 2.3665645724962365e-06, "loss": 0.0011, "step": 86650 }, { "epoch": 1.417982492023235, "grad_norm": 0.08425728976726532, "learning_rate": 2.365350875383657e-06, "loss": 0.0013, "step": 86660 }, { "epoch": 1.4181461179743107, "grad_norm": 0.013156197965145111, "learning_rate": 2.3641373931440976e-06, "loss": 0.0012, "step": 86670 }, { "epoch": 1.4183097439253864, "grad_norm": 0.014625060372054577, "learning_rate": 2.3629241258765224e-06, "loss": 0.0011, "step": 86680 }, { "epoch": 1.4184733698764624, "grad_norm": 0.15262331068515778, "learning_rate": 2.3617110736798854e-06, "loss": 0.0026, "step": 86690 }, { "epoch": 1.4186369958275382, "grad_norm": 0.018499836325645447, "learning_rate": 2.3604982366531154e-06, "loss": 0.0007, "step": 86700 }, { "epoch": 1.418800621778614, "grad_norm": 0.03466907516121864, "learning_rate": 2.3592856148951306e-06, "loss": 0.0015, "step": 86710 }, { "epoch": 1.41896424772969, "grad_norm": 0.025678353384137154, "learning_rate": 2.358073208504824e-06, "loss": 0.0011, "step": 86720 }, { "epoch": 1.4191278736807658, "grad_norm": 0.029115119948983192, "learning_rate": 2.356861017581081e-06, "loss": 0.0013, "step": 86730 }, { "epoch": 1.4192914996318415, "grad_norm": 0.006678566336631775, "learning_rate": 2.355649042222758e-06, "loss": 0.0009, "step": 86740 }, { "epoch": 1.4194551255829175, "grad_norm": 0.08240357786417007, "learning_rate": 2.354437282528704e-06, "loss": 0.0013, "step": 86750 }, { "epoch": 1.4196187515339933, "grad_norm": 0.0028957827016711235, "learning_rate": 2.3532257385977433e-06, "loss": 0.0013, "step": 86760 }, { "epoch": 1.419782377485069, "grad_norm": 0.04983939230442047, "learning_rate": 2.3520144105286867e-06, "loss": 0.0007, "step": 86770 }, { "epoch": 1.419946003436145, "grad_norm": 0.042318642139434814, "learning_rate": 2.3508032984203248e-06, "loss": 0.0008, "step": 86780 }, { "epoch": 1.4201096293872209, "grad_norm": 0.01818975619971752, "learning_rate": 2.3495924023714338e-06, "loss": 0.001, "step": 86790 }, { "epoch": 1.4202732553382966, "grad_norm": 0.015192101709544659, "learning_rate": 2.3483817224807677e-06, "loss": 0.0014, "step": 86800 }, { "epoch": 1.4204368812893726, "grad_norm": 0.031194766983389854, "learning_rate": 2.3471712588470684e-06, "loss": 0.0013, "step": 86810 }, { "epoch": 1.4206005072404484, "grad_norm": 0.10979337245225906, "learning_rate": 2.3459610115690534e-06, "loss": 0.0018, "step": 86820 }, { "epoch": 1.4207641331915242, "grad_norm": 0.09447083622217178, "learning_rate": 2.3447509807454305e-06, "loss": 0.0013, "step": 86830 }, { "epoch": 1.4209277591426, "grad_norm": 0.06405788660049438, "learning_rate": 2.343541166474882e-06, "loss": 0.001, "step": 86840 }, { "epoch": 1.4210913850936757, "grad_norm": 0.07527408748865128, "learning_rate": 2.3423315688560794e-06, "loss": 0.0017, "step": 86850 }, { "epoch": 1.4212550110447517, "grad_norm": 0.0402020663022995, "learning_rate": 2.3411221879876722e-06, "loss": 0.0022, "step": 86860 }, { "epoch": 1.4214186369958275, "grad_norm": 0.006235907785594463, "learning_rate": 2.339913023968291e-06, "loss": 0.0008, "step": 86870 }, { "epoch": 1.4215822629469033, "grad_norm": 0.003380571026355028, "learning_rate": 2.3387040768965553e-06, "loss": 0.0012, "step": 86880 }, { "epoch": 1.4217458888979793, "grad_norm": 0.10519898682832718, "learning_rate": 2.337495346871058e-06, "loss": 0.0013, "step": 86890 }, { "epoch": 1.421909514849055, "grad_norm": 0.010904007591307163, "learning_rate": 2.3362868339903843e-06, "loss": 0.001, "step": 86900 }, { "epoch": 1.4220731408001308, "grad_norm": 0.02424856647849083, "learning_rate": 2.3350785383530918e-06, "loss": 0.0011, "step": 86910 }, { "epoch": 1.4222367667512068, "grad_norm": 0.03161376342177391, "learning_rate": 2.3338704600577285e-06, "loss": 0.0012, "step": 86920 }, { "epoch": 1.4224003927022826, "grad_norm": 0.07663678377866745, "learning_rate": 2.3326625992028175e-06, "loss": 0.0009, "step": 86930 }, { "epoch": 1.4225640186533584, "grad_norm": 0.005474789533764124, "learning_rate": 2.3314549558868717e-06, "loss": 0.0018, "step": 86940 }, { "epoch": 1.4227276446044343, "grad_norm": 0.023548053577542305, "learning_rate": 2.330247530208379e-06, "loss": 0.001, "step": 86950 }, { "epoch": 1.4228912705555101, "grad_norm": 0.002747412072494626, "learning_rate": 2.329040322265816e-06, "loss": 0.0012, "step": 86960 }, { "epoch": 1.423054896506586, "grad_norm": 0.04941778630018234, "learning_rate": 2.3278333321576347e-06, "loss": 0.0008, "step": 86970 }, { "epoch": 1.423218522457662, "grad_norm": 0.07427161931991577, "learning_rate": 2.326626559982278e-06, "loss": 0.0008, "step": 86980 }, { "epoch": 1.4233821484087377, "grad_norm": 0.15990613400936127, "learning_rate": 2.325420005838161e-06, "loss": 0.0009, "step": 86990 }, { "epoch": 1.4235457743598134, "grad_norm": 0.06716874241828918, "learning_rate": 2.3242136698236905e-06, "loss": 0.0011, "step": 87000 }, { "epoch": 1.4237094003108894, "grad_norm": 0.025656558573246002, "learning_rate": 2.3230075520372473e-06, "loss": 0.0015, "step": 87010 }, { "epoch": 1.4238730262619652, "grad_norm": 0.10640022903680801, "learning_rate": 2.321801652577203e-06, "loss": 0.0015, "step": 87020 }, { "epoch": 1.424036652213041, "grad_norm": 0.09093756228685379, "learning_rate": 2.3205959715419014e-06, "loss": 0.0015, "step": 87030 }, { "epoch": 1.4242002781641168, "grad_norm": 0.03602614626288414, "learning_rate": 2.3193905090296783e-06, "loss": 0.0021, "step": 87040 }, { "epoch": 1.4243639041151925, "grad_norm": 0.04593077674508095, "learning_rate": 2.3181852651388426e-06, "loss": 0.0011, "step": 87050 }, { "epoch": 1.4245275300662685, "grad_norm": 0.03151620179414749, "learning_rate": 2.316980239967695e-06, "loss": 0.001, "step": 87060 }, { "epoch": 1.4246911560173443, "grad_norm": 0.06314272433519363, "learning_rate": 2.3157754336145085e-06, "loss": 0.0008, "step": 87070 }, { "epoch": 1.42485478196842, "grad_norm": 0.03376390039920807, "learning_rate": 2.3145708461775467e-06, "loss": 0.0006, "step": 87080 }, { "epoch": 1.425018407919496, "grad_norm": 0.06842463463544846, "learning_rate": 2.3133664777550486e-06, "loss": 0.0017, "step": 87090 }, { "epoch": 1.4251820338705719, "grad_norm": 0.007555176038295031, "learning_rate": 2.3121623284452414e-06, "loss": 0.0008, "step": 87100 }, { "epoch": 1.4253456598216476, "grad_norm": 0.0578356608748436, "learning_rate": 2.310958398346328e-06, "loss": 0.0005, "step": 87110 }, { "epoch": 1.4255092857727236, "grad_norm": 0.03102637641131878, "learning_rate": 2.3097546875565006e-06, "loss": 0.0013, "step": 87120 }, { "epoch": 1.4256729117237994, "grad_norm": 0.032348643988370895, "learning_rate": 2.3085511961739266e-06, "loss": 0.0009, "step": 87130 }, { "epoch": 1.4258365376748752, "grad_norm": 0.16237132251262665, "learning_rate": 2.307347924296761e-06, "loss": 0.0011, "step": 87140 }, { "epoch": 1.4260001636259512, "grad_norm": 0.03619246557354927, "learning_rate": 2.3061448720231362e-06, "loss": 0.0008, "step": 87150 }, { "epoch": 1.426163789577027, "grad_norm": 0.2018430531024933, "learning_rate": 2.304942039451172e-06, "loss": 0.0005, "step": 87160 }, { "epoch": 1.4263274155281027, "grad_norm": 0.024624843150377274, "learning_rate": 2.3037394266789652e-06, "loss": 0.0009, "step": 87170 }, { "epoch": 1.4264910414791787, "grad_norm": 0.01159820705652237, "learning_rate": 2.302537033804596e-06, "loss": 0.0004, "step": 87180 }, { "epoch": 1.4266546674302545, "grad_norm": 0.013265382498502731, "learning_rate": 2.3013348609261303e-06, "loss": 0.0006, "step": 87190 }, { "epoch": 1.4268182933813303, "grad_norm": 0.011792325414717197, "learning_rate": 2.3001329081416105e-06, "loss": 0.0015, "step": 87200 }, { "epoch": 1.4269819193324063, "grad_norm": 0.031673651188611984, "learning_rate": 2.298931175549066e-06, "loss": 0.0013, "step": 87210 }, { "epoch": 1.427145545283482, "grad_norm": 0.02618395909667015, "learning_rate": 2.297729663246504e-06, "loss": 0.0008, "step": 87220 }, { "epoch": 1.4273091712345578, "grad_norm": 0.08764577656984329, "learning_rate": 2.296528371331918e-06, "loss": 0.0014, "step": 87230 }, { "epoch": 1.4274727971856336, "grad_norm": 0.18212631344795227, "learning_rate": 2.2953272999032784e-06, "loss": 0.0014, "step": 87240 }, { "epoch": 1.4276364231367094, "grad_norm": 0.06772510707378387, "learning_rate": 2.2941264490585434e-06, "loss": 0.0011, "step": 87250 }, { "epoch": 1.4278000490877854, "grad_norm": 0.013120506890118122, "learning_rate": 2.2929258188956484e-06, "loss": 0.0012, "step": 87260 }, { "epoch": 1.4279636750388611, "grad_norm": 0.011625261045992374, "learning_rate": 2.291725409512514e-06, "loss": 0.0014, "step": 87270 }, { "epoch": 1.428127300989937, "grad_norm": 0.021880803629755974, "learning_rate": 2.2905252210070395e-06, "loss": 0.0017, "step": 87280 }, { "epoch": 1.428290926941013, "grad_norm": 0.06603413820266724, "learning_rate": 2.2893252534771115e-06, "loss": 0.0009, "step": 87290 }, { "epoch": 1.4284545528920887, "grad_norm": 0.01816193014383316, "learning_rate": 2.288125507020591e-06, "loss": 0.001, "step": 87300 }, { "epoch": 1.4286181788431644, "grad_norm": 0.025098584592342377, "learning_rate": 2.2869259817353302e-06, "loss": 0.0013, "step": 87310 }, { "epoch": 1.4287818047942404, "grad_norm": 0.037802111357450485, "learning_rate": 2.2857266777191535e-06, "loss": 0.0008, "step": 87320 }, { "epoch": 1.4289454307453162, "grad_norm": 0.04814010486006737, "learning_rate": 2.2845275950698753e-06, "loss": 0.0011, "step": 87330 }, { "epoch": 1.429109056696392, "grad_norm": 0.0005579679273068905, "learning_rate": 2.283328733885287e-06, "loss": 0.0005, "step": 87340 }, { "epoch": 1.429272682647468, "grad_norm": 0.06661123037338257, "learning_rate": 2.2821300942631653e-06, "loss": 0.0012, "step": 87350 }, { "epoch": 1.4294363085985438, "grad_norm": 0.07266577333211899, "learning_rate": 2.280931676301264e-06, "loss": 0.002, "step": 87360 }, { "epoch": 1.4295999345496195, "grad_norm": 0.015039430931210518, "learning_rate": 2.2797334800973265e-06, "loss": 0.0012, "step": 87370 }, { "epoch": 1.4297635605006955, "grad_norm": 0.032793078571558, "learning_rate": 2.278535505749069e-06, "loss": 0.0014, "step": 87380 }, { "epoch": 1.4299271864517713, "grad_norm": 0.012454529292881489, "learning_rate": 2.277337753354199e-06, "loss": 0.0016, "step": 87390 }, { "epoch": 1.430090812402847, "grad_norm": 0.030338380485773087, "learning_rate": 2.276140223010396e-06, "loss": 0.0008, "step": 87400 }, { "epoch": 1.4302544383539229, "grad_norm": 0.004007387440651655, "learning_rate": 2.274942914815331e-06, "loss": 0.0007, "step": 87410 }, { "epoch": 1.4304180643049988, "grad_norm": 0.009625481441617012, "learning_rate": 2.2737458288666487e-06, "loss": 0.0007, "step": 87420 }, { "epoch": 1.4305816902560746, "grad_norm": 0.17188963294029236, "learning_rate": 2.272548965261983e-06, "loss": 0.0016, "step": 87430 }, { "epoch": 1.4307453162071504, "grad_norm": 0.029408516362309456, "learning_rate": 2.271352324098942e-06, "loss": 0.0007, "step": 87440 }, { "epoch": 1.4309089421582262, "grad_norm": 0.01963796466588974, "learning_rate": 2.2701559054751244e-06, "loss": 0.0016, "step": 87450 }, { "epoch": 1.4310725681093022, "grad_norm": 0.0038243767339736223, "learning_rate": 2.2689597094881007e-06, "loss": 0.001, "step": 87460 }, { "epoch": 1.431236194060378, "grad_norm": 0.025113839656114578, "learning_rate": 2.267763736235433e-06, "loss": 0.0009, "step": 87470 }, { "epoch": 1.4313998200114537, "grad_norm": 0.4092933237552643, "learning_rate": 2.266567985814658e-06, "loss": 0.001, "step": 87480 }, { "epoch": 1.4315634459625297, "grad_norm": 0.0572710819542408, "learning_rate": 2.2653724583232995e-06, "loss": 0.001, "step": 87490 }, { "epoch": 1.4317270719136055, "grad_norm": 0.03292551636695862, "learning_rate": 2.2641771538588595e-06, "loss": 0.0009, "step": 87500 }, { "epoch": 1.4318906978646813, "grad_norm": 0.03163839131593704, "learning_rate": 2.262982072518821e-06, "loss": 0.0009, "step": 87510 }, { "epoch": 1.4320543238157573, "grad_norm": 0.04434162750840187, "learning_rate": 2.2617872144006543e-06, "loss": 0.0006, "step": 87520 }, { "epoch": 1.432217949766833, "grad_norm": 0.02326543629169464, "learning_rate": 2.260592579601804e-06, "loss": 0.0007, "step": 87530 }, { "epoch": 1.4323815757179088, "grad_norm": 0.0011645479826256633, "learning_rate": 2.2593981682197047e-06, "loss": 0.0009, "step": 87540 }, { "epoch": 1.4325452016689848, "grad_norm": 0.0031618080101907253, "learning_rate": 2.2582039803517646e-06, "loss": 0.0008, "step": 87550 }, { "epoch": 1.4327088276200606, "grad_norm": 0.22771863639354706, "learning_rate": 2.257010016095382e-06, "loss": 0.0007, "step": 87560 }, { "epoch": 1.4328724535711364, "grad_norm": 0.018685875460505486, "learning_rate": 2.2558162755479273e-06, "loss": 0.0007, "step": 87570 }, { "epoch": 1.4330360795222123, "grad_norm": 0.018462343141436577, "learning_rate": 2.2546227588067626e-06, "loss": 0.0017, "step": 87580 }, { "epoch": 1.4331997054732881, "grad_norm": 0.060862280428409576, "learning_rate": 2.2534294659692233e-06, "loss": 0.0009, "step": 87590 }, { "epoch": 1.433363331424364, "grad_norm": 0.002800151938572526, "learning_rate": 2.2522363971326344e-06, "loss": 0.0007, "step": 87600 }, { "epoch": 1.4335269573754397, "grad_norm": 0.19052881002426147, "learning_rate": 2.251043552394294e-06, "loss": 0.0013, "step": 87610 }, { "epoch": 1.4336905833265157, "grad_norm": 0.05560789257287979, "learning_rate": 2.2498509318514907e-06, "loss": 0.0008, "step": 87620 }, { "epoch": 1.4338542092775914, "grad_norm": 0.01368323341012001, "learning_rate": 2.2486585356014868e-06, "loss": 0.0009, "step": 87630 }, { "epoch": 1.4340178352286672, "grad_norm": 0.044441115111112595, "learning_rate": 2.2474663637415334e-06, "loss": 0.002, "step": 87640 }, { "epoch": 1.434181461179743, "grad_norm": 0.04331725835800171, "learning_rate": 2.246274416368857e-06, "loss": 0.0005, "step": 87650 }, { "epoch": 1.434345087130819, "grad_norm": 0.00791282206773758, "learning_rate": 2.2450826935806717e-06, "loss": 0.001, "step": 87660 }, { "epoch": 1.4345087130818948, "grad_norm": 0.02606225572526455, "learning_rate": 2.243891195474167e-06, "loss": 0.0007, "step": 87670 }, { "epoch": 1.4346723390329705, "grad_norm": 0.05236673355102539, "learning_rate": 2.242699922146522e-06, "loss": 0.0006, "step": 87680 }, { "epoch": 1.4348359649840465, "grad_norm": 0.054539211094379425, "learning_rate": 2.2415088736948876e-06, "loss": 0.0015, "step": 87690 }, { "epoch": 1.4349995909351223, "grad_norm": 0.023323310539126396, "learning_rate": 2.2403180502164063e-06, "loss": 0.0012, "step": 87700 }, { "epoch": 1.435163216886198, "grad_norm": 0.12575753033161163, "learning_rate": 2.2391274518081936e-06, "loss": 0.0014, "step": 87710 }, { "epoch": 1.435326842837274, "grad_norm": 0.005651365965604782, "learning_rate": 2.2379370785673543e-06, "loss": 0.0009, "step": 87720 }, { "epoch": 1.4354904687883498, "grad_norm": 0.06766383349895477, "learning_rate": 2.236746930590968e-06, "loss": 0.0019, "step": 87730 }, { "epoch": 1.4356540947394256, "grad_norm": 0.10399864614009857, "learning_rate": 2.2355570079761024e-06, "loss": 0.0024, "step": 87740 }, { "epoch": 1.4358177206905016, "grad_norm": 0.03292897716164589, "learning_rate": 2.2343673108198e-06, "loss": 0.0009, "step": 87750 }, { "epoch": 1.4359813466415774, "grad_norm": 0.05341261997818947, "learning_rate": 2.2331778392190917e-06, "loss": 0.0019, "step": 87760 }, { "epoch": 1.4361449725926532, "grad_norm": 0.001011172542348504, "learning_rate": 2.231988593270984e-06, "loss": 0.0015, "step": 87770 }, { "epoch": 1.4363085985437292, "grad_norm": 0.06599834561347961, "learning_rate": 2.2307995730724704e-06, "loss": 0.0011, "step": 87780 }, { "epoch": 1.436472224494805, "grad_norm": 0.031680092215538025, "learning_rate": 2.2296107787205202e-06, "loss": 0.0015, "step": 87790 }, { "epoch": 1.4366358504458807, "grad_norm": 0.10382623970508575, "learning_rate": 2.228422210312091e-06, "loss": 0.001, "step": 87800 }, { "epoch": 1.4367994763969565, "grad_norm": 0.043182507157325745, "learning_rate": 2.2272338679441175e-06, "loss": 0.0007, "step": 87810 }, { "epoch": 1.4369631023480323, "grad_norm": 0.031067989766597748, "learning_rate": 2.226045751713513e-06, "loss": 0.0009, "step": 87820 }, { "epoch": 1.4371267282991083, "grad_norm": 0.012914981693029404, "learning_rate": 2.224857861717182e-06, "loss": 0.0007, "step": 87830 }, { "epoch": 1.437290354250184, "grad_norm": 0.032071635127067566, "learning_rate": 2.2236701980519994e-06, "loss": 0.0019, "step": 87840 }, { "epoch": 1.4374539802012598, "grad_norm": 0.057056866586208344, "learning_rate": 2.2224827608148313e-06, "loss": 0.0013, "step": 87850 }, { "epoch": 1.4376176061523358, "grad_norm": 0.04092451557517052, "learning_rate": 2.221295550102518e-06, "loss": 0.0013, "step": 87860 }, { "epoch": 1.4377812321034116, "grad_norm": 0.07200897485017776, "learning_rate": 2.220108566011887e-06, "loss": 0.0007, "step": 87870 }, { "epoch": 1.4379448580544874, "grad_norm": 0.011565347202122211, "learning_rate": 2.218921808639742e-06, "loss": 0.0008, "step": 87880 }, { "epoch": 1.4381084840055633, "grad_norm": 0.02137884497642517, "learning_rate": 2.2177352780828735e-06, "loss": 0.0007, "step": 87890 }, { "epoch": 1.4382721099566391, "grad_norm": 0.009534936398267746, "learning_rate": 2.2165489744380485e-06, "loss": 0.0006, "step": 87900 }, { "epoch": 1.438435735907715, "grad_norm": 0.12051648646593094, "learning_rate": 2.2153628978020203e-06, "loss": 0.0016, "step": 87910 }, { "epoch": 1.438599361858791, "grad_norm": 0.07292230427265167, "learning_rate": 2.2141770482715186e-06, "loss": 0.0011, "step": 87920 }, { "epoch": 1.4387629878098667, "grad_norm": 0.04109984636306763, "learning_rate": 2.21299142594326e-06, "loss": 0.001, "step": 87930 }, { "epoch": 1.4389266137609424, "grad_norm": 0.07503499835729599, "learning_rate": 2.2118060309139373e-06, "loss": 0.0012, "step": 87940 }, { "epoch": 1.4390902397120184, "grad_norm": 0.07943184673786163, "learning_rate": 2.2106208632802295e-06, "loss": 0.0016, "step": 87950 }, { "epoch": 1.4392538656630942, "grad_norm": 0.01143516506999731, "learning_rate": 2.2094359231387922e-06, "loss": 0.0014, "step": 87960 }, { "epoch": 1.43941749161417, "grad_norm": 0.030652880668640137, "learning_rate": 2.208251210586269e-06, "loss": 0.0012, "step": 87970 }, { "epoch": 1.439581117565246, "grad_norm": 0.044436436146497726, "learning_rate": 2.207066725719277e-06, "loss": 0.0003, "step": 87980 }, { "epoch": 1.4397447435163218, "grad_norm": 0.09317509084939957, "learning_rate": 2.2058824686344216e-06, "loss": 0.0012, "step": 87990 }, { "epoch": 1.4399083694673975, "grad_norm": 0.0381840243935585, "learning_rate": 2.2046984394282838e-06, "loss": 0.0012, "step": 88000 }, { "epoch": 1.4400719954184733, "grad_norm": 0.04159509390592575, "learning_rate": 2.2035146381974333e-06, "loss": 0.0013, "step": 88010 }, { "epoch": 1.440235621369549, "grad_norm": 0.003345638746395707, "learning_rate": 2.202331065038412e-06, "loss": 0.0013, "step": 88020 }, { "epoch": 1.440399247320625, "grad_norm": 0.17782385647296906, "learning_rate": 2.2011477200477526e-06, "loss": 0.0008, "step": 88030 }, { "epoch": 1.4405628732717008, "grad_norm": 0.13986875116825104, "learning_rate": 2.199964603321961e-06, "loss": 0.0015, "step": 88040 }, { "epoch": 1.4407264992227766, "grad_norm": 0.028128251433372498, "learning_rate": 2.1987817149575313e-06, "loss": 0.0008, "step": 88050 }, { "epoch": 1.4408901251738526, "grad_norm": 0.011632639914751053, "learning_rate": 2.197599055050933e-06, "loss": 0.0013, "step": 88060 }, { "epoch": 1.4410537511249284, "grad_norm": 0.047060687094926834, "learning_rate": 2.1964166236986227e-06, "loss": 0.002, "step": 88070 }, { "epoch": 1.4412173770760042, "grad_norm": 0.012457093223929405, "learning_rate": 2.1952344209970324e-06, "loss": 0.001, "step": 88080 }, { "epoch": 1.4413810030270802, "grad_norm": 0.12541504204273224, "learning_rate": 2.194052447042582e-06, "loss": 0.0011, "step": 88090 }, { "epoch": 1.441544628978156, "grad_norm": 0.04264990985393524, "learning_rate": 2.192870701931665e-06, "loss": 0.0008, "step": 88100 }, { "epoch": 1.4417082549292317, "grad_norm": 0.03025136888027191, "learning_rate": 2.191689185760665e-06, "loss": 0.0013, "step": 88110 }, { "epoch": 1.4418718808803077, "grad_norm": 0.02718346379697323, "learning_rate": 2.190507898625938e-06, "loss": 0.0015, "step": 88120 }, { "epoch": 1.4420355068313835, "grad_norm": 0.05797566846013069, "learning_rate": 2.1893268406238303e-06, "loss": 0.0016, "step": 88130 }, { "epoch": 1.4421991327824593, "grad_norm": 0.039778679609298706, "learning_rate": 2.1881460118506625e-06, "loss": 0.0014, "step": 88140 }, { "epoch": 1.4423627587335353, "grad_norm": 0.045133478939533234, "learning_rate": 2.1869654124027378e-06, "loss": 0.0013, "step": 88150 }, { "epoch": 1.442526384684611, "grad_norm": 0.04567984491586685, "learning_rate": 2.185785042376345e-06, "loss": 0.001, "step": 88160 }, { "epoch": 1.4426900106356868, "grad_norm": 0.08447040617465973, "learning_rate": 2.1846049018677472e-06, "loss": 0.0009, "step": 88170 }, { "epoch": 1.4428536365867626, "grad_norm": 0.04492397606372833, "learning_rate": 2.1834249909731976e-06, "loss": 0.0034, "step": 88180 }, { "epoch": 1.4430172625378386, "grad_norm": 0.10677868872880936, "learning_rate": 2.18224530978892e-06, "loss": 0.0018, "step": 88190 }, { "epoch": 1.4431808884889143, "grad_norm": 0.014056211337447166, "learning_rate": 2.1810658584111305e-06, "loss": 0.0008, "step": 88200 }, { "epoch": 1.4433445144399901, "grad_norm": 0.014277152717113495, "learning_rate": 2.1798866369360166e-06, "loss": 0.0005, "step": 88210 }, { "epoch": 1.443508140391066, "grad_norm": 0.07201260328292847, "learning_rate": 2.1787076454597556e-06, "loss": 0.0012, "step": 88220 }, { "epoch": 1.443671766342142, "grad_norm": 0.10153778642416, "learning_rate": 2.1775288840784984e-06, "loss": 0.001, "step": 88230 }, { "epoch": 1.4438353922932177, "grad_norm": 0.11902565509080887, "learning_rate": 2.1763503528883846e-06, "loss": 0.0025, "step": 88240 }, { "epoch": 1.4439990182442934, "grad_norm": 0.02921682596206665, "learning_rate": 2.1751720519855274e-06, "loss": 0.0016, "step": 88250 }, { "epoch": 1.4441626441953694, "grad_norm": 0.058205220848321915, "learning_rate": 2.173993981466028e-06, "loss": 0.0009, "step": 88260 }, { "epoch": 1.4443262701464452, "grad_norm": 0.06497303396463394, "learning_rate": 2.172816141425963e-06, "loss": 0.0012, "step": 88270 }, { "epoch": 1.444489896097521, "grad_norm": 0.11249633878469467, "learning_rate": 2.1716385319613966e-06, "loss": 0.0011, "step": 88280 }, { "epoch": 1.444653522048597, "grad_norm": 0.11125393956899643, "learning_rate": 2.1704611531683667e-06, "loss": 0.0017, "step": 88290 }, { "epoch": 1.4448171479996728, "grad_norm": 0.10672051459550858, "learning_rate": 2.1692840051429e-06, "loss": 0.002, "step": 88300 }, { "epoch": 1.4449807739507485, "grad_norm": 0.04128502309322357, "learning_rate": 2.1681070879809967e-06, "loss": 0.0005, "step": 88310 }, { "epoch": 1.4451443999018245, "grad_norm": 0.10260944068431854, "learning_rate": 2.166930401778647e-06, "loss": 0.0019, "step": 88320 }, { "epoch": 1.4453080258529003, "grad_norm": 0.003928142134100199, "learning_rate": 2.165753946631812e-06, "loss": 0.0006, "step": 88330 }, { "epoch": 1.445471651803976, "grad_norm": 0.08012647926807404, "learning_rate": 2.164577722636445e-06, "loss": 0.0007, "step": 88340 }, { "epoch": 1.445635277755052, "grad_norm": 0.051866721361875534, "learning_rate": 2.1634017298884695e-06, "loss": 0.0011, "step": 88350 }, { "epoch": 1.4457989037061278, "grad_norm": 0.021905949339270592, "learning_rate": 2.1622259684838002e-06, "loss": 0.0014, "step": 88360 }, { "epoch": 1.4459625296572036, "grad_norm": 0.03850788623094559, "learning_rate": 2.161050438518324e-06, "loss": 0.001, "step": 88370 }, { "epoch": 1.4461261556082794, "grad_norm": 0.14281781017780304, "learning_rate": 2.159875140087917e-06, "loss": 0.0016, "step": 88380 }, { "epoch": 1.4462897815593554, "grad_norm": 0.05758731812238693, "learning_rate": 2.158700073288429e-06, "loss": 0.0014, "step": 88390 }, { "epoch": 1.4464534075104312, "grad_norm": 0.019825385883450508, "learning_rate": 2.1575252382156973e-06, "loss": 0.001, "step": 88400 }, { "epoch": 1.446617033461507, "grad_norm": 0.028340870514512062, "learning_rate": 2.156350634965535e-06, "loss": 0.0014, "step": 88410 }, { "epoch": 1.4467806594125827, "grad_norm": 0.012340308167040348, "learning_rate": 2.1551762636337413e-06, "loss": 0.0013, "step": 88420 }, { "epoch": 1.4469442853636587, "grad_norm": 0.03235310688614845, "learning_rate": 2.1540021243160912e-06, "loss": 0.0013, "step": 88430 }, { "epoch": 1.4471079113147345, "grad_norm": 0.17577512562274933, "learning_rate": 2.1528282171083462e-06, "loss": 0.0024, "step": 88440 }, { "epoch": 1.4472715372658103, "grad_norm": 0.09183309972286224, "learning_rate": 2.1516545421062435e-06, "loss": 0.0012, "step": 88450 }, { "epoch": 1.4474351632168863, "grad_norm": 0.03897007182240486, "learning_rate": 2.1504810994055074e-06, "loss": 0.0012, "step": 88460 }, { "epoch": 1.447598789167962, "grad_norm": 0.03891608119010925, "learning_rate": 2.1493078891018375e-06, "loss": 0.0015, "step": 88470 }, { "epoch": 1.4477624151190378, "grad_norm": 0.02291872538626194, "learning_rate": 2.148134911290916e-06, "loss": 0.0017, "step": 88480 }, { "epoch": 1.4479260410701138, "grad_norm": 0.019518906250596046, "learning_rate": 2.14696216606841e-06, "loss": 0.0012, "step": 88490 }, { "epoch": 1.4480896670211896, "grad_norm": 0.06341265887022018, "learning_rate": 2.1457896535299605e-06, "loss": 0.0012, "step": 88500 }, { "epoch": 1.4482532929722653, "grad_norm": 0.03984321281313896, "learning_rate": 2.1446173737711985e-06, "loss": 0.0011, "step": 88510 }, { "epoch": 1.4484169189233413, "grad_norm": 0.10460025072097778, "learning_rate": 2.143445326887726e-06, "loss": 0.0016, "step": 88520 }, { "epoch": 1.4485805448744171, "grad_norm": 0.007973943836987019, "learning_rate": 2.142273512975136e-06, "loss": 0.002, "step": 88530 }, { "epoch": 1.448744170825493, "grad_norm": 0.02535553090274334, "learning_rate": 2.1411019321289937e-06, "loss": 0.001, "step": 88540 }, { "epoch": 1.448907796776569, "grad_norm": 0.08888189494609833, "learning_rate": 2.1399305844448533e-06, "loss": 0.001, "step": 88550 }, { "epoch": 1.4490714227276447, "grad_norm": 0.052672844380140305, "learning_rate": 2.138759470018241e-06, "loss": 0.0008, "step": 88560 }, { "epoch": 1.4492350486787204, "grad_norm": 0.016947384923696518, "learning_rate": 2.1375885889446736e-06, "loss": 0.0013, "step": 88570 }, { "epoch": 1.4493986746297962, "grad_norm": 0.2120617926120758, "learning_rate": 2.1364179413196408e-06, "loss": 0.0011, "step": 88580 }, { "epoch": 1.4495623005808722, "grad_norm": 0.03426506742835045, "learning_rate": 2.135247527238619e-06, "loss": 0.0008, "step": 88590 }, { "epoch": 1.449725926531948, "grad_norm": 0.002394516486674547, "learning_rate": 2.134077346797061e-06, "loss": 0.0007, "step": 88600 }, { "epoch": 1.4498895524830238, "grad_norm": 0.032250385731458664, "learning_rate": 2.1329074000904056e-06, "loss": 0.0017, "step": 88610 }, { "epoch": 1.4500531784340995, "grad_norm": 0.06429596990346909, "learning_rate": 2.1317376872140654e-06, "loss": 0.0007, "step": 88620 }, { "epoch": 1.4502168043851755, "grad_norm": 0.09414245188236237, "learning_rate": 2.130568208263443e-06, "loss": 0.0041, "step": 88630 }, { "epoch": 1.4503804303362513, "grad_norm": 0.1963680386543274, "learning_rate": 2.1293989633339136e-06, "loss": 0.0015, "step": 88640 }, { "epoch": 1.450544056287327, "grad_norm": 0.09079791605472565, "learning_rate": 2.1282299525208393e-06, "loss": 0.0015, "step": 88650 }, { "epoch": 1.450707682238403, "grad_norm": 0.043481383472681046, "learning_rate": 2.1270611759195583e-06, "loss": 0.0012, "step": 88660 }, { "epoch": 1.4508713081894788, "grad_norm": 0.048390574753284454, "learning_rate": 2.125892633625394e-06, "loss": 0.0011, "step": 88670 }, { "epoch": 1.4510349341405546, "grad_norm": 0.03354208916425705, "learning_rate": 2.124724325733647e-06, "loss": 0.0012, "step": 88680 }, { "epoch": 1.4511985600916306, "grad_norm": 0.03864798694849014, "learning_rate": 2.123556252339603e-06, "loss": 0.0008, "step": 88690 }, { "epoch": 1.4513621860427064, "grad_norm": 0.04838123917579651, "learning_rate": 2.1223884135385232e-06, "loss": 0.0016, "step": 88700 }, { "epoch": 1.4515258119937822, "grad_norm": 0.02041090466082096, "learning_rate": 2.121220809425655e-06, "loss": 0.0006, "step": 88710 }, { "epoch": 1.4516894379448582, "grad_norm": 0.045102670788764954, "learning_rate": 2.1200534400962216e-06, "loss": 0.0008, "step": 88720 }, { "epoch": 1.451853063895934, "grad_norm": 0.08513277024030685, "learning_rate": 2.1188863056454327e-06, "loss": 0.0015, "step": 88730 }, { "epoch": 1.4520166898470097, "grad_norm": 0.047499481588602066, "learning_rate": 2.117719406168473e-06, "loss": 0.0013, "step": 88740 }, { "epoch": 1.4521803157980857, "grad_norm": 0.05745978280901909, "learning_rate": 2.116552741760514e-06, "loss": 0.0012, "step": 88750 }, { "epoch": 1.4523439417491615, "grad_norm": 0.015010926872491837, "learning_rate": 2.1153863125167007e-06, "loss": 0.0016, "step": 88760 }, { "epoch": 1.4525075677002373, "grad_norm": 0.022948946803808212, "learning_rate": 2.1142201185321674e-06, "loss": 0.0007, "step": 88770 }, { "epoch": 1.452671193651313, "grad_norm": 0.01724308170378208, "learning_rate": 2.1130541599020236e-06, "loss": 0.0007, "step": 88780 }, { "epoch": 1.4528348196023888, "grad_norm": 0.006474709138274193, "learning_rate": 2.1118884367213583e-06, "loss": 0.0008, "step": 88790 }, { "epoch": 1.4529984455534648, "grad_norm": 0.08029213547706604, "learning_rate": 2.110722949085247e-06, "loss": 0.0011, "step": 88800 }, { "epoch": 1.4531620715045406, "grad_norm": 0.044175248593091965, "learning_rate": 2.1095576970887405e-06, "loss": 0.0004, "step": 88810 }, { "epoch": 1.4533256974556163, "grad_norm": 0.0312805250287056, "learning_rate": 2.108392680826876e-06, "loss": 0.0011, "step": 88820 }, { "epoch": 1.4534893234066923, "grad_norm": 0.021895818412303925, "learning_rate": 2.1072279003946643e-06, "loss": 0.0014, "step": 88830 }, { "epoch": 1.4536529493577681, "grad_norm": 0.009866050444543362, "learning_rate": 2.1060633558871046e-06, "loss": 0.0016, "step": 88840 }, { "epoch": 1.453816575308844, "grad_norm": 0.03385284170508385, "learning_rate": 2.10489904739917e-06, "loss": 0.0014, "step": 88850 }, { "epoch": 1.45398020125992, "grad_norm": 0.027375230565667152, "learning_rate": 2.103734975025821e-06, "loss": 0.0013, "step": 88860 }, { "epoch": 1.4541438272109957, "grad_norm": 0.03915165737271309, "learning_rate": 2.1025711388619917e-06, "loss": 0.0009, "step": 88870 }, { "epoch": 1.4543074531620714, "grad_norm": 0.06073424965143204, "learning_rate": 2.101407539002604e-06, "loss": 0.0012, "step": 88880 }, { "epoch": 1.4544710791131474, "grad_norm": 0.08118850737810135, "learning_rate": 2.100244175542554e-06, "loss": 0.002, "step": 88890 }, { "epoch": 1.4546347050642232, "grad_norm": 0.026000458747148514, "learning_rate": 2.099081048576725e-06, "loss": 0.0012, "step": 88900 }, { "epoch": 1.454798331015299, "grad_norm": 0.08672899752855301, "learning_rate": 2.097918158199974e-06, "loss": 0.0012, "step": 88910 }, { "epoch": 1.454961956966375, "grad_norm": 0.053741566836833954, "learning_rate": 2.0967555045071465e-06, "loss": 0.001, "step": 88920 }, { "epoch": 1.4551255829174508, "grad_norm": 0.06015285104513168, "learning_rate": 2.0955930875930598e-06, "loss": 0.0013, "step": 88930 }, { "epoch": 1.4552892088685265, "grad_norm": 0.01598251424729824, "learning_rate": 2.0944309075525215e-06, "loss": 0.0014, "step": 88940 }, { "epoch": 1.4554528348196025, "grad_norm": 0.11105940490961075, "learning_rate": 2.0932689644803105e-06, "loss": 0.0015, "step": 88950 }, { "epoch": 1.4556164607706783, "grad_norm": 0.055214956402778625, "learning_rate": 2.0921072584711952e-06, "loss": 0.001, "step": 88960 }, { "epoch": 1.455780086721754, "grad_norm": 0.05880969017744064, "learning_rate": 2.090945789619917e-06, "loss": 0.0011, "step": 88970 }, { "epoch": 1.4559437126728298, "grad_norm": 0.06411729007959366, "learning_rate": 2.0897845580212046e-06, "loss": 0.0015, "step": 88980 }, { "epoch": 1.4561073386239056, "grad_norm": 0.02467980608344078, "learning_rate": 2.08862356376976e-06, "loss": 0.0015, "step": 88990 }, { "epoch": 1.4562709645749816, "grad_norm": 0.05376598238945007, "learning_rate": 2.0874628069602744e-06, "loss": 0.001, "step": 89000 }, { "epoch": 1.4564345905260574, "grad_norm": 0.021031277254223824, "learning_rate": 2.0863022876874106e-06, "loss": 0.0005, "step": 89010 }, { "epoch": 1.4565982164771332, "grad_norm": 0.1225455030798912, "learning_rate": 2.085142006045821e-06, "loss": 0.001, "step": 89020 }, { "epoch": 1.4567618424282092, "grad_norm": 0.026533327996730804, "learning_rate": 2.0839819621301304e-06, "loss": 0.0006, "step": 89030 }, { "epoch": 1.456925468379285, "grad_norm": 0.0300903283059597, "learning_rate": 2.082822156034952e-06, "loss": 0.0017, "step": 89040 }, { "epoch": 1.4570890943303607, "grad_norm": 0.05557401105761528, "learning_rate": 2.0816625878548712e-06, "loss": 0.0007, "step": 89050 }, { "epoch": 1.4572527202814367, "grad_norm": 0.08851264417171478, "learning_rate": 2.0805032576844624e-06, "loss": 0.0046, "step": 89060 }, { "epoch": 1.4574163462325125, "grad_norm": 0.05466889962553978, "learning_rate": 2.0793441656182732e-06, "loss": 0.0009, "step": 89070 }, { "epoch": 1.4575799721835883, "grad_norm": 0.04374682903289795, "learning_rate": 2.078185311750839e-06, "loss": 0.0013, "step": 89080 }, { "epoch": 1.4577435981346643, "grad_norm": 0.07083718478679657, "learning_rate": 2.077026696176668e-06, "loss": 0.0012, "step": 89090 }, { "epoch": 1.45790722408574, "grad_norm": 0.03942270949482918, "learning_rate": 2.075868318990257e-06, "loss": 0.001, "step": 89100 }, { "epoch": 1.4580708500368158, "grad_norm": 0.23095588386058807, "learning_rate": 2.0747101802860764e-06, "loss": 0.0013, "step": 89110 }, { "epoch": 1.4582344759878918, "grad_norm": 0.08636157959699631, "learning_rate": 2.0735522801585794e-06, "loss": 0.0016, "step": 89120 }, { "epoch": 1.4583981019389676, "grad_norm": 0.009325859136879444, "learning_rate": 2.072394618702203e-06, "loss": 0.0007, "step": 89130 }, { "epoch": 1.4585617278900433, "grad_norm": 0.1484357863664627, "learning_rate": 2.0712371960113604e-06, "loss": 0.0008, "step": 89140 }, { "epoch": 1.4587253538411191, "grad_norm": 0.01525159738957882, "learning_rate": 2.0700800121804483e-06, "loss": 0.0008, "step": 89150 }, { "epoch": 1.4588889797921951, "grad_norm": 0.052751071751117706, "learning_rate": 2.0689230673038406e-06, "loss": 0.0016, "step": 89160 }, { "epoch": 1.459052605743271, "grad_norm": 0.09104198962450027, "learning_rate": 2.0677663614758968e-06, "loss": 0.0022, "step": 89170 }, { "epoch": 1.4592162316943467, "grad_norm": 0.03054741583764553, "learning_rate": 2.0666098947909504e-06, "loss": 0.001, "step": 89180 }, { "epoch": 1.4593798576454224, "grad_norm": 0.10792868584394455, "learning_rate": 2.065453667343323e-06, "loss": 0.0018, "step": 89190 }, { "epoch": 1.4595434835964984, "grad_norm": 0.06750574707984924, "learning_rate": 2.0642976792273077e-06, "loss": 0.0008, "step": 89200 }, { "epoch": 1.4597071095475742, "grad_norm": 0.05620532110333443, "learning_rate": 2.063141930537188e-06, "loss": 0.0009, "step": 89210 }, { "epoch": 1.45987073549865, "grad_norm": 0.01012419443577528, "learning_rate": 2.061986421367218e-06, "loss": 0.0017, "step": 89220 }, { "epoch": 1.460034361449726, "grad_norm": 0.043559737503528595, "learning_rate": 2.060831151811642e-06, "loss": 0.0007, "step": 89230 }, { "epoch": 1.4601979874008018, "grad_norm": 0.03996361792087555, "learning_rate": 2.059676121964675e-06, "loss": 0.0005, "step": 89240 }, { "epoch": 1.4603616133518775, "grad_norm": 0.07819166034460068, "learning_rate": 2.0585213319205218e-06, "loss": 0.0014, "step": 89250 }, { "epoch": 1.4605252393029535, "grad_norm": 0.007949229329824448, "learning_rate": 2.057366781773359e-06, "loss": 0.0011, "step": 89260 }, { "epoch": 1.4606888652540293, "grad_norm": 0.028248926624655724, "learning_rate": 2.0562124716173514e-06, "loss": 0.0015, "step": 89270 }, { "epoch": 1.460852491205105, "grad_norm": 0.16604283452033997, "learning_rate": 2.0550584015466376e-06, "loss": 0.001, "step": 89280 }, { "epoch": 1.461016117156181, "grad_norm": 0.04688572883605957, "learning_rate": 2.0539045716553423e-06, "loss": 0.0017, "step": 89290 }, { "epoch": 1.4611797431072568, "grad_norm": 0.039852436631917953, "learning_rate": 2.052750982037565e-06, "loss": 0.0009, "step": 89300 }, { "epoch": 1.4613433690583326, "grad_norm": 0.037024062126874924, "learning_rate": 2.051597632787393e-06, "loss": 0.0015, "step": 89310 }, { "epoch": 1.4615069950094086, "grad_norm": 0.041955363005399704, "learning_rate": 2.0504445239988836e-06, "loss": 0.001, "step": 89320 }, { "epoch": 1.4616706209604844, "grad_norm": 0.06854652613401413, "learning_rate": 2.0492916557660857e-06, "loss": 0.0005, "step": 89330 }, { "epoch": 1.4618342469115602, "grad_norm": 0.009871524758636951, "learning_rate": 2.0481390281830193e-06, "loss": 0.0007, "step": 89340 }, { "epoch": 1.461997872862636, "grad_norm": 0.07811180502176285, "learning_rate": 2.0469866413436927e-06, "loss": 0.0015, "step": 89350 }, { "epoch": 1.462161498813712, "grad_norm": 0.02097170054912567, "learning_rate": 2.0458344953420872e-06, "loss": 0.0015, "step": 89360 }, { "epoch": 1.4623251247647877, "grad_norm": 0.017110660672187805, "learning_rate": 2.0446825902721706e-06, "loss": 0.0006, "step": 89370 }, { "epoch": 1.4624887507158635, "grad_norm": 0.04992340877652168, "learning_rate": 2.043530926227886e-06, "loss": 0.0008, "step": 89380 }, { "epoch": 1.4626523766669393, "grad_norm": 0.03482833504676819, "learning_rate": 2.0423795033031607e-06, "loss": 0.0022, "step": 89390 }, { "epoch": 1.4628160026180153, "grad_norm": 0.07968378812074661, "learning_rate": 2.0412283215919e-06, "loss": 0.0008, "step": 89400 }, { "epoch": 1.462979628569091, "grad_norm": 0.15413126349449158, "learning_rate": 2.0400773811879915e-06, "loss": 0.0013, "step": 89410 }, { "epoch": 1.4631432545201668, "grad_norm": 0.07705624401569366, "learning_rate": 2.0389266821853e-06, "loss": 0.0011, "step": 89420 }, { "epoch": 1.4633068804712428, "grad_norm": 0.06214204430580139, "learning_rate": 2.0377762246776754e-06, "loss": 0.0014, "step": 89430 }, { "epoch": 1.4634705064223186, "grad_norm": 0.0140265803784132, "learning_rate": 2.036626008758944e-06, "loss": 0.0014, "step": 89440 }, { "epoch": 1.4636341323733943, "grad_norm": 0.0016831799875944853, "learning_rate": 2.0354760345229115e-06, "loss": 0.0016, "step": 89450 }, { "epoch": 1.4637977583244703, "grad_norm": 0.1347316950559616, "learning_rate": 2.0343263020633687e-06, "loss": 0.0011, "step": 89460 }, { "epoch": 1.4639613842755461, "grad_norm": 0.1418701410293579, "learning_rate": 2.0331768114740807e-06, "loss": 0.0018, "step": 89470 }, { "epoch": 1.464125010226622, "grad_norm": 0.005771338008344173, "learning_rate": 2.0320275628487994e-06, "loss": 0.0018, "step": 89480 }, { "epoch": 1.4642886361776979, "grad_norm": 0.08620180189609528, "learning_rate": 2.030878556281251e-06, "loss": 0.0014, "step": 89490 }, { "epoch": 1.4644522621287737, "grad_norm": 0.003427609335631132, "learning_rate": 2.0297297918651476e-06, "loss": 0.0005, "step": 89500 }, { "epoch": 1.4646158880798494, "grad_norm": 0.01985606551170349, "learning_rate": 2.028581269694175e-06, "loss": 0.0007, "step": 89510 }, { "epoch": 1.4647795140309254, "grad_norm": 0.04124069586396217, "learning_rate": 2.0274329898620055e-06, "loss": 0.0007, "step": 89520 }, { "epoch": 1.4649431399820012, "grad_norm": 0.029155099764466286, "learning_rate": 2.0262849524622865e-06, "loss": 0.0007, "step": 89530 }, { "epoch": 1.465106765933077, "grad_norm": 0.09577871114015579, "learning_rate": 2.025137157588651e-06, "loss": 0.0008, "step": 89540 }, { "epoch": 1.4652703918841528, "grad_norm": 0.04181332141160965, "learning_rate": 2.023989605334706e-06, "loss": 0.0006, "step": 89550 }, { "epoch": 1.4654340178352285, "grad_norm": 0.07948426157236099, "learning_rate": 2.022842295794045e-06, "loss": 0.0012, "step": 89560 }, { "epoch": 1.4655976437863045, "grad_norm": 0.04818016663193703, "learning_rate": 2.0216952290602355e-06, "loss": 0.0006, "step": 89570 }, { "epoch": 1.4657612697373803, "grad_norm": 0.02989812009036541, "learning_rate": 2.0205484052268313e-06, "loss": 0.0006, "step": 89580 }, { "epoch": 1.465924895688456, "grad_norm": 0.025143815204501152, "learning_rate": 2.0194018243873612e-06, "loss": 0.0008, "step": 89590 }, { "epoch": 1.466088521639532, "grad_norm": 0.014957061968743801, "learning_rate": 2.0182554866353394e-06, "loss": 0.0006, "step": 89600 }, { "epoch": 1.4662521475906078, "grad_norm": 0.03021889552474022, "learning_rate": 2.0171093920642524e-06, "loss": 0.0012, "step": 89610 }, { "epoch": 1.4664157735416836, "grad_norm": 0.03508780524134636, "learning_rate": 2.0159635407675776e-06, "loss": 0.0008, "step": 89620 }, { "epoch": 1.4665793994927596, "grad_norm": 0.12080138921737671, "learning_rate": 2.0148179328387617e-06, "loss": 0.0012, "step": 89630 }, { "epoch": 1.4667430254438354, "grad_norm": 0.1634090095758438, "learning_rate": 2.0136725683712405e-06, "loss": 0.0008, "step": 89640 }, { "epoch": 1.4669066513949112, "grad_norm": 0.005211548879742622, "learning_rate": 2.012527447458422e-06, "loss": 0.0005, "step": 89650 }, { "epoch": 1.4670702773459872, "grad_norm": 0.08439070731401443, "learning_rate": 2.0113825701937033e-06, "loss": 0.0012, "step": 89660 }, { "epoch": 1.467233903297063, "grad_norm": 0.025132741779088974, "learning_rate": 2.0102379366704518e-06, "loss": 0.0011, "step": 89670 }, { "epoch": 1.4673975292481387, "grad_norm": 0.030115440487861633, "learning_rate": 2.009093546982024e-06, "loss": 0.0019, "step": 89680 }, { "epoch": 1.4675611551992147, "grad_norm": 0.02979099191725254, "learning_rate": 2.007949401221749e-06, "loss": 0.0008, "step": 89690 }, { "epoch": 1.4677247811502905, "grad_norm": 0.0384887233376503, "learning_rate": 2.0068054994829423e-06, "loss": 0.0012, "step": 89700 }, { "epoch": 1.4678884071013663, "grad_norm": 0.05716992914676666, "learning_rate": 2.0056618418588936e-06, "loss": 0.0015, "step": 89710 }, { "epoch": 1.4680520330524423, "grad_norm": 0.046387363225221634, "learning_rate": 2.004518428442879e-06, "loss": 0.001, "step": 89720 }, { "epoch": 1.468215659003518, "grad_norm": 0.026477502658963203, "learning_rate": 2.0033752593281485e-06, "loss": 0.0008, "step": 89730 }, { "epoch": 1.4683792849545938, "grad_norm": 0.0986756682395935, "learning_rate": 2.0022323346079387e-06, "loss": 0.0012, "step": 89740 }, { "epoch": 1.4685429109056696, "grad_norm": 0.09719856828451157, "learning_rate": 2.00108965437546e-06, "loss": 0.0012, "step": 89750 }, { "epoch": 1.4687065368567453, "grad_norm": 0.032134849578142166, "learning_rate": 1.9999472187239037e-06, "loss": 0.001, "step": 89760 }, { "epoch": 1.4688701628078213, "grad_norm": 0.06840632855892181, "learning_rate": 1.9988050277464473e-06, "loss": 0.0011, "step": 89770 }, { "epoch": 1.4690337887588971, "grad_norm": 0.05544612184166908, "learning_rate": 1.9976630815362402e-06, "loss": 0.0019, "step": 89780 }, { "epoch": 1.469197414709973, "grad_norm": 0.053231481462717056, "learning_rate": 1.996521380186419e-06, "loss": 0.0014, "step": 89790 }, { "epoch": 1.469361040661049, "grad_norm": 0.15656037628650665, "learning_rate": 1.9953799237900934e-06, "loss": 0.0049, "step": 89800 }, { "epoch": 1.4695246666121247, "grad_norm": 0.11974067240953445, "learning_rate": 1.9942387124403607e-06, "loss": 0.0009, "step": 89810 }, { "epoch": 1.4696882925632004, "grad_norm": 0.08129055052995682, "learning_rate": 1.99309774623029e-06, "loss": 0.0009, "step": 89820 }, { "epoch": 1.4698519185142764, "grad_norm": 0.03509863466024399, "learning_rate": 1.9919570252529393e-06, "loss": 0.0006, "step": 89830 }, { "epoch": 1.4700155444653522, "grad_norm": 0.11475770175457001, "learning_rate": 1.9908165496013365e-06, "loss": 0.0008, "step": 89840 }, { "epoch": 1.470179170416428, "grad_norm": 0.03813806548714638, "learning_rate": 1.9896763193685004e-06, "loss": 0.0006, "step": 89850 }, { "epoch": 1.470342796367504, "grad_norm": 0.026975499466061592, "learning_rate": 1.9885363346474194e-06, "loss": 0.0018, "step": 89860 }, { "epoch": 1.4705064223185798, "grad_norm": 0.014688033610582352, "learning_rate": 1.987396595531071e-06, "loss": 0.001, "step": 89870 }, { "epoch": 1.4706700482696555, "grad_norm": 0.14553506672382355, "learning_rate": 1.986257102112405e-06, "loss": 0.0015, "step": 89880 }, { "epoch": 1.4708336742207315, "grad_norm": 0.0654304027557373, "learning_rate": 1.9851178544843576e-06, "loss": 0.0011, "step": 89890 }, { "epoch": 1.4709973001718073, "grad_norm": 0.002384435385465622, "learning_rate": 1.983978852739839e-06, "loss": 0.0013, "step": 89900 }, { "epoch": 1.471160926122883, "grad_norm": 0.01792038604617119, "learning_rate": 1.9828400969717446e-06, "loss": 0.0009, "step": 89910 }, { "epoch": 1.4713245520739588, "grad_norm": 0.08039540797472, "learning_rate": 1.981701587272946e-06, "loss": 0.001, "step": 89920 }, { "epoch": 1.4714881780250348, "grad_norm": 0.015828078612685204, "learning_rate": 1.980563323736298e-06, "loss": 0.0006, "step": 89930 }, { "epoch": 1.4716518039761106, "grad_norm": 0.02373330667614937, "learning_rate": 1.979425306454631e-06, "loss": 0.0022, "step": 89940 }, { "epoch": 1.4718154299271864, "grad_norm": 0.039735570549964905, "learning_rate": 1.9782875355207605e-06, "loss": 0.0016, "step": 89950 }, { "epoch": 1.4719790558782622, "grad_norm": 0.07227891683578491, "learning_rate": 1.977150011027476e-06, "loss": 0.0005, "step": 89960 }, { "epoch": 1.4721426818293382, "grad_norm": 0.04523607715964317, "learning_rate": 1.976012733067554e-06, "loss": 0.0014, "step": 89970 }, { "epoch": 1.472306307780414, "grad_norm": 0.04044328257441521, "learning_rate": 1.9748757017337433e-06, "loss": 0.0008, "step": 89980 }, { "epoch": 1.4724699337314897, "grad_norm": 0.020450161769986153, "learning_rate": 1.9737389171187793e-06, "loss": 0.0009, "step": 89990 }, { "epoch": 1.4726335596825657, "grad_norm": 0.05939488485455513, "learning_rate": 1.9726023793153714e-06, "loss": 0.0007, "step": 90000 }, { "epoch": 1.4727971856336415, "grad_norm": 0.030129818245768547, "learning_rate": 1.971466088416216e-06, "loss": 0.0018, "step": 90010 }, { "epoch": 1.4729608115847173, "grad_norm": 0.07449831813573837, "learning_rate": 1.9703300445139793e-06, "loss": 0.0013, "step": 90020 }, { "epoch": 1.4731244375357933, "grad_norm": 0.03683117404580116, "learning_rate": 1.969194247701319e-06, "loss": 0.0013, "step": 90030 }, { "epoch": 1.473288063486869, "grad_norm": 0.03523913770914078, "learning_rate": 1.968058698070862e-06, "loss": 0.0007, "step": 90040 }, { "epoch": 1.4734516894379448, "grad_norm": 0.0219389908015728, "learning_rate": 1.966923395715224e-06, "loss": 0.0008, "step": 90050 }, { "epoch": 1.4736153153890208, "grad_norm": 0.05452648550271988, "learning_rate": 1.965788340726993e-06, "loss": 0.0013, "step": 90060 }, { "epoch": 1.4737789413400966, "grad_norm": 0.08179844170808792, "learning_rate": 1.9646535331987426e-06, "loss": 0.0028, "step": 90070 }, { "epoch": 1.4739425672911723, "grad_norm": 0.053484898060560226, "learning_rate": 1.963518973223024e-06, "loss": 0.0008, "step": 90080 }, { "epoch": 1.4741061932422483, "grad_norm": 0.04054323211312294, "learning_rate": 1.962384660892364e-06, "loss": 0.0015, "step": 90090 }, { "epoch": 1.4742698191933241, "grad_norm": 0.035754457116127014, "learning_rate": 1.9612505962992785e-06, "loss": 0.002, "step": 90100 }, { "epoch": 1.4744334451444, "grad_norm": 0.021980365738272667, "learning_rate": 1.9601167795362537e-06, "loss": 0.0006, "step": 90110 }, { "epoch": 1.4745970710954757, "grad_norm": 0.05765829607844353, "learning_rate": 1.9589832106957635e-06, "loss": 0.0013, "step": 90120 }, { "epoch": 1.4747606970465517, "grad_norm": 0.025607964023947716, "learning_rate": 1.9578498898702545e-06, "loss": 0.0008, "step": 90130 }, { "epoch": 1.4749243229976274, "grad_norm": 0.02451637014746666, "learning_rate": 1.9567168171521607e-06, "loss": 0.0015, "step": 90140 }, { "epoch": 1.4750879489487032, "grad_norm": 0.07796599715948105, "learning_rate": 1.9555839926338863e-06, "loss": 0.0013, "step": 90150 }, { "epoch": 1.475251574899779, "grad_norm": 0.14252610504627228, "learning_rate": 1.954451416407826e-06, "loss": 0.0009, "step": 90160 }, { "epoch": 1.475415200850855, "grad_norm": 0.016709337010979652, "learning_rate": 1.953319088566344e-06, "loss": 0.0008, "step": 90170 }, { "epoch": 1.4755788268019308, "grad_norm": 0.0331064909696579, "learning_rate": 1.9521870092017937e-06, "loss": 0.0012, "step": 90180 }, { "epoch": 1.4757424527530065, "grad_norm": 0.0621693953871727, "learning_rate": 1.9510551784064992e-06, "loss": 0.0007, "step": 90190 }, { "epoch": 1.4759060787040825, "grad_norm": 0.03573783114552498, "learning_rate": 1.949923596272773e-06, "loss": 0.0009, "step": 90200 }, { "epoch": 1.4760697046551583, "grad_norm": 0.052000194787979126, "learning_rate": 1.948792262892899e-06, "loss": 0.001, "step": 90210 }, { "epoch": 1.476233330606234, "grad_norm": 0.04357015714049339, "learning_rate": 1.9476611783591487e-06, "loss": 0.0013, "step": 90220 }, { "epoch": 1.47639695655731, "grad_norm": 0.08963147550821304, "learning_rate": 1.946530342763766e-06, "loss": 0.0012, "step": 90230 }, { "epoch": 1.4765605825083858, "grad_norm": 0.10553299635648727, "learning_rate": 1.9453997561989816e-06, "loss": 0.0016, "step": 90240 }, { "epoch": 1.4767242084594616, "grad_norm": 0.10988305509090424, "learning_rate": 1.944269418756999e-06, "loss": 0.0015, "step": 90250 }, { "epoch": 1.4768878344105376, "grad_norm": 0.06891978532075882, "learning_rate": 1.943139330530008e-06, "loss": 0.0015, "step": 90260 }, { "epoch": 1.4770514603616134, "grad_norm": 0.019649969413876534, "learning_rate": 1.94200949161017e-06, "loss": 0.0021, "step": 90270 }, { "epoch": 1.4772150863126892, "grad_norm": 0.016747767105698586, "learning_rate": 1.9408799020896365e-06, "loss": 0.0021, "step": 90280 }, { "epoch": 1.4773787122637652, "grad_norm": 0.02739819325506687, "learning_rate": 1.9397505620605278e-06, "loss": 0.0011, "step": 90290 }, { "epoch": 1.477542338214841, "grad_norm": 0.07685421407222748, "learning_rate": 1.9386214716149536e-06, "loss": 0.0009, "step": 90300 }, { "epoch": 1.4777059641659167, "grad_norm": 0.0713479295372963, "learning_rate": 1.9374926308449944e-06, "loss": 0.001, "step": 90310 }, { "epoch": 1.4778695901169925, "grad_norm": 0.008855665102601051, "learning_rate": 1.9363640398427185e-06, "loss": 0.0007, "step": 90320 }, { "epoch": 1.4780332160680685, "grad_norm": 0.05054214596748352, "learning_rate": 1.9352356987001666e-06, "loss": 0.0011, "step": 90330 }, { "epoch": 1.4781968420191443, "grad_norm": 0.05083496496081352, "learning_rate": 1.934107607509365e-06, "loss": 0.0016, "step": 90340 }, { "epoch": 1.47836046797022, "grad_norm": 0.046345412731170654, "learning_rate": 1.9329797663623146e-06, "loss": 0.0011, "step": 90350 }, { "epoch": 1.4785240939212958, "grad_norm": 0.04118720814585686, "learning_rate": 1.931852175351001e-06, "loss": 0.0005, "step": 90360 }, { "epoch": 1.4786877198723718, "grad_norm": 0.015313656069338322, "learning_rate": 1.930724834567384e-06, "loss": 0.0032, "step": 90370 }, { "epoch": 1.4788513458234476, "grad_norm": 0.061858244240283966, "learning_rate": 1.9295977441034087e-06, "loss": 0.0008, "step": 90380 }, { "epoch": 1.4790149717745233, "grad_norm": 0.005056298803538084, "learning_rate": 1.928470904050994e-06, "loss": 0.0004, "step": 90390 }, { "epoch": 1.4791785977255993, "grad_norm": 0.006792113184928894, "learning_rate": 1.9273443145020417e-06, "loss": 0.0014, "step": 90400 }, { "epoch": 1.4793422236766751, "grad_norm": 0.0516655333340168, "learning_rate": 1.926217975548434e-06, "loss": 0.001, "step": 90410 }, { "epoch": 1.479505849627751, "grad_norm": 0.09765911847352982, "learning_rate": 1.9250918872820284e-06, "loss": 0.0018, "step": 90420 }, { "epoch": 1.4796694755788269, "grad_norm": 0.06497883796691895, "learning_rate": 1.9239660497946686e-06, "loss": 0.0016, "step": 90430 }, { "epoch": 1.4798331015299027, "grad_norm": 0.04156436026096344, "learning_rate": 1.9228404631781712e-06, "loss": 0.0008, "step": 90440 }, { "epoch": 1.4799967274809784, "grad_norm": 0.02495254948735237, "learning_rate": 1.9217151275243368e-06, "loss": 0.0016, "step": 90450 }, { "epoch": 1.4801603534320544, "grad_norm": 0.055173467844724655, "learning_rate": 1.9205900429249426e-06, "loss": 0.001, "step": 90460 }, { "epoch": 1.4803239793831302, "grad_norm": 0.0012991520343348384, "learning_rate": 1.9194652094717485e-06, "loss": 0.0007, "step": 90470 }, { "epoch": 1.480487605334206, "grad_norm": 0.03873857110738754, "learning_rate": 1.9183406272564896e-06, "loss": 0.0013, "step": 90480 }, { "epoch": 1.480651231285282, "grad_norm": 0.0435195155441761, "learning_rate": 1.9172162963708864e-06, "loss": 0.0024, "step": 90490 }, { "epoch": 1.4808148572363578, "grad_norm": 0.0818653479218483, "learning_rate": 1.9160922169066313e-06, "loss": 0.0012, "step": 90500 }, { "epoch": 1.4809784831874335, "grad_norm": 0.07184914499521255, "learning_rate": 1.914968388955405e-06, "loss": 0.0005, "step": 90510 }, { "epoch": 1.4811421091385093, "grad_norm": 0.008615637198090553, "learning_rate": 1.9138448126088584e-06, "loss": 0.0004, "step": 90520 }, { "epoch": 1.481305735089585, "grad_norm": 0.02594771608710289, "learning_rate": 1.912721487958631e-06, "loss": 0.0011, "step": 90530 }, { "epoch": 1.481469361040661, "grad_norm": 0.03695163503289223, "learning_rate": 1.911598415096333e-06, "loss": 0.0009, "step": 90540 }, { "epoch": 1.4816329869917368, "grad_norm": 0.13411551713943481, "learning_rate": 1.910475594113563e-06, "loss": 0.0017, "step": 90550 }, { "epoch": 1.4817966129428126, "grad_norm": 0.037620969116687775, "learning_rate": 1.9093530251018892e-06, "loss": 0.0005, "step": 90560 }, { "epoch": 1.4819602388938886, "grad_norm": 0.06277339905500412, "learning_rate": 1.90823070815287e-06, "loss": 0.0013, "step": 90570 }, { "epoch": 1.4821238648449644, "grad_norm": 0.04689271003007889, "learning_rate": 1.9071086433580332e-06, "loss": 0.0023, "step": 90580 }, { "epoch": 1.4822874907960402, "grad_norm": 0.04348761960864067, "learning_rate": 1.9059868308088948e-06, "loss": 0.0008, "step": 90590 }, { "epoch": 1.4824511167471162, "grad_norm": 0.13316167891025543, "learning_rate": 1.9048652705969412e-06, "loss": 0.0012, "step": 90600 }, { "epoch": 1.482614742698192, "grad_norm": 0.047682132571935654, "learning_rate": 1.9037439628136479e-06, "loss": 0.0007, "step": 90610 }, { "epoch": 1.4827783686492677, "grad_norm": 0.03104541078209877, "learning_rate": 1.9026229075504604e-06, "loss": 0.0011, "step": 90620 }, { "epoch": 1.4829419946003437, "grad_norm": 0.047130435705184937, "learning_rate": 1.9015021048988124e-06, "loss": 0.0015, "step": 90630 }, { "epoch": 1.4831056205514195, "grad_norm": 0.036360953003168106, "learning_rate": 1.9003815549501093e-06, "loss": 0.0019, "step": 90640 }, { "epoch": 1.4832692465024953, "grad_norm": 0.04357534646987915, "learning_rate": 1.899261257795742e-06, "loss": 0.0008, "step": 90650 }, { "epoch": 1.4834328724535712, "grad_norm": 0.05648239701986313, "learning_rate": 1.898141213527075e-06, "loss": 0.0011, "step": 90660 }, { "epoch": 1.483596498404647, "grad_norm": 0.023698559030890465, "learning_rate": 1.8970214222354593e-06, "loss": 0.0005, "step": 90670 }, { "epoch": 1.4837601243557228, "grad_norm": 0.03344593197107315, "learning_rate": 1.8959018840122174e-06, "loss": 0.0009, "step": 90680 }, { "epoch": 1.4839237503067988, "grad_norm": 0.0575055293738842, "learning_rate": 1.8947825989486585e-06, "loss": 0.0008, "step": 90690 }, { "epoch": 1.4840873762578746, "grad_norm": 0.015905845910310745, "learning_rate": 1.8936635671360642e-06, "loss": 0.0005, "step": 90700 }, { "epoch": 1.4842510022089503, "grad_norm": 0.03286319226026535, "learning_rate": 1.8925447886657022e-06, "loss": 0.0022, "step": 90710 }, { "epoch": 1.4844146281600261, "grad_norm": 0.03279919549822807, "learning_rate": 1.891426263628815e-06, "loss": 0.0007, "step": 90720 }, { "epoch": 1.484578254111102, "grad_norm": 0.04007800295948982, "learning_rate": 1.890307992116624e-06, "loss": 0.0009, "step": 90730 }, { "epoch": 1.4847418800621779, "grad_norm": 0.0338972844183445, "learning_rate": 1.8891899742203346e-06, "loss": 0.0011, "step": 90740 }, { "epoch": 1.4849055060132537, "grad_norm": 0.006547426339238882, "learning_rate": 1.888072210031125e-06, "loss": 0.0007, "step": 90750 }, { "epoch": 1.4850691319643294, "grad_norm": 0.04488673061132431, "learning_rate": 1.88695469964016e-06, "loss": 0.0012, "step": 90760 }, { "epoch": 1.4852327579154054, "grad_norm": 0.013546115718781948, "learning_rate": 1.8858374431385767e-06, "loss": 0.0012, "step": 90770 }, { "epoch": 1.4853963838664812, "grad_norm": 0.06485360860824585, "learning_rate": 1.884720440617498e-06, "loss": 0.0011, "step": 90780 }, { "epoch": 1.485560009817557, "grad_norm": 0.009377405047416687, "learning_rate": 1.883603692168019e-06, "loss": 0.0021, "step": 90790 }, { "epoch": 1.485723635768633, "grad_norm": 0.03613147512078285, "learning_rate": 1.882487197881222e-06, "loss": 0.0023, "step": 90800 }, { "epoch": 1.4858872617197088, "grad_norm": 0.09895659238100052, "learning_rate": 1.8813709578481609e-06, "loss": 0.0015, "step": 90810 }, { "epoch": 1.4860508876707845, "grad_norm": 0.017641883343458176, "learning_rate": 1.8802549721598757e-06, "loss": 0.0014, "step": 90820 }, { "epoch": 1.4862145136218605, "grad_norm": 0.0377693772315979, "learning_rate": 1.8791392409073783e-06, "loss": 0.0005, "step": 90830 }, { "epoch": 1.4863781395729363, "grad_norm": 0.014671116136014462, "learning_rate": 1.878023764181669e-06, "loss": 0.0015, "step": 90840 }, { "epoch": 1.486541765524012, "grad_norm": 0.029847491532564163, "learning_rate": 1.8769085420737171e-06, "loss": 0.0006, "step": 90850 }, { "epoch": 1.486705391475088, "grad_norm": 0.08111515641212463, "learning_rate": 1.875793574674481e-06, "loss": 0.0012, "step": 90860 }, { "epoch": 1.4868690174261638, "grad_norm": 0.020809266716241837, "learning_rate": 1.8746788620748896e-06, "loss": 0.0007, "step": 90870 }, { "epoch": 1.4870326433772396, "grad_norm": 0.01035325601696968, "learning_rate": 1.8735644043658585e-06, "loss": 0.0007, "step": 90880 }, { "epoch": 1.4871962693283154, "grad_norm": 0.06289991736412048, "learning_rate": 1.8724502016382761e-06, "loss": 0.0012, "step": 90890 }, { "epoch": 1.4873598952793914, "grad_norm": 0.025515517219901085, "learning_rate": 1.871336253983016e-06, "loss": 0.0007, "step": 90900 }, { "epoch": 1.4875235212304672, "grad_norm": 0.04921700060367584, "learning_rate": 1.8702225614909247e-06, "loss": 0.0008, "step": 90910 }, { "epoch": 1.487687147181543, "grad_norm": 0.10310881584882736, "learning_rate": 1.8691091242528341e-06, "loss": 0.0018, "step": 90920 }, { "epoch": 1.4878507731326187, "grad_norm": 0.02996991015970707, "learning_rate": 1.86799594235955e-06, "loss": 0.001, "step": 90930 }, { "epoch": 1.4880143990836947, "grad_norm": 0.15167322754859924, "learning_rate": 1.8668830159018624e-06, "loss": 0.0008, "step": 90940 }, { "epoch": 1.4881780250347705, "grad_norm": 0.08759970217943192, "learning_rate": 1.865770344970535e-06, "loss": 0.0009, "step": 90950 }, { "epoch": 1.4883416509858463, "grad_norm": 0.08242107182741165, "learning_rate": 1.8646579296563155e-06, "loss": 0.0009, "step": 90960 }, { "epoch": 1.4885052769369223, "grad_norm": 0.01976623386144638, "learning_rate": 1.8635457700499271e-06, "loss": 0.001, "step": 90970 }, { "epoch": 1.488668902887998, "grad_norm": 0.0412474200129509, "learning_rate": 1.8624338662420754e-06, "loss": 0.0005, "step": 90980 }, { "epoch": 1.4888325288390738, "grad_norm": 0.022464029490947723, "learning_rate": 1.8613222183234414e-06, "loss": 0.0008, "step": 90990 }, { "epoch": 1.4889961547901498, "grad_norm": 0.07071196287870407, "learning_rate": 1.8602108263846903e-06, "loss": 0.0007, "step": 91000 }, { "epoch": 1.4891597807412256, "grad_norm": 0.06409469246864319, "learning_rate": 1.85909969051646e-06, "loss": 0.0006, "step": 91010 }, { "epoch": 1.4893234066923013, "grad_norm": 0.08950179815292358, "learning_rate": 1.8579888108093742e-06, "loss": 0.0012, "step": 91020 }, { "epoch": 1.4894870326433773, "grad_norm": 0.00314501510001719, "learning_rate": 1.8568781873540292e-06, "loss": 0.001, "step": 91030 }, { "epoch": 1.4896506585944531, "grad_norm": 0.12697337567806244, "learning_rate": 1.8557678202410074e-06, "loss": 0.0014, "step": 91040 }, { "epoch": 1.4898142845455289, "grad_norm": 0.12479368597269058, "learning_rate": 1.8546577095608648e-06, "loss": 0.0007, "step": 91050 }, { "epoch": 1.4899779104966049, "grad_norm": 0.013313405215740204, "learning_rate": 1.853547855404136e-06, "loss": 0.001, "step": 91060 }, { "epoch": 1.4901415364476807, "grad_norm": 0.06134331598877907, "learning_rate": 1.8524382578613404e-06, "loss": 0.0014, "step": 91070 }, { "epoch": 1.4903051623987564, "grad_norm": 0.037143003195524216, "learning_rate": 1.8513289170229704e-06, "loss": 0.0007, "step": 91080 }, { "epoch": 1.4904687883498322, "grad_norm": 0.09902860969305038, "learning_rate": 1.8502198329795024e-06, "loss": 0.0012, "step": 91090 }, { "epoch": 1.4906324143009082, "grad_norm": 0.07716687768697739, "learning_rate": 1.8491110058213867e-06, "loss": 0.0004, "step": 91100 }, { "epoch": 1.490796040251984, "grad_norm": 0.014123808592557907, "learning_rate": 1.8480024356390592e-06, "loss": 0.0011, "step": 91110 }, { "epoch": 1.4909596662030598, "grad_norm": 0.0366465225815773, "learning_rate": 1.846894122522927e-06, "loss": 0.0006, "step": 91120 }, { "epoch": 1.4911232921541355, "grad_norm": 0.049732182174921036, "learning_rate": 1.8457860665633843e-06, "loss": 0.0011, "step": 91130 }, { "epoch": 1.4912869181052115, "grad_norm": 0.05516284331679344, "learning_rate": 1.8446782678507962e-06, "loss": 0.0018, "step": 91140 }, { "epoch": 1.4914505440562873, "grad_norm": 0.052313219755887985, "learning_rate": 1.8435707264755153e-06, "loss": 0.0026, "step": 91150 }, { "epoch": 1.491614170007363, "grad_norm": 0.08951608836650848, "learning_rate": 1.8424634425278653e-06, "loss": 0.0015, "step": 91160 }, { "epoch": 1.491777795958439, "grad_norm": 0.30258411169052124, "learning_rate": 1.8413564160981562e-06, "loss": 0.0022, "step": 91170 }, { "epoch": 1.4919414219095148, "grad_norm": 0.03965074196457863, "learning_rate": 1.8402496472766685e-06, "loss": 0.0012, "step": 91180 }, { "epoch": 1.4921050478605906, "grad_norm": 0.08147792518138885, "learning_rate": 1.8391431361536716e-06, "loss": 0.001, "step": 91190 }, { "epoch": 1.4922686738116666, "grad_norm": 0.038175102323293686, "learning_rate": 1.8380368828194044e-06, "loss": 0.001, "step": 91200 }, { "epoch": 1.4924322997627424, "grad_norm": 0.002345207380130887, "learning_rate": 1.836930887364093e-06, "loss": 0.001, "step": 91210 }, { "epoch": 1.4925959257138182, "grad_norm": 0.046156398952007294, "learning_rate": 1.8358251498779345e-06, "loss": 0.001, "step": 91220 }, { "epoch": 1.4927595516648942, "grad_norm": 0.06583865731954575, "learning_rate": 1.8347196704511134e-06, "loss": 0.0014, "step": 91230 }, { "epoch": 1.49292317761597, "grad_norm": 0.027767017483711243, "learning_rate": 1.833614449173785e-06, "loss": 0.0012, "step": 91240 }, { "epoch": 1.4930868035670457, "grad_norm": 0.0442766398191452, "learning_rate": 1.832509486136091e-06, "loss": 0.0007, "step": 91250 }, { "epoch": 1.4932504295181217, "grad_norm": 0.06051894649863243, "learning_rate": 1.8314047814281443e-06, "loss": 0.001, "step": 91260 }, { "epoch": 1.4934140554691975, "grad_norm": 0.07792415469884872, "learning_rate": 1.8303003351400456e-06, "loss": 0.0016, "step": 91270 }, { "epoch": 1.4935776814202733, "grad_norm": 0.02484029345214367, "learning_rate": 1.8291961473618653e-06, "loss": 0.0014, "step": 91280 }, { "epoch": 1.493741307371349, "grad_norm": 0.025423802435398102, "learning_rate": 1.8280922181836607e-06, "loss": 0.0011, "step": 91290 }, { "epoch": 1.4939049333224248, "grad_norm": 0.07797304540872574, "learning_rate": 1.8269885476954614e-06, "loss": 0.001, "step": 91300 }, { "epoch": 1.4940685592735008, "grad_norm": 0.023377392441034317, "learning_rate": 1.8258851359872825e-06, "loss": 0.0011, "step": 91310 }, { "epoch": 1.4942321852245766, "grad_norm": 0.02874838560819626, "learning_rate": 1.8247819831491109e-06, "loss": 0.0005, "step": 91320 }, { "epoch": 1.4943958111756523, "grad_norm": 0.06698146462440491, "learning_rate": 1.8236790892709193e-06, "loss": 0.0011, "step": 91330 }, { "epoch": 1.4945594371267283, "grad_norm": 0.09435980767011642, "learning_rate": 1.8225764544426533e-06, "loss": 0.0016, "step": 91340 }, { "epoch": 1.4947230630778041, "grad_norm": 0.06800531595945358, "learning_rate": 1.8214740787542422e-06, "loss": 0.0006, "step": 91350 }, { "epoch": 1.49488668902888, "grad_norm": 0.058543041348457336, "learning_rate": 1.8203719622955911e-06, "loss": 0.0007, "step": 91360 }, { "epoch": 1.4950503149799559, "grad_norm": 0.125640869140625, "learning_rate": 1.8192701051565831e-06, "loss": 0.0006, "step": 91370 }, { "epoch": 1.4952139409310317, "grad_norm": 0.11544572561979294, "learning_rate": 1.818168507427085e-06, "loss": 0.0011, "step": 91380 }, { "epoch": 1.4953775668821074, "grad_norm": 0.03796697407960892, "learning_rate": 1.8170671691969366e-06, "loss": 0.0009, "step": 91390 }, { "epoch": 1.4955411928331834, "grad_norm": 0.01932319439947605, "learning_rate": 1.8159660905559622e-06, "loss": 0.0013, "step": 91400 }, { "epoch": 1.4957048187842592, "grad_norm": 0.050985969603061676, "learning_rate": 1.8148652715939585e-06, "loss": 0.0013, "step": 91410 }, { "epoch": 1.495868444735335, "grad_norm": 0.06053866818547249, "learning_rate": 1.8137647124007084e-06, "loss": 0.0008, "step": 91420 }, { "epoch": 1.496032070686411, "grad_norm": 0.06278298050165176, "learning_rate": 1.8126644130659659e-06, "loss": 0.0011, "step": 91430 }, { "epoch": 1.4961956966374867, "grad_norm": 0.041511520743370056, "learning_rate": 1.8115643736794714e-06, "loss": 0.0018, "step": 91440 }, { "epoch": 1.4963593225885625, "grad_norm": 0.03669146075844765, "learning_rate": 1.8104645943309374e-06, "loss": 0.0009, "step": 91450 }, { "epoch": 1.4965229485396385, "grad_norm": 0.008665479719638824, "learning_rate": 1.8093650751100605e-06, "loss": 0.0013, "step": 91460 }, { "epoch": 1.4966865744907143, "grad_norm": 0.05738081783056259, "learning_rate": 1.8082658161065108e-06, "loss": 0.0008, "step": 91470 }, { "epoch": 1.49685020044179, "grad_norm": 0.008138302713632584, "learning_rate": 1.8071668174099439e-06, "loss": 0.0006, "step": 91480 }, { "epoch": 1.4970138263928658, "grad_norm": 0.07915917783975601, "learning_rate": 1.8060680791099867e-06, "loss": 0.0011, "step": 91490 }, { "epoch": 1.4971774523439416, "grad_norm": 0.09052205085754395, "learning_rate": 1.804969601296252e-06, "loss": 0.002, "step": 91500 }, { "epoch": 1.4973410782950176, "grad_norm": 0.051362503319978714, "learning_rate": 1.8038713840583243e-06, "loss": 0.0009, "step": 91510 }, { "epoch": 1.4975047042460934, "grad_norm": 0.07138554006814957, "learning_rate": 1.8027734274857744e-06, "loss": 0.0013, "step": 91520 }, { "epoch": 1.4976683301971692, "grad_norm": 0.04177491366863251, "learning_rate": 1.8016757316681438e-06, "loss": 0.0009, "step": 91530 }, { "epoch": 1.4978319561482452, "grad_norm": 0.005156593397259712, "learning_rate": 1.8005782966949609e-06, "loss": 0.0012, "step": 91540 }, { "epoch": 1.497995582099321, "grad_norm": 0.06645479053258896, "learning_rate": 1.7994811226557246e-06, "loss": 0.0015, "step": 91550 }, { "epoch": 1.4981592080503967, "grad_norm": 0.08459978550672531, "learning_rate": 1.7983842096399211e-06, "loss": 0.0009, "step": 91560 }, { "epoch": 1.4983228340014727, "grad_norm": 0.07986968755722046, "learning_rate": 1.7972875577370063e-06, "loss": 0.001, "step": 91570 }, { "epoch": 1.4984864599525485, "grad_norm": 0.04276851937174797, "learning_rate": 1.796191167036424e-06, "loss": 0.0011, "step": 91580 }, { "epoch": 1.4986500859036243, "grad_norm": 0.10228171944618225, "learning_rate": 1.7950950376275878e-06, "loss": 0.0014, "step": 91590 }, { "epoch": 1.4988137118547002, "grad_norm": 0.027184845879673958, "learning_rate": 1.7939991695998981e-06, "loss": 0.0007, "step": 91600 }, { "epoch": 1.498977337805776, "grad_norm": 0.022818472236394882, "learning_rate": 1.7929035630427266e-06, "loss": 0.0012, "step": 91610 }, { "epoch": 1.4991409637568518, "grad_norm": 0.06028661131858826, "learning_rate": 1.7918082180454305e-06, "loss": 0.0011, "step": 91620 }, { "epoch": 1.4993045897079278, "grad_norm": 0.05397883430123329, "learning_rate": 1.7907131346973394e-06, "loss": 0.0006, "step": 91630 }, { "epoch": 1.4994682156590036, "grad_norm": 0.06542099267244339, "learning_rate": 1.789618313087768e-06, "loss": 0.001, "step": 91640 }, { "epoch": 1.4996318416100793, "grad_norm": 0.0512886568903923, "learning_rate": 1.7885237533060023e-06, "loss": 0.0008, "step": 91650 }, { "epoch": 1.4997954675611553, "grad_norm": 0.04877230152487755, "learning_rate": 1.7874294554413152e-06, "loss": 0.001, "step": 91660 }, { "epoch": 1.4999590935122311, "grad_norm": 0.032953184098005295, "learning_rate": 1.7863354195829497e-06, "loss": 0.0008, "step": 91670 }, { "epoch": 1.5001227194633069, "grad_norm": 0.08210153132677078, "learning_rate": 1.7852416458201348e-06, "loss": 0.001, "step": 91680 }, { "epoch": 1.5002863454143829, "grad_norm": 0.05134357511997223, "learning_rate": 1.784148134242074e-06, "loss": 0.0009, "step": 91690 }, { "epoch": 1.5004499713654584, "grad_norm": 0.017645183950662613, "learning_rate": 1.7830548849379481e-06, "loss": 0.0006, "step": 91700 }, { "epoch": 1.5006135973165344, "grad_norm": 0.010288752615451813, "learning_rate": 1.7819618979969228e-06, "loss": 0.0011, "step": 91710 }, { "epoch": 1.5007772232676102, "grad_norm": 0.015275281853973866, "learning_rate": 1.7808691735081347e-06, "loss": 0.0006, "step": 91720 }, { "epoch": 1.500940849218686, "grad_norm": 0.08501256257295609, "learning_rate": 1.7797767115607062e-06, "loss": 0.0005, "step": 91730 }, { "epoch": 1.501104475169762, "grad_norm": 0.03945817053318024, "learning_rate": 1.7786845122437307e-06, "loss": 0.0009, "step": 91740 }, { "epoch": 1.5012681011208378, "grad_norm": 0.05070319399237633, "learning_rate": 1.7775925756462887e-06, "loss": 0.0014, "step": 91750 }, { "epoch": 1.5014317270719135, "grad_norm": 0.0801292136311531, "learning_rate": 1.7765009018574307e-06, "loss": 0.0014, "step": 91760 }, { "epoch": 1.5015953530229895, "grad_norm": 0.006221620365977287, "learning_rate": 1.775409490966194e-06, "loss": 0.0025, "step": 91770 }, { "epoch": 1.5017589789740653, "grad_norm": 0.0658557116985321, "learning_rate": 1.7743183430615863e-06, "loss": 0.0009, "step": 91780 }, { "epoch": 1.501922604925141, "grad_norm": 0.018184788525104523, "learning_rate": 1.7732274582326015e-06, "loss": 0.0016, "step": 91790 }, { "epoch": 1.502086230876217, "grad_norm": 0.03207259625196457, "learning_rate": 1.7721368365682045e-06, "loss": 0.0005, "step": 91800 }, { "epoch": 1.5022498568272928, "grad_norm": 0.0893886610865593, "learning_rate": 1.7710464781573473e-06, "loss": 0.0007, "step": 91810 }, { "epoch": 1.5024134827783686, "grad_norm": 0.07772957533597946, "learning_rate": 1.7699563830889517e-06, "loss": 0.0019, "step": 91820 }, { "epoch": 1.5025771087294446, "grad_norm": 0.024866385385394096, "learning_rate": 1.7688665514519253e-06, "loss": 0.001, "step": 91830 }, { "epoch": 1.5027407346805204, "grad_norm": 0.043451279401779175, "learning_rate": 1.7677769833351488e-06, "loss": 0.0012, "step": 91840 }, { "epoch": 1.5029043606315962, "grad_norm": 0.05371924489736557, "learning_rate": 1.7666876788274857e-06, "loss": 0.001, "step": 91850 }, { "epoch": 1.5030679865826722, "grad_norm": 0.0037235706113278866, "learning_rate": 1.765598638017773e-06, "loss": 0.001, "step": 91860 }, { "epoch": 1.5032316125337477, "grad_norm": 0.02677789144217968, "learning_rate": 1.764509860994833e-06, "loss": 0.0007, "step": 91870 }, { "epoch": 1.5033952384848237, "grad_norm": 0.09653905779123306, "learning_rate": 1.7634213478474588e-06, "loss": 0.001, "step": 91880 }, { "epoch": 1.5035588644358997, "grad_norm": 0.029980888590216637, "learning_rate": 1.7623330986644294e-06, "loss": 0.0005, "step": 91890 }, { "epoch": 1.5037224903869753, "grad_norm": 0.052308861166238785, "learning_rate": 1.7612451135344954e-06, "loss": 0.001, "step": 91900 }, { "epoch": 1.5038861163380512, "grad_norm": 0.025419622659683228, "learning_rate": 1.760157392546392e-06, "loss": 0.0006, "step": 91910 }, { "epoch": 1.504049742289127, "grad_norm": 0.10279243439435959, "learning_rate": 1.759069935788828e-06, "loss": 0.0012, "step": 91920 }, { "epoch": 1.5042133682402028, "grad_norm": 0.03728652000427246, "learning_rate": 1.7579827433504943e-06, "loss": 0.0009, "step": 91930 }, { "epoch": 1.5043769941912788, "grad_norm": 0.0016000282485038042, "learning_rate": 1.7568958153200565e-06, "loss": 0.001, "step": 91940 }, { "epoch": 1.5045406201423546, "grad_norm": 0.04359014332294464, "learning_rate": 1.7558091517861637e-06, "loss": 0.001, "step": 91950 }, { "epoch": 1.5047042460934303, "grad_norm": 0.02580469846725464, "learning_rate": 1.7547227528374365e-06, "loss": 0.0005, "step": 91960 }, { "epoch": 1.5048678720445063, "grad_norm": 0.05423831194639206, "learning_rate": 1.7536366185624825e-06, "loss": 0.0008, "step": 91970 }, { "epoch": 1.5050314979955821, "grad_norm": 0.11060641705989838, "learning_rate": 1.752550749049879e-06, "loss": 0.0009, "step": 91980 }, { "epoch": 1.5051951239466579, "grad_norm": 0.04595125839114189, "learning_rate": 1.7514651443881891e-06, "loss": 0.0009, "step": 91990 }, { "epoch": 1.5053587498977339, "grad_norm": 0.05706120282411575, "learning_rate": 1.7503798046659481e-06, "loss": 0.0015, "step": 92000 }, { "epoch": 1.5055223758488097, "grad_norm": 0.04266009479761124, "learning_rate": 1.7492947299716755e-06, "loss": 0.0012, "step": 92010 }, { "epoch": 1.5056860017998854, "grad_norm": 0.0051775239408016205, "learning_rate": 1.748209920393865e-06, "loss": 0.0008, "step": 92020 }, { "epoch": 1.5058496277509614, "grad_norm": 0.02514449506998062, "learning_rate": 1.7471253760209883e-06, "loss": 0.0009, "step": 92030 }, { "epoch": 1.506013253702037, "grad_norm": 0.01568145677447319, "learning_rate": 1.7460410969415003e-06, "loss": 0.0022, "step": 92040 }, { "epoch": 1.506176879653113, "grad_norm": 0.009448518045246601, "learning_rate": 1.7449570832438278e-06, "loss": 0.0005, "step": 92050 }, { "epoch": 1.506340505604189, "grad_norm": 0.04503456503152847, "learning_rate": 1.7438733350163828e-06, "loss": 0.0013, "step": 92060 }, { "epoch": 1.5065041315552645, "grad_norm": 0.005923328921198845, "learning_rate": 1.7427898523475483e-06, "loss": 0.0006, "step": 92070 }, { "epoch": 1.5066677575063405, "grad_norm": 0.026780812069773674, "learning_rate": 1.7417066353256933e-06, "loss": 0.0011, "step": 92080 }, { "epoch": 1.5068313834574163, "grad_norm": 0.06798375397920609, "learning_rate": 1.7406236840391578e-06, "loss": 0.0023, "step": 92090 }, { "epoch": 1.506995009408492, "grad_norm": 0.01833983324468136, "learning_rate": 1.7395409985762673e-06, "loss": 0.0019, "step": 92100 }, { "epoch": 1.507158635359568, "grad_norm": 0.037574201822280884, "learning_rate": 1.7384585790253178e-06, "loss": 0.0009, "step": 92110 }, { "epoch": 1.5073222613106438, "grad_norm": 0.004330089315772057, "learning_rate": 1.7373764254745917e-06, "loss": 0.0003, "step": 92120 }, { "epoch": 1.5074858872617196, "grad_norm": 0.0919056385755539, "learning_rate": 1.736294538012342e-06, "loss": 0.0012, "step": 92130 }, { "epoch": 1.5076495132127956, "grad_norm": 0.043939974159002304, "learning_rate": 1.7352129167268078e-06, "loss": 0.0012, "step": 92140 }, { "epoch": 1.5078131391638714, "grad_norm": 0.03530528396368027, "learning_rate": 1.7341315617061981e-06, "loss": 0.0011, "step": 92150 }, { "epoch": 1.5079767651149472, "grad_norm": 0.040463365614414215, "learning_rate": 1.7330504730387088e-06, "loss": 0.0007, "step": 92160 }, { "epoch": 1.5081403910660232, "grad_norm": 0.014003302901983261, "learning_rate": 1.731969650812506e-06, "loss": 0.001, "step": 92170 }, { "epoch": 1.508304017017099, "grad_norm": 0.03766884654760361, "learning_rate": 1.7308890951157415e-06, "loss": 0.0012, "step": 92180 }, { "epoch": 1.5084676429681747, "grad_norm": 0.054084427654743195, "learning_rate": 1.7298088060365382e-06, "loss": 0.0009, "step": 92190 }, { "epoch": 1.5086312689192507, "grad_norm": 0.001971858786419034, "learning_rate": 1.7287287836630046e-06, "loss": 0.0007, "step": 92200 }, { "epoch": 1.5087948948703265, "grad_norm": 0.03660333529114723, "learning_rate": 1.7276490280832192e-06, "loss": 0.0011, "step": 92210 }, { "epoch": 1.5089585208214022, "grad_norm": 0.14026103913784027, "learning_rate": 1.7265695393852478e-06, "loss": 0.0013, "step": 92220 }, { "epoch": 1.5091221467724782, "grad_norm": 0.10266295820474625, "learning_rate": 1.7254903176571258e-06, "loss": 0.0011, "step": 92230 }, { "epoch": 1.5092857727235538, "grad_norm": 0.019358551129698753, "learning_rate": 1.7244113629868742e-06, "loss": 0.0007, "step": 92240 }, { "epoch": 1.5094493986746298, "grad_norm": 0.05355290323495865, "learning_rate": 1.7233326754624852e-06, "loss": 0.0023, "step": 92250 }, { "epoch": 1.5096130246257058, "grad_norm": 0.10064847767353058, "learning_rate": 1.7222542551719373e-06, "loss": 0.0013, "step": 92260 }, { "epoch": 1.5097766505767813, "grad_norm": 0.07960085570812225, "learning_rate": 1.7211761022031787e-06, "loss": 0.002, "step": 92270 }, { "epoch": 1.5099402765278573, "grad_norm": 0.012889365665614605, "learning_rate": 1.7200982166441433e-06, "loss": 0.0011, "step": 92280 }, { "epoch": 1.5101039024789331, "grad_norm": 0.080244280397892, "learning_rate": 1.7190205985827364e-06, "loss": 0.0009, "step": 92290 }, { "epoch": 1.5102675284300089, "grad_norm": 0.03683922067284584, "learning_rate": 1.7179432481068486e-06, "loss": 0.0007, "step": 92300 }, { "epoch": 1.5104311543810849, "grad_norm": 0.11845352500677109, "learning_rate": 1.716866165304341e-06, "loss": 0.0019, "step": 92310 }, { "epoch": 1.5105947803321607, "grad_norm": 0.01868821308016777, "learning_rate": 1.7157893502630608e-06, "loss": 0.0018, "step": 92320 }, { "epoch": 1.5107584062832364, "grad_norm": 0.040416914969682693, "learning_rate": 1.7147128030708266e-06, "loss": 0.0009, "step": 92330 }, { "epoch": 1.5109220322343124, "grad_norm": 0.10213281214237213, "learning_rate": 1.7136365238154378e-06, "loss": 0.0011, "step": 92340 }, { "epoch": 1.5110856581853882, "grad_norm": 0.07440522313117981, "learning_rate": 1.7125605125846738e-06, "loss": 0.0015, "step": 92350 }, { "epoch": 1.511249284136464, "grad_norm": 0.021405886858701706, "learning_rate": 1.7114847694662888e-06, "loss": 0.0006, "step": 92360 }, { "epoch": 1.51141291008754, "grad_norm": 0.037976738065481186, "learning_rate": 1.7104092945480189e-06, "loss": 0.001, "step": 92370 }, { "epoch": 1.5115765360386157, "grad_norm": 0.04305002838373184, "learning_rate": 1.709334087917573e-06, "loss": 0.001, "step": 92380 }, { "epoch": 1.5117401619896915, "grad_norm": 0.03911440819501877, "learning_rate": 1.7082591496626444e-06, "loss": 0.0007, "step": 92390 }, { "epoch": 1.5119037879407675, "grad_norm": 0.07353448122739792, "learning_rate": 1.7071844798708986e-06, "loss": 0.0009, "step": 92400 }, { "epoch": 1.5120674138918433, "grad_norm": 0.05609318986535072, "learning_rate": 1.7061100786299856e-06, "loss": 0.0015, "step": 92410 }, { "epoch": 1.512231039842919, "grad_norm": 0.01326186116784811, "learning_rate": 1.7050359460275257e-06, "loss": 0.0005, "step": 92420 }, { "epoch": 1.512394665793995, "grad_norm": 0.058928415179252625, "learning_rate": 1.7039620821511255e-06, "loss": 0.0011, "step": 92430 }, { "epoch": 1.5125582917450706, "grad_norm": 0.032803524285554886, "learning_rate": 1.7028884870883617e-06, "loss": 0.0021, "step": 92440 }, { "epoch": 1.5127219176961466, "grad_norm": 0.03961893543601036, "learning_rate": 1.7018151609267975e-06, "loss": 0.0007, "step": 92450 }, { "epoch": 1.5128855436472226, "grad_norm": 0.043346889317035675, "learning_rate": 1.700742103753965e-06, "loss": 0.0005, "step": 92460 }, { "epoch": 1.5130491695982982, "grad_norm": 0.0646902546286583, "learning_rate": 1.6996693156573835e-06, "loss": 0.0011, "step": 92470 }, { "epoch": 1.5132127955493742, "grad_norm": 0.09837473928928375, "learning_rate": 1.6985967967245426e-06, "loss": 0.001, "step": 92480 }, { "epoch": 1.51337642150045, "grad_norm": 0.02862619049847126, "learning_rate": 1.6975245470429158e-06, "loss": 0.0011, "step": 92490 }, { "epoch": 1.5135400474515257, "grad_norm": 0.015240843407809734, "learning_rate": 1.6964525666999493e-06, "loss": 0.0021, "step": 92500 }, { "epoch": 1.5137036734026017, "grad_norm": 0.06871083378791809, "learning_rate": 1.6953808557830737e-06, "loss": 0.0011, "step": 92510 }, { "epoch": 1.5138672993536775, "grad_norm": 0.08516515046358109, "learning_rate": 1.6943094143796906e-06, "loss": 0.0015, "step": 92520 }, { "epoch": 1.5140309253047533, "grad_norm": 0.061271414160728455, "learning_rate": 1.693238242577186e-06, "loss": 0.0006, "step": 92530 }, { "epoch": 1.5141945512558292, "grad_norm": 0.06548325717449188, "learning_rate": 1.6921673404629185e-06, "loss": 0.0044, "step": 92540 }, { "epoch": 1.514358177206905, "grad_norm": 0.07551167905330658, "learning_rate": 1.6910967081242296e-06, "loss": 0.0005, "step": 92550 }, { "epoch": 1.5145218031579808, "grad_norm": 0.032453831285238266, "learning_rate": 1.6900263456484345e-06, "loss": 0.0033, "step": 92560 }, { "epoch": 1.5146854291090568, "grad_norm": 0.02420182339847088, "learning_rate": 1.6889562531228304e-06, "loss": 0.0011, "step": 92570 }, { "epoch": 1.5148490550601326, "grad_norm": 0.03445042297244072, "learning_rate": 1.687886430634687e-06, "loss": 0.002, "step": 92580 }, { "epoch": 1.5150126810112083, "grad_norm": 0.08497074246406555, "learning_rate": 1.68681687827126e-06, "loss": 0.0008, "step": 92590 }, { "epoch": 1.5151763069622843, "grad_norm": 0.03298862650990486, "learning_rate": 1.685747596119774e-06, "loss": 0.0007, "step": 92600 }, { "epoch": 1.51533993291336, "grad_norm": 0.028297094628214836, "learning_rate": 1.6846785842674396e-06, "loss": 0.0015, "step": 92610 }, { "epoch": 1.5155035588644359, "grad_norm": 0.049940288066864014, "learning_rate": 1.6836098428014386e-06, "loss": 0.0011, "step": 92620 }, { "epoch": 1.5156671848155119, "grad_norm": 0.039305780082941055, "learning_rate": 1.6825413718089372e-06, "loss": 0.0007, "step": 92630 }, { "epoch": 1.5158308107665874, "grad_norm": 0.11006177961826324, "learning_rate": 1.6814731713770727e-06, "loss": 0.0012, "step": 92640 }, { "epoch": 1.5159944367176634, "grad_norm": 0.015549235977232456, "learning_rate": 1.6804052415929672e-06, "loss": 0.0008, "step": 92650 }, { "epoch": 1.5161580626687394, "grad_norm": 0.041310086846351624, "learning_rate": 1.6793375825437163e-06, "loss": 0.0011, "step": 92660 }, { "epoch": 1.516321688619815, "grad_norm": 0.09205932170152664, "learning_rate": 1.6782701943163926e-06, "loss": 0.0014, "step": 92670 }, { "epoch": 1.516485314570891, "grad_norm": 0.043636806309223175, "learning_rate": 1.6772030769980519e-06, "loss": 0.0007, "step": 92680 }, { "epoch": 1.5166489405219667, "grad_norm": 0.10762016475200653, "learning_rate": 1.6761362306757217e-06, "loss": 0.001, "step": 92690 }, { "epoch": 1.5168125664730425, "grad_norm": 0.033088572323322296, "learning_rate": 1.6750696554364132e-06, "loss": 0.0007, "step": 92700 }, { "epoch": 1.5169761924241185, "grad_norm": 0.0982365757226944, "learning_rate": 1.67400335136711e-06, "loss": 0.0015, "step": 92710 }, { "epoch": 1.5171398183751943, "grad_norm": 0.062267448753118515, "learning_rate": 1.6729373185547788e-06, "loss": 0.0007, "step": 92720 }, { "epoch": 1.51730344432627, "grad_norm": 0.15004102885723114, "learning_rate": 1.671871557086358e-06, "loss": 0.0019, "step": 92730 }, { "epoch": 1.517467070277346, "grad_norm": 0.05737435445189476, "learning_rate": 1.670806067048772e-06, "loss": 0.001, "step": 92740 }, { "epoch": 1.5176306962284218, "grad_norm": 0.013875706121325493, "learning_rate": 1.6697408485289145e-06, "loss": 0.0007, "step": 92750 }, { "epoch": 1.5177943221794976, "grad_norm": 0.008382689207792282, "learning_rate": 1.6686759016136645e-06, "loss": 0.0012, "step": 92760 }, { "epoch": 1.5179579481305736, "grad_norm": 0.011063765734434128, "learning_rate": 1.6676112263898725e-06, "loss": 0.0009, "step": 92770 }, { "epoch": 1.5181215740816494, "grad_norm": 0.014943902380764484, "learning_rate": 1.6665468229443721e-06, "loss": 0.0008, "step": 92780 }, { "epoch": 1.5182852000327252, "grad_norm": 0.034328170120716095, "learning_rate": 1.6654826913639705e-06, "loss": 0.0005, "step": 92790 }, { "epoch": 1.5184488259838012, "grad_norm": 0.012002581730484962, "learning_rate": 1.664418831735457e-06, "loss": 0.0012, "step": 92800 }, { "epoch": 1.5186124519348767, "grad_norm": 0.03546871617436409, "learning_rate": 1.663355244145593e-06, "loss": 0.0009, "step": 92810 }, { "epoch": 1.5187760778859527, "grad_norm": 0.041267212480306625, "learning_rate": 1.6622919286811251e-06, "loss": 0.0009, "step": 92820 }, { "epoch": 1.5189397038370287, "grad_norm": 0.031755756586790085, "learning_rate": 1.6612288854287695e-06, "loss": 0.0007, "step": 92830 }, { "epoch": 1.5191033297881043, "grad_norm": 0.15127228200435638, "learning_rate": 1.6601661144752284e-06, "loss": 0.0015, "step": 92840 }, { "epoch": 1.5192669557391802, "grad_norm": 0.07316996902227402, "learning_rate": 1.6591036159071744e-06, "loss": 0.0014, "step": 92850 }, { "epoch": 1.519430581690256, "grad_norm": 0.0188047606498003, "learning_rate": 1.6580413898112646e-06, "loss": 0.0017, "step": 92860 }, { "epoch": 1.5195942076413318, "grad_norm": 0.020672926679253578, "learning_rate": 1.6569794362741265e-06, "loss": 0.0004, "step": 92870 }, { "epoch": 1.5197578335924078, "grad_norm": 0.08664774894714355, "learning_rate": 1.6559177553823736e-06, "loss": 0.0006, "step": 92880 }, { "epoch": 1.5199214595434836, "grad_norm": 0.056326452642679214, "learning_rate": 1.6548563472225892e-06, "loss": 0.0012, "step": 92890 }, { "epoch": 1.5200850854945593, "grad_norm": 0.041529614478349686, "learning_rate": 1.6537952118813423e-06, "loss": 0.0008, "step": 92900 }, { "epoch": 1.5202487114456353, "grad_norm": 0.009428061544895172, "learning_rate": 1.652734349445171e-06, "loss": 0.0007, "step": 92910 }, { "epoch": 1.5204123373967111, "grad_norm": 0.03109990619122982, "learning_rate": 1.6516737600005994e-06, "loss": 0.0011, "step": 92920 }, { "epoch": 1.5205759633477869, "grad_norm": 0.013769006356596947, "learning_rate": 1.650613443634122e-06, "loss": 0.0011, "step": 92930 }, { "epoch": 1.5207395892988629, "grad_norm": 0.010103962384164333, "learning_rate": 1.6495534004322183e-06, "loss": 0.0012, "step": 92940 }, { "epoch": 1.5209032152499387, "grad_norm": 0.001887293765321374, "learning_rate": 1.6484936304813387e-06, "loss": 0.0014, "step": 92950 }, { "epoch": 1.5210668412010144, "grad_norm": 0.1247955784201622, "learning_rate": 1.6474341338679173e-06, "loss": 0.002, "step": 92960 }, { "epoch": 1.5212304671520904, "grad_norm": 0.05327566713094711, "learning_rate": 1.64637491067836e-06, "loss": 0.0007, "step": 92970 }, { "epoch": 1.5213940931031662, "grad_norm": 0.11890830844640732, "learning_rate": 1.6453159609990565e-06, "loss": 0.0013, "step": 92980 }, { "epoch": 1.521557719054242, "grad_norm": 0.061663951724767685, "learning_rate": 1.6442572849163695e-06, "loss": 0.0011, "step": 92990 }, { "epoch": 1.521721345005318, "grad_norm": 0.0016993772005662322, "learning_rate": 1.64319888251664e-06, "loss": 0.0006, "step": 93000 }, { "epoch": 1.5218849709563935, "grad_norm": 0.15409444272518158, "learning_rate": 1.64214075388619e-06, "loss": 0.0018, "step": 93010 }, { "epoch": 1.5220485969074695, "grad_norm": 0.046371039003133774, "learning_rate": 1.6410828991113143e-06, "loss": 0.0011, "step": 93020 }, { "epoch": 1.5222122228585455, "grad_norm": 0.06476078927516937, "learning_rate": 1.6400253182782905e-06, "loss": 0.0016, "step": 93030 }, { "epoch": 1.522375848809621, "grad_norm": 0.0310774315148592, "learning_rate": 1.6389680114733686e-06, "loss": 0.0015, "step": 93040 }, { "epoch": 1.522539474760697, "grad_norm": 0.14070840179920197, "learning_rate": 1.6379109787827824e-06, "loss": 0.0016, "step": 93050 }, { "epoch": 1.5227031007117728, "grad_norm": 0.062336936593055725, "learning_rate": 1.6368542202927362e-06, "loss": 0.0014, "step": 93060 }, { "epoch": 1.5228667266628486, "grad_norm": 0.026353567838668823, "learning_rate": 1.6357977360894188e-06, "loss": 0.0008, "step": 93070 }, { "epoch": 1.5230303526139246, "grad_norm": 0.06551194190979004, "learning_rate": 1.6347415262589906e-06, "loss": 0.0016, "step": 93080 }, { "epoch": 1.5231939785650004, "grad_norm": 0.11125332862138748, "learning_rate": 1.6336855908875958e-06, "loss": 0.001, "step": 93090 }, { "epoch": 1.5233576045160762, "grad_norm": 0.06439101696014404, "learning_rate": 1.632629930061349e-06, "loss": 0.0014, "step": 93100 }, { "epoch": 1.5235212304671522, "grad_norm": 0.03578665480017662, "learning_rate": 1.63157454386635e-06, "loss": 0.001, "step": 93110 }, { "epoch": 1.523684856418228, "grad_norm": 0.057246576994657516, "learning_rate": 1.6305194323886698e-06, "loss": 0.0011, "step": 93120 }, { "epoch": 1.5238484823693037, "grad_norm": 0.10694283992052078, "learning_rate": 1.6294645957143618e-06, "loss": 0.0008, "step": 93130 }, { "epoch": 1.5240121083203797, "grad_norm": 0.011965298093855381, "learning_rate": 1.6284100339294518e-06, "loss": 0.0005, "step": 93140 }, { "epoch": 1.5241757342714555, "grad_norm": 0.020136523991823196, "learning_rate": 1.6273557471199508e-06, "loss": 0.0021, "step": 93150 }, { "epoch": 1.5243393602225312, "grad_norm": 0.005472683813422918, "learning_rate": 1.626301735371838e-06, "loss": 0.001, "step": 93160 }, { "epoch": 1.5245029861736072, "grad_norm": 0.12966491281986237, "learning_rate": 1.6252479987710795e-06, "loss": 0.0013, "step": 93170 }, { "epoch": 1.524666612124683, "grad_norm": 0.07363984733819962, "learning_rate": 1.62419453740361e-06, "loss": 0.001, "step": 93180 }, { "epoch": 1.5248302380757588, "grad_norm": 0.006952513940632343, "learning_rate": 1.6231413513553506e-06, "loss": 0.0008, "step": 93190 }, { "epoch": 1.5249938640268348, "grad_norm": 0.022590002045035362, "learning_rate": 1.6220884407121924e-06, "loss": 0.0013, "step": 93200 }, { "epoch": 1.5251574899779103, "grad_norm": 0.14063996076583862, "learning_rate": 1.6210358055600095e-06, "loss": 0.0014, "step": 93210 }, { "epoch": 1.5253211159289863, "grad_norm": 0.043043188750743866, "learning_rate": 1.619983445984648e-06, "loss": 0.0012, "step": 93220 }, { "epoch": 1.5254847418800623, "grad_norm": 0.021678877994418144, "learning_rate": 1.6189313620719388e-06, "loss": 0.0009, "step": 93230 }, { "epoch": 1.5256483678311379, "grad_norm": 0.031118957325816154, "learning_rate": 1.617879553907683e-06, "loss": 0.0017, "step": 93240 }, { "epoch": 1.5258119937822139, "grad_norm": 0.048221733421087265, "learning_rate": 1.6168280215776649e-06, "loss": 0.0007, "step": 93250 }, { "epoch": 1.5259756197332897, "grad_norm": 0.06168932095170021, "learning_rate": 1.615776765167642e-06, "loss": 0.0009, "step": 93260 }, { "epoch": 1.5261392456843654, "grad_norm": 0.010130315087735653, "learning_rate": 1.614725784763353e-06, "loss": 0.0005, "step": 93270 }, { "epoch": 1.5263028716354414, "grad_norm": 0.09475027024745941, "learning_rate": 1.6136750804505096e-06, "loss": 0.0007, "step": 93280 }, { "epoch": 1.5264664975865172, "grad_norm": 0.02594883181154728, "learning_rate": 1.6126246523148075e-06, "loss": 0.0007, "step": 93290 }, { "epoch": 1.526630123537593, "grad_norm": 0.05157390981912613, "learning_rate": 1.6115745004419136e-06, "loss": 0.001, "step": 93300 }, { "epoch": 1.526793749488669, "grad_norm": 0.18255183100700378, "learning_rate": 1.6105246249174733e-06, "loss": 0.0022, "step": 93310 }, { "epoch": 1.5269573754397447, "grad_norm": 0.007249930407851934, "learning_rate": 1.609475025827114e-06, "loss": 0.0022, "step": 93320 }, { "epoch": 1.5271210013908205, "grad_norm": 0.04296432062983513, "learning_rate": 1.6084257032564349e-06, "loss": 0.0068, "step": 93330 }, { "epoch": 1.5272846273418965, "grad_norm": 0.03299983590841293, "learning_rate": 1.6073766572910183e-06, "loss": 0.001, "step": 93340 }, { "epoch": 1.5274482532929723, "grad_norm": 0.05760448798537254, "learning_rate": 1.6063278880164163e-06, "loss": 0.0013, "step": 93350 }, { "epoch": 1.527611879244048, "grad_norm": 0.01109104324132204, "learning_rate": 1.6052793955181673e-06, "loss": 0.0008, "step": 93360 }, { "epoch": 1.527775505195124, "grad_norm": 0.025729773566126823, "learning_rate": 1.60423117988178e-06, "loss": 0.0011, "step": 93370 }, { "epoch": 1.5279391311461998, "grad_norm": 0.007150289602577686, "learning_rate": 1.6031832411927451e-06, "loss": 0.0009, "step": 93380 }, { "epoch": 1.5281027570972756, "grad_norm": 0.030729493126273155, "learning_rate": 1.6021355795365268e-06, "loss": 0.0017, "step": 93390 }, { "epoch": 1.5282663830483516, "grad_norm": 0.032178524881601334, "learning_rate": 1.6010881949985718e-06, "loss": 0.0008, "step": 93400 }, { "epoch": 1.5284300089994272, "grad_norm": 0.08488941192626953, "learning_rate": 1.600041087664298e-06, "loss": 0.0013, "step": 93410 }, { "epoch": 1.5285936349505032, "grad_norm": 0.06927945464849472, "learning_rate": 1.5989942576191064e-06, "loss": 0.0012, "step": 93420 }, { "epoch": 1.5287572609015792, "grad_norm": 0.025662565603852272, "learning_rate": 1.597947704948371e-06, "loss": 0.0005, "step": 93430 }, { "epoch": 1.5289208868526547, "grad_norm": 0.03501514717936516, "learning_rate": 1.5969014297374475e-06, "loss": 0.001, "step": 93440 }, { "epoch": 1.5290845128037307, "grad_norm": 0.07361352443695068, "learning_rate": 1.595855432071663e-06, "loss": 0.0014, "step": 93450 }, { "epoch": 1.5292481387548065, "grad_norm": 0.09956180304288864, "learning_rate": 1.59480971203633e-06, "loss": 0.0018, "step": 93460 }, { "epoch": 1.5294117647058822, "grad_norm": 0.024720311164855957, "learning_rate": 1.5937642697167288e-06, "loss": 0.0008, "step": 93470 }, { "epoch": 1.5295753906569582, "grad_norm": 0.0025174375623464584, "learning_rate": 1.5927191051981273e-06, "loss": 0.0009, "step": 93480 }, { "epoch": 1.529739016608034, "grad_norm": 0.0181483905762434, "learning_rate": 1.5916742185657608e-06, "loss": 0.0016, "step": 93490 }, { "epoch": 1.5299026425591098, "grad_norm": 0.10036621987819672, "learning_rate": 1.5906296099048508e-06, "loss": 0.0014, "step": 93500 }, { "epoch": 1.5300662685101858, "grad_norm": 0.03790244832634926, "learning_rate": 1.5895852793005883e-06, "loss": 0.0006, "step": 93510 }, { "epoch": 1.5302298944612616, "grad_norm": 0.07828021049499512, "learning_rate": 1.588541226838149e-06, "loss": 0.0008, "step": 93520 }, { "epoch": 1.5303935204123373, "grad_norm": 0.0891806110739708, "learning_rate": 1.5874974526026783e-06, "loss": 0.0014, "step": 93530 }, { "epoch": 1.5305571463634133, "grad_norm": 0.020414000377058983, "learning_rate": 1.5864539566793075e-06, "loss": 0.0007, "step": 93540 }, { "epoch": 1.530720772314489, "grad_norm": 0.08390213549137115, "learning_rate": 1.585410739153136e-06, "loss": 0.0017, "step": 93550 }, { "epoch": 1.5308843982655649, "grad_norm": 0.13077345490455627, "learning_rate": 1.584367800109249e-06, "loss": 0.0023, "step": 93560 }, { "epoch": 1.5310480242166409, "grad_norm": 0.1087348535656929, "learning_rate": 1.583325139632701e-06, "loss": 0.0011, "step": 93570 }, { "epoch": 1.5312116501677167, "grad_norm": 0.03183962404727936, "learning_rate": 1.5822827578085327e-06, "loss": 0.0006, "step": 93580 }, { "epoch": 1.5313752761187924, "grad_norm": 0.13251346349716187, "learning_rate": 1.5812406547217524e-06, "loss": 0.0008, "step": 93590 }, { "epoch": 1.5315389020698684, "grad_norm": 0.11284696310758591, "learning_rate": 1.580198830457354e-06, "loss": 0.0016, "step": 93600 }, { "epoch": 1.531702528020944, "grad_norm": 0.005216659512370825, "learning_rate": 1.579157285100303e-06, "loss": 0.0018, "step": 93610 }, { "epoch": 1.53186615397202, "grad_norm": 0.06284871697425842, "learning_rate": 1.578116018735546e-06, "loss": 0.0008, "step": 93620 }, { "epoch": 1.532029779923096, "grad_norm": 0.05726700276136398, "learning_rate": 1.5770750314480043e-06, "loss": 0.0014, "step": 93630 }, { "epoch": 1.5321934058741715, "grad_norm": 0.03996344655752182, "learning_rate": 1.5760343233225761e-06, "loss": 0.0012, "step": 93640 }, { "epoch": 1.5323570318252475, "grad_norm": 0.05469159409403801, "learning_rate": 1.5749938944441407e-06, "loss": 0.0008, "step": 93650 }, { "epoch": 1.5325206577763233, "grad_norm": 0.07408096641302109, "learning_rate": 1.5739537448975483e-06, "loss": 0.0011, "step": 93660 }, { "epoch": 1.532684283727399, "grad_norm": 0.011616322211921215, "learning_rate": 1.5729138747676343e-06, "loss": 0.0016, "step": 93670 }, { "epoch": 1.532847909678475, "grad_norm": 0.0322139598429203, "learning_rate": 1.5718742841392027e-06, "loss": 0.0006, "step": 93680 }, { "epoch": 1.5330115356295508, "grad_norm": 0.0314924418926239, "learning_rate": 1.5708349730970424e-06, "loss": 0.0008, "step": 93690 }, { "epoch": 1.5331751615806266, "grad_norm": 0.002176120178773999, "learning_rate": 1.5697959417259134e-06, "loss": 0.0013, "step": 93700 }, { "epoch": 1.5333387875317026, "grad_norm": 0.11143457144498825, "learning_rate": 1.5687571901105586e-06, "loss": 0.0008, "step": 93710 }, { "epoch": 1.5335024134827784, "grad_norm": 0.05008303374052048, "learning_rate": 1.5677187183356912e-06, "loss": 0.0012, "step": 93720 }, { "epoch": 1.5336660394338542, "grad_norm": 0.04798712953925133, "learning_rate": 1.5666805264860096e-06, "loss": 0.0014, "step": 93730 }, { "epoch": 1.5338296653849302, "grad_norm": 0.019730838015675545, "learning_rate": 1.5656426146461812e-06, "loss": 0.0008, "step": 93740 }, { "epoch": 1.533993291336006, "grad_norm": 0.12769195437431335, "learning_rate": 1.564604982900858e-06, "loss": 0.0012, "step": 93750 }, { "epoch": 1.5341569172870817, "grad_norm": 0.09792885929346085, "learning_rate": 1.5635676313346627e-06, "loss": 0.0018, "step": 93760 }, { "epoch": 1.5343205432381577, "grad_norm": 0.055695660412311554, "learning_rate": 1.5625305600322016e-06, "loss": 0.0008, "step": 93770 }, { "epoch": 1.5344841691892332, "grad_norm": 0.06768687069416046, "learning_rate": 1.5614937690780506e-06, "loss": 0.0008, "step": 93780 }, { "epoch": 1.5346477951403092, "grad_norm": 0.08766273409128189, "learning_rate": 1.5604572585567707e-06, "loss": 0.0011, "step": 93790 }, { "epoch": 1.5348114210913852, "grad_norm": 0.042774517089128494, "learning_rate": 1.5594210285528932e-06, "loss": 0.0007, "step": 93800 }, { "epoch": 1.5349750470424608, "grad_norm": 0.05352846905589104, "learning_rate": 1.558385079150932e-06, "loss": 0.0016, "step": 93810 }, { "epoch": 1.5351386729935368, "grad_norm": 0.04533109441399574, "learning_rate": 1.5573494104353726e-06, "loss": 0.0013, "step": 93820 }, { "epoch": 1.5353022989446126, "grad_norm": 0.05043867230415344, "learning_rate": 1.5563140224906841e-06, "loss": 0.0011, "step": 93830 }, { "epoch": 1.5354659248956883, "grad_norm": 0.03465856984257698, "learning_rate": 1.555278915401306e-06, "loss": 0.001, "step": 93840 }, { "epoch": 1.5356295508467643, "grad_norm": 0.01542655285447836, "learning_rate": 1.5542440892516614e-06, "loss": 0.0008, "step": 93850 }, { "epoch": 1.53579317679784, "grad_norm": 0.052481234073638916, "learning_rate": 1.5532095441261437e-06, "loss": 0.0019, "step": 93860 }, { "epoch": 1.5359568027489159, "grad_norm": 0.014215247705578804, "learning_rate": 1.55217528010913e-06, "loss": 0.0007, "step": 93870 }, { "epoch": 1.5361204286999919, "grad_norm": 0.03126933053135872, "learning_rate": 1.5511412972849682e-06, "loss": 0.0006, "step": 93880 }, { "epoch": 1.5362840546510677, "grad_norm": 0.01727559231221676, "learning_rate": 1.5501075957379903e-06, "loss": 0.0018, "step": 93890 }, { "epoch": 1.5364476806021434, "grad_norm": 0.008257654495537281, "learning_rate": 1.5490741755524973e-06, "loss": 0.0008, "step": 93900 }, { "epoch": 1.5366113065532194, "grad_norm": 0.01606535166501999, "learning_rate": 1.5480410368127752e-06, "loss": 0.0012, "step": 93910 }, { "epoch": 1.5367749325042952, "grad_norm": 0.014916467480361462, "learning_rate": 1.54700817960308e-06, "loss": 0.0006, "step": 93920 }, { "epoch": 1.536938558455371, "grad_norm": 0.06592939049005508, "learning_rate": 1.5459756040076512e-06, "loss": 0.0104, "step": 93930 }, { "epoch": 1.537102184406447, "grad_norm": 0.019060326740145683, "learning_rate": 1.5449433101107002e-06, "loss": 0.0005, "step": 93940 }, { "epoch": 1.5372658103575227, "grad_norm": 0.0022325606551021338, "learning_rate": 1.5439112979964167e-06, "loss": 0.0009, "step": 93950 }, { "epoch": 1.5374294363085985, "grad_norm": 0.032140132039785385, "learning_rate": 1.5428795677489704e-06, "loss": 0.0006, "step": 93960 }, { "epoch": 1.5375930622596745, "grad_norm": 0.021835198625922203, "learning_rate": 1.5418481194525032e-06, "loss": 0.0007, "step": 93970 }, { "epoch": 1.53775668821075, "grad_norm": 0.02313242293894291, "learning_rate": 1.5408169531911388e-06, "loss": 0.0012, "step": 93980 }, { "epoch": 1.537920314161826, "grad_norm": 0.04488152638077736, "learning_rate": 1.539786069048973e-06, "loss": 0.0013, "step": 93990 }, { "epoch": 1.538083940112902, "grad_norm": 0.23308402299880981, "learning_rate": 1.5387554671100841e-06, "loss": 0.002, "step": 94000 }, { "epoch": 1.5382475660639776, "grad_norm": 0.11276835948228836, "learning_rate": 1.537725147458522e-06, "loss": 0.0014, "step": 94010 }, { "epoch": 1.5384111920150536, "grad_norm": 0.04927190765738487, "learning_rate": 1.536695110178319e-06, "loss": 0.002, "step": 94020 }, { "epoch": 1.5385748179661294, "grad_norm": 0.02559305541217327, "learning_rate": 1.5356653553534772e-06, "loss": 0.0017, "step": 94030 }, { "epoch": 1.5387384439172052, "grad_norm": 0.039904557168483734, "learning_rate": 1.5346358830679847e-06, "loss": 0.0011, "step": 94040 }, { "epoch": 1.5389020698682812, "grad_norm": 0.00763119338080287, "learning_rate": 1.5336066934057974e-06, "loss": 0.0008, "step": 94050 }, { "epoch": 1.539065695819357, "grad_norm": 0.09840935468673706, "learning_rate": 1.5325777864508562e-06, "loss": 0.002, "step": 94060 }, { "epoch": 1.5392293217704327, "grad_norm": 0.048158399760723114, "learning_rate": 1.531549162287072e-06, "loss": 0.0011, "step": 94070 }, { "epoch": 1.5393929477215087, "grad_norm": 0.03175771236419678, "learning_rate": 1.530520820998339e-06, "loss": 0.0012, "step": 94080 }, { "epoch": 1.5395565736725845, "grad_norm": 0.04326491802930832, "learning_rate": 1.529492762668522e-06, "loss": 0.0015, "step": 94090 }, { "epoch": 1.5397201996236602, "grad_norm": 0.05250867083668709, "learning_rate": 1.5284649873814693e-06, "loss": 0.0011, "step": 94100 }, { "epoch": 1.5398838255747362, "grad_norm": 0.06214847415685654, "learning_rate": 1.5274374952209997e-06, "loss": 0.0012, "step": 94110 }, { "epoch": 1.540047451525812, "grad_norm": 0.018423575907945633, "learning_rate": 1.5264102862709145e-06, "loss": 0.001, "step": 94120 }, { "epoch": 1.5402110774768878, "grad_norm": 0.04551853984594345, "learning_rate": 1.5253833606149864e-06, "loss": 0.0009, "step": 94130 }, { "epoch": 1.5403747034279638, "grad_norm": 0.1042928695678711, "learning_rate": 1.5243567183369718e-06, "loss": 0.0015, "step": 94140 }, { "epoch": 1.5405383293790396, "grad_norm": 0.03132132068276405, "learning_rate": 1.5233303595205962e-06, "loss": 0.0007, "step": 94150 }, { "epoch": 1.5407019553301153, "grad_norm": 0.04126354679465294, "learning_rate": 1.5223042842495695e-06, "loss": 0.0018, "step": 94160 }, { "epoch": 1.5408655812811913, "grad_norm": 0.06351091712713242, "learning_rate": 1.521278492607572e-06, "loss": 0.0009, "step": 94170 }, { "epoch": 1.5410292072322669, "grad_norm": 0.0328158438205719, "learning_rate": 1.520252984678266e-06, "loss": 0.0015, "step": 94180 }, { "epoch": 1.5411928331833429, "grad_norm": 0.04032239690423012, "learning_rate": 1.519227760545286e-06, "loss": 0.0016, "step": 94190 }, { "epoch": 1.5413564591344189, "grad_norm": 0.03198780491948128, "learning_rate": 1.5182028202922494e-06, "loss": 0.0008, "step": 94200 }, { "epoch": 1.5415200850854944, "grad_norm": 0.04413412883877754, "learning_rate": 1.517178164002742e-06, "loss": 0.0009, "step": 94210 }, { "epoch": 1.5416837110365704, "grad_norm": 0.01389741525053978, "learning_rate": 1.5161537917603363e-06, "loss": 0.0009, "step": 94220 }, { "epoch": 1.5418473369876462, "grad_norm": 0.01119904313236475, "learning_rate": 1.5151297036485729e-06, "loss": 0.0008, "step": 94230 }, { "epoch": 1.542010962938722, "grad_norm": 0.0933723971247673, "learning_rate": 1.5141058997509755e-06, "loss": 0.001, "step": 94240 }, { "epoch": 1.542174588889798, "grad_norm": 0.03460763022303581, "learning_rate": 1.5130823801510397e-06, "loss": 0.0007, "step": 94250 }, { "epoch": 1.5423382148408737, "grad_norm": 0.05208886042237282, "learning_rate": 1.5120591449322425e-06, "loss": 0.0013, "step": 94260 }, { "epoch": 1.5425018407919495, "grad_norm": 0.03544263169169426, "learning_rate": 1.511036194178035e-06, "loss": 0.0009, "step": 94270 }, { "epoch": 1.5426654667430255, "grad_norm": 0.04742789641022682, "learning_rate": 1.510013527971843e-06, "loss": 0.0012, "step": 94280 }, { "epoch": 1.5428290926941013, "grad_norm": 0.02192148193717003, "learning_rate": 1.5089911463970752e-06, "loss": 0.001, "step": 94290 }, { "epoch": 1.542992718645177, "grad_norm": 0.03941531851887703, "learning_rate": 1.5079690495371109e-06, "loss": 0.0011, "step": 94300 }, { "epoch": 1.543156344596253, "grad_norm": 0.037867944687604904, "learning_rate": 1.5069472374753113e-06, "loss": 0.0008, "step": 94310 }, { "epoch": 1.5433199705473288, "grad_norm": 0.09288782626390457, "learning_rate": 1.5059257102950092e-06, "loss": 0.0009, "step": 94320 }, { "epoch": 1.5434835964984046, "grad_norm": 0.08688194304704666, "learning_rate": 1.5049044680795193e-06, "loss": 0.0011, "step": 94330 }, { "epoch": 1.5436472224494806, "grad_norm": 0.022290518507361412, "learning_rate": 1.5038835109121285e-06, "loss": 0.001, "step": 94340 }, { "epoch": 1.5438108484005564, "grad_norm": 0.04387383908033371, "learning_rate": 1.5028628388761058e-06, "loss": 0.0007, "step": 94350 }, { "epoch": 1.5439744743516322, "grad_norm": 0.0038051707670092583, "learning_rate": 1.5018424520546893e-06, "loss": 0.0009, "step": 94360 }, { "epoch": 1.5441381003027081, "grad_norm": 0.011049297638237476, "learning_rate": 1.5008223505311026e-06, "loss": 0.0012, "step": 94370 }, { "epoch": 1.5443017262537837, "grad_norm": 0.057600632309913635, "learning_rate": 1.4998025343885386e-06, "loss": 0.0013, "step": 94380 }, { "epoch": 1.5444653522048597, "grad_norm": 0.0033113150857388973, "learning_rate": 1.498783003710172e-06, "loss": 0.0006, "step": 94390 }, { "epoch": 1.5446289781559357, "grad_norm": 0.05279870331287384, "learning_rate": 1.4977637585791504e-06, "loss": 0.0013, "step": 94400 }, { "epoch": 1.5447926041070112, "grad_norm": 0.036784250289201736, "learning_rate": 1.496744799078602e-06, "loss": 0.002, "step": 94410 }, { "epoch": 1.5449562300580872, "grad_norm": 0.03191140294075012, "learning_rate": 1.4957261252916277e-06, "loss": 0.0006, "step": 94420 }, { "epoch": 1.545119856009163, "grad_norm": 0.055791281163692474, "learning_rate": 1.4947077373013096e-06, "loss": 0.0015, "step": 94430 }, { "epoch": 1.5452834819602388, "grad_norm": 0.00501165771856904, "learning_rate": 1.4936896351907003e-06, "loss": 0.0015, "step": 94440 }, { "epoch": 1.5454471079113148, "grad_norm": 0.027386831119656563, "learning_rate": 1.4926718190428369e-06, "loss": 0.0015, "step": 94450 }, { "epoch": 1.5456107338623906, "grad_norm": 0.03638581931591034, "learning_rate": 1.4916542889407254e-06, "loss": 0.0008, "step": 94460 }, { "epoch": 1.5457743598134663, "grad_norm": 0.14036647975444794, "learning_rate": 1.4906370449673551e-06, "loss": 0.0014, "step": 94470 }, { "epoch": 1.5459379857645423, "grad_norm": 0.08946513384580612, "learning_rate": 1.4896200872056859e-06, "loss": 0.0015, "step": 94480 }, { "epoch": 1.546101611715618, "grad_norm": 0.2316887229681015, "learning_rate": 1.4886034157386608e-06, "loss": 0.0014, "step": 94490 }, { "epoch": 1.5462652376666939, "grad_norm": 0.07644547522068024, "learning_rate": 1.4875870306491929e-06, "loss": 0.0015, "step": 94500 }, { "epoch": 1.5464288636177699, "grad_norm": 0.04918934404850006, "learning_rate": 1.4865709320201777e-06, "loss": 0.0015, "step": 94510 }, { "epoch": 1.5465924895688457, "grad_norm": 0.047421399503946304, "learning_rate": 1.485555119934482e-06, "loss": 0.0007, "step": 94520 }, { "epoch": 1.5467561155199214, "grad_norm": 0.038735903799533844, "learning_rate": 1.4845395944749553e-06, "loss": 0.0005, "step": 94530 }, { "epoch": 1.5469197414709974, "grad_norm": 0.05673230439424515, "learning_rate": 1.4835243557244188e-06, "loss": 0.0007, "step": 94540 }, { "epoch": 1.547083367422073, "grad_norm": 0.06433865427970886, "learning_rate": 1.4825094037656717e-06, "loss": 0.0009, "step": 94550 }, { "epoch": 1.547246993373149, "grad_norm": 0.052542008459568024, "learning_rate": 1.481494738681488e-06, "loss": 0.0011, "step": 94560 }, { "epoch": 1.547410619324225, "grad_norm": 0.018018361181020737, "learning_rate": 1.4804803605546246e-06, "loss": 0.0015, "step": 94570 }, { "epoch": 1.5475742452753005, "grad_norm": 0.025132155045866966, "learning_rate": 1.4794662694678069e-06, "loss": 0.001, "step": 94580 }, { "epoch": 1.5477378712263765, "grad_norm": 0.0924421176314354, "learning_rate": 1.4784524655037436e-06, "loss": 0.0009, "step": 94590 }, { "epoch": 1.5479014971774523, "grad_norm": 0.0738072469830513, "learning_rate": 1.4774389487451146e-06, "loss": 0.0008, "step": 94600 }, { "epoch": 1.548065123128528, "grad_norm": 0.08844118565320969, "learning_rate": 1.4764257192745818e-06, "loss": 0.0012, "step": 94610 }, { "epoch": 1.548228749079604, "grad_norm": 0.03137395903468132, "learning_rate": 1.4754127771747772e-06, "loss": 0.0011, "step": 94620 }, { "epoch": 1.5483923750306798, "grad_norm": 0.12428475171327591, "learning_rate": 1.4744001225283167e-06, "loss": 0.0026, "step": 94630 }, { "epoch": 1.5485560009817556, "grad_norm": 0.02913321554660797, "learning_rate": 1.473387755417785e-06, "loss": 0.0008, "step": 94640 }, { "epoch": 1.5487196269328316, "grad_norm": 0.030589599162340164, "learning_rate": 1.4723756759257514e-06, "loss": 0.0008, "step": 94650 }, { "epoch": 1.5488832528839074, "grad_norm": 0.09706137329339981, "learning_rate": 1.471363884134754e-06, "loss": 0.0012, "step": 94660 }, { "epoch": 1.5490468788349832, "grad_norm": 0.04359102621674538, "learning_rate": 1.4703523801273145e-06, "loss": 0.0004, "step": 94670 }, { "epoch": 1.5492105047860592, "grad_norm": 0.00965199340134859, "learning_rate": 1.4693411639859239e-06, "loss": 0.0007, "step": 94680 }, { "epoch": 1.549374130737135, "grad_norm": 0.011208704672753811, "learning_rate": 1.468330235793058e-06, "loss": 0.0011, "step": 94690 }, { "epoch": 1.5495377566882107, "grad_norm": 0.04968635365366936, "learning_rate": 1.4673195956311598e-06, "loss": 0.001, "step": 94700 }, { "epoch": 1.5497013826392867, "grad_norm": 0.03595474734902382, "learning_rate": 1.4663092435826587e-06, "loss": 0.0006, "step": 94710 }, { "epoch": 1.5498650085903625, "grad_norm": 0.047162409871816635, "learning_rate": 1.465299179729951e-06, "loss": 0.0007, "step": 94720 }, { "epoch": 1.5500286345414382, "grad_norm": 0.008742806501686573, "learning_rate": 1.4642894041554174e-06, "loss": 0.001, "step": 94730 }, { "epoch": 1.5501922604925142, "grad_norm": 0.04749373346567154, "learning_rate": 1.4632799169414109e-06, "loss": 0.0006, "step": 94740 }, { "epoch": 1.5503558864435898, "grad_norm": 0.05303960293531418, "learning_rate": 1.4622707181702594e-06, "loss": 0.001, "step": 94750 }, { "epoch": 1.5505195123946658, "grad_norm": 0.059382062405347824, "learning_rate": 1.4612618079242736e-06, "loss": 0.0007, "step": 94760 }, { "epoch": 1.5506831383457418, "grad_norm": 0.04280172288417816, "learning_rate": 1.4602531862857333e-06, "loss": 0.0007, "step": 94770 }, { "epoch": 1.5508467642968173, "grad_norm": 0.01855129934847355, "learning_rate": 1.4592448533369013e-06, "loss": 0.0016, "step": 94780 }, { "epoch": 1.5510103902478933, "grad_norm": 0.061799775809049606, "learning_rate": 1.4582368091600107e-06, "loss": 0.001, "step": 94790 }, { "epoch": 1.551174016198969, "grad_norm": 0.08428354561328888, "learning_rate": 1.457229053837278e-06, "loss": 0.0015, "step": 94800 }, { "epoch": 1.5513376421500449, "grad_norm": 0.2039078027009964, "learning_rate": 1.4562215874508883e-06, "loss": 0.001, "step": 94810 }, { "epoch": 1.5515012681011209, "grad_norm": 0.04823237657546997, "learning_rate": 1.45521441008301e-06, "loss": 0.0008, "step": 94820 }, { "epoch": 1.5516648940521967, "grad_norm": 0.03977847471833229, "learning_rate": 1.454207521815783e-06, "loss": 0.0011, "step": 94830 }, { "epoch": 1.5518285200032724, "grad_norm": 0.003271718043833971, "learning_rate": 1.4532009227313281e-06, "loss": 0.001, "step": 94840 }, { "epoch": 1.5519921459543484, "grad_norm": 0.04756302013993263, "learning_rate": 1.452194612911737e-06, "loss": 0.0012, "step": 94850 }, { "epoch": 1.5521557719054242, "grad_norm": 0.03337939456105232, "learning_rate": 1.4511885924390844e-06, "loss": 0.0009, "step": 94860 }, { "epoch": 1.5523193978565, "grad_norm": 0.0635802373290062, "learning_rate": 1.4501828613954143e-06, "loss": 0.0015, "step": 94870 }, { "epoch": 1.552483023807576, "grad_norm": 0.03897978365421295, "learning_rate": 1.4491774198627545e-06, "loss": 0.0013, "step": 94880 }, { "epoch": 1.5526466497586517, "grad_norm": 0.06139339134097099, "learning_rate": 1.4481722679231014e-06, "loss": 0.0007, "step": 94890 }, { "epoch": 1.5528102757097275, "grad_norm": 0.05021706968545914, "learning_rate": 1.4471674056584357e-06, "loss": 0.0019, "step": 94900 }, { "epoch": 1.5529739016608035, "grad_norm": 0.03962188959121704, "learning_rate": 1.4461628331507066e-06, "loss": 0.0014, "step": 94910 }, { "epoch": 1.5531375276118793, "grad_norm": 0.048101626336574554, "learning_rate": 1.4451585504818478e-06, "loss": 0.0011, "step": 94920 }, { "epoch": 1.553301153562955, "grad_norm": 0.0283831637352705, "learning_rate": 1.4441545577337606e-06, "loss": 0.0007, "step": 94930 }, { "epoch": 1.553464779514031, "grad_norm": 0.07085319608449936, "learning_rate": 1.4431508549883316e-06, "loss": 0.0005, "step": 94940 }, { "epoch": 1.5536284054651066, "grad_norm": 0.01940939761698246, "learning_rate": 1.4421474423274162e-06, "loss": 0.0011, "step": 94950 }, { "epoch": 1.5537920314161826, "grad_norm": 0.04680795595049858, "learning_rate": 1.4411443198328517e-06, "loss": 0.0007, "step": 94960 }, { "epoch": 1.5539556573672586, "grad_norm": 0.12658245861530304, "learning_rate": 1.4401414875864467e-06, "loss": 0.0009, "step": 94970 }, { "epoch": 1.5541192833183342, "grad_norm": 0.011278039775788784, "learning_rate": 1.4391389456699923e-06, "loss": 0.0008, "step": 94980 }, { "epoch": 1.5542829092694102, "grad_norm": 0.036382418125867844, "learning_rate": 1.438136694165248e-06, "loss": 0.0019, "step": 94990 }, { "epoch": 1.554446535220486, "grad_norm": 0.12231285870075226, "learning_rate": 1.4371347331539587e-06, "loss": 0.0008, "step": 95000 }, { "epoch": 1.5546101611715617, "grad_norm": 0.01714707911014557, "learning_rate": 1.4361330627178367e-06, "loss": 0.0011, "step": 95010 }, { "epoch": 1.5547737871226377, "grad_norm": 0.061439938843250275, "learning_rate": 1.4351316829385785e-06, "loss": 0.001, "step": 95020 }, { "epoch": 1.5549374130737135, "grad_norm": 0.04495241865515709, "learning_rate": 1.4341305938978501e-06, "loss": 0.0013, "step": 95030 }, { "epoch": 1.5551010390247892, "grad_norm": 0.05146379396319389, "learning_rate": 1.4331297956772995e-06, "loss": 0.0007, "step": 95040 }, { "epoch": 1.5552646649758652, "grad_norm": 0.020773475989699364, "learning_rate": 1.4321292883585475e-06, "loss": 0.0011, "step": 95050 }, { "epoch": 1.555428290926941, "grad_norm": 0.01674269698560238, "learning_rate": 1.4311290720231902e-06, "loss": 0.0008, "step": 95060 }, { "epoch": 1.5555919168780168, "grad_norm": 0.029754532501101494, "learning_rate": 1.430129146752805e-06, "loss": 0.0006, "step": 95070 }, { "epoch": 1.5557555428290928, "grad_norm": 0.18505147099494934, "learning_rate": 1.429129512628939e-06, "loss": 0.0022, "step": 95080 }, { "epoch": 1.5559191687801686, "grad_norm": 0.0631747916340828, "learning_rate": 1.4281301697331223e-06, "loss": 0.0008, "step": 95090 }, { "epoch": 1.5560827947312443, "grad_norm": 0.04979654774069786, "learning_rate": 1.4271311181468555e-06, "loss": 0.001, "step": 95100 }, { "epoch": 1.5562464206823203, "grad_norm": 0.06759896874427795, "learning_rate": 1.42613235795162e-06, "loss": 0.001, "step": 95110 }, { "epoch": 1.556410046633396, "grad_norm": 0.045523688197135925, "learning_rate": 1.4251338892288684e-06, "loss": 0.0012, "step": 95120 }, { "epoch": 1.5565736725844719, "grad_norm": 0.0628371313214302, "learning_rate": 1.4241357120600358e-06, "loss": 0.0007, "step": 95130 }, { "epoch": 1.5567372985355479, "grad_norm": 0.0038243625313043594, "learning_rate": 1.423137826526526e-06, "loss": 0.0003, "step": 95140 }, { "epoch": 1.5569009244866234, "grad_norm": 0.0647701621055603, "learning_rate": 1.4221402327097284e-06, "loss": 0.0007, "step": 95150 }, { "epoch": 1.5570645504376994, "grad_norm": 0.05577058717608452, "learning_rate": 1.421142930690998e-06, "loss": 0.001, "step": 95160 }, { "epoch": 1.5572281763887754, "grad_norm": 0.04225318878889084, "learning_rate": 1.4201459205516754e-06, "loss": 0.0012, "step": 95170 }, { "epoch": 1.557391802339851, "grad_norm": 0.010365060530602932, "learning_rate": 1.4191492023730706e-06, "loss": 0.0009, "step": 95180 }, { "epoch": 1.557555428290927, "grad_norm": 0.06492631137371063, "learning_rate": 1.4181527762364755e-06, "loss": 0.0007, "step": 95190 }, { "epoch": 1.5577190542420027, "grad_norm": 0.10063893347978592, "learning_rate": 1.4171566422231515e-06, "loss": 0.0007, "step": 95200 }, { "epoch": 1.5578826801930785, "grad_norm": 0.06483485549688339, "learning_rate": 1.4161608004143435e-06, "loss": 0.0012, "step": 95210 }, { "epoch": 1.5580463061441545, "grad_norm": 0.06489508599042892, "learning_rate": 1.415165250891266e-06, "loss": 0.0011, "step": 95220 }, { "epoch": 1.5582099320952303, "grad_norm": 0.041939400136470795, "learning_rate": 1.4141699937351156e-06, "loss": 0.0009, "step": 95230 }, { "epoch": 1.558373558046306, "grad_norm": 0.04188789799809456, "learning_rate": 1.4131750290270585e-06, "loss": 0.0008, "step": 95240 }, { "epoch": 1.558537183997382, "grad_norm": 0.01635364629328251, "learning_rate": 1.4121803568482445e-06, "loss": 0.0006, "step": 95250 }, { "epoch": 1.5587008099484578, "grad_norm": 0.09771008044481277, "learning_rate": 1.411185977279792e-06, "loss": 0.0009, "step": 95260 }, { "epoch": 1.5588644358995336, "grad_norm": 0.026777304708957672, "learning_rate": 1.4101918904028028e-06, "loss": 0.0013, "step": 95270 }, { "epoch": 1.5590280618506096, "grad_norm": 0.044010795652866364, "learning_rate": 1.4091980962983475e-06, "loss": 0.0007, "step": 95280 }, { "epoch": 1.5591916878016854, "grad_norm": 0.045072562992572784, "learning_rate": 1.4082045950474804e-06, "loss": 0.0007, "step": 95290 }, { "epoch": 1.5593553137527612, "grad_norm": 0.031395263969898224, "learning_rate": 1.4072113867312243e-06, "loss": 0.001, "step": 95300 }, { "epoch": 1.5595189397038371, "grad_norm": 0.04804563894867897, "learning_rate": 1.406218471430586e-06, "loss": 0.0009, "step": 95310 }, { "epoch": 1.559682565654913, "grad_norm": 0.040642041712999344, "learning_rate": 1.4052258492265398e-06, "loss": 0.0009, "step": 95320 }, { "epoch": 1.5598461916059887, "grad_norm": 0.06850877404212952, "learning_rate": 1.4042335202000445e-06, "loss": 0.0011, "step": 95330 }, { "epoch": 1.5600098175570647, "grad_norm": 0.05043303593993187, "learning_rate": 1.403241484432028e-06, "loss": 0.0014, "step": 95340 }, { "epoch": 1.5601734435081402, "grad_norm": 0.07418040186166763, "learning_rate": 1.402249742003401e-06, "loss": 0.0019, "step": 95350 }, { "epoch": 1.5603370694592162, "grad_norm": 0.1309330016374588, "learning_rate": 1.401258292995042e-06, "loss": 0.001, "step": 95360 }, { "epoch": 1.5605006954102922, "grad_norm": 0.004077328834682703, "learning_rate": 1.4002671374878146e-06, "loss": 0.0011, "step": 95370 }, { "epoch": 1.5606643213613678, "grad_norm": 0.055469583719968796, "learning_rate": 1.3992762755625515e-06, "loss": 0.0013, "step": 95380 }, { "epoch": 1.5608279473124438, "grad_norm": 0.020231179893016815, "learning_rate": 1.398285707300064e-06, "loss": 0.0003, "step": 95390 }, { "epoch": 1.5609915732635196, "grad_norm": 0.02965341880917549, "learning_rate": 1.397295432781141e-06, "loss": 0.0005, "step": 95400 }, { "epoch": 1.5611551992145953, "grad_norm": 0.007162534631788731, "learning_rate": 1.3963054520865439e-06, "loss": 0.0015, "step": 95410 }, { "epoch": 1.5613188251656713, "grad_norm": 0.14514701068401337, "learning_rate": 1.395315765297015e-06, "loss": 0.0011, "step": 95420 }, { "epoch": 1.561482451116747, "grad_norm": 0.09444594383239746, "learning_rate": 1.394326372493266e-06, "loss": 0.0012, "step": 95430 }, { "epoch": 1.5616460770678229, "grad_norm": 0.19214867055416107, "learning_rate": 1.3933372737559924e-06, "loss": 0.0013, "step": 95440 }, { "epoch": 1.5618097030188989, "grad_norm": 0.0075097642838954926, "learning_rate": 1.3923484691658583e-06, "loss": 0.0009, "step": 95450 }, { "epoch": 1.5619733289699747, "grad_norm": 0.011223035864531994, "learning_rate": 1.3913599588035098e-06, "loss": 0.0007, "step": 95460 }, { "epoch": 1.5621369549210504, "grad_norm": 0.019771186634898186, "learning_rate": 1.3903717427495645e-06, "loss": 0.0008, "step": 95470 }, { "epoch": 1.5623005808721264, "grad_norm": 0.06696370244026184, "learning_rate": 1.38938382108462e-06, "loss": 0.0008, "step": 95480 }, { "epoch": 1.5624642068232022, "grad_norm": 0.027209773659706116, "learning_rate": 1.3883961938892454e-06, "loss": 0.0013, "step": 95490 }, { "epoch": 1.562627832774278, "grad_norm": 0.016985386610031128, "learning_rate": 1.3874088612439911e-06, "loss": 0.0012, "step": 95500 }, { "epoch": 1.562791458725354, "grad_norm": 0.05902815982699394, "learning_rate": 1.386421823229377e-06, "loss": 0.0014, "step": 95510 }, { "epoch": 1.5629550846764295, "grad_norm": 0.0026475745253264904, "learning_rate": 1.3854350799259065e-06, "loss": 0.0007, "step": 95520 }, { "epoch": 1.5631187106275055, "grad_norm": 0.011088100261986256, "learning_rate": 1.3844486314140515e-06, "loss": 0.0029, "step": 95530 }, { "epoch": 1.5632823365785815, "grad_norm": 0.03482355177402496, "learning_rate": 1.3834624777742662e-06, "loss": 0.0011, "step": 95540 }, { "epoch": 1.563445962529657, "grad_norm": 0.018787400797009468, "learning_rate": 1.3824766190869753e-06, "loss": 0.0006, "step": 95550 }, { "epoch": 1.563609588480733, "grad_norm": 0.040789831429719925, "learning_rate": 1.381491055432585e-06, "loss": 0.001, "step": 95560 }, { "epoch": 1.5637732144318088, "grad_norm": 0.014312353916466236, "learning_rate": 1.3805057868914712e-06, "loss": 0.0005, "step": 95570 }, { "epoch": 1.5639368403828846, "grad_norm": 0.03847702592611313, "learning_rate": 1.379520813543993e-06, "loss": 0.0006, "step": 95580 }, { "epoch": 1.5641004663339606, "grad_norm": 0.0028449888341128826, "learning_rate": 1.3785361354704774e-06, "loss": 0.0008, "step": 95590 }, { "epoch": 1.5642640922850364, "grad_norm": 0.018051045015454292, "learning_rate": 1.377551752751235e-06, "loss": 0.0015, "step": 95600 }, { "epoch": 1.5644277182361122, "grad_norm": 0.02913535386323929, "learning_rate": 1.376567665466545e-06, "loss": 0.0005, "step": 95610 }, { "epoch": 1.5645913441871881, "grad_norm": 0.043097734451293945, "learning_rate": 1.3755838736966703e-06, "loss": 0.0008, "step": 95620 }, { "epoch": 1.564754970138264, "grad_norm": 0.011552899144589901, "learning_rate": 1.3746003775218415e-06, "loss": 0.001, "step": 95630 }, { "epoch": 1.5649185960893397, "grad_norm": 0.0521833561360836, "learning_rate": 1.3736171770222734e-06, "loss": 0.0009, "step": 95640 }, { "epoch": 1.5650822220404157, "grad_norm": 0.04688671603798866, "learning_rate": 1.3726342722781483e-06, "loss": 0.0013, "step": 95650 }, { "epoch": 1.5652458479914915, "grad_norm": 0.012001353316009045, "learning_rate": 1.3716516633696319e-06, "loss": 0.0013, "step": 95660 }, { "epoch": 1.5654094739425672, "grad_norm": 0.04682119935750961, "learning_rate": 1.3706693503768603e-06, "loss": 0.0009, "step": 95670 }, { "epoch": 1.5655730998936432, "grad_norm": 0.007234207820147276, "learning_rate": 1.3696873333799499e-06, "loss": 0.0011, "step": 95680 }, { "epoch": 1.565736725844719, "grad_norm": 0.05061132460832596, "learning_rate": 1.3687056124589887e-06, "loss": 0.0017, "step": 95690 }, { "epoch": 1.5659003517957948, "grad_norm": 0.08397451788187027, "learning_rate": 1.3677241876940417e-06, "loss": 0.001, "step": 95700 }, { "epoch": 1.5660639777468708, "grad_norm": 0.07349889725446701, "learning_rate": 1.3667430591651532e-06, "loss": 0.0009, "step": 95710 }, { "epoch": 1.5662276036979463, "grad_norm": 0.022956345230340958, "learning_rate": 1.3657622269523385e-06, "loss": 0.0014, "step": 95720 }, { "epoch": 1.5663912296490223, "grad_norm": 0.0800904855132103, "learning_rate": 1.3647816911355926e-06, "loss": 0.0014, "step": 95730 }, { "epoch": 1.5665548556000983, "grad_norm": 0.044382184743881226, "learning_rate": 1.3638014517948829e-06, "loss": 0.0007, "step": 95740 }, { "epoch": 1.5667184815511739, "grad_norm": 0.03818973898887634, "learning_rate": 1.3628215090101564e-06, "loss": 0.0009, "step": 95750 }, { "epoch": 1.5668821075022499, "grad_norm": 0.044001106172800064, "learning_rate": 1.3618418628613318e-06, "loss": 0.0008, "step": 95760 }, { "epoch": 1.5670457334533257, "grad_norm": 0.005132677964866161, "learning_rate": 1.3608625134283082e-06, "loss": 0.0009, "step": 95770 }, { "epoch": 1.5672093594044014, "grad_norm": 0.07564137130975723, "learning_rate": 1.359883460790955e-06, "loss": 0.0016, "step": 95780 }, { "epoch": 1.5673729853554774, "grad_norm": 0.055294036865234375, "learning_rate": 1.3589047050291238e-06, "loss": 0.0013, "step": 95790 }, { "epoch": 1.5675366113065532, "grad_norm": 0.022641586139798164, "learning_rate": 1.357926246222635e-06, "loss": 0.0007, "step": 95800 }, { "epoch": 1.567700237257629, "grad_norm": 0.008204330690205097, "learning_rate": 1.3569480844512918e-06, "loss": 0.0011, "step": 95810 }, { "epoch": 1.567863863208705, "grad_norm": 0.08772075921297073, "learning_rate": 1.355970219794866e-06, "loss": 0.001, "step": 95820 }, { "epoch": 1.5680274891597807, "grad_norm": 0.005941058974713087, "learning_rate": 1.354992652333113e-06, "loss": 0.0013, "step": 95830 }, { "epoch": 1.5681911151108565, "grad_norm": 0.044869646430015564, "learning_rate": 1.3540153821457563e-06, "loss": 0.0009, "step": 95840 }, { "epoch": 1.5683547410619325, "grad_norm": 0.03574323654174805, "learning_rate": 1.3530384093125021e-06, "loss": 0.0022, "step": 95850 }, { "epoch": 1.5685183670130083, "grad_norm": 0.04942172020673752, "learning_rate": 1.3520617339130254e-06, "loss": 0.0015, "step": 95860 }, { "epoch": 1.568681992964084, "grad_norm": 0.05153922364115715, "learning_rate": 1.3510853560269838e-06, "loss": 0.0008, "step": 95870 }, { "epoch": 1.56884561891516, "grad_norm": 0.0160396546125412, "learning_rate": 1.3501092757340045e-06, "loss": 0.0012, "step": 95880 }, { "epoch": 1.5690092448662358, "grad_norm": 0.035457707941532135, "learning_rate": 1.3491334931136962e-06, "loss": 0.0005, "step": 95890 }, { "epoch": 1.5691728708173116, "grad_norm": 0.09537525475025177, "learning_rate": 1.3481580082456374e-06, "loss": 0.0012, "step": 95900 }, { "epoch": 1.5693364967683876, "grad_norm": 0.048981811851263046, "learning_rate": 1.3471828212093885e-06, "loss": 0.0011, "step": 95910 }, { "epoch": 1.5695001227194632, "grad_norm": 0.07334478199481964, "learning_rate": 1.346207932084479e-06, "loss": 0.0008, "step": 95920 }, { "epoch": 1.5696637486705391, "grad_norm": 0.051195401698350906, "learning_rate": 1.3452333409504204e-06, "loss": 0.0014, "step": 95930 }, { "epoch": 1.5698273746216151, "grad_norm": 0.025683952495455742, "learning_rate": 1.3442590478866952e-06, "loss": 0.0012, "step": 95940 }, { "epoch": 1.5699910005726907, "grad_norm": 0.008572527207434177, "learning_rate": 1.3432850529727647e-06, "loss": 0.0006, "step": 95950 }, { "epoch": 1.5701546265237667, "grad_norm": 0.10819711536169052, "learning_rate": 1.3423113562880636e-06, "loss": 0.0013, "step": 95960 }, { "epoch": 1.5703182524748425, "grad_norm": 0.0014823760138824582, "learning_rate": 1.3413379579120045e-06, "loss": 0.0016, "step": 95970 }, { "epoch": 1.5704818784259182, "grad_norm": 0.042564619332551956, "learning_rate": 1.3403648579239726e-06, "loss": 0.001, "step": 95980 }, { "epoch": 1.5706455043769942, "grad_norm": 0.04699201136827469, "learning_rate": 1.339392056403333e-06, "loss": 0.0004, "step": 95990 }, { "epoch": 1.57080913032807, "grad_norm": 0.07357124239206314, "learning_rate": 1.3384195534294214e-06, "loss": 0.0009, "step": 96000 }, { "epoch": 1.5709727562791458, "grad_norm": 0.0437801368534565, "learning_rate": 1.3374473490815549e-06, "loss": 0.0017, "step": 96010 }, { "epoch": 1.5711363822302218, "grad_norm": 0.07364551723003387, "learning_rate": 1.3364754434390214e-06, "loss": 0.0016, "step": 96020 }, { "epoch": 1.5713000081812976, "grad_norm": 0.015064758248627186, "learning_rate": 1.3355038365810846e-06, "loss": 0.0022, "step": 96030 }, { "epoch": 1.5714636341323733, "grad_norm": 0.007456564344465733, "learning_rate": 1.3345325285869893e-06, "loss": 0.0011, "step": 96040 }, { "epoch": 1.5716272600834493, "grad_norm": 0.03210440278053284, "learning_rate": 1.3335615195359481e-06, "loss": 0.0007, "step": 96050 }, { "epoch": 1.571790886034525, "grad_norm": 0.029707500711083412, "learning_rate": 1.3325908095071565e-06, "loss": 0.0005, "step": 96060 }, { "epoch": 1.5719545119856009, "grad_norm": 0.008204796351492405, "learning_rate": 1.3316203985797794e-06, "loss": 0.001, "step": 96070 }, { "epoch": 1.5721181379366769, "grad_norm": 0.1412242203950882, "learning_rate": 1.3306502868329634e-06, "loss": 0.0021, "step": 96080 }, { "epoch": 1.5722817638877526, "grad_norm": 0.04805987328290939, "learning_rate": 1.3296804743458241e-06, "loss": 0.0009, "step": 96090 }, { "epoch": 1.5724453898388284, "grad_norm": 0.0050872149877250195, "learning_rate": 1.3287109611974592e-06, "loss": 0.0003, "step": 96100 }, { "epoch": 1.5726090157899044, "grad_norm": 0.0342746302485466, "learning_rate": 1.3277417474669364e-06, "loss": 0.001, "step": 96110 }, { "epoch": 1.57277264174098, "grad_norm": 0.05099215731024742, "learning_rate": 1.3267728332333035e-06, "loss": 0.0005, "step": 96120 }, { "epoch": 1.572936267692056, "grad_norm": 0.07273607701063156, "learning_rate": 1.3258042185755798e-06, "loss": 0.0015, "step": 96130 }, { "epoch": 1.573099893643132, "grad_norm": 0.04864228516817093, "learning_rate": 1.3248359035727649e-06, "loss": 0.001, "step": 96140 }, { "epoch": 1.5732635195942075, "grad_norm": 0.00710119167342782, "learning_rate": 1.323867888303828e-06, "loss": 0.0005, "step": 96150 }, { "epoch": 1.5734271455452835, "grad_norm": 0.051200784742832184, "learning_rate": 1.3229001728477204e-06, "loss": 0.0017, "step": 96160 }, { "epoch": 1.5735907714963593, "grad_norm": 0.04332570731639862, "learning_rate": 1.321932757283363e-06, "loss": 0.0009, "step": 96170 }, { "epoch": 1.573754397447435, "grad_norm": 0.10078773647546768, "learning_rate": 1.320965641689657e-06, "loss": 0.0012, "step": 96180 }, { "epoch": 1.573918023398511, "grad_norm": 0.01057474035769701, "learning_rate": 1.319998826145475e-06, "loss": 0.001, "step": 96190 }, { "epoch": 1.5740816493495868, "grad_norm": 0.052918098866939545, "learning_rate": 1.3190323107296692e-06, "loss": 0.0019, "step": 96200 }, { "epoch": 1.5742452753006626, "grad_norm": 0.0329354852437973, "learning_rate": 1.3180660955210638e-06, "loss": 0.0011, "step": 96210 }, { "epoch": 1.5744089012517386, "grad_norm": 0.02556995116174221, "learning_rate": 1.3171001805984613e-06, "loss": 0.0011, "step": 96220 }, { "epoch": 1.5745725272028144, "grad_norm": 0.03863323852419853, "learning_rate": 1.3161345660406366e-06, "loss": 0.0011, "step": 96230 }, { "epoch": 1.5747361531538902, "grad_norm": 0.032791465520858765, "learning_rate": 1.3151692519263447e-06, "loss": 0.0012, "step": 96240 }, { "epoch": 1.5748997791049661, "grad_norm": 0.03509007394313812, "learning_rate": 1.3142042383343101e-06, "loss": 0.0008, "step": 96250 }, { "epoch": 1.575063405056042, "grad_norm": 0.014239206910133362, "learning_rate": 1.3132395253432395e-06, "loss": 0.0012, "step": 96260 }, { "epoch": 1.5752270310071177, "grad_norm": 0.051493994891643524, "learning_rate": 1.312275113031808e-06, "loss": 0.0008, "step": 96270 }, { "epoch": 1.5753906569581937, "grad_norm": 0.03945068269968033, "learning_rate": 1.3113110014786728e-06, "loss": 0.0011, "step": 96280 }, { "epoch": 1.5755542829092692, "grad_norm": 0.0906679630279541, "learning_rate": 1.310347190762461e-06, "loss": 0.0015, "step": 96290 }, { "epoch": 1.5757179088603452, "grad_norm": 0.05404764041304588, "learning_rate": 1.3093836809617811e-06, "loss": 0.0013, "step": 96300 }, { "epoch": 1.5758815348114212, "grad_norm": 0.026969052851200104, "learning_rate": 1.3084204721552097e-06, "loss": 0.0009, "step": 96310 }, { "epoch": 1.5760451607624968, "grad_norm": 0.03980583697557449, "learning_rate": 1.3074575644213061e-06, "loss": 0.0027, "step": 96320 }, { "epoch": 1.5762087867135728, "grad_norm": 0.035800158977508545, "learning_rate": 1.3064949578385988e-06, "loss": 0.0004, "step": 96330 }, { "epoch": 1.5763724126646486, "grad_norm": 0.034289948642253876, "learning_rate": 1.3055326524855982e-06, "loss": 0.0008, "step": 96340 }, { "epoch": 1.5765360386157243, "grad_norm": 0.025562861934304237, "learning_rate": 1.3045706484407843e-06, "loss": 0.001, "step": 96350 }, { "epoch": 1.5766996645668003, "grad_norm": 0.030591795220971107, "learning_rate": 1.3036089457826144e-06, "loss": 0.0008, "step": 96360 }, { "epoch": 1.576863290517876, "grad_norm": 0.013254762627184391, "learning_rate": 1.3026475445895243e-06, "loss": 0.0017, "step": 96370 }, { "epoch": 1.5770269164689519, "grad_norm": 0.06676498055458069, "learning_rate": 1.3016864449399191e-06, "loss": 0.0018, "step": 96380 }, { "epoch": 1.5771905424200279, "grad_norm": 0.04152214527130127, "learning_rate": 1.300725646912186e-06, "loss": 0.0006, "step": 96390 }, { "epoch": 1.5773541683711036, "grad_norm": 0.06061022728681564, "learning_rate": 1.299765150584682e-06, "loss": 0.0013, "step": 96400 }, { "epoch": 1.5775177943221794, "grad_norm": 0.08126376569271088, "learning_rate": 1.2988049560357447e-06, "loss": 0.0012, "step": 96410 }, { "epoch": 1.5776814202732554, "grad_norm": 0.07870087027549744, "learning_rate": 1.2978450633436806e-06, "loss": 0.0015, "step": 96420 }, { "epoch": 1.5778450462243312, "grad_norm": 0.02038705348968506, "learning_rate": 1.2968854725867792e-06, "loss": 0.0036, "step": 96430 }, { "epoch": 1.578008672175407, "grad_norm": 0.035232771188020706, "learning_rate": 1.295926183843298e-06, "loss": 0.001, "step": 96440 }, { "epoch": 1.578172298126483, "grad_norm": 0.005245399195700884, "learning_rate": 1.2949671971914762e-06, "loss": 0.0006, "step": 96450 }, { "epoch": 1.5783359240775587, "grad_norm": 0.09929900616407394, "learning_rate": 1.294008512709523e-06, "loss": 0.001, "step": 96460 }, { "epoch": 1.5784995500286345, "grad_norm": 0.02546124905347824, "learning_rate": 1.2930501304756277e-06, "loss": 0.0014, "step": 96470 }, { "epoch": 1.5786631759797105, "grad_norm": 0.045356739312410355, "learning_rate": 1.2920920505679507e-06, "loss": 0.0009, "step": 96480 }, { "epoch": 1.578826801930786, "grad_norm": 0.08393066376447678, "learning_rate": 1.291134273064632e-06, "loss": 0.0015, "step": 96490 }, { "epoch": 1.578990427881862, "grad_norm": 0.03120879828929901, "learning_rate": 1.2901767980437813e-06, "loss": 0.0006, "step": 96500 }, { "epoch": 1.579154053832938, "grad_norm": 0.03244754672050476, "learning_rate": 1.289219625583491e-06, "loss": 0.0008, "step": 96510 }, { "epoch": 1.5793176797840136, "grad_norm": 0.03129557892680168, "learning_rate": 1.2882627557618215e-06, "loss": 0.001, "step": 96520 }, { "epoch": 1.5794813057350896, "grad_norm": 0.04987405985593796, "learning_rate": 1.2873061886568145e-06, "loss": 0.001, "step": 96530 }, { "epoch": 1.5796449316861654, "grad_norm": 0.04315370321273804, "learning_rate": 1.2863499243464817e-06, "loss": 0.0016, "step": 96540 }, { "epoch": 1.5798085576372412, "grad_norm": 0.05131954699754715, "learning_rate": 1.2853939629088152e-06, "loss": 0.0009, "step": 96550 }, { "epoch": 1.5799721835883171, "grad_norm": 0.049112603068351746, "learning_rate": 1.284438304421778e-06, "loss": 0.0005, "step": 96560 }, { "epoch": 1.580135809539393, "grad_norm": 0.1093360036611557, "learning_rate": 1.2834829489633126e-06, "loss": 0.0008, "step": 96570 }, { "epoch": 1.5802994354904687, "grad_norm": 0.0360153429210186, "learning_rate": 1.2825278966113314e-06, "loss": 0.0006, "step": 96580 }, { "epoch": 1.5804630614415447, "grad_norm": 0.05481434240937233, "learning_rate": 1.2815731474437288e-06, "loss": 0.0012, "step": 96590 }, { "epoch": 1.5806266873926205, "grad_norm": 0.07745036482810974, "learning_rate": 1.2806187015383676e-06, "loss": 0.0004, "step": 96600 }, { "epoch": 1.5807903133436962, "grad_norm": 0.10974860936403275, "learning_rate": 1.2796645589730922e-06, "loss": 0.001, "step": 96610 }, { "epoch": 1.5809539392947722, "grad_norm": 0.041746024042367935, "learning_rate": 1.2787107198257158e-06, "loss": 0.0008, "step": 96620 }, { "epoch": 1.581117565245848, "grad_norm": 0.034387823194265366, "learning_rate": 1.2777571841740343e-06, "loss": 0.0007, "step": 96630 }, { "epoch": 1.5812811911969238, "grad_norm": 0.06622235476970673, "learning_rate": 1.2768039520958108e-06, "loss": 0.0008, "step": 96640 }, { "epoch": 1.5814448171479998, "grad_norm": 0.008857461623847485, "learning_rate": 1.275851023668791e-06, "loss": 0.0007, "step": 96650 }, { "epoch": 1.5816084430990756, "grad_norm": 0.026604799553751945, "learning_rate": 1.2748983989706915e-06, "loss": 0.0007, "step": 96660 }, { "epoch": 1.5817720690501513, "grad_norm": 0.09414137899875641, "learning_rate": 1.273946078079203e-06, "loss": 0.001, "step": 96670 }, { "epoch": 1.5819356950012273, "grad_norm": 0.015447148121893406, "learning_rate": 1.2729940610719966e-06, "loss": 0.0015, "step": 96680 }, { "epoch": 1.5820993209523029, "grad_norm": 0.04328058287501335, "learning_rate": 1.2720423480267124e-06, "loss": 0.0022, "step": 96690 }, { "epoch": 1.5822629469033789, "grad_norm": 0.06319353729486465, "learning_rate": 1.271090939020973e-06, "loss": 0.001, "step": 96700 }, { "epoch": 1.5824265728544549, "grad_norm": 0.03200283646583557, "learning_rate": 1.270139834132368e-06, "loss": 0.0007, "step": 96710 }, { "epoch": 1.5825901988055304, "grad_norm": 0.06690200418233871, "learning_rate": 1.2691890334384687e-06, "loss": 0.0021, "step": 96720 }, { "epoch": 1.5827538247566064, "grad_norm": 0.09653203189373016, "learning_rate": 1.2682385370168182e-06, "loss": 0.0007, "step": 96730 }, { "epoch": 1.5829174507076822, "grad_norm": 0.12001989781856537, "learning_rate": 1.2672883449449368e-06, "loss": 0.0011, "step": 96740 }, { "epoch": 1.583081076658758, "grad_norm": 0.09170501679182053, "learning_rate": 1.2663384573003167e-06, "loss": 0.0005, "step": 96750 }, { "epoch": 1.583244702609834, "grad_norm": 0.017824146896600723, "learning_rate": 1.2653888741604309e-06, "loss": 0.0009, "step": 96760 }, { "epoch": 1.5834083285609097, "grad_norm": 0.07801946252584457, "learning_rate": 1.2644395956027206e-06, "loss": 0.0007, "step": 96770 }, { "epoch": 1.5835719545119855, "grad_norm": 0.05510590597987175, "learning_rate": 1.2634906217046088e-06, "loss": 0.0006, "step": 96780 }, { "epoch": 1.5837355804630615, "grad_norm": 0.004306759685277939, "learning_rate": 1.2625419525434878e-06, "loss": 0.0007, "step": 96790 }, { "epoch": 1.5838992064141373, "grad_norm": 0.04925316572189331, "learning_rate": 1.2615935881967312e-06, "loss": 0.0008, "step": 96800 }, { "epoch": 1.584062832365213, "grad_norm": 0.0014622610760852695, "learning_rate": 1.2606455287416803e-06, "loss": 0.0009, "step": 96810 }, { "epoch": 1.584226458316289, "grad_norm": 0.02716750092804432, "learning_rate": 1.2596977742556593e-06, "loss": 0.0011, "step": 96820 }, { "epoch": 1.5843900842673648, "grad_norm": 0.04361797496676445, "learning_rate": 1.258750324815961e-06, "loss": 0.0005, "step": 96830 }, { "epoch": 1.5845537102184406, "grad_norm": 0.025874778628349304, "learning_rate": 1.2578031804998586e-06, "loss": 0.0011, "step": 96840 }, { "epoch": 1.5847173361695166, "grad_norm": 0.014456517063081264, "learning_rate": 1.2568563413845952e-06, "loss": 0.0008, "step": 96850 }, { "epoch": 1.5848809621205924, "grad_norm": 0.04577498510479927, "learning_rate": 1.2559098075473946e-06, "loss": 0.0005, "step": 96860 }, { "epoch": 1.5850445880716681, "grad_norm": 0.08066362887620926, "learning_rate": 1.2549635790654508e-06, "loss": 0.001, "step": 96870 }, { "epoch": 1.5852082140227441, "grad_norm": 0.08461767435073853, "learning_rate": 1.2540176560159368e-06, "loss": 0.0008, "step": 96880 }, { "epoch": 1.5853718399738197, "grad_norm": 0.02184385247528553, "learning_rate": 1.2530720384759964e-06, "loss": 0.0009, "step": 96890 }, { "epoch": 1.5855354659248957, "grad_norm": 0.04110664501786232, "learning_rate": 1.2521267265227538e-06, "loss": 0.0006, "step": 96900 }, { "epoch": 1.5856990918759717, "grad_norm": 0.0821894183754921, "learning_rate": 1.2511817202333027e-06, "loss": 0.0008, "step": 96910 }, { "epoch": 1.5858627178270472, "grad_norm": 0.022542444989085197, "learning_rate": 1.2502370196847175e-06, "loss": 0.0005, "step": 96920 }, { "epoch": 1.5860263437781232, "grad_norm": 0.050582028925418854, "learning_rate": 1.2492926249540416e-06, "loss": 0.0014, "step": 96930 }, { "epoch": 1.586189969729199, "grad_norm": 0.05709616467356682, "learning_rate": 1.2483485361182994e-06, "loss": 0.0007, "step": 96940 }, { "epoch": 1.5863535956802748, "grad_norm": 0.06084844842553139, "learning_rate": 1.2474047532544852e-06, "loss": 0.0006, "step": 96950 }, { "epoch": 1.5865172216313508, "grad_norm": 0.03821864724159241, "learning_rate": 1.2464612764395734e-06, "loss": 0.0005, "step": 96960 }, { "epoch": 1.5866808475824266, "grad_norm": 0.02189842239022255, "learning_rate": 1.2455181057505077e-06, "loss": 0.0011, "step": 96970 }, { "epoch": 1.5868444735335023, "grad_norm": 0.0485713854432106, "learning_rate": 1.2445752412642126e-06, "loss": 0.0013, "step": 96980 }, { "epoch": 1.5870080994845783, "grad_norm": 0.19116142392158508, "learning_rate": 1.243632683057584e-06, "loss": 0.0035, "step": 96990 }, { "epoch": 1.587171725435654, "grad_norm": 0.07462039589881897, "learning_rate": 1.2426904312074923e-06, "loss": 0.0012, "step": 97000 }, { "epoch": 1.5873353513867299, "grad_norm": 0.062257200479507446, "learning_rate": 1.241748485790787e-06, "loss": 0.0007, "step": 97010 }, { "epoch": 1.5874989773378059, "grad_norm": 0.034018050879240036, "learning_rate": 1.2408068468842866e-06, "loss": 0.0011, "step": 97020 }, { "epoch": 1.5876626032888816, "grad_norm": 0.07112865895032883, "learning_rate": 1.239865514564792e-06, "loss": 0.001, "step": 97030 }, { "epoch": 1.5878262292399574, "grad_norm": 0.08996989578008652, "learning_rate": 1.2389244889090707e-06, "loss": 0.0008, "step": 97040 }, { "epoch": 1.5879898551910334, "grad_norm": 0.020867759361863136, "learning_rate": 1.237983769993874e-06, "loss": 0.001, "step": 97050 }, { "epoch": 1.5881534811421092, "grad_norm": 0.01002001203596592, "learning_rate": 1.2370433578959195e-06, "loss": 0.0012, "step": 97060 }, { "epoch": 1.588317107093185, "grad_norm": 0.0489850752055645, "learning_rate": 1.2361032526919076e-06, "loss": 0.001, "step": 97070 }, { "epoch": 1.588480733044261, "grad_norm": 0.009347106330096722, "learning_rate": 1.2351634544585072e-06, "loss": 0.0012, "step": 97080 }, { "epoch": 1.5886443589953365, "grad_norm": 0.04090132564306259, "learning_rate": 1.2342239632723679e-06, "loss": 0.0018, "step": 97090 }, { "epoch": 1.5888079849464125, "grad_norm": 0.03768495097756386, "learning_rate": 1.2332847792101087e-06, "loss": 0.0007, "step": 97100 }, { "epoch": 1.5889716108974885, "grad_norm": 0.1349332630634308, "learning_rate": 1.2323459023483286e-06, "loss": 0.002, "step": 97110 }, { "epoch": 1.589135236848564, "grad_norm": 0.06317277997732162, "learning_rate": 1.231407332763596e-06, "loss": 0.0007, "step": 97120 }, { "epoch": 1.58929886279964, "grad_norm": 0.10471262037754059, "learning_rate": 1.2304690705324618e-06, "loss": 0.001, "step": 97130 }, { "epoch": 1.5894624887507158, "grad_norm": 0.09951486438512802, "learning_rate": 1.2295311157314432e-06, "loss": 0.0014, "step": 97140 }, { "epoch": 1.5896261147017916, "grad_norm": 0.03760366514325142, "learning_rate": 1.2285934684370404e-06, "loss": 0.0006, "step": 97150 }, { "epoch": 1.5897897406528676, "grad_norm": 0.06821509450674057, "learning_rate": 1.2276561287257211e-06, "loss": 0.0008, "step": 97160 }, { "epoch": 1.5899533666039434, "grad_norm": 0.023200569674372673, "learning_rate": 1.226719096673935e-06, "loss": 0.0013, "step": 97170 }, { "epoch": 1.5901169925550191, "grad_norm": 0.022301355376839638, "learning_rate": 1.2257823723581003e-06, "loss": 0.001, "step": 97180 }, { "epoch": 1.5902806185060951, "grad_norm": 0.07437782734632492, "learning_rate": 1.2248459558546154e-06, "loss": 0.001, "step": 97190 }, { "epoch": 1.590444244457171, "grad_norm": 0.012176129035651684, "learning_rate": 1.2239098472398487e-06, "loss": 0.001, "step": 97200 }, { "epoch": 1.5906078704082467, "grad_norm": 0.04271703585982323, "learning_rate": 1.222974046590149e-06, "loss": 0.0008, "step": 97210 }, { "epoch": 1.5907714963593227, "grad_norm": 0.00456777960062027, "learning_rate": 1.2220385539818341e-06, "loss": 0.0006, "step": 97220 }, { "epoch": 1.5909351223103985, "grad_norm": 0.04272052273154259, "learning_rate": 1.2211033694912021e-06, "loss": 0.0044, "step": 97230 }, { "epoch": 1.5910987482614742, "grad_norm": 0.016012227162718773, "learning_rate": 1.2201684931945213e-06, "loss": 0.0007, "step": 97240 }, { "epoch": 1.5912623742125502, "grad_norm": 0.08335350453853607, "learning_rate": 1.2192339251680395e-06, "loss": 0.0007, "step": 97250 }, { "epoch": 1.5914260001636258, "grad_norm": 0.030794652178883553, "learning_rate": 1.2182996654879742e-06, "loss": 0.0009, "step": 97260 }, { "epoch": 1.5915896261147018, "grad_norm": 0.058969493955373764, "learning_rate": 1.2173657142305229e-06, "loss": 0.0018, "step": 97270 }, { "epoch": 1.5917532520657778, "grad_norm": 0.022355305030941963, "learning_rate": 1.2164320714718531e-06, "loss": 0.0005, "step": 97280 }, { "epoch": 1.5919168780168533, "grad_norm": 0.056606002151966095, "learning_rate": 1.2154987372881121e-06, "loss": 0.0005, "step": 97290 }, { "epoch": 1.5920805039679293, "grad_norm": 0.06301253288984299, "learning_rate": 1.2145657117554166e-06, "loss": 0.0009, "step": 97300 }, { "epoch": 1.592244129919005, "grad_norm": 0.05343060567975044, "learning_rate": 1.2136329949498638e-06, "loss": 0.0011, "step": 97310 }, { "epoch": 1.5924077558700809, "grad_norm": 0.004658139310777187, "learning_rate": 1.2127005869475216e-06, "loss": 0.0013, "step": 97320 }, { "epoch": 1.5925713818211569, "grad_norm": 0.027589093893766403, "learning_rate": 1.211768487824433e-06, "loss": 0.0008, "step": 97330 }, { "epoch": 1.5927350077722326, "grad_norm": 0.08682414144277573, "learning_rate": 1.2108366976566188e-06, "loss": 0.0016, "step": 97340 }, { "epoch": 1.5928986337233084, "grad_norm": 0.04159991815686226, "learning_rate": 1.209905216520071e-06, "loss": 0.0008, "step": 97350 }, { "epoch": 1.5930622596743844, "grad_norm": 0.002184477401897311, "learning_rate": 1.2089740444907593e-06, "loss": 0.0008, "step": 97360 }, { "epoch": 1.5932258856254602, "grad_norm": 0.008638736791908741, "learning_rate": 1.2080431816446253e-06, "loss": 0.0003, "step": 97370 }, { "epoch": 1.593389511576536, "grad_norm": 0.051601581275463104, "learning_rate": 1.2071126280575895e-06, "loss": 0.0007, "step": 97380 }, { "epoch": 1.593553137527612, "grad_norm": 0.11896104365587234, "learning_rate": 1.206182383805542e-06, "loss": 0.0007, "step": 97390 }, { "epoch": 1.5937167634786877, "grad_norm": 0.02824978157877922, "learning_rate": 1.2052524489643525e-06, "loss": 0.0011, "step": 97400 }, { "epoch": 1.5938803894297635, "grad_norm": 0.05780195817351341, "learning_rate": 1.2043228236098613e-06, "loss": 0.0012, "step": 97410 }, { "epoch": 1.5940440153808395, "grad_norm": 0.081435427069664, "learning_rate": 1.2033935078178876e-06, "loss": 0.0004, "step": 97420 }, { "epoch": 1.5942076413319153, "grad_norm": 0.08824429661035538, "learning_rate": 1.2024645016642216e-06, "loss": 0.001, "step": 97430 }, { "epoch": 1.594371267282991, "grad_norm": 0.013098395429551601, "learning_rate": 1.2015358052246313e-06, "loss": 0.0031, "step": 97440 }, { "epoch": 1.594534893234067, "grad_norm": 0.0372081995010376, "learning_rate": 1.2006074185748563e-06, "loss": 0.0006, "step": 97450 }, { "epoch": 1.5946985191851426, "grad_norm": 0.013786882162094116, "learning_rate": 1.1996793417906145e-06, "loss": 0.0009, "step": 97460 }, { "epoch": 1.5948621451362186, "grad_norm": 0.18417958915233612, "learning_rate": 1.1987515749475948e-06, "loss": 0.0018, "step": 97470 }, { "epoch": 1.5950257710872946, "grad_norm": 0.02820058912038803, "learning_rate": 1.197824118121465e-06, "loss": 0.0013, "step": 97480 }, { "epoch": 1.5951893970383701, "grad_norm": 0.12244687974452972, "learning_rate": 1.1968969713878626e-06, "loss": 0.0007, "step": 97490 }, { "epoch": 1.5953530229894461, "grad_norm": 0.027667613700032234, "learning_rate": 1.1959701348224052e-06, "loss": 0.0008, "step": 97500 }, { "epoch": 1.595516648940522, "grad_norm": 0.03789461776614189, "learning_rate": 1.1950436085006795e-06, "loss": 0.0011, "step": 97510 }, { "epoch": 1.5956802748915977, "grad_norm": 0.054429762065410614, "learning_rate": 1.1941173924982535e-06, "loss": 0.0009, "step": 97520 }, { "epoch": 1.5958439008426737, "grad_norm": 0.03577103465795517, "learning_rate": 1.1931914868906624e-06, "loss": 0.0022, "step": 97530 }, { "epoch": 1.5960075267937495, "grad_norm": 0.04397017881274223, "learning_rate": 1.1922658917534236e-06, "loss": 0.001, "step": 97540 }, { "epoch": 1.5961711527448252, "grad_norm": 0.04712901636958122, "learning_rate": 1.1913406071620215e-06, "loss": 0.0019, "step": 97550 }, { "epoch": 1.5963347786959012, "grad_norm": 0.07704362273216248, "learning_rate": 1.1904156331919232e-06, "loss": 0.0012, "step": 97560 }, { "epoch": 1.596498404646977, "grad_norm": 0.10225943475961685, "learning_rate": 1.1894909699185631e-06, "loss": 0.0007, "step": 97570 }, { "epoch": 1.5966620305980528, "grad_norm": 0.14104241132736206, "learning_rate": 1.1885666174173565e-06, "loss": 0.0011, "step": 97580 }, { "epoch": 1.5968256565491288, "grad_norm": 0.02985711768269539, "learning_rate": 1.1876425757636877e-06, "loss": 0.0013, "step": 97590 }, { "epoch": 1.5969892825002046, "grad_norm": 0.09078379720449448, "learning_rate": 1.1867188450329209e-06, "loss": 0.001, "step": 97600 }, { "epoch": 1.5971529084512803, "grad_norm": 0.05256466194987297, "learning_rate": 1.1857954253003894e-06, "loss": 0.0018, "step": 97610 }, { "epoch": 1.5973165344023563, "grad_norm": 0.06322584301233292, "learning_rate": 1.1848723166414077e-06, "loss": 0.0024, "step": 97620 }, { "epoch": 1.597480160353432, "grad_norm": 0.06846736371517181, "learning_rate": 1.18394951913126e-06, "loss": 0.0009, "step": 97630 }, { "epoch": 1.5976437863045079, "grad_norm": 0.04839754104614258, "learning_rate": 1.1830270328452042e-06, "loss": 0.0011, "step": 97640 }, { "epoch": 1.5978074122555839, "grad_norm": 0.06665943562984467, "learning_rate": 1.1821048578584787e-06, "loss": 0.0013, "step": 97650 }, { "epoch": 1.5979710382066594, "grad_norm": 0.0429532490670681, "learning_rate": 1.1811829942462904e-06, "loss": 0.0014, "step": 97660 }, { "epoch": 1.5981346641577354, "grad_norm": 0.03213972970843315, "learning_rate": 1.1802614420838254e-06, "loss": 0.0009, "step": 97670 }, { "epoch": 1.5982982901088114, "grad_norm": 0.03794856742024422, "learning_rate": 1.1793402014462395e-06, "loss": 0.0011, "step": 97680 }, { "epoch": 1.598461916059887, "grad_norm": 0.012777460739016533, "learning_rate": 1.1784192724086701e-06, "loss": 0.0011, "step": 97690 }, { "epoch": 1.598625542010963, "grad_norm": 0.006780116818845272, "learning_rate": 1.177498655046221e-06, "loss": 0.0007, "step": 97700 }, { "epoch": 1.5987891679620387, "grad_norm": 0.06594547629356384, "learning_rate": 1.1765783494339771e-06, "loss": 0.0016, "step": 97710 }, { "epoch": 1.5989527939131145, "grad_norm": 0.016113415360450745, "learning_rate": 1.1756583556469937e-06, "loss": 0.0004, "step": 97720 }, { "epoch": 1.5991164198641905, "grad_norm": 0.06291721016168594, "learning_rate": 1.174738673760305e-06, "loss": 0.0006, "step": 97730 }, { "epoch": 1.5992800458152663, "grad_norm": 0.02736041694879532, "learning_rate": 1.1738193038489133e-06, "loss": 0.0009, "step": 97740 }, { "epoch": 1.599443671766342, "grad_norm": 0.03677744418382645, "learning_rate": 1.172900245987803e-06, "loss": 0.0012, "step": 97750 }, { "epoch": 1.599607297717418, "grad_norm": 0.05457146838307381, "learning_rate": 1.1719815002519268e-06, "loss": 0.001, "step": 97760 }, { "epoch": 1.5997709236684938, "grad_norm": 0.15523658692836761, "learning_rate": 1.1710630667162165e-06, "loss": 0.0013, "step": 97770 }, { "epoch": 1.5999345496195696, "grad_norm": 0.03381544351577759, "learning_rate": 1.1701449454555736e-06, "loss": 0.0015, "step": 97780 }, { "epoch": 1.6000981755706456, "grad_norm": 0.13160747289657593, "learning_rate": 1.1692271365448803e-06, "loss": 0.0009, "step": 97790 }, { "epoch": 1.6002618015217214, "grad_norm": 0.02754007652401924, "learning_rate": 1.168309640058987e-06, "loss": 0.0017, "step": 97800 }, { "epoch": 1.6004254274727971, "grad_norm": 0.08474765717983246, "learning_rate": 1.1673924560727245e-06, "loss": 0.0012, "step": 97810 }, { "epoch": 1.6005890534238731, "grad_norm": 0.09033343195915222, "learning_rate": 1.1664755846608917e-06, "loss": 0.0009, "step": 97820 }, { "epoch": 1.600752679374949, "grad_norm": 0.040223345160484314, "learning_rate": 1.1655590258982691e-06, "loss": 0.0004, "step": 97830 }, { "epoch": 1.6009163053260247, "grad_norm": 0.06419704854488373, "learning_rate": 1.164642779859605e-06, "loss": 0.0011, "step": 97840 }, { "epoch": 1.6010799312771007, "grad_norm": 0.001630942104384303, "learning_rate": 1.1637268466196283e-06, "loss": 0.0011, "step": 97850 }, { "epoch": 1.6012435572281762, "grad_norm": 0.03925986588001251, "learning_rate": 1.1628112262530367e-06, "loss": 0.0012, "step": 97860 }, { "epoch": 1.6014071831792522, "grad_norm": 0.04779164493083954, "learning_rate": 1.1618959188345069e-06, "loss": 0.001, "step": 97870 }, { "epoch": 1.6015708091303282, "grad_norm": 0.0023507389705628157, "learning_rate": 1.160980924438687e-06, "loss": 0.0009, "step": 97880 }, { "epoch": 1.6017344350814038, "grad_norm": 0.2227538377046585, "learning_rate": 1.1600662431402028e-06, "loss": 0.0011, "step": 97890 }, { "epoch": 1.6018980610324798, "grad_norm": 0.01190287247300148, "learning_rate": 1.1591518750136494e-06, "loss": 0.0017, "step": 97900 }, { "epoch": 1.6020616869835556, "grad_norm": 0.03589490428566933, "learning_rate": 1.1582378201336025e-06, "loss": 0.0008, "step": 97910 }, { "epoch": 1.6022253129346313, "grad_norm": 0.02147943153977394, "learning_rate": 1.157324078574607e-06, "loss": 0.0009, "step": 97920 }, { "epoch": 1.6023889388857073, "grad_norm": 0.08050645887851715, "learning_rate": 1.1564106504111876e-06, "loss": 0.0017, "step": 97930 }, { "epoch": 1.602552564836783, "grad_norm": 0.03861328214406967, "learning_rate": 1.1554975357178365e-06, "loss": 0.001, "step": 97940 }, { "epoch": 1.6027161907878589, "grad_norm": 0.02591322734951973, "learning_rate": 1.154584734569028e-06, "loss": 0.0007, "step": 97950 }, { "epoch": 1.6028798167389349, "grad_norm": 0.03869979828596115, "learning_rate": 1.1536722470392059e-06, "loss": 0.0011, "step": 97960 }, { "epoch": 1.6030434426900106, "grad_norm": 0.054462533444166183, "learning_rate": 1.1527600732027866e-06, "loss": 0.0007, "step": 97970 }, { "epoch": 1.6032070686410864, "grad_norm": 0.08680848032236099, "learning_rate": 1.1518482131341685e-06, "loss": 0.0009, "step": 97980 }, { "epoch": 1.6033706945921624, "grad_norm": 0.04948004335165024, "learning_rate": 1.1509366669077155e-06, "loss": 0.0021, "step": 97990 }, { "epoch": 1.6035343205432382, "grad_norm": 0.11467420309782028, "learning_rate": 1.150025434597774e-06, "loss": 0.0011, "step": 98000 }, { "epoch": 1.603697946494314, "grad_norm": 0.023892199620604515, "learning_rate": 1.1491145162786582e-06, "loss": 0.0007, "step": 98010 }, { "epoch": 1.60386157244539, "grad_norm": 0.05300400033593178, "learning_rate": 1.1482039120246612e-06, "loss": 0.0021, "step": 98020 }, { "epoch": 1.6040251983964657, "grad_norm": 0.03293321654200554, "learning_rate": 1.1472936219100478e-06, "loss": 0.0005, "step": 98030 }, { "epoch": 1.6041888243475415, "grad_norm": 0.05904914438724518, "learning_rate": 1.1463836460090594e-06, "loss": 0.0012, "step": 98040 }, { "epoch": 1.6043524502986175, "grad_norm": 0.02522321231663227, "learning_rate": 1.1454739843959084e-06, "loss": 0.0006, "step": 98050 }, { "epoch": 1.604516076249693, "grad_norm": 0.0607261024415493, "learning_rate": 1.144564637144786e-06, "loss": 0.0006, "step": 98060 }, { "epoch": 1.604679702200769, "grad_norm": 0.07843679189682007, "learning_rate": 1.1436556043298535e-06, "loss": 0.0014, "step": 98070 }, { "epoch": 1.6048433281518448, "grad_norm": 0.020113442093133926, "learning_rate": 1.142746886025251e-06, "loss": 0.0007, "step": 98080 }, { "epoch": 1.6050069541029206, "grad_norm": 0.04199831560254097, "learning_rate": 1.1418384823050871e-06, "loss": 0.0013, "step": 98090 }, { "epoch": 1.6051705800539966, "grad_norm": 0.00481654005125165, "learning_rate": 1.1409303932434519e-06, "loss": 0.001, "step": 98100 }, { "epoch": 1.6053342060050724, "grad_norm": 0.02471465989947319, "learning_rate": 1.1400226189144025e-06, "loss": 0.0014, "step": 98110 }, { "epoch": 1.6054978319561481, "grad_norm": 0.056144922971725464, "learning_rate": 1.1391151593919768e-06, "loss": 0.001, "step": 98120 }, { "epoch": 1.6056614579072241, "grad_norm": 0.020133955404162407, "learning_rate": 1.1382080147501817e-06, "loss": 0.0013, "step": 98130 }, { "epoch": 1.6058250838583, "grad_norm": 0.009174136444926262, "learning_rate": 1.1373011850630034e-06, "loss": 0.001, "step": 98140 }, { "epoch": 1.6059887098093757, "grad_norm": 0.02165425568819046, "learning_rate": 1.1363946704043966e-06, "loss": 0.0015, "step": 98150 }, { "epoch": 1.6061523357604517, "grad_norm": 0.03630847483873367, "learning_rate": 1.1354884708482972e-06, "loss": 0.0012, "step": 98160 }, { "epoch": 1.6063159617115275, "grad_norm": 0.05408246070146561, "learning_rate": 1.1345825864686083e-06, "loss": 0.0011, "step": 98170 }, { "epoch": 1.6064795876626032, "grad_norm": 0.03296400234103203, "learning_rate": 1.1336770173392136e-06, "loss": 0.0009, "step": 98180 }, { "epoch": 1.6066432136136792, "grad_norm": 0.06882283836603165, "learning_rate": 1.1327717635339657e-06, "loss": 0.0008, "step": 98190 }, { "epoch": 1.606806839564755, "grad_norm": 0.015450521372258663, "learning_rate": 1.1318668251266972e-06, "loss": 0.0008, "step": 98200 }, { "epoch": 1.6069704655158308, "grad_norm": 0.14532433450222015, "learning_rate": 1.1309622021912081e-06, "loss": 0.0019, "step": 98210 }, { "epoch": 1.6071340914669068, "grad_norm": 0.04961596801877022, "learning_rate": 1.1300578948012798e-06, "loss": 0.0003, "step": 98220 }, { "epoch": 1.6072977174179823, "grad_norm": 0.060220684856176376, "learning_rate": 1.1291539030306608e-06, "loss": 0.0008, "step": 98230 }, { "epoch": 1.6074613433690583, "grad_norm": 0.06398151069879532, "learning_rate": 1.128250226953082e-06, "loss": 0.0007, "step": 98240 }, { "epoch": 1.6076249693201343, "grad_norm": 0.09978048503398895, "learning_rate": 1.1273468666422398e-06, "loss": 0.0011, "step": 98250 }, { "epoch": 1.6077885952712099, "grad_norm": 0.048443857580423355, "learning_rate": 1.1264438221718133e-06, "loss": 0.0015, "step": 98260 }, { "epoch": 1.6079522212222859, "grad_norm": 0.01663028448820114, "learning_rate": 1.1255410936154492e-06, "loss": 0.0008, "step": 98270 }, { "epoch": 1.6081158471733616, "grad_norm": 0.02853105030953884, "learning_rate": 1.1246386810467702e-06, "loss": 0.0027, "step": 98280 }, { "epoch": 1.6082794731244374, "grad_norm": 0.01624145917594433, "learning_rate": 1.1237365845393767e-06, "loss": 0.0006, "step": 98290 }, { "epoch": 1.6084430990755134, "grad_norm": 0.01780993863940239, "learning_rate": 1.122834804166838e-06, "loss": 0.001, "step": 98300 }, { "epoch": 1.6086067250265892, "grad_norm": 0.04460993781685829, "learning_rate": 1.121933340002702e-06, "loss": 0.001, "step": 98310 }, { "epoch": 1.608770350977665, "grad_norm": 0.05030401423573494, "learning_rate": 1.1210321921204881e-06, "loss": 0.0009, "step": 98320 }, { "epoch": 1.608933976928741, "grad_norm": 0.074622243642807, "learning_rate": 1.120131360593692e-06, "loss": 0.0006, "step": 98330 }, { "epoch": 1.6090976028798167, "grad_norm": 0.041184570640325546, "learning_rate": 1.11923084549578e-06, "loss": 0.0014, "step": 98340 }, { "epoch": 1.6092612288308925, "grad_norm": 0.0906820222735405, "learning_rate": 1.1183306469001986e-06, "loss": 0.0008, "step": 98350 }, { "epoch": 1.6094248547819685, "grad_norm": 0.024200350046157837, "learning_rate": 1.1174307648803617e-06, "loss": 0.002, "step": 98360 }, { "epoch": 1.6095884807330443, "grad_norm": 0.03529239445924759, "learning_rate": 1.1165311995096633e-06, "loss": 0.0009, "step": 98370 }, { "epoch": 1.60975210668412, "grad_norm": 0.058973196893930435, "learning_rate": 1.1156319508614666e-06, "loss": 0.0006, "step": 98380 }, { "epoch": 1.609915732635196, "grad_norm": 0.004053601995110512, "learning_rate": 1.1147330190091127e-06, "loss": 0.0009, "step": 98390 }, { "epoch": 1.6100793585862718, "grad_norm": 0.06835632771253586, "learning_rate": 1.1138344040259142e-06, "loss": 0.0014, "step": 98400 }, { "epoch": 1.6102429845373476, "grad_norm": 0.055411599576473236, "learning_rate": 1.1129361059851611e-06, "loss": 0.0014, "step": 98410 }, { "epoch": 1.6104066104884236, "grad_norm": 0.10034821927547455, "learning_rate": 1.1120381249601126e-06, "loss": 0.0006, "step": 98420 }, { "epoch": 1.6105702364394991, "grad_norm": 0.018842680379748344, "learning_rate": 1.1111404610240085e-06, "loss": 0.0008, "step": 98430 }, { "epoch": 1.6107338623905751, "grad_norm": 0.09404448419809341, "learning_rate": 1.1102431142500559e-06, "loss": 0.0016, "step": 98440 }, { "epoch": 1.6108974883416511, "grad_norm": 0.005391061305999756, "learning_rate": 1.1093460847114418e-06, "loss": 0.0005, "step": 98450 }, { "epoch": 1.6110611142927267, "grad_norm": 0.03553809970617294, "learning_rate": 1.1084493724813229e-06, "loss": 0.0008, "step": 98460 }, { "epoch": 1.6112247402438027, "grad_norm": 0.01727294921875, "learning_rate": 1.1075529776328335e-06, "loss": 0.0008, "step": 98470 }, { "epoch": 1.6113883661948785, "grad_norm": 0.04744759574532509, "learning_rate": 1.1066569002390798e-06, "loss": 0.0013, "step": 98480 }, { "epoch": 1.6115519921459542, "grad_norm": 0.09186459332704544, "learning_rate": 1.1057611403731438e-06, "loss": 0.0011, "step": 98490 }, { "epoch": 1.6117156180970302, "grad_norm": 0.05243953689932823, "learning_rate": 1.1048656981080785e-06, "loss": 0.0009, "step": 98500 }, { "epoch": 1.611879244048106, "grad_norm": 0.05669968202710152, "learning_rate": 1.1039705735169159e-06, "loss": 0.0018, "step": 98510 }, { "epoch": 1.6120428699991818, "grad_norm": 0.17999234795570374, "learning_rate": 1.1030757666726567e-06, "loss": 0.0009, "step": 98520 }, { "epoch": 1.6122064959502578, "grad_norm": 0.05805385485291481, "learning_rate": 1.1021812776482804e-06, "loss": 0.0008, "step": 98530 }, { "epoch": 1.6123701219013336, "grad_norm": 0.0329163633286953, "learning_rate": 1.1012871065167362e-06, "loss": 0.001, "step": 98540 }, { "epoch": 1.6125337478524093, "grad_norm": 0.011239580810070038, "learning_rate": 1.1003932533509531e-06, "loss": 0.0004, "step": 98550 }, { "epoch": 1.6126973738034853, "grad_norm": 0.039462849497795105, "learning_rate": 1.0994997182238266e-06, "loss": 0.0008, "step": 98560 }, { "epoch": 1.612860999754561, "grad_norm": 0.08043213188648224, "learning_rate": 1.0986065012082343e-06, "loss": 0.0011, "step": 98570 }, { "epoch": 1.6130246257056369, "grad_norm": 0.03515748679637909, "learning_rate": 1.0977136023770202e-06, "loss": 0.0007, "step": 98580 }, { "epoch": 1.6131882516567129, "grad_norm": 0.024439388886094093, "learning_rate": 1.0968210218030096e-06, "loss": 0.0006, "step": 98590 }, { "epoch": 1.6133518776077886, "grad_norm": 0.05537058785557747, "learning_rate": 1.0959287595589968e-06, "loss": 0.0009, "step": 98600 }, { "epoch": 1.6135155035588644, "grad_norm": 0.017928028479218483, "learning_rate": 1.0950368157177498e-06, "loss": 0.0009, "step": 98610 }, { "epoch": 1.6136791295099404, "grad_norm": 0.04345674812793732, "learning_rate": 1.0941451903520162e-06, "loss": 0.0011, "step": 98620 }, { "epoch": 1.613842755461016, "grad_norm": 0.01877068541944027, "learning_rate": 1.0932538835345103e-06, "loss": 0.0008, "step": 98630 }, { "epoch": 1.614006381412092, "grad_norm": 0.04336192458868027, "learning_rate": 1.0923628953379273e-06, "loss": 0.0008, "step": 98640 }, { "epoch": 1.614170007363168, "grad_norm": 0.0044768378138542175, "learning_rate": 1.0914722258349303e-06, "loss": 0.0013, "step": 98650 }, { "epoch": 1.6143336333142435, "grad_norm": 0.03292006626725197, "learning_rate": 1.0905818750981618e-06, "loss": 0.0012, "step": 98660 }, { "epoch": 1.6144972592653195, "grad_norm": 0.11813094466924667, "learning_rate": 1.0896918432002334e-06, "loss": 0.001, "step": 98670 }, { "epoch": 1.6146608852163953, "grad_norm": 0.06800729781389236, "learning_rate": 1.0888021302137352e-06, "loss": 0.001, "step": 98680 }, { "epoch": 1.614824511167471, "grad_norm": 0.009437814354896545, "learning_rate": 1.0879127362112274e-06, "loss": 0.0009, "step": 98690 }, { "epoch": 1.614988137118547, "grad_norm": 0.0062317312695086, "learning_rate": 1.0870236612652474e-06, "loss": 0.0019, "step": 98700 }, { "epoch": 1.6151517630696228, "grad_norm": 0.011862434446811676, "learning_rate": 1.086134905448304e-06, "loss": 0.0007, "step": 98710 }, { "epoch": 1.6153153890206986, "grad_norm": 0.05355127155780792, "learning_rate": 1.0852464688328824e-06, "loss": 0.0018, "step": 98720 }, { "epoch": 1.6154790149717746, "grad_norm": 0.022363979369401932, "learning_rate": 1.0843583514914385e-06, "loss": 0.0008, "step": 98730 }, { "epoch": 1.6156426409228504, "grad_norm": 0.23203960061073303, "learning_rate": 1.0834705534964063e-06, "loss": 0.0017, "step": 98740 }, { "epoch": 1.6158062668739261, "grad_norm": 0.05894729495048523, "learning_rate": 1.0825830749201893e-06, "loss": 0.0011, "step": 98750 }, { "epoch": 1.6159698928250021, "grad_norm": 0.08380956947803497, "learning_rate": 1.0816959158351703e-06, "loss": 0.0012, "step": 98760 }, { "epoch": 1.616133518776078, "grad_norm": 0.011260194703936577, "learning_rate": 1.0808090763136997e-06, "loss": 0.0007, "step": 98770 }, { "epoch": 1.6162971447271537, "grad_norm": 0.029289012774825096, "learning_rate": 1.079922556428108e-06, "loss": 0.0006, "step": 98780 }, { "epoch": 1.6164607706782297, "grad_norm": 0.03287067636847496, "learning_rate": 1.0790363562506933e-06, "loss": 0.0008, "step": 98790 }, { "epoch": 1.6166243966293055, "grad_norm": 0.057073742151260376, "learning_rate": 1.0781504758537352e-06, "loss": 0.0011, "step": 98800 }, { "epoch": 1.6167880225803812, "grad_norm": 0.003922164905816317, "learning_rate": 1.0772649153094793e-06, "loss": 0.0004, "step": 98810 }, { "epoch": 1.6169516485314572, "grad_norm": 0.04353697970509529, "learning_rate": 1.0763796746901522e-06, "loss": 0.0013, "step": 98820 }, { "epoch": 1.6171152744825328, "grad_norm": 0.10883992910385132, "learning_rate": 1.0754947540679483e-06, "loss": 0.0008, "step": 98830 }, { "epoch": 1.6172789004336088, "grad_norm": 0.08834085613489151, "learning_rate": 1.074610153515041e-06, "loss": 0.0008, "step": 98840 }, { "epoch": 1.6174425263846848, "grad_norm": 0.0750710740685463, "learning_rate": 1.0737258731035728e-06, "loss": 0.0009, "step": 98850 }, { "epoch": 1.6176061523357603, "grad_norm": 0.036313727498054504, "learning_rate": 1.0728419129056655e-06, "loss": 0.0014, "step": 98860 }, { "epoch": 1.6177697782868363, "grad_norm": 0.02966841496527195, "learning_rate": 1.0719582729934092e-06, "loss": 0.002, "step": 98870 }, { "epoch": 1.617933404237912, "grad_norm": 0.0079027796164155, "learning_rate": 1.071074953438873e-06, "loss": 0.0005, "step": 98880 }, { "epoch": 1.6180970301889879, "grad_norm": 0.0825885757803917, "learning_rate": 1.0701919543140955e-06, "loss": 0.0051, "step": 98890 }, { "epoch": 1.6182606561400639, "grad_norm": 0.03835207596421242, "learning_rate": 1.0693092756910922e-06, "loss": 0.001, "step": 98900 }, { "epoch": 1.6184242820911396, "grad_norm": 0.03957780823111534, "learning_rate": 1.0684269176418504e-06, "loss": 0.0009, "step": 98910 }, { "epoch": 1.6185879080422154, "grad_norm": 0.08176249265670776, "learning_rate": 1.0675448802383336e-06, "loss": 0.0012, "step": 98920 }, { "epoch": 1.6187515339932914, "grad_norm": 0.0636419877409935, "learning_rate": 1.0666631635524776e-06, "loss": 0.0012, "step": 98930 }, { "epoch": 1.6189151599443672, "grad_norm": 0.027811024338006973, "learning_rate": 1.06578176765619e-06, "loss": 0.0005, "step": 98940 }, { "epoch": 1.619078785895443, "grad_norm": 0.0021214059088379145, "learning_rate": 1.0649006926213573e-06, "loss": 0.0007, "step": 98950 }, { "epoch": 1.619242411846519, "grad_norm": 0.01910286210477352, "learning_rate": 1.0640199385198347e-06, "loss": 0.001, "step": 98960 }, { "epoch": 1.6194060377975947, "grad_norm": 0.10791837424039841, "learning_rate": 1.0631395054234556e-06, "loss": 0.0009, "step": 98970 }, { "epoch": 1.6195696637486705, "grad_norm": 0.04680231958627701, "learning_rate": 1.0622593934040226e-06, "loss": 0.0008, "step": 98980 }, { "epoch": 1.6197332896997465, "grad_norm": 0.11000838130712509, "learning_rate": 1.0613796025333178e-06, "loss": 0.0008, "step": 98990 }, { "epoch": 1.619896915650822, "grad_norm": 0.07441253960132599, "learning_rate": 1.060500132883091e-06, "loss": 0.0018, "step": 99000 }, { "epoch": 1.620060541601898, "grad_norm": 0.05682549998164177, "learning_rate": 1.059620984525071e-06, "loss": 0.0004, "step": 99010 }, { "epoch": 1.620224167552974, "grad_norm": 0.05946957319974899, "learning_rate": 1.0587421575309559e-06, "loss": 0.002, "step": 99020 }, { "epoch": 1.6203877935040496, "grad_norm": 0.09401339292526245, "learning_rate": 1.0578636519724229e-06, "loss": 0.0013, "step": 99030 }, { "epoch": 1.6205514194551256, "grad_norm": 0.016316894441843033, "learning_rate": 1.056985467921116e-06, "loss": 0.0009, "step": 99040 }, { "epoch": 1.6207150454062014, "grad_norm": 0.03270907327532768, "learning_rate": 1.0561076054486614e-06, "loss": 0.0014, "step": 99050 }, { "epoch": 1.6208786713572771, "grad_norm": 0.11566010862588882, "learning_rate": 1.05523006462665e-06, "loss": 0.0017, "step": 99060 }, { "epoch": 1.6210422973083531, "grad_norm": 0.01913970150053501, "learning_rate": 1.054352845526655e-06, "loss": 0.0005, "step": 99070 }, { "epoch": 1.621205923259429, "grad_norm": 0.06576588749885559, "learning_rate": 1.0534759482202162e-06, "loss": 0.0009, "step": 99080 }, { "epoch": 1.6213695492105047, "grad_norm": 0.0531679131090641, "learning_rate": 1.0525993727788535e-06, "loss": 0.001, "step": 99090 }, { "epoch": 1.6215331751615807, "grad_norm": 0.026732036843895912, "learning_rate": 1.051723119274054e-06, "loss": 0.0009, "step": 99100 }, { "epoch": 1.6216968011126565, "grad_norm": 0.04521513730287552, "learning_rate": 1.0508471877772852e-06, "loss": 0.001, "step": 99110 }, { "epoch": 1.6218604270637322, "grad_norm": 0.09972858428955078, "learning_rate": 1.0499715783599824e-06, "loss": 0.0012, "step": 99120 }, { "epoch": 1.6220240530148082, "grad_norm": 0.041070688515901566, "learning_rate": 1.04909629109356e-06, "loss": 0.0008, "step": 99130 }, { "epoch": 1.622187678965884, "grad_norm": 0.06659958511590958, "learning_rate": 1.0482213260494e-06, "loss": 0.001, "step": 99140 }, { "epoch": 1.6223513049169598, "grad_norm": 0.0031932243146002293, "learning_rate": 1.047346683298865e-06, "loss": 0.0005, "step": 99150 }, { "epoch": 1.6225149308680358, "grad_norm": 0.01182716153562069, "learning_rate": 1.046472362913285e-06, "loss": 0.0008, "step": 99160 }, { "epoch": 1.6226785568191116, "grad_norm": 0.12931151688098907, "learning_rate": 1.04559836496397e-06, "loss": 0.0009, "step": 99170 }, { "epoch": 1.6228421827701873, "grad_norm": 0.1108604148030281, "learning_rate": 1.0447246895221963e-06, "loss": 0.0013, "step": 99180 }, { "epoch": 1.6230058087212633, "grad_norm": 0.0400497131049633, "learning_rate": 1.0438513366592218e-06, "loss": 0.0011, "step": 99190 }, { "epoch": 1.6231694346723389, "grad_norm": 0.0018103771144524217, "learning_rate": 1.0429783064462706e-06, "loss": 0.0007, "step": 99200 }, { "epoch": 1.6233330606234149, "grad_norm": 0.007359854876995087, "learning_rate": 1.0421055989545476e-06, "loss": 0.0007, "step": 99210 }, { "epoch": 1.6234966865744909, "grad_norm": 0.06547354906797409, "learning_rate": 1.041233214255224e-06, "loss": 0.0009, "step": 99220 }, { "epoch": 1.6236603125255664, "grad_norm": 0.027127254754304886, "learning_rate": 1.0403611524194523e-06, "loss": 0.001, "step": 99230 }, { "epoch": 1.6238239384766424, "grad_norm": 0.0034067730884999037, "learning_rate": 1.039489413518353e-06, "loss": 0.0004, "step": 99240 }, { "epoch": 1.6239875644277182, "grad_norm": 0.01794705167412758, "learning_rate": 1.038617997623021e-06, "loss": 0.0006, "step": 99250 }, { "epoch": 1.624151190378794, "grad_norm": 0.021203018724918365, "learning_rate": 1.0377469048045285e-06, "loss": 0.0014, "step": 99260 }, { "epoch": 1.62431481632987, "grad_norm": 0.0713459774851799, "learning_rate": 1.0368761351339169e-06, "loss": 0.0008, "step": 99270 }, { "epoch": 1.6244784422809457, "grad_norm": 0.07734189927577972, "learning_rate": 1.036005688682205e-06, "loss": 0.0018, "step": 99280 }, { "epoch": 1.6246420682320215, "grad_norm": 0.016635021194815636, "learning_rate": 1.035135565520381e-06, "loss": 0.0008, "step": 99290 }, { "epoch": 1.6248056941830975, "grad_norm": 0.030284764245152473, "learning_rate": 1.0342657657194122e-06, "loss": 0.0004, "step": 99300 }, { "epoch": 1.6249693201341733, "grad_norm": 0.03503705561161041, "learning_rate": 1.0333962893502336e-06, "loss": 0.0008, "step": 99310 }, { "epoch": 1.625132946085249, "grad_norm": 0.050276078283786774, "learning_rate": 1.0325271364837592e-06, "loss": 0.0008, "step": 99320 }, { "epoch": 1.625296572036325, "grad_norm": 0.0807928740978241, "learning_rate": 1.031658307190872e-06, "loss": 0.0009, "step": 99330 }, { "epoch": 1.6254601979874008, "grad_norm": 0.03641953691840172, "learning_rate": 1.0307898015424328e-06, "loss": 0.0004, "step": 99340 }, { "epoch": 1.6256238239384766, "grad_norm": 0.2234962284564972, "learning_rate": 1.0299216196092721e-06, "loss": 0.0015, "step": 99350 }, { "epoch": 1.6257874498895526, "grad_norm": 0.04085862636566162, "learning_rate": 1.0290537614621976e-06, "loss": 0.0008, "step": 99360 }, { "epoch": 1.6259510758406284, "grad_norm": 0.007594408467411995, "learning_rate": 1.0281862271719867e-06, "loss": 0.0008, "step": 99370 }, { "epoch": 1.6261147017917041, "grad_norm": 0.0083228824660182, "learning_rate": 1.0273190168093955e-06, "loss": 0.0006, "step": 99380 }, { "epoch": 1.6262783277427801, "grad_norm": 0.1275256872177124, "learning_rate": 1.0264521304451474e-06, "loss": 0.0006, "step": 99390 }, { "epoch": 1.6264419536938557, "grad_norm": 0.005033647175878286, "learning_rate": 1.025585568149946e-06, "loss": 0.0014, "step": 99400 }, { "epoch": 1.6266055796449317, "grad_norm": 0.08270313590765, "learning_rate": 1.0247193299944618e-06, "loss": 0.0007, "step": 99410 }, { "epoch": 1.6267692055960077, "grad_norm": 0.030194906517863274, "learning_rate": 1.0238534160493457e-06, "loss": 0.0016, "step": 99420 }, { "epoch": 1.6269328315470832, "grad_norm": 0.1548902839422226, "learning_rate": 1.0229878263852156e-06, "loss": 0.0011, "step": 99430 }, { "epoch": 1.6270964574981592, "grad_norm": 0.09061524271965027, "learning_rate": 1.0221225610726688e-06, "loss": 0.0009, "step": 99440 }, { "epoch": 1.627260083449235, "grad_norm": 0.08053354918956757, "learning_rate": 1.02125762018227e-06, "loss": 0.0019, "step": 99450 }, { "epoch": 1.6274237094003108, "grad_norm": 0.020897533744573593, "learning_rate": 1.020393003784565e-06, "loss": 0.0012, "step": 99460 }, { "epoch": 1.6275873353513868, "grad_norm": 0.017086738720536232, "learning_rate": 1.0195287119500652e-06, "loss": 0.0007, "step": 99470 }, { "epoch": 1.6277509613024626, "grad_norm": 0.05146488547325134, "learning_rate": 1.018664744749262e-06, "loss": 0.0011, "step": 99480 }, { "epoch": 1.6279145872535383, "grad_norm": 0.03859664499759674, "learning_rate": 1.0178011022526157e-06, "loss": 0.0009, "step": 99490 }, { "epoch": 1.6280782132046143, "grad_norm": 0.07819285988807678, "learning_rate": 1.0169377845305639e-06, "loss": 0.001, "step": 99500 }, { "epoch": 1.62824183915569, "grad_norm": 0.03664161264896393, "learning_rate": 1.0160747916535135e-06, "loss": 0.0012, "step": 99510 }, { "epoch": 1.6284054651067659, "grad_norm": 0.10607807338237762, "learning_rate": 1.01521212369185e-06, "loss": 0.001, "step": 99520 }, { "epoch": 1.6285690910578419, "grad_norm": 0.014757922850549221, "learning_rate": 1.014349780715927e-06, "loss": 0.0006, "step": 99530 }, { "epoch": 1.6287327170089176, "grad_norm": 0.058034446090459824, "learning_rate": 1.0134877627960765e-06, "loss": 0.0025, "step": 99540 }, { "epoch": 1.6288963429599934, "grad_norm": 0.038525357842445374, "learning_rate": 1.0126260700025997e-06, "loss": 0.0012, "step": 99550 }, { "epoch": 1.6290599689110694, "grad_norm": 0.0739142894744873, "learning_rate": 1.0117647024057759e-06, "loss": 0.0009, "step": 99560 }, { "epoch": 1.6292235948621452, "grad_norm": 0.006620502565056086, "learning_rate": 1.0109036600758538e-06, "loss": 0.0006, "step": 99570 }, { "epoch": 1.629387220813221, "grad_norm": 0.005684231407940388, "learning_rate": 1.0100429430830561e-06, "loss": 0.0004, "step": 99580 }, { "epoch": 1.629550846764297, "grad_norm": 0.05469898506999016, "learning_rate": 1.0091825514975818e-06, "loss": 0.0009, "step": 99590 }, { "epoch": 1.6297144727153725, "grad_norm": 0.01911795139312744, "learning_rate": 1.008322485389599e-06, "loss": 0.0012, "step": 99600 }, { "epoch": 1.6298780986664485, "grad_norm": 0.05313611030578613, "learning_rate": 1.0074627448292557e-06, "loss": 0.0013, "step": 99610 }, { "epoch": 1.6300417246175245, "grad_norm": 0.03985777497291565, "learning_rate": 1.0066033298866651e-06, "loss": 0.001, "step": 99620 }, { "epoch": 1.6302053505686, "grad_norm": 0.006057702004909515, "learning_rate": 1.0057442406319219e-06, "loss": 0.0009, "step": 99630 }, { "epoch": 1.630368976519676, "grad_norm": 0.06132945418357849, "learning_rate": 1.004885477135087e-06, "loss": 0.0015, "step": 99640 }, { "epoch": 1.6305326024707518, "grad_norm": 0.18481603264808655, "learning_rate": 1.0040270394662016e-06, "loss": 0.0009, "step": 99650 }, { "epoch": 1.6306962284218276, "grad_norm": 0.16483499109745026, "learning_rate": 1.0031689276952743e-06, "loss": 0.0033, "step": 99660 }, { "epoch": 1.6308598543729036, "grad_norm": 0.055334024131298065, "learning_rate": 1.002311141892292e-06, "loss": 0.0008, "step": 99670 }, { "epoch": 1.6310234803239794, "grad_norm": 0.03972171992063522, "learning_rate": 1.0014536821272103e-06, "loss": 0.0027, "step": 99680 }, { "epoch": 1.6311871062750551, "grad_norm": 0.01582186669111252, "learning_rate": 1.0005965484699637e-06, "loss": 0.0009, "step": 99690 }, { "epoch": 1.6313507322261311, "grad_norm": 0.047537438571453094, "learning_rate": 9.997397409904535e-07, "loss": 0.0004, "step": 99700 }, { "epoch": 1.631514358177207, "grad_norm": 0.02975868247449398, "learning_rate": 9.988832597585619e-07, "loss": 0.001, "step": 99710 }, { "epoch": 1.6316779841282827, "grad_norm": 0.03457397222518921, "learning_rate": 9.98027104844137e-07, "loss": 0.0008, "step": 99720 }, { "epoch": 1.6318416100793587, "grad_norm": 0.036272987723350525, "learning_rate": 9.971712763170071e-07, "loss": 0.001, "step": 99730 }, { "epoch": 1.6320052360304345, "grad_norm": 0.07712455838918686, "learning_rate": 9.963157742469676e-07, "loss": 0.0016, "step": 99740 }, { "epoch": 1.6321688619815102, "grad_norm": 0.0029385199304670095, "learning_rate": 9.95460598703793e-07, "loss": 0.0011, "step": 99750 }, { "epoch": 1.6323324879325862, "grad_norm": 0.049355171620845795, "learning_rate": 9.946057497572265e-07, "loss": 0.0015, "step": 99760 }, { "epoch": 1.632496113883662, "grad_norm": 0.02615751326084137, "learning_rate": 9.937512274769883e-07, "loss": 0.0015, "step": 99770 }, { "epoch": 1.6326597398347378, "grad_norm": 0.04165137931704521, "learning_rate": 9.928970319327685e-07, "loss": 0.0006, "step": 99780 }, { "epoch": 1.6328233657858138, "grad_norm": 0.07442167401313782, "learning_rate": 9.92043163194235e-07, "loss": 0.0006, "step": 99790 }, { "epoch": 1.6329869917368893, "grad_norm": 0.03294137120246887, "learning_rate": 9.911896213310235e-07, "loss": 0.0011, "step": 99800 }, { "epoch": 1.6331506176879653, "grad_norm": 0.021243317052721977, "learning_rate": 9.90336406412748e-07, "loss": 0.0013, "step": 99810 }, { "epoch": 1.633314243639041, "grad_norm": 0.027108022943139076, "learning_rate": 9.894835185089924e-07, "loss": 0.0007, "step": 99820 }, { "epoch": 1.6334778695901169, "grad_norm": 0.14141453802585602, "learning_rate": 9.886309576893172e-07, "loss": 0.0012, "step": 99830 }, { "epoch": 1.6336414955411929, "grad_norm": 0.04646940529346466, "learning_rate": 9.877787240232523e-07, "loss": 0.0013, "step": 99840 }, { "epoch": 1.6338051214922686, "grad_norm": 0.03067663684487343, "learning_rate": 9.869268175803048e-07, "loss": 0.0008, "step": 99850 }, { "epoch": 1.6339687474433444, "grad_norm": 0.05898122861981392, "learning_rate": 9.86075238429951e-07, "loss": 0.0011, "step": 99860 }, { "epoch": 1.6341323733944204, "grad_norm": 0.0041008577682077885, "learning_rate": 9.852239866416458e-07, "loss": 0.0009, "step": 99870 }, { "epoch": 1.6342959993454962, "grad_norm": 0.049132224172353745, "learning_rate": 9.843730622848114e-07, "loss": 0.001, "step": 99880 }, { "epoch": 1.634459625296572, "grad_norm": 0.02173960953950882, "learning_rate": 9.835224654288494e-07, "loss": 0.0005, "step": 99890 }, { "epoch": 1.634623251247648, "grad_norm": 0.07617775350809097, "learning_rate": 9.82672196143129e-07, "loss": 0.0011, "step": 99900 }, { "epoch": 1.6347868771987237, "grad_norm": 0.04964715614914894, "learning_rate": 9.818222544969959e-07, "loss": 0.001, "step": 99910 }, { "epoch": 1.6349505031497995, "grad_norm": 0.1036185622215271, "learning_rate": 9.809726405597692e-07, "loss": 0.001, "step": 99920 }, { "epoch": 1.6351141291008755, "grad_norm": 0.08874023705720901, "learning_rate": 9.801233544007389e-07, "loss": 0.001, "step": 99930 }, { "epoch": 1.6352777550519513, "grad_norm": 0.020390445366501808, "learning_rate": 9.792743960891726e-07, "loss": 0.0008, "step": 99940 }, { "epoch": 1.635441381003027, "grad_norm": 0.28065553307533264, "learning_rate": 9.784257656943053e-07, "loss": 0.0023, "step": 99950 }, { "epoch": 1.635605006954103, "grad_norm": 0.0179117601364851, "learning_rate": 9.775774632853513e-07, "loss": 0.0004, "step": 99960 }, { "epoch": 1.6357686329051786, "grad_norm": 0.017978258430957794, "learning_rate": 9.767294889314926e-07, "loss": 0.0026, "step": 99970 }, { "epoch": 1.6359322588562546, "grad_norm": 0.08978407084941864, "learning_rate": 9.758818427018897e-07, "loss": 0.0011, "step": 99980 }, { "epoch": 1.6360958848073306, "grad_norm": 0.04969654604792595, "learning_rate": 9.750345246656712e-07, "loss": 0.0011, "step": 99990 }, { "epoch": 1.6362595107584061, "grad_norm": 0.030709777027368546, "learning_rate": 9.741875348919443e-07, "loss": 0.0004, "step": 100000 }, { "epoch": 1.6364231367094821, "grad_norm": 0.011034307070076466, "learning_rate": 9.733408734497834e-07, "loss": 0.0008, "step": 100010 }, { "epoch": 1.636586762660558, "grad_norm": 0.06286372244358063, "learning_rate": 9.724945404082425e-07, "loss": 0.0006, "step": 100020 }, { "epoch": 1.6367503886116337, "grad_norm": 0.028471173718571663, "learning_rate": 9.71648535836343e-07, "loss": 0.0008, "step": 100030 }, { "epoch": 1.6369140145627097, "grad_norm": 0.0518898069858551, "learning_rate": 9.708028598030844e-07, "loss": 0.0007, "step": 100040 }, { "epoch": 1.6370776405137855, "grad_norm": 0.12626807391643524, "learning_rate": 9.699575123774346e-07, "loss": 0.0011, "step": 100050 }, { "epoch": 1.6372412664648612, "grad_norm": 0.05185278505086899, "learning_rate": 9.691124936283403e-07, "loss": 0.0012, "step": 100060 }, { "epoch": 1.6374048924159372, "grad_norm": 0.0829366147518158, "learning_rate": 9.68267803624715e-07, "loss": 0.0011, "step": 100070 }, { "epoch": 1.637568518367013, "grad_norm": 0.06289747357368469, "learning_rate": 9.674234424354522e-07, "loss": 0.0008, "step": 100080 }, { "epoch": 1.6377321443180888, "grad_norm": 0.07616087794303894, "learning_rate": 9.665794101294119e-07, "loss": 0.0007, "step": 100090 }, { "epoch": 1.6378957702691648, "grad_norm": 0.014592207036912441, "learning_rate": 9.657357067754335e-07, "loss": 0.001, "step": 100100 }, { "epoch": 1.6380593962202405, "grad_norm": 0.053025439381599426, "learning_rate": 9.648923324423238e-07, "loss": 0.0017, "step": 100110 }, { "epoch": 1.6382230221713163, "grad_norm": 0.04782039672136307, "learning_rate": 9.640492871988683e-07, "loss": 0.0023, "step": 100120 }, { "epoch": 1.6383866481223923, "grad_norm": 0.012251359410583973, "learning_rate": 9.632065711138194e-07, "loss": 0.0012, "step": 100130 }, { "epoch": 1.638550274073468, "grad_norm": 0.013355978764593601, "learning_rate": 9.6236418425591e-07, "loss": 0.0007, "step": 100140 }, { "epoch": 1.6387139000245439, "grad_norm": 0.24423478543758392, "learning_rate": 9.615221266938397e-07, "loss": 0.0022, "step": 100150 }, { "epoch": 1.6388775259756199, "grad_norm": 0.016653427854180336, "learning_rate": 9.606803984962853e-07, "loss": 0.0007, "step": 100160 }, { "epoch": 1.6390411519266954, "grad_norm": 0.03255431726574898, "learning_rate": 9.598389997318934e-07, "loss": 0.001, "step": 100170 }, { "epoch": 1.6392047778777714, "grad_norm": 0.03150234743952751, "learning_rate": 9.589979304692882e-07, "loss": 0.0006, "step": 100180 }, { "epoch": 1.6393684038288474, "grad_norm": 0.006022465415298939, "learning_rate": 9.58157190777062e-07, "loss": 0.0014, "step": 100190 }, { "epoch": 1.639532029779923, "grad_norm": 0.04250042513012886, "learning_rate": 9.573167807237848e-07, "loss": 0.0014, "step": 100200 }, { "epoch": 1.639695655730999, "grad_norm": 0.012929433025419712, "learning_rate": 9.564767003779974e-07, "loss": 0.0006, "step": 100210 }, { "epoch": 1.6398592816820747, "grad_norm": 0.08184831589460373, "learning_rate": 9.556369498082108e-07, "loss": 0.001, "step": 100220 }, { "epoch": 1.6400229076331505, "grad_norm": 0.058843448758125305, "learning_rate": 9.547975290829158e-07, "loss": 0.001, "step": 100230 }, { "epoch": 1.6401865335842265, "grad_norm": 0.0030664834193885326, "learning_rate": 9.539584382705708e-07, "loss": 0.0009, "step": 100240 }, { "epoch": 1.6403501595353023, "grad_norm": 0.050919629633426666, "learning_rate": 9.531196774396107e-07, "loss": 0.001, "step": 100250 }, { "epoch": 1.640513785486378, "grad_norm": 0.007630038075149059, "learning_rate": 9.522812466584392e-07, "loss": 0.0008, "step": 100260 }, { "epoch": 1.640677411437454, "grad_norm": 0.0943123921751976, "learning_rate": 9.514431459954398e-07, "loss": 0.0011, "step": 100270 }, { "epoch": 1.6408410373885298, "grad_norm": 0.03166643902659416, "learning_rate": 9.50605375518961e-07, "loss": 0.0011, "step": 100280 }, { "epoch": 1.6410046633396056, "grad_norm": 0.021564824506640434, "learning_rate": 9.497679352973321e-07, "loss": 0.0007, "step": 100290 }, { "epoch": 1.6411682892906816, "grad_norm": 0.10713506489992142, "learning_rate": 9.489308253988494e-07, "loss": 0.0016, "step": 100300 }, { "epoch": 1.6413319152417574, "grad_norm": 0.04868478327989578, "learning_rate": 9.480940458917864e-07, "loss": 0.0013, "step": 100310 }, { "epoch": 1.6414955411928331, "grad_norm": 0.010210786014795303, "learning_rate": 9.472575968443864e-07, "loss": 0.0009, "step": 100320 }, { "epoch": 1.6416591671439091, "grad_norm": 0.023481430485844612, "learning_rate": 9.464214783248693e-07, "loss": 0.0006, "step": 100330 }, { "epoch": 1.641822793094985, "grad_norm": 0.028715720400214195, "learning_rate": 9.455856904014238e-07, "loss": 0.0011, "step": 100340 }, { "epoch": 1.6419864190460607, "grad_norm": 0.09974674135446548, "learning_rate": 9.447502331422159e-07, "loss": 0.0018, "step": 100350 }, { "epoch": 1.6421500449971367, "grad_norm": 0.09422918409109116, "learning_rate": 9.439151066153812e-07, "loss": 0.0015, "step": 100360 }, { "epoch": 1.6423136709482122, "grad_norm": 0.019053339958190918, "learning_rate": 9.430803108890313e-07, "loss": 0.0007, "step": 100370 }, { "epoch": 1.6424772968992882, "grad_norm": 0.04648305103182793, "learning_rate": 9.422458460312473e-07, "loss": 0.001, "step": 100380 }, { "epoch": 1.6426409228503642, "grad_norm": 0.01793096214532852, "learning_rate": 9.414117121100874e-07, "loss": 0.0005, "step": 100390 }, { "epoch": 1.6428045488014398, "grad_norm": 0.037642136216163635, "learning_rate": 9.405779091935791e-07, "loss": 0.0012, "step": 100400 }, { "epoch": 1.6429681747525158, "grad_norm": 0.005648561753332615, "learning_rate": 9.397444373497255e-07, "loss": 0.0015, "step": 100410 }, { "epoch": 1.6431318007035915, "grad_norm": 0.0291750431060791, "learning_rate": 9.389112966465008e-07, "loss": 0.0013, "step": 100420 }, { "epoch": 1.6432954266546673, "grad_norm": 0.11103682219982147, "learning_rate": 9.38078487151855e-07, "loss": 0.0013, "step": 100430 }, { "epoch": 1.6434590526057433, "grad_norm": 0.012171434238553047, "learning_rate": 9.372460089337066e-07, "loss": 0.0014, "step": 100440 }, { "epoch": 1.643622678556819, "grad_norm": 0.024769367650151253, "learning_rate": 9.364138620599522e-07, "loss": 0.0017, "step": 100450 }, { "epoch": 1.6437863045078949, "grad_norm": 0.04039948433637619, "learning_rate": 9.355820465984566e-07, "loss": 0.0006, "step": 100460 }, { "epoch": 1.6439499304589709, "grad_norm": 0.06217660754919052, "learning_rate": 9.347505626170622e-07, "loss": 0.0011, "step": 100470 }, { "epoch": 1.6441135564100466, "grad_norm": 0.044760867953300476, "learning_rate": 9.339194101835791e-07, "loss": 0.0012, "step": 100480 }, { "epoch": 1.6442771823611224, "grad_norm": 0.02198943682014942, "learning_rate": 9.330885893657965e-07, "loss": 0.0008, "step": 100490 }, { "epoch": 1.6444408083121984, "grad_norm": 0.03514161705970764, "learning_rate": 9.322581002314701e-07, "loss": 0.0012, "step": 100500 }, { "epoch": 1.6446044342632742, "grad_norm": 0.08054398745298386, "learning_rate": 9.314279428483353e-07, "loss": 0.001, "step": 100510 }, { "epoch": 1.64476806021435, "grad_norm": 0.01564517803490162, "learning_rate": 9.305981172840933e-07, "loss": 0.0006, "step": 100520 }, { "epoch": 1.644931686165426, "grad_norm": 0.08523036539554596, "learning_rate": 9.297686236064246e-07, "loss": 0.001, "step": 100530 }, { "epoch": 1.6450953121165017, "grad_norm": 0.08250916749238968, "learning_rate": 9.289394618829794e-07, "loss": 0.0012, "step": 100540 }, { "epoch": 1.6452589380675775, "grad_norm": 0.0643003061413765, "learning_rate": 9.281106321813793e-07, "loss": 0.0018, "step": 100550 }, { "epoch": 1.6454225640186535, "grad_norm": 0.02502853237092495, "learning_rate": 9.272821345692235e-07, "loss": 0.0006, "step": 100560 }, { "epoch": 1.645586189969729, "grad_norm": 0.04937305301427841, "learning_rate": 9.26453969114079e-07, "loss": 0.0007, "step": 100570 }, { "epoch": 1.645749815920805, "grad_norm": 0.02921679988503456, "learning_rate": 9.2562613588349e-07, "loss": 0.0003, "step": 100580 }, { "epoch": 1.645913441871881, "grad_norm": 0.047889672219753265, "learning_rate": 9.247986349449705e-07, "loss": 0.0016, "step": 100590 }, { "epoch": 1.6460770678229566, "grad_norm": 0.06035612151026726, "learning_rate": 9.239714663660105e-07, "loss": 0.0008, "step": 100600 }, { "epoch": 1.6462406937740326, "grad_norm": 0.03663825988769531, "learning_rate": 9.231446302140684e-07, "loss": 0.0009, "step": 100610 }, { "epoch": 1.6464043197251084, "grad_norm": 0.009869643487036228, "learning_rate": 9.223181265565812e-07, "loss": 0.0008, "step": 100620 }, { "epoch": 1.6465679456761841, "grad_norm": 0.0018252196023240685, "learning_rate": 9.21491955460953e-07, "loss": 0.0008, "step": 100630 }, { "epoch": 1.6467315716272601, "grad_norm": 0.026634203270077705, "learning_rate": 9.206661169945652e-07, "loss": 0.001, "step": 100640 }, { "epoch": 1.646895197578336, "grad_norm": 0.0012128757080063224, "learning_rate": 9.198406112247693e-07, "loss": 0.0008, "step": 100650 }, { "epoch": 1.6470588235294117, "grad_norm": 0.028131825849413872, "learning_rate": 9.190154382188921e-07, "loss": 0.0005, "step": 100660 }, { "epoch": 1.6472224494804877, "grad_norm": 0.00040771570638753474, "learning_rate": 9.181905980442296e-07, "loss": 0.0011, "step": 100670 }, { "epoch": 1.6473860754315635, "grad_norm": 0.06667282432317734, "learning_rate": 9.173660907680559e-07, "loss": 0.0021, "step": 100680 }, { "epoch": 1.6475497013826392, "grad_norm": 0.03825167194008827, "learning_rate": 9.165419164576122e-07, "loss": 0.0004, "step": 100690 }, { "epoch": 1.6477133273337152, "grad_norm": 0.0063306232914328575, "learning_rate": 9.157180751801182e-07, "loss": 0.0016, "step": 100700 }, { "epoch": 1.647876953284791, "grad_norm": 0.0033302123192697763, "learning_rate": 9.148945670027604e-07, "loss": 0.001, "step": 100710 }, { "epoch": 1.6480405792358668, "grad_norm": 0.1574021577835083, "learning_rate": 9.140713919927047e-07, "loss": 0.0015, "step": 100720 }, { "epoch": 1.6482042051869428, "grad_norm": 0.06321648508310318, "learning_rate": 9.132485502170829e-07, "loss": 0.0014, "step": 100730 }, { "epoch": 1.6483678311380183, "grad_norm": 0.033277928829193115, "learning_rate": 9.12426041743007e-07, "loss": 0.0017, "step": 100740 }, { "epoch": 1.6485314570890943, "grad_norm": 0.04327253997325897, "learning_rate": 9.116038666375543e-07, "loss": 0.0004, "step": 100750 }, { "epoch": 1.6486950830401703, "grad_norm": 0.03747379779815674, "learning_rate": 9.107820249677812e-07, "loss": 0.0015, "step": 100760 }, { "epoch": 1.6488587089912459, "grad_norm": 0.045122239738702774, "learning_rate": 9.09960516800713e-07, "loss": 0.0008, "step": 100770 }, { "epoch": 1.6490223349423219, "grad_norm": 0.03921327367424965, "learning_rate": 9.091393422033501e-07, "loss": 0.0007, "step": 100780 }, { "epoch": 1.6491859608933976, "grad_norm": 0.018943939357995987, "learning_rate": 9.083185012426632e-07, "loss": 0.0006, "step": 100790 }, { "epoch": 1.6493495868444734, "grad_norm": 0.05014105513691902, "learning_rate": 9.074979939855993e-07, "loss": 0.0004, "step": 100800 }, { "epoch": 1.6495132127955494, "grad_norm": 0.0029739083256572485, "learning_rate": 9.066778204990745e-07, "loss": 0.0009, "step": 100810 }, { "epoch": 1.6496768387466252, "grad_norm": 0.12536069750785828, "learning_rate": 9.058579808499806e-07, "loss": 0.0012, "step": 100820 }, { "epoch": 1.649840464697701, "grad_norm": 0.018265223130583763, "learning_rate": 9.050384751051789e-07, "loss": 0.0011, "step": 100830 }, { "epoch": 1.650004090648777, "grad_norm": 0.005306945648044348, "learning_rate": 9.042193033315084e-07, "loss": 0.0008, "step": 100840 }, { "epoch": 1.6501677165998527, "grad_norm": 0.050336480140686035, "learning_rate": 9.034004655957767e-07, "loss": 0.0008, "step": 100850 }, { "epoch": 1.6503313425509285, "grad_norm": 0.001689878641627729, "learning_rate": 9.025819619647636e-07, "loss": 0.0006, "step": 100860 }, { "epoch": 1.6504949685020045, "grad_norm": 0.06841377168893814, "learning_rate": 9.017637925052264e-07, "loss": 0.001, "step": 100870 }, { "epoch": 1.6506585944530803, "grad_norm": 0.10851097106933594, "learning_rate": 9.009459572838896e-07, "loss": 0.0011, "step": 100880 }, { "epoch": 1.650822220404156, "grad_norm": 0.11003616452217102, "learning_rate": 9.001284563674562e-07, "loss": 0.0009, "step": 100890 }, { "epoch": 1.650985846355232, "grad_norm": 0.042798299342393875, "learning_rate": 8.993112898225948e-07, "loss": 0.0014, "step": 100900 }, { "epoch": 1.6511494723063078, "grad_norm": 0.031225567683577538, "learning_rate": 8.984944577159549e-07, "loss": 0.0015, "step": 100910 }, { "epoch": 1.6513130982573836, "grad_norm": 0.13762202858924866, "learning_rate": 8.976779601141505e-07, "loss": 0.0017, "step": 100920 }, { "epoch": 1.6514767242084596, "grad_norm": 0.05078810080885887, "learning_rate": 8.968617970837762e-07, "loss": 0.0007, "step": 100930 }, { "epoch": 1.6516403501595351, "grad_norm": 0.12267827242612839, "learning_rate": 8.960459686913919e-07, "loss": 0.0021, "step": 100940 }, { "epoch": 1.6518039761106111, "grad_norm": 0.055990397930145264, "learning_rate": 8.952304750035373e-07, "loss": 0.0008, "step": 100950 }, { "epoch": 1.6519676020616871, "grad_norm": 0.032464973628520966, "learning_rate": 8.94415316086718e-07, "loss": 0.001, "step": 100960 }, { "epoch": 1.6521312280127627, "grad_norm": 0.03776313364505768, "learning_rate": 8.936004920074182e-07, "loss": 0.0009, "step": 100970 }, { "epoch": 1.6522948539638387, "grad_norm": 0.05661027878522873, "learning_rate": 8.9278600283209e-07, "loss": 0.0012, "step": 100980 }, { "epoch": 1.6524584799149145, "grad_norm": 0.01972813531756401, "learning_rate": 8.919718486271628e-07, "loss": 0.0011, "step": 100990 }, { "epoch": 1.6526221058659902, "grad_norm": 0.039506617933511734, "learning_rate": 8.911580294590332e-07, "loss": 0.0005, "step": 101000 }, { "epoch": 1.6527857318170662, "grad_norm": 0.0016242810525000095, "learning_rate": 8.903445453940773e-07, "loss": 0.0007, "step": 101010 }, { "epoch": 1.652949357768142, "grad_norm": 0.02541460655629635, "learning_rate": 8.895313964986363e-07, "loss": 0.0013, "step": 101020 }, { "epoch": 1.6531129837192178, "grad_norm": 0.02735931985080242, "learning_rate": 8.887185828390304e-07, "loss": 0.001, "step": 101030 }, { "epoch": 1.6532766096702938, "grad_norm": 0.03217087686061859, "learning_rate": 8.879061044815484e-07, "loss": 0.0013, "step": 101040 }, { "epoch": 1.6534402356213695, "grad_norm": 0.006479606498032808, "learning_rate": 8.870939614924551e-07, "loss": 0.0009, "step": 101050 }, { "epoch": 1.6536038615724453, "grad_norm": 0.04930521175265312, "learning_rate": 8.86282153937984e-07, "loss": 0.0008, "step": 101060 }, { "epoch": 1.6537674875235213, "grad_norm": 0.016630342230200768, "learning_rate": 8.854706818843456e-07, "loss": 0.0009, "step": 101070 }, { "epoch": 1.653931113474597, "grad_norm": 0.0023141137789934874, "learning_rate": 8.84659545397718e-07, "loss": 0.0002, "step": 101080 }, { "epoch": 1.6540947394256729, "grad_norm": 0.0774788111448288, "learning_rate": 8.838487445442573e-07, "loss": 0.0005, "step": 101090 }, { "epoch": 1.6542583653767489, "grad_norm": 0.027807947248220444, "learning_rate": 8.830382793900877e-07, "loss": 0.0012, "step": 101100 }, { "epoch": 1.6544219913278246, "grad_norm": 0.009336289949715137, "learning_rate": 8.822281500013102e-07, "loss": 0.0007, "step": 101110 }, { "epoch": 1.6545856172789004, "grad_norm": 0.008764766156673431, "learning_rate": 8.814183564439937e-07, "loss": 0.0015, "step": 101120 }, { "epoch": 1.6547492432299764, "grad_norm": 0.022864574566483498, "learning_rate": 8.806088987841843e-07, "loss": 0.0011, "step": 101130 }, { "epoch": 1.654912869181052, "grad_norm": 0.03213363140821457, "learning_rate": 8.797997770878969e-07, "loss": 0.0014, "step": 101140 }, { "epoch": 1.655076495132128, "grad_norm": 0.17430591583251953, "learning_rate": 8.789909914211226e-07, "loss": 0.0009, "step": 101150 }, { "epoch": 1.655240121083204, "grad_norm": 0.00819132849574089, "learning_rate": 8.781825418498213e-07, "loss": 0.0007, "step": 101160 }, { "epoch": 1.6554037470342795, "grad_norm": 0.05609424039721489, "learning_rate": 8.773744284399288e-07, "loss": 0.0016, "step": 101170 }, { "epoch": 1.6555673729853555, "grad_norm": 0.015797214582562447, "learning_rate": 8.765666512573523e-07, "loss": 0.0008, "step": 101180 }, { "epoch": 1.6557309989364313, "grad_norm": 0.0739670991897583, "learning_rate": 8.757592103679685e-07, "loss": 0.0011, "step": 101190 }, { "epoch": 1.655894624887507, "grad_norm": 0.03178118169307709, "learning_rate": 8.749521058376336e-07, "loss": 0.0007, "step": 101200 }, { "epoch": 1.656058250838583, "grad_norm": 0.006787098944187164, "learning_rate": 8.741453377321685e-07, "loss": 0.0005, "step": 101210 }, { "epoch": 1.6562218767896588, "grad_norm": 0.11626537144184113, "learning_rate": 8.733389061173742e-07, "loss": 0.0006, "step": 101220 }, { "epoch": 1.6563855027407346, "grad_norm": 0.08527819812297821, "learning_rate": 8.72532811059017e-07, "loss": 0.0009, "step": 101230 }, { "epoch": 1.6565491286918106, "grad_norm": 0.01779862307012081, "learning_rate": 8.717270526228422e-07, "loss": 0.001, "step": 101240 }, { "epoch": 1.6567127546428864, "grad_norm": 0.048301730304956436, "learning_rate": 8.709216308745616e-07, "loss": 0.0008, "step": 101250 }, { "epoch": 1.6568763805939621, "grad_norm": 0.009677115827798843, "learning_rate": 8.701165458798666e-07, "loss": 0.0008, "step": 101260 }, { "epoch": 1.6570400065450381, "grad_norm": 0.045163266360759735, "learning_rate": 8.69311797704413e-07, "loss": 0.0008, "step": 101270 }, { "epoch": 1.657203632496114, "grad_norm": 0.05419962853193283, "learning_rate": 8.685073864138371e-07, "loss": 0.0015, "step": 101280 }, { "epoch": 1.6573672584471897, "grad_norm": 0.07545183598995209, "learning_rate": 8.677033120737405e-07, "loss": 0.0016, "step": 101290 }, { "epoch": 1.6575308843982657, "grad_norm": 0.047006651759147644, "learning_rate": 8.668995747497045e-07, "loss": 0.0006, "step": 101300 }, { "epoch": 1.6576945103493415, "grad_norm": 0.00753290019929409, "learning_rate": 8.660961745072755e-07, "loss": 0.0005, "step": 101310 }, { "epoch": 1.6578581363004172, "grad_norm": 0.08932722359895706, "learning_rate": 8.652931114119789e-07, "loss": 0.0004, "step": 101320 }, { "epoch": 1.6580217622514932, "grad_norm": 0.052556682378053665, "learning_rate": 8.644903855293074e-07, "loss": 0.0021, "step": 101330 }, { "epoch": 1.6581853882025688, "grad_norm": 0.0639825388789177, "learning_rate": 8.636879969247308e-07, "loss": 0.0009, "step": 101340 }, { "epoch": 1.6583490141536448, "grad_norm": 0.0941416472196579, "learning_rate": 8.628859456636873e-07, "loss": 0.0012, "step": 101350 }, { "epoch": 1.6585126401047208, "grad_norm": 0.043087173253297806, "learning_rate": 8.620842318115919e-07, "loss": 0.0008, "step": 101360 }, { "epoch": 1.6586762660557963, "grad_norm": 0.02111460268497467, "learning_rate": 8.612828554338265e-07, "loss": 0.0008, "step": 101370 }, { "epoch": 1.6588398920068723, "grad_norm": 0.06270020455121994, "learning_rate": 8.604818165957518e-07, "loss": 0.0012, "step": 101380 }, { "epoch": 1.659003517957948, "grad_norm": 0.043625134974718094, "learning_rate": 8.59681115362695e-07, "loss": 0.001, "step": 101390 }, { "epoch": 1.6591671439090239, "grad_norm": 0.11834300309419632, "learning_rate": 8.588807517999609e-07, "loss": 0.0012, "step": 101400 }, { "epoch": 1.6593307698600999, "grad_norm": 0.08660273998975754, "learning_rate": 8.580807259728225e-07, "loss": 0.0008, "step": 101410 }, { "epoch": 1.6594943958111756, "grad_norm": 0.03922102227807045, "learning_rate": 8.572810379465291e-07, "loss": 0.0011, "step": 101420 }, { "epoch": 1.6596580217622514, "grad_norm": 0.04858485981822014, "learning_rate": 8.564816877862981e-07, "loss": 0.0012, "step": 101430 }, { "epoch": 1.6598216477133274, "grad_norm": 0.009295348078012466, "learning_rate": 8.556826755573245e-07, "loss": 0.0013, "step": 101440 }, { "epoch": 1.6599852736644032, "grad_norm": 0.0582355335354805, "learning_rate": 8.548840013247706e-07, "loss": 0.0067, "step": 101450 }, { "epoch": 1.660148899615479, "grad_norm": 0.03318160027265549, "learning_rate": 8.540856651537754e-07, "loss": 0.0009, "step": 101460 }, { "epoch": 1.660312525566555, "grad_norm": 0.04215823486447334, "learning_rate": 8.53287667109447e-07, "loss": 0.0006, "step": 101470 }, { "epoch": 1.6604761515176307, "grad_norm": 0.008919753134250641, "learning_rate": 8.524900072568693e-07, "loss": 0.0007, "step": 101480 }, { "epoch": 1.6606397774687065, "grad_norm": 0.055964186787605286, "learning_rate": 8.51692685661094e-07, "loss": 0.0011, "step": 101490 }, { "epoch": 1.6608034034197825, "grad_norm": 0.07094421982765198, "learning_rate": 8.508957023871512e-07, "loss": 0.0013, "step": 101500 }, { "epoch": 1.6609670293708583, "grad_norm": 0.011620515026152134, "learning_rate": 8.500990575000384e-07, "loss": 0.0005, "step": 101510 }, { "epoch": 1.661130655321934, "grad_norm": 0.04202982038259506, "learning_rate": 8.493027510647256e-07, "loss": 0.0006, "step": 101520 }, { "epoch": 1.66129428127301, "grad_norm": 0.07425212115049362, "learning_rate": 8.485067831461602e-07, "loss": 0.001, "step": 101530 }, { "epoch": 1.6614579072240856, "grad_norm": 0.053521011024713516, "learning_rate": 8.477111538092552e-07, "loss": 0.0009, "step": 101540 }, { "epoch": 1.6616215331751616, "grad_norm": 0.009066687896847725, "learning_rate": 8.469158631189028e-07, "loss": 0.0011, "step": 101550 }, { "epoch": 1.6617851591262376, "grad_norm": 0.0825028344988823, "learning_rate": 8.461209111399615e-07, "loss": 0.0014, "step": 101560 }, { "epoch": 1.6619487850773131, "grad_norm": 0.15130744874477386, "learning_rate": 8.453262979372672e-07, "loss": 0.0011, "step": 101570 }, { "epoch": 1.6621124110283891, "grad_norm": 0.01658623106777668, "learning_rate": 8.445320235756233e-07, "loss": 0.0009, "step": 101580 }, { "epoch": 1.662276036979465, "grad_norm": 0.01609295792877674, "learning_rate": 8.437380881198104e-07, "loss": 0.0006, "step": 101590 }, { "epoch": 1.6624396629305407, "grad_norm": 0.10220176726579666, "learning_rate": 8.429444916345774e-07, "loss": 0.001, "step": 101600 }, { "epoch": 1.6626032888816167, "grad_norm": 0.02159099653363228, "learning_rate": 8.421512341846494e-07, "loss": 0.0008, "step": 101610 }, { "epoch": 1.6627669148326925, "grad_norm": 0.03925452381372452, "learning_rate": 8.413583158347189e-07, "loss": 0.0008, "step": 101620 }, { "epoch": 1.6629305407837682, "grad_norm": 0.03249557316303253, "learning_rate": 8.405657366494563e-07, "loss": 0.0008, "step": 101630 }, { "epoch": 1.6630941667348442, "grad_norm": 0.02010810747742653, "learning_rate": 8.397734966935001e-07, "loss": 0.0009, "step": 101640 }, { "epoch": 1.66325779268592, "grad_norm": 0.14067593216896057, "learning_rate": 8.38981596031464e-07, "loss": 0.001, "step": 101650 }, { "epoch": 1.6634214186369958, "grad_norm": 0.06923440098762512, "learning_rate": 8.381900347279304e-07, "loss": 0.001, "step": 101660 }, { "epoch": 1.6635850445880718, "grad_norm": 0.034058842808008194, "learning_rate": 8.373988128474597e-07, "loss": 0.0007, "step": 101670 }, { "epoch": 1.6637486705391475, "grad_norm": 0.008833528496325016, "learning_rate": 8.366079304545776e-07, "loss": 0.0009, "step": 101680 }, { "epoch": 1.6639122964902233, "grad_norm": 0.08756767958402634, "learning_rate": 8.358173876137893e-07, "loss": 0.0015, "step": 101690 }, { "epoch": 1.6640759224412993, "grad_norm": 0.011731510981917381, "learning_rate": 8.350271843895658e-07, "loss": 0.0005, "step": 101700 }, { "epoch": 1.6642395483923749, "grad_norm": 0.08542631566524506, "learning_rate": 8.342373208463556e-07, "loss": 0.0009, "step": 101710 }, { "epoch": 1.6644031743434509, "grad_norm": 0.02307390235364437, "learning_rate": 8.334477970485755e-07, "loss": 0.0018, "step": 101720 }, { "epoch": 1.6645668002945269, "grad_norm": 0.05497150868177414, "learning_rate": 8.326586130606184e-07, "loss": 0.0017, "step": 101730 }, { "epoch": 1.6647304262456024, "grad_norm": 0.04673496261239052, "learning_rate": 8.318697689468447e-07, "loss": 0.0005, "step": 101740 }, { "epoch": 1.6648940521966784, "grad_norm": 0.026257064193487167, "learning_rate": 8.310812647715932e-07, "loss": 0.0009, "step": 101750 }, { "epoch": 1.6650576781477542, "grad_norm": 0.003084322903305292, "learning_rate": 8.302931005991682e-07, "loss": 0.0005, "step": 101760 }, { "epoch": 1.66522130409883, "grad_norm": 0.07489932328462601, "learning_rate": 8.295052764938528e-07, "loss": 0.0014, "step": 101770 }, { "epoch": 1.665384930049906, "grad_norm": 0.00158429064322263, "learning_rate": 8.287177925198969e-07, "loss": 0.001, "step": 101780 }, { "epoch": 1.6655485560009817, "grad_norm": 0.014105689711868763, "learning_rate": 8.279306487415267e-07, "loss": 0.0013, "step": 101790 }, { "epoch": 1.6657121819520575, "grad_norm": 0.04114721715450287, "learning_rate": 8.271438452229369e-07, "loss": 0.0008, "step": 101800 }, { "epoch": 1.6658758079031335, "grad_norm": 0.11233578622341156, "learning_rate": 8.263573820282994e-07, "loss": 0.0011, "step": 101810 }, { "epoch": 1.6660394338542093, "grad_norm": 0.06644126027822495, "learning_rate": 8.255712592217536e-07, "loss": 0.0008, "step": 101820 }, { "epoch": 1.666203059805285, "grad_norm": 0.0266830176115036, "learning_rate": 8.247854768674119e-07, "loss": 0.0008, "step": 101830 }, { "epoch": 1.666366685756361, "grad_norm": 0.061021775007247925, "learning_rate": 8.240000350293625e-07, "loss": 0.0008, "step": 101840 }, { "epoch": 1.6665303117074368, "grad_norm": 0.03792758658528328, "learning_rate": 8.232149337716616e-07, "loss": 0.0007, "step": 101850 }, { "epoch": 1.6666939376585126, "grad_norm": 0.024493524804711342, "learning_rate": 8.224301731583412e-07, "loss": 0.0007, "step": 101860 }, { "epoch": 1.6668575636095886, "grad_norm": 0.03626318648457527, "learning_rate": 8.216457532534011e-07, "loss": 0.0004, "step": 101870 }, { "epoch": 1.6670211895606644, "grad_norm": 0.10387244075536728, "learning_rate": 8.208616741208186e-07, "loss": 0.0012, "step": 101880 }, { "epoch": 1.6671848155117401, "grad_norm": 0.03235061094164848, "learning_rate": 8.200779358245381e-07, "loss": 0.0007, "step": 101890 }, { "epoch": 1.6673484414628161, "grad_norm": 0.03363650664687157, "learning_rate": 8.192945384284812e-07, "loss": 0.0009, "step": 101900 }, { "epoch": 1.6675120674138917, "grad_norm": 0.13841702044010162, "learning_rate": 8.185114819965362e-07, "loss": 0.001, "step": 101910 }, { "epoch": 1.6676756933649677, "grad_norm": 0.058803677558898926, "learning_rate": 8.177287665925693e-07, "loss": 0.0005, "step": 101920 }, { "epoch": 1.6678393193160437, "grad_norm": 0.032006945461034775, "learning_rate": 8.169463922804139e-07, "loss": 0.0006, "step": 101930 }, { "epoch": 1.6680029452671192, "grad_norm": 0.06506620347499847, "learning_rate": 8.1616435912388e-07, "loss": 0.0007, "step": 101940 }, { "epoch": 1.6681665712181952, "grad_norm": 0.13441327214241028, "learning_rate": 8.153826671867454e-07, "loss": 0.002, "step": 101950 }, { "epoch": 1.668330197169271, "grad_norm": 0.014130309224128723, "learning_rate": 8.146013165327637e-07, "loss": 0.0007, "step": 101960 }, { "epoch": 1.6684938231203468, "grad_norm": 0.017316646873950958, "learning_rate": 8.138203072256579e-07, "loss": 0.0008, "step": 101970 }, { "epoch": 1.6686574490714228, "grad_norm": 0.05530500039458275, "learning_rate": 8.130396393291262e-07, "loss": 0.0009, "step": 101980 }, { "epoch": 1.6688210750224985, "grad_norm": 0.1263323724269867, "learning_rate": 8.122593129068351e-07, "loss": 0.0014, "step": 101990 }, { "epoch": 1.6689847009735743, "grad_norm": 0.00629509286954999, "learning_rate": 8.114793280224281e-07, "loss": 0.0006, "step": 102000 }, { "epoch": 1.6691483269246503, "grad_norm": 0.012647460214793682, "learning_rate": 8.106996847395149e-07, "loss": 0.0008, "step": 102010 }, { "epoch": 1.669311952875726, "grad_norm": 0.005564653780311346, "learning_rate": 8.099203831216834e-07, "loss": 0.0016, "step": 102020 }, { "epoch": 1.6694755788268019, "grad_norm": 0.03609232231974602, "learning_rate": 8.091414232324884e-07, "loss": 0.0013, "step": 102030 }, { "epoch": 1.6696392047778779, "grad_norm": 0.020126482471823692, "learning_rate": 8.08362805135462e-07, "loss": 0.0004, "step": 102040 }, { "epoch": 1.6698028307289536, "grad_norm": 0.14946407079696655, "learning_rate": 8.075845288941026e-07, "loss": 0.0015, "step": 102050 }, { "epoch": 1.6699664566800294, "grad_norm": 0.05827036127448082, "learning_rate": 8.068065945718862e-07, "loss": 0.0009, "step": 102060 }, { "epoch": 1.6701300826311054, "grad_norm": 0.026677848771214485, "learning_rate": 8.060290022322565e-07, "loss": 0.0005, "step": 102070 }, { "epoch": 1.6702937085821812, "grad_norm": 0.049987468868494034, "learning_rate": 8.052517519386332e-07, "loss": 0.0009, "step": 102080 }, { "epoch": 1.670457334533257, "grad_norm": 0.0814337506890297, "learning_rate": 8.044748437544042e-07, "loss": 0.0011, "step": 102090 }, { "epoch": 1.670620960484333, "grad_norm": 0.01272518653422594, "learning_rate": 8.036982777429336e-07, "loss": 0.0008, "step": 102100 }, { "epoch": 1.6707845864354085, "grad_norm": 0.0295509472489357, "learning_rate": 8.029220539675531e-07, "loss": 0.0011, "step": 102110 }, { "epoch": 1.6709482123864845, "grad_norm": 0.037884145975112915, "learning_rate": 8.021461724915719e-07, "loss": 0.0007, "step": 102120 }, { "epoch": 1.6711118383375605, "grad_norm": 0.07349441200494766, "learning_rate": 8.013706333782651e-07, "loss": 0.0011, "step": 102130 }, { "epoch": 1.671275464288636, "grad_norm": 0.009841103106737137, "learning_rate": 8.005954366908853e-07, "loss": 0.0007, "step": 102140 }, { "epoch": 1.671439090239712, "grad_norm": 0.02861565724015236, "learning_rate": 7.998205824926547e-07, "loss": 0.0009, "step": 102150 }, { "epoch": 1.6716027161907878, "grad_norm": 0.15918786823749542, "learning_rate": 7.990460708467656e-07, "loss": 0.0013, "step": 102160 }, { "epoch": 1.6717663421418636, "grad_norm": 0.045576311647892, "learning_rate": 7.982719018163871e-07, "loss": 0.0009, "step": 102170 }, { "epoch": 1.6719299680929396, "grad_norm": 0.010951322503387928, "learning_rate": 7.974980754646561e-07, "loss": 0.0008, "step": 102180 }, { "epoch": 1.6720935940440154, "grad_norm": 0.0369180329144001, "learning_rate": 7.967245918546845e-07, "loss": 0.0007, "step": 102190 }, { "epoch": 1.6722572199950911, "grad_norm": 0.00865790992975235, "learning_rate": 7.959514510495536e-07, "loss": 0.0007, "step": 102200 }, { "epoch": 1.6724208459461671, "grad_norm": 0.018436286598443985, "learning_rate": 7.951786531123202e-07, "loss": 0.001, "step": 102210 }, { "epoch": 1.672584471897243, "grad_norm": 0.0849384069442749, "learning_rate": 7.944061981060086e-07, "loss": 0.0013, "step": 102220 }, { "epoch": 1.6727480978483187, "grad_norm": 0.020758312195539474, "learning_rate": 7.936340860936198e-07, "loss": 0.0007, "step": 102230 }, { "epoch": 1.6729117237993947, "grad_norm": 0.09301984310150146, "learning_rate": 7.928623171381222e-07, "loss": 0.0012, "step": 102240 }, { "epoch": 1.6730753497504705, "grad_norm": 0.07311088591814041, "learning_rate": 7.920908913024616e-07, "loss": 0.0005, "step": 102250 }, { "epoch": 1.6732389757015462, "grad_norm": 0.032767944037914276, "learning_rate": 7.913198086495505e-07, "loss": 0.0005, "step": 102260 }, { "epoch": 1.6734026016526222, "grad_norm": 0.10399367660284042, "learning_rate": 7.905490692422774e-07, "loss": 0.0004, "step": 102270 }, { "epoch": 1.673566227603698, "grad_norm": 0.09880376607179642, "learning_rate": 7.897786731434992e-07, "loss": 0.001, "step": 102280 }, { "epoch": 1.6737298535547738, "grad_norm": 0.029697954654693604, "learning_rate": 7.890086204160497e-07, "loss": 0.0009, "step": 102290 }, { "epoch": 1.6738934795058498, "grad_norm": 0.21316224336624146, "learning_rate": 7.882389111227284e-07, "loss": 0.0007, "step": 102300 }, { "epoch": 1.6740571054569253, "grad_norm": 0.08886222541332245, "learning_rate": 7.874695453263131e-07, "loss": 0.001, "step": 102310 }, { "epoch": 1.6742207314080013, "grad_norm": 0.041089996695518494, "learning_rate": 7.867005230895485e-07, "loss": 0.001, "step": 102320 }, { "epoch": 1.6743843573590773, "grad_norm": 0.05217922851443291, "learning_rate": 7.859318444751557e-07, "loss": 0.0008, "step": 102330 }, { "epoch": 1.6745479833101529, "grad_norm": 0.051311932504177094, "learning_rate": 7.851635095458226e-07, "loss": 0.0006, "step": 102340 }, { "epoch": 1.6747116092612289, "grad_norm": 0.06850788742303848, "learning_rate": 7.843955183642143e-07, "loss": 0.0007, "step": 102350 }, { "epoch": 1.6748752352123046, "grad_norm": 0.0644073411822319, "learning_rate": 7.836278709929645e-07, "loss": 0.001, "step": 102360 }, { "epoch": 1.6750388611633804, "grad_norm": 0.01286712009459734, "learning_rate": 7.82860567494681e-07, "loss": 0.0017, "step": 102370 }, { "epoch": 1.6752024871144564, "grad_norm": 0.022207358852028847, "learning_rate": 7.820936079319403e-07, "loss": 0.0009, "step": 102380 }, { "epoch": 1.6753661130655322, "grad_norm": 0.05233767628669739, "learning_rate": 7.813269923672956e-07, "loss": 0.0013, "step": 102390 }, { "epoch": 1.675529739016608, "grad_norm": 0.06963980942964554, "learning_rate": 7.805607208632676e-07, "loss": 0.0009, "step": 102400 }, { "epoch": 1.675693364967684, "grad_norm": 0.005263608414679766, "learning_rate": 7.797947934823519e-07, "loss": 0.0004, "step": 102410 }, { "epoch": 1.6758569909187597, "grad_norm": 0.01303224265575409, "learning_rate": 7.79029210287014e-07, "loss": 0.001, "step": 102420 }, { "epoch": 1.6760206168698355, "grad_norm": 0.040707413107156754, "learning_rate": 7.782639713396933e-07, "loss": 0.0008, "step": 102430 }, { "epoch": 1.6761842428209115, "grad_norm": 0.002935064723715186, "learning_rate": 7.774990767027985e-07, "loss": 0.0005, "step": 102440 }, { "epoch": 1.6763478687719873, "grad_norm": 0.022606706246733665, "learning_rate": 7.767345264387155e-07, "loss": 0.0005, "step": 102450 }, { "epoch": 1.676511494723063, "grad_norm": 0.057300008833408356, "learning_rate": 7.75970320609793e-07, "loss": 0.001, "step": 102460 }, { "epoch": 1.676675120674139, "grad_norm": 0.06762918829917908, "learning_rate": 7.752064592783609e-07, "loss": 0.0009, "step": 102470 }, { "epoch": 1.6768387466252146, "grad_norm": 0.05211671069264412, "learning_rate": 7.744429425067152e-07, "loss": 0.0008, "step": 102480 }, { "epoch": 1.6770023725762906, "grad_norm": 0.008036550134420395, "learning_rate": 7.73679770357127e-07, "loss": 0.0017, "step": 102490 }, { "epoch": 1.6771659985273666, "grad_norm": 0.055241260677576065, "learning_rate": 7.729169428918371e-07, "loss": 0.0011, "step": 102500 }, { "epoch": 1.6773296244784421, "grad_norm": 0.04189576581120491, "learning_rate": 7.721544601730601e-07, "loss": 0.0004, "step": 102510 }, { "epoch": 1.6774932504295181, "grad_norm": 0.03463882580399513, "learning_rate": 7.713923222629805e-07, "loss": 0.0007, "step": 102520 }, { "epoch": 1.677656876380594, "grad_norm": 0.05555075779557228, "learning_rate": 7.706305292237564e-07, "loss": 0.001, "step": 102530 }, { "epoch": 1.6778205023316697, "grad_norm": 0.009825214743614197, "learning_rate": 7.698690811175164e-07, "loss": 0.0004, "step": 102540 }, { "epoch": 1.6779841282827457, "grad_norm": 0.05704585835337639, "learning_rate": 7.691079780063631e-07, "loss": 0.0007, "step": 102550 }, { "epoch": 1.6781477542338215, "grad_norm": 0.03767823427915573, "learning_rate": 7.683472199523667e-07, "loss": 0.0006, "step": 102560 }, { "epoch": 1.6783113801848972, "grad_norm": 0.00850795116275549, "learning_rate": 7.675868070175752e-07, "loss": 0.0011, "step": 102570 }, { "epoch": 1.6784750061359732, "grad_norm": 0.04947146400809288, "learning_rate": 7.668267392640027e-07, "loss": 0.0007, "step": 102580 }, { "epoch": 1.678638632087049, "grad_norm": 0.11861610412597656, "learning_rate": 7.660670167536399e-07, "loss": 0.0016, "step": 102590 }, { "epoch": 1.6788022580381248, "grad_norm": 0.019659509882330894, "learning_rate": 7.653076395484454e-07, "loss": 0.0007, "step": 102600 }, { "epoch": 1.6789658839892008, "grad_norm": 0.11795555800199509, "learning_rate": 7.645486077103525e-07, "loss": 0.0065, "step": 102610 }, { "epoch": 1.6791295099402765, "grad_norm": 0.09073202311992645, "learning_rate": 7.637899213012657e-07, "loss": 0.001, "step": 102620 }, { "epoch": 1.6792931358913523, "grad_norm": 0.0036755509208887815, "learning_rate": 7.630315803830585e-07, "loss": 0.0009, "step": 102630 }, { "epoch": 1.6794567618424283, "grad_norm": 0.03757796809077263, "learning_rate": 7.622735850175811e-07, "loss": 0.0007, "step": 102640 }, { "epoch": 1.679620387793504, "grad_norm": 0.04312099516391754, "learning_rate": 7.615159352666512e-07, "loss": 0.001, "step": 102650 }, { "epoch": 1.6797840137445799, "grad_norm": 0.061894431710243225, "learning_rate": 7.607586311920622e-07, "loss": 0.0008, "step": 102660 }, { "epoch": 1.6799476396956559, "grad_norm": 0.05961727350950241, "learning_rate": 7.600016728555753e-07, "loss": 0.0007, "step": 102670 }, { "epoch": 1.6801112656467314, "grad_norm": 0.00849273893982172, "learning_rate": 7.592450603189272e-07, "loss": 0.0017, "step": 102680 }, { "epoch": 1.6802748915978074, "grad_norm": 0.031189408153295517, "learning_rate": 7.584887936438224e-07, "loss": 0.0007, "step": 102690 }, { "epoch": 1.6804385175488834, "grad_norm": 0.010038296692073345, "learning_rate": 7.57732872891942e-07, "loss": 0.0007, "step": 102700 }, { "epoch": 1.680602143499959, "grad_norm": 0.04147586598992348, "learning_rate": 7.569772981249335e-07, "loss": 0.0008, "step": 102710 }, { "epoch": 1.680765769451035, "grad_norm": 0.008245404809713364, "learning_rate": 7.56222069404422e-07, "loss": 0.0014, "step": 102720 }, { "epoch": 1.6809293954021107, "grad_norm": 0.054408587515354156, "learning_rate": 7.554671867919989e-07, "loss": 0.0009, "step": 102730 }, { "epoch": 1.6810930213531865, "grad_norm": 0.042227186262607574, "learning_rate": 7.547126503492319e-07, "loss": 0.0011, "step": 102740 }, { "epoch": 1.6812566473042625, "grad_norm": 0.08288440853357315, "learning_rate": 7.539584601376565e-07, "loss": 0.0013, "step": 102750 }, { "epoch": 1.6814202732553383, "grad_norm": 0.20849573612213135, "learning_rate": 7.532046162187839e-07, "loss": 0.0007, "step": 102760 }, { "epoch": 1.681583899206414, "grad_norm": 0.05855753645300865, "learning_rate": 7.524511186540928e-07, "loss": 0.0007, "step": 102770 }, { "epoch": 1.68174752515749, "grad_norm": 0.04733557254076004, "learning_rate": 7.516979675050379e-07, "loss": 0.001, "step": 102780 }, { "epoch": 1.6819111511085658, "grad_norm": 0.058088548481464386, "learning_rate": 7.509451628330417e-07, "loss": 0.001, "step": 102790 }, { "epoch": 1.6820747770596416, "grad_norm": 0.026194332167506218, "learning_rate": 7.501927046995028e-07, "loss": 0.0011, "step": 102800 }, { "epoch": 1.6822384030107176, "grad_norm": 0.05519293621182442, "learning_rate": 7.494405931657866e-07, "loss": 0.0007, "step": 102810 }, { "epoch": 1.6824020289617934, "grad_norm": 0.02002830244600773, "learning_rate": 7.486888282932353e-07, "loss": 0.0008, "step": 102820 }, { "epoch": 1.6825656549128691, "grad_norm": 0.05044343322515488, "learning_rate": 7.479374101431575e-07, "loss": 0.0011, "step": 102830 }, { "epoch": 1.6827292808639451, "grad_norm": 0.007632914464920759, "learning_rate": 7.471863387768391e-07, "loss": 0.0006, "step": 102840 }, { "epoch": 1.682892906815021, "grad_norm": 0.09232573956251144, "learning_rate": 7.464356142555323e-07, "loss": 0.0007, "step": 102850 }, { "epoch": 1.6830565327660967, "grad_norm": 0.04856385663151741, "learning_rate": 7.456852366404665e-07, "loss": 0.0006, "step": 102860 }, { "epoch": 1.6832201587171727, "grad_norm": 0.01687176339328289, "learning_rate": 7.449352059928366e-07, "loss": 0.0004, "step": 102870 }, { "epoch": 1.6833837846682482, "grad_norm": 0.11527708172798157, "learning_rate": 7.441855223738159e-07, "loss": 0.0009, "step": 102880 }, { "epoch": 1.6835474106193242, "grad_norm": 0.017740240320563316, "learning_rate": 7.434361858445432e-07, "loss": 0.0012, "step": 102890 }, { "epoch": 1.6837110365704002, "grad_norm": 0.008882102556526661, "learning_rate": 7.426871964661342e-07, "loss": 0.0009, "step": 102900 }, { "epoch": 1.6838746625214758, "grad_norm": 0.18059316277503967, "learning_rate": 7.419385542996721e-07, "loss": 0.001, "step": 102910 }, { "epoch": 1.6840382884725518, "grad_norm": 0.023213600739836693, "learning_rate": 7.411902594062148e-07, "loss": 0.0006, "step": 102920 }, { "epoch": 1.6842019144236275, "grad_norm": 0.09939147531986237, "learning_rate": 7.404423118467907e-07, "loss": 0.001, "step": 102930 }, { "epoch": 1.6843655403747033, "grad_norm": 0.024556171149015427, "learning_rate": 7.396947116823983e-07, "loss": 0.005, "step": 102940 }, { "epoch": 1.6845291663257793, "grad_norm": 0.0024782780092209578, "learning_rate": 7.389474589740108e-07, "loss": 0.0011, "step": 102950 }, { "epoch": 1.684692792276855, "grad_norm": 0.03002997487783432, "learning_rate": 7.38200553782571e-07, "loss": 0.001, "step": 102960 }, { "epoch": 1.6848564182279309, "grad_norm": 0.02018268220126629, "learning_rate": 7.374539961689947e-07, "loss": 0.0008, "step": 102970 }, { "epoch": 1.6850200441790069, "grad_norm": 0.1748068630695343, "learning_rate": 7.367077861941668e-07, "loss": 0.0016, "step": 102980 }, { "epoch": 1.6851836701300826, "grad_norm": 0.03704323619604111, "learning_rate": 7.359619239189486e-07, "loss": 0.0016, "step": 102990 }, { "epoch": 1.6853472960811584, "grad_norm": 0.04993622004985809, "learning_rate": 7.352164094041669e-07, "loss": 0.0009, "step": 103000 }, { "epoch": 1.6855109220322344, "grad_norm": 0.025849327445030212, "learning_rate": 7.34471242710626e-07, "loss": 0.0007, "step": 103010 }, { "epoch": 1.6856745479833102, "grad_norm": 0.03651169314980507, "learning_rate": 7.337264238990966e-07, "loss": 0.0014, "step": 103020 }, { "epoch": 1.685838173934386, "grad_norm": 0.04672649875283241, "learning_rate": 7.329819530303262e-07, "loss": 0.0011, "step": 103030 }, { "epoch": 1.686001799885462, "grad_norm": 0.23591391742229462, "learning_rate": 7.32237830165029e-07, "loss": 0.0017, "step": 103040 }, { "epoch": 1.6861654258365377, "grad_norm": 0.033353328704833984, "learning_rate": 7.314940553638956e-07, "loss": 0.0008, "step": 103050 }, { "epoch": 1.6863290517876135, "grad_norm": 0.03773863613605499, "learning_rate": 7.307506286875826e-07, "loss": 0.0015, "step": 103060 }, { "epoch": 1.6864926777386895, "grad_norm": 0.060078006237745285, "learning_rate": 7.300075501967246e-07, "loss": 0.001, "step": 103070 }, { "epoch": 1.686656303689765, "grad_norm": 0.002214373555034399, "learning_rate": 7.292648199519215e-07, "loss": 0.0015, "step": 103080 }, { "epoch": 1.686819929640841, "grad_norm": 0.06906892359256744, "learning_rate": 7.285224380137512e-07, "loss": 0.0017, "step": 103090 }, { "epoch": 1.686983555591917, "grad_norm": 0.00794029701501131, "learning_rate": 7.277804044427561e-07, "loss": 0.0005, "step": 103100 }, { "epoch": 1.6871471815429926, "grad_norm": 0.040784694254398346, "learning_rate": 7.270387192994576e-07, "loss": 0.0012, "step": 103110 }, { "epoch": 1.6873108074940686, "grad_norm": 0.032550711184740067, "learning_rate": 7.262973826443421e-07, "loss": 0.0011, "step": 103120 }, { "epoch": 1.6874744334451444, "grad_norm": 0.03277921304106712, "learning_rate": 7.25556394537873e-07, "loss": 0.001, "step": 103130 }, { "epoch": 1.6876380593962201, "grad_norm": 0.044698312878608704, "learning_rate": 7.248157550404799e-07, "loss": 0.001, "step": 103140 }, { "epoch": 1.6878016853472961, "grad_norm": 0.02356862835586071, "learning_rate": 7.240754642125697e-07, "loss": 0.0006, "step": 103150 }, { "epoch": 1.687965311298372, "grad_norm": 0.030401326715946198, "learning_rate": 7.233355221145155e-07, "loss": 0.0008, "step": 103160 }, { "epoch": 1.6881289372494477, "grad_norm": 0.03275863081216812, "learning_rate": 7.225959288066665e-07, "loss": 0.0014, "step": 103170 }, { "epoch": 1.6882925632005237, "grad_norm": 0.05609789118170738, "learning_rate": 7.218566843493396e-07, "loss": 0.0009, "step": 103180 }, { "epoch": 1.6884561891515995, "grad_norm": 0.00938730500638485, "learning_rate": 7.211177888028276e-07, "loss": 0.0014, "step": 103190 }, { "epoch": 1.6886198151026752, "grad_norm": 0.0029103972483426332, "learning_rate": 7.203792422273892e-07, "loss": 0.0006, "step": 103200 }, { "epoch": 1.6887834410537512, "grad_norm": 0.05422814562916756, "learning_rate": 7.196410446832603e-07, "loss": 0.0009, "step": 103210 }, { "epoch": 1.688947067004827, "grad_norm": 0.031813185662031174, "learning_rate": 7.189031962306436e-07, "loss": 0.0016, "step": 103220 }, { "epoch": 1.6891106929559028, "grad_norm": 0.033441491425037384, "learning_rate": 7.181656969297185e-07, "loss": 0.0008, "step": 103230 }, { "epoch": 1.6892743189069788, "grad_norm": 0.029408013448119164, "learning_rate": 7.174285468406289e-07, "loss": 0.0011, "step": 103240 }, { "epoch": 1.6894379448580545, "grad_norm": 0.048699889332056046, "learning_rate": 7.166917460234985e-07, "loss": 0.001, "step": 103250 }, { "epoch": 1.6896015708091303, "grad_norm": 0.03850254416465759, "learning_rate": 7.159552945384157e-07, "loss": 0.0007, "step": 103260 }, { "epoch": 1.6897651967602063, "grad_norm": 0.008482254110276699, "learning_rate": 7.152191924454427e-07, "loss": 0.0006, "step": 103270 }, { "epoch": 1.6899288227112819, "grad_norm": 0.04625152051448822, "learning_rate": 7.144834398046152e-07, "loss": 0.0006, "step": 103280 }, { "epoch": 1.6900924486623579, "grad_norm": 0.04332206770777702, "learning_rate": 7.137480366759364e-07, "loss": 0.0015, "step": 103290 }, { "epoch": 1.6902560746134339, "grad_norm": 0.029152631759643555, "learning_rate": 7.130129831193861e-07, "loss": 0.0011, "step": 103300 }, { "epoch": 1.6904197005645094, "grad_norm": 0.040577176958322525, "learning_rate": 7.122782791949101e-07, "loss": 0.0005, "step": 103310 }, { "epoch": 1.6905833265155854, "grad_norm": 0.036893464624881744, "learning_rate": 7.115439249624312e-07, "loss": 0.0007, "step": 103320 }, { "epoch": 1.6907469524666612, "grad_norm": 0.02258090116083622, "learning_rate": 7.108099204818375e-07, "loss": 0.0006, "step": 103330 }, { "epoch": 1.690910578417737, "grad_norm": 0.16975213587284088, "learning_rate": 7.100762658129945e-07, "loss": 0.0016, "step": 103340 }, { "epoch": 1.691074204368813, "grad_norm": 0.0500812865793705, "learning_rate": 7.093429610157348e-07, "loss": 0.0007, "step": 103350 }, { "epoch": 1.6912378303198887, "grad_norm": 0.012306282296776772, "learning_rate": 7.086100061498663e-07, "loss": 0.0005, "step": 103360 }, { "epoch": 1.6914014562709645, "grad_norm": 0.024904262274503708, "learning_rate": 7.078774012751644e-07, "loss": 0.0013, "step": 103370 }, { "epoch": 1.6915650822220405, "grad_norm": 0.12684333324432373, "learning_rate": 7.07145146451379e-07, "loss": 0.0012, "step": 103380 }, { "epoch": 1.6917287081731163, "grad_norm": 0.028593583032488823, "learning_rate": 7.064132417382291e-07, "loss": 0.0009, "step": 103390 }, { "epoch": 1.691892334124192, "grad_norm": 0.06034789979457855, "learning_rate": 7.056816871954086e-07, "loss": 0.0008, "step": 103400 }, { "epoch": 1.692055960075268, "grad_norm": 0.05239923670887947, "learning_rate": 7.049504828825776e-07, "loss": 0.0006, "step": 103410 }, { "epoch": 1.6922195860263438, "grad_norm": 0.057671964168548584, "learning_rate": 7.042196288593734e-07, "loss": 0.0015, "step": 103420 }, { "epoch": 1.6923832119774196, "grad_norm": 0.018480369821190834, "learning_rate": 7.034891251853998e-07, "loss": 0.0023, "step": 103430 }, { "epoch": 1.6925468379284956, "grad_norm": 0.0787099152803421, "learning_rate": 7.02758971920236e-07, "loss": 0.0007, "step": 103440 }, { "epoch": 1.6927104638795711, "grad_norm": 0.0029936376959085464, "learning_rate": 7.020291691234287e-07, "loss": 0.0006, "step": 103450 }, { "epoch": 1.6928740898306471, "grad_norm": 0.03102288767695427, "learning_rate": 7.01299716854501e-07, "loss": 0.0006, "step": 103460 }, { "epoch": 1.6930377157817231, "grad_norm": 0.015558217652142048, "learning_rate": 7.005706151729419e-07, "loss": 0.0008, "step": 103470 }, { "epoch": 1.6932013417327987, "grad_norm": 0.036682598292827606, "learning_rate": 6.998418641382165e-07, "loss": 0.0012, "step": 103480 }, { "epoch": 1.6933649676838747, "grad_norm": 0.051463935524225235, "learning_rate": 6.991134638097569e-07, "loss": 0.0014, "step": 103490 }, { "epoch": 1.6935285936349505, "grad_norm": 0.003980196081101894, "learning_rate": 6.983854142469715e-07, "loss": 0.0006, "step": 103500 }, { "epoch": 1.6936922195860262, "grad_norm": 0.07924409955739975, "learning_rate": 6.976577155092351e-07, "loss": 0.0009, "step": 103510 }, { "epoch": 1.6938558455371022, "grad_norm": 0.12490904331207275, "learning_rate": 6.969303676558992e-07, "loss": 0.0006, "step": 103520 }, { "epoch": 1.694019471488178, "grad_norm": 0.0019140464719384909, "learning_rate": 6.962033707462807e-07, "loss": 0.0004, "step": 103530 }, { "epoch": 1.6941830974392538, "grad_norm": 0.007023755926638842, "learning_rate": 6.954767248396737e-07, "loss": 0.0007, "step": 103540 }, { "epoch": 1.6943467233903298, "grad_norm": 0.031072556972503662, "learning_rate": 6.94750429995339e-07, "loss": 0.0006, "step": 103550 }, { "epoch": 1.6945103493414055, "grad_norm": 0.005813030991703272, "learning_rate": 6.94024486272512e-07, "loss": 0.0006, "step": 103560 }, { "epoch": 1.6946739752924813, "grad_norm": 0.04161210358142853, "learning_rate": 6.932988937303981e-07, "loss": 0.0013, "step": 103570 }, { "epoch": 1.6948376012435573, "grad_norm": 0.027132993564009666, "learning_rate": 6.925736524281723e-07, "loss": 0.0016, "step": 103580 }, { "epoch": 1.695001227194633, "grad_norm": 0.0214987900108099, "learning_rate": 6.918487624249859e-07, "loss": 0.0015, "step": 103590 }, { "epoch": 1.6951648531457089, "grad_norm": 0.05383111163973808, "learning_rate": 6.911242237799554e-07, "loss": 0.0012, "step": 103600 }, { "epoch": 1.6953284790967849, "grad_norm": 0.007977821864187717, "learning_rate": 6.90400036552174e-07, "loss": 0.0016, "step": 103610 }, { "epoch": 1.6954921050478606, "grad_norm": 0.001280922326259315, "learning_rate": 6.896762008007024e-07, "loss": 0.0003, "step": 103620 }, { "epoch": 1.6956557309989364, "grad_norm": 0.03987139090895653, "learning_rate": 6.889527165845761e-07, "loss": 0.0011, "step": 103630 }, { "epoch": 1.6958193569500124, "grad_norm": 0.04433751478791237, "learning_rate": 6.882295839627972e-07, "loss": 0.0006, "step": 103640 }, { "epoch": 1.695982982901088, "grad_norm": 0.0042603472247719765, "learning_rate": 6.875068029943449e-07, "loss": 0.001, "step": 103650 }, { "epoch": 1.696146608852164, "grad_norm": 0.07267089933156967, "learning_rate": 6.867843737381646e-07, "loss": 0.0014, "step": 103660 }, { "epoch": 1.69631023480324, "grad_norm": 0.000646169122774154, "learning_rate": 6.860622962531766e-07, "loss": 0.0006, "step": 103670 }, { "epoch": 1.6964738607543155, "grad_norm": 0.13197201490402222, "learning_rate": 6.853405705982697e-07, "loss": 0.0013, "step": 103680 }, { "epoch": 1.6966374867053915, "grad_norm": 0.015983087942004204, "learning_rate": 6.846191968323068e-07, "loss": 0.0008, "step": 103690 }, { "epoch": 1.6968011126564673, "grad_norm": 0.042224958539009094, "learning_rate": 6.838981750141188e-07, "loss": 0.0012, "step": 103700 }, { "epoch": 1.696964738607543, "grad_norm": 0.030647391453385353, "learning_rate": 6.831775052025125e-07, "loss": 0.0013, "step": 103710 }, { "epoch": 1.697128364558619, "grad_norm": 0.009895850904285908, "learning_rate": 6.824571874562608e-07, "loss": 0.0012, "step": 103720 }, { "epoch": 1.6972919905096948, "grad_norm": 0.03890618309378624, "learning_rate": 6.817372218341123e-07, "loss": 0.0013, "step": 103730 }, { "epoch": 1.6974556164607706, "grad_norm": 0.05881976708769798, "learning_rate": 6.810176083947828e-07, "loss": 0.0022, "step": 103740 }, { "epoch": 1.6976192424118466, "grad_norm": 0.033913180232048035, "learning_rate": 6.802983471969638e-07, "loss": 0.0012, "step": 103750 }, { "epoch": 1.6977828683629224, "grad_norm": 0.051612187176942825, "learning_rate": 6.795794382993137e-07, "loss": 0.0009, "step": 103760 }, { "epoch": 1.6979464943139981, "grad_norm": 0.06663978844881058, "learning_rate": 6.788608817604658e-07, "loss": 0.0008, "step": 103770 }, { "epoch": 1.6981101202650741, "grad_norm": 0.00887993536889553, "learning_rate": 6.781426776390221e-07, "loss": 0.001, "step": 103780 }, { "epoch": 1.69827374621615, "grad_norm": 0.0070457784458994865, "learning_rate": 6.774248259935584e-07, "loss": 0.0004, "step": 103790 }, { "epoch": 1.6984373721672257, "grad_norm": 0.06422353535890579, "learning_rate": 6.767073268826185e-07, "loss": 0.0017, "step": 103800 }, { "epoch": 1.6986009981183017, "grad_norm": 0.10052681714296341, "learning_rate": 6.759901803647207e-07, "loss": 0.0015, "step": 103810 }, { "epoch": 1.6987646240693774, "grad_norm": 0.02152802050113678, "learning_rate": 6.752733864983507e-07, "loss": 0.0012, "step": 103820 }, { "epoch": 1.6989282500204532, "grad_norm": 0.019163277000188828, "learning_rate": 6.745569453419709e-07, "loss": 0.0005, "step": 103830 }, { "epoch": 1.6990918759715292, "grad_norm": 0.060903165489435196, "learning_rate": 6.73840856954009e-07, "loss": 0.0009, "step": 103840 }, { "epoch": 1.6992555019226048, "grad_norm": 0.134161576628685, "learning_rate": 6.731251213928685e-07, "loss": 0.001, "step": 103850 }, { "epoch": 1.6994191278736808, "grad_norm": 0.03672676905989647, "learning_rate": 6.724097387169215e-07, "loss": 0.0006, "step": 103860 }, { "epoch": 1.6995827538247568, "grad_norm": 0.06439151614904404, "learning_rate": 6.716947089845133e-07, "loss": 0.0012, "step": 103870 }, { "epoch": 1.6997463797758323, "grad_norm": 0.06246178597211838, "learning_rate": 6.709800322539572e-07, "loss": 0.0006, "step": 103880 }, { "epoch": 1.6999100057269083, "grad_norm": 0.026958303526043892, "learning_rate": 6.702657085835424e-07, "loss": 0.0012, "step": 103890 }, { "epoch": 1.700073631677984, "grad_norm": 0.0357089564204216, "learning_rate": 6.695517380315253e-07, "loss": 0.0005, "step": 103900 }, { "epoch": 1.7002372576290599, "grad_norm": 0.032679539173841476, "learning_rate": 6.688381206561339e-07, "loss": 0.0013, "step": 103910 }, { "epoch": 1.7004008835801359, "grad_norm": 0.02173207700252533, "learning_rate": 6.681248565155707e-07, "loss": 0.0003, "step": 103920 }, { "epoch": 1.7005645095312116, "grad_norm": 0.034323230385780334, "learning_rate": 6.674119456680045e-07, "loss": 0.0009, "step": 103930 }, { "epoch": 1.7007281354822874, "grad_norm": 0.020370975136756897, "learning_rate": 6.666993881715805e-07, "loss": 0.0006, "step": 103940 }, { "epoch": 1.7008917614333634, "grad_norm": 0.12958072125911713, "learning_rate": 6.659871840844101e-07, "loss": 0.0031, "step": 103950 }, { "epoch": 1.7010553873844392, "grad_norm": 0.06326296925544739, "learning_rate": 6.652753334645806e-07, "loss": 0.0011, "step": 103960 }, { "epoch": 1.701219013335515, "grad_norm": 0.030958905816078186, "learning_rate": 6.645638363701463e-07, "loss": 0.0014, "step": 103970 }, { "epoch": 1.701382639286591, "grad_norm": 0.0015847939066588879, "learning_rate": 6.638526928591355e-07, "loss": 0.0008, "step": 103980 }, { "epoch": 1.7015462652376667, "grad_norm": 0.060073256492614746, "learning_rate": 6.631419029895453e-07, "loss": 0.001, "step": 103990 }, { "epoch": 1.7017098911887425, "grad_norm": 0.02088100090622902, "learning_rate": 6.624314668193472e-07, "loss": 0.0005, "step": 104000 }, { "epoch": 1.7018735171398185, "grad_norm": 0.025589486584067345, "learning_rate": 6.617213844064796e-07, "loss": 0.0025, "step": 104010 }, { "epoch": 1.7020371430908943, "grad_norm": 0.0009547353256493807, "learning_rate": 6.610116558088575e-07, "loss": 0.0011, "step": 104020 }, { "epoch": 1.70220076904197, "grad_norm": 0.0042994339019060135, "learning_rate": 6.603022810843606e-07, "loss": 0.0005, "step": 104030 }, { "epoch": 1.702364394993046, "grad_norm": 0.023244405165314674, "learning_rate": 6.595932602908461e-07, "loss": 0.0006, "step": 104040 }, { "epoch": 1.7025280209441216, "grad_norm": 0.01974932849407196, "learning_rate": 6.588845934861371e-07, "loss": 0.001, "step": 104050 }, { "epoch": 1.7026916468951976, "grad_norm": 0.04217154532670975, "learning_rate": 6.581762807280317e-07, "loss": 0.0009, "step": 104060 }, { "epoch": 1.7028552728462736, "grad_norm": 0.029763035476207733, "learning_rate": 6.574683220742956e-07, "loss": 0.0002, "step": 104070 }, { "epoch": 1.7030188987973491, "grad_norm": 0.07592908293008804, "learning_rate": 6.567607175826702e-07, "loss": 0.0014, "step": 104080 }, { "epoch": 1.7031825247484251, "grad_norm": 0.0875341147184372, "learning_rate": 6.560534673108621e-07, "loss": 0.0009, "step": 104090 }, { "epoch": 1.703346150699501, "grad_norm": 0.005179502069950104, "learning_rate": 6.553465713165547e-07, "loss": 0.0006, "step": 104100 }, { "epoch": 1.7035097766505767, "grad_norm": 0.021889645606279373, "learning_rate": 6.546400296573985e-07, "loss": 0.0007, "step": 104110 }, { "epoch": 1.7036734026016527, "grad_norm": 0.17082317173480988, "learning_rate": 6.53933842391018e-07, "loss": 0.0011, "step": 104120 }, { "epoch": 1.7038370285527284, "grad_norm": 0.025410214439034462, "learning_rate": 6.53228009575006e-07, "loss": 0.0006, "step": 104130 }, { "epoch": 1.7040006545038042, "grad_norm": 0.016821803525090218, "learning_rate": 6.525225312669292e-07, "loss": 0.0008, "step": 104140 }, { "epoch": 1.7041642804548802, "grad_norm": 0.0037400545552372932, "learning_rate": 6.518174075243228e-07, "loss": 0.0006, "step": 104150 }, { "epoch": 1.704327906405956, "grad_norm": 0.018201595172286034, "learning_rate": 6.511126384046956e-07, "loss": 0.0015, "step": 104160 }, { "epoch": 1.7044915323570318, "grad_norm": 0.016506675630807877, "learning_rate": 6.504082239655246e-07, "loss": 0.001, "step": 104170 }, { "epoch": 1.7046551583081078, "grad_norm": 0.005652797874063253, "learning_rate": 6.49704164264261e-07, "loss": 0.0006, "step": 104180 }, { "epoch": 1.7048187842591835, "grad_norm": 0.08837218582630157, "learning_rate": 6.490004593583238e-07, "loss": 0.0006, "step": 104190 }, { "epoch": 1.7049824102102593, "grad_norm": 0.022922301664948463, "learning_rate": 6.482971093051072e-07, "loss": 0.0009, "step": 104200 }, { "epoch": 1.7051460361613353, "grad_norm": 0.006444236263632774, "learning_rate": 6.475941141619707e-07, "loss": 0.0009, "step": 104210 }, { "epoch": 1.7053096621124109, "grad_norm": 0.028774041682481766, "learning_rate": 6.46891473986252e-07, "loss": 0.001, "step": 104220 }, { "epoch": 1.7054732880634869, "grad_norm": 0.03530431166291237, "learning_rate": 6.461891888352539e-07, "loss": 0.0003, "step": 104230 }, { "epoch": 1.7056369140145629, "grad_norm": 0.032492201775312424, "learning_rate": 6.454872587662514e-07, "loss": 0.0008, "step": 104240 }, { "epoch": 1.7058005399656384, "grad_norm": 0.025364955887198448, "learning_rate": 6.447856838364935e-07, "loss": 0.0015, "step": 104250 }, { "epoch": 1.7059641659167144, "grad_norm": 0.04326396435499191, "learning_rate": 6.440844641031969e-07, "loss": 0.0011, "step": 104260 }, { "epoch": 1.7061277918677902, "grad_norm": 0.018982864916324615, "learning_rate": 6.433835996235522e-07, "loss": 0.0008, "step": 104270 }, { "epoch": 1.706291417818866, "grad_norm": 0.036128368228673935, "learning_rate": 6.426830904547176e-07, "loss": 0.0008, "step": 104280 }, { "epoch": 1.706455043769942, "grad_norm": 0.02752913162112236, "learning_rate": 6.419829366538266e-07, "loss": 0.0011, "step": 104290 }, { "epoch": 1.7066186697210177, "grad_norm": 0.0069655366241931915, "learning_rate": 6.412831382779789e-07, "loss": 0.0012, "step": 104300 }, { "epoch": 1.7067822956720935, "grad_norm": 0.05464395508170128, "learning_rate": 6.405836953842493e-07, "loss": 0.0012, "step": 104310 }, { "epoch": 1.7069459216231695, "grad_norm": 0.0283599104732275, "learning_rate": 6.398846080296811e-07, "loss": 0.0014, "step": 104320 }, { "epoch": 1.7071095475742453, "grad_norm": 0.05556324124336243, "learning_rate": 6.391858762712904e-07, "loss": 0.0006, "step": 104330 }, { "epoch": 1.707273173525321, "grad_norm": 0.011731604114174843, "learning_rate": 6.384875001660623e-07, "loss": 0.0007, "step": 104340 }, { "epoch": 1.707436799476397, "grad_norm": 0.052520766854286194, "learning_rate": 6.37789479770955e-07, "loss": 0.0006, "step": 104350 }, { "epoch": 1.7076004254274728, "grad_norm": 0.028871996328234673, "learning_rate": 6.370918151428957e-07, "loss": 0.0007, "step": 104360 }, { "epoch": 1.7077640513785486, "grad_norm": 0.008424033410847187, "learning_rate": 6.363945063387849e-07, "loss": 0.0005, "step": 104370 }, { "epoch": 1.7079276773296246, "grad_norm": 0.01747305691242218, "learning_rate": 6.356975534154908e-07, "loss": 0.0013, "step": 104380 }, { "epoch": 1.7080913032807004, "grad_norm": 0.0701238289475441, "learning_rate": 6.350009564298564e-07, "loss": 0.0008, "step": 104390 }, { "epoch": 1.7082549292317761, "grad_norm": 0.1218705028295517, "learning_rate": 6.343047154386927e-07, "loss": 0.0016, "step": 104400 }, { "epoch": 1.7084185551828521, "grad_norm": 0.03191279247403145, "learning_rate": 6.336088304987836e-07, "loss": 0.0006, "step": 104410 }, { "epoch": 1.7085821811339277, "grad_norm": 0.029151448979973793, "learning_rate": 6.329133016668815e-07, "loss": 0.0008, "step": 104420 }, { "epoch": 1.7087458070850037, "grad_norm": 0.03438221663236618, "learning_rate": 6.322181289997131e-07, "loss": 0.001, "step": 104430 }, { "epoch": 1.7089094330360797, "grad_norm": 0.014078960753977299, "learning_rate": 6.315233125539733e-07, "loss": 0.0006, "step": 104440 }, { "epoch": 1.7090730589871552, "grad_norm": 0.06922315806150436, "learning_rate": 6.308288523863298e-07, "loss": 0.0011, "step": 104450 }, { "epoch": 1.7092366849382312, "grad_norm": 0.0694236233830452, "learning_rate": 6.301347485534193e-07, "loss": 0.0005, "step": 104460 }, { "epoch": 1.709400310889307, "grad_norm": 0.015566708520054817, "learning_rate": 6.294410011118518e-07, "loss": 0.0008, "step": 104470 }, { "epoch": 1.7095639368403828, "grad_norm": 0.02127688191831112, "learning_rate": 6.287476101182055e-07, "loss": 0.0012, "step": 104480 }, { "epoch": 1.7097275627914588, "grad_norm": 0.0193184781819582, "learning_rate": 6.280545756290323e-07, "loss": 0.0015, "step": 104490 }, { "epoch": 1.7098911887425345, "grad_norm": 0.009278331883251667, "learning_rate": 6.273618977008527e-07, "loss": 0.0004, "step": 104500 }, { "epoch": 1.7100548146936103, "grad_norm": 0.031821269541978836, "learning_rate": 6.266695763901609e-07, "loss": 0.0005, "step": 104510 }, { "epoch": 1.7102184406446863, "grad_norm": 0.014671229757368565, "learning_rate": 6.259776117534177e-07, "loss": 0.0008, "step": 104520 }, { "epoch": 1.710382066595762, "grad_norm": 0.07001157850027084, "learning_rate": 6.252860038470604e-07, "loss": 0.001, "step": 104530 }, { "epoch": 1.7105456925468379, "grad_norm": 0.017514387145638466, "learning_rate": 6.245947527274921e-07, "loss": 0.0006, "step": 104540 }, { "epoch": 1.7107093184979139, "grad_norm": 0.014102383516728878, "learning_rate": 6.239038584510887e-07, "loss": 0.0005, "step": 104550 }, { "epoch": 1.7108729444489896, "grad_norm": 0.028875062242150307, "learning_rate": 6.232133210741987e-07, "loss": 0.0008, "step": 104560 }, { "epoch": 1.7110365704000654, "grad_norm": 0.10573095083236694, "learning_rate": 6.22523140653138e-07, "loss": 0.0011, "step": 104570 }, { "epoch": 1.7112001963511414, "grad_norm": 0.02643732912838459, "learning_rate": 6.21833317244197e-07, "loss": 0.0006, "step": 104580 }, { "epoch": 1.7113638223022172, "grad_norm": 0.040627654641866684, "learning_rate": 6.211438509036343e-07, "loss": 0.0007, "step": 104590 }, { "epoch": 1.711527448253293, "grad_norm": 0.010557485744357109, "learning_rate": 6.204547416876822e-07, "loss": 0.001, "step": 104600 }, { "epoch": 1.711691074204369, "grad_norm": 0.06961768120527267, "learning_rate": 6.197659896525393e-07, "loss": 0.001, "step": 104610 }, { "epoch": 1.7118547001554445, "grad_norm": 0.09079907089471817, "learning_rate": 6.190775948543809e-07, "loss": 0.0009, "step": 104620 }, { "epoch": 1.7120183261065205, "grad_norm": 0.06503836065530777, "learning_rate": 6.183895573493476e-07, "loss": 0.0014, "step": 104630 }, { "epoch": 1.7121819520575965, "grad_norm": 0.021787263453006744, "learning_rate": 6.177018771935556e-07, "loss": 0.0004, "step": 104640 }, { "epoch": 1.712345578008672, "grad_norm": 0.02542039193212986, "learning_rate": 6.170145544430872e-07, "loss": 0.0014, "step": 104650 }, { "epoch": 1.712509203959748, "grad_norm": 0.0482574887573719, "learning_rate": 6.16327589154001e-07, "loss": 0.0009, "step": 104660 }, { "epoch": 1.7126728299108238, "grad_norm": 0.045166414231061935, "learning_rate": 6.156409813823211e-07, "loss": 0.0013, "step": 104670 }, { "epoch": 1.7128364558618996, "grad_norm": 0.019430572167038918, "learning_rate": 6.149547311840464e-07, "loss": 0.0011, "step": 104680 }, { "epoch": 1.7130000818129756, "grad_norm": 0.051017872989177704, "learning_rate": 6.14268838615144e-07, "loss": 0.0013, "step": 104690 }, { "epoch": 1.7131637077640514, "grad_norm": 0.05045241490006447, "learning_rate": 6.135833037315541e-07, "loss": 0.0006, "step": 104700 }, { "epoch": 1.7133273337151271, "grad_norm": 0.08285973221063614, "learning_rate": 6.128981265891859e-07, "loss": 0.0011, "step": 104710 }, { "epoch": 1.7134909596662031, "grad_norm": 0.013021004386246204, "learning_rate": 6.122133072439207e-07, "loss": 0.0011, "step": 104720 }, { "epoch": 1.713654585617279, "grad_norm": 0.011671613901853561, "learning_rate": 6.115288457516089e-07, "loss": 0.0006, "step": 104730 }, { "epoch": 1.7138182115683547, "grad_norm": 0.06137021631002426, "learning_rate": 6.108447421680741e-07, "loss": 0.0006, "step": 104740 }, { "epoch": 1.7139818375194307, "grad_norm": 0.0050993310287594795, "learning_rate": 6.101609965491084e-07, "loss": 0.001, "step": 104750 }, { "epoch": 1.7141454634705064, "grad_norm": 0.06072403863072395, "learning_rate": 6.094776089504773e-07, "loss": 0.0007, "step": 104760 }, { "epoch": 1.7143090894215822, "grad_norm": 0.09018073230981827, "learning_rate": 6.087945794279132e-07, "loss": 0.0009, "step": 104770 }, { "epoch": 1.7144727153726582, "grad_norm": 0.01661476120352745, "learning_rate": 6.081119080371239e-07, "loss": 0.0012, "step": 104780 }, { "epoch": 1.714636341323734, "grad_norm": 0.048093412071466446, "learning_rate": 6.074295948337844e-07, "loss": 0.0006, "step": 104790 }, { "epoch": 1.7147999672748098, "grad_norm": 0.038393646478652954, "learning_rate": 6.067476398735433e-07, "loss": 0.0007, "step": 104800 }, { "epoch": 1.7149635932258858, "grad_norm": 0.03215879946947098, "learning_rate": 6.060660432120163e-07, "loss": 0.0012, "step": 104810 }, { "epoch": 1.7151272191769613, "grad_norm": 0.04612722620368004, "learning_rate": 6.053848049047944e-07, "loss": 0.0008, "step": 104820 }, { "epoch": 1.7152908451280373, "grad_norm": 0.042219240218400955, "learning_rate": 6.047039250074354e-07, "loss": 0.0005, "step": 104830 }, { "epoch": 1.7154544710791133, "grad_norm": 0.01066769752651453, "learning_rate": 6.040234035754705e-07, "loss": 0.0034, "step": 104840 }, { "epoch": 1.7156180970301889, "grad_norm": 0.010555176995694637, "learning_rate": 6.033432406644002e-07, "loss": 0.0011, "step": 104850 }, { "epoch": 1.7157817229812649, "grad_norm": 0.025802545249462128, "learning_rate": 6.026634363296968e-07, "loss": 0.0008, "step": 104860 }, { "epoch": 1.7159453489323406, "grad_norm": 0.030181996524333954, "learning_rate": 6.019839906268026e-07, "loss": 0.0008, "step": 104870 }, { "epoch": 1.7161089748834164, "grad_norm": 0.012528599239885807, "learning_rate": 6.013049036111301e-07, "loss": 0.0007, "step": 104880 }, { "epoch": 1.7162726008344924, "grad_norm": 0.0035704607143998146, "learning_rate": 6.006261753380649e-07, "loss": 0.0009, "step": 104890 }, { "epoch": 1.7164362267855682, "grad_norm": 0.03126341477036476, "learning_rate": 5.999478058629599e-07, "loss": 0.0015, "step": 104900 }, { "epoch": 1.716599852736644, "grad_norm": 0.035922903567552567, "learning_rate": 5.992697952411425e-07, "loss": 0.0007, "step": 104910 }, { "epoch": 1.71676347868772, "grad_norm": 0.012180100195109844, "learning_rate": 5.985921435279069e-07, "loss": 0.0008, "step": 104920 }, { "epoch": 1.7169271046387957, "grad_norm": 0.025365808978676796, "learning_rate": 5.979148507785226e-07, "loss": 0.0005, "step": 104930 }, { "epoch": 1.7170907305898715, "grad_norm": 0.13826243579387665, "learning_rate": 5.972379170482245e-07, "loss": 0.0014, "step": 104940 }, { "epoch": 1.7172543565409475, "grad_norm": 0.020765230059623718, "learning_rate": 5.965613423922234e-07, "loss": 0.0003, "step": 104950 }, { "epoch": 1.7174179824920233, "grad_norm": 0.041690435260534286, "learning_rate": 5.958851268656968e-07, "loss": 0.0008, "step": 104960 }, { "epoch": 1.717581608443099, "grad_norm": 0.11864360421895981, "learning_rate": 5.952092705237961e-07, "loss": 0.0008, "step": 104970 }, { "epoch": 1.717745234394175, "grad_norm": 0.03102138265967369, "learning_rate": 5.945337734216394e-07, "loss": 0.0006, "step": 104980 }, { "epoch": 1.7179088603452508, "grad_norm": 0.04831431061029434, "learning_rate": 5.938586356143205e-07, "loss": 0.0006, "step": 104990 }, { "epoch": 1.7180724862963266, "grad_norm": 0.019998712465167046, "learning_rate": 5.931838571568998e-07, "loss": 0.0015, "step": 105000 }, { "epoch": 1.7182361122474026, "grad_norm": 0.016343412920832634, "learning_rate": 5.925094381044111e-07, "loss": 0.001, "step": 105010 }, { "epoch": 1.7183997381984781, "grad_norm": 0.11839848011732101, "learning_rate": 5.918353785118564e-07, "loss": 0.0013, "step": 105020 }, { "epoch": 1.7185633641495541, "grad_norm": 0.02918507345020771, "learning_rate": 5.911616784342112e-07, "loss": 0.0007, "step": 105030 }, { "epoch": 1.7187269901006301, "grad_norm": 0.03054187446832657, "learning_rate": 5.904883379264181e-07, "loss": 0.0007, "step": 105040 }, { "epoch": 1.7188906160517057, "grad_norm": 0.12949982285499573, "learning_rate": 5.898153570433945e-07, "loss": 0.001, "step": 105050 }, { "epoch": 1.7190542420027817, "grad_norm": 0.012905827723443508, "learning_rate": 5.891427358400248e-07, "loss": 0.0009, "step": 105060 }, { "epoch": 1.7192178679538574, "grad_norm": 0.05726579576730728, "learning_rate": 5.884704743711678e-07, "loss": 0.0012, "step": 105070 }, { "epoch": 1.7193814939049332, "grad_norm": 0.09188417345285416, "learning_rate": 5.877985726916485e-07, "loss": 0.0016, "step": 105080 }, { "epoch": 1.7195451198560092, "grad_norm": 0.06933452934026718, "learning_rate": 5.87127030856267e-07, "loss": 0.0009, "step": 105090 }, { "epoch": 1.719708745807085, "grad_norm": 0.02457580342888832, "learning_rate": 5.864558489197903e-07, "loss": 0.002, "step": 105100 }, { "epoch": 1.7198723717581608, "grad_norm": 0.04385444149374962, "learning_rate": 5.857850269369586e-07, "loss": 0.0015, "step": 105110 }, { "epoch": 1.7200359977092368, "grad_norm": 0.012341826222836971, "learning_rate": 5.851145649624812e-07, "loss": 0.0009, "step": 105120 }, { "epoch": 1.7201996236603125, "grad_norm": 0.0032532778568565845, "learning_rate": 5.844444630510404e-07, "loss": 0.0009, "step": 105130 }, { "epoch": 1.7203632496113883, "grad_norm": 0.1358296126127243, "learning_rate": 5.837747212572847e-07, "loss": 0.0016, "step": 105140 }, { "epoch": 1.7205268755624643, "grad_norm": 0.2498563677072525, "learning_rate": 5.831053396358383e-07, "loss": 0.0019, "step": 105150 }, { "epoch": 1.72069050151354, "grad_norm": 0.021418867632746696, "learning_rate": 5.824363182412923e-07, "loss": 0.0009, "step": 105160 }, { "epoch": 1.7208541274646159, "grad_norm": 0.009927124716341496, "learning_rate": 5.81767657128211e-07, "loss": 0.0008, "step": 105170 }, { "epoch": 1.7210177534156919, "grad_norm": 0.030217165127396584, "learning_rate": 5.810993563511263e-07, "loss": 0.001, "step": 105180 }, { "epoch": 1.7211813793667674, "grad_norm": 0.061702560633420944, "learning_rate": 5.804314159645452e-07, "loss": 0.0005, "step": 105190 }, { "epoch": 1.7213450053178434, "grad_norm": 0.03343638777732849, "learning_rate": 5.797638360229413e-07, "loss": 0.0013, "step": 105200 }, { "epoch": 1.7215086312689194, "grad_norm": 0.018403608351945877, "learning_rate": 5.790966165807582e-07, "loss": 0.0013, "step": 105210 }, { "epoch": 1.721672257219995, "grad_norm": 0.019773226231336594, "learning_rate": 5.784297576924152e-07, "loss": 0.0023, "step": 105220 }, { "epoch": 1.721835883171071, "grad_norm": 0.04786520078778267, "learning_rate": 5.777632594122967e-07, "loss": 0.0013, "step": 105230 }, { "epoch": 1.7219995091221467, "grad_norm": 0.018433310091495514, "learning_rate": 5.770971217947624e-07, "loss": 0.0013, "step": 105240 }, { "epoch": 1.7221631350732225, "grad_norm": 0.04635376110672951, "learning_rate": 5.764313448941372e-07, "loss": 0.0005, "step": 105250 }, { "epoch": 1.7223267610242985, "grad_norm": 0.030670208856463432, "learning_rate": 5.757659287647227e-07, "loss": 0.001, "step": 105260 }, { "epoch": 1.7224903869753743, "grad_norm": 0.06548034399747849, "learning_rate": 5.751008734607855e-07, "loss": 0.0011, "step": 105270 }, { "epoch": 1.72265401292645, "grad_norm": 0.057641465216875076, "learning_rate": 5.744361790365665e-07, "loss": 0.0006, "step": 105280 }, { "epoch": 1.722817638877526, "grad_norm": 0.021024368703365326, "learning_rate": 5.737718455462754e-07, "loss": 0.0008, "step": 105290 }, { "epoch": 1.7229812648286018, "grad_norm": 0.026323791593313217, "learning_rate": 5.731078730440942e-07, "loss": 0.0007, "step": 105300 }, { "epoch": 1.7231448907796776, "grad_norm": 0.03075254335999489, "learning_rate": 5.724442615841719e-07, "loss": 0.0006, "step": 105310 }, { "epoch": 1.7233085167307536, "grad_norm": 0.09230765700340271, "learning_rate": 5.717810112206335e-07, "loss": 0.0014, "step": 105320 }, { "epoch": 1.7234721426818294, "grad_norm": 0.013636085204780102, "learning_rate": 5.711181220075679e-07, "loss": 0.0011, "step": 105330 }, { "epoch": 1.7236357686329051, "grad_norm": 0.03024132549762726, "learning_rate": 5.704555939990414e-07, "loss": 0.0023, "step": 105340 }, { "epoch": 1.7237993945839811, "grad_norm": 0.034921981394290924, "learning_rate": 5.697934272490852e-07, "loss": 0.0013, "step": 105350 }, { "epoch": 1.723963020535057, "grad_norm": 0.058230914175510406, "learning_rate": 5.691316218117049e-07, "loss": 0.0007, "step": 105360 }, { "epoch": 1.7241266464861327, "grad_norm": 0.03640428185462952, "learning_rate": 5.68470177740873e-07, "loss": 0.0005, "step": 105370 }, { "epoch": 1.7242902724372087, "grad_norm": 0.0640496164560318, "learning_rate": 5.678090950905379e-07, "loss": 0.0012, "step": 105380 }, { "epoch": 1.7244538983882842, "grad_norm": 0.035792384296655655, "learning_rate": 5.671483739146116e-07, "loss": 0.0008, "step": 105390 }, { "epoch": 1.7246175243393602, "grad_norm": 0.005545841529965401, "learning_rate": 5.664880142669838e-07, "loss": 0.0008, "step": 105400 }, { "epoch": 1.7247811502904362, "grad_norm": 0.011500423774123192, "learning_rate": 5.658280162015078e-07, "loss": 0.001, "step": 105410 }, { "epoch": 1.7249447762415118, "grad_norm": 0.04240519180893898, "learning_rate": 5.651683797720136e-07, "loss": 0.0012, "step": 105420 }, { "epoch": 1.7251084021925878, "grad_norm": 0.06422310322523117, "learning_rate": 5.645091050322971e-07, "loss": 0.0005, "step": 105430 }, { "epoch": 1.7252720281436635, "grad_norm": 0.009497438557446003, "learning_rate": 5.638501920361278e-07, "loss": 0.0009, "step": 105440 }, { "epoch": 1.7254356540947393, "grad_norm": 0.12026920169591904, "learning_rate": 5.631916408372429e-07, "loss": 0.0007, "step": 105450 }, { "epoch": 1.7255992800458153, "grad_norm": 0.09134354442358017, "learning_rate": 5.625334514893532e-07, "loss": 0.0004, "step": 105460 }, { "epoch": 1.725762905996891, "grad_norm": 0.028013909235596657, "learning_rate": 5.618756240461371e-07, "loss": 0.0007, "step": 105470 }, { "epoch": 1.7259265319479669, "grad_norm": 0.09132256358861923, "learning_rate": 5.612181585612459e-07, "loss": 0.0013, "step": 105480 }, { "epoch": 1.7260901578990429, "grad_norm": 0.15687842667102814, "learning_rate": 5.605610550882989e-07, "loss": 0.0016, "step": 105490 }, { "epoch": 1.7262537838501186, "grad_norm": 0.02945963852107525, "learning_rate": 5.59904313680889e-07, "loss": 0.0006, "step": 105500 }, { "epoch": 1.7264174098011944, "grad_norm": 0.05370291694998741, "learning_rate": 5.592479343925766e-07, "loss": 0.0012, "step": 105510 }, { "epoch": 1.7265810357522704, "grad_norm": 0.09495176374912262, "learning_rate": 5.585919172768928e-07, "loss": 0.0007, "step": 105520 }, { "epoch": 1.7267446617033462, "grad_norm": 0.025100622326135635, "learning_rate": 5.579362623873425e-07, "loss": 0.001, "step": 105530 }, { "epoch": 1.726908287654422, "grad_norm": 0.018807893618941307, "learning_rate": 5.572809697773962e-07, "loss": 0.0007, "step": 105540 }, { "epoch": 1.727071913605498, "grad_norm": 0.0693151205778122, "learning_rate": 5.566260395004996e-07, "loss": 0.0012, "step": 105550 }, { "epoch": 1.7272355395565737, "grad_norm": 0.011685054749250412, "learning_rate": 5.559714716100645e-07, "loss": 0.0004, "step": 105560 }, { "epoch": 1.7273991655076495, "grad_norm": 0.17558389902114868, "learning_rate": 5.553172661594774e-07, "loss": 0.0012, "step": 105570 }, { "epoch": 1.7275627914587255, "grad_norm": 0.22072334587574005, "learning_rate": 5.546634232020914e-07, "loss": 0.0011, "step": 105580 }, { "epoch": 1.727726417409801, "grad_norm": 0.05899371579289436, "learning_rate": 5.540099427912326e-07, "loss": 0.0009, "step": 105590 }, { "epoch": 1.727890043360877, "grad_norm": 0.05088042467832565, "learning_rate": 5.533568249801957e-07, "loss": 0.0008, "step": 105600 }, { "epoch": 1.728053669311953, "grad_norm": 0.06190170720219612, "learning_rate": 5.52704069822248e-07, "loss": 0.0013, "step": 105610 }, { "epoch": 1.7282172952630286, "grad_norm": 0.0814126506447792, "learning_rate": 5.520516773706247e-07, "loss": 0.0012, "step": 105620 }, { "epoch": 1.7283809212141046, "grad_norm": 0.028053781017661095, "learning_rate": 5.513996476785344e-07, "loss": 0.0007, "step": 105630 }, { "epoch": 1.7285445471651804, "grad_norm": 0.025459809228777885, "learning_rate": 5.507479807991522e-07, "loss": 0.001, "step": 105640 }, { "epoch": 1.7287081731162561, "grad_norm": 0.04505978524684906, "learning_rate": 5.500966767856281e-07, "loss": 0.0006, "step": 105650 }, { "epoch": 1.7288717990673321, "grad_norm": 0.08860975503921509, "learning_rate": 5.494457356910782e-07, "loss": 0.0005, "step": 105660 }, { "epoch": 1.729035425018408, "grad_norm": 0.04567332938313484, "learning_rate": 5.487951575685929e-07, "loss": 0.0011, "step": 105670 }, { "epoch": 1.7291990509694837, "grad_norm": 0.07471456378698349, "learning_rate": 5.481449424712293e-07, "loss": 0.0012, "step": 105680 }, { "epoch": 1.7293626769205597, "grad_norm": 0.09998209029436111, "learning_rate": 5.474950904520187e-07, "loss": 0.0012, "step": 105690 }, { "epoch": 1.7295263028716354, "grad_norm": 0.05922834202647209, "learning_rate": 5.468456015639584e-07, "loss": 0.0006, "step": 105700 }, { "epoch": 1.7296899288227112, "grad_norm": 0.048251040279865265, "learning_rate": 5.461964758600213e-07, "loss": 0.0009, "step": 105710 }, { "epoch": 1.7298535547737872, "grad_norm": 0.021195555105805397, "learning_rate": 5.455477133931459e-07, "loss": 0.0009, "step": 105720 }, { "epoch": 1.730017180724863, "grad_norm": 0.05589756742119789, "learning_rate": 5.44899314216244e-07, "loss": 0.0008, "step": 105730 }, { "epoch": 1.7301808066759388, "grad_norm": 0.04453357309103012, "learning_rate": 5.442512783821957e-07, "loss": 0.0005, "step": 105740 }, { "epoch": 1.7303444326270148, "grad_norm": 0.0006272813188843429, "learning_rate": 5.436036059438543e-07, "loss": 0.0006, "step": 105750 }, { "epoch": 1.7305080585780905, "grad_norm": 0.011169848963618279, "learning_rate": 5.429562969540398e-07, "loss": 0.0012, "step": 105760 }, { "epoch": 1.7306716845291663, "grad_norm": 0.00949709489941597, "learning_rate": 5.423093514655464e-07, "loss": 0.0007, "step": 105770 }, { "epoch": 1.7308353104802423, "grad_norm": 0.06974199414253235, "learning_rate": 5.41662769531135e-07, "loss": 0.0009, "step": 105780 }, { "epoch": 1.7309989364313179, "grad_norm": 0.014135818928480148, "learning_rate": 5.41016551203541e-07, "loss": 0.0003, "step": 105790 }, { "epoch": 1.7311625623823939, "grad_norm": 0.013617103919386864, "learning_rate": 5.40370696535465e-07, "loss": 0.0006, "step": 105800 }, { "epoch": 1.7313261883334699, "grad_norm": 0.02486949786543846, "learning_rate": 5.397252055795827e-07, "loss": 0.0008, "step": 105810 }, { "epoch": 1.7314898142845454, "grad_norm": 0.053359244018793106, "learning_rate": 5.39080078388537e-07, "loss": 0.0009, "step": 105820 }, { "epoch": 1.7316534402356214, "grad_norm": 0.02739722840487957, "learning_rate": 5.384353150149429e-07, "loss": 0.0011, "step": 105830 }, { "epoch": 1.7318170661866972, "grad_norm": 0.075003482401371, "learning_rate": 5.377909155113858e-07, "loss": 0.0008, "step": 105840 }, { "epoch": 1.731980692137773, "grad_norm": 0.022716032341122627, "learning_rate": 5.371468799304186e-07, "loss": 0.0011, "step": 105850 }, { "epoch": 1.732144318088849, "grad_norm": 0.06845776736736298, "learning_rate": 5.365032083245691e-07, "loss": 0.0007, "step": 105860 }, { "epoch": 1.7323079440399247, "grad_norm": 0.030856722965836525, "learning_rate": 5.358599007463305e-07, "loss": 0.0007, "step": 105870 }, { "epoch": 1.7324715699910005, "grad_norm": 0.043110672384500504, "learning_rate": 5.352169572481713e-07, "loss": 0.0008, "step": 105880 }, { "epoch": 1.7326351959420765, "grad_norm": 0.024211639538407326, "learning_rate": 5.345743778825258e-07, "loss": 0.001, "step": 105890 }, { "epoch": 1.7327988218931523, "grad_norm": 0.01669558510184288, "learning_rate": 5.339321627018018e-07, "loss": 0.0011, "step": 105900 }, { "epoch": 1.732962447844228, "grad_norm": 0.04783722013235092, "learning_rate": 5.332903117583749e-07, "loss": 0.0006, "step": 105910 }, { "epoch": 1.733126073795304, "grad_norm": 0.1694510579109192, "learning_rate": 5.326488251045941e-07, "loss": 0.0006, "step": 105920 }, { "epoch": 1.7332896997463798, "grad_norm": 0.05942774564027786, "learning_rate": 5.32007702792775e-07, "loss": 0.001, "step": 105930 }, { "epoch": 1.7334533256974556, "grad_norm": 0.010538343340158463, "learning_rate": 5.313669448752073e-07, "loss": 0.0006, "step": 105940 }, { "epoch": 1.7336169516485316, "grad_norm": 0.08307705074548721, "learning_rate": 5.307265514041471e-07, "loss": 0.0006, "step": 105950 }, { "epoch": 1.7337805775996074, "grad_norm": 0.012879393063485622, "learning_rate": 5.300865224318241e-07, "loss": 0.0009, "step": 105960 }, { "epoch": 1.7339442035506831, "grad_norm": 0.05131203308701515, "learning_rate": 5.294468580104356e-07, "loss": 0.0011, "step": 105970 }, { "epoch": 1.7341078295017591, "grad_norm": 0.03458397835493088, "learning_rate": 5.288075581921526e-07, "loss": 0.0006, "step": 105980 }, { "epoch": 1.7342714554528347, "grad_norm": 0.06781768798828125, "learning_rate": 5.281686230291111e-07, "loss": 0.0005, "step": 105990 }, { "epoch": 1.7344350814039107, "grad_norm": 0.012050077319145203, "learning_rate": 5.275300525734239e-07, "loss": 0.0004, "step": 106000 }, { "epoch": 1.7345987073549864, "grad_norm": 0.021382614970207214, "learning_rate": 5.268918468771678e-07, "loss": 0.0009, "step": 106010 }, { "epoch": 1.7347623333060622, "grad_norm": 0.06707541644573212, "learning_rate": 5.262540059923949e-07, "loss": 0.001, "step": 106020 }, { "epoch": 1.7349259592571382, "grad_norm": 0.002782185561954975, "learning_rate": 5.25616529971123e-07, "loss": 0.0011, "step": 106030 }, { "epoch": 1.735089585208214, "grad_norm": 0.05399351194500923, "learning_rate": 5.249794188653446e-07, "loss": 0.0005, "step": 106040 }, { "epoch": 1.7352532111592898, "grad_norm": 0.03972519561648369, "learning_rate": 5.24342672727019e-07, "loss": 0.0013, "step": 106050 }, { "epoch": 1.7354168371103658, "grad_norm": 0.008709500543773174, "learning_rate": 5.237062916080782e-07, "loss": 0.0005, "step": 106060 }, { "epoch": 1.7355804630614415, "grad_norm": 0.0478438064455986, "learning_rate": 5.230702755604216e-07, "loss": 0.0007, "step": 106070 }, { "epoch": 1.7357440890125173, "grad_norm": 0.028151944279670715, "learning_rate": 5.224346246359225e-07, "loss": 0.0005, "step": 106080 }, { "epoch": 1.7359077149635933, "grad_norm": 0.050489380955696106, "learning_rate": 5.2179933888642e-07, "loss": 0.0007, "step": 106090 }, { "epoch": 1.736071340914669, "grad_norm": 0.028123734518885612, "learning_rate": 5.211644183637287e-07, "loss": 0.0003, "step": 106100 }, { "epoch": 1.7362349668657449, "grad_norm": 0.00917387381196022, "learning_rate": 5.205298631196287e-07, "loss": 0.0006, "step": 106110 }, { "epoch": 1.7363985928168209, "grad_norm": 0.04121044650673866, "learning_rate": 5.19895673205873e-07, "loss": 0.0008, "step": 106120 }, { "epoch": 1.7365622187678966, "grad_norm": 0.014919224195182323, "learning_rate": 5.192618486741824e-07, "loss": 0.0019, "step": 106130 }, { "epoch": 1.7367258447189724, "grad_norm": 0.04376554116606712, "learning_rate": 5.186283895762517e-07, "loss": 0.0008, "step": 106140 }, { "epoch": 1.7368894706700484, "grad_norm": 0.0658923089504242, "learning_rate": 5.17995295963743e-07, "loss": 0.0008, "step": 106150 }, { "epoch": 1.737053096621124, "grad_norm": 0.009980090893805027, "learning_rate": 5.173625678882871e-07, "loss": 0.0012, "step": 106160 }, { "epoch": 1.7372167225722, "grad_norm": 0.003842758946120739, "learning_rate": 5.167302054014906e-07, "loss": 0.0009, "step": 106170 }, { "epoch": 1.737380348523276, "grad_norm": 0.04095356911420822, "learning_rate": 5.160982085549238e-07, "loss": 0.0006, "step": 106180 }, { "epoch": 1.7375439744743515, "grad_norm": 0.14131717383861542, "learning_rate": 5.154665774001327e-07, "loss": 0.001, "step": 106190 }, { "epoch": 1.7377076004254275, "grad_norm": 0.01662352867424488, "learning_rate": 5.14835311988629e-07, "loss": 0.0009, "step": 106200 }, { "epoch": 1.7378712263765033, "grad_norm": 0.05708159878849983, "learning_rate": 5.142044123718981e-07, "loss": 0.0003, "step": 106210 }, { "epoch": 1.738034852327579, "grad_norm": 0.12500154972076416, "learning_rate": 5.135738786013927e-07, "loss": 0.0017, "step": 106220 }, { "epoch": 1.738198478278655, "grad_norm": 0.02598184160888195, "learning_rate": 5.129437107285385e-07, "loss": 0.0006, "step": 106230 }, { "epoch": 1.7383621042297308, "grad_norm": 0.14675581455230713, "learning_rate": 5.12313908804728e-07, "loss": 0.002, "step": 106240 }, { "epoch": 1.7385257301808066, "grad_norm": 0.19847004115581512, "learning_rate": 5.116844728813275e-07, "loss": 0.001, "step": 106250 }, { "epoch": 1.7386893561318826, "grad_norm": 0.04709034785628319, "learning_rate": 5.110554030096698e-07, "loss": 0.001, "step": 106260 }, { "epoch": 1.7388529820829584, "grad_norm": 0.07444377988576889, "learning_rate": 5.104266992410616e-07, "loss": 0.0009, "step": 106270 }, { "epoch": 1.7390166080340341, "grad_norm": 0.02570188045501709, "learning_rate": 5.09798361626776e-07, "loss": 0.0008, "step": 106280 }, { "epoch": 1.7391802339851101, "grad_norm": 0.10372823476791382, "learning_rate": 5.091703902180606e-07, "loss": 0.0007, "step": 106290 }, { "epoch": 1.739343859936186, "grad_norm": 0.11849724501371384, "learning_rate": 5.085427850661273e-07, "loss": 0.0014, "step": 106300 }, { "epoch": 1.7395074858872617, "grad_norm": 0.030570365488529205, "learning_rate": 5.079155462221647e-07, "loss": 0.0005, "step": 106310 }, { "epoch": 1.7396711118383377, "grad_norm": 0.014777791686356068, "learning_rate": 5.072886737373255e-07, "loss": 0.0006, "step": 106320 }, { "epoch": 1.7398347377894134, "grad_norm": 0.03123929537832737, "learning_rate": 5.066621676627376e-07, "loss": 0.0008, "step": 106330 }, { "epoch": 1.7399983637404892, "grad_norm": 0.024036722257733345, "learning_rate": 5.060360280494947e-07, "loss": 0.0008, "step": 106340 }, { "epoch": 1.7401619896915652, "grad_norm": 0.03935510665178299, "learning_rate": 5.054102549486645e-07, "loss": 0.0009, "step": 106350 }, { "epoch": 1.7403256156426408, "grad_norm": 0.03374578058719635, "learning_rate": 5.047848484112811e-07, "loss": 0.0006, "step": 106360 }, { "epoch": 1.7404892415937168, "grad_norm": 0.07095539569854736, "learning_rate": 5.041598084883526e-07, "loss": 0.0002, "step": 106370 }, { "epoch": 1.7406528675447928, "grad_norm": 0.031577467918395996, "learning_rate": 5.035351352308526e-07, "loss": 0.0009, "step": 106380 }, { "epoch": 1.7408164934958683, "grad_norm": 0.06595504283905029, "learning_rate": 5.029108286897299e-07, "loss": 0.0012, "step": 106390 }, { "epoch": 1.7409801194469443, "grad_norm": 0.020363593474030495, "learning_rate": 5.022868889158982e-07, "loss": 0.0006, "step": 106400 }, { "epoch": 1.74114374539802, "grad_norm": 0.0649137869477272, "learning_rate": 5.016633159602469e-07, "loss": 0.0011, "step": 106410 }, { "epoch": 1.7413073713490959, "grad_norm": 0.03311983123421669, "learning_rate": 5.010401098736295e-07, "loss": 0.0013, "step": 106420 }, { "epoch": 1.7414709973001719, "grad_norm": 0.022958895191550255, "learning_rate": 5.004172707068749e-07, "loss": 0.001, "step": 106430 }, { "epoch": 1.7416346232512476, "grad_norm": 0.06251645088195801, "learning_rate": 4.997947985107782e-07, "loss": 0.0011, "step": 106440 }, { "epoch": 1.7417982492023234, "grad_norm": 0.018483279272913933, "learning_rate": 4.991726933361074e-07, "loss": 0.0013, "step": 106450 }, { "epoch": 1.7419618751533994, "grad_norm": 0.11677878350019455, "learning_rate": 4.985509552335977e-07, "loss": 0.0011, "step": 106460 }, { "epoch": 1.7421255011044752, "grad_norm": 0.058015260845422745, "learning_rate": 4.979295842539577e-07, "loss": 0.0011, "step": 106470 }, { "epoch": 1.742289127055551, "grad_norm": 0.09229929745197296, "learning_rate": 4.973085804478634e-07, "loss": 0.0009, "step": 106480 }, { "epoch": 1.742452753006627, "grad_norm": 0.018100852146744728, "learning_rate": 4.966879438659611e-07, "loss": 0.0007, "step": 106490 }, { "epoch": 1.7426163789577027, "grad_norm": 0.11842692643404007, "learning_rate": 4.960676745588694e-07, "loss": 0.0014, "step": 106500 }, { "epoch": 1.7427800049087785, "grad_norm": 0.03797077387571335, "learning_rate": 4.954477725771734e-07, "loss": 0.0005, "step": 106510 }, { "epoch": 1.7429436308598545, "grad_norm": 0.009182128123939037, "learning_rate": 4.94828237971432e-07, "loss": 0.0005, "step": 106520 }, { "epoch": 1.7431072568109303, "grad_norm": 0.008334016427397728, "learning_rate": 4.94209070792171e-07, "loss": 0.0009, "step": 106530 }, { "epoch": 1.743270882762006, "grad_norm": 0.036363158375024796, "learning_rate": 4.935902710898888e-07, "loss": 0.0011, "step": 106540 }, { "epoch": 1.743434508713082, "grad_norm": 0.023352665826678276, "learning_rate": 4.929718389150512e-07, "loss": 0.0006, "step": 106550 }, { "epoch": 1.7435981346641576, "grad_norm": 0.028563102707266808, "learning_rate": 4.923537743180973e-07, "loss": 0.0007, "step": 106560 }, { "epoch": 1.7437617606152336, "grad_norm": 0.03631766512989998, "learning_rate": 4.917360773494318e-07, "loss": 0.0008, "step": 106570 }, { "epoch": 1.7439253865663096, "grad_norm": 0.22517696022987366, "learning_rate": 4.911187480594348e-07, "loss": 0.0015, "step": 106580 }, { "epoch": 1.7440890125173851, "grad_norm": 0.011045229621231556, "learning_rate": 4.905017864984513e-07, "loss": 0.0034, "step": 106590 }, { "epoch": 1.7442526384684611, "grad_norm": 0.009721971116960049, "learning_rate": 4.898851927167997e-07, "loss": 0.0005, "step": 106600 }, { "epoch": 1.744416264419537, "grad_norm": 0.06691710650920868, "learning_rate": 4.892689667647666e-07, "loss": 0.001, "step": 106610 }, { "epoch": 1.7445798903706127, "grad_norm": 0.005570719949901104, "learning_rate": 4.88653108692611e-07, "loss": 0.0008, "step": 106620 }, { "epoch": 1.7447435163216887, "grad_norm": 0.009508315473794937, "learning_rate": 4.880376185505576e-07, "loss": 0.0007, "step": 106630 }, { "epoch": 1.7449071422727644, "grad_norm": 0.06472483277320862, "learning_rate": 4.874224963888058e-07, "loss": 0.0007, "step": 106640 }, { "epoch": 1.7450707682238402, "grad_norm": 0.048835352063179016, "learning_rate": 4.868077422575213e-07, "loss": 0.0014, "step": 106650 }, { "epoch": 1.7452343941749162, "grad_norm": 0.04501650854945183, "learning_rate": 4.861933562068433e-07, "loss": 0.001, "step": 106660 }, { "epoch": 1.745398020125992, "grad_norm": 0.01128881610929966, "learning_rate": 4.855793382868768e-07, "loss": 0.0014, "step": 106670 }, { "epoch": 1.7455616460770678, "grad_norm": 0.03785865753889084, "learning_rate": 4.849656885477006e-07, "loss": 0.0012, "step": 106680 }, { "epoch": 1.7457252720281438, "grad_norm": 0.08019950985908508, "learning_rate": 4.843524070393607e-07, "loss": 0.0011, "step": 106690 }, { "epoch": 1.7458888979792195, "grad_norm": 0.02137044258415699, "learning_rate": 4.837394938118761e-07, "loss": 0.0004, "step": 106700 }, { "epoch": 1.7460525239302953, "grad_norm": 0.0762724056839943, "learning_rate": 4.831269489152318e-07, "loss": 0.001, "step": 106710 }, { "epoch": 1.7462161498813713, "grad_norm": 0.038665663450956345, "learning_rate": 4.825147723993867e-07, "loss": 0.0007, "step": 106720 }, { "epoch": 1.746379775832447, "grad_norm": 0.03914206847548485, "learning_rate": 4.819029643142659e-07, "loss": 0.001, "step": 106730 }, { "epoch": 1.7465434017835229, "grad_norm": 0.03710709884762764, "learning_rate": 4.812915247097683e-07, "loss": 0.002, "step": 106740 }, { "epoch": 1.7467070277345988, "grad_norm": 0.054355598986148834, "learning_rate": 4.80680453635759e-07, "loss": 0.0009, "step": 106750 }, { "epoch": 1.7468706536856744, "grad_norm": 0.09634795039892197, "learning_rate": 4.800697511420771e-07, "loss": 0.0007, "step": 106760 }, { "epoch": 1.7470342796367504, "grad_norm": 0.035127975046634674, "learning_rate": 4.794594172785267e-07, "loss": 0.0007, "step": 106770 }, { "epoch": 1.7471979055878264, "grad_norm": 0.03766074404120445, "learning_rate": 4.788494520948872e-07, "loss": 0.001, "step": 106780 }, { "epoch": 1.747361531538902, "grad_norm": 0.005453648045659065, "learning_rate": 4.782398556409034e-07, "loss": 0.001, "step": 106790 }, { "epoch": 1.747525157489978, "grad_norm": 0.050250910222530365, "learning_rate": 4.776306279662934e-07, "loss": 0.0009, "step": 106800 }, { "epoch": 1.7476887834410537, "grad_norm": 0.020884646102786064, "learning_rate": 4.77021769120743e-07, "loss": 0.0006, "step": 106810 }, { "epoch": 1.7478524093921295, "grad_norm": 0.02867044508457184, "learning_rate": 4.764132791539078e-07, "loss": 0.0006, "step": 106820 }, { "epoch": 1.7480160353432055, "grad_norm": 0.021044151857495308, "learning_rate": 4.758051581154155e-07, "loss": 0.0006, "step": 106830 }, { "epoch": 1.7481796612942813, "grad_norm": 0.0913689136505127, "learning_rate": 4.751974060548614e-07, "loss": 0.0015, "step": 106840 }, { "epoch": 1.748343287245357, "grad_norm": 0.051156170666217804, "learning_rate": 4.745900230218126e-07, "loss": 0.0011, "step": 106850 }, { "epoch": 1.748506913196433, "grad_norm": 0.06647111475467682, "learning_rate": 4.739830090658043e-07, "loss": 0.0015, "step": 106860 }, { "epoch": 1.7486705391475088, "grad_norm": 0.03345661237835884, "learning_rate": 4.733763642363437e-07, "loss": 0.0008, "step": 106870 }, { "epoch": 1.7488341650985846, "grad_norm": 0.017066478729248047, "learning_rate": 4.7277008858290506e-07, "loss": 0.001, "step": 106880 }, { "epoch": 1.7489977910496606, "grad_norm": 0.015604664571583271, "learning_rate": 4.7216418215493555e-07, "loss": 0.0005, "step": 106890 }, { "epoch": 1.7491614170007364, "grad_norm": 0.026232518255710602, "learning_rate": 4.7155864500185e-07, "loss": 0.0002, "step": 106900 }, { "epoch": 1.7493250429518121, "grad_norm": 0.03570883348584175, "learning_rate": 4.709534771730345e-07, "loss": 0.0013, "step": 106910 }, { "epoch": 1.7494886689028881, "grad_norm": 0.037644702941179276, "learning_rate": 4.7034867871784406e-07, "loss": 0.0005, "step": 106920 }, { "epoch": 1.7496522948539637, "grad_norm": 0.11534906923770905, "learning_rate": 4.6974424968560475e-07, "loss": 0.0009, "step": 106930 }, { "epoch": 1.7498159208050397, "grad_norm": 0.06984814256429672, "learning_rate": 4.691401901256104e-07, "loss": 0.001, "step": 106940 }, { "epoch": 1.7499795467561157, "grad_norm": 0.014700477011501789, "learning_rate": 4.6853650008712723e-07, "loss": 0.0011, "step": 106950 }, { "epoch": 1.7501431727071912, "grad_norm": 0.04776924103498459, "learning_rate": 4.679331796193892e-07, "loss": 0.001, "step": 106960 }, { "epoch": 1.7503067986582672, "grad_norm": 0.06454283744096756, "learning_rate": 4.673302287716025e-07, "loss": 0.0009, "step": 106970 }, { "epoch": 1.750470424609343, "grad_norm": 0.03165587782859802, "learning_rate": 4.6672764759294e-07, "loss": 0.0009, "step": 106980 }, { "epoch": 1.7506340505604188, "grad_norm": 0.025406716391444206, "learning_rate": 4.661254361325479e-07, "loss": 0.0003, "step": 106990 }, { "epoch": 1.7507976765114948, "grad_norm": 0.02245040237903595, "learning_rate": 4.6552359443953923e-07, "loss": 0.001, "step": 107000 }, { "epoch": 1.7509613024625705, "grad_norm": 0.003539147786796093, "learning_rate": 4.6492212256299906e-07, "loss": 0.0009, "step": 107010 }, { "epoch": 1.7511249284136463, "grad_norm": 0.0016249327454715967, "learning_rate": 4.6432102055197994e-07, "loss": 0.0007, "step": 107020 }, { "epoch": 1.7512885543647223, "grad_norm": 0.05109637230634689, "learning_rate": 4.6372028845550754e-07, "loss": 0.0007, "step": 107030 }, { "epoch": 1.751452180315798, "grad_norm": 0.02515731193125248, "learning_rate": 4.6311992632257384e-07, "loss": 0.0009, "step": 107040 }, { "epoch": 1.7516158062668739, "grad_norm": 0.0056059337221086025, "learning_rate": 4.6251993420214416e-07, "loss": 0.0004, "step": 107050 }, { "epoch": 1.7517794322179499, "grad_norm": 0.001545050647109747, "learning_rate": 4.619203121431498e-07, "loss": 0.0014, "step": 107060 }, { "epoch": 1.7519430581690256, "grad_norm": 0.017759894952178, "learning_rate": 4.6132106019449564e-07, "loss": 0.0007, "step": 107070 }, { "epoch": 1.7521066841201014, "grad_norm": 0.03263549506664276, "learning_rate": 4.607221784050531e-07, "loss": 0.0011, "step": 107080 }, { "epoch": 1.7522703100711774, "grad_norm": 0.12587085366249084, "learning_rate": 4.6012366682366636e-07, "loss": 0.0009, "step": 107090 }, { "epoch": 1.7524339360222532, "grad_norm": 0.0250666756182909, "learning_rate": 4.595255254991471e-07, "loss": 0.0008, "step": 107100 }, { "epoch": 1.752597561973329, "grad_norm": 0.020386964082717896, "learning_rate": 4.589277544802784e-07, "loss": 0.0006, "step": 107110 }, { "epoch": 1.752761187924405, "grad_norm": 0.041317541152238846, "learning_rate": 4.583303538158124e-07, "loss": 0.0011, "step": 107120 }, { "epoch": 1.7529248138754805, "grad_norm": 0.022674575448036194, "learning_rate": 4.577333235544695e-07, "loss": 0.0012, "step": 107130 }, { "epoch": 1.7530884398265565, "grad_norm": 0.05014091730117798, "learning_rate": 4.5713666374494313e-07, "loss": 0.0033, "step": 107140 }, { "epoch": 1.7532520657776325, "grad_norm": 0.024792658165097237, "learning_rate": 4.565403744358937e-07, "loss": 0.0011, "step": 107150 }, { "epoch": 1.753415691728708, "grad_norm": 0.04676548019051552, "learning_rate": 4.55944455675954e-07, "loss": 0.0008, "step": 107160 }, { "epoch": 1.753579317679784, "grad_norm": 0.1387605369091034, "learning_rate": 4.5534890751372354e-07, "loss": 0.0009, "step": 107170 }, { "epoch": 1.7537429436308598, "grad_norm": 0.05712341144680977, "learning_rate": 4.54753729997775e-07, "loss": 0.0007, "step": 107180 }, { "epoch": 1.7539065695819356, "grad_norm": 0.023189526051282883, "learning_rate": 4.541589231766469e-07, "loss": 0.0014, "step": 107190 }, { "epoch": 1.7540701955330116, "grad_norm": 0.0023703663609921932, "learning_rate": 4.5356448709885146e-07, "loss": 0.0004, "step": 107200 }, { "epoch": 1.7542338214840874, "grad_norm": 0.021885264664888382, "learning_rate": 4.529704218128672e-07, "loss": 0.0016, "step": 107210 }, { "epoch": 1.7543974474351631, "grad_norm": 0.11768580973148346, "learning_rate": 4.5237672736714645e-07, "loss": 0.0016, "step": 107220 }, { "epoch": 1.7545610733862391, "grad_norm": 0.05352596938610077, "learning_rate": 4.51783403810106e-07, "loss": 0.0012, "step": 107230 }, { "epoch": 1.754724699337315, "grad_norm": 0.05097443610429764, "learning_rate": 4.511904511901377e-07, "loss": 0.001, "step": 107240 }, { "epoch": 1.7548883252883907, "grad_norm": 0.0020308836828917265, "learning_rate": 4.5059786955559906e-07, "loss": 0.0006, "step": 107250 }, { "epoch": 1.7550519512394667, "grad_norm": 0.031343474984169006, "learning_rate": 4.500056589548207e-07, "loss": 0.0016, "step": 107260 }, { "epoch": 1.7552155771905424, "grad_norm": 0.014045115560293198, "learning_rate": 4.4941381943609977e-07, "loss": 0.0007, "step": 107270 }, { "epoch": 1.7553792031416182, "grad_norm": 0.07368356734514236, "learning_rate": 4.4882235104770577e-07, "loss": 0.0011, "step": 107280 }, { "epoch": 1.7555428290926942, "grad_norm": 0.05008498206734657, "learning_rate": 4.482312538378758e-07, "loss": 0.001, "step": 107290 }, { "epoch": 1.75570645504377, "grad_norm": 0.07638438791036606, "learning_rate": 4.4764052785481904e-07, "loss": 0.0006, "step": 107300 }, { "epoch": 1.7558700809948458, "grad_norm": 0.03753861412405968, "learning_rate": 4.470501731467114e-07, "loss": 0.0009, "step": 107310 }, { "epoch": 1.7560337069459218, "grad_norm": 0.15613305568695068, "learning_rate": 4.4646018976170215e-07, "loss": 0.0014, "step": 107320 }, { "epoch": 1.7561973328969973, "grad_norm": 0.024668697267770767, "learning_rate": 4.458705777479061e-07, "loss": 0.0007, "step": 107330 }, { "epoch": 1.7563609588480733, "grad_norm": 0.10352221876382828, "learning_rate": 4.452813371534126e-07, "loss": 0.001, "step": 107340 }, { "epoch": 1.7565245847991493, "grad_norm": 0.023088568821549416, "learning_rate": 4.4469246802627544e-07, "loss": 0.0004, "step": 107350 }, { "epoch": 1.7566882107502249, "grad_norm": 0.036313802003860474, "learning_rate": 4.441039704145234e-07, "loss": 0.0006, "step": 107360 }, { "epoch": 1.7568518367013009, "grad_norm": 0.008619832806289196, "learning_rate": 4.4351584436615027e-07, "loss": 0.0005, "step": 107370 }, { "epoch": 1.7570154626523766, "grad_norm": 0.021129556000232697, "learning_rate": 4.4292808992912273e-07, "loss": 0.0014, "step": 107380 }, { "epoch": 1.7571790886034524, "grad_norm": 0.012276737950742245, "learning_rate": 4.423407071513752e-07, "loss": 0.0006, "step": 107390 }, { "epoch": 1.7573427145545284, "grad_norm": 0.10430112481117249, "learning_rate": 4.417536960808139e-07, "loss": 0.0006, "step": 107400 }, { "epoch": 1.7575063405056042, "grad_norm": 0.18361087143421173, "learning_rate": 4.4116705676531146e-07, "loss": 0.001, "step": 107410 }, { "epoch": 1.75766996645668, "grad_norm": 0.004871009849011898, "learning_rate": 4.4058078925271476e-07, "loss": 0.0007, "step": 107420 }, { "epoch": 1.757833592407756, "grad_norm": 0.020443012937903404, "learning_rate": 4.3999489359083494e-07, "loss": 0.0004, "step": 107430 }, { "epoch": 1.7579972183588317, "grad_norm": 0.2269364893436432, "learning_rate": 4.394093698274582e-07, "loss": 0.0013, "step": 107440 }, { "epoch": 1.7581608443099075, "grad_norm": 0.009524994529783726, "learning_rate": 4.3882421801033693e-07, "loss": 0.0007, "step": 107450 }, { "epoch": 1.7583244702609835, "grad_norm": 0.07494428753852844, "learning_rate": 4.382394381871935e-07, "loss": 0.0012, "step": 107460 }, { "epoch": 1.7584880962120593, "grad_norm": 0.03661913424730301, "learning_rate": 4.3765503040572146e-07, "loss": 0.0003, "step": 107470 }, { "epoch": 1.758651722163135, "grad_norm": 0.08206179738044739, "learning_rate": 4.370709947135815e-07, "loss": 0.0018, "step": 107480 }, { "epoch": 1.758815348114211, "grad_norm": 0.001558939111419022, "learning_rate": 4.3648733115840836e-07, "loss": 0.0007, "step": 107490 }, { "epoch": 1.7589789740652868, "grad_norm": 0.08039776980876923, "learning_rate": 4.359040397878006e-07, "loss": 0.0015, "step": 107500 }, { "epoch": 1.7591426000163626, "grad_norm": 0.03547074645757675, "learning_rate": 4.353211206493324e-07, "loss": 0.0005, "step": 107510 }, { "epoch": 1.7593062259674386, "grad_norm": 0.1029684916138649, "learning_rate": 4.3473857379054183e-07, "loss": 0.001, "step": 107520 }, { "epoch": 1.7594698519185141, "grad_norm": 0.00187123310752213, "learning_rate": 4.3415639925894203e-07, "loss": 0.0009, "step": 107530 }, { "epoch": 1.7596334778695901, "grad_norm": 0.11042116582393646, "learning_rate": 4.335745971020111e-07, "loss": 0.001, "step": 107540 }, { "epoch": 1.7597971038206661, "grad_norm": 0.0460456945002079, "learning_rate": 4.3299316736719997e-07, "loss": 0.0012, "step": 107550 }, { "epoch": 1.7599607297717417, "grad_norm": 0.03791511431336403, "learning_rate": 4.324121101019274e-07, "loss": 0.001, "step": 107560 }, { "epoch": 1.7601243557228177, "grad_norm": 0.029190704226493835, "learning_rate": 4.318314253535838e-07, "loss": 0.0008, "step": 107570 }, { "epoch": 1.7602879816738934, "grad_norm": 0.0013233766658231616, "learning_rate": 4.312511131695263e-07, "loss": 0.0009, "step": 107580 }, { "epoch": 1.7604516076249692, "grad_norm": 0.058730173856019974, "learning_rate": 4.306711735970842e-07, "loss": 0.0007, "step": 107590 }, { "epoch": 1.7606152335760452, "grad_norm": 0.1165233626961708, "learning_rate": 4.300916066835542e-07, "loss": 0.0013, "step": 107600 }, { "epoch": 1.760778859527121, "grad_norm": 0.05455328896641731, "learning_rate": 4.295124124762057e-07, "loss": 0.0007, "step": 107610 }, { "epoch": 1.7609424854781968, "grad_norm": 0.03936642408370972, "learning_rate": 4.289335910222736e-07, "loss": 0.0008, "step": 107620 }, { "epoch": 1.7611061114292728, "grad_norm": 0.0059580253437161446, "learning_rate": 4.2835514236896635e-07, "loss": 0.0007, "step": 107630 }, { "epoch": 1.7612697373803485, "grad_norm": 0.09812729805707932, "learning_rate": 4.2777706656345895e-07, "loss": 0.0009, "step": 107640 }, { "epoch": 1.7614333633314243, "grad_norm": 0.17266543209552765, "learning_rate": 4.271993636528993e-07, "loss": 0.0018, "step": 107650 }, { "epoch": 1.7615969892825003, "grad_norm": 0.06077754870057106, "learning_rate": 4.2662203368440014e-07, "loss": 0.0011, "step": 107660 }, { "epoch": 1.761760615233576, "grad_norm": 0.1097792237997055, "learning_rate": 4.260450767050489e-07, "loss": 0.0011, "step": 107670 }, { "epoch": 1.7619242411846519, "grad_norm": 0.029920652508735657, "learning_rate": 4.254684927618985e-07, "loss": 0.0013, "step": 107680 }, { "epoch": 1.7620878671357278, "grad_norm": 0.059843726456165314, "learning_rate": 4.2489228190197463e-07, "loss": 0.0005, "step": 107690 }, { "epoch": 1.7622514930868036, "grad_norm": 0.03530824929475784, "learning_rate": 4.2431644417226913e-07, "loss": 0.0008, "step": 107700 }, { "epoch": 1.7624151190378794, "grad_norm": 0.03113321028649807, "learning_rate": 4.2374097961974834e-07, "loss": 0.0009, "step": 107710 }, { "epoch": 1.7625787449889554, "grad_norm": 0.20169009268283844, "learning_rate": 4.2316588829134197e-07, "loss": 0.0008, "step": 107720 }, { "epoch": 1.762742370940031, "grad_norm": 0.049306392669677734, "learning_rate": 4.2259117023395525e-07, "loss": 0.0012, "step": 107730 }, { "epoch": 1.762905996891107, "grad_norm": 0.005846387706696987, "learning_rate": 4.220168254944579e-07, "loss": 0.0011, "step": 107740 }, { "epoch": 1.7630696228421827, "grad_norm": 0.02031685598194599, "learning_rate": 4.2144285411969356e-07, "loss": 0.0008, "step": 107750 }, { "epoch": 1.7632332487932585, "grad_norm": 0.061661701649427414, "learning_rate": 4.2086925615647143e-07, "loss": 0.0005, "step": 107760 }, { "epoch": 1.7633968747443345, "grad_norm": 0.015344727784395218, "learning_rate": 4.202960316515747e-07, "loss": 0.0008, "step": 107770 }, { "epoch": 1.7635605006954103, "grad_norm": 0.042269229888916016, "learning_rate": 4.1972318065175156e-07, "loss": 0.0007, "step": 107780 }, { "epoch": 1.763724126646486, "grad_norm": 0.041248686611652374, "learning_rate": 4.1915070320372175e-07, "loss": 0.0011, "step": 107790 }, { "epoch": 1.763887752597562, "grad_norm": 0.022548416629433632, "learning_rate": 4.185785993541758e-07, "loss": 0.0005, "step": 107800 }, { "epoch": 1.7640513785486378, "grad_norm": 0.04156475141644478, "learning_rate": 4.180068691497713e-07, "loss": 0.0008, "step": 107810 }, { "epoch": 1.7642150044997136, "grad_norm": 0.023491984233260155, "learning_rate": 4.174355126371382e-07, "loss": 0.001, "step": 107820 }, { "epoch": 1.7643786304507896, "grad_norm": 0.04882371053099632, "learning_rate": 4.1686452986287205e-07, "loss": 0.0006, "step": 107830 }, { "epoch": 1.7645422564018653, "grad_norm": 0.0026152748614549637, "learning_rate": 4.162939208735428e-07, "loss": 0.0023, "step": 107840 }, { "epoch": 1.7647058823529411, "grad_norm": 0.12709873914718628, "learning_rate": 4.15723685715686e-07, "loss": 0.0022, "step": 107850 }, { "epoch": 1.7648695083040171, "grad_norm": 0.042144399136304855, "learning_rate": 4.151538244358083e-07, "loss": 0.0021, "step": 107860 }, { "epoch": 1.765033134255093, "grad_norm": 0.0195899847894907, "learning_rate": 4.145843370803854e-07, "loss": 0.0004, "step": 107870 }, { "epoch": 1.7651967602061687, "grad_norm": 0.03930019959807396, "learning_rate": 4.140152236958639e-07, "loss": 0.0007, "step": 107880 }, { "epoch": 1.7653603861572447, "grad_norm": 0.05209214612841606, "learning_rate": 4.1344648432865677e-07, "loss": 0.0009, "step": 107890 }, { "epoch": 1.7655240121083202, "grad_norm": 0.08244826644659042, "learning_rate": 4.128781190251502e-07, "loss": 0.0009, "step": 107900 }, { "epoch": 1.7656876380593962, "grad_norm": 0.07820073515176773, "learning_rate": 4.123101278316971e-07, "loss": 0.001, "step": 107910 }, { "epoch": 1.7658512640104722, "grad_norm": 0.06501936167478561, "learning_rate": 4.1174251079462215e-07, "loss": 0.002, "step": 107920 }, { "epoch": 1.7660148899615478, "grad_norm": 0.051150836050510406, "learning_rate": 4.1117526796021655e-07, "loss": 0.001, "step": 107930 }, { "epoch": 1.7661785159126238, "grad_norm": 0.07909323275089264, "learning_rate": 4.1060839937474396e-07, "loss": 0.0012, "step": 107940 }, { "epoch": 1.7663421418636995, "grad_norm": 0.0015398211544379592, "learning_rate": 4.1004190508443564e-07, "loss": 0.0011, "step": 107950 }, { "epoch": 1.7665057678147753, "grad_norm": 0.012376747094094753, "learning_rate": 4.0947578513549356e-07, "loss": 0.0007, "step": 107960 }, { "epoch": 1.7666693937658513, "grad_norm": 0.002680887235328555, "learning_rate": 4.089100395740875e-07, "loss": 0.0033, "step": 107970 }, { "epoch": 1.766833019716927, "grad_norm": 0.1572302281856537, "learning_rate": 4.083446684463593e-07, "loss": 0.0013, "step": 107980 }, { "epoch": 1.7669966456680029, "grad_norm": 0.02062288112938404, "learning_rate": 4.077796717984167e-07, "loss": 0.0008, "step": 107990 }, { "epoch": 1.7671602716190788, "grad_norm": 0.028081782162189484, "learning_rate": 4.072150496763411e-07, "loss": 0.0005, "step": 108000 }, { "epoch": 1.7673238975701546, "grad_norm": 0.05993391573429108, "learning_rate": 4.066508021261789e-07, "loss": 0.0016, "step": 108010 }, { "epoch": 1.7674875235212304, "grad_norm": 0.07842147350311279, "learning_rate": 4.060869291939501e-07, "loss": 0.0007, "step": 108020 }, { "epoch": 1.7676511494723064, "grad_norm": 0.0626661628484726, "learning_rate": 4.0552343092564115e-07, "loss": 0.0012, "step": 108030 }, { "epoch": 1.7678147754233822, "grad_norm": 0.06272612512111664, "learning_rate": 4.049603073672098e-07, "loss": 0.0012, "step": 108040 }, { "epoch": 1.767978401374458, "grad_norm": 0.03335786983370781, "learning_rate": 4.0439755856458153e-07, "loss": 0.0007, "step": 108050 }, { "epoch": 1.768142027325534, "grad_norm": 0.004727168940007687, "learning_rate": 4.0383518456365346e-07, "loss": 0.0006, "step": 108060 }, { "epoch": 1.7683056532766097, "grad_norm": 0.035718612372875214, "learning_rate": 4.0327318541028947e-07, "loss": 0.0008, "step": 108070 }, { "epoch": 1.7684692792276855, "grad_norm": 0.03150029107928276, "learning_rate": 4.0271156115032625e-07, "loss": 0.0009, "step": 108080 }, { "epoch": 1.7686329051787615, "grad_norm": 0.016425782814621925, "learning_rate": 4.0215031182956665e-07, "loss": 0.0004, "step": 108090 }, { "epoch": 1.768796531129837, "grad_norm": 0.09022302180528641, "learning_rate": 4.015894374937829e-07, "loss": 0.0007, "step": 108100 }, { "epoch": 1.768960157080913, "grad_norm": 0.048652973026037216, "learning_rate": 4.010289381887211e-07, "loss": 0.0011, "step": 108110 }, { "epoch": 1.769123783031989, "grad_norm": 0.023149846121668816, "learning_rate": 4.0046881396009096e-07, "loss": 0.0009, "step": 108120 }, { "epoch": 1.7692874089830646, "grad_norm": 0.03632233291864395, "learning_rate": 3.9990906485357583e-07, "loss": 0.0011, "step": 108130 }, { "epoch": 1.7694510349341406, "grad_norm": 0.012505215592682362, "learning_rate": 3.9934969091482647e-07, "loss": 0.0003, "step": 108140 }, { "epoch": 1.7696146608852164, "grad_norm": 0.09661774337291718, "learning_rate": 3.9879069218946354e-07, "loss": 0.0011, "step": 108150 }, { "epoch": 1.7697782868362921, "grad_norm": 0.05754555016756058, "learning_rate": 3.9823206872307674e-07, "loss": 0.0013, "step": 108160 }, { "epoch": 1.7699419127873681, "grad_norm": 0.10947311669588089, "learning_rate": 3.9767382056122685e-07, "loss": 0.001, "step": 108170 }, { "epoch": 1.770105538738444, "grad_norm": 0.0036237684544175863, "learning_rate": 3.9711594774944015e-07, "loss": 0.0008, "step": 108180 }, { "epoch": 1.7702691646895197, "grad_norm": 0.06420178711414337, "learning_rate": 3.9655845033321814e-07, "loss": 0.0006, "step": 108190 }, { "epoch": 1.7704327906405957, "grad_norm": 0.023750467225909233, "learning_rate": 3.9600132835802494e-07, "loss": 0.0007, "step": 108200 }, { "epoch": 1.7705964165916714, "grad_norm": 0.054547131061553955, "learning_rate": 3.954445818693009e-07, "loss": 0.0006, "step": 108210 }, { "epoch": 1.7707600425427472, "grad_norm": 0.020808063447475433, "learning_rate": 3.948882109124491e-07, "loss": 0.0007, "step": 108220 }, { "epoch": 1.7709236684938232, "grad_norm": 0.10417719930410385, "learning_rate": 3.943322155328483e-07, "loss": 0.0016, "step": 108230 }, { "epoch": 1.771087294444899, "grad_norm": 0.005488272290676832, "learning_rate": 3.9377659577584105e-07, "loss": 0.0011, "step": 108240 }, { "epoch": 1.7712509203959748, "grad_norm": 0.10023872554302216, "learning_rate": 3.9322135168674393e-07, "loss": 0.0014, "step": 108250 }, { "epoch": 1.7714145463470508, "grad_norm": 0.027351336553692818, "learning_rate": 3.9266648331083847e-07, "loss": 0.0006, "step": 108260 }, { "epoch": 1.7715781722981265, "grad_norm": 0.015320983715355396, "learning_rate": 3.921119906933807e-07, "loss": 0.0008, "step": 108270 }, { "epoch": 1.7717417982492023, "grad_norm": 0.02882968820631504, "learning_rate": 3.9155787387959e-07, "loss": 0.0006, "step": 108280 }, { "epoch": 1.7719054242002783, "grad_norm": 0.026351315900683403, "learning_rate": 3.9100413291466133e-07, "loss": 0.0014, "step": 108290 }, { "epoch": 1.7720690501513539, "grad_norm": 0.01935686729848385, "learning_rate": 3.90450767843753e-07, "loss": 0.0011, "step": 108300 }, { "epoch": 1.7722326761024298, "grad_norm": 0.04308044910430908, "learning_rate": 3.8989777871199843e-07, "loss": 0.0011, "step": 108310 }, { "epoch": 1.7723963020535058, "grad_norm": 0.009379858151078224, "learning_rate": 3.8934516556449474e-07, "loss": 0.0011, "step": 108320 }, { "epoch": 1.7725599280045814, "grad_norm": 0.013123693875968456, "learning_rate": 3.887929284463138e-07, "loss": 0.0007, "step": 108330 }, { "epoch": 1.7727235539556574, "grad_norm": 0.06673294305801392, "learning_rate": 3.882410674024917e-07, "loss": 0.0017, "step": 108340 }, { "epoch": 1.7728871799067332, "grad_norm": 0.03813819959759712, "learning_rate": 3.8768958247803857e-07, "loss": 0.0009, "step": 108350 }, { "epoch": 1.773050805857809, "grad_norm": 0.05809563398361206, "learning_rate": 3.8713847371792954e-07, "loss": 0.001, "step": 108360 }, { "epoch": 1.773214431808885, "grad_norm": 0.012311948463320732, "learning_rate": 3.865877411671137e-07, "loss": 0.0005, "step": 108370 }, { "epoch": 1.7733780577599607, "grad_norm": 0.039004478603601456, "learning_rate": 3.8603738487050457e-07, "loss": 0.0016, "step": 108380 }, { "epoch": 1.7735416837110365, "grad_norm": 0.03414362296462059, "learning_rate": 3.8548740487298895e-07, "loss": 0.0006, "step": 108390 }, { "epoch": 1.7737053096621125, "grad_norm": 0.038908738642930984, "learning_rate": 3.8493780121941935e-07, "loss": 0.0009, "step": 108400 }, { "epoch": 1.7738689356131883, "grad_norm": 0.005892162211239338, "learning_rate": 3.843885739546227e-07, "loss": 0.0027, "step": 108410 }, { "epoch": 1.774032561564264, "grad_norm": 0.026326606050133705, "learning_rate": 3.838397231233898e-07, "loss": 0.0009, "step": 108420 }, { "epoch": 1.77419618751534, "grad_norm": 0.03375169634819031, "learning_rate": 3.832912487704826e-07, "loss": 0.0008, "step": 108430 }, { "epoch": 1.7743598134664158, "grad_norm": 0.08160717040300369, "learning_rate": 3.827431509406343e-07, "loss": 0.0011, "step": 108440 }, { "epoch": 1.7745234394174916, "grad_norm": 0.04383422061800957, "learning_rate": 3.821954296785452e-07, "loss": 0.0007, "step": 108450 }, { "epoch": 1.7746870653685676, "grad_norm": 0.04501236602663994, "learning_rate": 3.816480850288862e-07, "loss": 0.0009, "step": 108460 }, { "epoch": 1.7748506913196433, "grad_norm": 0.060540322214365005, "learning_rate": 3.811011170362955e-07, "loss": 0.0009, "step": 108470 }, { "epoch": 1.7750143172707191, "grad_norm": 0.006911097094416618, "learning_rate": 3.805545257453841e-07, "loss": 0.0007, "step": 108480 }, { "epoch": 1.7751779432217951, "grad_norm": 0.042887214571237564, "learning_rate": 3.8000831120072747e-07, "loss": 0.0008, "step": 108490 }, { "epoch": 1.7753415691728707, "grad_norm": 0.0329434871673584, "learning_rate": 3.794624734468755e-07, "loss": 0.0003, "step": 108500 }, { "epoch": 1.7755051951239467, "grad_norm": 0.02230953238904476, "learning_rate": 3.789170125283431e-07, "loss": 0.0024, "step": 108510 }, { "epoch": 1.7756688210750227, "grad_norm": 0.03865766152739525, "learning_rate": 3.7837192848961757e-07, "loss": 0.0014, "step": 108520 }, { "epoch": 1.7758324470260982, "grad_norm": 0.11782630532979965, "learning_rate": 3.7782722137515213e-07, "loss": 0.0013, "step": 108530 }, { "epoch": 1.7759960729771742, "grad_norm": 0.10710638761520386, "learning_rate": 3.772828912293741e-07, "loss": 0.0008, "step": 108540 }, { "epoch": 1.77615969892825, "grad_norm": 0.006776588037610054, "learning_rate": 3.767389380966746e-07, "loss": 0.0003, "step": 108550 }, { "epoch": 1.7763233248793258, "grad_norm": 0.1646786779165268, "learning_rate": 3.7619536202141813e-07, "loss": 0.0009, "step": 108560 }, { "epoch": 1.7764869508304018, "grad_norm": 0.004421981517225504, "learning_rate": 3.7565216304793594e-07, "loss": 0.0007, "step": 108570 }, { "epoch": 1.7766505767814775, "grad_norm": 0.04801374301314354, "learning_rate": 3.7510934122053034e-07, "loss": 0.0008, "step": 108580 }, { "epoch": 1.7768142027325533, "grad_norm": 0.003219426842406392, "learning_rate": 3.745668965834709e-07, "loss": 0.001, "step": 108590 }, { "epoch": 1.7769778286836293, "grad_norm": 0.03201757371425629, "learning_rate": 3.7402482918099946e-07, "loss": 0.0007, "step": 108600 }, { "epoch": 1.777141454634705, "grad_norm": 0.034481413662433624, "learning_rate": 3.7348313905732294e-07, "loss": 0.0004, "step": 108610 }, { "epoch": 1.7773050805857808, "grad_norm": 0.10792404413223267, "learning_rate": 3.7294182625662145e-07, "loss": 0.001, "step": 108620 }, { "epoch": 1.7774687065368568, "grad_norm": 0.054812539368867874, "learning_rate": 3.7240089082304145e-07, "loss": 0.0005, "step": 108630 }, { "epoch": 1.7776323324879326, "grad_norm": 0.02432039938867092, "learning_rate": 3.7186033280070145e-07, "loss": 0.0006, "step": 108640 }, { "epoch": 1.7777959584390084, "grad_norm": 0.036705147475004196, "learning_rate": 3.713201522336851e-07, "loss": 0.0009, "step": 108650 }, { "epoch": 1.7779595843900844, "grad_norm": 0.041651029139757156, "learning_rate": 3.7078034916605e-07, "loss": 0.0006, "step": 108660 }, { "epoch": 1.77812321034116, "grad_norm": 0.032106123864650726, "learning_rate": 3.702409236418192e-07, "loss": 0.0004, "step": 108670 }, { "epoch": 1.778286836292236, "grad_norm": 0.041012149304151535, "learning_rate": 3.6970187570498696e-07, "loss": 0.0008, "step": 108680 }, { "epoch": 1.778450462243312, "grad_norm": 0.015991734340786934, "learning_rate": 3.691632053995159e-07, "loss": 0.0007, "step": 108690 }, { "epoch": 1.7786140881943875, "grad_norm": 0.06304717808961868, "learning_rate": 3.6862491276933864e-07, "loss": 0.0007, "step": 108700 }, { "epoch": 1.7787777141454635, "grad_norm": 0.016339173540472984, "learning_rate": 3.680869978583557e-07, "loss": 0.0012, "step": 108710 }, { "epoch": 1.7789413400965393, "grad_norm": 0.01635793410241604, "learning_rate": 3.6754946071043907e-07, "loss": 0.001, "step": 108720 }, { "epoch": 1.779104966047615, "grad_norm": 0.032469525933265686, "learning_rate": 3.670123013694271e-07, "loss": 0.0009, "step": 108730 }, { "epoch": 1.779268591998691, "grad_norm": 0.04310190677642822, "learning_rate": 3.6647551987912865e-07, "loss": 0.0008, "step": 108740 }, { "epoch": 1.7794322179497668, "grad_norm": 0.026811139658093452, "learning_rate": 3.6593911628332254e-07, "loss": 0.0006, "step": 108750 }, { "epoch": 1.7795958439008426, "grad_norm": 0.00654851458966732, "learning_rate": 3.65403090625755e-07, "loss": 0.0024, "step": 108760 }, { "epoch": 1.7797594698519186, "grad_norm": 0.029045140370726585, "learning_rate": 3.648674429501442e-07, "loss": 0.0007, "step": 108770 }, { "epoch": 1.7799230958029943, "grad_norm": 0.0021667282562702894, "learning_rate": 3.643321733001737e-07, "loss": 0.0009, "step": 108780 }, { "epoch": 1.7800867217540701, "grad_norm": 0.06168778985738754, "learning_rate": 3.637972817195001e-07, "loss": 0.0008, "step": 108790 }, { "epoch": 1.7802503477051461, "grad_norm": 0.03460283949971199, "learning_rate": 3.6326276825174536e-07, "loss": 0.0007, "step": 108800 }, { "epoch": 1.780413973656222, "grad_norm": 0.027735942974686623, "learning_rate": 3.6272863294050497e-07, "loss": 0.0009, "step": 108810 }, { "epoch": 1.7805775996072977, "grad_norm": 0.00878941174596548, "learning_rate": 3.6219487582933863e-07, "loss": 0.0005, "step": 108820 }, { "epoch": 1.7807412255583737, "grad_norm": 0.07693719863891602, "learning_rate": 3.6166149696178045e-07, "loss": 0.0008, "step": 108830 }, { "epoch": 1.7809048515094494, "grad_norm": 0.07743791490793228, "learning_rate": 3.611284963813283e-07, "loss": 0.0007, "step": 108840 }, { "epoch": 1.7810684774605252, "grad_norm": 0.08219505846500397, "learning_rate": 3.605958741314541e-07, "loss": 0.0012, "step": 108850 }, { "epoch": 1.7812321034116012, "grad_norm": 0.05495278909802437, "learning_rate": 3.600636302555949e-07, "loss": 0.0005, "step": 108860 }, { "epoch": 1.7813957293626768, "grad_norm": 0.01769072376191616, "learning_rate": 3.595317647971602e-07, "loss": 0.0008, "step": 108870 }, { "epoch": 1.7815593553137528, "grad_norm": 0.08900812268257141, "learning_rate": 3.5900027779952607e-07, "loss": 0.001, "step": 108880 }, { "epoch": 1.7817229812648288, "grad_norm": 0.04917926713824272, "learning_rate": 3.584691693060399e-07, "loss": 0.001, "step": 108890 }, { "epoch": 1.7818866072159043, "grad_norm": 0.04451920464634895, "learning_rate": 3.5793843936001604e-07, "loss": 0.0033, "step": 108900 }, { "epoch": 1.7820502331669803, "grad_norm": 0.039458729326725006, "learning_rate": 3.574080880047398e-07, "loss": 0.0007, "step": 108910 }, { "epoch": 1.782213859118056, "grad_norm": 0.0532478466629982, "learning_rate": 3.5687811528346386e-07, "loss": 0.0009, "step": 108920 }, { "epoch": 1.7823774850691319, "grad_norm": 0.030456246808171272, "learning_rate": 3.5634852123941187e-07, "loss": 0.0008, "step": 108930 }, { "epoch": 1.7825411110202078, "grad_norm": 0.03679100424051285, "learning_rate": 3.5581930591577495e-07, "loss": 0.0012, "step": 108940 }, { "epoch": 1.7827047369712836, "grad_norm": 0.09483564645051956, "learning_rate": 3.552904693557158e-07, "loss": 0.0012, "step": 108950 }, { "epoch": 1.7828683629223594, "grad_norm": 0.07398977875709534, "learning_rate": 3.54762011602362e-07, "loss": 0.001, "step": 108960 }, { "epoch": 1.7830319888734354, "grad_norm": 0.04821565002202988, "learning_rate": 3.5423393269881477e-07, "loss": 0.0007, "step": 108970 }, { "epoch": 1.7831956148245112, "grad_norm": 0.04015131667256355, "learning_rate": 3.537062326881413e-07, "loss": 0.0006, "step": 108980 }, { "epoch": 1.783359240775587, "grad_norm": 0.02346006967127323, "learning_rate": 3.531789116133799e-07, "loss": 0.0011, "step": 108990 }, { "epoch": 1.783522866726663, "grad_norm": 0.04381433129310608, "learning_rate": 3.526519695175362e-07, "loss": 0.0012, "step": 109000 }, { "epoch": 1.7836864926777387, "grad_norm": 0.0223515834659338, "learning_rate": 3.5212540644358695e-07, "loss": 0.0004, "step": 109010 }, { "epoch": 1.7838501186288145, "grad_norm": 0.04557507485151291, "learning_rate": 3.51599222434475e-07, "loss": 0.0039, "step": 109020 }, { "epoch": 1.7840137445798905, "grad_norm": 0.010546478442847729, "learning_rate": 3.5107341753311596e-07, "loss": 0.0005, "step": 109030 }, { "epoch": 1.7841773705309663, "grad_norm": 0.08547072112560272, "learning_rate": 3.505479917823917e-07, "loss": 0.0013, "step": 109040 }, { "epoch": 1.784340996482042, "grad_norm": 0.16562238335609436, "learning_rate": 3.500229452251547e-07, "loss": 0.0011, "step": 109050 }, { "epoch": 1.784504622433118, "grad_norm": 0.023884305730462074, "learning_rate": 3.494982779042261e-07, "loss": 0.0005, "step": 109060 }, { "epoch": 1.7846682483841936, "grad_norm": 0.01914985477924347, "learning_rate": 3.489739898623945e-07, "loss": 0.0003, "step": 109070 }, { "epoch": 1.7848318743352696, "grad_norm": 0.12570561468601227, "learning_rate": 3.484500811424213e-07, "loss": 0.001, "step": 109080 }, { "epoch": 1.7849955002863456, "grad_norm": 0.05569448322057724, "learning_rate": 3.4792655178703226e-07, "loss": 0.0008, "step": 109090 }, { "epoch": 1.7851591262374211, "grad_norm": 0.01894867606461048, "learning_rate": 3.4740340183892716e-07, "loss": 0.0006, "step": 109100 }, { "epoch": 1.7853227521884971, "grad_norm": 0.2176194041967392, "learning_rate": 3.468806313407702e-07, "loss": 0.0014, "step": 109110 }, { "epoch": 1.785486378139573, "grad_norm": 0.07903044670820236, "learning_rate": 3.4635824033519837e-07, "loss": 0.0013, "step": 109120 }, { "epoch": 1.7856500040906487, "grad_norm": 0.031136836856603622, "learning_rate": 3.458362288648148e-07, "loss": 0.0005, "step": 109130 }, { "epoch": 1.7858136300417247, "grad_norm": 0.23112307488918304, "learning_rate": 3.4531459697219383e-07, "loss": 0.0009, "step": 109140 }, { "epoch": 1.7859772559928004, "grad_norm": 0.03424979746341705, "learning_rate": 3.4479334469987745e-07, "loss": 0.0007, "step": 109150 }, { "epoch": 1.7861408819438762, "grad_norm": 0.02788633108139038, "learning_rate": 3.442724720903784e-07, "loss": 0.0012, "step": 109160 }, { "epoch": 1.7863045078949522, "grad_norm": 0.04473122954368591, "learning_rate": 3.4375197918617485e-07, "loss": 0.0007, "step": 109170 }, { "epoch": 1.786468133846028, "grad_norm": 0.044374048709869385, "learning_rate": 3.432318660297196e-07, "loss": 0.0007, "step": 109180 }, { "epoch": 1.7866317597971038, "grad_norm": 0.07242981344461441, "learning_rate": 3.4271213266342806e-07, "loss": 0.0014, "step": 109190 }, { "epoch": 1.7867953857481798, "grad_norm": 0.039236944168806076, "learning_rate": 3.4219277912969085e-07, "loss": 0.0008, "step": 109200 }, { "epoch": 1.7869590116992555, "grad_norm": 0.028940899297595024, "learning_rate": 3.416738054708624e-07, "loss": 0.0004, "step": 109210 }, { "epoch": 1.7871226376503313, "grad_norm": 0.02621063031256199, "learning_rate": 3.4115521172927044e-07, "loss": 0.0022, "step": 109220 }, { "epoch": 1.7872862636014073, "grad_norm": 0.07201523333787918, "learning_rate": 3.406369979472074e-07, "loss": 0.0009, "step": 109230 }, { "epoch": 1.787449889552483, "grad_norm": 0.010051997378468513, "learning_rate": 3.4011916416693933e-07, "loss": 0.0011, "step": 109240 }, { "epoch": 1.7876135155035588, "grad_norm": 0.054128870368003845, "learning_rate": 3.39601710430697e-07, "loss": 0.0008, "step": 109250 }, { "epoch": 1.7877771414546348, "grad_norm": 0.04184652864933014, "learning_rate": 3.3908463678068435e-07, "loss": 0.0004, "step": 109260 }, { "epoch": 1.7879407674057104, "grad_norm": 0.03830265998840332, "learning_rate": 3.3856794325906995e-07, "loss": 0.0017, "step": 109270 }, { "epoch": 1.7881043933567864, "grad_norm": 0.022045837715268135, "learning_rate": 3.38051629907995e-07, "loss": 0.0005, "step": 109280 }, { "epoch": 1.7882680193078624, "grad_norm": 0.09758727252483368, "learning_rate": 3.37535696769567e-07, "loss": 0.0018, "step": 109290 }, { "epoch": 1.788431645258938, "grad_norm": 0.0681813657283783, "learning_rate": 3.3702014388586557e-07, "loss": 0.0008, "step": 109300 }, { "epoch": 1.788595271210014, "grad_norm": 0.00808724109083414, "learning_rate": 3.3650497129893546e-07, "loss": 0.0013, "step": 109310 }, { "epoch": 1.7887588971610897, "grad_norm": 0.02012191154062748, "learning_rate": 3.359901790507941e-07, "loss": 0.001, "step": 109320 }, { "epoch": 1.7889225231121655, "grad_norm": 0.03943663090467453, "learning_rate": 3.3547576718342465e-07, "loss": 0.0008, "step": 109330 }, { "epoch": 1.7890861490632415, "grad_norm": 0.049337275326251984, "learning_rate": 3.349617357387824e-07, "loss": 0.0012, "step": 109340 }, { "epoch": 1.7892497750143173, "grad_norm": 0.12241708487272263, "learning_rate": 3.3444808475878833e-07, "loss": 0.0008, "step": 109350 }, { "epoch": 1.789413400965393, "grad_norm": 0.0004889406845904887, "learning_rate": 3.3393481428533604e-07, "loss": 0.0006, "step": 109360 }, { "epoch": 1.789577026916469, "grad_norm": 0.14237074553966522, "learning_rate": 3.3342192436028374e-07, "loss": 0.0008, "step": 109370 }, { "epoch": 1.7897406528675448, "grad_norm": 0.0270346961915493, "learning_rate": 3.3290941502546306e-07, "loss": 0.0008, "step": 109380 }, { "epoch": 1.7899042788186206, "grad_norm": 0.017784483730793, "learning_rate": 3.323972863226715e-07, "loss": 0.0008, "step": 109390 }, { "epoch": 1.7900679047696966, "grad_norm": 0.034558188170194626, "learning_rate": 3.3188553829367644e-07, "loss": 0.0014, "step": 109400 }, { "epoch": 1.7902315307207723, "grad_norm": 0.08992783725261688, "learning_rate": 3.3137417098021487e-07, "loss": 0.0012, "step": 109410 }, { "epoch": 1.7903951566718481, "grad_norm": 0.0024785336572676897, "learning_rate": 3.3086318442399123e-07, "loss": 0.0011, "step": 109420 }, { "epoch": 1.7905587826229241, "grad_norm": 0.0588332936167717, "learning_rate": 3.3035257866668113e-07, "loss": 0.0015, "step": 109430 }, { "epoch": 1.790722408574, "grad_norm": 0.048809777945280075, "learning_rate": 3.2984235374992677e-07, "loss": 0.0007, "step": 109440 }, { "epoch": 1.7908860345250757, "grad_norm": 0.09338638186454773, "learning_rate": 3.293325097153416e-07, "loss": 0.0009, "step": 109450 }, { "epoch": 1.7910496604761517, "grad_norm": 0.0046027353964746, "learning_rate": 3.288230466045045e-07, "loss": 0.0008, "step": 109460 }, { "epoch": 1.7912132864272272, "grad_norm": 0.03807989880442619, "learning_rate": 3.2831396445896834e-07, "loss": 0.0007, "step": 109470 }, { "epoch": 1.7913769123783032, "grad_norm": 0.12097148597240448, "learning_rate": 3.2780526332024997e-07, "loss": 0.0022, "step": 109480 }, { "epoch": 1.791540538329379, "grad_norm": 0.01567874476313591, "learning_rate": 3.2729694322983895e-07, "loss": 0.0003, "step": 109490 }, { "epoch": 1.7917041642804548, "grad_norm": 0.02814096212387085, "learning_rate": 3.2678900422919046e-07, "loss": 0.001, "step": 109500 }, { "epoch": 1.7918677902315308, "grad_norm": 0.05652020871639252, "learning_rate": 3.262814463597319e-07, "loss": 0.001, "step": 109510 }, { "epoch": 1.7920314161826065, "grad_norm": 0.025702757760882378, "learning_rate": 3.2577426966285687e-07, "loss": 0.0011, "step": 109520 }, { "epoch": 1.7921950421336823, "grad_norm": 0.11110571771860123, "learning_rate": 3.2526747417992943e-07, "loss": 0.0011, "step": 109530 }, { "epoch": 1.7923586680847583, "grad_norm": 0.016633115708827972, "learning_rate": 3.247610599522816e-07, "loss": 0.0015, "step": 109540 }, { "epoch": 1.792522294035834, "grad_norm": 0.03265874460339546, "learning_rate": 3.242550270212158e-07, "loss": 0.0013, "step": 109550 }, { "epoch": 1.7926859199869098, "grad_norm": 0.025700142607092857, "learning_rate": 3.2374937542800075e-07, "loss": 0.0037, "step": 109560 }, { "epoch": 1.7928495459379858, "grad_norm": 0.051331449300050735, "learning_rate": 3.232441052138779e-07, "loss": 0.0006, "step": 109570 }, { "epoch": 1.7930131718890616, "grad_norm": 0.043259989470243454, "learning_rate": 3.227392164200532e-07, "loss": 0.0011, "step": 109580 }, { "epoch": 1.7931767978401374, "grad_norm": 0.012173153460025787, "learning_rate": 3.2223470908770537e-07, "loss": 0.0007, "step": 109590 }, { "epoch": 1.7933404237912134, "grad_norm": 0.15781977772712708, "learning_rate": 3.2173058325797866e-07, "loss": 0.0023, "step": 109600 }, { "epoch": 1.7935040497422892, "grad_norm": 0.040818627923727036, "learning_rate": 3.2122683897198967e-07, "loss": 0.0013, "step": 109610 }, { "epoch": 1.793667675693365, "grad_norm": 0.06910662353038788, "learning_rate": 3.2072347627082003e-07, "loss": 0.0007, "step": 109620 }, { "epoch": 1.793831301644441, "grad_norm": 0.06196121126413345, "learning_rate": 3.2022049519552457e-07, "loss": 0.0011, "step": 109630 }, { "epoch": 1.7939949275955165, "grad_norm": 0.1300002485513687, "learning_rate": 3.197178957871222e-07, "loss": 0.0009, "step": 109640 }, { "epoch": 1.7941585535465925, "grad_norm": 0.016693996265530586, "learning_rate": 3.192156780866057e-07, "loss": 0.0009, "step": 109650 }, { "epoch": 1.7943221794976685, "grad_norm": 0.007119476795196533, "learning_rate": 3.187138421349323e-07, "loss": 0.0029, "step": 109660 }, { "epoch": 1.794485805448744, "grad_norm": 0.01714773289859295, "learning_rate": 3.18212387973032e-07, "loss": 0.0011, "step": 109670 }, { "epoch": 1.79464943139982, "grad_norm": 0.023635078221559525, "learning_rate": 3.1771131564179936e-07, "loss": 0.0008, "step": 109680 }, { "epoch": 1.7948130573508958, "grad_norm": 0.006420428398996592, "learning_rate": 3.1721062518210166e-07, "loss": 0.0007, "step": 109690 }, { "epoch": 1.7949766833019716, "grad_norm": 0.029131490737199783, "learning_rate": 3.167103166347735e-07, "loss": 0.0006, "step": 109700 }, { "epoch": 1.7951403092530476, "grad_norm": 0.06370867043733597, "learning_rate": 3.162103900406177e-07, "loss": 0.0009, "step": 109710 }, { "epoch": 1.7953039352041233, "grad_norm": 0.019433518871665, "learning_rate": 3.1571084544040674e-07, "loss": 0.0006, "step": 109720 }, { "epoch": 1.7954675611551991, "grad_norm": 0.023268291726708412, "learning_rate": 3.152116828748819e-07, "loss": 0.0013, "step": 109730 }, { "epoch": 1.7956311871062751, "grad_norm": 0.013202893547713757, "learning_rate": 3.1471290238475337e-07, "loss": 0.0007, "step": 109740 }, { "epoch": 1.795794813057351, "grad_norm": 0.030107159167528152, "learning_rate": 3.1421450401069976e-07, "loss": 0.0007, "step": 109750 }, { "epoch": 1.7959584390084267, "grad_norm": 0.05286666750907898, "learning_rate": 3.1371648779336906e-07, "loss": 0.0005, "step": 109760 }, { "epoch": 1.7961220649595027, "grad_norm": 0.030747853219509125, "learning_rate": 3.1321885377337657e-07, "loss": 0.0009, "step": 109770 }, { "epoch": 1.7962856909105784, "grad_norm": 0.000842454785015434, "learning_rate": 3.127216019913093e-07, "loss": 0.0008, "step": 109780 }, { "epoch": 1.7964493168616542, "grad_norm": 0.018881119787693024, "learning_rate": 3.1222473248772034e-07, "loss": 0.0008, "step": 109790 }, { "epoch": 1.7966129428127302, "grad_norm": 0.03469909355044365, "learning_rate": 3.117282453031334e-07, "loss": 0.0008, "step": 109800 }, { "epoch": 1.796776568763806, "grad_norm": 0.042177844792604446, "learning_rate": 3.112321404780394e-07, "loss": 0.0007, "step": 109810 }, { "epoch": 1.7969401947148818, "grad_norm": 0.042193010449409485, "learning_rate": 3.1073641805289987e-07, "loss": 0.0018, "step": 109820 }, { "epoch": 1.7971038206659578, "grad_norm": 0.16278859972953796, "learning_rate": 3.1024107806814364e-07, "loss": 0.0034, "step": 109830 }, { "epoch": 1.7972674466170333, "grad_norm": 0.02990959957242012, "learning_rate": 3.097461205641694e-07, "loss": 0.0019, "step": 109840 }, { "epoch": 1.7974310725681093, "grad_norm": 0.02863064780831337, "learning_rate": 3.092515455813433e-07, "loss": 0.001, "step": 109850 }, { "epoch": 1.7975946985191853, "grad_norm": 0.039100535213947296, "learning_rate": 3.08757353160003e-07, "loss": 0.0007, "step": 109860 }, { "epoch": 1.7977583244702608, "grad_norm": 0.03215475007891655, "learning_rate": 3.0826354334045073e-07, "loss": 0.0004, "step": 109870 }, { "epoch": 1.7979219504213368, "grad_norm": 0.0635056346654892, "learning_rate": 3.0777011616296205e-07, "loss": 0.0008, "step": 109880 }, { "epoch": 1.7980855763724126, "grad_norm": 0.025397958233952522, "learning_rate": 3.0727707166777753e-07, "loss": 0.0004, "step": 109890 }, { "epoch": 1.7982492023234884, "grad_norm": 0.08179663121700287, "learning_rate": 3.0678440989511006e-07, "loss": 0.0016, "step": 109900 }, { "epoch": 1.7984128282745644, "grad_norm": 0.03371858596801758, "learning_rate": 3.0629213088513743e-07, "loss": 0.001, "step": 109910 }, { "epoch": 1.7985764542256402, "grad_norm": 0.018032874912023544, "learning_rate": 3.0580023467801033e-07, "loss": 0.0009, "step": 109920 }, { "epoch": 1.798740080176716, "grad_norm": 0.015191777609288692, "learning_rate": 3.053087213138439e-07, "loss": 0.0009, "step": 109930 }, { "epoch": 1.798903706127792, "grad_norm": 0.10504893213510513, "learning_rate": 3.0481759083272613e-07, "loss": 0.001, "step": 109940 }, { "epoch": 1.7990673320788677, "grad_norm": 0.06394493579864502, "learning_rate": 3.0432684327471095e-07, "loss": 0.001, "step": 109950 }, { "epoch": 1.7992309580299435, "grad_norm": 0.02950209006667137, "learning_rate": 3.038364786798226e-07, "loss": 0.0011, "step": 109960 }, { "epoch": 1.7993945839810195, "grad_norm": 0.051735080778598785, "learning_rate": 3.0334649708805286e-07, "loss": 0.0011, "step": 109970 }, { "epoch": 1.7995582099320953, "grad_norm": 0.02537471055984497, "learning_rate": 3.028568985393643e-07, "loss": 0.0011, "step": 109980 }, { "epoch": 1.799721835883171, "grad_norm": 0.052202340215444565, "learning_rate": 3.023676830736849e-07, "loss": 0.0012, "step": 109990 }, { "epoch": 1.799885461834247, "grad_norm": 0.06691936403512955, "learning_rate": 3.018788507309156e-07, "loss": 0.0004, "step": 110000 }, { "epoch": 1.8000490877853228, "grad_norm": 0.16745823621749878, "learning_rate": 3.0139040155092225e-07, "loss": 0.0007, "step": 110010 }, { "epoch": 1.8002127137363986, "grad_norm": 0.0382610559463501, "learning_rate": 3.009023355735419e-07, "loss": 0.0011, "step": 110020 }, { "epoch": 1.8003763396874746, "grad_norm": 0.034382786601781845, "learning_rate": 3.0041465283857984e-07, "loss": 0.0004, "step": 110030 }, { "epoch": 1.8005399656385501, "grad_norm": 0.02954678237438202, "learning_rate": 2.9992735338580825e-07, "loss": 0.0009, "step": 110040 }, { "epoch": 1.8007035915896261, "grad_norm": 0.07690322399139404, "learning_rate": 2.9944043725497194e-07, "loss": 0.0023, "step": 110050 }, { "epoch": 1.8008672175407021, "grad_norm": 0.061491891741752625, "learning_rate": 2.989539044857798e-07, "loss": 0.0021, "step": 110060 }, { "epoch": 1.8010308434917777, "grad_norm": 0.036923281848430634, "learning_rate": 2.9846775511791324e-07, "loss": 0.001, "step": 110070 }, { "epoch": 1.8011944694428537, "grad_norm": 0.03511584922671318, "learning_rate": 2.9798198919102016e-07, "loss": 0.0005, "step": 110080 }, { "epoch": 1.8013580953939294, "grad_norm": 0.041213735938072205, "learning_rate": 2.9749660674471934e-07, "loss": 0.0007, "step": 110090 }, { "epoch": 1.8015217213450052, "grad_norm": 0.068928562104702, "learning_rate": 2.970116078185953e-07, "loss": 0.0006, "step": 110100 }, { "epoch": 1.8016853472960812, "grad_norm": 0.06118115782737732, "learning_rate": 2.9652699245220407e-07, "loss": 0.0009, "step": 110110 }, { "epoch": 1.801848973247157, "grad_norm": 0.0021085136104375124, "learning_rate": 2.9604276068506797e-07, "loss": 0.0009, "step": 110120 }, { "epoch": 1.8020125991982328, "grad_norm": 0.07775899022817612, "learning_rate": 2.95558912556681e-07, "loss": 0.0008, "step": 110130 }, { "epoch": 1.8021762251493088, "grad_norm": 0.020561179146170616, "learning_rate": 2.9507544810650214e-07, "loss": 0.0008, "step": 110140 }, { "epoch": 1.8023398511003845, "grad_norm": 0.013287893496453762, "learning_rate": 2.9459236737396314e-07, "loss": 0.0007, "step": 110150 }, { "epoch": 1.8025034770514603, "grad_norm": 0.06170758977532387, "learning_rate": 2.941096703984608e-07, "loss": 0.006, "step": 110160 }, { "epoch": 1.8026671030025363, "grad_norm": 0.06692039221525192, "learning_rate": 2.9362735721936376e-07, "loss": 0.0006, "step": 110170 }, { "epoch": 1.802830728953612, "grad_norm": 0.025893183425068855, "learning_rate": 2.9314542787600596e-07, "loss": 0.0006, "step": 110180 }, { "epoch": 1.8029943549046878, "grad_norm": 0.01852772757411003, "learning_rate": 2.926638824076938e-07, "loss": 0.0008, "step": 110190 }, { "epoch": 1.8031579808557638, "grad_norm": 0.007035739719867706, "learning_rate": 2.921827208536987e-07, "loss": 0.0014, "step": 110200 }, { "epoch": 1.8033216068068396, "grad_norm": 0.017723141238093376, "learning_rate": 2.917019432532647e-07, "loss": 0.0006, "step": 110210 }, { "epoch": 1.8034852327579154, "grad_norm": 0.1276751309633255, "learning_rate": 2.9122154964560003e-07, "loss": 0.0006, "step": 110220 }, { "epoch": 1.8036488587089914, "grad_norm": 0.03449685126543045, "learning_rate": 2.907415400698865e-07, "loss": 0.001, "step": 110230 }, { "epoch": 1.803812484660067, "grad_norm": 0.006220964249223471, "learning_rate": 2.902619145652691e-07, "loss": 0.0013, "step": 110240 }, { "epoch": 1.803976110611143, "grad_norm": 0.019480634480714798, "learning_rate": 2.897826731708675e-07, "loss": 0.0011, "step": 110250 }, { "epoch": 1.804139736562219, "grad_norm": 0.036189641803503036, "learning_rate": 2.893038159257644e-07, "loss": 0.0006, "step": 110260 }, { "epoch": 1.8043033625132945, "grad_norm": 0.026509476825594902, "learning_rate": 2.8882534286901575e-07, "loss": 0.0005, "step": 110270 }, { "epoch": 1.8044669884643705, "grad_norm": 0.0023668569047003984, "learning_rate": 2.883472540396426e-07, "loss": 0.0009, "step": 110280 }, { "epoch": 1.8046306144154463, "grad_norm": 0.042108867317438126, "learning_rate": 2.8786954947663756e-07, "loss": 0.0015, "step": 110290 }, { "epoch": 1.804794240366522, "grad_norm": 0.00629700580611825, "learning_rate": 2.8739222921895957e-07, "loss": 0.0005, "step": 110300 }, { "epoch": 1.804957866317598, "grad_norm": 0.004674853757023811, "learning_rate": 2.86915293305538e-07, "loss": 0.0017, "step": 110310 }, { "epoch": 1.8051214922686738, "grad_norm": 0.003186131129041314, "learning_rate": 2.864387417752695e-07, "loss": 0.0017, "step": 110320 }, { "epoch": 1.8052851182197496, "grad_norm": 0.04131060466170311, "learning_rate": 2.8596257466702084e-07, "loss": 0.0007, "step": 110330 }, { "epoch": 1.8054487441708256, "grad_norm": 0.043626222759485245, "learning_rate": 2.854867920196253e-07, "loss": 0.0006, "step": 110340 }, { "epoch": 1.8056123701219013, "grad_norm": 0.054487768560647964, "learning_rate": 2.850113938718879e-07, "loss": 0.0006, "step": 110350 }, { "epoch": 1.8057759960729771, "grad_norm": 0.06478989124298096, "learning_rate": 2.845363802625789e-07, "loss": 0.0007, "step": 110360 }, { "epoch": 1.8059396220240531, "grad_norm": 0.12410090118646622, "learning_rate": 2.8406175123044e-07, "loss": 0.0007, "step": 110370 }, { "epoch": 1.806103247975129, "grad_norm": 0.009459729306399822, "learning_rate": 2.8358750681417857e-07, "loss": 0.0006, "step": 110380 }, { "epoch": 1.8062668739262047, "grad_norm": 0.037657782435417175, "learning_rate": 2.831136470524742e-07, "loss": 0.0015, "step": 110390 }, { "epoch": 1.8064304998772807, "grad_norm": 0.06314370781183243, "learning_rate": 2.8264017198397163e-07, "loss": 0.001, "step": 110400 }, { "epoch": 1.8065941258283562, "grad_norm": 0.018982969224452972, "learning_rate": 2.821670816472877e-07, "loss": 0.0006, "step": 110410 }, { "epoch": 1.8067577517794322, "grad_norm": 0.02616678550839424, "learning_rate": 2.8169437608100427e-07, "loss": 0.0005, "step": 110420 }, { "epoch": 1.8069213777305082, "grad_norm": 0.04556853696703911, "learning_rate": 2.812220553236755e-07, "loss": 0.0009, "step": 110430 }, { "epoch": 1.8070850036815838, "grad_norm": 0.031112229451537132, "learning_rate": 2.8075011941382015e-07, "loss": 0.0013, "step": 110440 }, { "epoch": 1.8072486296326598, "grad_norm": 0.006761751137673855, "learning_rate": 2.802785683899295e-07, "loss": 0.001, "step": 110450 }, { "epoch": 1.8074122555837355, "grad_norm": 0.20393942296504974, "learning_rate": 2.7980740229046064e-07, "loss": 0.001, "step": 110460 }, { "epoch": 1.8075758815348113, "grad_norm": 0.04542384669184685, "learning_rate": 2.7933662115384065e-07, "loss": 0.0006, "step": 110470 }, { "epoch": 1.8077395074858873, "grad_norm": 0.012509548105299473, "learning_rate": 2.788662250184648e-07, "loss": 0.0007, "step": 110480 }, { "epoch": 1.807903133436963, "grad_norm": 0.04757794737815857, "learning_rate": 2.78396213922697e-07, "loss": 0.0007, "step": 110490 }, { "epoch": 1.8080667593880388, "grad_norm": 0.03990526497364044, "learning_rate": 2.779265879048698e-07, "loss": 0.0012, "step": 110500 }, { "epoch": 1.8082303853391148, "grad_norm": 0.020959237590432167, "learning_rate": 2.7745734700328374e-07, "loss": 0.0009, "step": 110510 }, { "epoch": 1.8083940112901906, "grad_norm": 0.03837386518716812, "learning_rate": 2.7698849125620986e-07, "loss": 0.0005, "step": 110520 }, { "epoch": 1.8085576372412664, "grad_norm": 0.02678568661212921, "learning_rate": 2.7652002070188476e-07, "loss": 0.0007, "step": 110530 }, { "epoch": 1.8087212631923424, "grad_norm": 0.02614133246243, "learning_rate": 2.760519353785163e-07, "loss": 0.001, "step": 110540 }, { "epoch": 1.8088848891434182, "grad_norm": 0.017137326300144196, "learning_rate": 2.7558423532427936e-07, "loss": 0.0005, "step": 110550 }, { "epoch": 1.809048515094494, "grad_norm": 0.04557979106903076, "learning_rate": 2.751169205773191e-07, "loss": 0.0009, "step": 110560 }, { "epoch": 1.80921214104557, "grad_norm": 0.07694178074598312, "learning_rate": 2.7464999117574664e-07, "loss": 0.0011, "step": 110570 }, { "epoch": 1.8093757669966457, "grad_norm": 0.033573735505342484, "learning_rate": 2.741834471576449e-07, "loss": 0.0007, "step": 110580 }, { "epoch": 1.8095393929477215, "grad_norm": 0.01824546419084072, "learning_rate": 2.737172885610612e-07, "loss": 0.0021, "step": 110590 }, { "epoch": 1.8097030188987975, "grad_norm": 0.04702093079686165, "learning_rate": 2.7325151542401673e-07, "loss": 0.0008, "step": 110600 }, { "epoch": 1.809866644849873, "grad_norm": 0.0018065616022795439, "learning_rate": 2.7278612778449563e-07, "loss": 0.0015, "step": 110610 }, { "epoch": 1.810030270800949, "grad_norm": 0.05801538750529289, "learning_rate": 2.7232112568045534e-07, "loss": 0.0009, "step": 110620 }, { "epoch": 1.810193896752025, "grad_norm": 0.059487905353307724, "learning_rate": 2.718565091498182e-07, "loss": 0.0012, "step": 110630 }, { "epoch": 1.8103575227031006, "grad_norm": 0.03374604880809784, "learning_rate": 2.713922782304784e-07, "loss": 0.0006, "step": 110640 }, { "epoch": 1.8105211486541766, "grad_norm": 0.026172664016485214, "learning_rate": 2.7092843296029627e-07, "loss": 0.0007, "step": 110650 }, { "epoch": 1.8106847746052523, "grad_norm": 0.00165732076857239, "learning_rate": 2.7046497337710144e-07, "loss": 0.0008, "step": 110660 }, { "epoch": 1.8108484005563281, "grad_norm": 0.04420202597975731, "learning_rate": 2.700018995186915e-07, "loss": 0.0013, "step": 110670 }, { "epoch": 1.8110120265074041, "grad_norm": 0.04795202612876892, "learning_rate": 2.6953921142283457e-07, "loss": 0.0013, "step": 110680 }, { "epoch": 1.81117565245848, "grad_norm": 0.015184788033366203, "learning_rate": 2.6907690912726435e-07, "loss": 0.0008, "step": 110690 }, { "epoch": 1.8113392784095557, "grad_norm": 0.04024822264909744, "learning_rate": 2.686149926696863e-07, "loss": 0.001, "step": 110700 }, { "epoch": 1.8115029043606317, "grad_norm": 0.039022479206323624, "learning_rate": 2.681534620877713e-07, "loss": 0.0016, "step": 110710 }, { "epoch": 1.8116665303117074, "grad_norm": 0.10446411371231079, "learning_rate": 2.676923174191615e-07, "loss": 0.0006, "step": 110720 }, { "epoch": 1.8118301562627832, "grad_norm": 0.031347062438726425, "learning_rate": 2.6723155870146454e-07, "loss": 0.0007, "step": 110730 }, { "epoch": 1.8119937822138592, "grad_norm": 0.06212379038333893, "learning_rate": 2.667711859722605e-07, "loss": 0.0014, "step": 110740 }, { "epoch": 1.812157408164935, "grad_norm": 0.06820494681596756, "learning_rate": 2.6631119926909365e-07, "loss": 0.0015, "step": 110750 }, { "epoch": 1.8123210341160108, "grad_norm": 0.04215957224369049, "learning_rate": 2.6585159862948073e-07, "loss": 0.002, "step": 110760 }, { "epoch": 1.8124846600670868, "grad_norm": 0.048620592802762985, "learning_rate": 2.6539238409090393e-07, "loss": 0.0006, "step": 110770 }, { "epoch": 1.8126482860181625, "grad_norm": 0.03688211739063263, "learning_rate": 2.6493355569081615e-07, "loss": 0.001, "step": 110780 }, { "epoch": 1.8128119119692383, "grad_norm": 0.02151479385793209, "learning_rate": 2.6447511346663734e-07, "loss": 0.0005, "step": 110790 }, { "epoch": 1.8129755379203143, "grad_norm": 0.008797711692750454, "learning_rate": 2.640170574557566e-07, "loss": 0.0005, "step": 110800 }, { "epoch": 1.8131391638713898, "grad_norm": 0.036530692130327225, "learning_rate": 2.635593876955322e-07, "loss": 0.0011, "step": 110810 }, { "epoch": 1.8133027898224658, "grad_norm": 0.020792389288544655, "learning_rate": 2.6310210422328787e-07, "loss": 0.001, "step": 110820 }, { "epoch": 1.8134664157735418, "grad_norm": 0.0023267720825970173, "learning_rate": 2.626452070763208e-07, "loss": 0.0006, "step": 110830 }, { "epoch": 1.8136300417246174, "grad_norm": 0.011442271061241627, "learning_rate": 2.6218869629189235e-07, "loss": 0.0003, "step": 110840 }, { "epoch": 1.8137936676756934, "grad_norm": 0.14230522513389587, "learning_rate": 2.6173257190723445e-07, "loss": 0.0007, "step": 110850 }, { "epoch": 1.8139572936267692, "grad_norm": 0.0642613023519516, "learning_rate": 2.6127683395954674e-07, "loss": 0.0008, "step": 110860 }, { "epoch": 1.814120919577845, "grad_norm": 0.021983062848448753, "learning_rate": 2.608214824859984e-07, "loss": 0.0017, "step": 110870 }, { "epoch": 1.814284545528921, "grad_norm": 0.09344790875911713, "learning_rate": 2.603665175237252e-07, "loss": 0.0009, "step": 110880 }, { "epoch": 1.8144481714799967, "grad_norm": 0.01867722161114216, "learning_rate": 2.599119391098343e-07, "loss": 0.0008, "step": 110890 }, { "epoch": 1.8146117974310725, "grad_norm": 0.020024236291646957, "learning_rate": 2.594577472813975e-07, "loss": 0.0004, "step": 110900 }, { "epoch": 1.8147754233821485, "grad_norm": 0.00954466499388218, "learning_rate": 2.5900394207545865e-07, "loss": 0.0005, "step": 110910 }, { "epoch": 1.8149390493332243, "grad_norm": 0.05353563278913498, "learning_rate": 2.5855052352902754e-07, "loss": 0.0011, "step": 110920 }, { "epoch": 1.8151026752843, "grad_norm": 0.15290431678295135, "learning_rate": 2.580974916790846e-07, "loss": 0.0014, "step": 110930 }, { "epoch": 1.815266301235376, "grad_norm": 0.011975144036114216, "learning_rate": 2.576448465625758e-07, "loss": 0.0006, "step": 110940 }, { "epoch": 1.8154299271864518, "grad_norm": 0.049102842807769775, "learning_rate": 2.5719258821641947e-07, "loss": 0.0009, "step": 110950 }, { "epoch": 1.8155935531375276, "grad_norm": 0.032433703541755676, "learning_rate": 2.567407166774988e-07, "loss": 0.0005, "step": 110960 }, { "epoch": 1.8157571790886036, "grad_norm": 0.0659307911992073, "learning_rate": 2.5628923198266764e-07, "loss": 0.0013, "step": 110970 }, { "epoch": 1.8159208050396793, "grad_norm": 0.01950692944228649, "learning_rate": 2.558381341687466e-07, "loss": 0.0007, "step": 110980 }, { "epoch": 1.8160844309907551, "grad_norm": 0.03755845129489899, "learning_rate": 2.5538742327252674e-07, "loss": 0.001, "step": 110990 }, { "epoch": 1.8162480569418311, "grad_norm": 0.04799890145659447, "learning_rate": 2.5493709933076593e-07, "loss": 0.0008, "step": 111000 }, { "epoch": 1.8164116828929067, "grad_norm": 0.045996349304914474, "learning_rate": 2.5448716238019145e-07, "loss": 0.0009, "step": 111010 }, { "epoch": 1.8165753088439827, "grad_norm": 0.016494890674948692, "learning_rate": 2.5403761245749783e-07, "loss": 0.0007, "step": 111020 }, { "epoch": 1.8167389347950587, "grad_norm": 0.1004316583275795, "learning_rate": 2.5358844959935016e-07, "loss": 0.0007, "step": 111030 }, { "epoch": 1.8169025607461342, "grad_norm": 0.03347358480095863, "learning_rate": 2.531396738423791e-07, "loss": 0.001, "step": 111040 }, { "epoch": 1.8170661866972102, "grad_norm": 0.024299656972289085, "learning_rate": 2.5269128522318664e-07, "loss": 0.0009, "step": 111050 }, { "epoch": 1.817229812648286, "grad_norm": 0.03240986913442612, "learning_rate": 2.522432837783406e-07, "loss": 0.0006, "step": 111060 }, { "epoch": 1.8173934385993618, "grad_norm": 0.15089312195777893, "learning_rate": 2.5179566954437964e-07, "loss": 0.001, "step": 111070 }, { "epoch": 1.8175570645504378, "grad_norm": 0.024585865437984467, "learning_rate": 2.513484425578083e-07, "loss": 0.0005, "step": 111080 }, { "epoch": 1.8177206905015135, "grad_norm": 0.005272882059216499, "learning_rate": 2.5090160285510257e-07, "loss": 0.0008, "step": 111090 }, { "epoch": 1.8178843164525893, "grad_norm": 0.036688633263111115, "learning_rate": 2.5045515047270375e-07, "loss": 0.0006, "step": 111100 }, { "epoch": 1.8180479424036653, "grad_norm": 0.017682114616036415, "learning_rate": 2.500090854470238e-07, "loss": 0.0014, "step": 111110 }, { "epoch": 1.818211568354741, "grad_norm": 0.03978768736124039, "learning_rate": 2.4956340781444154e-07, "loss": 0.001, "step": 111120 }, { "epoch": 1.8183751943058168, "grad_norm": 0.02109573222696781, "learning_rate": 2.491181176113061e-07, "loss": 0.0006, "step": 111130 }, { "epoch": 1.8185388202568928, "grad_norm": 0.03167502209544182, "learning_rate": 2.4867321487393294e-07, "loss": 0.0009, "step": 111140 }, { "epoch": 1.8187024462079686, "grad_norm": 0.06268315017223358, "learning_rate": 2.482286996386063e-07, "loss": 0.0007, "step": 111150 }, { "epoch": 1.8188660721590444, "grad_norm": 0.025574948638677597, "learning_rate": 2.477845719415811e-07, "loss": 0.0006, "step": 111160 }, { "epoch": 1.8190296981101204, "grad_norm": 0.12344178557395935, "learning_rate": 2.4734083181907677e-07, "loss": 0.0006, "step": 111170 }, { "epoch": 1.8191933240611962, "grad_norm": 0.03598754480481148, "learning_rate": 2.468974793072848e-07, "loss": 0.002, "step": 111180 }, { "epoch": 1.819356950012272, "grad_norm": 0.06256358325481415, "learning_rate": 2.464545144423625e-07, "loss": 0.0014, "step": 111190 }, { "epoch": 1.819520575963348, "grad_norm": 0.018764719367027283, "learning_rate": 2.460119372604375e-07, "loss": 0.0013, "step": 111200 }, { "epoch": 1.8196842019144235, "grad_norm": 0.07342077791690826, "learning_rate": 2.455697477976038e-07, "loss": 0.001, "step": 111210 }, { "epoch": 1.8198478278654995, "grad_norm": 0.1283794790506363, "learning_rate": 2.451279460899264e-07, "loss": 0.0012, "step": 111220 }, { "epoch": 1.8200114538165755, "grad_norm": 0.01647566445171833, "learning_rate": 2.446865321734354e-07, "loss": 0.0009, "step": 111230 }, { "epoch": 1.820175079767651, "grad_norm": 0.03955389931797981, "learning_rate": 2.4424550608413254e-07, "loss": 0.0007, "step": 111240 }, { "epoch": 1.820338705718727, "grad_norm": 0.07133174687623978, "learning_rate": 2.4380486785798516e-07, "loss": 0.0007, "step": 111250 }, { "epoch": 1.8205023316698028, "grad_norm": 0.09807589650154114, "learning_rate": 2.433646175309312e-07, "loss": 0.0016, "step": 111260 }, { "epoch": 1.8206659576208786, "grad_norm": 0.06567030400037766, "learning_rate": 2.4292475513887523e-07, "loss": 0.0012, "step": 111270 }, { "epoch": 1.8208295835719546, "grad_norm": 0.0040667993016541, "learning_rate": 2.424852807176914e-07, "loss": 0.0007, "step": 111280 }, { "epoch": 1.8209932095230303, "grad_norm": 0.01584528759121895, "learning_rate": 2.4204619430322153e-07, "loss": 0.0008, "step": 111290 }, { "epoch": 1.8211568354741061, "grad_norm": 0.025917090475559235, "learning_rate": 2.4160749593127643e-07, "loss": 0.0008, "step": 111300 }, { "epoch": 1.8213204614251821, "grad_norm": 0.05672488361597061, "learning_rate": 2.4116918563763416e-07, "loss": 0.0012, "step": 111310 }, { "epoch": 1.8214840873762579, "grad_norm": 0.043300505727529526, "learning_rate": 2.407312634580422e-07, "loss": 0.0013, "step": 111320 }, { "epoch": 1.8216477133273337, "grad_norm": 0.05439889803528786, "learning_rate": 2.4029372942821594e-07, "loss": 0.0009, "step": 111330 }, { "epoch": 1.8218113392784097, "grad_norm": 0.010111581534147263, "learning_rate": 2.3985658358383956e-07, "loss": 0.0012, "step": 111340 }, { "epoch": 1.8219749652294854, "grad_norm": 0.03371474891901016, "learning_rate": 2.39419825960564e-07, "loss": 0.0015, "step": 111350 }, { "epoch": 1.8221385911805612, "grad_norm": 0.2506891191005707, "learning_rate": 2.389834565940119e-07, "loss": 0.0009, "step": 111360 }, { "epoch": 1.8223022171316372, "grad_norm": 0.08747326582670212, "learning_rate": 2.385474755197692e-07, "loss": 0.0005, "step": 111370 }, { "epoch": 1.8224658430827128, "grad_norm": 0.024355821311473846, "learning_rate": 2.381118827733958e-07, "loss": 0.0019, "step": 111380 }, { "epoch": 1.8226294690337888, "grad_norm": 0.09600850194692612, "learning_rate": 2.3767667839041497e-07, "loss": 0.001, "step": 111390 }, { "epoch": 1.8227930949848647, "grad_norm": 0.02988213486969471, "learning_rate": 2.3724186240632218e-07, "loss": 0.0017, "step": 111400 }, { "epoch": 1.8229567209359403, "grad_norm": 0.03001558594405651, "learning_rate": 2.3680743485657797e-07, "loss": 0.0003, "step": 111410 }, { "epoch": 1.8231203468870163, "grad_norm": 0.155255526304245, "learning_rate": 2.3637339577661454e-07, "loss": 0.0011, "step": 111420 }, { "epoch": 1.823283972838092, "grad_norm": 0.05806509032845497, "learning_rate": 2.359397452018286e-07, "loss": 0.0011, "step": 111430 }, { "epoch": 1.8234475987891678, "grad_norm": 0.05453617498278618, "learning_rate": 2.3550648316758906e-07, "loss": 0.0012, "step": 111440 }, { "epoch": 1.8236112247402438, "grad_norm": 0.024343177676200867, "learning_rate": 2.35073609709231e-07, "loss": 0.001, "step": 111450 }, { "epoch": 1.8237748506913196, "grad_norm": 0.0034968750551342964, "learning_rate": 2.346411248620567e-07, "loss": 0.0015, "step": 111460 }, { "epoch": 1.8239384766423954, "grad_norm": 0.07651791721582413, "learning_rate": 2.3420902866133966e-07, "loss": 0.0005, "step": 111470 }, { "epoch": 1.8241021025934714, "grad_norm": 0.10037540644407272, "learning_rate": 2.337773211423189e-07, "loss": 0.0015, "step": 111480 }, { "epoch": 1.8242657285445472, "grad_norm": 0.036474306136369705, "learning_rate": 2.3334600234020455e-07, "loss": 0.0008, "step": 111490 }, { "epoch": 1.824429354495623, "grad_norm": 0.021350465714931488, "learning_rate": 2.329150722901724e-07, "loss": 0.0005, "step": 111500 }, { "epoch": 1.824592980446699, "grad_norm": 0.002814835635945201, "learning_rate": 2.3248453102736824e-07, "loss": 0.0008, "step": 111510 }, { "epoch": 1.8247566063977747, "grad_norm": 0.004110607318580151, "learning_rate": 2.3205437858690449e-07, "loss": 0.0007, "step": 111520 }, { "epoch": 1.8249202323488505, "grad_norm": 0.047404851764440536, "learning_rate": 2.3162461500386423e-07, "loss": 0.0007, "step": 111530 }, { "epoch": 1.8250838582999265, "grad_norm": 0.07846037298440933, "learning_rate": 2.3119524031329665e-07, "loss": 0.0007, "step": 111540 }, { "epoch": 1.8252474842510023, "grad_norm": 0.03799647465348244, "learning_rate": 2.3076625455022094e-07, "loss": 0.0008, "step": 111550 }, { "epoch": 1.825411110202078, "grad_norm": 0.04996400326490402, "learning_rate": 2.3033765774962247e-07, "loss": 0.0008, "step": 111560 }, { "epoch": 1.825574736153154, "grad_norm": 0.010807087644934654, "learning_rate": 2.2990944994645713e-07, "loss": 0.0006, "step": 111570 }, { "epoch": 1.8257383621042296, "grad_norm": 0.04443098604679108, "learning_rate": 2.2948163117564758e-07, "loss": 0.0009, "step": 111580 }, { "epoch": 1.8259019880553056, "grad_norm": 0.03280111774802208, "learning_rate": 2.2905420147208645e-07, "loss": 0.0008, "step": 111590 }, { "epoch": 1.8260656140063816, "grad_norm": 0.033179961144924164, "learning_rate": 2.2862716087063142e-07, "loss": 0.0013, "step": 111600 }, { "epoch": 1.8262292399574571, "grad_norm": 0.09884998947381973, "learning_rate": 2.2820050940611184e-07, "loss": 0.0009, "step": 111610 }, { "epoch": 1.8263928659085331, "grad_norm": 0.00635836785659194, "learning_rate": 2.2777424711332375e-07, "loss": 0.0004, "step": 111620 }, { "epoch": 1.8265564918596089, "grad_norm": 0.030929584056138992, "learning_rate": 2.2734837402703159e-07, "loss": 0.0015, "step": 111630 }, { "epoch": 1.8267201178106847, "grad_norm": 0.05123552680015564, "learning_rate": 2.2692289018196756e-07, "loss": 0.0009, "step": 111640 }, { "epoch": 1.8268837437617607, "grad_norm": 0.029979132115840912, "learning_rate": 2.2649779561283391e-07, "loss": 0.001, "step": 111650 }, { "epoch": 1.8270473697128364, "grad_norm": 0.06885220855474472, "learning_rate": 2.2607309035429847e-07, "loss": 0.0012, "step": 111660 }, { "epoch": 1.8272109956639122, "grad_norm": 0.1243332028388977, "learning_rate": 2.2564877444100075e-07, "loss": 0.0012, "step": 111670 }, { "epoch": 1.8273746216149882, "grad_norm": 0.10198543965816498, "learning_rate": 2.252248479075436e-07, "loss": 0.002, "step": 111680 }, { "epoch": 1.827538247566064, "grad_norm": 0.08848313987255096, "learning_rate": 2.2480131078850386e-07, "loss": 0.0006, "step": 111690 }, { "epoch": 1.8277018735171398, "grad_norm": 0.03245293349027634, "learning_rate": 2.243781631184222e-07, "loss": 0.0005, "step": 111700 }, { "epoch": 1.8278654994682157, "grad_norm": 0.07604245096445084, "learning_rate": 2.2395540493180989e-07, "loss": 0.0007, "step": 111710 }, { "epoch": 1.8280291254192915, "grad_norm": 0.040595319122076035, "learning_rate": 2.2353303626314438e-07, "loss": 0.0007, "step": 111720 }, { "epoch": 1.8281927513703673, "grad_norm": 0.03918663412332535, "learning_rate": 2.231110571468742e-07, "loss": 0.0008, "step": 111730 }, { "epoch": 1.8283563773214433, "grad_norm": 0.03864748403429985, "learning_rate": 2.226894676174135e-07, "loss": 0.0009, "step": 111740 }, { "epoch": 1.828520003272519, "grad_norm": 0.03836618736386299, "learning_rate": 2.222682677091459e-07, "loss": 0.0007, "step": 111750 }, { "epoch": 1.8286836292235948, "grad_norm": 0.16263221204280853, "learning_rate": 2.2184745745642334e-07, "loss": 0.0016, "step": 111760 }, { "epoch": 1.8288472551746708, "grad_norm": 0.19651682674884796, "learning_rate": 2.214270368935656e-07, "loss": 0.0009, "step": 111770 }, { "epoch": 1.8290108811257464, "grad_norm": 0.030727343633770943, "learning_rate": 2.210070060548608e-07, "loss": 0.0011, "step": 111780 }, { "epoch": 1.8291745070768224, "grad_norm": 0.052766796201467514, "learning_rate": 2.2058736497456436e-07, "loss": 0.0011, "step": 111790 }, { "epoch": 1.8293381330278984, "grad_norm": 0.09465035796165466, "learning_rate": 2.2016811368690217e-07, "loss": 0.0006, "step": 111800 }, { "epoch": 1.829501758978974, "grad_norm": 0.03092537820339203, "learning_rate": 2.197492522260658e-07, "loss": 0.0008, "step": 111810 }, { "epoch": 1.82966538493005, "grad_norm": 0.02302798442542553, "learning_rate": 2.1933078062621682e-07, "loss": 0.0008, "step": 111820 }, { "epoch": 1.8298290108811257, "grad_norm": 0.10573314875364304, "learning_rate": 2.1891269892148348e-07, "loss": 0.001, "step": 111830 }, { "epoch": 1.8299926368322015, "grad_norm": 0.02152593806385994, "learning_rate": 2.184950071459646e-07, "loss": 0.0012, "step": 111840 }, { "epoch": 1.8301562627832775, "grad_norm": 0.03256351500749588, "learning_rate": 2.1807770533372462e-07, "loss": 0.0007, "step": 111850 }, { "epoch": 1.8303198887343533, "grad_norm": 0.009935869835317135, "learning_rate": 2.1766079351879743e-07, "loss": 0.0015, "step": 111860 }, { "epoch": 1.830483514685429, "grad_norm": 0.09688275307416916, "learning_rate": 2.172442717351847e-07, "loss": 0.0008, "step": 111870 }, { "epoch": 1.830647140636505, "grad_norm": 0.012640787288546562, "learning_rate": 2.168281400168576e-07, "loss": 0.0008, "step": 111880 }, { "epoch": 1.8308107665875808, "grad_norm": 0.033625803887844086, "learning_rate": 2.1641239839775286e-07, "loss": 0.0008, "step": 111890 }, { "epoch": 1.8309743925386566, "grad_norm": 0.018425360321998596, "learning_rate": 2.159970469117778e-07, "loss": 0.0014, "step": 111900 }, { "epoch": 1.8311380184897326, "grad_norm": 0.12633606791496277, "learning_rate": 2.1558208559280701e-07, "loss": 0.001, "step": 111910 }, { "epoch": 1.8313016444408083, "grad_norm": 0.04302993416786194, "learning_rate": 2.1516751447468398e-07, "loss": 0.0007, "step": 111920 }, { "epoch": 1.8314652703918841, "grad_norm": 0.023784073069691658, "learning_rate": 2.147533335912183e-07, "loss": 0.0008, "step": 111930 }, { "epoch": 1.8316288963429601, "grad_norm": 0.009703692980110645, "learning_rate": 2.1433954297619076e-07, "loss": 0.0007, "step": 111940 }, { "epoch": 1.8317925222940359, "grad_norm": 0.07528490573167801, "learning_rate": 2.139261426633471e-07, "loss": 0.0015, "step": 111950 }, { "epoch": 1.8319561482451117, "grad_norm": 0.004625246860086918, "learning_rate": 2.1351313268640373e-07, "loss": 0.0009, "step": 111960 }, { "epoch": 1.8321197741961877, "grad_norm": 0.1257781982421875, "learning_rate": 2.1310051307904422e-07, "loss": 0.0012, "step": 111970 }, { "epoch": 1.8322834001472632, "grad_norm": 0.12225418537855148, "learning_rate": 2.1268828387492114e-07, "loss": 0.0013, "step": 111980 }, { "epoch": 1.8324470260983392, "grad_norm": 0.019057348370552063, "learning_rate": 2.1227644510765255e-07, "loss": 0.001, "step": 111990 }, { "epoch": 1.8326106520494152, "grad_norm": 0.03979181498289108, "learning_rate": 2.1186499681082884e-07, "loss": 0.0006, "step": 112000 }, { "epoch": 1.8327742780004908, "grad_norm": 0.04923700913786888, "learning_rate": 2.1145393901800482e-07, "loss": 0.0018, "step": 112010 }, { "epoch": 1.8329379039515667, "grad_norm": 0.028687067329883575, "learning_rate": 2.110432717627059e-07, "loss": 0.0008, "step": 112020 }, { "epoch": 1.8331015299026425, "grad_norm": 0.07212336361408234, "learning_rate": 2.1063299507842362e-07, "loss": 0.0012, "step": 112030 }, { "epoch": 1.8332651558537183, "grad_norm": 0.04509720206260681, "learning_rate": 2.1022310899862065e-07, "loss": 0.0011, "step": 112040 }, { "epoch": 1.8334287818047943, "grad_norm": 0.018241552636027336, "learning_rate": 2.0981361355672358e-07, "loss": 0.0007, "step": 112050 }, { "epoch": 1.83359240775587, "grad_norm": 0.02044142596423626, "learning_rate": 2.0940450878613073e-07, "loss": 0.0006, "step": 112060 }, { "epoch": 1.8337560337069458, "grad_norm": 0.04604267328977585, "learning_rate": 2.0899579472020704e-07, "loss": 0.0012, "step": 112070 }, { "epoch": 1.8339196596580218, "grad_norm": 0.10461519658565521, "learning_rate": 2.0858747139228698e-07, "loss": 0.0009, "step": 112080 }, { "epoch": 1.8340832856090976, "grad_norm": 0.048821017146110535, "learning_rate": 2.0817953883566944e-07, "loss": 0.0008, "step": 112090 }, { "epoch": 1.8342469115601734, "grad_norm": 0.07163149863481522, "learning_rate": 2.0777199708362672e-07, "loss": 0.0007, "step": 112100 }, { "epoch": 1.8344105375112494, "grad_norm": 0.06433001905679703, "learning_rate": 2.07364846169395e-07, "loss": 0.0009, "step": 112110 }, { "epoch": 1.8345741634623252, "grad_norm": 0.10736479610204697, "learning_rate": 2.069580861261805e-07, "loss": 0.0019, "step": 112120 }, { "epoch": 1.834737789413401, "grad_norm": 0.16886258125305176, "learning_rate": 2.0655171698715725e-07, "loss": 0.0011, "step": 112130 }, { "epoch": 1.834901415364477, "grad_norm": 0.014089349657297134, "learning_rate": 2.0614573878546652e-07, "loss": 0.0006, "step": 112140 }, { "epoch": 1.8350650413155525, "grad_norm": 0.059346482157707214, "learning_rate": 2.0574015155422012e-07, "loss": 0.001, "step": 112150 }, { "epoch": 1.8352286672666285, "grad_norm": 0.02900175005197525, "learning_rate": 2.0533495532649494e-07, "loss": 0.0006, "step": 112160 }, { "epoch": 1.8353922932177045, "grad_norm": 0.03774087876081467, "learning_rate": 2.0493015013533846e-07, "loss": 0.0012, "step": 112170 }, { "epoch": 1.83555591916878, "grad_norm": 0.004515171051025391, "learning_rate": 2.0452573601376369e-07, "loss": 0.001, "step": 112180 }, { "epoch": 1.835719545119856, "grad_norm": 0.08888539671897888, "learning_rate": 2.0412171299475535e-07, "loss": 0.0009, "step": 112190 }, { "epoch": 1.8358831710709318, "grad_norm": 0.0273519828915596, "learning_rate": 2.0371808111126212e-07, "loss": 0.0005, "step": 112200 }, { "epoch": 1.8360467970220076, "grad_norm": 0.07537045329809189, "learning_rate": 2.0331484039620487e-07, "loss": 0.001, "step": 112210 }, { "epoch": 1.8362104229730836, "grad_norm": 0.0029988503083586693, "learning_rate": 2.0291199088246894e-07, "loss": 0.0005, "step": 112220 }, { "epoch": 1.8363740489241593, "grad_norm": 0.006366785615682602, "learning_rate": 2.025095326029103e-07, "loss": 0.0005, "step": 112230 }, { "epoch": 1.8365376748752351, "grad_norm": 0.03925633803009987, "learning_rate": 2.0210746559035099e-07, "loss": 0.0009, "step": 112240 }, { "epoch": 1.8367013008263111, "grad_norm": 0.043429989367723465, "learning_rate": 2.017057898775837e-07, "loss": 0.0007, "step": 112250 }, { "epoch": 1.8368649267773869, "grad_norm": 0.022378094494342804, "learning_rate": 2.0130450549736668e-07, "loss": 0.0009, "step": 112260 }, { "epoch": 1.8370285527284627, "grad_norm": 0.07322119176387787, "learning_rate": 2.0090361248242762e-07, "loss": 0.0004, "step": 112270 }, { "epoch": 1.8371921786795387, "grad_norm": 0.021937793120741844, "learning_rate": 2.0050311086546148e-07, "loss": 0.0012, "step": 112280 }, { "epoch": 1.8373558046306144, "grad_norm": 0.009467998519539833, "learning_rate": 2.0010300067913323e-07, "loss": 0.0008, "step": 112290 }, { "epoch": 1.8375194305816902, "grad_norm": 0.006907562725245953, "learning_rate": 1.997032819560729e-07, "loss": 0.0006, "step": 112300 }, { "epoch": 1.8376830565327662, "grad_norm": 0.6138444542884827, "learning_rate": 1.9930395472888163e-07, "loss": 0.0009, "step": 112310 }, { "epoch": 1.837846682483842, "grad_norm": 0.04467267543077469, "learning_rate": 1.989050190301256e-07, "loss": 0.0009, "step": 112320 }, { "epoch": 1.8380103084349178, "grad_norm": 0.04853655397891998, "learning_rate": 1.985064748923421e-07, "loss": 0.0008, "step": 112330 }, { "epoch": 1.8381739343859937, "grad_norm": 0.061252862215042114, "learning_rate": 1.98108322348034e-07, "loss": 0.0007, "step": 112340 }, { "epoch": 1.8383375603370693, "grad_norm": 0.03716979920864105, "learning_rate": 1.977105614296737e-07, "loss": 0.0004, "step": 112350 }, { "epoch": 1.8385011862881453, "grad_norm": 0.0739227905869484, "learning_rate": 1.9731319216970135e-07, "loss": 0.0011, "step": 112360 }, { "epoch": 1.8386648122392213, "grad_norm": 0.03022587299346924, "learning_rate": 1.969162146005249e-07, "loss": 0.0006, "step": 112370 }, { "epoch": 1.8388284381902968, "grad_norm": 0.026162147521972656, "learning_rate": 1.9651962875452069e-07, "loss": 0.0007, "step": 112380 }, { "epoch": 1.8389920641413728, "grad_norm": 0.027955437079072, "learning_rate": 1.9612343466403227e-07, "loss": 0.0005, "step": 112390 }, { "epoch": 1.8391556900924486, "grad_norm": 0.13000430166721344, "learning_rate": 1.9572763236137272e-07, "loss": 0.0011, "step": 112400 }, { "epoch": 1.8393193160435244, "grad_norm": 0.01655576564371586, "learning_rate": 1.9533222187882172e-07, "loss": 0.0006, "step": 112410 }, { "epoch": 1.8394829419946004, "grad_norm": 0.025672340765595436, "learning_rate": 1.9493720324862854e-07, "loss": 0.0007, "step": 112420 }, { "epoch": 1.8396465679456762, "grad_norm": 0.002498106099665165, "learning_rate": 1.9454257650300733e-07, "loss": 0.0011, "step": 112430 }, { "epoch": 1.839810193896752, "grad_norm": 0.0408705435693264, "learning_rate": 1.941483416741452e-07, "loss": 0.0012, "step": 112440 }, { "epoch": 1.839973819847828, "grad_norm": 0.03590615466237068, "learning_rate": 1.9375449879419306e-07, "loss": 0.0009, "step": 112450 }, { "epoch": 1.8401374457989037, "grad_norm": 0.004171170759946108, "learning_rate": 1.933610478952719e-07, "loss": 0.0007, "step": 112460 }, { "epoch": 1.8403010717499795, "grad_norm": 0.014536440372467041, "learning_rate": 1.9296798900946935e-07, "loss": 0.0003, "step": 112470 }, { "epoch": 1.8404646977010555, "grad_norm": 0.08026809245347977, "learning_rate": 1.9257532216884368e-07, "loss": 0.0009, "step": 112480 }, { "epoch": 1.8406283236521312, "grad_norm": 0.02323959954082966, "learning_rate": 1.921830474054176e-07, "loss": 0.001, "step": 112490 }, { "epoch": 1.840791949603207, "grad_norm": 0.03691117838025093, "learning_rate": 1.91791164751185e-07, "loss": 0.0006, "step": 112500 }, { "epoch": 1.840955575554283, "grad_norm": 0.016201648861169815, "learning_rate": 1.913996742381058e-07, "loss": 0.0004, "step": 112510 }, { "epoch": 1.8411192015053588, "grad_norm": 0.040413998067379, "learning_rate": 1.9100857589810952e-07, "loss": 0.0011, "step": 112520 }, { "epoch": 1.8412828274564346, "grad_norm": 0.03945711627602577, "learning_rate": 1.9061786976309116e-07, "loss": 0.0007, "step": 112530 }, { "epoch": 1.8414464534075106, "grad_norm": 0.013471710495650768, "learning_rate": 1.9022755586491747e-07, "loss": 0.0018, "step": 112540 }, { "epoch": 1.8416100793585861, "grad_norm": 0.006731478031724691, "learning_rate": 1.8983763423541913e-07, "loss": 0.0008, "step": 112550 }, { "epoch": 1.8417737053096621, "grad_norm": 0.04281238839030266, "learning_rate": 1.8944810490639897e-07, "loss": 0.0009, "step": 112560 }, { "epoch": 1.841937331260738, "grad_norm": 0.07378807663917542, "learning_rate": 1.890589679096233e-07, "loss": 0.0007, "step": 112570 }, { "epoch": 1.8421009572118137, "grad_norm": 0.09281789511442184, "learning_rate": 1.886702232768306e-07, "loss": 0.0005, "step": 112580 }, { "epoch": 1.8422645831628897, "grad_norm": 0.02334589883685112, "learning_rate": 1.882818710397244e-07, "loss": 0.0015, "step": 112590 }, { "epoch": 1.8424282091139654, "grad_norm": 0.02348475158214569, "learning_rate": 1.8789391122997879e-07, "loss": 0.0009, "step": 112600 }, { "epoch": 1.8425918350650412, "grad_norm": 0.09792564064264297, "learning_rate": 1.875063438792324e-07, "loss": 0.0015, "step": 112610 }, { "epoch": 1.8427554610161172, "grad_norm": 0.12522895634174347, "learning_rate": 1.87119169019096e-07, "loss": 0.001, "step": 112620 }, { "epoch": 1.842919086967193, "grad_norm": 0.05113855004310608, "learning_rate": 1.867323866811449e-07, "loss": 0.0009, "step": 112630 }, { "epoch": 1.8430827129182688, "grad_norm": 0.12167287617921829, "learning_rate": 1.8634599689692502e-07, "loss": 0.0006, "step": 112640 }, { "epoch": 1.8432463388693447, "grad_norm": 0.02742084674537182, "learning_rate": 1.859599996979472e-07, "loss": 0.0008, "step": 112650 }, { "epoch": 1.8434099648204205, "grad_norm": 0.010081775486469269, "learning_rate": 1.8557439511569353e-07, "loss": 0.001, "step": 112660 }, { "epoch": 1.8435735907714963, "grad_norm": 0.306482195854187, "learning_rate": 1.851891831816116e-07, "loss": 0.0008, "step": 112670 }, { "epoch": 1.8437372167225723, "grad_norm": 0.044563211500644684, "learning_rate": 1.848043639271191e-07, "loss": 0.0015, "step": 112680 }, { "epoch": 1.843900842673648, "grad_norm": 0.005889760795980692, "learning_rate": 1.844199373835992e-07, "loss": 0.0008, "step": 112690 }, { "epoch": 1.8440644686247238, "grad_norm": 0.05091425031423569, "learning_rate": 1.8403590358240576e-07, "loss": 0.0016, "step": 112700 }, { "epoch": 1.8442280945757998, "grad_norm": 0.03527555987238884, "learning_rate": 1.8365226255485868e-07, "loss": 0.0006, "step": 112710 }, { "epoch": 1.8443917205268756, "grad_norm": 0.034672170877456665, "learning_rate": 1.8326901433224631e-07, "loss": 0.0004, "step": 112720 }, { "epoch": 1.8445553464779514, "grad_norm": 0.011133643798530102, "learning_rate": 1.828861589458253e-07, "loss": 0.0009, "step": 112730 }, { "epoch": 1.8447189724290274, "grad_norm": 0.027846060693264008, "learning_rate": 1.8250369642682007e-07, "loss": 0.0012, "step": 112740 }, { "epoch": 1.844882598380103, "grad_norm": 0.04411618411540985, "learning_rate": 1.8212162680642297e-07, "loss": 0.0005, "step": 112750 }, { "epoch": 1.845046224331179, "grad_norm": 0.08452106267213821, "learning_rate": 1.817399501157935e-07, "loss": 0.0008, "step": 112760 }, { "epoch": 1.845209850282255, "grad_norm": 0.0366785041987896, "learning_rate": 1.8135866638606114e-07, "loss": 0.0009, "step": 112770 }, { "epoch": 1.8453734762333305, "grad_norm": 0.15057390928268433, "learning_rate": 1.8097777564832053e-07, "loss": 0.0005, "step": 112780 }, { "epoch": 1.8455371021844065, "grad_norm": 0.15456070005893707, "learning_rate": 1.805972779336379e-07, "loss": 0.0009, "step": 112790 }, { "epoch": 1.8457007281354822, "grad_norm": 0.054340820759534836, "learning_rate": 1.80217173273044e-07, "loss": 0.0015, "step": 112800 }, { "epoch": 1.845864354086558, "grad_norm": 0.091983363032341, "learning_rate": 1.7983746169753902e-07, "loss": 0.0005, "step": 112810 }, { "epoch": 1.846027980037634, "grad_norm": 0.02596302516758442, "learning_rate": 1.7945814323809096e-07, "loss": 0.0006, "step": 112820 }, { "epoch": 1.8461916059887098, "grad_norm": 0.026148518547415733, "learning_rate": 1.790792179256362e-07, "loss": 0.0006, "step": 112830 }, { "epoch": 1.8463552319397856, "grad_norm": 0.0032644644379615784, "learning_rate": 1.7870068579107724e-07, "loss": 0.0014, "step": 112840 }, { "epoch": 1.8465188578908616, "grad_norm": 0.03951422870159149, "learning_rate": 1.7832254686528828e-07, "loss": 0.0007, "step": 112850 }, { "epoch": 1.8466824838419373, "grad_norm": 0.011702458374202251, "learning_rate": 1.779448011791063e-07, "loss": 0.0005, "step": 112860 }, { "epoch": 1.8468461097930131, "grad_norm": 0.08894037455320358, "learning_rate": 1.7756744876334165e-07, "loss": 0.0016, "step": 112870 }, { "epoch": 1.847009735744089, "grad_norm": 0.07927866280078888, "learning_rate": 1.771904896487675e-07, "loss": 0.0007, "step": 112880 }, { "epoch": 1.8471733616951649, "grad_norm": 0.05027158930897713, "learning_rate": 1.7681392386612872e-07, "loss": 0.0008, "step": 112890 }, { "epoch": 1.8473369876462407, "grad_norm": 0.04608063027262688, "learning_rate": 1.7643775144613628e-07, "loss": 0.0011, "step": 112900 }, { "epoch": 1.8475006135973167, "grad_norm": 0.05112637206912041, "learning_rate": 1.760619724194701e-07, "loss": 0.0013, "step": 112910 }, { "epoch": 1.8476642395483924, "grad_norm": 0.03093136101961136, "learning_rate": 1.756865868167762e-07, "loss": 0.0008, "step": 112920 }, { "epoch": 1.8478278654994682, "grad_norm": 0.01358999777585268, "learning_rate": 1.753115946686712e-07, "loss": 0.001, "step": 112930 }, { "epoch": 1.8479914914505442, "grad_norm": 0.05658520385622978, "learning_rate": 1.7493699600573734e-07, "loss": 0.0008, "step": 112940 }, { "epoch": 1.8481551174016198, "grad_norm": 0.00327929574996233, "learning_rate": 1.7456279085852624e-07, "loss": 0.0003, "step": 112950 }, { "epoch": 1.8483187433526957, "grad_norm": 0.017410041764378548, "learning_rate": 1.741889792575552e-07, "loss": 0.0009, "step": 112960 }, { "epoch": 1.8484823693037717, "grad_norm": 0.06320864707231522, "learning_rate": 1.7381556123331366e-07, "loss": 0.0006, "step": 112970 }, { "epoch": 1.8486459952548473, "grad_norm": 0.10688390582799911, "learning_rate": 1.7344253681625345e-07, "loss": 0.0012, "step": 112980 }, { "epoch": 1.8488096212059233, "grad_norm": 0.1625424474477768, "learning_rate": 1.7306990603679964e-07, "loss": 0.0006, "step": 112990 }, { "epoch": 1.848973247156999, "grad_norm": 0.03767475113272667, "learning_rate": 1.7269766892534124e-07, "loss": 0.0006, "step": 113000 }, { "epoch": 1.8491368731080748, "grad_norm": 0.06444856524467468, "learning_rate": 1.723258255122373e-07, "loss": 0.0016, "step": 113010 }, { "epoch": 1.8493004990591508, "grad_norm": 0.012578886933624744, "learning_rate": 1.7195437582781304e-07, "loss": 0.0006, "step": 113020 }, { "epoch": 1.8494641250102266, "grad_norm": 0.012308084405958652, "learning_rate": 1.715833199023642e-07, "loss": 0.0006, "step": 113030 }, { "epoch": 1.8496277509613024, "grad_norm": 0.019648972898721695, "learning_rate": 1.7121265776615214e-07, "loss": 0.0004, "step": 113040 }, { "epoch": 1.8497913769123784, "grad_norm": 0.04874153062701225, "learning_rate": 1.7084238944940656e-07, "loss": 0.0008, "step": 113050 }, { "epoch": 1.8499550028634542, "grad_norm": 0.02440950646996498, "learning_rate": 1.7047251498232553e-07, "loss": 0.0009, "step": 113060 }, { "epoch": 1.85011862881453, "grad_norm": 0.029458576813340187, "learning_rate": 1.7010303439507492e-07, "loss": 0.0007, "step": 113070 }, { "epoch": 1.850282254765606, "grad_norm": 0.0833294466137886, "learning_rate": 1.6973394771778784e-07, "loss": 0.0008, "step": 113080 }, { "epoch": 1.8504458807166817, "grad_norm": 0.07105235755443573, "learning_rate": 1.6936525498056576e-07, "loss": 0.0012, "step": 113090 }, { "epoch": 1.8506095066677575, "grad_norm": 0.008253823965787888, "learning_rate": 1.689969562134791e-07, "loss": 0.0012, "step": 113100 }, { "epoch": 1.8507731326188335, "grad_norm": 0.08622777462005615, "learning_rate": 1.6862905144656328e-07, "loss": 0.0011, "step": 113110 }, { "epoch": 1.850936758569909, "grad_norm": 0.06377329677343369, "learning_rate": 1.682615407098248e-07, "loss": 0.0011, "step": 113120 }, { "epoch": 1.851100384520985, "grad_norm": 0.07756470888853073, "learning_rate": 1.678944240332353e-07, "loss": 0.0008, "step": 113130 }, { "epoch": 1.851264010472061, "grad_norm": 0.0046451096422970295, "learning_rate": 1.6752770144673748e-07, "loss": 0.0008, "step": 113140 }, { "epoch": 1.8514276364231366, "grad_norm": 0.016653910279273987, "learning_rate": 1.6716137298023793e-07, "loss": 0.0006, "step": 113150 }, { "epoch": 1.8515912623742126, "grad_norm": 0.011719155125319958, "learning_rate": 1.6679543866361448e-07, "loss": 0.0004, "step": 113160 }, { "epoch": 1.8517548883252883, "grad_norm": 0.04671384394168854, "learning_rate": 1.66429898526711e-07, "loss": 0.001, "step": 113170 }, { "epoch": 1.8519185142763641, "grad_norm": 0.012840651907026768, "learning_rate": 1.6606475259933973e-07, "loss": 0.0013, "step": 113180 }, { "epoch": 1.85208214022744, "grad_norm": 0.02137523889541626, "learning_rate": 1.657000009112808e-07, "loss": 0.0003, "step": 113190 }, { "epoch": 1.8522457661785159, "grad_norm": 0.08299165219068527, "learning_rate": 1.6533564349228204e-07, "loss": 0.0007, "step": 113200 }, { "epoch": 1.8524093921295917, "grad_norm": 0.039249010384082794, "learning_rate": 1.6497168037205912e-07, "loss": 0.0007, "step": 113210 }, { "epoch": 1.8525730180806677, "grad_norm": 0.09357035160064697, "learning_rate": 1.646081115802961e-07, "loss": 0.0007, "step": 113220 }, { "epoch": 1.8527366440317434, "grad_norm": 0.15707960724830627, "learning_rate": 1.642449371466437e-07, "loss": 0.0013, "step": 113230 }, { "epoch": 1.8529002699828192, "grad_norm": 0.004865937866270542, "learning_rate": 1.638821571007221e-07, "loss": 0.0013, "step": 113240 }, { "epoch": 1.8530638959338952, "grad_norm": 0.0739060640335083, "learning_rate": 1.6351977147211707e-07, "loss": 0.001, "step": 113250 }, { "epoch": 1.853227521884971, "grad_norm": 0.043929535895586014, "learning_rate": 1.6315778029038498e-07, "loss": 0.0007, "step": 113260 }, { "epoch": 1.8533911478360467, "grad_norm": 0.024959269911050797, "learning_rate": 1.6279618358504777e-07, "loss": 0.0008, "step": 113270 }, { "epoch": 1.8535547737871227, "grad_norm": 0.09419511258602142, "learning_rate": 1.624349813855963e-07, "loss": 0.0012, "step": 113280 }, { "epoch": 1.8537183997381985, "grad_norm": 0.0299326553940773, "learning_rate": 1.620741737214887e-07, "loss": 0.001, "step": 113290 }, { "epoch": 1.8538820256892743, "grad_norm": 0.23981264233589172, "learning_rate": 1.6171376062215194e-07, "loss": 0.0008, "step": 113300 }, { "epoch": 1.8540456516403503, "grad_norm": 0.21001379191875458, "learning_rate": 1.613537421169792e-07, "loss": 0.001, "step": 113310 }, { "epoch": 1.8542092775914258, "grad_norm": 0.0021135404240339994, "learning_rate": 1.6099411823533307e-07, "loss": 0.0007, "step": 113320 }, { "epoch": 1.8543729035425018, "grad_norm": 0.029162239283323288, "learning_rate": 1.6063488900654233e-07, "loss": 0.001, "step": 113330 }, { "epoch": 1.8545365294935778, "grad_norm": 0.06761214882135391, "learning_rate": 1.6027605445990579e-07, "loss": 0.0008, "step": 113340 }, { "epoch": 1.8547001554446534, "grad_norm": 0.04418284446001053, "learning_rate": 1.599176146246878e-07, "loss": 0.0011, "step": 113350 }, { "epoch": 1.8548637813957294, "grad_norm": 0.12860621511936188, "learning_rate": 1.595595695301222e-07, "loss": 0.0008, "step": 113360 }, { "epoch": 1.8550274073468052, "grad_norm": 0.05377534031867981, "learning_rate": 1.592019192054084e-07, "loss": 0.0007, "step": 113370 }, { "epoch": 1.855191033297881, "grad_norm": 0.011231674812734127, "learning_rate": 1.588446636797175e-07, "loss": 0.0004, "step": 113380 }, { "epoch": 1.855354659248957, "grad_norm": 0.0036793563049286604, "learning_rate": 1.5848780298218458e-07, "loss": 0.0009, "step": 113390 }, { "epoch": 1.8555182852000327, "grad_norm": 0.03789239376783371, "learning_rate": 1.5813133714191352e-07, "loss": 0.0003, "step": 113400 }, { "epoch": 1.8556819111511085, "grad_norm": 0.03873893618583679, "learning_rate": 1.577752661879778e-07, "loss": 0.0004, "step": 113410 }, { "epoch": 1.8558455371021845, "grad_norm": 0.043267808854579926, "learning_rate": 1.5741959014941578e-07, "loss": 0.0016, "step": 113420 }, { "epoch": 1.8560091630532602, "grad_norm": 0.009245689027011395, "learning_rate": 1.5706430905523708e-07, "loss": 0.0008, "step": 113430 }, { "epoch": 1.856172789004336, "grad_norm": 0.03791176527738571, "learning_rate": 1.5670942293441572e-07, "loss": 0.0011, "step": 113440 }, { "epoch": 1.856336414955412, "grad_norm": 0.07527469098567963, "learning_rate": 1.563549318158958e-07, "loss": 0.0011, "step": 113450 }, { "epoch": 1.8565000409064878, "grad_norm": 0.03289582580327988, "learning_rate": 1.5600083572858805e-07, "loss": 0.0004, "step": 113460 }, { "epoch": 1.8566636668575636, "grad_norm": 0.08498571068048477, "learning_rate": 1.556471347013716e-07, "loss": 0.0008, "step": 113470 }, { "epoch": 1.8568272928086396, "grad_norm": 0.026511432603001595, "learning_rate": 1.552938287630923e-07, "loss": 0.0011, "step": 113480 }, { "epoch": 1.8569909187597153, "grad_norm": 0.018651818856596947, "learning_rate": 1.5494091794256593e-07, "loss": 0.001, "step": 113490 }, { "epoch": 1.857154544710791, "grad_norm": 0.084585040807724, "learning_rate": 1.545884022685734e-07, "loss": 0.001, "step": 113500 }, { "epoch": 1.857318170661867, "grad_norm": 0.01497381180524826, "learning_rate": 1.542362817698656e-07, "loss": 0.0006, "step": 113510 }, { "epoch": 1.8574817966129427, "grad_norm": 0.005593966227024794, "learning_rate": 1.538845564751601e-07, "loss": 0.0009, "step": 113520 }, { "epoch": 1.8576454225640187, "grad_norm": 0.037745069712400436, "learning_rate": 1.5353322641314228e-07, "loss": 0.0006, "step": 113530 }, { "epoch": 1.8578090485150947, "grad_norm": 0.04144706949591637, "learning_rate": 1.5318229161246477e-07, "loss": 0.0005, "step": 113540 }, { "epoch": 1.8579726744661702, "grad_norm": 0.01671553961932659, "learning_rate": 1.5283175210174973e-07, "loss": 0.001, "step": 113550 }, { "epoch": 1.8581363004172462, "grad_norm": 0.01007680781185627, "learning_rate": 1.5248160790958533e-07, "loss": 0.0012, "step": 113560 }, { "epoch": 1.858299926368322, "grad_norm": 0.04720287770032883, "learning_rate": 1.5213185906452876e-07, "loss": 0.0007, "step": 113570 }, { "epoch": 1.8584635523193977, "grad_norm": 0.002298968145623803, "learning_rate": 1.5178250559510333e-07, "loss": 0.001, "step": 113580 }, { "epoch": 1.8586271782704737, "grad_norm": 0.059410396963357925, "learning_rate": 1.5143354752980233e-07, "loss": 0.0004, "step": 113590 }, { "epoch": 1.8587908042215495, "grad_norm": 0.061401356011629105, "learning_rate": 1.5108498489708412e-07, "loss": 0.001, "step": 113600 }, { "epoch": 1.8589544301726253, "grad_norm": 0.028765061870217323, "learning_rate": 1.5073681772537763e-07, "loss": 0.0009, "step": 113610 }, { "epoch": 1.8591180561237013, "grad_norm": 0.023047300055623055, "learning_rate": 1.5038904604307736e-07, "loss": 0.001, "step": 113620 }, { "epoch": 1.859281682074777, "grad_norm": 0.046529725193977356, "learning_rate": 1.500416698785473e-07, "loss": 0.0007, "step": 113630 }, { "epoch": 1.8594453080258528, "grad_norm": 0.018830517306923866, "learning_rate": 1.49694689260117e-07, "loss": 0.0007, "step": 113640 }, { "epoch": 1.8596089339769288, "grad_norm": 0.028549816459417343, "learning_rate": 1.4934810421608602e-07, "loss": 0.0005, "step": 113650 }, { "epoch": 1.8597725599280046, "grad_norm": 0.11096290498971939, "learning_rate": 1.4900191477472014e-07, "loss": 0.0012, "step": 113660 }, { "epoch": 1.8599361858790804, "grad_norm": 0.09251801669597626, "learning_rate": 1.486561209642534e-07, "loss": 0.0006, "step": 113670 }, { "epoch": 1.8600998118301564, "grad_norm": 0.02776389569044113, "learning_rate": 1.483107228128877e-07, "loss": 0.0008, "step": 113680 }, { "epoch": 1.8602634377812322, "grad_norm": 0.03326091915369034, "learning_rate": 1.479657203487933e-07, "loss": 0.0008, "step": 113690 }, { "epoch": 1.860427063732308, "grad_norm": 0.008021118119359016, "learning_rate": 1.47621113600106e-07, "loss": 0.0008, "step": 113700 }, { "epoch": 1.860590689683384, "grad_norm": 0.011117805726826191, "learning_rate": 1.4727690259493222e-07, "loss": 0.0007, "step": 113710 }, { "epoch": 1.8607543156344595, "grad_norm": 0.01644272170960903, "learning_rate": 1.4693308736134336e-07, "loss": 0.001, "step": 113720 }, { "epoch": 1.8609179415855355, "grad_norm": 0.0393107607960701, "learning_rate": 1.4658966792738038e-07, "loss": 0.0009, "step": 113730 }, { "epoch": 1.8610815675366115, "grad_norm": 0.03911082074046135, "learning_rate": 1.462466443210514e-07, "loss": 0.0009, "step": 113740 }, { "epoch": 1.861245193487687, "grad_norm": 0.026783360168337822, "learning_rate": 1.459040165703318e-07, "loss": 0.001, "step": 113750 }, { "epoch": 1.861408819438763, "grad_norm": 0.04758188873529434, "learning_rate": 1.4556178470316652e-07, "loss": 0.0006, "step": 113760 }, { "epoch": 1.8615724453898388, "grad_norm": 0.07427937537431717, "learning_rate": 1.452199487474648e-07, "loss": 0.0014, "step": 113770 }, { "epoch": 1.8617360713409146, "grad_norm": 0.06645882874727249, "learning_rate": 1.448785087311072e-07, "loss": 0.0007, "step": 113780 }, { "epoch": 1.8618996972919906, "grad_norm": 0.03985673934221268, "learning_rate": 1.4453746468193975e-07, "loss": 0.0012, "step": 113790 }, { "epoch": 1.8620633232430663, "grad_norm": 0.04946216195821762, "learning_rate": 1.4419681662777685e-07, "loss": 0.0019, "step": 113800 }, { "epoch": 1.8622269491941421, "grad_norm": 0.0020157562103122473, "learning_rate": 1.4385656459640074e-07, "loss": 0.0007, "step": 113810 }, { "epoch": 1.862390575145218, "grad_norm": 0.04747512564063072, "learning_rate": 1.4351670861556145e-07, "loss": 0.0007, "step": 113820 }, { "epoch": 1.8625542010962939, "grad_norm": 0.037427473813295364, "learning_rate": 1.4317724871297623e-07, "loss": 0.0005, "step": 113830 }, { "epoch": 1.8627178270473697, "grad_norm": 0.13379958271980286, "learning_rate": 1.4283818491633017e-07, "loss": 0.0009, "step": 113840 }, { "epoch": 1.8628814529984457, "grad_norm": 0.035843439400196075, "learning_rate": 1.4249951725327615e-07, "loss": 0.0009, "step": 113850 }, { "epoch": 1.8630450789495214, "grad_norm": 0.048810385167598724, "learning_rate": 1.4216124575143486e-07, "loss": 0.0004, "step": 113860 }, { "epoch": 1.8632087049005972, "grad_norm": 0.02390442043542862, "learning_rate": 1.4182337043839477e-07, "loss": 0.0007, "step": 113870 }, { "epoch": 1.8633723308516732, "grad_norm": 0.0021288383286446333, "learning_rate": 1.4148589134171165e-07, "loss": 0.0005, "step": 113880 }, { "epoch": 1.8635359568027487, "grad_norm": 0.06789467483758926, "learning_rate": 1.4114880848890843e-07, "loss": 0.0007, "step": 113890 }, { "epoch": 1.8636995827538247, "grad_norm": 0.028271473944187164, "learning_rate": 1.4081212190747816e-07, "loss": 0.0011, "step": 113900 }, { "epoch": 1.8638632087049007, "grad_norm": 0.03492870554327965, "learning_rate": 1.4047583162487832e-07, "loss": 0.0006, "step": 113910 }, { "epoch": 1.8640268346559763, "grad_norm": 0.003272128524258733, "learning_rate": 1.401399376685364e-07, "loss": 0.0016, "step": 113920 }, { "epoch": 1.8641904606070523, "grad_norm": 0.08852490037679672, "learning_rate": 1.3980444006584605e-07, "loss": 0.0014, "step": 113930 }, { "epoch": 1.864354086558128, "grad_norm": 0.027692250907421112, "learning_rate": 1.3946933884417034e-07, "loss": 0.0009, "step": 113940 }, { "epoch": 1.8645177125092038, "grad_norm": 0.010999278165400028, "learning_rate": 1.3913463403083748e-07, "loss": 0.0009, "step": 113950 }, { "epoch": 1.8646813384602798, "grad_norm": 0.03792864456772804, "learning_rate": 1.3880032565314672e-07, "loss": 0.0006, "step": 113960 }, { "epoch": 1.8648449644113556, "grad_norm": 0.047340016812086105, "learning_rate": 1.3846641373836122e-07, "loss": 0.0007, "step": 113970 }, { "epoch": 1.8650085903624314, "grad_norm": 0.09782253950834274, "learning_rate": 1.381328983137159e-07, "loss": 0.0008, "step": 113980 }, { "epoch": 1.8651722163135074, "grad_norm": 0.0017703217454254627, "learning_rate": 1.37799779406409e-07, "loss": 0.0008, "step": 113990 }, { "epoch": 1.8653358422645832, "grad_norm": 0.06902812421321869, "learning_rate": 1.3746705704360985e-07, "loss": 0.0009, "step": 114000 }, { "epoch": 1.865499468215659, "grad_norm": 0.04585520923137665, "learning_rate": 1.3713473125245348e-07, "loss": 0.0012, "step": 114010 }, { "epoch": 1.865663094166735, "grad_norm": 0.033851347863674164, "learning_rate": 1.368028020600437e-07, "loss": 0.0006, "step": 114020 }, { "epoch": 1.8658267201178107, "grad_norm": 0.007088218349963427, "learning_rate": 1.364712694934517e-07, "loss": 0.0014, "step": 114030 }, { "epoch": 1.8659903460688865, "grad_norm": 0.036072053015232086, "learning_rate": 1.3614013357971523e-07, "loss": 0.0008, "step": 114040 }, { "epoch": 1.8661539720199625, "grad_norm": 0.06582151353359222, "learning_rate": 1.3580939434584162e-07, "loss": 0.0007, "step": 114050 }, { "epoch": 1.8663175979710382, "grad_norm": 0.004526420496404171, "learning_rate": 1.354790518188037e-07, "loss": 0.001, "step": 114060 }, { "epoch": 1.866481223922114, "grad_norm": 0.023701583966612816, "learning_rate": 1.3514910602554442e-07, "loss": 0.0008, "step": 114070 }, { "epoch": 1.86664484987319, "grad_norm": 0.06362594664096832, "learning_rate": 1.3481955699297221e-07, "loss": 0.0008, "step": 114080 }, { "epoch": 1.8668084758242656, "grad_norm": 0.03933916240930557, "learning_rate": 1.3449040474796394e-07, "loss": 0.0011, "step": 114090 }, { "epoch": 1.8669721017753416, "grad_norm": 0.016460837796330452, "learning_rate": 1.3416164931736475e-07, "loss": 0.0013, "step": 114100 }, { "epoch": 1.8671357277264176, "grad_norm": 0.04689618945121765, "learning_rate": 1.3383329072798657e-07, "loss": 0.0007, "step": 114110 }, { "epoch": 1.8672993536774931, "grad_norm": 0.033416748046875, "learning_rate": 1.335053290066085e-07, "loss": 0.0005, "step": 114120 }, { "epoch": 1.867462979628569, "grad_norm": 0.07599564641714096, "learning_rate": 1.3317776417997864e-07, "loss": 0.0009, "step": 114130 }, { "epoch": 1.8676266055796449, "grad_norm": 0.03028174303472042, "learning_rate": 1.3285059627481224e-07, "loss": 0.0014, "step": 114140 }, { "epoch": 1.8677902315307207, "grad_norm": 0.035100746899843216, "learning_rate": 1.3252382531779184e-07, "loss": 0.0007, "step": 114150 }, { "epoch": 1.8679538574817967, "grad_norm": 0.011115369386970997, "learning_rate": 1.3219745133556727e-07, "loss": 0.0007, "step": 114160 }, { "epoch": 1.8681174834328724, "grad_norm": 0.024122070521116257, "learning_rate": 1.3187147435475723e-07, "loss": 0.0015, "step": 114170 }, { "epoch": 1.8682811093839482, "grad_norm": 0.025685006752610207, "learning_rate": 1.3154589440194653e-07, "loss": 0.0007, "step": 114180 }, { "epoch": 1.8684447353350242, "grad_norm": 0.046651050448417664, "learning_rate": 1.3122071150368898e-07, "loss": 0.0009, "step": 114190 }, { "epoch": 1.8686083612861, "grad_norm": 0.014678443782031536, "learning_rate": 1.3089592568650499e-07, "loss": 0.0009, "step": 114200 }, { "epoch": 1.8687719872371757, "grad_norm": 0.014545261859893799, "learning_rate": 1.3057153697688397e-07, "loss": 0.0007, "step": 114210 }, { "epoch": 1.8689356131882517, "grad_norm": 0.037526004016399384, "learning_rate": 1.3024754540128025e-07, "loss": 0.0011, "step": 114220 }, { "epoch": 1.8690992391393275, "grad_norm": 0.053323958069086075, "learning_rate": 1.2992395098611888e-07, "loss": 0.0008, "step": 114230 }, { "epoch": 1.8692628650904033, "grad_norm": 0.07826829701662064, "learning_rate": 1.2960075375779035e-07, "loss": 0.0012, "step": 114240 }, { "epoch": 1.8694264910414793, "grad_norm": 0.021955395117402077, "learning_rate": 1.2927795374265418e-07, "loss": 0.0007, "step": 114250 }, { "epoch": 1.869590116992555, "grad_norm": 0.043120529502630234, "learning_rate": 1.2895555096703593e-07, "loss": 0.0007, "step": 114260 }, { "epoch": 1.8697537429436308, "grad_norm": 0.10827703028917313, "learning_rate": 1.2863354545723072e-07, "loss": 0.0015, "step": 114270 }, { "epoch": 1.8699173688947068, "grad_norm": 0.0020883185788989067, "learning_rate": 1.2831193723949976e-07, "loss": 0.0004, "step": 114280 }, { "epoch": 1.8700809948457824, "grad_norm": 0.038506872951984406, "learning_rate": 1.2799072634007204e-07, "loss": 0.0017, "step": 114290 }, { "epoch": 1.8702446207968584, "grad_norm": 0.0020213236566632986, "learning_rate": 1.276699127851444e-07, "loss": 0.0009, "step": 114300 }, { "epoch": 1.8704082467479344, "grad_norm": 0.00257762148976326, "learning_rate": 1.273494966008826e-07, "loss": 0.0007, "step": 114310 }, { "epoch": 1.87057187269901, "grad_norm": 0.0471498966217041, "learning_rate": 1.2702947781341622e-07, "loss": 0.0023, "step": 114320 }, { "epoch": 1.870735498650086, "grad_norm": 0.045030951499938965, "learning_rate": 1.2670985644884782e-07, "loss": 0.0007, "step": 114330 }, { "epoch": 1.8708991246011617, "grad_norm": 0.0077773695811629295, "learning_rate": 1.2639063253324202e-07, "loss": 0.0009, "step": 114340 }, { "epoch": 1.8710627505522375, "grad_norm": 0.07448321580886841, "learning_rate": 1.2607180609263526e-07, "loss": 0.0007, "step": 114350 }, { "epoch": 1.8712263765033135, "grad_norm": 0.044390615075826645, "learning_rate": 1.257533771530295e-07, "loss": 0.0005, "step": 114360 }, { "epoch": 1.8713900024543892, "grad_norm": 0.04984167963266373, "learning_rate": 1.254353457403945e-07, "loss": 0.0009, "step": 114370 }, { "epoch": 1.871553628405465, "grad_norm": 0.050430797040462494, "learning_rate": 1.2511771188066845e-07, "loss": 0.0009, "step": 114380 }, { "epoch": 1.871717254356541, "grad_norm": 0.10092493146657944, "learning_rate": 1.248004755997556e-07, "loss": 0.0013, "step": 114390 }, { "epoch": 1.8718808803076168, "grad_norm": 0.03787825629115105, "learning_rate": 1.2448363692352915e-07, "loss": 0.0006, "step": 114400 }, { "epoch": 1.8720445062586926, "grad_norm": 0.24682794511318207, "learning_rate": 1.24167195877829e-07, "loss": 0.0011, "step": 114410 }, { "epoch": 1.8722081322097686, "grad_norm": 0.08855167031288147, "learning_rate": 1.238511524884639e-07, "loss": 0.0013, "step": 114420 }, { "epoch": 1.8723717581608443, "grad_norm": 0.08572986721992493, "learning_rate": 1.2353550678120828e-07, "loss": 0.0011, "step": 114430 }, { "epoch": 1.87253538411192, "grad_norm": 0.003400080371648073, "learning_rate": 1.2322025878180598e-07, "loss": 0.0003, "step": 114440 }, { "epoch": 1.872699010062996, "grad_norm": 0.03428833559155464, "learning_rate": 1.22905408515967e-07, "loss": 0.0024, "step": 114450 }, { "epoch": 1.8728626360140719, "grad_norm": 0.04005441442131996, "learning_rate": 1.2259095600936966e-07, "loss": 0.0008, "step": 114460 }, { "epoch": 1.8730262619651477, "grad_norm": 0.03419559821486473, "learning_rate": 1.2227690128765902e-07, "loss": 0.0008, "step": 114470 }, { "epoch": 1.8731898879162237, "grad_norm": 0.03828272223472595, "learning_rate": 1.2196324437644958e-07, "loss": 0.0019, "step": 114480 }, { "epoch": 1.8733535138672992, "grad_norm": 0.08481821417808533, "learning_rate": 1.2164998530132089e-07, "loss": 0.001, "step": 114490 }, { "epoch": 1.8735171398183752, "grad_norm": 0.06506198644638062, "learning_rate": 1.213371240878225e-07, "loss": 0.0043, "step": 114500 }, { "epoch": 1.8736807657694512, "grad_norm": 0.03218720108270645, "learning_rate": 1.21024660761469e-07, "loss": 0.0004, "step": 114510 }, { "epoch": 1.8738443917205267, "grad_norm": 0.020112505182623863, "learning_rate": 1.207125953477445e-07, "loss": 0.0006, "step": 114520 }, { "epoch": 1.8740080176716027, "grad_norm": 0.005319379270076752, "learning_rate": 1.2040092787210022e-07, "loss": 0.0007, "step": 114530 }, { "epoch": 1.8741716436226785, "grad_norm": 0.11431462317705154, "learning_rate": 1.2008965835995424e-07, "loss": 0.0024, "step": 114540 }, { "epoch": 1.8743352695737543, "grad_norm": 0.010235359892249107, "learning_rate": 1.1977878683669287e-07, "loss": 0.0016, "step": 114550 }, { "epoch": 1.8744988955248303, "grad_norm": 0.03882668539881706, "learning_rate": 1.1946831332766974e-07, "loss": 0.0008, "step": 114560 }, { "epoch": 1.874662521475906, "grad_norm": 0.02070467360317707, "learning_rate": 1.1915823785820568e-07, "loss": 0.0007, "step": 114570 }, { "epoch": 1.8748261474269818, "grad_norm": 0.010153586976230145, "learning_rate": 1.188485604535905e-07, "loss": 0.0008, "step": 114580 }, { "epoch": 1.8749897733780578, "grad_norm": 0.03179711848497391, "learning_rate": 1.1853928113907842e-07, "loss": 0.0008, "step": 114590 }, { "epoch": 1.8751533993291336, "grad_norm": 0.06430523842573166, "learning_rate": 1.1823039993989538e-07, "loss": 0.0008, "step": 114600 }, { "epoch": 1.8753170252802094, "grad_norm": 0.059512339532375336, "learning_rate": 1.1792191688123123e-07, "loss": 0.001, "step": 114610 }, { "epoch": 1.8754806512312854, "grad_norm": 0.0408201701939106, "learning_rate": 1.1761383198824583e-07, "loss": 0.001, "step": 114620 }, { "epoch": 1.8756442771823612, "grad_norm": 0.002308931201696396, "learning_rate": 1.173061452860641e-07, "loss": 0.001, "step": 114630 }, { "epoch": 1.875807903133437, "grad_norm": 0.06227466091513634, "learning_rate": 1.1699885679978151e-07, "loss": 0.0006, "step": 114640 }, { "epoch": 1.875971529084513, "grad_norm": 0.0017069252207875252, "learning_rate": 1.1669196655445858e-07, "loss": 0.0005, "step": 114650 }, { "epoch": 1.8761351550355887, "grad_norm": 0.009412968531250954, "learning_rate": 1.1638547457512417e-07, "loss": 0.0004, "step": 114660 }, { "epoch": 1.8762987809866645, "grad_norm": 0.055103521794080734, "learning_rate": 1.1607938088677494e-07, "loss": 0.0004, "step": 114670 }, { "epoch": 1.8764624069377405, "grad_norm": 0.0183147881180048, "learning_rate": 1.157736855143754e-07, "loss": 0.0008, "step": 114680 }, { "epoch": 1.876626032888816, "grad_norm": 0.03809869661927223, "learning_rate": 1.1546838848285668e-07, "loss": 0.0009, "step": 114690 }, { "epoch": 1.876789658839892, "grad_norm": 0.06347623467445374, "learning_rate": 1.1516348981711667e-07, "loss": 0.0011, "step": 114700 }, { "epoch": 1.876953284790968, "grad_norm": 0.07838743180036545, "learning_rate": 1.1485898954202323e-07, "loss": 0.001, "step": 114710 }, { "epoch": 1.8771169107420436, "grad_norm": 0.02584925852715969, "learning_rate": 1.1455488768240985e-07, "loss": 0.0007, "step": 114720 }, { "epoch": 1.8772805366931196, "grad_norm": 0.034247543662786484, "learning_rate": 1.1425118426307835e-07, "loss": 0.0006, "step": 114730 }, { "epoch": 1.8774441626441953, "grad_norm": 0.01696961000561714, "learning_rate": 1.1394787930879725e-07, "loss": 0.0007, "step": 114740 }, { "epoch": 1.877607788595271, "grad_norm": 0.027281509712338448, "learning_rate": 1.1364497284430342e-07, "loss": 0.0008, "step": 114750 }, { "epoch": 1.877771414546347, "grad_norm": 0.019620731472969055, "learning_rate": 1.1334246489430046e-07, "loss": 0.0004, "step": 114760 }, { "epoch": 1.8779350404974229, "grad_norm": 0.16902285814285278, "learning_rate": 1.1304035548346081e-07, "loss": 0.0015, "step": 114770 }, { "epoch": 1.8780986664484987, "grad_norm": 0.028478844091296196, "learning_rate": 1.1273864463642259e-07, "loss": 0.0009, "step": 114780 }, { "epoch": 1.8782622923995747, "grad_norm": 0.04589986428618431, "learning_rate": 1.1243733237779275e-07, "loss": 0.0008, "step": 114790 }, { "epoch": 1.8784259183506504, "grad_norm": 0.08585745096206665, "learning_rate": 1.1213641873214443e-07, "loss": 0.001, "step": 114800 }, { "epoch": 1.8785895443017262, "grad_norm": 0.20149028301239014, "learning_rate": 1.1183590372402075e-07, "loss": 0.0013, "step": 114810 }, { "epoch": 1.8787531702528022, "grad_norm": 0.01713499426841736, "learning_rate": 1.1153578737792937e-07, "loss": 0.0007, "step": 114820 }, { "epoch": 1.878916796203878, "grad_norm": 0.05463026463985443, "learning_rate": 1.1123606971834732e-07, "loss": 0.0006, "step": 114830 }, { "epoch": 1.8790804221549537, "grad_norm": 0.014157851226627827, "learning_rate": 1.1093675076971844e-07, "loss": 0.0009, "step": 114840 }, { "epoch": 1.8792440481060297, "grad_norm": 0.057343583554029465, "learning_rate": 1.1063783055645428e-07, "loss": 0.0011, "step": 114850 }, { "epoch": 1.8794076740571053, "grad_norm": 0.025464218109846115, "learning_rate": 1.1033930910293311e-07, "loss": 0.0008, "step": 114860 }, { "epoch": 1.8795713000081813, "grad_norm": 0.25258785486221313, "learning_rate": 1.1004118643350214e-07, "loss": 0.0017, "step": 114870 }, { "epoch": 1.8797349259592573, "grad_norm": 0.01316542737185955, "learning_rate": 1.097434625724747e-07, "loss": 0.0011, "step": 114880 }, { "epoch": 1.8798985519103328, "grad_norm": 0.01584048941731453, "learning_rate": 1.0944613754413302e-07, "loss": 0.0006, "step": 114890 }, { "epoch": 1.8800621778614088, "grad_norm": 0.0012790379114449024, "learning_rate": 1.0914921137272439e-07, "loss": 0.0006, "step": 114900 }, { "epoch": 1.8802258038124846, "grad_norm": 0.003282808233052492, "learning_rate": 1.0885268408246663e-07, "loss": 0.0005, "step": 114910 }, { "epoch": 1.8803894297635604, "grad_norm": 0.017884593456983566, "learning_rate": 1.0855655569754264e-07, "loss": 0.0004, "step": 114920 }, { "epoch": 1.8805530557146364, "grad_norm": 0.10842547565698624, "learning_rate": 1.0826082624210421e-07, "loss": 0.0007, "step": 114930 }, { "epoch": 1.8807166816657122, "grad_norm": 0.08682433515787125, "learning_rate": 1.0796549574026872e-07, "loss": 0.0015, "step": 114940 }, { "epoch": 1.880880307616788, "grad_norm": 0.02029728889465332, "learning_rate": 1.076705642161241e-07, "loss": 0.0007, "step": 114950 }, { "epoch": 1.881043933567864, "grad_norm": 0.0072782267816364765, "learning_rate": 1.0737603169372279e-07, "loss": 0.0003, "step": 114960 }, { "epoch": 1.8812075595189397, "grad_norm": 0.06822889298200607, "learning_rate": 1.0708189819708614e-07, "loss": 0.0006, "step": 114970 }, { "epoch": 1.8813711854700155, "grad_norm": 0.010652842000126839, "learning_rate": 1.0678816375020273e-07, "loss": 0.0009, "step": 114980 }, { "epoch": 1.8815348114210915, "grad_norm": 0.02239980548620224, "learning_rate": 1.0649482837702896e-07, "loss": 0.0008, "step": 114990 }, { "epoch": 1.8816984373721672, "grad_norm": 0.05502698943018913, "learning_rate": 1.0620189210148735e-07, "loss": 0.0006, "step": 115000 }, { "epoch": 1.881862063323243, "grad_norm": 0.011067809537053108, "learning_rate": 1.0590935494746935e-07, "loss": 0.0006, "step": 115010 }, { "epoch": 1.882025689274319, "grad_norm": 0.008618976920843124, "learning_rate": 1.0561721693883364e-07, "loss": 0.0014, "step": 115020 }, { "epoch": 1.8821893152253948, "grad_norm": 0.023923030123114586, "learning_rate": 1.0532547809940507e-07, "loss": 0.0008, "step": 115030 }, { "epoch": 1.8823529411764706, "grad_norm": 0.006676288787275553, "learning_rate": 1.0503413845297739e-07, "loss": 0.001, "step": 115040 }, { "epoch": 1.8825165671275466, "grad_norm": 0.017794566228985786, "learning_rate": 1.0474319802331157e-07, "loss": 0.0006, "step": 115050 }, { "epoch": 1.882680193078622, "grad_norm": 0.0058709969744086266, "learning_rate": 1.0445265683413531e-07, "loss": 0.0012, "step": 115060 }, { "epoch": 1.882843819029698, "grad_norm": 0.08871816098690033, "learning_rate": 1.0416251490914409e-07, "loss": 0.0023, "step": 115070 }, { "epoch": 1.883007444980774, "grad_norm": 0.0012904698960483074, "learning_rate": 1.0387277227200176e-07, "loss": 0.0003, "step": 115080 }, { "epoch": 1.8831710709318497, "grad_norm": 0.06076951324939728, "learning_rate": 1.0358342894633722e-07, "loss": 0.0005, "step": 115090 }, { "epoch": 1.8833346968829257, "grad_norm": 0.04893820360302925, "learning_rate": 1.0329448495574934e-07, "loss": 0.0013, "step": 115100 }, { "epoch": 1.8834983228340014, "grad_norm": 0.07313743978738785, "learning_rate": 1.0300594032380318e-07, "loss": 0.0015, "step": 115110 }, { "epoch": 1.8836619487850772, "grad_norm": 0.017988761886954308, "learning_rate": 1.0271779507403213e-07, "loss": 0.001, "step": 115120 }, { "epoch": 1.8838255747361532, "grad_norm": 0.008396394550800323, "learning_rate": 1.0243004922993516e-07, "loss": 0.0008, "step": 115130 }, { "epoch": 1.883989200687229, "grad_norm": 0.04588119313120842, "learning_rate": 1.0214270281498074e-07, "loss": 0.0014, "step": 115140 }, { "epoch": 1.8841528266383047, "grad_norm": 0.0762842521071434, "learning_rate": 1.0185575585260288e-07, "loss": 0.001, "step": 115150 }, { "epoch": 1.8843164525893807, "grad_norm": 0.012233969755470753, "learning_rate": 1.0156920836620566e-07, "loss": 0.0007, "step": 115160 }, { "epoch": 1.8844800785404565, "grad_norm": 0.04828006029129028, "learning_rate": 1.0128306037915703e-07, "loss": 0.001, "step": 115170 }, { "epoch": 1.8846437044915323, "grad_norm": 0.013860705308616161, "learning_rate": 1.0099731191479556e-07, "loss": 0.0003, "step": 115180 }, { "epoch": 1.8848073304426083, "grad_norm": 0.046628449112176895, "learning_rate": 1.0071196299642483e-07, "loss": 0.0005, "step": 115190 }, { "epoch": 1.884970956393684, "grad_norm": 0.033884722739458084, "learning_rate": 1.0042701364731844e-07, "loss": 0.0008, "step": 115200 }, { "epoch": 1.8851345823447598, "grad_norm": 0.028205636888742447, "learning_rate": 1.0014246389071447e-07, "loss": 0.0013, "step": 115210 }, { "epoch": 1.8852982082958358, "grad_norm": 0.024718027561903, "learning_rate": 9.985831374982102e-08, "loss": 0.0017, "step": 115220 }, { "epoch": 1.8854618342469116, "grad_norm": 0.10282206535339355, "learning_rate": 9.957456324781123e-08, "loss": 0.0013, "step": 115230 }, { "epoch": 1.8856254601979874, "grad_norm": 0.05735069140791893, "learning_rate": 9.929121240782769e-08, "loss": 0.0012, "step": 115240 }, { "epoch": 1.8857890861490634, "grad_norm": 0.07045166194438934, "learning_rate": 9.900826125297858e-08, "loss": 0.001, "step": 115250 }, { "epoch": 1.885952712100139, "grad_norm": 0.012375889346003532, "learning_rate": 9.87257098063421e-08, "loss": 0.0012, "step": 115260 }, { "epoch": 1.886116338051215, "grad_norm": 0.056508224457502365, "learning_rate": 9.84435580909604e-08, "loss": 0.001, "step": 115270 }, { "epoch": 1.886279964002291, "grad_norm": 0.02302614040672779, "learning_rate": 9.816180612984561e-08, "loss": 0.0006, "step": 115280 }, { "epoch": 1.8864435899533665, "grad_norm": 0.027453452348709106, "learning_rate": 9.788045394597656e-08, "loss": 0.0019, "step": 115290 }, { "epoch": 1.8866072159044425, "grad_norm": 0.00518826674669981, "learning_rate": 9.759950156229936e-08, "loss": 0.0007, "step": 115300 }, { "epoch": 1.8867708418555182, "grad_norm": 0.02175845205783844, "learning_rate": 9.731894900172734e-08, "loss": 0.0007, "step": 115310 }, { "epoch": 1.886934467806594, "grad_norm": 0.03555656969547272, "learning_rate": 9.703879628714163e-08, "loss": 0.0009, "step": 115320 }, { "epoch": 1.88709809375767, "grad_norm": 0.011384014040231705, "learning_rate": 9.675904344139008e-08, "loss": 0.0009, "step": 115330 }, { "epoch": 1.8872617197087458, "grad_norm": 0.017740968614816666, "learning_rate": 9.647969048728888e-08, "loss": 0.0005, "step": 115340 }, { "epoch": 1.8874253456598216, "grad_norm": 0.019332095980644226, "learning_rate": 9.620073744762093e-08, "loss": 0.0009, "step": 115350 }, { "epoch": 1.8875889716108976, "grad_norm": 0.004902615677565336, "learning_rate": 9.592218434513634e-08, "loss": 0.0005, "step": 115360 }, { "epoch": 1.8877525975619733, "grad_norm": 0.03447529673576355, "learning_rate": 9.564403120255361e-08, "loss": 0.0007, "step": 115370 }, { "epoch": 1.887916223513049, "grad_norm": 0.06368916481733322, "learning_rate": 9.536627804255738e-08, "loss": 0.0006, "step": 115380 }, { "epoch": 1.888079849464125, "grad_norm": 0.01854928396642208, "learning_rate": 9.508892488780064e-08, "loss": 0.0007, "step": 115390 }, { "epoch": 1.8882434754152009, "grad_norm": 0.09362687170505524, "learning_rate": 9.481197176090307e-08, "loss": 0.0012, "step": 115400 }, { "epoch": 1.8884071013662767, "grad_norm": 0.01557767204940319, "learning_rate": 9.45354186844527e-08, "loss": 0.0008, "step": 115410 }, { "epoch": 1.8885707273173526, "grad_norm": 0.01166441012173891, "learning_rate": 9.425926568100319e-08, "loss": 0.0004, "step": 115420 }, { "epoch": 1.8887343532684284, "grad_norm": 0.10664310306310654, "learning_rate": 9.398351277307761e-08, "loss": 0.001, "step": 115430 }, { "epoch": 1.8888979792195042, "grad_norm": 0.045520734041929245, "learning_rate": 9.370815998316463e-08, "loss": 0.0012, "step": 115440 }, { "epoch": 1.8890616051705802, "grad_norm": 0.003025609999895096, "learning_rate": 9.343320733372186e-08, "loss": 0.0004, "step": 115450 }, { "epoch": 1.8892252311216557, "grad_norm": 0.0662052258849144, "learning_rate": 9.315865484717245e-08, "loss": 0.0008, "step": 115460 }, { "epoch": 1.8893888570727317, "grad_norm": 0.05222895368933678, "learning_rate": 9.288450254590909e-08, "loss": 0.0004, "step": 115470 }, { "epoch": 1.8895524830238077, "grad_norm": 0.03883333504199982, "learning_rate": 9.261075045229051e-08, "loss": 0.0011, "step": 115480 }, { "epoch": 1.8897161089748833, "grad_norm": 0.05243481695652008, "learning_rate": 9.233739858864277e-08, "loss": 0.0017, "step": 115490 }, { "epoch": 1.8898797349259593, "grad_norm": 0.029051413759589195, "learning_rate": 9.206444697725858e-08, "loss": 0.0007, "step": 115500 }, { "epoch": 1.890043360877035, "grad_norm": 0.0033195551950484514, "learning_rate": 9.17918956404007e-08, "loss": 0.0006, "step": 115510 }, { "epoch": 1.8902069868281108, "grad_norm": 0.03224264830350876, "learning_rate": 9.151974460029634e-08, "loss": 0.0006, "step": 115520 }, { "epoch": 1.8903706127791868, "grad_norm": 0.014638724736869335, "learning_rate": 9.124799387914162e-08, "loss": 0.0006, "step": 115530 }, { "epoch": 1.8905342387302626, "grad_norm": 0.0022072468418627977, "learning_rate": 9.09766434990994e-08, "loss": 0.0007, "step": 115540 }, { "epoch": 1.8906978646813384, "grad_norm": 0.03740597516298294, "learning_rate": 9.070569348230085e-08, "loss": 0.0007, "step": 115550 }, { "epoch": 1.8908614906324144, "grad_norm": 0.07786998152732849, "learning_rate": 9.043514385084218e-08, "loss": 0.0013, "step": 115560 }, { "epoch": 1.8910251165834902, "grad_norm": 0.009632066823542118, "learning_rate": 9.016499462679019e-08, "loss": 0.0006, "step": 115570 }, { "epoch": 1.891188742534566, "grad_norm": 0.010564597323536873, "learning_rate": 8.989524583217557e-08, "loss": 0.0005, "step": 115580 }, { "epoch": 1.891352368485642, "grad_norm": 0.02836195006966591, "learning_rate": 8.962589748899963e-08, "loss": 0.0018, "step": 115590 }, { "epoch": 1.8915159944367177, "grad_norm": 0.07581872493028641, "learning_rate": 8.935694961922869e-08, "loss": 0.0011, "step": 115600 }, { "epoch": 1.8916796203877935, "grad_norm": 0.023821106180548668, "learning_rate": 8.908840224479797e-08, "loss": 0.0007, "step": 115610 }, { "epoch": 1.8918432463388695, "grad_norm": 0.07170823216438293, "learning_rate": 8.88202553876083e-08, "loss": 0.0013, "step": 115620 }, { "epoch": 1.8920068722899452, "grad_norm": 0.08178595453500748, "learning_rate": 8.855250906952995e-08, "loss": 0.0007, "step": 115630 }, { "epoch": 1.892170498241021, "grad_norm": 0.12656652927398682, "learning_rate": 8.828516331239823e-08, "loss": 0.0014, "step": 115640 }, { "epoch": 1.892334124192097, "grad_norm": 0.1051178127527237, "learning_rate": 8.801821813801792e-08, "loss": 0.0009, "step": 115650 }, { "epoch": 1.8924977501431726, "grad_norm": 0.06637538969516754, "learning_rate": 8.775167356815939e-08, "loss": 0.0011, "step": 115660 }, { "epoch": 1.8926613760942486, "grad_norm": 0.01222996599972248, "learning_rate": 8.748552962456136e-08, "loss": 0.0004, "step": 115670 }, { "epoch": 1.8928250020453243, "grad_norm": 0.02322150580585003, "learning_rate": 8.721978632893036e-08, "loss": 0.0009, "step": 115680 }, { "epoch": 1.8929886279964, "grad_norm": 0.0872635468840599, "learning_rate": 8.695444370293793e-08, "loss": 0.0007, "step": 115690 }, { "epoch": 1.893152253947476, "grad_norm": 0.03089163452386856, "learning_rate": 8.668950176822621e-08, "loss": 0.0005, "step": 115700 }, { "epoch": 1.8933158798985519, "grad_norm": 0.001216677948832512, "learning_rate": 8.642496054640181e-08, "loss": 0.0007, "step": 115710 }, { "epoch": 1.8934795058496277, "grad_norm": 0.009266712702810764, "learning_rate": 8.616082005904026e-08, "loss": 0.0013, "step": 115720 }, { "epoch": 1.8936431318007036, "grad_norm": 0.30547383427619934, "learning_rate": 8.589708032768374e-08, "loss": 0.0012, "step": 115730 }, { "epoch": 1.8938067577517794, "grad_norm": 0.02439533919095993, "learning_rate": 8.563374137384283e-08, "loss": 0.0011, "step": 115740 }, { "epoch": 1.8939703837028552, "grad_norm": 0.034946080297231674, "learning_rate": 8.537080321899316e-08, "loss": 0.0005, "step": 115750 }, { "epoch": 1.8941340096539312, "grad_norm": 0.03289042040705681, "learning_rate": 8.510826588458032e-08, "loss": 0.0006, "step": 115760 }, { "epoch": 1.894297635605007, "grad_norm": 0.06040625274181366, "learning_rate": 8.484612939201441e-08, "loss": 0.0009, "step": 115770 }, { "epoch": 1.8944612615560827, "grad_norm": 0.23672960698604584, "learning_rate": 8.458439376267669e-08, "loss": 0.0005, "step": 115780 }, { "epoch": 1.8946248875071587, "grad_norm": 0.055471859872341156, "learning_rate": 8.432305901791116e-08, "loss": 0.0011, "step": 115790 }, { "epoch": 1.8947885134582345, "grad_norm": 0.06190602108836174, "learning_rate": 8.406212517903245e-08, "loss": 0.0008, "step": 115800 }, { "epoch": 1.8949521394093103, "grad_norm": 0.012343844398856163, "learning_rate": 8.380159226732132e-08, "loss": 0.0006, "step": 115810 }, { "epoch": 1.8951157653603863, "grad_norm": 0.014734986238181591, "learning_rate": 8.354146030402688e-08, "loss": 0.0008, "step": 115820 }, { "epoch": 1.8952793913114618, "grad_norm": 0.08109033852815628, "learning_rate": 8.328172931036272e-08, "loss": 0.0005, "step": 115830 }, { "epoch": 1.8954430172625378, "grad_norm": 0.01399065088480711, "learning_rate": 8.302239930751299e-08, "loss": 0.0007, "step": 115840 }, { "epoch": 1.8956066432136138, "grad_norm": 0.056841157376766205, "learning_rate": 8.276347031662691e-08, "loss": 0.0008, "step": 115850 }, { "epoch": 1.8957702691646894, "grad_norm": 0.056089065968990326, "learning_rate": 8.250494235882311e-08, "loss": 0.0012, "step": 115860 }, { "epoch": 1.8959338951157654, "grad_norm": 0.0776858851313591, "learning_rate": 8.224681545518476e-08, "loss": 0.0014, "step": 115870 }, { "epoch": 1.8960975210668412, "grad_norm": 0.10620084404945374, "learning_rate": 8.198908962676499e-08, "loss": 0.0006, "step": 115880 }, { "epoch": 1.896261147017917, "grad_norm": 0.0258939191699028, "learning_rate": 8.173176489458201e-08, "loss": 0.0007, "step": 115890 }, { "epoch": 1.896424772968993, "grad_norm": 0.05787687748670578, "learning_rate": 8.147484127962347e-08, "loss": 0.0011, "step": 115900 }, { "epoch": 1.8965883989200687, "grad_norm": 0.012510452419519424, "learning_rate": 8.121831880284203e-08, "loss": 0.0009, "step": 115910 }, { "epoch": 1.8967520248711445, "grad_norm": 0.1753036379814148, "learning_rate": 8.096219748515987e-08, "loss": 0.0022, "step": 115920 }, { "epoch": 1.8969156508222205, "grad_norm": 0.027009334415197372, "learning_rate": 8.070647734746473e-08, "loss": 0.0008, "step": 115930 }, { "epoch": 1.8970792767732962, "grad_norm": 0.022676708176732063, "learning_rate": 8.045115841061268e-08, "loss": 0.0007, "step": 115940 }, { "epoch": 1.897242902724372, "grad_norm": 0.0455305278301239, "learning_rate": 8.019624069542541e-08, "loss": 0.0006, "step": 115950 }, { "epoch": 1.897406528675448, "grad_norm": 0.10886897891759872, "learning_rate": 7.99417242226952e-08, "loss": 0.0007, "step": 115960 }, { "epoch": 1.8975701546265238, "grad_norm": 0.009297381155192852, "learning_rate": 7.968760901317818e-08, "loss": 0.0003, "step": 115970 }, { "epoch": 1.8977337805775996, "grad_norm": 0.009364566765725613, "learning_rate": 7.943389508759947e-08, "loss": 0.0007, "step": 115980 }, { "epoch": 1.8978974065286756, "grad_norm": 0.03216440603137016, "learning_rate": 7.918058246665084e-08, "loss": 0.0007, "step": 115990 }, { "epoch": 1.8980610324797513, "grad_norm": 0.045093510299921036, "learning_rate": 7.892767117099187e-08, "loss": 0.0013, "step": 116000 }, { "epoch": 1.898224658430827, "grad_norm": 0.01548362523317337, "learning_rate": 7.867516122124941e-08, "loss": 0.0028, "step": 116010 }, { "epoch": 1.898388284381903, "grad_norm": 0.05727113410830498, "learning_rate": 7.842305263801641e-08, "loss": 0.0012, "step": 116020 }, { "epoch": 1.8985519103329787, "grad_norm": 0.02318890206515789, "learning_rate": 7.817134544185535e-08, "loss": 0.0008, "step": 116030 }, { "epoch": 1.8987155362840547, "grad_norm": 0.05266227200627327, "learning_rate": 7.79200396532931e-08, "loss": 0.001, "step": 116040 }, { "epoch": 1.8988791622351306, "grad_norm": 0.018220210447907448, "learning_rate": 7.76691352928266e-08, "loss": 0.0026, "step": 116050 }, { "epoch": 1.8990427881862062, "grad_norm": 0.050774186849594116, "learning_rate": 7.741863238091785e-08, "loss": 0.0022, "step": 116060 }, { "epoch": 1.8992064141372822, "grad_norm": 0.09759583324193954, "learning_rate": 7.71685309379977e-08, "loss": 0.0014, "step": 116070 }, { "epoch": 1.899370040088358, "grad_norm": 0.014021795243024826, "learning_rate": 7.691883098446262e-08, "loss": 0.0023, "step": 116080 }, { "epoch": 1.8995336660394337, "grad_norm": 0.11141176521778107, "learning_rate": 7.666953254067855e-08, "loss": 0.0012, "step": 116090 }, { "epoch": 1.8996972919905097, "grad_norm": 0.04700498282909393, "learning_rate": 7.642063562697644e-08, "loss": 0.001, "step": 116100 }, { "epoch": 1.8998609179415855, "grad_norm": 0.04161045327782631, "learning_rate": 7.617214026365616e-08, "loss": 0.0008, "step": 116110 }, { "epoch": 1.9000245438926613, "grad_norm": 0.051022887229919434, "learning_rate": 7.592404647098317e-08, "loss": 0.0009, "step": 116120 }, { "epoch": 1.9001881698437373, "grad_norm": 0.05725853517651558, "learning_rate": 7.56763542691924e-08, "loss": 0.0012, "step": 116130 }, { "epoch": 1.900351795794813, "grad_norm": 0.010440354235470295, "learning_rate": 7.542906367848435e-08, "loss": 0.0006, "step": 116140 }, { "epoch": 1.9005154217458888, "grad_norm": 0.0035140812397003174, "learning_rate": 7.518217471902677e-08, "loss": 0.0012, "step": 116150 }, { "epoch": 1.9006790476969648, "grad_norm": 0.0815066322684288, "learning_rate": 7.493568741095469e-08, "loss": 0.0013, "step": 116160 }, { "epoch": 1.9008426736480406, "grad_norm": 0.01767807826399803, "learning_rate": 7.468960177437257e-08, "loss": 0.0007, "step": 116170 }, { "epoch": 1.9010062995991164, "grad_norm": 0.03358037769794464, "learning_rate": 7.444391782934823e-08, "loss": 0.0008, "step": 116180 }, { "epoch": 1.9011699255501924, "grad_norm": 0.09311171621084213, "learning_rate": 7.419863559592066e-08, "loss": 0.0007, "step": 116190 }, { "epoch": 1.9013335515012681, "grad_norm": 0.0928061455488205, "learning_rate": 7.395375509409219e-08, "loss": 0.0005, "step": 116200 }, { "epoch": 1.901497177452344, "grad_norm": 0.03519140183925629, "learning_rate": 7.370927634383685e-08, "loss": 0.0017, "step": 116210 }, { "epoch": 1.90166080340342, "grad_norm": 0.010303204879164696, "learning_rate": 7.346519936509145e-08, "loss": 0.0005, "step": 116220 }, { "epoch": 1.9018244293544955, "grad_norm": 0.13487018644809723, "learning_rate": 7.322152417776285e-08, "loss": 0.0011, "step": 116230 }, { "epoch": 1.9019880553055715, "grad_norm": 0.023089276626706123, "learning_rate": 7.29782508017246e-08, "loss": 0.005, "step": 116240 }, { "epoch": 1.9021516812566475, "grad_norm": 0.017940377816557884, "learning_rate": 7.273537925681751e-08, "loss": 0.0005, "step": 116250 }, { "epoch": 1.902315307207723, "grad_norm": 0.029363514855504036, "learning_rate": 7.249290956284794e-08, "loss": 0.001, "step": 116260 }, { "epoch": 1.902478933158799, "grad_norm": 0.014394384808838367, "learning_rate": 7.225084173959285e-08, "loss": 0.0006, "step": 116270 }, { "epoch": 1.9026425591098748, "grad_norm": 0.0295296348631382, "learning_rate": 7.200917580679256e-08, "loss": 0.0006, "step": 116280 }, { "epoch": 1.9028061850609506, "grad_norm": 0.08148827403783798, "learning_rate": 7.176791178415799e-08, "loss": 0.0012, "step": 116290 }, { "epoch": 1.9029698110120266, "grad_norm": 0.0415823757648468, "learning_rate": 7.152704969136504e-08, "loss": 0.0005, "step": 116300 }, { "epoch": 1.9031334369631023, "grad_norm": 0.05801570415496826, "learning_rate": 7.128658954805745e-08, "loss": 0.0011, "step": 116310 }, { "epoch": 1.903297062914178, "grad_norm": 0.021948300302028656, "learning_rate": 7.104653137384732e-08, "loss": 0.0007, "step": 116320 }, { "epoch": 1.903460688865254, "grad_norm": 0.1137373223900795, "learning_rate": 7.080687518831175e-08, "loss": 0.0021, "step": 116330 }, { "epoch": 1.9036243148163299, "grad_norm": 0.007349936757236719, "learning_rate": 7.05676210109979e-08, "loss": 0.0008, "step": 116340 }, { "epoch": 1.9037879407674057, "grad_norm": 0.06870221346616745, "learning_rate": 7.032876886141627e-08, "loss": 0.0006, "step": 116350 }, { "epoch": 1.9039515667184816, "grad_norm": 0.0719907134771347, "learning_rate": 7.009031875904903e-08, "loss": 0.0007, "step": 116360 }, { "epoch": 1.9041151926695574, "grad_norm": 0.02348886802792549, "learning_rate": 6.985227072334178e-08, "loss": 0.0004, "step": 116370 }, { "epoch": 1.9042788186206332, "grad_norm": 0.06328196078538895, "learning_rate": 6.96146247737095e-08, "loss": 0.0005, "step": 116380 }, { "epoch": 1.9044424445717092, "grad_norm": 0.0172183346003294, "learning_rate": 6.937738092953394e-08, "loss": 0.0006, "step": 116390 }, { "epoch": 1.904606070522785, "grad_norm": 0.02095283940434456, "learning_rate": 6.914053921016406e-08, "loss": 0.0011, "step": 116400 }, { "epoch": 1.9047696964738607, "grad_norm": 0.15059605240821838, "learning_rate": 6.890409963491495e-08, "loss": 0.0005, "step": 116410 }, { "epoch": 1.9049333224249367, "grad_norm": 0.00268458086065948, "learning_rate": 6.866806222307121e-08, "loss": 0.0006, "step": 116420 }, { "epoch": 1.9050969483760123, "grad_norm": 0.0715903788805008, "learning_rate": 6.843242699388187e-08, "loss": 0.001, "step": 116430 }, { "epoch": 1.9052605743270883, "grad_norm": 0.004708403721451759, "learning_rate": 6.819719396656544e-08, "loss": 0.0007, "step": 116440 }, { "epoch": 1.9054242002781643, "grad_norm": 0.04708794876933098, "learning_rate": 6.796236316030602e-08, "loss": 0.0011, "step": 116450 }, { "epoch": 1.9055878262292398, "grad_norm": 0.060547955334186554, "learning_rate": 6.772793459425665e-08, "loss": 0.0013, "step": 116460 }, { "epoch": 1.9057514521803158, "grad_norm": 0.06558769941329956, "learning_rate": 6.749390828753533e-08, "loss": 0.0011, "step": 116470 }, { "epoch": 1.9059150781313916, "grad_norm": 0.1840386539697647, "learning_rate": 6.72602842592296e-08, "loss": 0.002, "step": 116480 }, { "epoch": 1.9060787040824674, "grad_norm": 0.02503153681755066, "learning_rate": 6.702706252839197e-08, "loss": 0.0009, "step": 116490 }, { "epoch": 1.9062423300335434, "grad_norm": 0.10345010459423065, "learning_rate": 6.679424311404392e-08, "loss": 0.0012, "step": 116500 }, { "epoch": 1.9064059559846191, "grad_norm": 0.03735567256808281, "learning_rate": 6.656182603517303e-08, "loss": 0.0008, "step": 116510 }, { "epoch": 1.906569581935695, "grad_norm": 0.02678655833005905, "learning_rate": 6.632981131073469e-08, "loss": 0.0006, "step": 116520 }, { "epoch": 1.906733207886771, "grad_norm": 0.08449308574199677, "learning_rate": 6.609819895965097e-08, "loss": 0.0009, "step": 116530 }, { "epoch": 1.9068968338378467, "grad_norm": 0.021715274080634117, "learning_rate": 6.586698900081179e-08, "loss": 0.0005, "step": 116540 }, { "epoch": 1.9070604597889225, "grad_norm": 0.03355414420366287, "learning_rate": 6.563618145307316e-08, "loss": 0.0013, "step": 116550 }, { "epoch": 1.9072240857399985, "grad_norm": 0.007211328484117985, "learning_rate": 6.540577633526001e-08, "loss": 0.001, "step": 116560 }, { "epoch": 1.9073877116910742, "grad_norm": 0.02551354095339775, "learning_rate": 6.51757736661629e-08, "loss": 0.0003, "step": 116570 }, { "epoch": 1.90755133764215, "grad_norm": 0.005793520715087652, "learning_rate": 6.494617346453957e-08, "loss": 0.0004, "step": 116580 }, { "epoch": 1.907714963593226, "grad_norm": 0.004928684793412685, "learning_rate": 6.471697574911562e-08, "loss": 0.0009, "step": 116590 }, { "epoch": 1.9078785895443016, "grad_norm": 0.039309728890657425, "learning_rate": 6.448818053858441e-08, "loss": 0.0009, "step": 116600 }, { "epoch": 1.9080422154953776, "grad_norm": 0.05481138825416565, "learning_rate": 6.425978785160492e-08, "loss": 0.0011, "step": 116610 }, { "epoch": 1.9082058414464536, "grad_norm": 0.06257200241088867, "learning_rate": 6.403179770680445e-08, "loss": 0.0006, "step": 116620 }, { "epoch": 1.908369467397529, "grad_norm": 0.11583060026168823, "learning_rate": 6.380421012277649e-08, "loss": 0.0015, "step": 116630 }, { "epoch": 1.908533093348605, "grad_norm": 0.05019020289182663, "learning_rate": 6.357702511808284e-08, "loss": 0.0009, "step": 116640 }, { "epoch": 1.9086967192996809, "grad_norm": 0.10041210055351257, "learning_rate": 6.335024271125256e-08, "loss": 0.0007, "step": 116650 }, { "epoch": 1.9088603452507567, "grad_norm": 0.04091503843665123, "learning_rate": 6.312386292077977e-08, "loss": 0.0009, "step": 116660 }, { "epoch": 1.9090239712018326, "grad_norm": 0.07322060316801071, "learning_rate": 6.289788576512856e-08, "loss": 0.0009, "step": 116670 }, { "epoch": 1.9091875971529084, "grad_norm": 0.06976725906133652, "learning_rate": 6.267231126272754e-08, "loss": 0.0007, "step": 116680 }, { "epoch": 1.9093512231039842, "grad_norm": 0.051630690693855286, "learning_rate": 6.244713943197533e-08, "loss": 0.0012, "step": 116690 }, { "epoch": 1.9095148490550602, "grad_norm": 0.010435717180371284, "learning_rate": 6.222237029123501e-08, "loss": 0.0005, "step": 116700 }, { "epoch": 1.909678475006136, "grad_norm": 0.043589841574430466, "learning_rate": 6.199800385883858e-08, "loss": 0.0007, "step": 116710 }, { "epoch": 1.9098421009572117, "grad_norm": 0.306078165769577, "learning_rate": 6.177404015308419e-08, "loss": 0.0016, "step": 116720 }, { "epoch": 1.9100057269082877, "grad_norm": 0.03573667258024216, "learning_rate": 6.155047919223833e-08, "loss": 0.0006, "step": 116730 }, { "epoch": 1.9101693528593635, "grad_norm": 0.05123012140393257, "learning_rate": 6.132732099453254e-08, "loss": 0.0019, "step": 116740 }, { "epoch": 1.9103329788104393, "grad_norm": 0.03394199162721634, "learning_rate": 6.110456557816835e-08, "loss": 0.0004, "step": 116750 }, { "epoch": 1.9104966047615153, "grad_norm": 0.023436540737748146, "learning_rate": 6.08822129613118e-08, "loss": 0.0005, "step": 116760 }, { "epoch": 1.910660230712591, "grad_norm": 0.06775376945734024, "learning_rate": 6.066026316209784e-08, "loss": 0.0008, "step": 116770 }, { "epoch": 1.9108238566636668, "grad_norm": 0.026028968393802643, "learning_rate": 6.043871619862751e-08, "loss": 0.0007, "step": 116780 }, { "epoch": 1.9109874826147428, "grad_norm": 0.030310045927762985, "learning_rate": 6.021757208897028e-08, "loss": 0.0009, "step": 116790 }, { "epoch": 1.9111511085658184, "grad_norm": 0.03099539689719677, "learning_rate": 5.999683085116059e-08, "loss": 0.001, "step": 116800 }, { "epoch": 1.9113147345168944, "grad_norm": 0.028056008741259575, "learning_rate": 5.977649250320239e-08, "loss": 0.0011, "step": 116810 }, { "epoch": 1.9114783604679704, "grad_norm": 0.030993202701210976, "learning_rate": 5.955655706306518e-08, "loss": 0.0004, "step": 116820 }, { "epoch": 1.911641986419046, "grad_norm": 0.01232182513922453, "learning_rate": 5.933702454868628e-08, "loss": 0.001, "step": 116830 }, { "epoch": 1.911805612370122, "grad_norm": 0.07155122607946396, "learning_rate": 5.911789497797027e-08, "loss": 0.001, "step": 116840 }, { "epoch": 1.9119692383211977, "grad_norm": 0.021993393078446388, "learning_rate": 5.8899168368788395e-08, "loss": 0.0005, "step": 116850 }, { "epoch": 1.9121328642722735, "grad_norm": 0.011465046554803848, "learning_rate": 5.868084473897917e-08, "loss": 0.0013, "step": 116860 }, { "epoch": 1.9122964902233495, "grad_norm": 0.03232261538505554, "learning_rate": 5.846292410634835e-08, "loss": 0.001, "step": 116870 }, { "epoch": 1.9124601161744252, "grad_norm": 0.015318089164793491, "learning_rate": 5.8245406488668944e-08, "loss": 0.0006, "step": 116880 }, { "epoch": 1.912623742125501, "grad_norm": 0.01868932694196701, "learning_rate": 5.80282919036812e-08, "loss": 0.0011, "step": 116890 }, { "epoch": 1.912787368076577, "grad_norm": 0.026175817474722862, "learning_rate": 5.781158036909096e-08, "loss": 0.0007, "step": 116900 }, { "epoch": 1.9129509940276528, "grad_norm": 0.04268673434853554, "learning_rate": 5.7595271902574636e-08, "loss": 0.0009, "step": 116910 }, { "epoch": 1.9131146199787286, "grad_norm": 0.04154679924249649, "learning_rate": 5.737936652177145e-08, "loss": 0.0009, "step": 116920 }, { "epoch": 1.9132782459298046, "grad_norm": 0.0030126285273581743, "learning_rate": 5.7163864244291764e-08, "loss": 0.0015, "step": 116930 }, { "epoch": 1.9134418718808803, "grad_norm": 0.0038906957488507032, "learning_rate": 5.6948765087709855e-08, "loss": 0.0004, "step": 116940 }, { "epoch": 1.913605497831956, "grad_norm": 0.05314008891582489, "learning_rate": 5.673406906956891e-08, "loss": 0.0005, "step": 116950 }, { "epoch": 1.913769123783032, "grad_norm": 0.030740106478333473, "learning_rate": 5.651977620737936e-08, "loss": 0.0005, "step": 116960 }, { "epoch": 1.9139327497341079, "grad_norm": 0.04226497933268547, "learning_rate": 5.6305886518617235e-08, "loss": 0.0006, "step": 116970 }, { "epoch": 1.9140963756851836, "grad_norm": 0.10254088789224625, "learning_rate": 5.609240002072691e-08, "loss": 0.0011, "step": 116980 }, { "epoch": 1.9142600016362596, "grad_norm": 0.04294828698039055, "learning_rate": 5.587931673112002e-08, "loss": 0.0007, "step": 116990 }, { "epoch": 1.9144236275873352, "grad_norm": 0.019339660182595253, "learning_rate": 5.5666636667174866e-08, "loss": 0.0005, "step": 117000 }, { "epoch": 1.9145872535384112, "grad_norm": 0.062423329800367355, "learning_rate": 5.5454359846236485e-08, "loss": 0.0009, "step": 117010 }, { "epoch": 1.9147508794894872, "grad_norm": 0.04364512488245964, "learning_rate": 5.5242486285618235e-08, "loss": 0.001, "step": 117020 }, { "epoch": 1.9149145054405627, "grad_norm": 0.04420114681124687, "learning_rate": 5.503101600259853e-08, "loss": 0.0007, "step": 117030 }, { "epoch": 1.9150781313916387, "grad_norm": 0.04976295679807663, "learning_rate": 5.4819949014425776e-08, "loss": 0.0009, "step": 117040 }, { "epoch": 1.9152417573427145, "grad_norm": 0.03043130412697792, "learning_rate": 5.4609285338312335e-08, "loss": 0.0014, "step": 117050 }, { "epoch": 1.9154053832937903, "grad_norm": 0.05502431467175484, "learning_rate": 5.4399024991440005e-08, "loss": 0.001, "step": 117060 }, { "epoch": 1.9155690092448663, "grad_norm": 0.08185819536447525, "learning_rate": 5.418916799095675e-08, "loss": 0.0023, "step": 117070 }, { "epoch": 1.915732635195942, "grad_norm": 0.059280503541231155, "learning_rate": 5.397971435397831e-08, "loss": 0.0016, "step": 117080 }, { "epoch": 1.9158962611470178, "grad_norm": 0.04656801000237465, "learning_rate": 5.377066409758602e-08, "loss": 0.0014, "step": 117090 }, { "epoch": 1.9160598870980938, "grad_norm": 0.006370526272803545, "learning_rate": 5.3562017238830147e-08, "loss": 0.0008, "step": 117100 }, { "epoch": 1.9162235130491696, "grad_norm": 0.04435443505644798, "learning_rate": 5.335377379472706e-08, "loss": 0.0009, "step": 117110 }, { "epoch": 1.9163871390002454, "grad_norm": 0.03789312765002251, "learning_rate": 5.314593378225985e-08, "loss": 0.0006, "step": 117120 }, { "epoch": 1.9165507649513214, "grad_norm": 0.04919374734163284, "learning_rate": 5.293849721837996e-08, "loss": 0.0005, "step": 117130 }, { "epoch": 1.9167143909023971, "grad_norm": 0.0709136500954628, "learning_rate": 5.273146412000496e-08, "loss": 0.0007, "step": 117140 }, { "epoch": 1.916878016853473, "grad_norm": 0.04750726372003555, "learning_rate": 5.252483450401913e-08, "loss": 0.0004, "step": 117150 }, { "epoch": 1.917041642804549, "grad_norm": 0.039837710559368134, "learning_rate": 5.231860838727565e-08, "loss": 0.0006, "step": 117160 }, { "epoch": 1.9172052687556247, "grad_norm": 0.06660003960132599, "learning_rate": 5.211278578659273e-08, "loss": 0.0012, "step": 117170 }, { "epoch": 1.9173688947067005, "grad_norm": 0.051756616681814194, "learning_rate": 5.1907366718756955e-08, "loss": 0.0005, "step": 117180 }, { "epoch": 1.9175325206577765, "grad_norm": 0.011971354484558105, "learning_rate": 5.1702351200521026e-08, "loss": 0.0009, "step": 117190 }, { "epoch": 1.917696146608852, "grad_norm": 0.029503384605050087, "learning_rate": 5.149773924860602e-08, "loss": 0.0007, "step": 117200 }, { "epoch": 1.917859772559928, "grad_norm": 0.02789662778377533, "learning_rate": 5.129353087969913e-08, "loss": 0.0009, "step": 117210 }, { "epoch": 1.918023398511004, "grad_norm": 0.00774666341021657, "learning_rate": 5.1089726110455374e-08, "loss": 0.0005, "step": 117220 }, { "epoch": 1.9181870244620796, "grad_norm": 0.05077791213989258, "learning_rate": 5.088632495749535e-08, "loss": 0.0007, "step": 117230 }, { "epoch": 1.9183506504131556, "grad_norm": 0.058956168591976166, "learning_rate": 5.068332743740856e-08, "loss": 0.0009, "step": 117240 }, { "epoch": 1.9185142763642313, "grad_norm": 0.07228808104991913, "learning_rate": 5.0480733566750094e-08, "loss": 0.0014, "step": 117250 }, { "epoch": 1.918677902315307, "grad_norm": 0.03618868812918663, "learning_rate": 5.0278543362043964e-08, "loss": 0.0006, "step": 117260 }, { "epoch": 1.918841528266383, "grad_norm": 0.0344681590795517, "learning_rate": 5.0076756839779196e-08, "loss": 0.0007, "step": 117270 }, { "epoch": 1.9190051542174589, "grad_norm": 0.0023111009504646063, "learning_rate": 4.987537401641318e-08, "loss": 0.0012, "step": 117280 }, { "epoch": 1.9191687801685346, "grad_norm": 0.07324729859828949, "learning_rate": 4.967439490837e-08, "loss": 0.0013, "step": 117290 }, { "epoch": 1.9193324061196106, "grad_norm": 0.027084648609161377, "learning_rate": 4.947381953203989e-08, "loss": 0.0005, "step": 117300 }, { "epoch": 1.9194960320706864, "grad_norm": 0.054264459758996964, "learning_rate": 4.927364790378253e-08, "loss": 0.0007, "step": 117310 }, { "epoch": 1.9196596580217622, "grad_norm": 0.015776021406054497, "learning_rate": 4.9073880039922636e-08, "loss": 0.0009, "step": 117320 }, { "epoch": 1.9198232839728382, "grad_norm": 0.009941987693309784, "learning_rate": 4.8874515956752745e-08, "loss": 0.0008, "step": 117330 }, { "epoch": 1.919986909923914, "grad_norm": 0.04222123697400093, "learning_rate": 4.86755556705315e-08, "loss": 0.0013, "step": 117340 }, { "epoch": 1.9201505358749897, "grad_norm": 0.005467007867991924, "learning_rate": 4.8476999197486494e-08, "loss": 0.0004, "step": 117350 }, { "epoch": 1.9203141618260657, "grad_norm": 0.014334475621581078, "learning_rate": 4.8278846553810876e-08, "loss": 0.0004, "step": 117360 }, { "epoch": 1.9204777877771415, "grad_norm": 0.005753939971327782, "learning_rate": 4.808109775566561e-08, "loss": 0.001, "step": 117370 }, { "epoch": 1.9206414137282173, "grad_norm": 0.2799023687839508, "learning_rate": 4.7883752819177787e-08, "loss": 0.0053, "step": 117380 }, { "epoch": 1.9208050396792933, "grad_norm": 0.014019107446074486, "learning_rate": 4.768681176044232e-08, "loss": 0.0003, "step": 117390 }, { "epoch": 1.9209686656303688, "grad_norm": 0.028729360550642014, "learning_rate": 4.7490274595521355e-08, "loss": 0.0008, "step": 117400 }, { "epoch": 1.9211322915814448, "grad_norm": 0.08716363459825516, "learning_rate": 4.72941413404443e-08, "loss": 0.001, "step": 117410 }, { "epoch": 1.9212959175325206, "grad_norm": 0.042327962815761566, "learning_rate": 4.709841201120557e-08, "loss": 0.0009, "step": 117420 }, { "epoch": 1.9214595434835964, "grad_norm": 0.05567514896392822, "learning_rate": 4.690308662377019e-08, "loss": 0.0004, "step": 117430 }, { "epoch": 1.9216231694346724, "grad_norm": 0.0731797143816948, "learning_rate": 4.670816519406651e-08, "loss": 0.0009, "step": 117440 }, { "epoch": 1.9217867953857481, "grad_norm": 0.09268610179424286, "learning_rate": 4.651364773799238e-08, "loss": 0.0015, "step": 117450 }, { "epoch": 1.921950421336824, "grad_norm": 0.15806390345096588, "learning_rate": 4.631953427141178e-08, "loss": 0.0011, "step": 117460 }, { "epoch": 1.9221140472879, "grad_norm": 0.0193981621414423, "learning_rate": 4.6125824810156484e-08, "loss": 0.0008, "step": 117470 }, { "epoch": 1.9222776732389757, "grad_norm": 0.0775398313999176, "learning_rate": 4.5932519370023855e-08, "loss": 0.0009, "step": 117480 }, { "epoch": 1.9224412991900515, "grad_norm": 0.05478496849536896, "learning_rate": 4.573961796678017e-08, "loss": 0.0008, "step": 117490 }, { "epoch": 1.9226049251411275, "grad_norm": 0.010411275550723076, "learning_rate": 4.554712061615729e-08, "loss": 0.001, "step": 117500 }, { "epoch": 1.9227685510922032, "grad_norm": 0.06554795056581497, "learning_rate": 4.535502733385488e-08, "loss": 0.0009, "step": 117510 }, { "epoch": 1.922932177043279, "grad_norm": 0.0036133560352027416, "learning_rate": 4.5163338135538745e-08, "loss": 0.0005, "step": 117520 }, { "epoch": 1.923095802994355, "grad_norm": 0.06528127193450928, "learning_rate": 4.49720530368436e-08, "loss": 0.0007, "step": 117530 }, { "epoch": 1.9232594289454308, "grad_norm": 0.003624382195994258, "learning_rate": 4.478117205336918e-08, "loss": 0.0004, "step": 117540 }, { "epoch": 1.9234230548965066, "grad_norm": 0.0884091779589653, "learning_rate": 4.459069520068304e-08, "loss": 0.0013, "step": 117550 }, { "epoch": 1.9235866808475826, "grad_norm": 0.044825129210948944, "learning_rate": 4.440062249431998e-08, "loss": 0.001, "step": 117560 }, { "epoch": 1.923750306798658, "grad_norm": 0.04207734391093254, "learning_rate": 4.421095394978259e-08, "loss": 0.0008, "step": 117570 }, { "epoch": 1.923913932749734, "grad_norm": 0.024763138964772224, "learning_rate": 4.402168958253794e-08, "loss": 0.001, "step": 117580 }, { "epoch": 1.92407755870081, "grad_norm": 0.002714958507567644, "learning_rate": 4.383282940802258e-08, "loss": 0.0008, "step": 117590 }, { "epoch": 1.9242411846518856, "grad_norm": 0.024393411353230476, "learning_rate": 4.364437344163974e-08, "loss": 0.0006, "step": 117600 }, { "epoch": 1.9244048106029616, "grad_norm": 0.0037096389569342136, "learning_rate": 4.3456321698758244e-08, "loss": 0.0007, "step": 117610 }, { "epoch": 1.9245684365540374, "grad_norm": 0.004397556185722351, "learning_rate": 4.326867419471637e-08, "loss": 0.0015, "step": 117620 }, { "epoch": 1.9247320625051132, "grad_norm": 0.042645443230867386, "learning_rate": 4.3081430944816315e-08, "loss": 0.0017, "step": 117630 }, { "epoch": 1.9248956884561892, "grad_norm": 0.06267805397510529, "learning_rate": 4.289459196433032e-08, "loss": 0.0008, "step": 117640 }, { "epoch": 1.925059314407265, "grad_norm": 0.09096043556928635, "learning_rate": 4.270815726849564e-08, "loss": 0.0007, "step": 117650 }, { "epoch": 1.9252229403583407, "grad_norm": 0.024136090651154518, "learning_rate": 4.252212687251789e-08, "loss": 0.0006, "step": 117660 }, { "epoch": 1.9253865663094167, "grad_norm": 0.011028232052922249, "learning_rate": 4.233650079156826e-08, "loss": 0.001, "step": 117670 }, { "epoch": 1.9255501922604925, "grad_norm": 0.08266288787126541, "learning_rate": 4.215127904078631e-08, "loss": 0.0011, "step": 117680 }, { "epoch": 1.9257138182115683, "grad_norm": 0.01891559734940529, "learning_rate": 4.196646163527773e-08, "loss": 0.0007, "step": 117690 }, { "epoch": 1.9258774441626443, "grad_norm": 0.03202177584171295, "learning_rate": 4.1782048590116583e-08, "loss": 0.0005, "step": 117700 }, { "epoch": 1.92604107011372, "grad_norm": 0.007185592316091061, "learning_rate": 4.1598039920341394e-08, "loss": 0.0007, "step": 117710 }, { "epoch": 1.9262046960647958, "grad_norm": 0.015683166682720184, "learning_rate": 4.14144356409607e-08, "loss": 0.0026, "step": 117720 }, { "epoch": 1.9263683220158718, "grad_norm": 0.014360490255057812, "learning_rate": 4.123123576694754e-08, "loss": 0.001, "step": 117730 }, { "epoch": 1.9265319479669476, "grad_norm": 0.0372314453125, "learning_rate": 4.10484403132444e-08, "loss": 0.0008, "step": 117740 }, { "epoch": 1.9266955739180234, "grad_norm": 0.07619918882846832, "learning_rate": 4.086604929475824e-08, "loss": 0.002, "step": 117750 }, { "epoch": 1.9268591998690994, "grad_norm": 0.05482002720236778, "learning_rate": 4.0684062726364384e-08, "loss": 0.0008, "step": 117760 }, { "epoch": 1.927022825820175, "grad_norm": 0.07788547873497009, "learning_rate": 4.0502480622905404e-08, "loss": 0.0005, "step": 117770 }, { "epoch": 1.927186451771251, "grad_norm": 0.032083842903375626, "learning_rate": 4.0321302999191125e-08, "loss": 0.0008, "step": 117780 }, { "epoch": 1.927350077722327, "grad_norm": 0.04234011843800545, "learning_rate": 4.0140529869996395e-08, "loss": 0.0011, "step": 117790 }, { "epoch": 1.9275137036734025, "grad_norm": 0.04810163006186485, "learning_rate": 3.9960161250065524e-08, "loss": 0.001, "step": 117800 }, { "epoch": 1.9276773296244785, "grad_norm": 0.01350861880928278, "learning_rate": 3.978019715410786e-08, "loss": 0.0008, "step": 117810 }, { "epoch": 1.9278409555755542, "grad_norm": 0.03149794042110443, "learning_rate": 3.960063759680166e-08, "loss": 0.001, "step": 117820 }, { "epoch": 1.92800458152663, "grad_norm": 0.0029188902117311954, "learning_rate": 3.942148259279022e-08, "loss": 0.0004, "step": 117830 }, { "epoch": 1.928168207477706, "grad_norm": 0.09763001650571823, "learning_rate": 3.924273215668628e-08, "loss": 0.0011, "step": 117840 }, { "epoch": 1.9283318334287818, "grad_norm": 0.08357292413711548, "learning_rate": 3.906438630306597e-08, "loss": 0.0011, "step": 117850 }, { "epoch": 1.9284954593798576, "grad_norm": 0.1580624133348465, "learning_rate": 3.888644504647654e-08, "loss": 0.001, "step": 117860 }, { "epoch": 1.9286590853309336, "grad_norm": 0.033751845359802246, "learning_rate": 3.870890840142916e-08, "loss": 0.0003, "step": 117870 }, { "epoch": 1.9288227112820093, "grad_norm": 0.029373381286859512, "learning_rate": 3.853177638240391e-08, "loss": 0.001, "step": 117880 }, { "epoch": 1.928986337233085, "grad_norm": 0.04505932703614235, "learning_rate": 3.83550490038459e-08, "loss": 0.0011, "step": 117890 }, { "epoch": 1.929149963184161, "grad_norm": 0.04801009222865105, "learning_rate": 3.817872628016972e-08, "loss": 0.0013, "step": 117900 }, { "epoch": 1.9293135891352369, "grad_norm": 0.0026336137671023607, "learning_rate": 3.800280822575497e-08, "loss": 0.0002, "step": 117910 }, { "epoch": 1.9294772150863126, "grad_norm": 0.028031054884195328, "learning_rate": 3.7827294854948516e-08, "loss": 0.0007, "step": 117920 }, { "epoch": 1.9296408410373886, "grad_norm": 0.05538899824023247, "learning_rate": 3.765218618206556e-08, "loss": 0.0005, "step": 117930 }, { "epoch": 1.9298044669884644, "grad_norm": 0.009470922872424126, "learning_rate": 3.747748222138692e-08, "loss": 0.0007, "step": 117940 }, { "epoch": 1.9299680929395402, "grad_norm": 0.06905197352170944, "learning_rate": 3.7303182987160624e-08, "loss": 0.0016, "step": 117950 }, { "epoch": 1.9301317188906162, "grad_norm": 0.05365915223956108, "learning_rate": 3.7129288493602534e-08, "loss": 0.0011, "step": 117960 }, { "epoch": 1.9302953448416917, "grad_norm": 0.09884864091873169, "learning_rate": 3.695579875489408e-08, "loss": 0.0008, "step": 117970 }, { "epoch": 1.9304589707927677, "grad_norm": 0.12790483236312866, "learning_rate": 3.6782713785185054e-08, "loss": 0.0006, "step": 117980 }, { "epoch": 1.9306225967438437, "grad_norm": 0.04073750972747803, "learning_rate": 3.6610033598591946e-08, "loss": 0.0007, "step": 117990 }, { "epoch": 1.9307862226949193, "grad_norm": 0.284094899892807, "learning_rate": 3.643775820919737e-08, "loss": 0.0009, "step": 118000 }, { "epoch": 1.9309498486459953, "grad_norm": 0.02556479349732399, "learning_rate": 3.6265887631051764e-08, "loss": 0.0012, "step": 118010 }, { "epoch": 1.931113474597071, "grad_norm": 0.1338217407464981, "learning_rate": 3.6094421878172245e-08, "loss": 0.0008, "step": 118020 }, { "epoch": 1.9312771005481468, "grad_norm": 0.02789692021906376, "learning_rate": 3.5923360964543184e-08, "loss": 0.001, "step": 118030 }, { "epoch": 1.9314407264992228, "grad_norm": 0.033490389585494995, "learning_rate": 3.575270490411509e-08, "loss": 0.0009, "step": 118040 }, { "epoch": 1.9316043524502986, "grad_norm": 0.005060158669948578, "learning_rate": 3.558245371080682e-08, "loss": 0.0005, "step": 118050 }, { "epoch": 1.9317679784013744, "grad_norm": 0.0402166023850441, "learning_rate": 3.5412607398503384e-08, "loss": 0.0006, "step": 118060 }, { "epoch": 1.9319316043524504, "grad_norm": 0.04928470030426979, "learning_rate": 3.524316598105648e-08, "loss": 0.0007, "step": 118070 }, { "epoch": 1.9320952303035261, "grad_norm": 0.044275086373090744, "learning_rate": 3.5074129472285056e-08, "loss": 0.0011, "step": 118080 }, { "epoch": 1.932258856254602, "grad_norm": 0.024314627051353455, "learning_rate": 3.4905497885976415e-08, "loss": 0.0013, "step": 118090 }, { "epoch": 1.932422482205678, "grad_norm": 0.052798792719841, "learning_rate": 3.4737271235881774e-08, "loss": 0.0035, "step": 118100 }, { "epoch": 1.9325861081567537, "grad_norm": 0.019403163343667984, "learning_rate": 3.456944953572239e-08, "loss": 0.0018, "step": 118110 }, { "epoch": 1.9327497341078295, "grad_norm": 0.034056615084409714, "learning_rate": 3.440203279918508e-08, "loss": 0.0005, "step": 118120 }, { "epoch": 1.9329133600589055, "grad_norm": 0.006692399736493826, "learning_rate": 3.4235021039923375e-08, "loss": 0.0005, "step": 118130 }, { "epoch": 1.9330769860099812, "grad_norm": 0.053489863872528076, "learning_rate": 3.4068414271558046e-08, "loss": 0.001, "step": 118140 }, { "epoch": 1.933240611961057, "grad_norm": 0.04069867357611656, "learning_rate": 3.390221250767767e-08, "loss": 0.0014, "step": 118150 }, { "epoch": 1.933404237912133, "grad_norm": 0.06978915631771088, "learning_rate": 3.3736415761836397e-08, "loss": 0.001, "step": 118160 }, { "epoch": 1.9335678638632086, "grad_norm": 0.014980925247073174, "learning_rate": 3.357102404755674e-08, "loss": 0.0008, "step": 118170 }, { "epoch": 1.9337314898142846, "grad_norm": 0.038956403732299805, "learning_rate": 3.340603737832682e-08, "loss": 0.001, "step": 118180 }, { "epoch": 1.9338951157653606, "grad_norm": 0.036051664501428604, "learning_rate": 3.324145576760307e-08, "loss": 0.0009, "step": 118190 }, { "epoch": 1.934058741716436, "grad_norm": 0.05190818011760712, "learning_rate": 3.307727922880699e-08, "loss": 0.0004, "step": 118200 }, { "epoch": 1.934222367667512, "grad_norm": 0.009585447609424591, "learning_rate": 3.291350777533009e-08, "loss": 0.0006, "step": 118210 }, { "epoch": 1.9343859936185879, "grad_norm": 0.04707228019833565, "learning_rate": 3.2750141420527236e-08, "loss": 0.001, "step": 118220 }, { "epoch": 1.9345496195696636, "grad_norm": 0.03924688324332237, "learning_rate": 3.258718017772333e-08, "loss": 0.0005, "step": 118230 }, { "epoch": 1.9347132455207396, "grad_norm": 0.0471479557454586, "learning_rate": 3.24246240602083e-08, "loss": 0.0004, "step": 118240 }, { "epoch": 1.9348768714718154, "grad_norm": 0.06175505369901657, "learning_rate": 3.226247308123931e-08, "loss": 0.001, "step": 118250 }, { "epoch": 1.9350404974228912, "grad_norm": 0.010532890446484089, "learning_rate": 3.2100727254041896e-08, "loss": 0.001, "step": 118260 }, { "epoch": 1.9352041233739672, "grad_norm": 0.031367652118206024, "learning_rate": 3.1939386591806624e-08, "loss": 0.001, "step": 118270 }, { "epoch": 1.935367749325043, "grad_norm": 0.062123656272888184, "learning_rate": 3.17784511076924e-08, "loss": 0.0006, "step": 118280 }, { "epoch": 1.9355313752761187, "grad_norm": 0.048285409808158875, "learning_rate": 3.161792081482429e-08, "loss": 0.0021, "step": 118290 }, { "epoch": 1.9356950012271947, "grad_norm": 0.028201134875416756, "learning_rate": 3.1457795726294595e-08, "loss": 0.0013, "step": 118300 }, { "epoch": 1.9358586271782705, "grad_norm": 0.002594185061752796, "learning_rate": 3.1298075855162314e-08, "loss": 0.0006, "step": 118310 }, { "epoch": 1.9360222531293463, "grad_norm": 0.016251588240265846, "learning_rate": 3.113876121445425e-08, "loss": 0.0012, "step": 118320 }, { "epoch": 1.9361858790804223, "grad_norm": 0.147062286734581, "learning_rate": 3.097985181716334e-08, "loss": 0.0011, "step": 118330 }, { "epoch": 1.9363495050314978, "grad_norm": 0.011207268573343754, "learning_rate": 3.082134767624978e-08, "loss": 0.0019, "step": 118340 }, { "epoch": 1.9365131309825738, "grad_norm": 0.02507847547531128, "learning_rate": 3.0663248804640445e-08, "loss": 0.0008, "step": 118350 }, { "epoch": 1.9366767569336498, "grad_norm": 0.024048246443271637, "learning_rate": 3.0505555215229466e-08, "loss": 0.0009, "step": 118360 }, { "epoch": 1.9368403828847254, "grad_norm": 0.004993805196136236, "learning_rate": 3.034826692087767e-08, "loss": 0.0009, "step": 118370 }, { "epoch": 1.9370040088358014, "grad_norm": 0.03710927441716194, "learning_rate": 3.0191383934412563e-08, "loss": 0.0008, "step": 118380 }, { "epoch": 1.9371676347868771, "grad_norm": 0.05473821610212326, "learning_rate": 3.0034906268630034e-08, "loss": 0.0008, "step": 118390 }, { "epoch": 1.937331260737953, "grad_norm": 0.018258212134242058, "learning_rate": 2.9878833936290985e-08, "loss": 0.0005, "step": 118400 }, { "epoch": 1.937494886689029, "grad_norm": 0.03875812888145447, "learning_rate": 2.9723166950124117e-08, "loss": 0.0011, "step": 118410 }, { "epoch": 1.9376585126401047, "grad_norm": 0.014572301879525185, "learning_rate": 2.956790532282594e-08, "loss": 0.0006, "step": 118420 }, { "epoch": 1.9378221385911805, "grad_norm": 0.0769246518611908, "learning_rate": 2.9413049067058552e-08, "loss": 0.0012, "step": 118430 }, { "epoch": 1.9379857645422565, "grad_norm": 0.045898791402578354, "learning_rate": 2.9258598195451293e-08, "loss": 0.0014, "step": 118440 }, { "epoch": 1.9381493904933322, "grad_norm": 0.034800853580236435, "learning_rate": 2.9104552720600754e-08, "loss": 0.0021, "step": 118450 }, { "epoch": 1.938313016444408, "grad_norm": 0.05612555146217346, "learning_rate": 2.895091265507022e-08, "loss": 0.0008, "step": 118460 }, { "epoch": 1.938476642395484, "grad_norm": 0.02328142151236534, "learning_rate": 2.8797678011390774e-08, "loss": 0.0009, "step": 118470 }, { "epoch": 1.9386402683465598, "grad_norm": 0.1035553514957428, "learning_rate": 2.8644848802059088e-08, "loss": 0.0009, "step": 118480 }, { "epoch": 1.9388038942976356, "grad_norm": 0.11448800563812256, "learning_rate": 2.8492425039539085e-08, "loss": 0.0007, "step": 118490 }, { "epoch": 1.9389675202487116, "grad_norm": 0.040437307208776474, "learning_rate": 2.834040673626304e-08, "loss": 0.0009, "step": 118500 }, { "epoch": 1.9391311461997873, "grad_norm": 0.013946594670414925, "learning_rate": 2.81887939046277e-08, "loss": 0.001, "step": 118510 }, { "epoch": 1.939294772150863, "grad_norm": 0.0034135030582547188, "learning_rate": 2.8037586556999287e-08, "loss": 0.0008, "step": 118520 }, { "epoch": 1.939458398101939, "grad_norm": 0.0016365470364689827, "learning_rate": 2.7886784705709048e-08, "loss": 0.0008, "step": 118530 }, { "epoch": 1.9396220240530146, "grad_norm": 0.029756847769021988, "learning_rate": 2.7736388363056588e-08, "loss": 0.0005, "step": 118540 }, { "epoch": 1.9397856500040906, "grad_norm": 0.04337065666913986, "learning_rate": 2.7586397541306543e-08, "loss": 0.0008, "step": 118550 }, { "epoch": 1.9399492759551666, "grad_norm": 0.024112407118082047, "learning_rate": 2.7436812252692456e-08, "loss": 0.0014, "step": 118560 }, { "epoch": 1.9401129019062422, "grad_norm": 0.004146270453929901, "learning_rate": 2.7287632509413464e-08, "loss": 0.0009, "step": 118570 }, { "epoch": 1.9402765278573182, "grad_norm": 0.10304737091064453, "learning_rate": 2.7138858323637052e-08, "loss": 0.0006, "step": 118580 }, { "epoch": 1.940440153808394, "grad_norm": 0.024668464437127113, "learning_rate": 2.6990489707496292e-08, "loss": 0.0006, "step": 118590 }, { "epoch": 1.9406037797594697, "grad_norm": 0.08798343688249588, "learning_rate": 2.6842526673091508e-08, "loss": 0.0007, "step": 118600 }, { "epoch": 1.9407674057105457, "grad_norm": 0.013673658482730389, "learning_rate": 2.6694969232489708e-08, "loss": 0.0021, "step": 118610 }, { "epoch": 1.9409310316616215, "grad_norm": 0.017358314245939255, "learning_rate": 2.654781739772572e-08, "loss": 0.0006, "step": 118620 }, { "epoch": 1.9410946576126973, "grad_norm": 0.08628585934638977, "learning_rate": 2.6401071180800485e-08, "loss": 0.0012, "step": 118630 }, { "epoch": 1.9412582835637733, "grad_norm": 0.045581232756376266, "learning_rate": 2.6254730593682775e-08, "loss": 0.0012, "step": 118640 }, { "epoch": 1.941421909514849, "grad_norm": 0.013516368344426155, "learning_rate": 2.6108795648306372e-08, "loss": 0.0009, "step": 118650 }, { "epoch": 1.9415855354659248, "grad_norm": 0.05546526610851288, "learning_rate": 2.5963266356574534e-08, "loss": 0.0011, "step": 118660 }, { "epoch": 1.9417491614170008, "grad_norm": 0.07094526290893555, "learning_rate": 2.5818142730355544e-08, "loss": 0.0012, "step": 118670 }, { "epoch": 1.9419127873680766, "grad_norm": 0.038748107850551605, "learning_rate": 2.5673424781484936e-08, "loss": 0.001, "step": 118680 }, { "epoch": 1.9420764133191524, "grad_norm": 0.02977623976767063, "learning_rate": 2.552911252176604e-08, "loss": 0.0006, "step": 118690 }, { "epoch": 1.9422400392702284, "grad_norm": 0.05881514400243759, "learning_rate": 2.5385205962967784e-08, "loss": 0.0014, "step": 118700 }, { "epoch": 1.9424036652213041, "grad_norm": 0.03750619292259216, "learning_rate": 2.5241705116826887e-08, "loss": 0.0013, "step": 118710 }, { "epoch": 1.94256729117238, "grad_norm": 0.02984338253736496, "learning_rate": 2.509860999504732e-08, "loss": 0.0008, "step": 118720 }, { "epoch": 1.942730917123456, "grad_norm": 0.014807168394327164, "learning_rate": 2.4955920609298635e-08, "loss": 0.0004, "step": 118730 }, { "epoch": 1.9428945430745315, "grad_norm": 0.05217082053422928, "learning_rate": 2.4813636971218747e-08, "loss": 0.0006, "step": 118740 }, { "epoch": 1.9430581690256075, "grad_norm": 0.033538561314344406, "learning_rate": 2.4671759092411708e-08, "loss": 0.0006, "step": 118750 }, { "epoch": 1.9432217949766835, "grad_norm": 0.057510845363140106, "learning_rate": 2.453028698444826e-08, "loss": 0.0007, "step": 118760 }, { "epoch": 1.943385420927759, "grad_norm": 0.03222835808992386, "learning_rate": 2.4389220658866952e-08, "loss": 0.0009, "step": 118770 }, { "epoch": 1.943549046878835, "grad_norm": 0.00914396345615387, "learning_rate": 2.4248560127171915e-08, "loss": 0.0008, "step": 118780 }, { "epoch": 1.9437126728299108, "grad_norm": 0.017285292968153954, "learning_rate": 2.4108305400835086e-08, "loss": 0.0005, "step": 118790 }, { "epoch": 1.9438762987809866, "grad_norm": 0.04153028875589371, "learning_rate": 2.3968456491295643e-08, "loss": 0.0006, "step": 118800 }, { "epoch": 1.9440399247320626, "grad_norm": 0.0309784933924675, "learning_rate": 2.382901340995947e-08, "loss": 0.0007, "step": 118810 }, { "epoch": 1.9442035506831383, "grad_norm": 0.04131908714771271, "learning_rate": 2.3689976168198014e-08, "loss": 0.0006, "step": 118820 }, { "epoch": 1.944367176634214, "grad_norm": 0.005361310672014952, "learning_rate": 2.355134477735166e-08, "loss": 0.0008, "step": 118830 }, { "epoch": 1.94453080258529, "grad_norm": 0.06579025089740753, "learning_rate": 2.3413119248725803e-08, "loss": 0.0009, "step": 118840 }, { "epoch": 1.9446944285363659, "grad_norm": 0.004515460692346096, "learning_rate": 2.3275299593594758e-08, "loss": 0.0005, "step": 118850 }, { "epoch": 1.9448580544874416, "grad_norm": 0.011067098937928677, "learning_rate": 2.3137885823197316e-08, "loss": 0.001, "step": 118860 }, { "epoch": 1.9450216804385176, "grad_norm": 0.03738608956336975, "learning_rate": 2.3000877948741728e-08, "loss": 0.0017, "step": 118870 }, { "epoch": 1.9451853063895934, "grad_norm": 0.1172296404838562, "learning_rate": 2.286427598140073e-08, "loss": 0.0018, "step": 118880 }, { "epoch": 1.9453489323406692, "grad_norm": 0.013883393257856369, "learning_rate": 2.2728079932316515e-08, "loss": 0.0006, "step": 118890 }, { "epoch": 1.9455125582917452, "grad_norm": 0.020110009238123894, "learning_rate": 2.2592289812595202e-08, "loss": 0.001, "step": 118900 }, { "epoch": 1.945676184242821, "grad_norm": 0.12202437967061996, "learning_rate": 2.2456905633312377e-08, "loss": 0.001, "step": 118910 }, { "epoch": 1.9458398101938967, "grad_norm": 0.0923973098397255, "learning_rate": 2.23219274055092e-08, "loss": 0.0013, "step": 118920 }, { "epoch": 1.9460034361449727, "grad_norm": 0.05844296142458916, "learning_rate": 2.21873551401941e-08, "loss": 0.0009, "step": 118930 }, { "epoch": 1.9461670620960483, "grad_norm": 0.3888278305530548, "learning_rate": 2.205318884834273e-08, "loss": 0.001, "step": 118940 }, { "epoch": 1.9463306880471243, "grad_norm": 0.04085360839962959, "learning_rate": 2.1919428540896347e-08, "loss": 0.0007, "step": 118950 }, { "epoch": 1.9464943139982003, "grad_norm": 0.0014444985426962376, "learning_rate": 2.1786074228764552e-08, "loss": 0.0007, "step": 118960 }, { "epoch": 1.9466579399492758, "grad_norm": 0.042099274694919586, "learning_rate": 2.1653125922823648e-08, "loss": 0.0008, "step": 118970 }, { "epoch": 1.9468215659003518, "grad_norm": 0.05133488029241562, "learning_rate": 2.1520583633915514e-08, "loss": 0.0006, "step": 118980 }, { "epoch": 1.9469851918514276, "grad_norm": 0.026576509699225426, "learning_rate": 2.1388447372850397e-08, "loss": 0.0008, "step": 118990 }, { "epoch": 1.9471488178025034, "grad_norm": 0.0730036050081253, "learning_rate": 2.125671715040467e-08, "loss": 0.0011, "step": 119000 }, { "epoch": 1.9473124437535794, "grad_norm": 0.09758596867322922, "learning_rate": 2.1125392977322524e-08, "loss": 0.0027, "step": 119010 }, { "epoch": 1.9474760697046551, "grad_norm": 0.03471507132053375, "learning_rate": 2.0994474864313718e-08, "loss": 0.0004, "step": 119020 }, { "epoch": 1.947639695655731, "grad_norm": 0.01150267943739891, "learning_rate": 2.086396282205472e-08, "loss": 0.0003, "step": 119030 }, { "epoch": 1.947803321606807, "grad_norm": 0.0543401874601841, "learning_rate": 2.0733856861190894e-08, "loss": 0.0018, "step": 119040 }, { "epoch": 1.9479669475578827, "grad_norm": 0.02571258693933487, "learning_rate": 2.0604156992332645e-08, "loss": 0.0009, "step": 119050 }, { "epoch": 1.9481305735089585, "grad_norm": 0.026605796068906784, "learning_rate": 2.0474863226058184e-08, "loss": 0.0006, "step": 119060 }, { "epoch": 1.9482941994600345, "grad_norm": 0.06606029719114304, "learning_rate": 2.0345975572911848e-08, "loss": 0.0009, "step": 119070 }, { "epoch": 1.9484578254111102, "grad_norm": 0.01350910123437643, "learning_rate": 2.0217494043405783e-08, "loss": 0.001, "step": 119080 }, { "epoch": 1.948621451362186, "grad_norm": 0.0011455549392849207, "learning_rate": 2.0089418648018278e-08, "loss": 0.0007, "step": 119090 }, { "epoch": 1.948785077313262, "grad_norm": 0.0849999263882637, "learning_rate": 1.996174939719431e-08, "loss": 0.0017, "step": 119100 }, { "epoch": 1.9489487032643378, "grad_norm": 0.01061960682272911, "learning_rate": 1.9834486301346655e-08, "loss": 0.0018, "step": 119110 }, { "epoch": 1.9491123292154136, "grad_norm": 0.07178749144077301, "learning_rate": 1.9707629370854243e-08, "loss": 0.0007, "step": 119120 }, { "epoch": 1.9492759551664895, "grad_norm": 0.02332564815878868, "learning_rate": 1.9581178616063235e-08, "loss": 0.0006, "step": 119130 }, { "epoch": 1.949439581117565, "grad_norm": 0.059939831495285034, "learning_rate": 1.9455134047286495e-08, "loss": 0.0012, "step": 119140 }, { "epoch": 1.949603207068641, "grad_norm": 0.009971341118216515, "learning_rate": 1.9329495674803577e-08, "loss": 0.0003, "step": 119150 }, { "epoch": 1.9497668330197169, "grad_norm": 0.024140900000929832, "learning_rate": 1.920426350886129e-08, "loss": 0.0019, "step": 119160 }, { "epoch": 1.9499304589707926, "grad_norm": 0.04765401780605316, "learning_rate": 1.9079437559673673e-08, "loss": 0.0008, "step": 119170 }, { "epoch": 1.9500940849218686, "grad_norm": 0.018852246925234795, "learning_rate": 1.895501783741982e-08, "loss": 0.0007, "step": 119180 }, { "epoch": 1.9502577108729444, "grad_norm": 0.026595139876008034, "learning_rate": 1.8831004352248273e-08, "loss": 0.001, "step": 119190 }, { "epoch": 1.9504213368240202, "grad_norm": 0.02010231651365757, "learning_rate": 1.8707397114272607e-08, "loss": 0.0021, "step": 119200 }, { "epoch": 1.9505849627750962, "grad_norm": 0.09063665568828583, "learning_rate": 1.8584196133573095e-08, "loss": 0.0008, "step": 119210 }, { "epoch": 1.950748588726172, "grad_norm": 0.04249636456370354, "learning_rate": 1.846140142019892e-08, "loss": 0.0008, "step": 119220 }, { "epoch": 1.9509122146772477, "grad_norm": 0.08367861807346344, "learning_rate": 1.8339012984164296e-08, "loss": 0.0021, "step": 119230 }, { "epoch": 1.9510758406283237, "grad_norm": 0.03771822154521942, "learning_rate": 1.8217030835450678e-08, "loss": 0.0016, "step": 119240 }, { "epoch": 1.9512394665793995, "grad_norm": 0.03222620114684105, "learning_rate": 1.8095454984006222e-08, "loss": 0.0008, "step": 119250 }, { "epoch": 1.9514030925304753, "grad_norm": 0.01875535026192665, "learning_rate": 1.797428543974633e-08, "loss": 0.0009, "step": 119260 }, { "epoch": 1.9515667184815513, "grad_norm": 0.021393340080976486, "learning_rate": 1.7853522212553652e-08, "loss": 0.001, "step": 119270 }, { "epoch": 1.951730344432627, "grad_norm": 0.09135446697473526, "learning_rate": 1.7733165312277533e-08, "loss": 0.0008, "step": 119280 }, { "epoch": 1.9518939703837028, "grad_norm": 0.004633698146790266, "learning_rate": 1.7613214748732344e-08, "loss": 0.0005, "step": 119290 }, { "epoch": 1.9520575963347788, "grad_norm": 0.029996832832694054, "learning_rate": 1.7493670531702478e-08, "loss": 0.0009, "step": 119300 }, { "epoch": 1.9522212222858544, "grad_norm": 0.05384202301502228, "learning_rate": 1.7374532670936804e-08, "loss": 0.0006, "step": 119310 }, { "epoch": 1.9523848482369304, "grad_norm": 0.023778783157467842, "learning_rate": 1.7255801176151998e-08, "loss": 0.0003, "step": 119320 }, { "epoch": 1.9525484741880064, "grad_norm": 0.05338103696703911, "learning_rate": 1.7137476057031423e-08, "loss": 0.0006, "step": 119330 }, { "epoch": 1.952712100139082, "grad_norm": 0.03128186613321304, "learning_rate": 1.7019557323224577e-08, "loss": 0.0032, "step": 119340 }, { "epoch": 1.952875726090158, "grad_norm": 0.07055889070034027, "learning_rate": 1.6902044984349332e-08, "loss": 0.0011, "step": 119350 }, { "epoch": 1.9530393520412337, "grad_norm": 0.03257028013467789, "learning_rate": 1.6784939049989123e-08, "loss": 0.0005, "step": 119360 }, { "epoch": 1.9532029779923095, "grad_norm": 0.0605674609541893, "learning_rate": 1.6668239529695208e-08, "loss": 0.0005, "step": 119370 }, { "epoch": 1.9533666039433855, "grad_norm": 0.06829767674207687, "learning_rate": 1.6551946432984966e-08, "loss": 0.0069, "step": 119380 }, { "epoch": 1.9535302298944612, "grad_norm": 0.037804704159498215, "learning_rate": 1.643605976934248e-08, "loss": 0.001, "step": 119390 }, { "epoch": 1.953693855845537, "grad_norm": 0.013051602058112621, "learning_rate": 1.6320579548219638e-08, "loss": 0.0006, "step": 119400 }, { "epoch": 1.953857481796613, "grad_norm": 0.0068069882690906525, "learning_rate": 1.620550577903446e-08, "loss": 0.0009, "step": 119410 }, { "epoch": 1.9540211077476888, "grad_norm": 0.006176062393933535, "learning_rate": 1.6090838471171655e-08, "loss": 0.0006, "step": 119420 }, { "epoch": 1.9541847336987646, "grad_norm": 0.0808325782418251, "learning_rate": 1.597657763398375e-08, "loss": 0.0015, "step": 119430 }, { "epoch": 1.9543483596498405, "grad_norm": 0.06635306775569916, "learning_rate": 1.586272327678884e-08, "loss": 0.0007, "step": 119440 }, { "epoch": 1.9545119856009163, "grad_norm": 0.054228853434324265, "learning_rate": 1.574927540887283e-08, "loss": 0.0007, "step": 119450 }, { "epoch": 1.954675611551992, "grad_norm": 0.07508311420679092, "learning_rate": 1.5636234039487773e-08, "loss": 0.0006, "step": 119460 }, { "epoch": 1.954839237503068, "grad_norm": 0.026145469397306442, "learning_rate": 1.552359917785351e-08, "loss": 0.0016, "step": 119470 }, { "epoch": 1.9550028634541439, "grad_norm": 0.004232988227158785, "learning_rate": 1.541137083315547e-08, "loss": 0.0008, "step": 119480 }, { "epoch": 1.9551664894052196, "grad_norm": 0.03720800578594208, "learning_rate": 1.5299549014546887e-08, "loss": 0.0005, "step": 119490 }, { "epoch": 1.9553301153562956, "grad_norm": 0.023317476734519005, "learning_rate": 1.5188133731148248e-08, "loss": 0.0012, "step": 119500 }, { "epoch": 1.9554937413073712, "grad_norm": 0.07464942336082458, "learning_rate": 1.5077124992045057e-08, "loss": 0.0009, "step": 119510 }, { "epoch": 1.9556573672584472, "grad_norm": 0.04751353710889816, "learning_rate": 1.496652280629174e-08, "loss": 0.0014, "step": 119520 }, { "epoch": 1.9558209932095232, "grad_norm": 0.011533886194229126, "learning_rate": 1.4856327182908304e-08, "loss": 0.0012, "step": 119530 }, { "epoch": 1.9559846191605987, "grad_norm": 0.04342764988541603, "learning_rate": 1.474653813088145e-08, "loss": 0.0009, "step": 119540 }, { "epoch": 1.9561482451116747, "grad_norm": 0.026417843997478485, "learning_rate": 1.4637155659166236e-08, "loss": 0.0013, "step": 119550 }, { "epoch": 1.9563118710627505, "grad_norm": 0.16043344140052795, "learning_rate": 1.4528179776682749e-08, "loss": 0.0009, "step": 119560 }, { "epoch": 1.9564754970138263, "grad_norm": 0.03958956152200699, "learning_rate": 1.4419610492318881e-08, "loss": 0.0011, "step": 119570 }, { "epoch": 1.9566391229649023, "grad_norm": 0.05950172618031502, "learning_rate": 1.4311447814928658e-08, "loss": 0.0004, "step": 119580 }, { "epoch": 1.956802748915978, "grad_norm": 0.043503303080797195, "learning_rate": 1.4203691753335025e-08, "loss": 0.0007, "step": 119590 }, { "epoch": 1.9569663748670538, "grad_norm": 0.1272813230752945, "learning_rate": 1.4096342316324285e-08, "loss": 0.0008, "step": 119600 }, { "epoch": 1.9571300008181298, "grad_norm": 0.07593804597854614, "learning_rate": 1.3989399512652768e-08, "loss": 0.0017, "step": 119610 }, { "epoch": 1.9572936267692056, "grad_norm": 0.04772377386689186, "learning_rate": 1.3882863351041276e-08, "loss": 0.0009, "step": 119620 }, { "epoch": 1.9574572527202814, "grad_norm": 0.0030029609333723783, "learning_rate": 1.377673384018008e-08, "loss": 0.0004, "step": 119630 }, { "epoch": 1.9576208786713574, "grad_norm": 0.03763430193066597, "learning_rate": 1.3671010988723365e-08, "loss": 0.0005, "step": 119640 }, { "epoch": 1.9577845046224331, "grad_norm": 0.04394448921084404, "learning_rate": 1.3565694805294238e-08, "loss": 0.0014, "step": 119650 }, { "epoch": 1.957948130573509, "grad_norm": 0.013911603949964046, "learning_rate": 1.3460785298481938e-08, "loss": 0.0008, "step": 119660 }, { "epoch": 1.958111756524585, "grad_norm": 0.00789094902575016, "learning_rate": 1.3356282476841843e-08, "loss": 0.0029, "step": 119670 }, { "epoch": 1.9582753824756607, "grad_norm": 0.0644741877913475, "learning_rate": 1.3252186348897688e-08, "loss": 0.0008, "step": 119680 }, { "epoch": 1.9584390084267365, "grad_norm": 0.01335859950631857, "learning_rate": 1.3148496923138799e-08, "loss": 0.0003, "step": 119690 }, { "epoch": 1.9586026343778125, "grad_norm": 0.05094321444630623, "learning_rate": 1.3045214208021739e-08, "loss": 0.0009, "step": 119700 }, { "epoch": 1.958766260328888, "grad_norm": 0.04956056922674179, "learning_rate": 1.2942338211969775e-08, "loss": 0.0009, "step": 119710 }, { "epoch": 1.958929886279964, "grad_norm": 0.04690057411789894, "learning_rate": 1.2839868943373413e-08, "loss": 0.0007, "step": 119720 }, { "epoch": 1.95909351223104, "grad_norm": 0.0027065288741141558, "learning_rate": 1.2737806410589304e-08, "loss": 0.0006, "step": 119730 }, { "epoch": 1.9592571381821156, "grad_norm": 0.021053733304142952, "learning_rate": 1.26361506219419e-08, "loss": 0.0006, "step": 119740 }, { "epoch": 1.9594207641331916, "grad_norm": 0.023141274228692055, "learning_rate": 1.2534901585721238e-08, "loss": 0.001, "step": 119750 }, { "epoch": 1.9595843900842673, "grad_norm": 0.09062420576810837, "learning_rate": 1.2434059310185708e-08, "loss": 0.0008, "step": 119760 }, { "epoch": 1.959748016035343, "grad_norm": 0.011048682034015656, "learning_rate": 1.2333623803558737e-08, "loss": 0.0004, "step": 119770 }, { "epoch": 1.959911641986419, "grad_norm": 0.01725071668624878, "learning_rate": 1.2233595074031545e-08, "loss": 0.0008, "step": 119780 }, { "epoch": 1.9600752679374949, "grad_norm": 0.026094339787960052, "learning_rate": 1.2133973129763165e-08, "loss": 0.0007, "step": 119790 }, { "epoch": 1.9602388938885706, "grad_norm": 0.021458825096488, "learning_rate": 1.2034757978877654e-08, "loss": 0.0009, "step": 119800 }, { "epoch": 1.9604025198396466, "grad_norm": 0.04918216913938522, "learning_rate": 1.1935949629466315e-08, "loss": 0.0011, "step": 119810 }, { "epoch": 1.9605661457907224, "grad_norm": 0.07723425328731537, "learning_rate": 1.1837548089588258e-08, "loss": 0.0006, "step": 119820 }, { "epoch": 1.9607297717417982, "grad_norm": 0.012126307003200054, "learning_rate": 1.173955336726873e-08, "loss": 0.0005, "step": 119830 }, { "epoch": 1.9608933976928742, "grad_norm": 0.13935650885105133, "learning_rate": 1.1641965470499672e-08, "loss": 0.0015, "step": 119840 }, { "epoch": 1.96105702364395, "grad_norm": 0.07405298203229904, "learning_rate": 1.1544784407240273e-08, "loss": 0.0004, "step": 119850 }, { "epoch": 1.9612206495950257, "grad_norm": 0.11607611924409866, "learning_rate": 1.1448010185415859e-08, "loss": 0.001, "step": 119860 }, { "epoch": 1.9613842755461017, "grad_norm": 0.008421391248703003, "learning_rate": 1.1351642812919006e-08, "loss": 0.0008, "step": 119870 }, { "epoch": 1.9615479014971775, "grad_norm": 0.00933240819722414, "learning_rate": 1.1255682297609539e-08, "loss": 0.0008, "step": 119880 }, { "epoch": 1.9617115274482533, "grad_norm": 0.060162998735904694, "learning_rate": 1.1160128647313417e-08, "loss": 0.0007, "step": 119890 }, { "epoch": 1.9618751533993293, "grad_norm": 0.02956484630703926, "learning_rate": 1.1064981869823855e-08, "loss": 0.0013, "step": 119900 }, { "epoch": 1.9620387793504048, "grad_norm": 0.07528740912675858, "learning_rate": 1.0970241972900198e-08, "loss": 0.001, "step": 119910 }, { "epoch": 1.9622024053014808, "grad_norm": 0.02967989258468151, "learning_rate": 1.08759089642696e-08, "loss": 0.0005, "step": 119920 }, { "epoch": 1.9623660312525568, "grad_norm": 0.11306829750537872, "learning_rate": 1.078198285162535e-08, "loss": 0.0004, "step": 119930 }, { "epoch": 1.9625296572036324, "grad_norm": 0.028859227895736694, "learning_rate": 1.0688463642627989e-08, "loss": 0.0013, "step": 119940 }, { "epoch": 1.9626932831547084, "grad_norm": 0.055813319981098175, "learning_rate": 1.0595351344904192e-08, "loss": 0.0013, "step": 119950 }, { "epoch": 1.9628569091057841, "grad_norm": 0.006267130374908447, "learning_rate": 1.050264596604844e-08, "loss": 0.0009, "step": 119960 }, { "epoch": 1.96302053505686, "grad_norm": 0.019412953406572342, "learning_rate": 1.04103475136208e-08, "loss": 0.0007, "step": 119970 }, { "epoch": 1.963184161007936, "grad_norm": 0.013167980127036572, "learning_rate": 1.0318455995149689e-08, "loss": 0.0008, "step": 119980 }, { "epoch": 1.9633477869590117, "grad_norm": 0.06747866421937943, "learning_rate": 1.0226971418128562e-08, "loss": 0.001, "step": 119990 }, { "epoch": 1.9635114129100875, "grad_norm": 0.013039028272032738, "learning_rate": 1.013589379001867e-08, "loss": 0.0009, "step": 120000 }, { "epoch": 1.9636750388611635, "grad_norm": 0.006112654227763414, "learning_rate": 1.0045223118248514e-08, "loss": 0.0004, "step": 120010 }, { "epoch": 1.9638386648122392, "grad_norm": 0.10876543074846268, "learning_rate": 9.954959410212738e-09, "loss": 0.0016, "step": 120020 }, { "epoch": 1.964002290763315, "grad_norm": 0.011344965547323227, "learning_rate": 9.865102673273231e-09, "loss": 0.0004, "step": 120030 }, { "epoch": 1.964165916714391, "grad_norm": 0.012625058181583881, "learning_rate": 9.77565291475746e-09, "loss": 0.0003, "step": 120040 }, { "epoch": 1.9643295426654668, "grad_norm": 0.044346991926431656, "learning_rate": 9.686610141961816e-09, "loss": 0.001, "step": 120050 }, { "epoch": 1.9644931686165426, "grad_norm": 0.0013447734527289867, "learning_rate": 9.597974362147156e-09, "loss": 0.0005, "step": 120060 }, { "epoch": 1.9646567945676185, "grad_norm": 0.019491171464323997, "learning_rate": 9.50974558254325e-09, "loss": 0.0007, "step": 120070 }, { "epoch": 1.964820420518694, "grad_norm": 0.014783798716962337, "learning_rate": 9.421923810345457e-09, "loss": 0.0008, "step": 120080 }, { "epoch": 1.96498404646977, "grad_norm": 0.01749807596206665, "learning_rate": 9.334509052715823e-09, "loss": 0.0019, "step": 120090 }, { "epoch": 1.965147672420846, "grad_norm": 0.21147726476192474, "learning_rate": 9.247501316784202e-09, "loss": 0.0006, "step": 120100 }, { "epoch": 1.9653112983719216, "grad_norm": 0.07809248566627502, "learning_rate": 9.16090060964603e-09, "loss": 0.0009, "step": 120110 }, { "epoch": 1.9654749243229976, "grad_norm": 0.19455690681934357, "learning_rate": 9.074706938364542e-09, "loss": 0.0012, "step": 120120 }, { "epoch": 1.9656385502740734, "grad_norm": 0.04191499948501587, "learning_rate": 8.988920309969673e-09, "loss": 0.0011, "step": 120130 }, { "epoch": 1.9658021762251492, "grad_norm": 0.12729717791080475, "learning_rate": 8.903540731457494e-09, "loss": 0.0008, "step": 120140 }, { "epoch": 1.9659658021762252, "grad_norm": 0.018768733367323875, "learning_rate": 8.818568209791323e-09, "loss": 0.0013, "step": 120150 }, { "epoch": 1.966129428127301, "grad_norm": 0.0029396044556051493, "learning_rate": 8.734002751901171e-09, "loss": 0.001, "step": 120160 }, { "epoch": 1.9662930540783767, "grad_norm": 0.06560855358839035, "learning_rate": 8.649844364684301e-09, "loss": 0.0007, "step": 120170 }, { "epoch": 1.9664566800294527, "grad_norm": 0.004699551500380039, "learning_rate": 8.566093055003555e-09, "loss": 0.0005, "step": 120180 }, { "epoch": 1.9666203059805285, "grad_norm": 0.026043064892292023, "learning_rate": 8.482748829690134e-09, "loss": 0.0004, "step": 120190 }, { "epoch": 1.9667839319316043, "grad_norm": 0.04580653831362724, "learning_rate": 8.399811695541383e-09, "loss": 0.0011, "step": 120200 }, { "epoch": 1.9669475578826803, "grad_norm": 0.08058738708496094, "learning_rate": 8.317281659320774e-09, "loss": 0.0014, "step": 120210 }, { "epoch": 1.967111183833756, "grad_norm": 0.07367295771837234, "learning_rate": 8.235158727759596e-09, "loss": 0.0008, "step": 120220 }, { "epoch": 1.9672748097848318, "grad_norm": 0.003173008793964982, "learning_rate": 8.15344290755582e-09, "loss": 0.0006, "step": 120230 }, { "epoch": 1.9674384357359078, "grad_norm": 0.020336421206593513, "learning_rate": 8.072134205373005e-09, "loss": 0.0009, "step": 120240 }, { "epoch": 1.9676020616869836, "grad_norm": 0.02688850834965706, "learning_rate": 7.991232627843071e-09, "loss": 0.0004, "step": 120250 }, { "epoch": 1.9677656876380594, "grad_norm": 0.18842047452926636, "learning_rate": 7.910738181563515e-09, "loss": 0.0017, "step": 120260 }, { "epoch": 1.9679293135891354, "grad_norm": 0.0487213209271431, "learning_rate": 7.830650873100198e-09, "loss": 0.0008, "step": 120270 }, { "epoch": 1.968092939540211, "grad_norm": 0.012311895377933979, "learning_rate": 7.750970708984007e-09, "loss": 0.0004, "step": 120280 }, { "epoch": 1.968256565491287, "grad_norm": 0.013489848002791405, "learning_rate": 7.671697695713632e-09, "loss": 0.0008, "step": 120290 }, { "epoch": 1.968420191442363, "grad_norm": 0.08853311091661453, "learning_rate": 7.5928318397539e-09, "loss": 0.0004, "step": 120300 }, { "epoch": 1.9685838173934385, "grad_norm": 0.01457985956221819, "learning_rate": 7.514373147537446e-09, "loss": 0.0008, "step": 120310 }, { "epoch": 1.9687474433445145, "grad_norm": 0.05608929693698883, "learning_rate": 7.436321625463039e-09, "loss": 0.0005, "step": 120320 }, { "epoch": 1.9689110692955902, "grad_norm": 0.013787472620606422, "learning_rate": 7.3586772798955874e-09, "loss": 0.0006, "step": 120330 }, { "epoch": 1.969074695246666, "grad_norm": 0.008914037607610226, "learning_rate": 7.281440117168359e-09, "loss": 0.0005, "step": 120340 }, { "epoch": 1.969238321197742, "grad_norm": 0.015239895321428776, "learning_rate": 7.204610143579649e-09, "loss": 0.0013, "step": 120350 }, { "epoch": 1.9694019471488178, "grad_norm": 0.13767629861831665, "learning_rate": 7.128187365396666e-09, "loss": 0.0013, "step": 120360 }, { "epoch": 1.9695655730998936, "grad_norm": 0.03583362326025963, "learning_rate": 7.052171788851092e-09, "loss": 0.0008, "step": 120370 }, { "epoch": 1.9697291990509695, "grad_norm": 0.009943093173205853, "learning_rate": 6.976563420142968e-09, "loss": 0.0021, "step": 120380 }, { "epoch": 1.9698928250020453, "grad_norm": 0.10988359898328781, "learning_rate": 6.901362265438471e-09, "loss": 0.0008, "step": 120390 }, { "epoch": 1.970056450953121, "grad_norm": 0.004041541367769241, "learning_rate": 6.826568330871031e-09, "loss": 0.0012, "step": 120400 }, { "epoch": 1.970220076904197, "grad_norm": 0.060312848538160324, "learning_rate": 6.75218162254021e-09, "loss": 0.0008, "step": 120410 }, { "epoch": 1.9703837028552729, "grad_norm": 0.0006657483172602952, "learning_rate": 6.678202146513379e-09, "loss": 0.0006, "step": 120420 }, { "epoch": 1.9705473288063486, "grad_norm": 0.037375181913375854, "learning_rate": 6.60462990882349e-09, "loss": 0.0008, "step": 120430 }, { "epoch": 1.9707109547574246, "grad_norm": 0.09979595243930817, "learning_rate": 6.5314649154707425e-09, "loss": 0.001, "step": 120440 }, { "epoch": 1.9708745807085004, "grad_norm": 0.038665417581796646, "learning_rate": 6.4587071724225845e-09, "loss": 0.0011, "step": 120450 }, { "epoch": 1.9710382066595762, "grad_norm": 0.07766951620578766, "learning_rate": 6.38635668561316e-09, "loss": 0.0008, "step": 120460 }, { "epoch": 1.9712018326106522, "grad_norm": 0.11505376547574997, "learning_rate": 6.314413460942192e-09, "loss": 0.001, "step": 120470 }, { "epoch": 1.9713654585617277, "grad_norm": 0.019717954099178314, "learning_rate": 6.242877504278322e-09, "loss": 0.0004, "step": 120480 }, { "epoch": 1.9715290845128037, "grad_norm": 0.00963100790977478, "learning_rate": 6.17174882145466e-09, "loss": 0.0006, "step": 120490 }, { "epoch": 1.9716927104638797, "grad_norm": 0.018108205869793892, "learning_rate": 6.101027418272676e-09, "loss": 0.0013, "step": 120500 }, { "epoch": 1.9718563364149553, "grad_norm": 0.04570852592587471, "learning_rate": 6.030713300499979e-09, "loss": 0.0012, "step": 120510 }, { "epoch": 1.9720199623660313, "grad_norm": 0.09894517809152603, "learning_rate": 5.960806473871983e-09, "loss": 0.001, "step": 120520 }, { "epoch": 1.972183588317107, "grad_norm": 0.05172663927078247, "learning_rate": 5.891306944088571e-09, "loss": 0.001, "step": 120530 }, { "epoch": 1.9723472142681828, "grad_norm": 0.03273118659853935, "learning_rate": 5.822214716819652e-09, "loss": 0.0006, "step": 120540 }, { "epoch": 1.9725108402192588, "grad_norm": 0.06041044369339943, "learning_rate": 5.753529797698499e-09, "loss": 0.0007, "step": 120550 }, { "epoch": 1.9726744661703346, "grad_norm": 0.002393483417108655, "learning_rate": 5.6852521923278505e-09, "loss": 0.0009, "step": 120560 }, { "epoch": 1.9728380921214104, "grad_norm": 0.128173366189003, "learning_rate": 5.617381906276031e-09, "loss": 0.0012, "step": 120570 }, { "epoch": 1.9730017180724864, "grad_norm": 0.03880898281931877, "learning_rate": 5.5499189450780585e-09, "loss": 0.0007, "step": 120580 }, { "epoch": 1.9731653440235621, "grad_norm": 0.16140474379062653, "learning_rate": 5.482863314236197e-09, "loss": 0.001, "step": 120590 }, { "epoch": 1.973328969974638, "grad_norm": 0.06171557679772377, "learning_rate": 5.416215019219406e-09, "loss": 0.0017, "step": 120600 }, { "epoch": 1.973492595925714, "grad_norm": 0.045106757432222366, "learning_rate": 5.349974065462782e-09, "loss": 0.0009, "step": 120610 }, { "epoch": 1.9736562218767897, "grad_norm": 0.019132466986775398, "learning_rate": 5.2841404583692245e-09, "loss": 0.0003, "step": 120620 }, { "epoch": 1.9738198478278655, "grad_norm": 0.27232876420021057, "learning_rate": 5.218714203307218e-09, "loss": 0.0016, "step": 120630 }, { "epoch": 1.9739834737789415, "grad_norm": 0.05358761548995972, "learning_rate": 5.153695305613604e-09, "loss": 0.0007, "step": 120640 }, { "epoch": 1.9741470997300172, "grad_norm": 0.08669502288103104, "learning_rate": 5.089083770590253e-09, "loss": 0.0011, "step": 120650 }, { "epoch": 1.974310725681093, "grad_norm": 0.022912029176950455, "learning_rate": 5.024879603507393e-09, "loss": 0.0009, "step": 120660 }, { "epoch": 1.974474351632169, "grad_norm": 0.025878889486193657, "learning_rate": 4.961082809600837e-09, "loss": 0.0007, "step": 120670 }, { "epoch": 1.9746379775832446, "grad_norm": 0.028424885123968124, "learning_rate": 4.897693394074199e-09, "loss": 0.0018, "step": 120680 }, { "epoch": 1.9748016035343205, "grad_norm": 0.05567040666937828, "learning_rate": 4.834711362096678e-09, "loss": 0.0015, "step": 120690 }, { "epoch": 1.9749652294853965, "grad_norm": 0.057673078030347824, "learning_rate": 4.772136718804721e-09, "loss": 0.0007, "step": 120700 }, { "epoch": 1.975128855436472, "grad_norm": 0.04457862302660942, "learning_rate": 4.709969469302023e-09, "loss": 0.0007, "step": 120710 }, { "epoch": 1.975292481387548, "grad_norm": 0.012798531912267208, "learning_rate": 4.648209618658972e-09, "loss": 0.0017, "step": 120720 }, { "epoch": 1.9754561073386239, "grad_norm": 0.019399434328079224, "learning_rate": 4.586857171912651e-09, "loss": 0.0004, "step": 120730 }, { "epoch": 1.9756197332896996, "grad_norm": 0.015058878809213638, "learning_rate": 4.525912134066279e-09, "loss": 0.0008, "step": 120740 }, { "epoch": 1.9757833592407756, "grad_norm": 0.0758814588189125, "learning_rate": 4.4653745100903255e-09, "loss": 0.001, "step": 120750 }, { "epoch": 1.9759469851918514, "grad_norm": 0.04663432389497757, "learning_rate": 4.405244304921952e-09, "loss": 0.0006, "step": 120760 }, { "epoch": 1.9761106111429272, "grad_norm": 0.05779373273253441, "learning_rate": 4.345521523465568e-09, "loss": 0.0005, "step": 120770 }, { "epoch": 1.9762742370940032, "grad_norm": 0.04826616495847702, "learning_rate": 4.286206170591167e-09, "loss": 0.0006, "step": 120780 }, { "epoch": 1.976437863045079, "grad_norm": 0.060472290962934494, "learning_rate": 4.227298251137657e-09, "loss": 0.0005, "step": 120790 }, { "epoch": 1.9766014889961547, "grad_norm": 0.017789114266633987, "learning_rate": 4.168797769908417e-09, "loss": 0.0008, "step": 120800 }, { "epoch": 1.9767651149472307, "grad_norm": 0.03495703265070915, "learning_rate": 4.1107047316746305e-09, "loss": 0.0017, "step": 120810 }, { "epoch": 1.9769287408983065, "grad_norm": 0.039546020328998566, "learning_rate": 4.05301914117473e-09, "loss": 0.0006, "step": 120820 }, { "epoch": 1.9770923668493823, "grad_norm": 0.075041763484478, "learning_rate": 3.9957410031121744e-09, "loss": 0.0016, "step": 120830 }, { "epoch": 1.9772559928004583, "grad_norm": 0.07128296047449112, "learning_rate": 3.938870322159339e-09, "loss": 0.0012, "step": 120840 }, { "epoch": 1.977419618751534, "grad_norm": 0.022848211228847504, "learning_rate": 3.8824071029547325e-09, "loss": 0.0008, "step": 120850 }, { "epoch": 1.9775832447026098, "grad_norm": 0.0770878791809082, "learning_rate": 3.826351350102453e-09, "loss": 0.0023, "step": 120860 }, { "epoch": 1.9777468706536858, "grad_norm": 0.04800952970981598, "learning_rate": 3.770703068174397e-09, "loss": 0.0013, "step": 120870 }, { "epoch": 1.9779104966047614, "grad_norm": 0.16103595495224, "learning_rate": 3.7154622617086024e-09, "loss": 0.0007, "step": 120880 }, { "epoch": 1.9780741225558374, "grad_norm": 0.053905077278614044, "learning_rate": 3.6606289352114633e-09, "loss": 0.0012, "step": 120890 }, { "epoch": 1.9782377485069134, "grad_norm": 0.05038908123970032, "learning_rate": 3.6062030931544034e-09, "loss": 0.0005, "step": 120900 }, { "epoch": 1.978401374457989, "grad_norm": 0.08015959709882736, "learning_rate": 3.5521847399766496e-09, "loss": 0.0009, "step": 120910 }, { "epoch": 1.978565000409065, "grad_norm": 0.024472760036587715, "learning_rate": 3.4985738800824564e-09, "loss": 0.0006, "step": 120920 }, { "epoch": 1.9787286263601407, "grad_norm": 0.04162255674600601, "learning_rate": 3.4453705178455476e-09, "loss": 0.0011, "step": 120930 }, { "epoch": 1.9788922523112165, "grad_norm": 0.014462755061686039, "learning_rate": 3.39257465760412e-09, "loss": 0.0008, "step": 120940 }, { "epoch": 1.9790558782622925, "grad_norm": 0.07161793112754822, "learning_rate": 3.3401863036647276e-09, "loss": 0.0005, "step": 120950 }, { "epoch": 1.9792195042133682, "grad_norm": 0.02279045060276985, "learning_rate": 3.2882054602995095e-09, "loss": 0.0013, "step": 120960 }, { "epoch": 1.979383130164444, "grad_norm": 0.056222815066576004, "learning_rate": 3.236632131747852e-09, "loss": 0.0013, "step": 120970 }, { "epoch": 1.97954675611552, "grad_norm": 0.047857679426670074, "learning_rate": 3.1854663222163907e-09, "loss": 0.0012, "step": 120980 }, { "epoch": 1.9797103820665958, "grad_norm": 0.03302791342139244, "learning_rate": 3.1347080358773428e-09, "loss": 0.0005, "step": 120990 }, { "epoch": 1.9798740080176715, "grad_norm": 0.02408667467534542, "learning_rate": 3.084357276870731e-09, "loss": 0.0017, "step": 121000 }, { "epoch": 1.9800376339687475, "grad_norm": 0.040168460458517075, "learning_rate": 3.034414049303269e-09, "loss": 0.0012, "step": 121010 }, { "epoch": 1.9802012599198233, "grad_norm": 0.1759580373764038, "learning_rate": 2.984878357247256e-09, "loss": 0.0019, "step": 121020 }, { "epoch": 1.980364885870899, "grad_norm": 0.022691862657666206, "learning_rate": 2.9357502047439035e-09, "loss": 0.0005, "step": 121030 }, { "epoch": 1.980528511821975, "grad_norm": 0.04220419377088547, "learning_rate": 2.887029595798896e-09, "loss": 0.0005, "step": 121040 }, { "epoch": 1.9806921377730506, "grad_norm": 0.020097140222787857, "learning_rate": 2.8387165343862765e-09, "loss": 0.0007, "step": 121050 }, { "epoch": 1.9808557637241266, "grad_norm": 0.03585117682814598, "learning_rate": 2.790811024445672e-09, "loss": 0.0015, "step": 121060 }, { "epoch": 1.9810193896752026, "grad_norm": 0.06970857828855515, "learning_rate": 2.7433130698850673e-09, "loss": 0.0009, "step": 121070 }, { "epoch": 1.9811830156262782, "grad_norm": 0.05048614367842674, "learning_rate": 2.6962226745774754e-09, "loss": 0.0005, "step": 121080 }, { "epoch": 1.9813466415773542, "grad_norm": 0.03118452988564968, "learning_rate": 2.649539842363713e-09, "loss": 0.0012, "step": 121090 }, { "epoch": 1.98151026752843, "grad_norm": 0.04376658797264099, "learning_rate": 2.60326457705129e-09, "loss": 0.0006, "step": 121100 }, { "epoch": 1.9816738934795057, "grad_norm": 0.016238750889897346, "learning_rate": 2.557396882413299e-09, "loss": 0.0006, "step": 121110 }, { "epoch": 1.9818375194305817, "grad_norm": 0.012554849497973919, "learning_rate": 2.511936762191747e-09, "loss": 0.0006, "step": 121120 }, { "epoch": 1.9820011453816575, "grad_norm": 0.0611734576523304, "learning_rate": 2.466884220093668e-09, "loss": 0.0007, "step": 121130 }, { "epoch": 1.9821647713327333, "grad_norm": 0.02710803970694542, "learning_rate": 2.4222392597933464e-09, "loss": 0.0006, "step": 121140 }, { "epoch": 1.9823283972838093, "grad_norm": 0.01143574994057417, "learning_rate": 2.378001884932313e-09, "loss": 0.0007, "step": 121150 }, { "epoch": 1.982492023234885, "grad_norm": 0.08319120109081268, "learning_rate": 2.334172099117682e-09, "loss": 0.0007, "step": 121160 }, { "epoch": 1.9826556491859608, "grad_norm": 0.0866430252790451, "learning_rate": 2.290749905924372e-09, "loss": 0.0012, "step": 121170 }, { "epoch": 1.9828192751370368, "grad_norm": 0.0026253052055835724, "learning_rate": 2.2477353088945496e-09, "loss": 0.0005, "step": 121180 }, { "epoch": 1.9829829010881126, "grad_norm": 0.08984646946191788, "learning_rate": 2.2051283115348542e-09, "loss": 0.0014, "step": 121190 }, { "epoch": 1.9831465270391884, "grad_norm": 0.04879742115736008, "learning_rate": 2.1629289173213942e-09, "loss": 0.0015, "step": 121200 }, { "epoch": 1.9833101529902644, "grad_norm": 0.04284178465604782, "learning_rate": 2.1211371296947503e-09, "loss": 0.0011, "step": 121210 }, { "epoch": 1.9834737789413401, "grad_norm": 0.043293219059705734, "learning_rate": 2.079752952064418e-09, "loss": 0.0009, "step": 121220 }, { "epoch": 1.983637404892416, "grad_norm": 0.001439051702618599, "learning_rate": 2.038776387804919e-09, "loss": 0.0015, "step": 121230 }, { "epoch": 1.983801030843492, "grad_norm": 0.036187853664159775, "learning_rate": 1.9982074402580266e-09, "loss": 0.0009, "step": 121240 }, { "epoch": 1.9839646567945675, "grad_norm": 0.08715284615755081, "learning_rate": 1.9580461127327588e-09, "loss": 0.0007, "step": 121250 }, { "epoch": 1.9841282827456435, "grad_norm": 0.007016774732619524, "learning_rate": 1.9182924085048295e-09, "loss": 0.0009, "step": 121260 }, { "epoch": 1.9842919086967195, "grad_norm": 0.2651830017566681, "learning_rate": 1.8789463308160894e-09, "loss": 0.0017, "step": 121270 }, { "epoch": 1.984455534647795, "grad_norm": 0.0751025453209877, "learning_rate": 1.8400078828745283e-09, "loss": 0.0021, "step": 121280 }, { "epoch": 1.984619160598871, "grad_norm": 0.02467581443488598, "learning_rate": 1.8014770678576043e-09, "loss": 0.0008, "step": 121290 }, { "epoch": 1.9847827865499468, "grad_norm": 0.03527678921818733, "learning_rate": 1.7633538889066937e-09, "loss": 0.0017, "step": 121300 }, { "epoch": 1.9849464125010226, "grad_norm": 0.010351304896175861, "learning_rate": 1.7256383491309758e-09, "loss": 0.0004, "step": 121310 }, { "epoch": 1.9851100384520985, "grad_norm": 0.10141579806804657, "learning_rate": 1.6883304516063238e-09, "loss": 0.001, "step": 121320 }, { "epoch": 1.9852736644031743, "grad_norm": 0.044918958097696304, "learning_rate": 1.6514301993764138e-09, "loss": 0.0018, "step": 121330 }, { "epoch": 1.98543729035425, "grad_norm": 0.041537243872880936, "learning_rate": 1.6149375954493952e-09, "loss": 0.0007, "step": 121340 }, { "epoch": 1.985600916305326, "grad_norm": 0.04633361101150513, "learning_rate": 1.578852642802331e-09, "loss": 0.0007, "step": 121350 }, { "epoch": 1.9857645422564019, "grad_norm": 0.11641356348991394, "learning_rate": 1.5431753443778674e-09, "loss": 0.0014, "step": 121360 }, { "epoch": 1.9859281682074776, "grad_norm": 0.030262496322393417, "learning_rate": 1.507905703085899e-09, "loss": 0.0005, "step": 121370 }, { "epoch": 1.9860917941585536, "grad_norm": 0.06622287631034851, "learning_rate": 1.4730437218030136e-09, "loss": 0.0009, "step": 121380 }, { "epoch": 1.9862554201096294, "grad_norm": 0.010505574755370617, "learning_rate": 1.4385894033719372e-09, "loss": 0.0009, "step": 121390 }, { "epoch": 1.9864190460607052, "grad_norm": 0.035860270261764526, "learning_rate": 1.4045427506026443e-09, "loss": 0.0008, "step": 121400 }, { "epoch": 1.9865826720117812, "grad_norm": 0.012280511669814587, "learning_rate": 1.3709037662729129e-09, "loss": 0.0006, "step": 121410 }, { "epoch": 1.986746297962857, "grad_norm": 0.02275555208325386, "learning_rate": 1.337672453124994e-09, "loss": 0.001, "step": 121420 }, { "epoch": 1.9869099239139327, "grad_norm": 0.06456863880157471, "learning_rate": 1.3048488138694971e-09, "loss": 0.0012, "step": 121430 }, { "epoch": 1.9870735498650087, "grad_norm": 0.0017447107238695025, "learning_rate": 1.2724328511837248e-09, "loss": 0.0009, "step": 121440 }, { "epoch": 1.9872371758160843, "grad_norm": 0.0033917995169758797, "learning_rate": 1.2404245677111183e-09, "loss": 0.0006, "step": 121450 }, { "epoch": 1.9874008017671603, "grad_norm": 0.03676427900791168, "learning_rate": 1.2088239660623668e-09, "loss": 0.0007, "step": 121460 }, { "epoch": 1.9875644277182363, "grad_norm": 0.03885538876056671, "learning_rate": 1.1776310488142983e-09, "loss": 0.0005, "step": 121470 }, { "epoch": 1.9877280536693118, "grad_norm": 0.047191012650728226, "learning_rate": 1.146845818511544e-09, "loss": 0.001, "step": 121480 }, { "epoch": 1.9878916796203878, "grad_norm": 0.0196328517049551, "learning_rate": 1.1164682776637625e-09, "loss": 0.0009, "step": 121490 }, { "epoch": 1.9880553055714636, "grad_norm": 0.00553932087495923, "learning_rate": 1.0864984287500823e-09, "loss": 0.0007, "step": 121500 }, { "epoch": 1.9882189315225394, "grad_norm": 0.07081619650125504, "learning_rate": 1.056936274212994e-09, "loss": 0.0009, "step": 121510 }, { "epoch": 1.9883825574736154, "grad_norm": 0.058189865201711655, "learning_rate": 1.0277818164650122e-09, "loss": 0.0007, "step": 121520 }, { "epoch": 1.9885461834246911, "grad_norm": 0.030802302062511444, "learning_rate": 9.990350578825692e-10, "loss": 0.0005, "step": 121530 }, { "epoch": 1.988709809375767, "grad_norm": 0.05505533888936043, "learning_rate": 9.70696000811011e-10, "loss": 0.0008, "step": 121540 }, { "epoch": 1.988873435326843, "grad_norm": 0.04216821864247322, "learning_rate": 9.427646475618225e-10, "loss": 0.0005, "step": 121550 }, { "epoch": 1.9890370612779187, "grad_norm": 0.03344396501779556, "learning_rate": 9.152410004120704e-10, "loss": 0.001, "step": 121560 }, { "epoch": 1.9892006872289945, "grad_norm": 0.032580576837062836, "learning_rate": 8.881250616066261e-10, "loss": 0.0015, "step": 121570 }, { "epoch": 1.9893643131800705, "grad_norm": 0.11990205198526382, "learning_rate": 8.61416833357609e-10, "loss": 0.0007, "step": 121580 }, { "epoch": 1.9895279391311462, "grad_norm": 0.04535841569304466, "learning_rate": 8.351163178427213e-10, "loss": 0.0007, "step": 121590 }, { "epoch": 1.989691565082222, "grad_norm": 0.04809006676077843, "learning_rate": 8.09223517206914e-10, "loss": 0.001, "step": 121600 }, { "epoch": 1.989855191033298, "grad_norm": 0.02720620110630989, "learning_rate": 7.837384335623865e-10, "loss": 0.0009, "step": 121610 }, { "epoch": 1.9900188169843738, "grad_norm": 0.041904766112565994, "learning_rate": 7.586610689874763e-10, "loss": 0.0008, "step": 121620 }, { "epoch": 1.9901824429354495, "grad_norm": 0.025538332760334015, "learning_rate": 7.33991425527214e-10, "loss": 0.0013, "step": 121630 }, { "epoch": 1.9903460688865255, "grad_norm": 0.0030932335648685694, "learning_rate": 7.09729505193324e-10, "loss": 0.0013, "step": 121640 }, { "epoch": 1.990509694837601, "grad_norm": 0.019069423899054527, "learning_rate": 6.858753099653337e-10, "loss": 0.0006, "step": 121650 }, { "epoch": 1.990673320788677, "grad_norm": 0.061187829822301865, "learning_rate": 6.624288417877989e-10, "loss": 0.001, "step": 121660 }, { "epoch": 1.990836946739753, "grad_norm": 0.033669210970401764, "learning_rate": 6.393901025736337e-10, "loss": 0.0005, "step": 121670 }, { "epoch": 1.9910005726908286, "grad_norm": 0.02763231098651886, "learning_rate": 6.167590942013358e-10, "loss": 0.0009, "step": 121680 }, { "epoch": 1.9911641986419046, "grad_norm": 0.12069132924079895, "learning_rate": 5.945358185172056e-10, "loss": 0.0013, "step": 121690 }, { "epoch": 1.9913278245929804, "grad_norm": 0.024630241096019745, "learning_rate": 5.727202773325724e-10, "loss": 0.0005, "step": 121700 }, { "epoch": 1.9914914505440562, "grad_norm": 0.028935732319951057, "learning_rate": 5.513124724276786e-10, "loss": 0.0006, "step": 121710 }, { "epoch": 1.9916550764951322, "grad_norm": 0.02910882607102394, "learning_rate": 5.3031240554835e-10, "loss": 0.001, "step": 121720 }, { "epoch": 1.991818702446208, "grad_norm": 0.04302552714943886, "learning_rate": 5.097200784071054e-10, "loss": 0.0008, "step": 121730 }, { "epoch": 1.9919823283972837, "grad_norm": 0.028214646503329277, "learning_rate": 4.895354926831575e-10, "loss": 0.0006, "step": 121740 }, { "epoch": 1.9921459543483597, "grad_norm": 0.051813915371894836, "learning_rate": 4.697586500229667e-10, "loss": 0.0015, "step": 121750 }, { "epoch": 1.9923095802994355, "grad_norm": 0.050833575427532196, "learning_rate": 4.5038955203913214e-10, "loss": 0.0007, "step": 121760 }, { "epoch": 1.9924732062505113, "grad_norm": 0.031718213111162186, "learning_rate": 4.3142820031205624e-10, "loss": 0.0009, "step": 121770 }, { "epoch": 1.9926368322015873, "grad_norm": 0.05440567433834076, "learning_rate": 4.128745963871694e-10, "loss": 0.0011, "step": 121780 }, { "epoch": 1.992800458152663, "grad_norm": 0.006071154028177261, "learning_rate": 3.9472874177881594e-10, "loss": 0.0009, "step": 121790 }, { "epoch": 1.9929640841037388, "grad_norm": 0.0027376804500818253, "learning_rate": 3.769906379658128e-10, "loss": 0.0005, "step": 121800 }, { "epoch": 1.9931277100548148, "grad_norm": 0.00888826884329319, "learning_rate": 3.5966028639533577e-10, "loss": 0.001, "step": 121810 }, { "epoch": 1.9932913360058904, "grad_norm": 0.02660565823316574, "learning_rate": 3.4273768848069875e-10, "loss": 0.0007, "step": 121820 }, { "epoch": 1.9934549619569664, "grad_norm": 0.06732051074504852, "learning_rate": 3.26222845601909e-10, "loss": 0.0007, "step": 121830 }, { "epoch": 1.9936185879080424, "grad_norm": 0.017321888357400894, "learning_rate": 3.1011575910622204e-10, "loss": 0.0004, "step": 121840 }, { "epoch": 1.993782213859118, "grad_norm": 0.016387946903705597, "learning_rate": 2.9441643030703184e-10, "loss": 0.0016, "step": 121850 }, { "epoch": 1.993945839810194, "grad_norm": 0.019673125818371773, "learning_rate": 2.7912486048498054e-10, "loss": 0.0007, "step": 121860 }, { "epoch": 1.9941094657612697, "grad_norm": 0.06270117312669754, "learning_rate": 2.642410508868487e-10, "loss": 0.0009, "step": 121870 }, { "epoch": 1.9942730917123455, "grad_norm": 0.03190852329134941, "learning_rate": 2.4976500272611005e-10, "loss": 0.0008, "step": 121880 }, { "epoch": 1.9944367176634215, "grad_norm": 0.07426242530345917, "learning_rate": 2.3569671718459694e-10, "loss": 0.0012, "step": 121890 }, { "epoch": 1.9946003436144972, "grad_norm": 0.06926099956035614, "learning_rate": 2.2203619540861476e-10, "loss": 0.001, "step": 121900 }, { "epoch": 1.994763969565573, "grad_norm": 0.06824562698602676, "learning_rate": 2.0878343851282733e-10, "loss": 0.001, "step": 121910 }, { "epoch": 1.994927595516649, "grad_norm": 0.03840915486216545, "learning_rate": 1.9593844757803682e-10, "loss": 0.001, "step": 121920 }, { "epoch": 1.9950912214677248, "grad_norm": 0.06597849726676941, "learning_rate": 1.835012236517386e-10, "loss": 0.0009, "step": 121930 }, { "epoch": 1.9952548474188005, "grad_norm": 0.08199784904718399, "learning_rate": 1.7147176774812148e-10, "loss": 0.0009, "step": 121940 }, { "epoch": 1.9954184733698765, "grad_norm": 0.030702393501996994, "learning_rate": 1.598500808480674e-10, "loss": 0.0009, "step": 121950 }, { "epoch": 1.9955820993209523, "grad_norm": 0.060015805065631866, "learning_rate": 1.4863616390026202e-10, "loss": 0.0012, "step": 121960 }, { "epoch": 1.995745725272028, "grad_norm": 0.019695112481713295, "learning_rate": 1.3783001781841886e-10, "loss": 0.0004, "step": 121970 }, { "epoch": 1.995909351223104, "grad_norm": 0.035306788980960846, "learning_rate": 1.2743164348405502e-10, "loss": 0.0006, "step": 121980 }, { "epoch": 1.9960729771741799, "grad_norm": 0.02539607509970665, "learning_rate": 1.1744104174538084e-10, "loss": 0.0005, "step": 121990 }, { "epoch": 1.9962366031252556, "grad_norm": 0.10506034642457962, "learning_rate": 1.0785821341730007e-10, "loss": 0.0008, "step": 122000 }, { "epoch": 1.9962366031252556, "eval_loss": 0.0007702079601585865, "eval_runtime": 5.3816, "eval_samples_per_second": 37.164, "eval_steps_per_second": 9.291, "step": 122000 }, { "epoch": 1.9964002290763316, "grad_norm": 0.09506328403949738, "learning_rate": 9.86831592814097e-11, "loss": 0.0008, "step": 122010 }, { "epoch": 1.9965638550274072, "grad_norm": 0.04165778309106827, "learning_rate": 8.991588008544494e-11, "loss": 0.0009, "step": 122020 }, { "epoch": 1.9967274809784832, "grad_norm": 0.09607928991317749, "learning_rate": 8.155637654494452e-11, "loss": 0.0012, "step": 122030 }, { "epoch": 1.9968911069295592, "grad_norm": 0.0512206107378006, "learning_rate": 7.360464934103029e-11, "loss": 0.0012, "step": 122040 }, { "epoch": 1.9970547328806347, "grad_norm": 0.10398688912391663, "learning_rate": 6.606069912318269e-11, "loss": 0.0012, "step": 122050 }, { "epoch": 1.9972183588317107, "grad_norm": 0.001678274478763342, "learning_rate": 5.892452650591018e-11, "loss": 0.0005, "step": 122060 }, { "epoch": 1.9973819847827865, "grad_norm": 0.043181102722883224, "learning_rate": 5.2196132072079854e-11, "loss": 0.0018, "step": 122070 }, { "epoch": 1.9975456107338623, "grad_norm": 0.013596700504422188, "learning_rate": 4.587551636903165e-11, "loss": 0.0011, "step": 122080 }, { "epoch": 1.9977092366849383, "grad_norm": 0.024047480896115303, "learning_rate": 3.9962679913574386e-11, "loss": 0.0005, "step": 122090 }, { "epoch": 1.997872862636014, "grad_norm": 0.1744498759508133, "learning_rate": 3.4457623187544865e-11, "loss": 0.0008, "step": 122100 }, { "epoch": 1.9980364885870898, "grad_norm": 0.028417835012078285, "learning_rate": 2.936034663947318e-11, "loss": 0.0005, "step": 122110 }, { "epoch": 1.9982001145381658, "grad_norm": 0.02932523936033249, "learning_rate": 2.467085068513786e-11, "loss": 0.0005, "step": 122120 }, { "epoch": 1.9983637404892416, "grad_norm": 0.03043956682085991, "learning_rate": 2.0389135708120954e-11, "loss": 0.0008, "step": 122130 }, { "epoch": 1.9985273664403174, "grad_norm": 0.015334525145590305, "learning_rate": 1.6515202055922273e-11, "loss": 0.0008, "step": 122140 }, { "epoch": 1.9986909923913934, "grad_norm": 0.021789342164993286, "learning_rate": 1.3049050045510492e-11, "loss": 0.0008, "step": 122150 }, { "epoch": 1.9988546183424691, "grad_norm": 0.008600656874477863, "learning_rate": 9.990679959992478e-12, "loss": 0.001, "step": 122160 }, { "epoch": 1.999018244293545, "grad_norm": 0.01498556137084961, "learning_rate": 7.340092047503078e-12, "loss": 0.001, "step": 122170 }, { "epoch": 1.999181870244621, "grad_norm": 0.029551725834608078, "learning_rate": 5.097286525090894e-12, "loss": 0.0016, "step": 122180 }, { "epoch": 1.9993454961956967, "grad_norm": 0.036482520401477814, "learning_rate": 3.2622635759427257e-12, "loss": 0.0006, "step": 122190 }, { "epoch": 1.9995091221467725, "grad_norm": 0.06275158375501633, "learning_rate": 1.8350233488284573e-12, "loss": 0.0007, "step": 122200 }, { "epoch": 1.9996727480978485, "grad_norm": 0.028780221939086914, "learning_rate": 8.155659603215072e-13, "loss": 0.0007, "step": 122210 }, { "epoch": 1.999836374048924, "grad_norm": 0.015836335718631744, "learning_rate": 2.0389149424371314e-13, "loss": 0.0003, "step": 122220 }, { "epoch": 2.0, "grad_norm": 0.006197645328938961, "learning_rate": 0.0, "loss": 0.0004, "step": 122230 }, { "epoch": 2.0, "step": 122230, "total_flos": 1.424923653115478e+19, "train_loss": 0.003896685916660938, "train_runtime": 121618.4873, "train_samples_per_second": 8.04, "train_steps_per_second": 1.005 } ], "logging_steps": 10, "max_steps": 122230, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 62000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.424923653115478e+19, "train_batch_size": 1, "trial_name": null, "trial_params": null }