{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.43016089672002317, "eval_steps": 500, "global_step": 52000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.272324936923522e-05, "grad_norm": 80462.0859375, "learning_rate": 8.080808080808081e-09, "loss": 6007.0633, "step": 10 }, { "epoch": 0.00016544649873847045, "grad_norm": 360423.46875, "learning_rate": 1.6161616161616162e-08, "loss": 16793.1375, "step": 20 }, { "epoch": 0.0002481697481077057, "grad_norm": 260210.0, "learning_rate": 2.4242424242424243e-08, "loss": 8807.757, "step": 30 }, { "epoch": 0.0003308929974769409, "grad_norm": 333457.03125, "learning_rate": 3.2323232323232324e-08, "loss": 6780.607, "step": 40 }, { "epoch": 0.0004136162468461761, "grad_norm": 240292.671875, "learning_rate": 4.040404040404041e-08, "loss": 10353.4664, "step": 50 }, { "epoch": 0.0004963394962154114, "grad_norm": 205636.09375, "learning_rate": 4.8484848484848486e-08, "loss": 9239.3687, "step": 60 }, { "epoch": 0.0005790627455846465, "grad_norm": 91692.9375, "learning_rate": 5.656565656565657e-08, "loss": 5793.2141, "step": 70 }, { "epoch": 0.0006617859949538818, "grad_norm": 128958.5859375, "learning_rate": 6.464646464646465e-08, "loss": 7645.5141, "step": 80 }, { "epoch": 0.000744509244323117, "grad_norm": 198792.46875, "learning_rate": 7.272727272727274e-08, "loss": 7540.1, "step": 90 }, { "epoch": 0.0008272324936923522, "grad_norm": 143177.3125, "learning_rate": 8.080808080808082e-08, "loss": 5863.3023, "step": 100 }, { "epoch": 0.0009099557430615875, "grad_norm": 69092.4453125, "learning_rate": 8.88888888888889e-08, "loss": 9028.2383, "step": 110 }, { "epoch": 0.0009926789924308227, "grad_norm": 135931.546875, "learning_rate": 9.696969696969697e-08, "loss": 7231.0602, "step": 120 }, { "epoch": 0.0010754022418000579, "grad_norm": 142479.5625, "learning_rate": 1.0505050505050506e-07, "loss": 4912.673, "step": 130 }, { "epoch": 0.001158125491169293, "grad_norm": 310996.5625, "learning_rate": 1.1313131313131314e-07, "loss": 4637.1719, "step": 140 }, { "epoch": 0.0012408487405385284, "grad_norm": 195854.375, "learning_rate": 1.2121212121212122e-07, "loss": 5191.2895, "step": 150 }, { "epoch": 0.0013235719899077636, "grad_norm": 98090.03125, "learning_rate": 1.292929292929293e-07, "loss": 6609.8664, "step": 160 }, { "epoch": 0.0014062952392769987, "grad_norm": 69154.9765625, "learning_rate": 1.3737373737373738e-07, "loss": 6437.2246, "step": 170 }, { "epoch": 0.001489018488646234, "grad_norm": 161934.78125, "learning_rate": 1.4545454545454548e-07, "loss": 7987.9906, "step": 180 }, { "epoch": 0.0015717417380154692, "grad_norm": 72492.5390625, "learning_rate": 1.5353535353535356e-07, "loss": 5339.8434, "step": 190 }, { "epoch": 0.0016544649873847044, "grad_norm": 28318.041015625, "learning_rate": 1.6161616161616163e-07, "loss": 3778.6113, "step": 200 }, { "epoch": 0.0017371882367539398, "grad_norm": 78588.4375, "learning_rate": 1.6969696969696974e-07, "loss": 5780.0262, "step": 210 }, { "epoch": 0.001819911486123175, "grad_norm": 69777.796875, "learning_rate": 1.777777777777778e-07, "loss": 3939.0852, "step": 220 }, { "epoch": 0.00190263473549241, "grad_norm": 26142.01953125, "learning_rate": 1.858585858585859e-07, "loss": 3253.984, "step": 230 }, { "epoch": 0.0019853579848616455, "grad_norm": 77199.8828125, "learning_rate": 1.9393939393939395e-07, "loss": 3297.4723, "step": 240 }, { "epoch": 0.0020680812342308806, "grad_norm": 79269.9921875, "learning_rate": 2.0202020202020205e-07, "loss": 3044.4252, "step": 250 }, { "epoch": 0.0021508044836001158, "grad_norm": 39357.45703125, "learning_rate": 2.1010101010101013e-07, "loss": 3016.1178, "step": 260 }, { "epoch": 0.002233527732969351, "grad_norm": 37892.91015625, "learning_rate": 2.181818181818182e-07, "loss": 2370.4621, "step": 270 }, { "epoch": 0.002316250982338586, "grad_norm": 40799.3984375, "learning_rate": 2.2626262626262628e-07, "loss": 2173.1178, "step": 280 }, { "epoch": 0.0023989742317078217, "grad_norm": 16129.89453125, "learning_rate": 2.343434343434344e-07, "loss": 2872.4018, "step": 290 }, { "epoch": 0.002481697481077057, "grad_norm": 12510.9287109375, "learning_rate": 2.4242424242424244e-07, "loss": 1385.131, "step": 300 }, { "epoch": 0.002564420730446292, "grad_norm": 21687.78515625, "learning_rate": 2.505050505050505e-07, "loss": 1238.8357, "step": 310 }, { "epoch": 0.002647143979815527, "grad_norm": 26216.89453125, "learning_rate": 2.585858585858586e-07, "loss": 1459.9193, "step": 320 }, { "epoch": 0.0027298672291847623, "grad_norm": 54759.859375, "learning_rate": 2.666666666666667e-07, "loss": 1641.5232, "step": 330 }, { "epoch": 0.0028125904785539974, "grad_norm": 11365.4833984375, "learning_rate": 2.7474747474747475e-07, "loss": 1190.3224, "step": 340 }, { "epoch": 0.002895313727923233, "grad_norm": 16354.1533203125, "learning_rate": 2.828282828282829e-07, "loss": 885.4668, "step": 350 }, { "epoch": 0.002978036977292468, "grad_norm": 11645.587890625, "learning_rate": 2.9090909090909096e-07, "loss": 1400.4568, "step": 360 }, { "epoch": 0.0030607602266617033, "grad_norm": 8614.1328125, "learning_rate": 2.9898989898989904e-07, "loss": 814.0722, "step": 370 }, { "epoch": 0.0031434834760309385, "grad_norm": 11007.8818359375, "learning_rate": 3.070707070707071e-07, "loss": 970.882, "step": 380 }, { "epoch": 0.0032262067254001736, "grad_norm": 6238.4267578125, "learning_rate": 3.151515151515152e-07, "loss": 946.8882, "step": 390 }, { "epoch": 0.003308929974769409, "grad_norm": 5815.009765625, "learning_rate": 3.2323232323232327e-07, "loss": 792.6276, "step": 400 }, { "epoch": 0.003391653224138644, "grad_norm": 34222.5390625, "learning_rate": 3.3131313131313135e-07, "loss": 787.8205, "step": 410 }, { "epoch": 0.0034743764735078795, "grad_norm": 4778.3984375, "learning_rate": 3.393939393939395e-07, "loss": 653.9669, "step": 420 }, { "epoch": 0.0035570997228771147, "grad_norm": 2678.25244140625, "learning_rate": 3.474747474747475e-07, "loss": 793.3054, "step": 430 }, { "epoch": 0.00363982297224635, "grad_norm": 1457.694580078125, "learning_rate": 3.555555555555556e-07, "loss": 630.5825, "step": 440 }, { "epoch": 0.003722546221615585, "grad_norm": 5033.21923828125, "learning_rate": 3.6363636363636366e-07, "loss": 759.3085, "step": 450 }, { "epoch": 0.00380526947098482, "grad_norm": 3704.6376953125, "learning_rate": 3.717171717171718e-07, "loss": 644.2665, "step": 460 }, { "epoch": 0.0038879927203540553, "grad_norm": 5654.28662109375, "learning_rate": 3.7979797979797987e-07, "loss": 710.7244, "step": 470 }, { "epoch": 0.003970715969723291, "grad_norm": 1483.4691162109375, "learning_rate": 3.878787878787879e-07, "loss": 694.1961, "step": 480 }, { "epoch": 0.004053439219092526, "grad_norm": 2019.6363525390625, "learning_rate": 3.9595959595959597e-07, "loss": 573.5805, "step": 490 }, { "epoch": 0.004136162468461761, "grad_norm": 1998.4979248046875, "learning_rate": 4.040404040404041e-07, "loss": 624.7649, "step": 500 }, { "epoch": 0.004218885717830996, "grad_norm": 2387.564453125, "learning_rate": 4.121212121212122e-07, "loss": 626.6972, "step": 510 }, { "epoch": 0.0043016089672002315, "grad_norm": 3881.398681640625, "learning_rate": 4.2020202020202026e-07, "loss": 759.5687, "step": 520 }, { "epoch": 0.004384332216569467, "grad_norm": 1924.41845703125, "learning_rate": 4.282828282828283e-07, "loss": 700.7449, "step": 530 }, { "epoch": 0.004467055465938702, "grad_norm": 2266.10693359375, "learning_rate": 4.363636363636364e-07, "loss": 640.943, "step": 540 }, { "epoch": 0.004549778715307937, "grad_norm": 7472.12548828125, "learning_rate": 4.444444444444445e-07, "loss": 806.1628, "step": 550 }, { "epoch": 0.004632501964677172, "grad_norm": 1539.118408203125, "learning_rate": 4.5252525252525257e-07, "loss": 556.9574, "step": 560 }, { "epoch": 0.004715225214046408, "grad_norm": 1867.2452392578125, "learning_rate": 4.6060606060606064e-07, "loss": 453.8387, "step": 570 }, { "epoch": 0.004797948463415643, "grad_norm": 6622.47119140625, "learning_rate": 4.686868686868688e-07, "loss": 585.511, "step": 580 }, { "epoch": 0.0048806717127848785, "grad_norm": 1117.4276123046875, "learning_rate": 4.767676767676768e-07, "loss": 591.8928, "step": 590 }, { "epoch": 0.004963394962154114, "grad_norm": 1792.2279052734375, "learning_rate": 4.848484848484849e-07, "loss": 659.099, "step": 600 }, { "epoch": 0.005046118211523349, "grad_norm": 952.1702880859375, "learning_rate": 4.929292929292929e-07, "loss": 558.1813, "step": 610 }, { "epoch": 0.005128841460892584, "grad_norm": 1103.0098876953125, "learning_rate": 5.01010101010101e-07, "loss": 488.6359, "step": 620 }, { "epoch": 0.005211564710261819, "grad_norm": 2055.734130859375, "learning_rate": 5.090909090909092e-07, "loss": 611.8516, "step": 630 }, { "epoch": 0.005294287959631054, "grad_norm": 1028.85986328125, "learning_rate": 5.171717171717172e-07, "loss": 568.1, "step": 640 }, { "epoch": 0.005377011209000289, "grad_norm": 1175.14892578125, "learning_rate": 5.252525252525253e-07, "loss": 407.9692, "step": 650 }, { "epoch": 0.005459734458369525, "grad_norm": 1459.8382568359375, "learning_rate": 5.333333333333335e-07, "loss": 496.3047, "step": 660 }, { "epoch": 0.00554245770773876, "grad_norm": 3791.671630859375, "learning_rate": 5.414141414141415e-07, "loss": 492.2279, "step": 670 }, { "epoch": 0.005625180957107995, "grad_norm": 1465.2242431640625, "learning_rate": 5.494949494949495e-07, "loss": 518.7601, "step": 680 }, { "epoch": 0.00570790420647723, "grad_norm": 1057.6175537109375, "learning_rate": 5.575757575757576e-07, "loss": 496.9481, "step": 690 }, { "epoch": 0.005790627455846466, "grad_norm": 1338.5599365234375, "learning_rate": 5.656565656565658e-07, "loss": 493.3083, "step": 700 }, { "epoch": 0.005873350705215701, "grad_norm": 1229.9342041015625, "learning_rate": 5.737373737373738e-07, "loss": 507.9572, "step": 710 }, { "epoch": 0.005956073954584936, "grad_norm": 2031.885498046875, "learning_rate": 5.818181818181819e-07, "loss": 545.9699, "step": 720 }, { "epoch": 0.0060387972039541715, "grad_norm": 1317.18408203125, "learning_rate": 5.898989898989899e-07, "loss": 515.3469, "step": 730 }, { "epoch": 0.006121520453323407, "grad_norm": 1839.4345703125, "learning_rate": 5.979797979797981e-07, "loss": 377.9613, "step": 740 }, { "epoch": 0.006204243702692642, "grad_norm": 1210.0321044921875, "learning_rate": 6.060606060606061e-07, "loss": 465.0116, "step": 750 }, { "epoch": 0.006286966952061877, "grad_norm": 2228.900390625, "learning_rate": 6.141414141414142e-07, "loss": 336.6599, "step": 760 }, { "epoch": 0.006369690201431112, "grad_norm": 1690.2906494140625, "learning_rate": 6.222222222222223e-07, "loss": 402.5558, "step": 770 }, { "epoch": 0.006452413450800347, "grad_norm": 1048.852783203125, "learning_rate": 6.303030303030304e-07, "loss": 460.7069, "step": 780 }, { "epoch": 0.0065351367001695825, "grad_norm": 1623.499755859375, "learning_rate": 6.383838383838384e-07, "loss": 322.7708, "step": 790 }, { "epoch": 0.006617859949538818, "grad_norm": 4279.55908203125, "learning_rate": 6.464646464646465e-07, "loss": 430.4449, "step": 800 }, { "epoch": 0.006700583198908053, "grad_norm": 1473.4871826171875, "learning_rate": 6.545454545454547e-07, "loss": 474.9226, "step": 810 }, { "epoch": 0.006783306448277288, "grad_norm": 1544.477783203125, "learning_rate": 6.626262626262627e-07, "loss": 459.8634, "step": 820 }, { "epoch": 0.006866029697646524, "grad_norm": 991.1500854492188, "learning_rate": 6.707070707070708e-07, "loss": 339.08, "step": 830 }, { "epoch": 0.006948752947015759, "grad_norm": 1124.6473388671875, "learning_rate": 6.78787878787879e-07, "loss": 476.3061, "step": 840 }, { "epoch": 0.007031476196384994, "grad_norm": 1139.6890869140625, "learning_rate": 6.868686868686869e-07, "loss": 504.3425, "step": 850 }, { "epoch": 0.007114199445754229, "grad_norm": 1275.02294921875, "learning_rate": 6.94949494949495e-07, "loss": 431.0188, "step": 860 }, { "epoch": 0.0071969226951234646, "grad_norm": 1159.5965576171875, "learning_rate": 7.03030303030303e-07, "loss": 455.7138, "step": 870 }, { "epoch": 0.0072796459444927, "grad_norm": 1401.516357421875, "learning_rate": 7.111111111111112e-07, "loss": 529.1278, "step": 880 }, { "epoch": 0.007362369193861935, "grad_norm": 1474.188720703125, "learning_rate": 7.191919191919193e-07, "loss": 326.5665, "step": 890 }, { "epoch": 0.00744509244323117, "grad_norm": 917.8528442382812, "learning_rate": 7.272727272727273e-07, "loss": 436.4797, "step": 900 }, { "epoch": 0.007527815692600405, "grad_norm": 941.6436157226562, "learning_rate": 7.353535353535354e-07, "loss": 357.6453, "step": 910 }, { "epoch": 0.00761053894196964, "grad_norm": 934.293701171875, "learning_rate": 7.434343434343436e-07, "loss": 433.9422, "step": 920 }, { "epoch": 0.0076932621913388755, "grad_norm": 1530.59814453125, "learning_rate": 7.515151515151516e-07, "loss": 506.7857, "step": 930 }, { "epoch": 0.007775985440708111, "grad_norm": 3313.063720703125, "learning_rate": 7.595959595959597e-07, "loss": 386.7442, "step": 940 }, { "epoch": 0.007858708690077347, "grad_norm": 9505.775390625, "learning_rate": 7.676767676767677e-07, "loss": 393.8395, "step": 950 }, { "epoch": 0.007941431939446582, "grad_norm": 9777.1015625, "learning_rate": 7.757575757575758e-07, "loss": 444.2824, "step": 960 }, { "epoch": 0.008024155188815817, "grad_norm": 952.7528686523438, "learning_rate": 7.838383838383839e-07, "loss": 463.8062, "step": 970 }, { "epoch": 0.008106878438185052, "grad_norm": 1653.4957275390625, "learning_rate": 7.919191919191919e-07, "loss": 422.011, "step": 980 }, { "epoch": 0.008189601687554287, "grad_norm": 1222.734619140625, "learning_rate": 8.000000000000001e-07, "loss": 370.3142, "step": 990 }, { "epoch": 0.008272324936923522, "grad_norm": 1304.0595703125, "learning_rate": 8.080808080808082e-07, "loss": 423.2224, "step": 1000 }, { "epoch": 0.008355048186292758, "grad_norm": 1451.695556640625, "learning_rate": 8.161616161616162e-07, "loss": 333.3123, "step": 1010 }, { "epoch": 0.008437771435661993, "grad_norm": 1295.0885009765625, "learning_rate": 8.242424242424244e-07, "loss": 395.7991, "step": 1020 }, { "epoch": 0.008520494685031228, "grad_norm": 24490.919921875, "learning_rate": 8.323232323232324e-07, "loss": 484.7744, "step": 1030 }, { "epoch": 0.008603217934400463, "grad_norm": 1630.39306640625, "learning_rate": 8.404040404040405e-07, "loss": 507.0378, "step": 1040 }, { "epoch": 0.008685941183769698, "grad_norm": 1444.76708984375, "learning_rate": 8.484848484848486e-07, "loss": 424.0987, "step": 1050 }, { "epoch": 0.008768664433138933, "grad_norm": 9745.6103515625, "learning_rate": 8.565656565656566e-07, "loss": 356.5501, "step": 1060 }, { "epoch": 0.008851387682508169, "grad_norm": 1603.8682861328125, "learning_rate": 8.646464646464647e-07, "loss": 363.1298, "step": 1070 }, { "epoch": 0.008934110931877404, "grad_norm": 1848.8529052734375, "learning_rate": 8.727272727272728e-07, "loss": 453.0991, "step": 1080 }, { "epoch": 0.009016834181246639, "grad_norm": 1721.5032958984375, "learning_rate": 8.808080808080808e-07, "loss": 352.8759, "step": 1090 }, { "epoch": 0.009099557430615874, "grad_norm": 1215.8017578125, "learning_rate": 8.88888888888889e-07, "loss": 358.2751, "step": 1100 }, { "epoch": 0.00918228067998511, "grad_norm": 3627.594482421875, "learning_rate": 8.96969696969697e-07, "loss": 373.2328, "step": 1110 }, { "epoch": 0.009265003929354344, "grad_norm": 1093.44677734375, "learning_rate": 9.050505050505051e-07, "loss": 305.7944, "step": 1120 }, { "epoch": 0.00934772717872358, "grad_norm": 5584.5224609375, "learning_rate": 9.131313131313133e-07, "loss": 371.0138, "step": 1130 }, { "epoch": 0.009430450428092816, "grad_norm": 1449.30419921875, "learning_rate": 9.212121212121213e-07, "loss": 374.5949, "step": 1140 }, { "epoch": 0.009513173677462052, "grad_norm": 1450.5250244140625, "learning_rate": 9.292929292929294e-07, "loss": 370.9519, "step": 1150 }, { "epoch": 0.009595896926831287, "grad_norm": 1153.1729736328125, "learning_rate": 9.373737373737376e-07, "loss": 311.7418, "step": 1160 }, { "epoch": 0.009678620176200522, "grad_norm": 1182.4305419921875, "learning_rate": 9.454545454545455e-07, "loss": 340.1112, "step": 1170 }, { "epoch": 0.009761343425569757, "grad_norm": 2788.3564453125, "learning_rate": 9.535353535353536e-07, "loss": 402.735, "step": 1180 }, { "epoch": 0.009844066674938992, "grad_norm": 1240.61669921875, "learning_rate": 9.616161616161617e-07, "loss": 430.115, "step": 1190 }, { "epoch": 0.009926789924308227, "grad_norm": 7811.59228515625, "learning_rate": 9.696969696969698e-07, "loss": 294.6839, "step": 1200 }, { "epoch": 0.010009513173677462, "grad_norm": 1490.346923828125, "learning_rate": 9.77777777777778e-07, "loss": 314.4158, "step": 1210 }, { "epoch": 0.010092236423046698, "grad_norm": 1153.1387939453125, "learning_rate": 9.858585858585858e-07, "loss": 422.2901, "step": 1220 }, { "epoch": 0.010174959672415933, "grad_norm": 1359.1773681640625, "learning_rate": 9.93939393939394e-07, "loss": 396.2289, "step": 1230 }, { "epoch": 0.010257682921785168, "grad_norm": 1238.34814453125, "learning_rate": 1.002020202020202e-06, "loss": 432.7112, "step": 1240 }, { "epoch": 0.010340406171154403, "grad_norm": 1150.58154296875, "learning_rate": 1.01010101010101e-06, "loss": 533.3193, "step": 1250 }, { "epoch": 0.010423129420523638, "grad_norm": 2107.6708984375, "learning_rate": 1.0181818181818183e-06, "loss": 386.3309, "step": 1260 }, { "epoch": 0.010505852669892873, "grad_norm": 1610.2342529296875, "learning_rate": 1.0262626262626264e-06, "loss": 378.974, "step": 1270 }, { "epoch": 0.010588575919262109, "grad_norm": 10111.8642578125, "learning_rate": 1.0343434343434344e-06, "loss": 440.8467, "step": 1280 }, { "epoch": 0.010671299168631344, "grad_norm": 3038.983642578125, "learning_rate": 1.0424242424242426e-06, "loss": 531.3744, "step": 1290 }, { "epoch": 0.010754022418000579, "grad_norm": 2078.825927734375, "learning_rate": 1.0505050505050506e-06, "loss": 358.3208, "step": 1300 }, { "epoch": 0.010836745667369814, "grad_norm": 1494.41943359375, "learning_rate": 1.0585858585858587e-06, "loss": 368.8838, "step": 1310 }, { "epoch": 0.01091946891673905, "grad_norm": 913.7642211914062, "learning_rate": 1.066666666666667e-06, "loss": 343.7515, "step": 1320 }, { "epoch": 0.011002192166108284, "grad_norm": 1475.224853515625, "learning_rate": 1.0747474747474747e-06, "loss": 311.1521, "step": 1330 }, { "epoch": 0.01108491541547752, "grad_norm": 6028.0986328125, "learning_rate": 1.082828282828283e-06, "loss": 346.5072, "step": 1340 }, { "epoch": 0.011167638664846755, "grad_norm": 1047.4739990234375, "learning_rate": 1.090909090909091e-06, "loss": 371.3439, "step": 1350 }, { "epoch": 0.01125036191421599, "grad_norm": 2157.152099609375, "learning_rate": 1.098989898989899e-06, "loss": 396.0025, "step": 1360 }, { "epoch": 0.011333085163585225, "grad_norm": 1551.3135986328125, "learning_rate": 1.1070707070707072e-06, "loss": 423.25, "step": 1370 }, { "epoch": 0.01141580841295446, "grad_norm": 1080.8287353515625, "learning_rate": 1.1151515151515153e-06, "loss": 255.2609, "step": 1380 }, { "epoch": 0.011498531662323695, "grad_norm": 835.9887084960938, "learning_rate": 1.1232323232323233e-06, "loss": 284.983, "step": 1390 }, { "epoch": 0.011581254911692932, "grad_norm": 774.7077026367188, "learning_rate": 1.1313131313131315e-06, "loss": 274.2369, "step": 1400 }, { "epoch": 0.011663978161062167, "grad_norm": 1325.56982421875, "learning_rate": 1.1393939393939395e-06, "loss": 797.6222, "step": 1410 }, { "epoch": 0.011746701410431402, "grad_norm": 1981.0302734375, "learning_rate": 1.1474747474747476e-06, "loss": 366.0973, "step": 1420 }, { "epoch": 0.011829424659800638, "grad_norm": 2644.739501953125, "learning_rate": 1.1555555555555556e-06, "loss": 327.9804, "step": 1430 }, { "epoch": 0.011912147909169873, "grad_norm": 1665.543212890625, "learning_rate": 1.1636363636363638e-06, "loss": 425.4298, "step": 1440 }, { "epoch": 0.011994871158539108, "grad_norm": 1862.1712646484375, "learning_rate": 1.1717171717171719e-06, "loss": 324.925, "step": 1450 }, { "epoch": 0.012077594407908343, "grad_norm": 1164.2347412109375, "learning_rate": 1.1797979797979799e-06, "loss": 298.3789, "step": 1460 }, { "epoch": 0.012160317657277578, "grad_norm": 1282.484375, "learning_rate": 1.187878787878788e-06, "loss": 399.5682, "step": 1470 }, { "epoch": 0.012243040906646813, "grad_norm": 1316.5758056640625, "learning_rate": 1.1959595959595961e-06, "loss": 338.0538, "step": 1480 }, { "epoch": 0.012325764156016049, "grad_norm": 875.5802001953125, "learning_rate": 1.2040404040404042e-06, "loss": 308.0409, "step": 1490 }, { "epoch": 0.012408487405385284, "grad_norm": 1360.3880615234375, "learning_rate": 1.2121212121212122e-06, "loss": 292.9588, "step": 1500 }, { "epoch": 0.012491210654754519, "grad_norm": 1129.11083984375, "learning_rate": 1.2202020202020202e-06, "loss": 322.4768, "step": 1510 }, { "epoch": 0.012573933904123754, "grad_norm": 2066.356689453125, "learning_rate": 1.2282828282828285e-06, "loss": 271.5606, "step": 1520 }, { "epoch": 0.012656657153492989, "grad_norm": 1669.0103759765625, "learning_rate": 1.2363636363636365e-06, "loss": 283.8805, "step": 1530 }, { "epoch": 0.012739380402862224, "grad_norm": 2613.5205078125, "learning_rate": 1.2444444444444445e-06, "loss": 376.5511, "step": 1540 }, { "epoch": 0.01282210365223146, "grad_norm": 1188.38427734375, "learning_rate": 1.2525252525252527e-06, "loss": 429.8225, "step": 1550 }, { "epoch": 0.012904826901600695, "grad_norm": 1084.84814453125, "learning_rate": 1.2606060606060608e-06, "loss": 289.73, "step": 1560 }, { "epoch": 0.01298755015096993, "grad_norm": 2206.3232421875, "learning_rate": 1.268686868686869e-06, "loss": 316.7855, "step": 1570 }, { "epoch": 0.013070273400339165, "grad_norm": 2453.128662109375, "learning_rate": 1.2767676767676768e-06, "loss": 304.9484, "step": 1580 }, { "epoch": 0.0131529966497084, "grad_norm": 976.6162719726562, "learning_rate": 1.2848484848484848e-06, "loss": 303.9162, "step": 1590 }, { "epoch": 0.013235719899077635, "grad_norm": 1493.1630859375, "learning_rate": 1.292929292929293e-06, "loss": 356.7861, "step": 1600 }, { "epoch": 0.01331844314844687, "grad_norm": 1568.7215576171875, "learning_rate": 1.301010101010101e-06, "loss": 556.4589, "step": 1610 }, { "epoch": 0.013401166397816106, "grad_norm": 2514.0361328125, "learning_rate": 1.3090909090909093e-06, "loss": 414.0774, "step": 1620 }, { "epoch": 0.01348388964718534, "grad_norm": 1089.7327880859375, "learning_rate": 1.3171717171717172e-06, "loss": 330.1746, "step": 1630 }, { "epoch": 0.013566612896554576, "grad_norm": 1226.1904296875, "learning_rate": 1.3252525252525254e-06, "loss": 408.1962, "step": 1640 }, { "epoch": 0.013649336145923813, "grad_norm": 2240.143310546875, "learning_rate": 1.3333333333333334e-06, "loss": 344.259, "step": 1650 }, { "epoch": 0.013732059395293048, "grad_norm": 1064.2305908203125, "learning_rate": 1.3414141414141417e-06, "loss": 348.617, "step": 1660 }, { "epoch": 0.013814782644662283, "grad_norm": 1232.1429443359375, "learning_rate": 1.3494949494949497e-06, "loss": 406.3739, "step": 1670 }, { "epoch": 0.013897505894031518, "grad_norm": 1123.156982421875, "learning_rate": 1.357575757575758e-06, "loss": 396.9652, "step": 1680 }, { "epoch": 0.013980229143400753, "grad_norm": 1280.202880859375, "learning_rate": 1.3656565656565657e-06, "loss": 391.5166, "step": 1690 }, { "epoch": 0.014062952392769988, "grad_norm": 1420.6954345703125, "learning_rate": 1.3737373737373738e-06, "loss": 437.0295, "step": 1700 }, { "epoch": 0.014145675642139224, "grad_norm": 2150.417236328125, "learning_rate": 1.381818181818182e-06, "loss": 346.5058, "step": 1710 }, { "epoch": 0.014228398891508459, "grad_norm": 1029.7105712890625, "learning_rate": 1.38989898989899e-06, "loss": 326.7657, "step": 1720 }, { "epoch": 0.014311122140877694, "grad_norm": 1207.562744140625, "learning_rate": 1.3979797979797982e-06, "loss": 279.2056, "step": 1730 }, { "epoch": 0.014393845390246929, "grad_norm": 919.50244140625, "learning_rate": 1.406060606060606e-06, "loss": 264.9025, "step": 1740 }, { "epoch": 0.014476568639616164, "grad_norm": 1641.6435546875, "learning_rate": 1.4141414141414143e-06, "loss": 239.4691, "step": 1750 }, { "epoch": 0.0145592918889854, "grad_norm": 1205.822998046875, "learning_rate": 1.4222222222222223e-06, "loss": 333.4085, "step": 1760 }, { "epoch": 0.014642015138354635, "grad_norm": 1508.9305419921875, "learning_rate": 1.4303030303030306e-06, "loss": 300.8089, "step": 1770 }, { "epoch": 0.01472473838772387, "grad_norm": 1087.9306640625, "learning_rate": 1.4383838383838386e-06, "loss": 292.6564, "step": 1780 }, { "epoch": 0.014807461637093105, "grad_norm": 2015.249755859375, "learning_rate": 1.4464646464646464e-06, "loss": 277.2897, "step": 1790 }, { "epoch": 0.01489018488646234, "grad_norm": 4033.002685546875, "learning_rate": 1.4545454545454546e-06, "loss": 336.2396, "step": 1800 }, { "epoch": 0.014972908135831575, "grad_norm": 1528.888916015625, "learning_rate": 1.4626262626262627e-06, "loss": 310.2283, "step": 1810 }, { "epoch": 0.01505563138520081, "grad_norm": 1040.3310546875, "learning_rate": 1.470707070707071e-06, "loss": 296.3051, "step": 1820 }, { "epoch": 0.015138354634570046, "grad_norm": 3019.808837890625, "learning_rate": 1.478787878787879e-06, "loss": 278.5342, "step": 1830 }, { "epoch": 0.01522107788393928, "grad_norm": 1161.161376953125, "learning_rate": 1.4868686868686872e-06, "loss": 357.7166, "step": 1840 }, { "epoch": 0.015303801133308516, "grad_norm": 2254.221435546875, "learning_rate": 1.494949494949495e-06, "loss": 325.2632, "step": 1850 }, { "epoch": 0.015386524382677751, "grad_norm": 3298.8662109375, "learning_rate": 1.5030303030303032e-06, "loss": 340.6655, "step": 1860 }, { "epoch": 0.015469247632046986, "grad_norm": 1297.3563232421875, "learning_rate": 1.5111111111111112e-06, "loss": 327.4712, "step": 1870 }, { "epoch": 0.015551970881416221, "grad_norm": 1163.013671875, "learning_rate": 1.5191919191919195e-06, "loss": 302.4008, "step": 1880 }, { "epoch": 0.015634694130785456, "grad_norm": 3574.491943359375, "learning_rate": 1.5272727272727275e-06, "loss": 304.6065, "step": 1890 }, { "epoch": 0.015717417380154693, "grad_norm": 953.6083984375, "learning_rate": 1.5353535353535353e-06, "loss": 283.16, "step": 1900 }, { "epoch": 0.015800140629523927, "grad_norm": 579.375732421875, "learning_rate": 1.5434343434343435e-06, "loss": 294.0983, "step": 1910 }, { "epoch": 0.015882863878893164, "grad_norm": 904.5838012695312, "learning_rate": 1.5515151515151516e-06, "loss": 277.4686, "step": 1920 }, { "epoch": 0.015965587128262397, "grad_norm": 878.8187255859375, "learning_rate": 1.5595959595959598e-06, "loss": 348.8653, "step": 1930 }, { "epoch": 0.016048310377631634, "grad_norm": 1161.1363525390625, "learning_rate": 1.5676767676767678e-06, "loss": 374.7675, "step": 1940 }, { "epoch": 0.016131033627000867, "grad_norm": 1667.3831787109375, "learning_rate": 1.5757575757575759e-06, "loss": 359.4971, "step": 1950 }, { "epoch": 0.016213756876370104, "grad_norm": 2197.3330078125, "learning_rate": 1.5838383838383839e-06, "loss": 367.9104, "step": 1960 }, { "epoch": 0.016296480125739338, "grad_norm": 1425.8123779296875, "learning_rate": 1.5919191919191921e-06, "loss": 367.0379, "step": 1970 }, { "epoch": 0.016379203375108575, "grad_norm": 4604.79052734375, "learning_rate": 1.6000000000000001e-06, "loss": 462.412, "step": 1980 }, { "epoch": 0.016461926624477808, "grad_norm": 1806.8238525390625, "learning_rate": 1.6080808080808084e-06, "loss": 350.7875, "step": 1990 }, { "epoch": 0.016544649873847045, "grad_norm": 1519.6781005859375, "learning_rate": 1.6161616161616164e-06, "loss": 272.1075, "step": 2000 }, { "epoch": 0.01662737312321628, "grad_norm": 2001.111572265625, "learning_rate": 1.6242424242424242e-06, "loss": 264.7772, "step": 2010 }, { "epoch": 0.016710096372585515, "grad_norm": 2383.3994140625, "learning_rate": 1.6323232323232325e-06, "loss": 293.2167, "step": 2020 }, { "epoch": 0.016792819621954752, "grad_norm": 670.5611572265625, "learning_rate": 1.6404040404040405e-06, "loss": 379.9431, "step": 2030 }, { "epoch": 0.016875542871323986, "grad_norm": 1432.4300537109375, "learning_rate": 1.6484848484848487e-06, "loss": 362.749, "step": 2040 }, { "epoch": 0.016958266120693222, "grad_norm": 6461.77880859375, "learning_rate": 1.6565656565656567e-06, "loss": 407.5534, "step": 2050 }, { "epoch": 0.017040989370062456, "grad_norm": 1288.83544921875, "learning_rate": 1.6646464646464648e-06, "loss": 315.7526, "step": 2060 }, { "epoch": 0.017123712619431693, "grad_norm": 907.4447021484375, "learning_rate": 1.6727272727272728e-06, "loss": 292.4799, "step": 2070 }, { "epoch": 0.017206435868800926, "grad_norm": 1921.0576171875, "learning_rate": 1.680808080808081e-06, "loss": 297.3491, "step": 2080 }, { "epoch": 0.017289159118170163, "grad_norm": 1620.2481689453125, "learning_rate": 1.688888888888889e-06, "loss": 291.1318, "step": 2090 }, { "epoch": 0.017371882367539396, "grad_norm": 892.2626342773438, "learning_rate": 1.6969696969696973e-06, "loss": 337.7454, "step": 2100 }, { "epoch": 0.017454605616908633, "grad_norm": 1724.4583740234375, "learning_rate": 1.705050505050505e-06, "loss": 386.2352, "step": 2110 }, { "epoch": 0.017537328866277867, "grad_norm": 1373.0406494140625, "learning_rate": 1.7131313131313131e-06, "loss": 312.9512, "step": 2120 }, { "epoch": 0.017620052115647104, "grad_norm": 1802.06884765625, "learning_rate": 1.7212121212121214e-06, "loss": 392.6038, "step": 2130 }, { "epoch": 0.017702775365016337, "grad_norm": 1823.2747802734375, "learning_rate": 1.7292929292929294e-06, "loss": 276.9635, "step": 2140 }, { "epoch": 0.017785498614385574, "grad_norm": 2303.46484375, "learning_rate": 1.7373737373737376e-06, "loss": 363.1688, "step": 2150 }, { "epoch": 0.017868221863754807, "grad_norm": 939.9644165039062, "learning_rate": 1.7454545454545456e-06, "loss": 371.3494, "step": 2160 }, { "epoch": 0.017950945113124044, "grad_norm": 803.9099731445312, "learning_rate": 1.7535353535353537e-06, "loss": 325.9621, "step": 2170 }, { "epoch": 0.018033668362493278, "grad_norm": 1147.0989990234375, "learning_rate": 1.7616161616161617e-06, "loss": 339.9603, "step": 2180 }, { "epoch": 0.018116391611862515, "grad_norm": 1110.0518798828125, "learning_rate": 1.76969696969697e-06, "loss": 311.3122, "step": 2190 }, { "epoch": 0.018199114861231748, "grad_norm": 1060.744140625, "learning_rate": 1.777777777777778e-06, "loss": 350.8291, "step": 2200 }, { "epoch": 0.018281838110600985, "grad_norm": 2863.481201171875, "learning_rate": 1.7858585858585862e-06, "loss": 347.886, "step": 2210 }, { "epoch": 0.01836456135997022, "grad_norm": 1049.102294921875, "learning_rate": 1.793939393939394e-06, "loss": 255.8531, "step": 2220 }, { "epoch": 0.018447284609339455, "grad_norm": 1447.9163818359375, "learning_rate": 1.802020202020202e-06, "loss": 270.9402, "step": 2230 }, { "epoch": 0.01853000785870869, "grad_norm": 5666.75634765625, "learning_rate": 1.8101010101010103e-06, "loss": 265.4302, "step": 2240 }, { "epoch": 0.018612731108077925, "grad_norm": 901.1162109375, "learning_rate": 1.8181818181818183e-06, "loss": 327.2734, "step": 2250 }, { "epoch": 0.01869545435744716, "grad_norm": 1582.9307861328125, "learning_rate": 1.8262626262626265e-06, "loss": 295.4947, "step": 2260 }, { "epoch": 0.018778177606816396, "grad_norm": 910.0087280273438, "learning_rate": 1.8343434343434343e-06, "loss": 254.5832, "step": 2270 }, { "epoch": 0.018860900856185633, "grad_norm": 1553.7003173828125, "learning_rate": 1.8424242424242426e-06, "loss": 312.1798, "step": 2280 }, { "epoch": 0.018943624105554866, "grad_norm": 1657.673828125, "learning_rate": 1.8505050505050506e-06, "loss": 333.7471, "step": 2290 }, { "epoch": 0.019026347354924103, "grad_norm": 2194.053466796875, "learning_rate": 1.8585858585858588e-06, "loss": 252.629, "step": 2300 }, { "epoch": 0.019109070604293336, "grad_norm": 968.1193237304688, "learning_rate": 1.8666666666666669e-06, "loss": 317.2808, "step": 2310 }, { "epoch": 0.019191793853662573, "grad_norm": 2099.283203125, "learning_rate": 1.874747474747475e-06, "loss": 318.3145, "step": 2320 }, { "epoch": 0.019274517103031807, "grad_norm": 1018.1463012695312, "learning_rate": 1.882828282828283e-06, "loss": 310.5461, "step": 2330 }, { "epoch": 0.019357240352401044, "grad_norm": 1319.893310546875, "learning_rate": 1.890909090909091e-06, "loss": 403.0323, "step": 2340 }, { "epoch": 0.019439963601770277, "grad_norm": 3296.14404296875, "learning_rate": 1.8989898989898992e-06, "loss": 277.1991, "step": 2350 }, { "epoch": 0.019522686851139514, "grad_norm": 2120.728515625, "learning_rate": 1.9070707070707072e-06, "loss": 266.7476, "step": 2360 }, { "epoch": 0.019605410100508747, "grad_norm": 1736.4859619140625, "learning_rate": 1.9151515151515154e-06, "loss": 385.987, "step": 2370 }, { "epoch": 0.019688133349877984, "grad_norm": 1218.7796630859375, "learning_rate": 1.9232323232323235e-06, "loss": 337.6978, "step": 2380 }, { "epoch": 0.019770856599247218, "grad_norm": 4797.71337890625, "learning_rate": 1.9313131313131315e-06, "loss": 255.6176, "step": 2390 }, { "epoch": 0.019853579848616455, "grad_norm": 1956.2137451171875, "learning_rate": 1.9393939393939395e-06, "loss": 283.0651, "step": 2400 }, { "epoch": 0.019936303097985688, "grad_norm": 918.0914916992188, "learning_rate": 1.9474747474747475e-06, "loss": 312.682, "step": 2410 }, { "epoch": 0.020019026347354925, "grad_norm": 2107.04736328125, "learning_rate": 1.955555555555556e-06, "loss": 384.748, "step": 2420 }, { "epoch": 0.02010174959672416, "grad_norm": 861.828125, "learning_rate": 1.9636363636363636e-06, "loss": 340.8865, "step": 2430 }, { "epoch": 0.020184472846093395, "grad_norm": 6480.6171875, "learning_rate": 1.9717171717171716e-06, "loss": 416.4972, "step": 2440 }, { "epoch": 0.02026719609546263, "grad_norm": 1926.79296875, "learning_rate": 1.97979797979798e-06, "loss": 260.5365, "step": 2450 }, { "epoch": 0.020349919344831865, "grad_norm": 1225.735107421875, "learning_rate": 1.987878787878788e-06, "loss": 261.068, "step": 2460 }, { "epoch": 0.0204326425942011, "grad_norm": 857.5597534179688, "learning_rate": 1.995959595959596e-06, "loss": 241.7558, "step": 2470 }, { "epoch": 0.020515365843570336, "grad_norm": 42987.5546875, "learning_rate": 2.004040404040404e-06, "loss": 380.0271, "step": 2480 }, { "epoch": 0.02059808909293957, "grad_norm": 1689.1239013671875, "learning_rate": 2.012121212121212e-06, "loss": 390.1416, "step": 2490 }, { "epoch": 0.020680812342308806, "grad_norm": 4134.138671875, "learning_rate": 2.02020202020202e-06, "loss": 265.3958, "step": 2500 }, { "epoch": 0.02076353559167804, "grad_norm": 1039.45703125, "learning_rate": 2.0282828282828286e-06, "loss": 297.8323, "step": 2510 }, { "epoch": 0.020846258841047276, "grad_norm": 2275.5380859375, "learning_rate": 2.0363636363636367e-06, "loss": 326.8338, "step": 2520 }, { "epoch": 0.02092898209041651, "grad_norm": 1341.4542236328125, "learning_rate": 2.0444444444444447e-06, "loss": 255.4626, "step": 2530 }, { "epoch": 0.021011705339785747, "grad_norm": 1704.446044921875, "learning_rate": 2.0525252525252527e-06, "loss": 311.6551, "step": 2540 }, { "epoch": 0.021094428589154984, "grad_norm": 1272.9852294921875, "learning_rate": 2.0606060606060607e-06, "loss": 278.1134, "step": 2550 }, { "epoch": 0.021177151838524217, "grad_norm": 788.3895263671875, "learning_rate": 2.0686868686868688e-06, "loss": 282.1122, "step": 2560 }, { "epoch": 0.021259875087893454, "grad_norm": 1385.612548828125, "learning_rate": 2.0767676767676768e-06, "loss": 314.3244, "step": 2570 }, { "epoch": 0.021342598337262687, "grad_norm": 1955.3095703125, "learning_rate": 2.0848484848484852e-06, "loss": 262.7597, "step": 2580 }, { "epoch": 0.021425321586631924, "grad_norm": 1727.3680419921875, "learning_rate": 2.092929292929293e-06, "loss": 264.8611, "step": 2590 }, { "epoch": 0.021508044836001158, "grad_norm": 1702.6575927734375, "learning_rate": 2.1010101010101013e-06, "loss": 286.7952, "step": 2600 }, { "epoch": 0.021590768085370395, "grad_norm": 1000.8258666992188, "learning_rate": 2.1090909090909093e-06, "loss": 259.9386, "step": 2610 }, { "epoch": 0.021673491334739628, "grad_norm": 1225.5982666015625, "learning_rate": 2.1171717171717173e-06, "loss": 248.9738, "step": 2620 }, { "epoch": 0.021756214584108865, "grad_norm": 2182.8955078125, "learning_rate": 2.1252525252525254e-06, "loss": 290.0493, "step": 2630 }, { "epoch": 0.0218389378334781, "grad_norm": 946.896484375, "learning_rate": 2.133333333333334e-06, "loss": 285.7407, "step": 2640 }, { "epoch": 0.021921661082847335, "grad_norm": 965.7489624023438, "learning_rate": 2.1414141414141414e-06, "loss": 277.0386, "step": 2650 }, { "epoch": 0.02200438433221657, "grad_norm": 1855.605712890625, "learning_rate": 2.1494949494949494e-06, "loss": 308.2425, "step": 2660 }, { "epoch": 0.022087107581585805, "grad_norm": 945.2372436523438, "learning_rate": 2.157575757575758e-06, "loss": 314.9084, "step": 2670 }, { "epoch": 0.02216983083095504, "grad_norm": 2101.0771484375, "learning_rate": 2.165656565656566e-06, "loss": 277.5356, "step": 2680 }, { "epoch": 0.022252554080324276, "grad_norm": 931.0570678710938, "learning_rate": 2.173737373737374e-06, "loss": 380.9075, "step": 2690 }, { "epoch": 0.02233527732969351, "grad_norm": 849.65380859375, "learning_rate": 2.181818181818182e-06, "loss": 266.7404, "step": 2700 }, { "epoch": 0.022418000579062746, "grad_norm": 795.3492431640625, "learning_rate": 2.18989898989899e-06, "loss": 301.0031, "step": 2710 }, { "epoch": 0.02250072382843198, "grad_norm": 998.2992553710938, "learning_rate": 2.197979797979798e-06, "loss": 236.0826, "step": 2720 }, { "epoch": 0.022583447077801216, "grad_norm": 3029.24462890625, "learning_rate": 2.2060606060606064e-06, "loss": 284.6466, "step": 2730 }, { "epoch": 0.02266617032717045, "grad_norm": 991.4857177734375, "learning_rate": 2.2141414141414145e-06, "loss": 480.9545, "step": 2740 }, { "epoch": 0.022748893576539687, "grad_norm": 610.8831787109375, "learning_rate": 2.222222222222222e-06, "loss": 280.1348, "step": 2750 }, { "epoch": 0.02283161682590892, "grad_norm": 1224.61083984375, "learning_rate": 2.2303030303030305e-06, "loss": 287.0002, "step": 2760 }, { "epoch": 0.022914340075278157, "grad_norm": 967.6873779296875, "learning_rate": 2.2383838383838385e-06, "loss": 302.4269, "step": 2770 }, { "epoch": 0.02299706332464739, "grad_norm": 1249.24169921875, "learning_rate": 2.2464646464646466e-06, "loss": 324.5018, "step": 2780 }, { "epoch": 0.023079786574016627, "grad_norm": 2674.385498046875, "learning_rate": 2.254545454545455e-06, "loss": 253.8143, "step": 2790 }, { "epoch": 0.023162509823385864, "grad_norm": 1356.0936279296875, "learning_rate": 2.262626262626263e-06, "loss": 287.3596, "step": 2800 }, { "epoch": 0.023245233072755098, "grad_norm": 1003.8848266601562, "learning_rate": 2.2707070707070706e-06, "loss": 259.5809, "step": 2810 }, { "epoch": 0.023327956322124335, "grad_norm": 1609.186767578125, "learning_rate": 2.278787878787879e-06, "loss": 243.4104, "step": 2820 }, { "epoch": 0.023410679571493568, "grad_norm": 1286.5640869140625, "learning_rate": 2.286868686868687e-06, "loss": 303.2196, "step": 2830 }, { "epoch": 0.023493402820862805, "grad_norm": 1285.9228515625, "learning_rate": 2.294949494949495e-06, "loss": 274.7912, "step": 2840 }, { "epoch": 0.02357612607023204, "grad_norm": 1015.9469604492188, "learning_rate": 2.303030303030303e-06, "loss": 255.5114, "step": 2850 }, { "epoch": 0.023658849319601275, "grad_norm": 1524.3387451171875, "learning_rate": 2.311111111111111e-06, "loss": 394.2463, "step": 2860 }, { "epoch": 0.02374157256897051, "grad_norm": 4101.37548828125, "learning_rate": 2.3191919191919192e-06, "loss": 343.4752, "step": 2870 }, { "epoch": 0.023824295818339745, "grad_norm": 1837.815673828125, "learning_rate": 2.3272727272727277e-06, "loss": 306.1534, "step": 2880 }, { "epoch": 0.02390701906770898, "grad_norm": 1742.3822021484375, "learning_rate": 2.3353535353535357e-06, "loss": 382.2382, "step": 2890 }, { "epoch": 0.023989742317078216, "grad_norm": 922.0031127929688, "learning_rate": 2.3434343434343437e-06, "loss": 230.2729, "step": 2900 }, { "epoch": 0.02407246556644745, "grad_norm": 860.6764526367188, "learning_rate": 2.3515151515151517e-06, "loss": 257.8272, "step": 2910 }, { "epoch": 0.024155188815816686, "grad_norm": 3094.787841796875, "learning_rate": 2.3595959595959598e-06, "loss": 247.0388, "step": 2920 }, { "epoch": 0.02423791206518592, "grad_norm": 1400.10009765625, "learning_rate": 2.367676767676768e-06, "loss": 350.3922, "step": 2930 }, { "epoch": 0.024320635314555156, "grad_norm": 1306.7431640625, "learning_rate": 2.375757575757576e-06, "loss": 318.7807, "step": 2940 }, { "epoch": 0.02440335856392439, "grad_norm": 859.3858032226562, "learning_rate": 2.3838383838383843e-06, "loss": 292.0828, "step": 2950 }, { "epoch": 0.024486081813293627, "grad_norm": 1207.4444580078125, "learning_rate": 2.3919191919191923e-06, "loss": 268.6456, "step": 2960 }, { "epoch": 0.02456880506266286, "grad_norm": 1388.7242431640625, "learning_rate": 2.4000000000000003e-06, "loss": 327.8167, "step": 2970 }, { "epoch": 0.024651528312032097, "grad_norm": 1601.5413818359375, "learning_rate": 2.4080808080808083e-06, "loss": 217.4461, "step": 2980 }, { "epoch": 0.02473425156140133, "grad_norm": 1094.072509765625, "learning_rate": 2.4161616161616164e-06, "loss": 237.2505, "step": 2990 }, { "epoch": 0.024816974810770567, "grad_norm": 1064.4403076171875, "learning_rate": 2.4242424242424244e-06, "loss": 274.2577, "step": 3000 }, { "epoch": 0.0248996980601398, "grad_norm": 2712.60986328125, "learning_rate": 2.432323232323233e-06, "loss": 336.2089, "step": 3010 }, { "epoch": 0.024982421309509038, "grad_norm": 657.0144653320312, "learning_rate": 2.4404040404040404e-06, "loss": 285.5267, "step": 3020 }, { "epoch": 0.02506514455887827, "grad_norm": 1818.3843994140625, "learning_rate": 2.4484848484848485e-06, "loss": 345.3333, "step": 3030 }, { "epoch": 0.025147867808247508, "grad_norm": 1924.058349609375, "learning_rate": 2.456565656565657e-06, "loss": 254.806, "step": 3040 }, { "epoch": 0.025230591057616745, "grad_norm": 926.4883422851562, "learning_rate": 2.464646464646465e-06, "loss": 248.8595, "step": 3050 }, { "epoch": 0.025313314306985978, "grad_norm": 1217.055419921875, "learning_rate": 2.472727272727273e-06, "loss": 258.9547, "step": 3060 }, { "epoch": 0.025396037556355215, "grad_norm": 837.9513549804688, "learning_rate": 2.480808080808081e-06, "loss": 239.7319, "step": 3070 }, { "epoch": 0.02547876080572445, "grad_norm": 702.333740234375, "learning_rate": 2.488888888888889e-06, "loss": 290.1943, "step": 3080 }, { "epoch": 0.025561484055093685, "grad_norm": 1217.6317138671875, "learning_rate": 2.496969696969697e-06, "loss": 282.1116, "step": 3090 }, { "epoch": 0.02564420730446292, "grad_norm": 1642.226806640625, "learning_rate": 2.5050505050505055e-06, "loss": 242.3538, "step": 3100 }, { "epoch": 0.025726930553832156, "grad_norm": 1036.6883544921875, "learning_rate": 2.5131313131313135e-06, "loss": 339.3384, "step": 3110 }, { "epoch": 0.02580965380320139, "grad_norm": 2547.31396484375, "learning_rate": 2.5212121212121215e-06, "loss": 268.7113, "step": 3120 }, { "epoch": 0.025892377052570626, "grad_norm": 2499.7900390625, "learning_rate": 2.5292929292929296e-06, "loss": 371.6728, "step": 3130 }, { "epoch": 0.02597510030193986, "grad_norm": 835.4087524414062, "learning_rate": 2.537373737373738e-06, "loss": 331.6685, "step": 3140 }, { "epoch": 0.026057823551309096, "grad_norm": 1723.9932861328125, "learning_rate": 2.5454545454545456e-06, "loss": 241.2475, "step": 3150 }, { "epoch": 0.02614054680067833, "grad_norm": 892.8982543945312, "learning_rate": 2.5535353535353536e-06, "loss": 239.1675, "step": 3160 }, { "epoch": 0.026223270050047567, "grad_norm": 1679.0596923828125, "learning_rate": 2.5616161616161617e-06, "loss": 251.3017, "step": 3170 }, { "epoch": 0.0263059932994168, "grad_norm": 1314.4605712890625, "learning_rate": 2.5696969696969697e-06, "loss": 291.6142, "step": 3180 }, { "epoch": 0.026388716548786037, "grad_norm": 1959.02490234375, "learning_rate": 2.577777777777778e-06, "loss": 335.8238, "step": 3190 }, { "epoch": 0.02647143979815527, "grad_norm": 1912.5706787109375, "learning_rate": 2.585858585858586e-06, "loss": 374.3031, "step": 3200 }, { "epoch": 0.026554163047524507, "grad_norm": 3296.021728515625, "learning_rate": 2.593939393939394e-06, "loss": 258.0628, "step": 3210 }, { "epoch": 0.02663688629689374, "grad_norm": 1115.4268798828125, "learning_rate": 2.602020202020202e-06, "loss": 334.8846, "step": 3220 }, { "epoch": 0.026719609546262978, "grad_norm": 973.0841064453125, "learning_rate": 2.6101010101010107e-06, "loss": 275.7638, "step": 3230 }, { "epoch": 0.02680233279563221, "grad_norm": 1405.8780517578125, "learning_rate": 2.6181818181818187e-06, "loss": 290.556, "step": 3240 }, { "epoch": 0.026885056045001448, "grad_norm": 2512.458740234375, "learning_rate": 2.6262626262626267e-06, "loss": 255.2991, "step": 3250 }, { "epoch": 0.02696777929437068, "grad_norm": 1015.4577026367188, "learning_rate": 2.6343434343434343e-06, "loss": 293.1441, "step": 3260 }, { "epoch": 0.027050502543739918, "grad_norm": 1590.4383544921875, "learning_rate": 2.6424242424242423e-06, "loss": 315.3852, "step": 3270 }, { "epoch": 0.02713322579310915, "grad_norm": 1241.1497802734375, "learning_rate": 2.6505050505050508e-06, "loss": 279.314, "step": 3280 }, { "epoch": 0.02721594904247839, "grad_norm": 803.2537841796875, "learning_rate": 2.658585858585859e-06, "loss": 237.7475, "step": 3290 }, { "epoch": 0.027298672291847625, "grad_norm": 1308.884765625, "learning_rate": 2.666666666666667e-06, "loss": 289.3862, "step": 3300 }, { "epoch": 0.02738139554121686, "grad_norm": 1037.373291015625, "learning_rate": 2.674747474747475e-06, "loss": 278.7418, "step": 3310 }, { "epoch": 0.027464118790586096, "grad_norm": 1414.7493896484375, "learning_rate": 2.6828282828282833e-06, "loss": 388.9876, "step": 3320 }, { "epoch": 0.02754684203995533, "grad_norm": 1686.8880615234375, "learning_rate": 2.6909090909090913e-06, "loss": 224.3872, "step": 3330 }, { "epoch": 0.027629565289324566, "grad_norm": 1130.1907958984375, "learning_rate": 2.6989898989898994e-06, "loss": 219.274, "step": 3340 }, { "epoch": 0.0277122885386938, "grad_norm": 1604.05419921875, "learning_rate": 2.7070707070707074e-06, "loss": 313.1014, "step": 3350 }, { "epoch": 0.027795011788063036, "grad_norm": 1515.8199462890625, "learning_rate": 2.715151515151516e-06, "loss": 351.9699, "step": 3360 }, { "epoch": 0.02787773503743227, "grad_norm": 1105.636474609375, "learning_rate": 2.7232323232323234e-06, "loss": 258.1985, "step": 3370 }, { "epoch": 0.027960458286801507, "grad_norm": 2343.5537109375, "learning_rate": 2.7313131313131315e-06, "loss": 247.2701, "step": 3380 }, { "epoch": 0.02804318153617074, "grad_norm": 3364.70068359375, "learning_rate": 2.7393939393939395e-06, "loss": 320.3825, "step": 3390 }, { "epoch": 0.028125904785539977, "grad_norm": 1226.61279296875, "learning_rate": 2.7474747474747475e-06, "loss": 308.3539, "step": 3400 }, { "epoch": 0.02820862803490921, "grad_norm": 1365.4581298828125, "learning_rate": 2.755555555555556e-06, "loss": 265.8813, "step": 3410 }, { "epoch": 0.028291351284278447, "grad_norm": 1471.4437255859375, "learning_rate": 2.763636363636364e-06, "loss": 246.5465, "step": 3420 }, { "epoch": 0.02837407453364768, "grad_norm": 1522.9873046875, "learning_rate": 2.771717171717172e-06, "loss": 294.6199, "step": 3430 }, { "epoch": 0.028456797783016918, "grad_norm": 2295.73095703125, "learning_rate": 2.77979797979798e-06, "loss": 471.7754, "step": 3440 }, { "epoch": 0.02853952103238615, "grad_norm": 1030.5484619140625, "learning_rate": 2.7878787878787885e-06, "loss": 315.6531, "step": 3450 }, { "epoch": 0.028622244281755388, "grad_norm": 1534.0970458984375, "learning_rate": 2.7959595959595965e-06, "loss": 236.7922, "step": 3460 }, { "epoch": 0.02870496753112462, "grad_norm": 1193.41748046875, "learning_rate": 2.804040404040404e-06, "loss": 301.1876, "step": 3470 }, { "epoch": 0.028787690780493858, "grad_norm": 884.4568481445312, "learning_rate": 2.812121212121212e-06, "loss": 248.0099, "step": 3480 }, { "epoch": 0.02887041402986309, "grad_norm": 1417.0870361328125, "learning_rate": 2.82020202020202e-06, "loss": 214.0175, "step": 3490 }, { "epoch": 0.02895313727923233, "grad_norm": 906.0728149414062, "learning_rate": 2.8282828282828286e-06, "loss": 337.6909, "step": 3500 }, { "epoch": 0.029035860528601562, "grad_norm": 720.5711059570312, "learning_rate": 2.8363636363636366e-06, "loss": 237.9402, "step": 3510 }, { "epoch": 0.0291185837779708, "grad_norm": 2159.7626953125, "learning_rate": 2.8444444444444446e-06, "loss": 290.2204, "step": 3520 }, { "epoch": 0.029201307027340032, "grad_norm": 1583.6937255859375, "learning_rate": 2.8525252525252527e-06, "loss": 339.3829, "step": 3530 }, { "epoch": 0.02928403027670927, "grad_norm": 1711.7625732421875, "learning_rate": 2.860606060606061e-06, "loss": 338.8203, "step": 3540 }, { "epoch": 0.029366753526078506, "grad_norm": 1867.3909912109375, "learning_rate": 2.868686868686869e-06, "loss": 295.4354, "step": 3550 }, { "epoch": 0.02944947677544774, "grad_norm": 1350.51708984375, "learning_rate": 2.876767676767677e-06, "loss": 255.1478, "step": 3560 }, { "epoch": 0.029532200024816976, "grad_norm": 1835.9559326171875, "learning_rate": 2.884848484848485e-06, "loss": 235.7582, "step": 3570 }, { "epoch": 0.02961492327418621, "grad_norm": 1273.539794921875, "learning_rate": 2.892929292929293e-06, "loss": 305.7688, "step": 3580 }, { "epoch": 0.029697646523555447, "grad_norm": 1092.1051025390625, "learning_rate": 2.9010101010101012e-06, "loss": 229.7812, "step": 3590 }, { "epoch": 0.02978036977292468, "grad_norm": 1232.7423095703125, "learning_rate": 2.9090909090909093e-06, "loss": 308.136, "step": 3600 }, { "epoch": 0.029863093022293917, "grad_norm": 860.4628295898438, "learning_rate": 2.9171717171717173e-06, "loss": 245.5094, "step": 3610 }, { "epoch": 0.02994581627166315, "grad_norm": 1105.772705078125, "learning_rate": 2.9252525252525253e-06, "loss": 225.044, "step": 3620 }, { "epoch": 0.030028539521032387, "grad_norm": 1092.992431640625, "learning_rate": 2.9333333333333338e-06, "loss": 240.9013, "step": 3630 }, { "epoch": 0.03011126277040162, "grad_norm": 1877.3671875, "learning_rate": 2.941414141414142e-06, "loss": 304.4779, "step": 3640 }, { "epoch": 0.030193986019770858, "grad_norm": 1553.412109375, "learning_rate": 2.94949494949495e-06, "loss": 253.1373, "step": 3650 }, { "epoch": 0.03027670926914009, "grad_norm": 1163.033447265625, "learning_rate": 2.957575757575758e-06, "loss": 272.4007, "step": 3660 }, { "epoch": 0.030359432518509328, "grad_norm": 1105.1661376953125, "learning_rate": 2.9656565656565663e-06, "loss": 243.5992, "step": 3670 }, { "epoch": 0.03044215576787856, "grad_norm": 1370.2781982421875, "learning_rate": 2.9737373737373743e-06, "loss": 232.3612, "step": 3680 }, { "epoch": 0.030524879017247798, "grad_norm": 925.8299560546875, "learning_rate": 2.981818181818182e-06, "loss": 194.9076, "step": 3690 }, { "epoch": 0.03060760226661703, "grad_norm": 4565.89208984375, "learning_rate": 2.98989898989899e-06, "loss": 317.9731, "step": 3700 }, { "epoch": 0.03069032551598627, "grad_norm": 1785.587890625, "learning_rate": 2.997979797979798e-06, "loss": 261.7598, "step": 3710 }, { "epoch": 0.030773048765355502, "grad_norm": 1177.1761474609375, "learning_rate": 3.0060606060606064e-06, "loss": 211.625, "step": 3720 }, { "epoch": 0.03085577201472474, "grad_norm": 980.7220458984375, "learning_rate": 3.0141414141414144e-06, "loss": 198.16, "step": 3730 }, { "epoch": 0.030938495264093972, "grad_norm": 1450.34033203125, "learning_rate": 3.0222222222222225e-06, "loss": 281.4796, "step": 3740 }, { "epoch": 0.03102121851346321, "grad_norm": 917.07275390625, "learning_rate": 3.0303030303030305e-06, "loss": 266.2501, "step": 3750 }, { "epoch": 0.031103941762832443, "grad_norm": 1657.814697265625, "learning_rate": 3.038383838383839e-06, "loss": 279.4682, "step": 3760 }, { "epoch": 0.03118666501220168, "grad_norm": 1294.77685546875, "learning_rate": 3.046464646464647e-06, "loss": 259.5207, "step": 3770 }, { "epoch": 0.03126938826157091, "grad_norm": 1622.716796875, "learning_rate": 3.054545454545455e-06, "loss": 298.1313, "step": 3780 }, { "epoch": 0.031352111510940146, "grad_norm": 1660.8116455078125, "learning_rate": 3.0626262626262626e-06, "loss": 250.9933, "step": 3790 }, { "epoch": 0.03143483476030939, "grad_norm": 799.0031127929688, "learning_rate": 3.0707070707070706e-06, "loss": 310.6956, "step": 3800 }, { "epoch": 0.03151755800967862, "grad_norm": 1199.9678955078125, "learning_rate": 3.078787878787879e-06, "loss": 283.1158, "step": 3810 }, { "epoch": 0.031600281259047854, "grad_norm": 1449.6212158203125, "learning_rate": 3.086868686868687e-06, "loss": 249.93, "step": 3820 }, { "epoch": 0.031683004508417094, "grad_norm": 1299.2960205078125, "learning_rate": 3.094949494949495e-06, "loss": 224.7813, "step": 3830 }, { "epoch": 0.03176572775778633, "grad_norm": 2239.721435546875, "learning_rate": 3.103030303030303e-06, "loss": 223.2453, "step": 3840 }, { "epoch": 0.03184845100715556, "grad_norm": 1760.5396728515625, "learning_rate": 3.1111111111111116e-06, "loss": 252.7971, "step": 3850 }, { "epoch": 0.031931174256524794, "grad_norm": 682.6892700195312, "learning_rate": 3.1191919191919196e-06, "loss": 216.5466, "step": 3860 }, { "epoch": 0.032013897505894034, "grad_norm": 2052.32861328125, "learning_rate": 3.1272727272727276e-06, "loss": 288.775, "step": 3870 }, { "epoch": 0.03209662075526327, "grad_norm": 1602.2225341796875, "learning_rate": 3.1353535353535357e-06, "loss": 328.757, "step": 3880 }, { "epoch": 0.0321793440046325, "grad_norm": 681.2459106445312, "learning_rate": 3.143434343434344e-06, "loss": 190.3252, "step": 3890 }, { "epoch": 0.032262067254001735, "grad_norm": 1182.54052734375, "learning_rate": 3.1515151515151517e-06, "loss": 268.2503, "step": 3900 }, { "epoch": 0.032344790503370975, "grad_norm": 1219.3660888671875, "learning_rate": 3.1595959595959597e-06, "loss": 239.1843, "step": 3910 }, { "epoch": 0.03242751375274021, "grad_norm": 1041.707275390625, "learning_rate": 3.1676767676767678e-06, "loss": 273.016, "step": 3920 }, { "epoch": 0.03251023700210944, "grad_norm": 1269.7681884765625, "learning_rate": 3.1757575757575758e-06, "loss": 249.6547, "step": 3930 }, { "epoch": 0.032592960251478675, "grad_norm": 1372.903564453125, "learning_rate": 3.1838383838383842e-06, "loss": 311.8126, "step": 3940 }, { "epoch": 0.032675683500847916, "grad_norm": 2617.830322265625, "learning_rate": 3.1919191919191923e-06, "loss": 299.153, "step": 3950 }, { "epoch": 0.03275840675021715, "grad_norm": 1350.3302001953125, "learning_rate": 3.2000000000000003e-06, "loss": 238.4413, "step": 3960 }, { "epoch": 0.03284112999958638, "grad_norm": 2270.457763671875, "learning_rate": 3.2080808080808083e-06, "loss": 267.5316, "step": 3970 }, { "epoch": 0.032923853248955616, "grad_norm": 1409.05712890625, "learning_rate": 3.2161616161616168e-06, "loss": 237.9799, "step": 3980 }, { "epoch": 0.033006576498324856, "grad_norm": 1641.050537109375, "learning_rate": 3.2242424242424248e-06, "loss": 352.6279, "step": 3990 }, { "epoch": 0.03308929974769409, "grad_norm": 1178.6591796875, "learning_rate": 3.232323232323233e-06, "loss": 200.0928, "step": 4000 }, { "epoch": 0.03317202299706332, "grad_norm": 895.1983032226562, "learning_rate": 3.2404040404040404e-06, "loss": 274.1366, "step": 4010 }, { "epoch": 0.03325474624643256, "grad_norm": 2193.698486328125, "learning_rate": 3.2484848484848484e-06, "loss": 205.6771, "step": 4020 }, { "epoch": 0.0333374694958018, "grad_norm": 1705.0743408203125, "learning_rate": 3.256565656565657e-06, "loss": 253.3518, "step": 4030 }, { "epoch": 0.03342019274517103, "grad_norm": 1552.232666015625, "learning_rate": 3.264646464646465e-06, "loss": 243.4265, "step": 4040 }, { "epoch": 0.033502915994540264, "grad_norm": 4037.46875, "learning_rate": 3.272727272727273e-06, "loss": 229.4004, "step": 4050 }, { "epoch": 0.033585639243909504, "grad_norm": 3386.515869140625, "learning_rate": 3.280808080808081e-06, "loss": 338.8573, "step": 4060 }, { "epoch": 0.03366836249327874, "grad_norm": 1145.6063232421875, "learning_rate": 3.2888888888888894e-06, "loss": 254.9263, "step": 4070 }, { "epoch": 0.03375108574264797, "grad_norm": 2452.519775390625, "learning_rate": 3.2969696969696974e-06, "loss": 365.3762, "step": 4080 }, { "epoch": 0.033833808992017204, "grad_norm": 1752.8304443359375, "learning_rate": 3.3050505050505054e-06, "loss": 242.0698, "step": 4090 }, { "epoch": 0.033916532241386445, "grad_norm": 1975.453857421875, "learning_rate": 3.3131313131313135e-06, "loss": 294.2446, "step": 4100 }, { "epoch": 0.03399925549075568, "grad_norm": 1159.05615234375, "learning_rate": 3.321212121212121e-06, "loss": 254.9454, "step": 4110 }, { "epoch": 0.03408197874012491, "grad_norm": 904.747802734375, "learning_rate": 3.3292929292929295e-06, "loss": 291.38, "step": 4120 }, { "epoch": 0.034164701989494145, "grad_norm": 1563.5689697265625, "learning_rate": 3.3373737373737375e-06, "loss": 212.5943, "step": 4130 }, { "epoch": 0.034247425238863385, "grad_norm": 1204.7821044921875, "learning_rate": 3.3454545454545456e-06, "loss": 265.8186, "step": 4140 }, { "epoch": 0.03433014848823262, "grad_norm": 904.9209594726562, "learning_rate": 3.3535353535353536e-06, "loss": 260.1104, "step": 4150 }, { "epoch": 0.03441287173760185, "grad_norm": 1248.727783203125, "learning_rate": 3.361616161616162e-06, "loss": 291.2336, "step": 4160 }, { "epoch": 0.034495594986971086, "grad_norm": 893.3284912109375, "learning_rate": 3.36969696969697e-06, "loss": 233.5644, "step": 4170 }, { "epoch": 0.034578318236340326, "grad_norm": 1158.58984375, "learning_rate": 3.377777777777778e-06, "loss": 246.782, "step": 4180 }, { "epoch": 0.03466104148570956, "grad_norm": 1969.4088134765625, "learning_rate": 3.385858585858586e-06, "loss": 237.9205, "step": 4190 }, { "epoch": 0.03474376473507879, "grad_norm": 1035.88427734375, "learning_rate": 3.3939393939393946e-06, "loss": 321.7581, "step": 4200 }, { "epoch": 0.034826487984448026, "grad_norm": 918.1465454101562, "learning_rate": 3.4020202020202026e-06, "loss": 272.7782, "step": 4210 }, { "epoch": 0.03490921123381727, "grad_norm": 1070.0615234375, "learning_rate": 3.41010101010101e-06, "loss": 224.1239, "step": 4220 }, { "epoch": 0.0349919344831865, "grad_norm": 1230.9432373046875, "learning_rate": 3.4181818181818182e-06, "loss": 276.4893, "step": 4230 }, { "epoch": 0.035074657732555733, "grad_norm": 1131.748046875, "learning_rate": 3.4262626262626262e-06, "loss": 217.2287, "step": 4240 }, { "epoch": 0.03515738098192497, "grad_norm": 2427.749267578125, "learning_rate": 3.4343434343434347e-06, "loss": 251.6459, "step": 4250 }, { "epoch": 0.03524010423129421, "grad_norm": 1668.80810546875, "learning_rate": 3.4424242424242427e-06, "loss": 317.6187, "step": 4260 }, { "epoch": 0.03532282748066344, "grad_norm": 372.29510498046875, "learning_rate": 3.4505050505050507e-06, "loss": 179.444, "step": 4270 }, { "epoch": 0.035405550730032674, "grad_norm": 1011.8233032226562, "learning_rate": 3.4585858585858588e-06, "loss": 219.8903, "step": 4280 }, { "epoch": 0.03548827397940191, "grad_norm": 1418.88232421875, "learning_rate": 3.4666666666666672e-06, "loss": 286.1539, "step": 4290 }, { "epoch": 0.03557099722877115, "grad_norm": 2228.23095703125, "learning_rate": 3.4747474747474752e-06, "loss": 293.0923, "step": 4300 }, { "epoch": 0.03565372047814038, "grad_norm": 715.6218872070312, "learning_rate": 3.4828282828282833e-06, "loss": 282.1017, "step": 4310 }, { "epoch": 0.035736443727509615, "grad_norm": 742.7416381835938, "learning_rate": 3.4909090909090913e-06, "loss": 204.9258, "step": 4320 }, { "epoch": 0.035819166976878855, "grad_norm": 1228.9534912109375, "learning_rate": 3.498989898989899e-06, "loss": 259.2133, "step": 4330 }, { "epoch": 0.03590189022624809, "grad_norm": 967.5274047851562, "learning_rate": 3.5070707070707073e-06, "loss": 288.3385, "step": 4340 }, { "epoch": 0.03598461347561732, "grad_norm": 1587.67724609375, "learning_rate": 3.5151515151515154e-06, "loss": 277.6203, "step": 4350 }, { "epoch": 0.036067336724986555, "grad_norm": 1431.6448974609375, "learning_rate": 3.5232323232323234e-06, "loss": 213.064, "step": 4360 }, { "epoch": 0.036150059974355796, "grad_norm": 1135.779052734375, "learning_rate": 3.5313131313131314e-06, "loss": 221.4357, "step": 4370 }, { "epoch": 0.03623278322372503, "grad_norm": 1519.3890380859375, "learning_rate": 3.53939393939394e-06, "loss": 277.6575, "step": 4380 }, { "epoch": 0.03631550647309426, "grad_norm": 1360.25048828125, "learning_rate": 3.547474747474748e-06, "loss": 195.2055, "step": 4390 }, { "epoch": 0.036398229722463496, "grad_norm": 2059.44287109375, "learning_rate": 3.555555555555556e-06, "loss": 324.8405, "step": 4400 }, { "epoch": 0.036480952971832736, "grad_norm": 780.7919921875, "learning_rate": 3.563636363636364e-06, "loss": 212.4319, "step": 4410 }, { "epoch": 0.03656367622120197, "grad_norm": 1413.5367431640625, "learning_rate": 3.5717171717171724e-06, "loss": 255.6041, "step": 4420 }, { "epoch": 0.0366463994705712, "grad_norm": 637.59521484375, "learning_rate": 3.57979797979798e-06, "loss": 193.1592, "step": 4430 }, { "epoch": 0.03672912271994044, "grad_norm": 1240.098876953125, "learning_rate": 3.587878787878788e-06, "loss": 331.8386, "step": 4440 }, { "epoch": 0.03681184596930968, "grad_norm": 980.0120239257812, "learning_rate": 3.595959595959596e-06, "loss": 276.4505, "step": 4450 }, { "epoch": 0.03689456921867891, "grad_norm": 2805.927001953125, "learning_rate": 3.604040404040404e-06, "loss": 269.0381, "step": 4460 }, { "epoch": 0.036977292468048144, "grad_norm": 2359.113525390625, "learning_rate": 3.6121212121212125e-06, "loss": 258.4454, "step": 4470 }, { "epoch": 0.03706001571741738, "grad_norm": 1293.63720703125, "learning_rate": 3.6202020202020205e-06, "loss": 225.2379, "step": 4480 }, { "epoch": 0.03714273896678662, "grad_norm": 2243.848876953125, "learning_rate": 3.6282828282828286e-06, "loss": 213.6965, "step": 4490 }, { "epoch": 0.03722546221615585, "grad_norm": 770.7854614257812, "learning_rate": 3.6363636363636366e-06, "loss": 231.6815, "step": 4500 }, { "epoch": 0.037308185465525084, "grad_norm": 1308.199462890625, "learning_rate": 3.644444444444445e-06, "loss": 232.9941, "step": 4510 }, { "epoch": 0.03739090871489432, "grad_norm": 883.5875244140625, "learning_rate": 3.652525252525253e-06, "loss": 216.6295, "step": 4520 }, { "epoch": 0.03747363196426356, "grad_norm": 672.0443115234375, "learning_rate": 3.660606060606061e-06, "loss": 201.8103, "step": 4530 }, { "epoch": 0.03755635521363279, "grad_norm": 1314.2528076171875, "learning_rate": 3.6686868686868687e-06, "loss": 256.3278, "step": 4540 }, { "epoch": 0.037639078463002025, "grad_norm": 1359.0018310546875, "learning_rate": 3.6767676767676767e-06, "loss": 216.419, "step": 4550 }, { "epoch": 0.037721801712371265, "grad_norm": 1884.207763671875, "learning_rate": 3.684848484848485e-06, "loss": 211.731, "step": 4560 }, { "epoch": 0.0378045249617405, "grad_norm": 1213.398193359375, "learning_rate": 3.692929292929293e-06, "loss": 272.2958, "step": 4570 }, { "epoch": 0.03788724821110973, "grad_norm": 2207.1171875, "learning_rate": 3.701010101010101e-06, "loss": 265.4331, "step": 4580 }, { "epoch": 0.037969971460478966, "grad_norm": 1944.73876953125, "learning_rate": 3.7090909090909092e-06, "loss": 272.1659, "step": 4590 }, { "epoch": 0.038052694709848206, "grad_norm": 1226.4189453125, "learning_rate": 3.7171717171717177e-06, "loss": 226.3827, "step": 4600 }, { "epoch": 0.03813541795921744, "grad_norm": 1100.8829345703125, "learning_rate": 3.7252525252525257e-06, "loss": 221.5738, "step": 4610 }, { "epoch": 0.03821814120858667, "grad_norm": 1859.399169921875, "learning_rate": 3.7333333333333337e-06, "loss": 229.0128, "step": 4620 }, { "epoch": 0.038300864457955906, "grad_norm": 1107.61572265625, "learning_rate": 3.7414141414141418e-06, "loss": 224.0653, "step": 4630 }, { "epoch": 0.03838358770732515, "grad_norm": 1617.0789794921875, "learning_rate": 3.74949494949495e-06, "loss": 240.6666, "step": 4640 }, { "epoch": 0.03846631095669438, "grad_norm": 2554.2158203125, "learning_rate": 3.757575757575758e-06, "loss": 258.9923, "step": 4650 }, { "epoch": 0.038549034206063613, "grad_norm": 3580.094482421875, "learning_rate": 3.765656565656566e-06, "loss": 270.2465, "step": 4660 }, { "epoch": 0.03863175745543285, "grad_norm": 1391.0946044921875, "learning_rate": 3.773737373737374e-06, "loss": 184.8681, "step": 4670 }, { "epoch": 0.03871448070480209, "grad_norm": 1413.2205810546875, "learning_rate": 3.781818181818182e-06, "loss": 210.8876, "step": 4680 }, { "epoch": 0.03879720395417132, "grad_norm": 1123.7315673828125, "learning_rate": 3.7898989898989903e-06, "loss": 226.6469, "step": 4690 }, { "epoch": 0.038879927203540554, "grad_norm": 1507.7408447265625, "learning_rate": 3.7979797979797984e-06, "loss": 243.835, "step": 4700 }, { "epoch": 0.03896265045290979, "grad_norm": 1154.9927978515625, "learning_rate": 3.8060606060606064e-06, "loss": 275.5142, "step": 4710 }, { "epoch": 0.03904537370227903, "grad_norm": 1263.9693603515625, "learning_rate": 3.8141414141414144e-06, "loss": 208.4059, "step": 4720 }, { "epoch": 0.03912809695164826, "grad_norm": 2182.939208984375, "learning_rate": 3.8222222222222224e-06, "loss": 225.735, "step": 4730 }, { "epoch": 0.039210820201017495, "grad_norm": 1238.331298828125, "learning_rate": 3.830303030303031e-06, "loss": 215.773, "step": 4740 }, { "epoch": 0.03929354345038673, "grad_norm": 1922.897216796875, "learning_rate": 3.8383838383838385e-06, "loss": 205.833, "step": 4750 }, { "epoch": 0.03937626669975597, "grad_norm": 1248.5205078125, "learning_rate": 3.846464646464647e-06, "loss": 212.8604, "step": 4760 }, { "epoch": 0.0394589899491252, "grad_norm": 1999.810791015625, "learning_rate": 3.8545454545454545e-06, "loss": 234.7124, "step": 4770 }, { "epoch": 0.039541713198494435, "grad_norm": 1900.56787109375, "learning_rate": 3.862626262626263e-06, "loss": 206.3176, "step": 4780 }, { "epoch": 0.03962443644786367, "grad_norm": 1776.326171875, "learning_rate": 3.8707070707070706e-06, "loss": 235.4771, "step": 4790 }, { "epoch": 0.03970715969723291, "grad_norm": 1206.546630859375, "learning_rate": 3.878787878787879e-06, "loss": 232.6159, "step": 4800 }, { "epoch": 0.03978988294660214, "grad_norm": 1193.0535888671875, "learning_rate": 3.8868686868686875e-06, "loss": 239.4891, "step": 4810 }, { "epoch": 0.039872606195971376, "grad_norm": 1266.81103515625, "learning_rate": 3.894949494949495e-06, "loss": 264.1928, "step": 4820 }, { "epoch": 0.039955329445340616, "grad_norm": 1152.444580078125, "learning_rate": 3.9030303030303035e-06, "loss": 197.2479, "step": 4830 }, { "epoch": 0.04003805269470985, "grad_norm": 1796.5777587890625, "learning_rate": 3.911111111111112e-06, "loss": 232.1052, "step": 4840 }, { "epoch": 0.04012077594407908, "grad_norm": 1190.642822265625, "learning_rate": 3.9191919191919196e-06, "loss": 246.6287, "step": 4850 }, { "epoch": 0.04020349919344832, "grad_norm": 1134.314453125, "learning_rate": 3.927272727272727e-06, "loss": 268.4593, "step": 4860 }, { "epoch": 0.04028622244281756, "grad_norm": 1696.9732666015625, "learning_rate": 3.935353535353536e-06, "loss": 226.5233, "step": 4870 }, { "epoch": 0.04036894569218679, "grad_norm": 1237.491455078125, "learning_rate": 3.943434343434343e-06, "loss": 196.7495, "step": 4880 }, { "epoch": 0.040451668941556024, "grad_norm": 2431.97265625, "learning_rate": 3.951515151515152e-06, "loss": 220.9505, "step": 4890 }, { "epoch": 0.04053439219092526, "grad_norm": 1421.2952880859375, "learning_rate": 3.95959595959596e-06, "loss": 253.9248, "step": 4900 }, { "epoch": 0.0406171154402945, "grad_norm": 1305.2431640625, "learning_rate": 3.967676767676768e-06, "loss": 302.1668, "step": 4910 }, { "epoch": 0.04069983868966373, "grad_norm": 1975.3135986328125, "learning_rate": 3.975757575757576e-06, "loss": 282.536, "step": 4920 }, { "epoch": 0.040782561939032964, "grad_norm": 1655.1588134765625, "learning_rate": 3.983838383838385e-06, "loss": 217.2957, "step": 4930 }, { "epoch": 0.0408652851884022, "grad_norm": 1051.86083984375, "learning_rate": 3.991919191919192e-06, "loss": 240.2383, "step": 4940 }, { "epoch": 0.04094800843777144, "grad_norm": 773.5546875, "learning_rate": 4.000000000000001e-06, "loss": 221.8524, "step": 4950 }, { "epoch": 0.04103073168714067, "grad_norm": 1287.1944580078125, "learning_rate": 4.008080808080808e-06, "loss": 229.3614, "step": 4960 }, { "epoch": 0.041113454936509905, "grad_norm": 1897.0220947265625, "learning_rate": 4.016161616161616e-06, "loss": 233.9596, "step": 4970 }, { "epoch": 0.04119617818587914, "grad_norm": 1463.894287109375, "learning_rate": 4.024242424242424e-06, "loss": 205.995, "step": 4980 }, { "epoch": 0.04127890143524838, "grad_norm": 981.1106567382812, "learning_rate": 4.032323232323233e-06, "loss": 226.2633, "step": 4990 }, { "epoch": 0.04136162468461761, "grad_norm": 3404.24365234375, "learning_rate": 4.04040404040404e-06, "loss": 249.0778, "step": 5000 }, { "epoch": 0.041444347933986846, "grad_norm": 1160.4622802734375, "learning_rate": 4.048484848484849e-06, "loss": 201.7348, "step": 5010 }, { "epoch": 0.04152707118335608, "grad_norm": 1339.532470703125, "learning_rate": 4.056565656565657e-06, "loss": 296.6651, "step": 5020 }, { "epoch": 0.04160979443272532, "grad_norm": 1330.253173828125, "learning_rate": 4.064646464646465e-06, "loss": 222.7661, "step": 5030 }, { "epoch": 0.04169251768209455, "grad_norm": 1871.73095703125, "learning_rate": 4.072727272727273e-06, "loss": 185.6875, "step": 5040 }, { "epoch": 0.041775240931463786, "grad_norm": 989.5170288085938, "learning_rate": 4.080808080808081e-06, "loss": 184.853, "step": 5050 }, { "epoch": 0.04185796418083302, "grad_norm": 1325.712158203125, "learning_rate": 4.088888888888889e-06, "loss": 177.4656, "step": 5060 }, { "epoch": 0.04194068743020226, "grad_norm": 1084.450927734375, "learning_rate": 4.096969696969697e-06, "loss": 218.0059, "step": 5070 }, { "epoch": 0.04202341067957149, "grad_norm": 1230.160888671875, "learning_rate": 4.105050505050505e-06, "loss": 202.8811, "step": 5080 }, { "epoch": 0.04210613392894073, "grad_norm": 909.71728515625, "learning_rate": 4.113131313131313e-06, "loss": 229.2698, "step": 5090 }, { "epoch": 0.04218885717830997, "grad_norm": 2449.3154296875, "learning_rate": 4.1212121212121215e-06, "loss": 211.7621, "step": 5100 }, { "epoch": 0.0422715804276792, "grad_norm": 578.8762817382812, "learning_rate": 4.12929292929293e-06, "loss": 220.8765, "step": 5110 }, { "epoch": 0.042354303677048434, "grad_norm": 1294.6402587890625, "learning_rate": 4.1373737373737375e-06, "loss": 252.3842, "step": 5120 }, { "epoch": 0.04243702692641767, "grad_norm": 911.5681762695312, "learning_rate": 4.145454545454546e-06, "loss": 294.7006, "step": 5130 }, { "epoch": 0.04251975017578691, "grad_norm": 1328.2413330078125, "learning_rate": 4.1535353535353536e-06, "loss": 211.482, "step": 5140 }, { "epoch": 0.04260247342515614, "grad_norm": 1352.004638671875, "learning_rate": 4.161616161616162e-06, "loss": 210.5752, "step": 5150 }, { "epoch": 0.042685196674525375, "grad_norm": 1081.876953125, "learning_rate": 4.1696969696969705e-06, "loss": 224.4739, "step": 5160 }, { "epoch": 0.04276791992389461, "grad_norm": 2295.4033203125, "learning_rate": 4.177777777777778e-06, "loss": 229.6548, "step": 5170 }, { "epoch": 0.04285064317326385, "grad_norm": 1036.4749755859375, "learning_rate": 4.185858585858586e-06, "loss": 208.1364, "step": 5180 }, { "epoch": 0.04293336642263308, "grad_norm": 1558.9718017578125, "learning_rate": 4.193939393939394e-06, "loss": 262.7995, "step": 5190 }, { "epoch": 0.043016089672002315, "grad_norm": 1155.65478515625, "learning_rate": 4.2020202020202026e-06, "loss": 251.0647, "step": 5200 }, { "epoch": 0.04309881292137155, "grad_norm": 1081.0423583984375, "learning_rate": 4.21010101010101e-06, "loss": 235.0402, "step": 5210 }, { "epoch": 0.04318153617074079, "grad_norm": 725.3358764648438, "learning_rate": 4.218181818181819e-06, "loss": 310.5599, "step": 5220 }, { "epoch": 0.04326425942011002, "grad_norm": 1182.2801513671875, "learning_rate": 4.226262626262626e-06, "loss": 178.3826, "step": 5230 }, { "epoch": 0.043346982669479256, "grad_norm": 1413.02587890625, "learning_rate": 4.234343434343435e-06, "loss": 257.1767, "step": 5240 }, { "epoch": 0.04342970591884849, "grad_norm": 1222.1142578125, "learning_rate": 4.242424242424243e-06, "loss": 218.1251, "step": 5250 }, { "epoch": 0.04351242916821773, "grad_norm": 998.1055297851562, "learning_rate": 4.250505050505051e-06, "loss": 211.1936, "step": 5260 }, { "epoch": 0.04359515241758696, "grad_norm": 844.65478515625, "learning_rate": 4.258585858585859e-06, "loss": 235.3665, "step": 5270 }, { "epoch": 0.0436778756669562, "grad_norm": 691.6785278320312, "learning_rate": 4.266666666666668e-06, "loss": 173.3902, "step": 5280 }, { "epoch": 0.04376059891632543, "grad_norm": 1113.0621337890625, "learning_rate": 4.274747474747475e-06, "loss": 169.7838, "step": 5290 }, { "epoch": 0.04384332216569467, "grad_norm": 1622.1474609375, "learning_rate": 4.282828282828283e-06, "loss": 217.8731, "step": 5300 }, { "epoch": 0.043926045415063904, "grad_norm": 1753.8427734375, "learning_rate": 4.290909090909091e-06, "loss": 251.789, "step": 5310 }, { "epoch": 0.04400876866443314, "grad_norm": 918.1004638671875, "learning_rate": 4.298989898989899e-06, "loss": 201.8801, "step": 5320 }, { "epoch": 0.04409149191380238, "grad_norm": 899.498779296875, "learning_rate": 4.307070707070707e-06, "loss": 230.2721, "step": 5330 }, { "epoch": 0.04417421516317161, "grad_norm": 1194.7314453125, "learning_rate": 4.315151515151516e-06, "loss": 302.4559, "step": 5340 }, { "epoch": 0.044256938412540844, "grad_norm": 745.4715576171875, "learning_rate": 4.323232323232323e-06, "loss": 217.5794, "step": 5350 }, { "epoch": 0.04433966166191008, "grad_norm": 3024.5693359375, "learning_rate": 4.331313131313132e-06, "loss": 215.8062, "step": 5360 }, { "epoch": 0.04442238491127932, "grad_norm": 692.5128173828125, "learning_rate": 4.33939393939394e-06, "loss": 206.9033, "step": 5370 }, { "epoch": 0.04450510816064855, "grad_norm": 638.6357421875, "learning_rate": 4.347474747474748e-06, "loss": 277.6233, "step": 5380 }, { "epoch": 0.044587831410017785, "grad_norm": 1237.0350341796875, "learning_rate": 4.3555555555555555e-06, "loss": 202.0422, "step": 5390 }, { "epoch": 0.04467055465938702, "grad_norm": 1546.1956787109375, "learning_rate": 4.363636363636364e-06, "loss": 195.5879, "step": 5400 }, { "epoch": 0.04475327790875626, "grad_norm": 1744.332763671875, "learning_rate": 4.3717171717171715e-06, "loss": 308.2887, "step": 5410 }, { "epoch": 0.04483600115812549, "grad_norm": 1885.436767578125, "learning_rate": 4.37979797979798e-06, "loss": 257.4365, "step": 5420 }, { "epoch": 0.044918724407494726, "grad_norm": 1949.8758544921875, "learning_rate": 4.387878787878788e-06, "loss": 288.0396, "step": 5430 }, { "epoch": 0.04500144765686396, "grad_norm": 2583.996826171875, "learning_rate": 4.395959595959596e-06, "loss": 243.6479, "step": 5440 }, { "epoch": 0.0450841709062332, "grad_norm": 940.0198974609375, "learning_rate": 4.4040404040404044e-06, "loss": 209.4657, "step": 5450 }, { "epoch": 0.04516689415560243, "grad_norm": 897.3545532226562, "learning_rate": 4.412121212121213e-06, "loss": 244.724, "step": 5460 }, { "epoch": 0.045249617404971666, "grad_norm": 1344.112060546875, "learning_rate": 4.4202020202020205e-06, "loss": 210.6839, "step": 5470 }, { "epoch": 0.0453323406543409, "grad_norm": 2638.66357421875, "learning_rate": 4.428282828282829e-06, "loss": 281.9325, "step": 5480 }, { "epoch": 0.04541506390371014, "grad_norm": 1480.95068359375, "learning_rate": 4.436363636363637e-06, "loss": 227.8617, "step": 5490 }, { "epoch": 0.04549778715307937, "grad_norm": 1484.61962890625, "learning_rate": 4.444444444444444e-06, "loss": 307.6048, "step": 5500 }, { "epoch": 0.04558051040244861, "grad_norm": 921.8578491210938, "learning_rate": 4.452525252525253e-06, "loss": 283.7532, "step": 5510 }, { "epoch": 0.04566323365181784, "grad_norm": 1015.0638427734375, "learning_rate": 4.460606060606061e-06, "loss": 232.6935, "step": 5520 }, { "epoch": 0.04574595690118708, "grad_norm": 1154.900146484375, "learning_rate": 4.468686868686869e-06, "loss": 237.65, "step": 5530 }, { "epoch": 0.045828680150556314, "grad_norm": 1067.7034912109375, "learning_rate": 4.476767676767677e-06, "loss": 178.2383, "step": 5540 }, { "epoch": 0.04591140339992555, "grad_norm": 1582.29541015625, "learning_rate": 4.4848484848484855e-06, "loss": 208.2042, "step": 5550 }, { "epoch": 0.04599412664929478, "grad_norm": 1326.168212890625, "learning_rate": 4.492929292929293e-06, "loss": 206.2426, "step": 5560 }, { "epoch": 0.04607684989866402, "grad_norm": 1166.575927734375, "learning_rate": 4.501010101010102e-06, "loss": 246.9012, "step": 5570 }, { "epoch": 0.046159573148033255, "grad_norm": 1051.04345703125, "learning_rate": 4.50909090909091e-06, "loss": 220.1656, "step": 5580 }, { "epoch": 0.04624229639740249, "grad_norm": 994.1414794921875, "learning_rate": 4.517171717171718e-06, "loss": 246.7432, "step": 5590 }, { "epoch": 0.04632501964677173, "grad_norm": 1792.089111328125, "learning_rate": 4.525252525252526e-06, "loss": 195.5538, "step": 5600 }, { "epoch": 0.04640774289614096, "grad_norm": 889.2850341796875, "learning_rate": 4.533333333333334e-06, "loss": 205.7484, "step": 5610 }, { "epoch": 0.046490466145510195, "grad_norm": 986.1232299804688, "learning_rate": 4.541414141414141e-06, "loss": 259.9392, "step": 5620 }, { "epoch": 0.04657318939487943, "grad_norm": 1162.5496826171875, "learning_rate": 4.54949494949495e-06, "loss": 215.2503, "step": 5630 }, { "epoch": 0.04665591264424867, "grad_norm": 1568.9271240234375, "learning_rate": 4.557575757575758e-06, "loss": 198.5209, "step": 5640 }, { "epoch": 0.0467386358936179, "grad_norm": 1163.0565185546875, "learning_rate": 4.565656565656566e-06, "loss": 199.4977, "step": 5650 }, { "epoch": 0.046821359142987136, "grad_norm": 918.8221435546875, "learning_rate": 4.573737373737374e-06, "loss": 292.0952, "step": 5660 }, { "epoch": 0.04690408239235637, "grad_norm": 1420.076171875, "learning_rate": 4.581818181818183e-06, "loss": 273.1453, "step": 5670 }, { "epoch": 0.04698680564172561, "grad_norm": 751.3970947265625, "learning_rate": 4.58989898989899e-06, "loss": 227.5678, "step": 5680 }, { "epoch": 0.04706952889109484, "grad_norm": 4106.84814453125, "learning_rate": 4.597979797979799e-06, "loss": 257.4646, "step": 5690 }, { "epoch": 0.04715225214046408, "grad_norm": 1618.319091796875, "learning_rate": 4.606060606060606e-06, "loss": 242.2784, "step": 5700 }, { "epoch": 0.04723497538983331, "grad_norm": 1682.986083984375, "learning_rate": 4.614141414141414e-06, "loss": 270.8784, "step": 5710 }, { "epoch": 0.04731769863920255, "grad_norm": 2293.774658203125, "learning_rate": 4.622222222222222e-06, "loss": 310.2911, "step": 5720 }, { "epoch": 0.047400421888571784, "grad_norm": 896.1268920898438, "learning_rate": 4.630303030303031e-06, "loss": 262.9586, "step": 5730 }, { "epoch": 0.04748314513794102, "grad_norm": 1522.369384765625, "learning_rate": 4.6383838383838384e-06, "loss": 200.667, "step": 5740 }, { "epoch": 0.04756586838731025, "grad_norm": 1271.126220703125, "learning_rate": 4.646464646464647e-06, "loss": 211.7462, "step": 5750 }, { "epoch": 0.04764859163667949, "grad_norm": 1342.4813232421875, "learning_rate": 4.654545454545455e-06, "loss": 271.1495, "step": 5760 }, { "epoch": 0.047731314886048724, "grad_norm": 1042.1790771484375, "learning_rate": 4.662626262626263e-06, "loss": 207.4777, "step": 5770 }, { "epoch": 0.04781403813541796, "grad_norm": 1017.2898559570312, "learning_rate": 4.670707070707071e-06, "loss": 272.7402, "step": 5780 }, { "epoch": 0.04789676138478719, "grad_norm": 1240.869140625, "learning_rate": 4.678787878787879e-06, "loss": 224.074, "step": 5790 }, { "epoch": 0.04797948463415643, "grad_norm": 1895.0374755859375, "learning_rate": 4.6868686868686874e-06, "loss": 348.4789, "step": 5800 }, { "epoch": 0.048062207883525665, "grad_norm": 1665.503662109375, "learning_rate": 4.694949494949496e-06, "loss": 287.3858, "step": 5810 }, { "epoch": 0.0481449311328949, "grad_norm": 888.2938842773438, "learning_rate": 4.7030303030303035e-06, "loss": 186.7068, "step": 5820 }, { "epoch": 0.04822765438226414, "grad_norm": 1077.885009765625, "learning_rate": 4.711111111111111e-06, "loss": 188.8208, "step": 5830 }, { "epoch": 0.04831037763163337, "grad_norm": 1367.186279296875, "learning_rate": 4.7191919191919195e-06, "loss": 235.7021, "step": 5840 }, { "epoch": 0.048393100881002606, "grad_norm": 880.5682373046875, "learning_rate": 4.727272727272728e-06, "loss": 150.1969, "step": 5850 }, { "epoch": 0.04847582413037184, "grad_norm": 1563.840576171875, "learning_rate": 4.735353535353536e-06, "loss": 213.3092, "step": 5860 }, { "epoch": 0.04855854737974108, "grad_norm": 1928.1456298828125, "learning_rate": 4.743434343434344e-06, "loss": 258.8421, "step": 5870 }, { "epoch": 0.04864127062911031, "grad_norm": 1255.3079833984375, "learning_rate": 4.751515151515152e-06, "loss": 242.0784, "step": 5880 }, { "epoch": 0.048723993878479546, "grad_norm": 1542.299072265625, "learning_rate": 4.75959595959596e-06, "loss": 303.837, "step": 5890 }, { "epoch": 0.04880671712784878, "grad_norm": 2009.2406005859375, "learning_rate": 4.7676767676767685e-06, "loss": 155.5049, "step": 5900 }, { "epoch": 0.04888944037721802, "grad_norm": 1701.25830078125, "learning_rate": 4.775757575757576e-06, "loss": 289.9675, "step": 5910 }, { "epoch": 0.04897216362658725, "grad_norm": 1794.075439453125, "learning_rate": 4.783838383838385e-06, "loss": 206.6406, "step": 5920 }, { "epoch": 0.04905488687595649, "grad_norm": 2225.670654296875, "learning_rate": 4.791919191919192e-06, "loss": 241.3904, "step": 5930 }, { "epoch": 0.04913761012532572, "grad_norm": 1904.87939453125, "learning_rate": 4.800000000000001e-06, "loss": 245.3052, "step": 5940 }, { "epoch": 0.04922033337469496, "grad_norm": 1126.554931640625, "learning_rate": 4.808080808080808e-06, "loss": 188.594, "step": 5950 }, { "epoch": 0.049303056624064194, "grad_norm": 691.1046142578125, "learning_rate": 4.816161616161617e-06, "loss": 248.3712, "step": 5960 }, { "epoch": 0.04938577987343343, "grad_norm": 1169.880859375, "learning_rate": 4.824242424242424e-06, "loss": 245.4409, "step": 5970 }, { "epoch": 0.04946850312280266, "grad_norm": 1042.1029052734375, "learning_rate": 4.832323232323233e-06, "loss": 202.4587, "step": 5980 }, { "epoch": 0.0495512263721719, "grad_norm": 1452.813720703125, "learning_rate": 4.840404040404041e-06, "loss": 257.6912, "step": 5990 }, { "epoch": 0.049633949621541135, "grad_norm": 1247.24609375, "learning_rate": 4.848484848484849e-06, "loss": 210.8361, "step": 6000 }, { "epoch": 0.04971667287091037, "grad_norm": 1635.0625, "learning_rate": 4.856565656565657e-06, "loss": 241.0676, "step": 6010 }, { "epoch": 0.0497993961202796, "grad_norm": 1609.6522216796875, "learning_rate": 4.864646464646466e-06, "loss": 188.2872, "step": 6020 }, { "epoch": 0.04988211936964884, "grad_norm": 1085.433837890625, "learning_rate": 4.872727272727273e-06, "loss": 194.121, "step": 6030 }, { "epoch": 0.049964842619018075, "grad_norm": 1211.4505615234375, "learning_rate": 4.880808080808081e-06, "loss": 237.9093, "step": 6040 }, { "epoch": 0.05004756586838731, "grad_norm": 1017.5370483398438, "learning_rate": 4.888888888888889e-06, "loss": 298.4579, "step": 6050 }, { "epoch": 0.05013028911775654, "grad_norm": 1557.1414794921875, "learning_rate": 4.896969696969697e-06, "loss": 254.7002, "step": 6060 }, { "epoch": 0.05021301236712578, "grad_norm": 966.5237426757812, "learning_rate": 4.905050505050505e-06, "loss": 218.6844, "step": 6070 }, { "epoch": 0.050295735616495016, "grad_norm": 1119.049072265625, "learning_rate": 4.913131313131314e-06, "loss": 185.6555, "step": 6080 }, { "epoch": 0.05037845886586425, "grad_norm": 813.0082397460938, "learning_rate": 4.9212121212121214e-06, "loss": 193.8167, "step": 6090 }, { "epoch": 0.05046118211523349, "grad_norm": 1165.0540771484375, "learning_rate": 4.92929292929293e-06, "loss": 270.3936, "step": 6100 }, { "epoch": 0.05054390536460272, "grad_norm": 774.7178344726562, "learning_rate": 4.937373737373738e-06, "loss": 187.8722, "step": 6110 }, { "epoch": 0.050626628613971957, "grad_norm": 1326.367919921875, "learning_rate": 4.945454545454546e-06, "loss": 243.3256, "step": 6120 }, { "epoch": 0.05070935186334119, "grad_norm": 1486.3304443359375, "learning_rate": 4.953535353535354e-06, "loss": 186.0064, "step": 6130 }, { "epoch": 0.05079207511271043, "grad_norm": 1937.40234375, "learning_rate": 4.961616161616162e-06, "loss": 246.7979, "step": 6140 }, { "epoch": 0.050874798362079664, "grad_norm": 1517.21923828125, "learning_rate": 4.9696969696969696e-06, "loss": 231.4793, "step": 6150 }, { "epoch": 0.0509575216114489, "grad_norm": 5960.52197265625, "learning_rate": 4.977777777777778e-06, "loss": 197.895, "step": 6160 }, { "epoch": 0.05104024486081813, "grad_norm": 1942.2510986328125, "learning_rate": 4.9858585858585865e-06, "loss": 300.5423, "step": 6170 }, { "epoch": 0.05112296811018737, "grad_norm": 1276.1055908203125, "learning_rate": 4.993939393939394e-06, "loss": 288.2531, "step": 6180 }, { "epoch": 0.051205691359556604, "grad_norm": 2154.565673828125, "learning_rate": 5.0020202020202025e-06, "loss": 259.9872, "step": 6190 }, { "epoch": 0.05128841460892584, "grad_norm": 961.3928833007812, "learning_rate": 5.010101010101011e-06, "loss": 177.8174, "step": 6200 }, { "epoch": 0.05137113785829507, "grad_norm": 1438.574462890625, "learning_rate": 5.0181818181818186e-06, "loss": 197.5066, "step": 6210 }, { "epoch": 0.05145386110766431, "grad_norm": 766.4448852539062, "learning_rate": 5.026262626262627e-06, "loss": 257.3491, "step": 6220 }, { "epoch": 0.051536584357033545, "grad_norm": 793.5451049804688, "learning_rate": 5.034343434343435e-06, "loss": 184.9512, "step": 6230 }, { "epoch": 0.05161930760640278, "grad_norm": 869.6885375976562, "learning_rate": 5.042424242424243e-06, "loss": 253.1606, "step": 6240 }, { "epoch": 0.05170203085577201, "grad_norm": 1229.890869140625, "learning_rate": 5.0505050505050515e-06, "loss": 184.6765, "step": 6250 }, { "epoch": 0.05178475410514125, "grad_norm": 804.93994140625, "learning_rate": 5.058585858585859e-06, "loss": 162.1275, "step": 6260 }, { "epoch": 0.051867477354510486, "grad_norm": 1098.615478515625, "learning_rate": 5.0666666666666676e-06, "loss": 174.5565, "step": 6270 }, { "epoch": 0.05195020060387972, "grad_norm": 570.5386352539062, "learning_rate": 5.074747474747476e-06, "loss": 182.8037, "step": 6280 }, { "epoch": 0.05203292385324895, "grad_norm": 3363.004150390625, "learning_rate": 5.082828282828284e-06, "loss": 265.7634, "step": 6290 }, { "epoch": 0.05211564710261819, "grad_norm": 2040.7962646484375, "learning_rate": 5.090909090909091e-06, "loss": 240.1976, "step": 6300 }, { "epoch": 0.052198370351987426, "grad_norm": 1316.55908203125, "learning_rate": 5.098989898989899e-06, "loss": 249.6734, "step": 6310 }, { "epoch": 0.05228109360135666, "grad_norm": 2261.989013671875, "learning_rate": 5.107070707070707e-06, "loss": 208.5467, "step": 6320 }, { "epoch": 0.0523638168507259, "grad_norm": 1246.7120361328125, "learning_rate": 5.115151515151515e-06, "loss": 207.1795, "step": 6330 }, { "epoch": 0.05244654010009513, "grad_norm": 1235.8924560546875, "learning_rate": 5.123232323232323e-06, "loss": 231.5552, "step": 6340 }, { "epoch": 0.05252926334946437, "grad_norm": 805.1367797851562, "learning_rate": 5.131313131313132e-06, "loss": 217.3461, "step": 6350 }, { "epoch": 0.0526119865988336, "grad_norm": 902.8432006835938, "learning_rate": 5.139393939393939e-06, "loss": 250.8667, "step": 6360 }, { "epoch": 0.05269470984820284, "grad_norm": 1314.75634765625, "learning_rate": 5.147474747474748e-06, "loss": 188.1431, "step": 6370 }, { "epoch": 0.052777433097572074, "grad_norm": 1282.649169921875, "learning_rate": 5.155555555555556e-06, "loss": 198.1481, "step": 6380 }, { "epoch": 0.05286015634694131, "grad_norm": 1167.904052734375, "learning_rate": 5.163636363636364e-06, "loss": 192.307, "step": 6390 }, { "epoch": 0.05294287959631054, "grad_norm": 686.8750610351562, "learning_rate": 5.171717171717172e-06, "loss": 225.3385, "step": 6400 }, { "epoch": 0.05302560284567978, "grad_norm": 1144.7125244140625, "learning_rate": 5.17979797979798e-06, "loss": 188.2769, "step": 6410 }, { "epoch": 0.053108326095049015, "grad_norm": 1010.5237426757812, "learning_rate": 5.187878787878788e-06, "loss": 225.7106, "step": 6420 }, { "epoch": 0.05319104934441825, "grad_norm": 1336.3463134765625, "learning_rate": 5.195959595959597e-06, "loss": 208.7707, "step": 6430 }, { "epoch": 0.05327377259378748, "grad_norm": 1576.3480224609375, "learning_rate": 5.204040404040404e-06, "loss": 236.1072, "step": 6440 }, { "epoch": 0.05335649584315672, "grad_norm": 1445.076416015625, "learning_rate": 5.212121212121213e-06, "loss": 271.4749, "step": 6450 }, { "epoch": 0.053439219092525955, "grad_norm": 895.9810791015625, "learning_rate": 5.220202020202021e-06, "loss": 258.855, "step": 6460 }, { "epoch": 0.05352194234189519, "grad_norm": 952.8675537109375, "learning_rate": 5.228282828282829e-06, "loss": 175.3387, "step": 6470 }, { "epoch": 0.05360466559126442, "grad_norm": 1066.5716552734375, "learning_rate": 5.236363636363637e-06, "loss": 218.2292, "step": 6480 }, { "epoch": 0.05368738884063366, "grad_norm": 849.3695678710938, "learning_rate": 5.244444444444445e-06, "loss": 221.0137, "step": 6490 }, { "epoch": 0.053770112090002896, "grad_norm": 1675.2149658203125, "learning_rate": 5.252525252525253e-06, "loss": 223.2969, "step": 6500 }, { "epoch": 0.05385283533937213, "grad_norm": 1693.541748046875, "learning_rate": 5.26060606060606e-06, "loss": 229.6179, "step": 6510 }, { "epoch": 0.05393555858874136, "grad_norm": 1324.1651611328125, "learning_rate": 5.268686868686869e-06, "loss": 244.8779, "step": 6520 }, { "epoch": 0.0540182818381106, "grad_norm": 1403.1368408203125, "learning_rate": 5.276767676767677e-06, "loss": 268.8457, "step": 6530 }, { "epoch": 0.054101005087479836, "grad_norm": 1165.638671875, "learning_rate": 5.284848484848485e-06, "loss": 239.8147, "step": 6540 }, { "epoch": 0.05418372833684907, "grad_norm": 995.2945556640625, "learning_rate": 5.292929292929293e-06, "loss": 229.9961, "step": 6550 }, { "epoch": 0.0542664515862183, "grad_norm": 1505.5985107421875, "learning_rate": 5.3010101010101016e-06, "loss": 223.1589, "step": 6560 }, { "epoch": 0.054349174835587544, "grad_norm": 1667.1300048828125, "learning_rate": 5.309090909090909e-06, "loss": 217.6828, "step": 6570 }, { "epoch": 0.05443189808495678, "grad_norm": 1639.0364990234375, "learning_rate": 5.317171717171718e-06, "loss": 205.0005, "step": 6580 }, { "epoch": 0.05451462133432601, "grad_norm": 1414.21240234375, "learning_rate": 5.325252525252525e-06, "loss": 215.9356, "step": 6590 }, { "epoch": 0.05459734458369525, "grad_norm": 586.2081298828125, "learning_rate": 5.333333333333334e-06, "loss": 227.373, "step": 6600 }, { "epoch": 0.054680067833064484, "grad_norm": 2097.269775390625, "learning_rate": 5.341414141414142e-06, "loss": 234.4245, "step": 6610 }, { "epoch": 0.05476279108243372, "grad_norm": 1416.9189453125, "learning_rate": 5.34949494949495e-06, "loss": 262.4427, "step": 6620 }, { "epoch": 0.05484551433180295, "grad_norm": 1144.749755859375, "learning_rate": 5.357575757575758e-06, "loss": 201.3394, "step": 6630 }, { "epoch": 0.05492823758117219, "grad_norm": 1287.0030517578125, "learning_rate": 5.365656565656567e-06, "loss": 281.6339, "step": 6640 }, { "epoch": 0.055010960830541425, "grad_norm": 1555.04150390625, "learning_rate": 5.373737373737374e-06, "loss": 225.9727, "step": 6650 }, { "epoch": 0.05509368407991066, "grad_norm": 1129.1629638671875, "learning_rate": 5.381818181818183e-06, "loss": 159.6996, "step": 6660 }, { "epoch": 0.05517640732927989, "grad_norm": 1159.3424072265625, "learning_rate": 5.38989898989899e-06, "loss": 226.1631, "step": 6670 }, { "epoch": 0.05525913057864913, "grad_norm": 1065.8494873046875, "learning_rate": 5.397979797979799e-06, "loss": 213.2366, "step": 6680 }, { "epoch": 0.055341853828018366, "grad_norm": 801.6089477539062, "learning_rate": 5.406060606060607e-06, "loss": 180.9512, "step": 6690 }, { "epoch": 0.0554245770773876, "grad_norm": 1254.9515380859375, "learning_rate": 5.414141414141415e-06, "loss": 223.2975, "step": 6700 }, { "epoch": 0.05550730032675683, "grad_norm": 1734.849609375, "learning_rate": 5.422222222222223e-06, "loss": 179.4333, "step": 6710 }, { "epoch": 0.05559002357612607, "grad_norm": 1987.9564208984375, "learning_rate": 5.430303030303032e-06, "loss": 248.2462, "step": 6720 }, { "epoch": 0.055672746825495306, "grad_norm": 1181.2037353515625, "learning_rate": 5.438383838383838e-06, "loss": 182.878, "step": 6730 }, { "epoch": 0.05575547007486454, "grad_norm": 1901.66015625, "learning_rate": 5.446464646464647e-06, "loss": 251.8006, "step": 6740 }, { "epoch": 0.05583819332423377, "grad_norm": 632.8333740234375, "learning_rate": 5.4545454545454545e-06, "loss": 196.6042, "step": 6750 }, { "epoch": 0.05592091657360301, "grad_norm": 604.5985717773438, "learning_rate": 5.462626262626263e-06, "loss": 177.947, "step": 6760 }, { "epoch": 0.05600363982297225, "grad_norm": 1062.9954833984375, "learning_rate": 5.4707070707070705e-06, "loss": 312.2176, "step": 6770 }, { "epoch": 0.05608636307234148, "grad_norm": 1964.1070556640625, "learning_rate": 5.478787878787879e-06, "loss": 192.9068, "step": 6780 }, { "epoch": 0.056169086321710714, "grad_norm": 1671.50146484375, "learning_rate": 5.486868686868687e-06, "loss": 236.6793, "step": 6790 }, { "epoch": 0.056251809571079954, "grad_norm": 1363.7489013671875, "learning_rate": 5.494949494949495e-06, "loss": 207.5258, "step": 6800 }, { "epoch": 0.05633453282044919, "grad_norm": 1217.836669921875, "learning_rate": 5.5030303030303034e-06, "loss": 185.7013, "step": 6810 }, { "epoch": 0.05641725606981842, "grad_norm": 1585.8988037109375, "learning_rate": 5.511111111111112e-06, "loss": 217.5803, "step": 6820 }, { "epoch": 0.056499979319187654, "grad_norm": 983.3916625976562, "learning_rate": 5.5191919191919195e-06, "loss": 224.2803, "step": 6830 }, { "epoch": 0.056582702568556895, "grad_norm": 1328.7664794921875, "learning_rate": 5.527272727272728e-06, "loss": 213.4681, "step": 6840 }, { "epoch": 0.05666542581792613, "grad_norm": 908.5962524414062, "learning_rate": 5.5353535353535355e-06, "loss": 191.1099, "step": 6850 }, { "epoch": 0.05674814906729536, "grad_norm": 965.225341796875, "learning_rate": 5.543434343434344e-06, "loss": 180.1947, "step": 6860 }, { "epoch": 0.0568308723166646, "grad_norm": 1269.7061767578125, "learning_rate": 5.5515151515151524e-06, "loss": 235.1135, "step": 6870 }, { "epoch": 0.056913595566033835, "grad_norm": 1158.512939453125, "learning_rate": 5.55959595959596e-06, "loss": 215.2743, "step": 6880 }, { "epoch": 0.05699631881540307, "grad_norm": 1231.1844482421875, "learning_rate": 5.5676767676767685e-06, "loss": 327.6613, "step": 6890 }, { "epoch": 0.0570790420647723, "grad_norm": 1180.1192626953125, "learning_rate": 5.575757575757577e-06, "loss": 228.0458, "step": 6900 }, { "epoch": 0.05716176531414154, "grad_norm": 952.682861328125, "learning_rate": 5.5838383838383845e-06, "loss": 250.6257, "step": 6910 }, { "epoch": 0.057244488563510776, "grad_norm": 16013.0146484375, "learning_rate": 5.591919191919193e-06, "loss": 250.4285, "step": 6920 }, { "epoch": 0.05732721181288001, "grad_norm": 1266.31787109375, "learning_rate": 5.600000000000001e-06, "loss": 213.8897, "step": 6930 }, { "epoch": 0.05740993506224924, "grad_norm": 1832.4088134765625, "learning_rate": 5.608080808080808e-06, "loss": 218.5001, "step": 6940 }, { "epoch": 0.05749265831161848, "grad_norm": 1213.3984375, "learning_rate": 5.616161616161616e-06, "loss": 211.7139, "step": 6950 }, { "epoch": 0.057575381560987716, "grad_norm": 1369.9437255859375, "learning_rate": 5.624242424242424e-06, "loss": 192.9413, "step": 6960 }, { "epoch": 0.05765810481035695, "grad_norm": 1018.5819091796875, "learning_rate": 5.632323232323233e-06, "loss": 150.2073, "step": 6970 }, { "epoch": 0.05774082805972618, "grad_norm": 1435.1353759765625, "learning_rate": 5.64040404040404e-06, "loss": 187.5981, "step": 6980 }, { "epoch": 0.057823551309095424, "grad_norm": 1339.41650390625, "learning_rate": 5.648484848484849e-06, "loss": 165.7803, "step": 6990 }, { "epoch": 0.05790627455846466, "grad_norm": 3150.48974609375, "learning_rate": 5.656565656565657e-06, "loss": 183.762, "step": 7000 }, { "epoch": 0.05798899780783389, "grad_norm": 1491.4512939453125, "learning_rate": 5.664646464646465e-06, "loss": 205.812, "step": 7010 }, { "epoch": 0.058071721057203124, "grad_norm": 1045.4873046875, "learning_rate": 5.672727272727273e-06, "loss": 222.1431, "step": 7020 }, { "epoch": 0.058154444306572364, "grad_norm": 1604.70556640625, "learning_rate": 5.680808080808081e-06, "loss": 312.9387, "step": 7030 }, { "epoch": 0.0582371675559416, "grad_norm": 794.0164184570312, "learning_rate": 5.688888888888889e-06, "loss": 148.5473, "step": 7040 }, { "epoch": 0.05831989080531083, "grad_norm": 1710.36328125, "learning_rate": 5.696969696969698e-06, "loss": 187.5921, "step": 7050 }, { "epoch": 0.058402614054680065, "grad_norm": 1974.2613525390625, "learning_rate": 5.705050505050505e-06, "loss": 192.2352, "step": 7060 }, { "epoch": 0.058485337304049305, "grad_norm": 1533.34619140625, "learning_rate": 5.713131313131314e-06, "loss": 189.592, "step": 7070 }, { "epoch": 0.05856806055341854, "grad_norm": 1127.27392578125, "learning_rate": 5.721212121212122e-06, "loss": 211.6275, "step": 7080 }, { "epoch": 0.05865078380278777, "grad_norm": 1902.27001953125, "learning_rate": 5.72929292929293e-06, "loss": 189.9227, "step": 7090 }, { "epoch": 0.05873350705215701, "grad_norm": 1564.533203125, "learning_rate": 5.737373737373738e-06, "loss": 231.5597, "step": 7100 }, { "epoch": 0.058816230301526246, "grad_norm": 2943.590576171875, "learning_rate": 5.745454545454546e-06, "loss": 177.4852, "step": 7110 }, { "epoch": 0.05889895355089548, "grad_norm": 1309.1622314453125, "learning_rate": 5.753535353535354e-06, "loss": 165.6322, "step": 7120 }, { "epoch": 0.05898167680026471, "grad_norm": 1501.813720703125, "learning_rate": 5.761616161616163e-06, "loss": 189.5367, "step": 7130 }, { "epoch": 0.05906440004963395, "grad_norm": 1574.177978515625, "learning_rate": 5.76969696969697e-06, "loss": 234.3292, "step": 7140 }, { "epoch": 0.059147123299003186, "grad_norm": 1052.1170654296875, "learning_rate": 5.777777777777778e-06, "loss": 214.4601, "step": 7150 }, { "epoch": 0.05922984654837242, "grad_norm": 1018.6898803710938, "learning_rate": 5.785858585858586e-06, "loss": 156.9678, "step": 7160 }, { "epoch": 0.05931256979774165, "grad_norm": 1917.61865234375, "learning_rate": 5.793939393939394e-06, "loss": 233.6657, "step": 7170 }, { "epoch": 0.05939529304711089, "grad_norm": 1654.3441162109375, "learning_rate": 5.8020202020202025e-06, "loss": 173.1022, "step": 7180 }, { "epoch": 0.05947801629648013, "grad_norm": 1147.4951171875, "learning_rate": 5.81010101010101e-06, "loss": 209.7125, "step": 7190 }, { "epoch": 0.05956073954584936, "grad_norm": 1505.357177734375, "learning_rate": 5.8181818181818185e-06, "loss": 239.9892, "step": 7200 }, { "epoch": 0.059643462795218594, "grad_norm": 3524.7958984375, "learning_rate": 5.826262626262626e-06, "loss": 199.2161, "step": 7210 }, { "epoch": 0.059726186044587834, "grad_norm": 2917.4033203125, "learning_rate": 5.834343434343435e-06, "loss": 282.2665, "step": 7220 }, { "epoch": 0.05980890929395707, "grad_norm": 976.106689453125, "learning_rate": 5.842424242424243e-06, "loss": 220.0685, "step": 7230 }, { "epoch": 0.0598916325433263, "grad_norm": 1532.435791015625, "learning_rate": 5.850505050505051e-06, "loss": 247.5289, "step": 7240 }, { "epoch": 0.059974355792695534, "grad_norm": 868.5018920898438, "learning_rate": 5.858585858585859e-06, "loss": 204.2799, "step": 7250 }, { "epoch": 0.060057079042064775, "grad_norm": 1144.6495361328125, "learning_rate": 5.8666666666666675e-06, "loss": 233.1151, "step": 7260 }, { "epoch": 0.06013980229143401, "grad_norm": 1262.310302734375, "learning_rate": 5.874747474747475e-06, "loss": 215.0127, "step": 7270 }, { "epoch": 0.06022252554080324, "grad_norm": 1465.82568359375, "learning_rate": 5.882828282828284e-06, "loss": 214.6379, "step": 7280 }, { "epoch": 0.060305248790172475, "grad_norm": 890.1478271484375, "learning_rate": 5.890909090909091e-06, "loss": 212.3409, "step": 7290 }, { "epoch": 0.060387972039541715, "grad_norm": 804.8695068359375, "learning_rate": 5.8989898989899e-06, "loss": 161.0226, "step": 7300 }, { "epoch": 0.06047069528891095, "grad_norm": 1049.8092041015625, "learning_rate": 5.907070707070708e-06, "loss": 228.0541, "step": 7310 }, { "epoch": 0.06055341853828018, "grad_norm": 1314.708984375, "learning_rate": 5.915151515151516e-06, "loss": 169.0287, "step": 7320 }, { "epoch": 0.060636141787649415, "grad_norm": 877.56982421875, "learning_rate": 5.923232323232324e-06, "loss": 182.722, "step": 7330 }, { "epoch": 0.060718865037018656, "grad_norm": 1187.692138671875, "learning_rate": 5.9313131313131326e-06, "loss": 216.9126, "step": 7340 }, { "epoch": 0.06080158828638789, "grad_norm": 910.7957763671875, "learning_rate": 5.93939393939394e-06, "loss": 156.4217, "step": 7350 }, { "epoch": 0.06088431153575712, "grad_norm": 937.8931274414062, "learning_rate": 5.947474747474749e-06, "loss": 206.2811, "step": 7360 }, { "epoch": 0.06096703478512636, "grad_norm": 1068.356201171875, "learning_rate": 5.955555555555555e-06, "loss": 209.255, "step": 7370 }, { "epoch": 0.061049758034495596, "grad_norm": 1415.2979736328125, "learning_rate": 5.963636363636364e-06, "loss": 180.4899, "step": 7380 }, { "epoch": 0.06113248128386483, "grad_norm": 1216.490966796875, "learning_rate": 5.9717171717171714e-06, "loss": 181.2195, "step": 7390 }, { "epoch": 0.06121520453323406, "grad_norm": 1969.770751953125, "learning_rate": 5.97979797979798e-06, "loss": 185.2952, "step": 7400 }, { "epoch": 0.061297927782603304, "grad_norm": 1073.1572265625, "learning_rate": 5.987878787878788e-06, "loss": 194.6978, "step": 7410 }, { "epoch": 0.06138065103197254, "grad_norm": 1329.2269287109375, "learning_rate": 5.995959595959596e-06, "loss": 225.0991, "step": 7420 }, { "epoch": 0.06146337428134177, "grad_norm": 1447.0704345703125, "learning_rate": 6.004040404040404e-06, "loss": 222.2881, "step": 7430 }, { "epoch": 0.061546097530711004, "grad_norm": 1847.17578125, "learning_rate": 6.012121212121213e-06, "loss": 250.8386, "step": 7440 }, { "epoch": 0.061628820780080244, "grad_norm": 1011.4767456054688, "learning_rate": 6.0202020202020204e-06, "loss": 217.8489, "step": 7450 }, { "epoch": 0.06171154402944948, "grad_norm": 905.0313110351562, "learning_rate": 6.028282828282829e-06, "loss": 176.8829, "step": 7460 }, { "epoch": 0.06179426727881871, "grad_norm": 1627.3341064453125, "learning_rate": 6.0363636363636365e-06, "loss": 245.858, "step": 7470 }, { "epoch": 0.061876990528187945, "grad_norm": 1333.339111328125, "learning_rate": 6.044444444444445e-06, "loss": 172.512, "step": 7480 }, { "epoch": 0.061959713777557185, "grad_norm": 981.724609375, "learning_rate": 6.052525252525253e-06, "loss": 188.6218, "step": 7490 }, { "epoch": 0.06204243702692642, "grad_norm": 963.2952880859375, "learning_rate": 6.060606060606061e-06, "loss": 200.9197, "step": 7500 }, { "epoch": 0.06212516027629565, "grad_norm": 1976.6124267578125, "learning_rate": 6.068686868686869e-06, "loss": 206.6058, "step": 7510 }, { "epoch": 0.062207883525664885, "grad_norm": 964.7189331054688, "learning_rate": 6.076767676767678e-06, "loss": 244.5135, "step": 7520 }, { "epoch": 0.062290606775034126, "grad_norm": 940.2135009765625, "learning_rate": 6.0848484848484855e-06, "loss": 286.4893, "step": 7530 }, { "epoch": 0.06237333002440336, "grad_norm": 1086.095947265625, "learning_rate": 6.092929292929294e-06, "loss": 166.7001, "step": 7540 }, { "epoch": 0.06245605327377259, "grad_norm": 824.2728881835938, "learning_rate": 6.1010101010101015e-06, "loss": 178.9928, "step": 7550 }, { "epoch": 0.06253877652314183, "grad_norm": 1227.130859375, "learning_rate": 6.10909090909091e-06, "loss": 188.424, "step": 7560 }, { "epoch": 0.06262149977251107, "grad_norm": 943.979736328125, "learning_rate": 6.117171717171718e-06, "loss": 170.1881, "step": 7570 }, { "epoch": 0.06270422302188029, "grad_norm": 893.2919921875, "learning_rate": 6.125252525252525e-06, "loss": 203.6881, "step": 7580 }, { "epoch": 0.06278694627124953, "grad_norm": 1502.2542724609375, "learning_rate": 6.133333333333334e-06, "loss": 227.4439, "step": 7590 }, { "epoch": 0.06286966952061877, "grad_norm": 2379.823974609375, "learning_rate": 6.141414141414141e-06, "loss": 216.3211, "step": 7600 }, { "epoch": 0.062952392769988, "grad_norm": 2946.320068359375, "learning_rate": 6.14949494949495e-06, "loss": 181.0182, "step": 7610 }, { "epoch": 0.06303511601935724, "grad_norm": 1127.286865234375, "learning_rate": 6.157575757575758e-06, "loss": 219.9298, "step": 7620 }, { "epoch": 0.06311783926872648, "grad_norm": 2195.520751953125, "learning_rate": 6.165656565656566e-06, "loss": 225.9704, "step": 7630 }, { "epoch": 0.06320056251809571, "grad_norm": 1115.85107421875, "learning_rate": 6.173737373737374e-06, "loss": 192.5431, "step": 7640 }, { "epoch": 0.06328328576746495, "grad_norm": 1610.82861328125, "learning_rate": 6.181818181818182e-06, "loss": 234.5798, "step": 7650 }, { "epoch": 0.06336600901683419, "grad_norm": 2228.57080078125, "learning_rate": 6.18989898989899e-06, "loss": 203.8422, "step": 7660 }, { "epoch": 0.06344873226620341, "grad_norm": 1072.0853271484375, "learning_rate": 6.197979797979799e-06, "loss": 136.4943, "step": 7670 }, { "epoch": 0.06353145551557265, "grad_norm": 987.9502563476562, "learning_rate": 6.206060606060606e-06, "loss": 176.214, "step": 7680 }, { "epoch": 0.06361417876494188, "grad_norm": 1846.4237060546875, "learning_rate": 6.214141414141415e-06, "loss": 328.0838, "step": 7690 }, { "epoch": 0.06369690201431112, "grad_norm": 1142.6070556640625, "learning_rate": 6.222222222222223e-06, "loss": 197.7707, "step": 7700 }, { "epoch": 0.06377962526368036, "grad_norm": 2267.01904296875, "learning_rate": 6.230303030303031e-06, "loss": 246.8396, "step": 7710 }, { "epoch": 0.06386234851304959, "grad_norm": 1803.31591796875, "learning_rate": 6.238383838383839e-06, "loss": 156.6524, "step": 7720 }, { "epoch": 0.06394507176241883, "grad_norm": 1524.9769287109375, "learning_rate": 6.246464646464647e-06, "loss": 252.2839, "step": 7730 }, { "epoch": 0.06402779501178807, "grad_norm": 1911.2991943359375, "learning_rate": 6.254545454545455e-06, "loss": 185.8043, "step": 7740 }, { "epoch": 0.0641105182611573, "grad_norm": 1456.3634033203125, "learning_rate": 6.262626262626264e-06, "loss": 189.6922, "step": 7750 }, { "epoch": 0.06419324151052654, "grad_norm": 872.6129150390625, "learning_rate": 6.270707070707071e-06, "loss": 211.5073, "step": 7760 }, { "epoch": 0.06427596475989576, "grad_norm": 2161.432861328125, "learning_rate": 6.27878787878788e-06, "loss": 227.991, "step": 7770 }, { "epoch": 0.064358688009265, "grad_norm": 1427.72900390625, "learning_rate": 6.286868686868688e-06, "loss": 187.7627, "step": 7780 }, { "epoch": 0.06444141125863424, "grad_norm": 1048.0509033203125, "learning_rate": 6.294949494949495e-06, "loss": 216.9241, "step": 7790 }, { "epoch": 0.06452413450800347, "grad_norm": 831.1154174804688, "learning_rate": 6.303030303030303e-06, "loss": 156.4739, "step": 7800 }, { "epoch": 0.06460685775737271, "grad_norm": 1421.5413818359375, "learning_rate": 6.311111111111111e-06, "loss": 240.7841, "step": 7810 }, { "epoch": 0.06468958100674195, "grad_norm": 1609.0704345703125, "learning_rate": 6.3191919191919195e-06, "loss": 202.0292, "step": 7820 }, { "epoch": 0.06477230425611118, "grad_norm": 1056.16064453125, "learning_rate": 6.327272727272727e-06, "loss": 150.6357, "step": 7830 }, { "epoch": 0.06485502750548042, "grad_norm": 3895.295166015625, "learning_rate": 6.3353535353535355e-06, "loss": 166.0538, "step": 7840 }, { "epoch": 0.06493775075484966, "grad_norm": 1299.444580078125, "learning_rate": 6.343434343434344e-06, "loss": 160.5584, "step": 7850 }, { "epoch": 0.06502047400421888, "grad_norm": 712.9761352539062, "learning_rate": 6.3515151515151516e-06, "loss": 197.3979, "step": 7860 }, { "epoch": 0.06510319725358812, "grad_norm": 1037.716064453125, "learning_rate": 6.35959595959596e-06, "loss": 200.6918, "step": 7870 }, { "epoch": 0.06518592050295735, "grad_norm": 1494.057373046875, "learning_rate": 6.3676767676767685e-06, "loss": 238.1615, "step": 7880 }, { "epoch": 0.06526864375232659, "grad_norm": 1253.9837646484375, "learning_rate": 6.375757575757576e-06, "loss": 190.7486, "step": 7890 }, { "epoch": 0.06535136700169583, "grad_norm": 1362.12353515625, "learning_rate": 6.3838383838383845e-06, "loss": 214.7477, "step": 7900 }, { "epoch": 0.06543409025106506, "grad_norm": 890.4193725585938, "learning_rate": 6.391919191919192e-06, "loss": 170.1652, "step": 7910 }, { "epoch": 0.0655168135004343, "grad_norm": 1644.9490966796875, "learning_rate": 6.4000000000000006e-06, "loss": 175.0597, "step": 7920 }, { "epoch": 0.06559953674980354, "grad_norm": 777.9044799804688, "learning_rate": 6.408080808080809e-06, "loss": 238.5318, "step": 7930 }, { "epoch": 0.06568225999917277, "grad_norm": 1498.41455078125, "learning_rate": 6.416161616161617e-06, "loss": 183.5011, "step": 7940 }, { "epoch": 0.065764983248542, "grad_norm": 1392.91748046875, "learning_rate": 6.424242424242425e-06, "loss": 164.5937, "step": 7950 }, { "epoch": 0.06584770649791123, "grad_norm": 1324.7021484375, "learning_rate": 6.4323232323232335e-06, "loss": 230.2011, "step": 7960 }, { "epoch": 0.06593042974728047, "grad_norm": 1371.1768798828125, "learning_rate": 6.440404040404041e-06, "loss": 226.2801, "step": 7970 }, { "epoch": 0.06601315299664971, "grad_norm": 2229.455078125, "learning_rate": 6.4484848484848496e-06, "loss": 196.2276, "step": 7980 }, { "epoch": 0.06609587624601894, "grad_norm": 1298.6649169921875, "learning_rate": 6.456565656565658e-06, "loss": 211.2707, "step": 7990 }, { "epoch": 0.06617859949538818, "grad_norm": 784.8013305664062, "learning_rate": 6.464646464646466e-06, "loss": 207.776, "step": 8000 }, { "epoch": 0.06626132274475742, "grad_norm": 911.982666015625, "learning_rate": 6.472727272727272e-06, "loss": 162.3888, "step": 8010 }, { "epoch": 0.06634404599412665, "grad_norm": 1066.7635498046875, "learning_rate": 6.480808080808081e-06, "loss": 182.5109, "step": 8020 }, { "epoch": 0.06642676924349589, "grad_norm": 733.5493774414062, "learning_rate": 6.488888888888889e-06, "loss": 140.7046, "step": 8030 }, { "epoch": 0.06650949249286511, "grad_norm": 1074.85888671875, "learning_rate": 6.496969696969697e-06, "loss": 182.7351, "step": 8040 }, { "epoch": 0.06659221574223435, "grad_norm": 1245.099365234375, "learning_rate": 6.505050505050505e-06, "loss": 228.4951, "step": 8050 }, { "epoch": 0.0666749389916036, "grad_norm": 820.326171875, "learning_rate": 6.513131313131314e-06, "loss": 172.91, "step": 8060 }, { "epoch": 0.06675766224097282, "grad_norm": 1347.561279296875, "learning_rate": 6.521212121212121e-06, "loss": 175.6294, "step": 8070 }, { "epoch": 0.06684038549034206, "grad_norm": 2743.030517578125, "learning_rate": 6.52929292929293e-06, "loss": 240.5082, "step": 8080 }, { "epoch": 0.0669231087397113, "grad_norm": 1014.7551879882812, "learning_rate": 6.537373737373737e-06, "loss": 181.1287, "step": 8090 }, { "epoch": 0.06700583198908053, "grad_norm": 1014.5245971679688, "learning_rate": 6.545454545454546e-06, "loss": 189.7482, "step": 8100 }, { "epoch": 0.06708855523844977, "grad_norm": 778.7886962890625, "learning_rate": 6.553535353535354e-06, "loss": 170.7764, "step": 8110 }, { "epoch": 0.06717127848781901, "grad_norm": 2081.901611328125, "learning_rate": 6.561616161616162e-06, "loss": 245.8872, "step": 8120 }, { "epoch": 0.06725400173718823, "grad_norm": 1881.017333984375, "learning_rate": 6.56969696969697e-06, "loss": 252.341, "step": 8130 }, { "epoch": 0.06733672498655748, "grad_norm": 910.3214111328125, "learning_rate": 6.577777777777779e-06, "loss": 179.5906, "step": 8140 }, { "epoch": 0.0674194482359267, "grad_norm": 1163.5965576171875, "learning_rate": 6.585858585858586e-06, "loss": 233.8698, "step": 8150 }, { "epoch": 0.06750217148529594, "grad_norm": 884.6065673828125, "learning_rate": 6.593939393939395e-06, "loss": 139.5692, "step": 8160 }, { "epoch": 0.06758489473466518, "grad_norm": 1140.02392578125, "learning_rate": 6.602020202020203e-06, "loss": 195.1031, "step": 8170 }, { "epoch": 0.06766761798403441, "grad_norm": 1378.050537109375, "learning_rate": 6.610101010101011e-06, "loss": 215.5058, "step": 8180 }, { "epoch": 0.06775034123340365, "grad_norm": 1974.1138916015625, "learning_rate": 6.618181818181819e-06, "loss": 294.2934, "step": 8190 }, { "epoch": 0.06783306448277289, "grad_norm": 832.8023681640625, "learning_rate": 6.626262626262627e-06, "loss": 184.4631, "step": 8200 }, { "epoch": 0.06791578773214212, "grad_norm": 856.9111938476562, "learning_rate": 6.634343434343435e-06, "loss": 170.4937, "step": 8210 }, { "epoch": 0.06799851098151136, "grad_norm": 916.113525390625, "learning_rate": 6.642424242424242e-06, "loss": 170.3063, "step": 8220 }, { "epoch": 0.06808123423088058, "grad_norm": 1267.204345703125, "learning_rate": 6.650505050505051e-06, "loss": 154.5532, "step": 8230 }, { "epoch": 0.06816395748024982, "grad_norm": 1163.3558349609375, "learning_rate": 6.658585858585859e-06, "loss": 196.9229, "step": 8240 }, { "epoch": 0.06824668072961906, "grad_norm": 1783.79541015625, "learning_rate": 6.666666666666667e-06, "loss": 210.5392, "step": 8250 }, { "epoch": 0.06832940397898829, "grad_norm": 917.7781982421875, "learning_rate": 6.674747474747475e-06, "loss": 177.4379, "step": 8260 }, { "epoch": 0.06841212722835753, "grad_norm": 674.7391967773438, "learning_rate": 6.682828282828283e-06, "loss": 186.2342, "step": 8270 }, { "epoch": 0.06849485047772677, "grad_norm": 654.15234375, "learning_rate": 6.690909090909091e-06, "loss": 136.4612, "step": 8280 }, { "epoch": 0.068577573727096, "grad_norm": 1094.0560302734375, "learning_rate": 6.6989898989899e-06, "loss": 170.5708, "step": 8290 }, { "epoch": 0.06866029697646524, "grad_norm": 1474.585693359375, "learning_rate": 6.707070707070707e-06, "loss": 203.4748, "step": 8300 }, { "epoch": 0.06874302022583446, "grad_norm": 1227.391845703125, "learning_rate": 6.715151515151516e-06, "loss": 222.0174, "step": 8310 }, { "epoch": 0.0688257434752037, "grad_norm": 1838.2691650390625, "learning_rate": 6.723232323232324e-06, "loss": 224.6444, "step": 8320 }, { "epoch": 0.06890846672457294, "grad_norm": 1169.060791015625, "learning_rate": 6.731313131313132e-06, "loss": 142.1164, "step": 8330 }, { "epoch": 0.06899118997394217, "grad_norm": 547.6967163085938, "learning_rate": 6.73939393939394e-06, "loss": 153.1282, "step": 8340 }, { "epoch": 0.06907391322331141, "grad_norm": 1083.063720703125, "learning_rate": 6.747474747474749e-06, "loss": 184.0296, "step": 8350 }, { "epoch": 0.06915663647268065, "grad_norm": 5596.943359375, "learning_rate": 6.755555555555556e-06, "loss": 173.6666, "step": 8360 }, { "epoch": 0.06923935972204988, "grad_norm": 1326.8721923828125, "learning_rate": 6.763636363636365e-06, "loss": 190.1673, "step": 8370 }, { "epoch": 0.06932208297141912, "grad_norm": 1101.3780517578125, "learning_rate": 6.771717171717172e-06, "loss": 220.6135, "step": 8380 }, { "epoch": 0.06940480622078836, "grad_norm": 1435.279296875, "learning_rate": 6.779797979797981e-06, "loss": 178.5454, "step": 8390 }, { "epoch": 0.06948752947015759, "grad_norm": 1620.093994140625, "learning_rate": 6.787878787878789e-06, "loss": 151.146, "step": 8400 }, { "epoch": 0.06957025271952683, "grad_norm": 1524.4627685546875, "learning_rate": 6.795959595959597e-06, "loss": 185.1478, "step": 8410 }, { "epoch": 0.06965297596889605, "grad_norm": 1806.7978515625, "learning_rate": 6.804040404040405e-06, "loss": 173.0628, "step": 8420 }, { "epoch": 0.06973569921826529, "grad_norm": 1826.8575439453125, "learning_rate": 6.812121212121212e-06, "loss": 167.8523, "step": 8430 }, { "epoch": 0.06981842246763453, "grad_norm": 1353.9345703125, "learning_rate": 6.82020202020202e-06, "loss": 194.5669, "step": 8440 }, { "epoch": 0.06990114571700376, "grad_norm": 2063.500244140625, "learning_rate": 6.828282828282828e-06, "loss": 184.4123, "step": 8450 }, { "epoch": 0.069983868966373, "grad_norm": 1244.160400390625, "learning_rate": 6.8363636363636364e-06, "loss": 234.5416, "step": 8460 }, { "epoch": 0.07006659221574224, "grad_norm": 894.0686645507812, "learning_rate": 6.844444444444445e-06, "loss": 157.8413, "step": 8470 }, { "epoch": 0.07014931546511147, "grad_norm": 953.1651000976562, "learning_rate": 6.8525252525252525e-06, "loss": 187.5382, "step": 8480 }, { "epoch": 0.07023203871448071, "grad_norm": 868.2008056640625, "learning_rate": 6.860606060606061e-06, "loss": 153.3808, "step": 8490 }, { "epoch": 0.07031476196384993, "grad_norm": 1246.61328125, "learning_rate": 6.868686868686869e-06, "loss": 198.8839, "step": 8500 }, { "epoch": 0.07039748521321917, "grad_norm": 1278.021484375, "learning_rate": 6.876767676767677e-06, "loss": 162.5859, "step": 8510 }, { "epoch": 0.07048020846258841, "grad_norm": 1224.3677978515625, "learning_rate": 6.8848484848484854e-06, "loss": 216.0751, "step": 8520 }, { "epoch": 0.07056293171195764, "grad_norm": 629.5072021484375, "learning_rate": 6.892929292929294e-06, "loss": 219.3646, "step": 8530 }, { "epoch": 0.07064565496132688, "grad_norm": 873.804443359375, "learning_rate": 6.9010101010101015e-06, "loss": 176.4796, "step": 8540 }, { "epoch": 0.07072837821069612, "grad_norm": 1832.682373046875, "learning_rate": 6.90909090909091e-06, "loss": 256.3848, "step": 8550 }, { "epoch": 0.07081110146006535, "grad_norm": 1244.7239990234375, "learning_rate": 6.9171717171717175e-06, "loss": 215.3179, "step": 8560 }, { "epoch": 0.07089382470943459, "grad_norm": 938.3501586914062, "learning_rate": 6.925252525252526e-06, "loss": 169.5242, "step": 8570 }, { "epoch": 0.07097654795880382, "grad_norm": 2076.439697265625, "learning_rate": 6.9333333333333344e-06, "loss": 200.3409, "step": 8580 }, { "epoch": 0.07105927120817306, "grad_norm": 800.4453735351562, "learning_rate": 6.941414141414142e-06, "loss": 192.4127, "step": 8590 }, { "epoch": 0.0711419944575423, "grad_norm": 1373.4234619140625, "learning_rate": 6.9494949494949505e-06, "loss": 161.1228, "step": 8600 }, { "epoch": 0.07122471770691152, "grad_norm": 950.77197265625, "learning_rate": 6.957575757575759e-06, "loss": 179.3362, "step": 8610 }, { "epoch": 0.07130744095628076, "grad_norm": 1121.46044921875, "learning_rate": 6.9656565656565665e-06, "loss": 196.0917, "step": 8620 }, { "epoch": 0.07139016420565, "grad_norm": 906.2406005859375, "learning_rate": 6.973737373737375e-06, "loss": 139.1432, "step": 8630 }, { "epoch": 0.07147288745501923, "grad_norm": 2017.980712890625, "learning_rate": 6.981818181818183e-06, "loss": 212.8915, "step": 8640 }, { "epoch": 0.07155561070438847, "grad_norm": 1261.973388671875, "learning_rate": 6.98989898989899e-06, "loss": 200.444, "step": 8650 }, { "epoch": 0.07163833395375771, "grad_norm": 855.2569580078125, "learning_rate": 6.997979797979798e-06, "loss": 222.0111, "step": 8660 }, { "epoch": 0.07172105720312694, "grad_norm": 1921.39599609375, "learning_rate": 7.006060606060606e-06, "loss": 222.5992, "step": 8670 }, { "epoch": 0.07180378045249618, "grad_norm": 857.9965209960938, "learning_rate": 7.014141414141415e-06, "loss": 219.586, "step": 8680 }, { "epoch": 0.0718865037018654, "grad_norm": 915.5559692382812, "learning_rate": 7.022222222222222e-06, "loss": 184.9689, "step": 8690 }, { "epoch": 0.07196922695123464, "grad_norm": 1838.1370849609375, "learning_rate": 7.030303030303031e-06, "loss": 232.5889, "step": 8700 }, { "epoch": 0.07205195020060388, "grad_norm": 932.5780639648438, "learning_rate": 7.038383838383839e-06, "loss": 171.6244, "step": 8710 }, { "epoch": 0.07213467344997311, "grad_norm": 1225.2701416015625, "learning_rate": 7.046464646464647e-06, "loss": 153.8994, "step": 8720 }, { "epoch": 0.07221739669934235, "grad_norm": 1276.4774169921875, "learning_rate": 7.054545454545455e-06, "loss": 195.4201, "step": 8730 }, { "epoch": 0.07230011994871159, "grad_norm": 1169.0623779296875, "learning_rate": 7.062626262626263e-06, "loss": 184.115, "step": 8740 }, { "epoch": 0.07238284319808082, "grad_norm": 1720.584716796875, "learning_rate": 7.070707070707071e-06, "loss": 225.3465, "step": 8750 }, { "epoch": 0.07246556644745006, "grad_norm": 811.720947265625, "learning_rate": 7.07878787878788e-06, "loss": 155.7371, "step": 8760 }, { "epoch": 0.07254828969681928, "grad_norm": 1744.4354248046875, "learning_rate": 7.086868686868687e-06, "loss": 193.3227, "step": 8770 }, { "epoch": 0.07263101294618853, "grad_norm": 862.0523681640625, "learning_rate": 7.094949494949496e-06, "loss": 170.9686, "step": 8780 }, { "epoch": 0.07271373619555777, "grad_norm": 2165.270751953125, "learning_rate": 7.103030303030304e-06, "loss": 234.9279, "step": 8790 }, { "epoch": 0.07279645944492699, "grad_norm": 1295.5478515625, "learning_rate": 7.111111111111112e-06, "loss": 211.1977, "step": 8800 }, { "epoch": 0.07287918269429623, "grad_norm": 957.7193603515625, "learning_rate": 7.11919191919192e-06, "loss": 191.1015, "step": 8810 }, { "epoch": 0.07296190594366547, "grad_norm": 1650.5340576171875, "learning_rate": 7.127272727272728e-06, "loss": 164.5978, "step": 8820 }, { "epoch": 0.0730446291930347, "grad_norm": 1488.514404296875, "learning_rate": 7.135353535353536e-06, "loss": 187.6664, "step": 8830 }, { "epoch": 0.07312735244240394, "grad_norm": 1756.99853515625, "learning_rate": 7.143434343434345e-06, "loss": 219.4122, "step": 8840 }, { "epoch": 0.07321007569177317, "grad_norm": 924.37158203125, "learning_rate": 7.151515151515152e-06, "loss": 157.6518, "step": 8850 }, { "epoch": 0.0732927989411424, "grad_norm": 1157.555908203125, "learning_rate": 7.15959595959596e-06, "loss": 162.5851, "step": 8860 }, { "epoch": 0.07337552219051165, "grad_norm": 668.1375732421875, "learning_rate": 7.1676767676767676e-06, "loss": 153.8082, "step": 8870 }, { "epoch": 0.07345824543988087, "grad_norm": 1706.42333984375, "learning_rate": 7.175757575757576e-06, "loss": 145.3626, "step": 8880 }, { "epoch": 0.07354096868925011, "grad_norm": 2563.07861328125, "learning_rate": 7.1838383838383845e-06, "loss": 217.706, "step": 8890 }, { "epoch": 0.07362369193861935, "grad_norm": 1756.02392578125, "learning_rate": 7.191919191919192e-06, "loss": 177.5173, "step": 8900 }, { "epoch": 0.07370641518798858, "grad_norm": 1470.858642578125, "learning_rate": 7.2000000000000005e-06, "loss": 218.2071, "step": 8910 }, { "epoch": 0.07378913843735782, "grad_norm": 1429.1534423828125, "learning_rate": 7.208080808080808e-06, "loss": 192.6579, "step": 8920 }, { "epoch": 0.07387186168672706, "grad_norm": 1044.5311279296875, "learning_rate": 7.2161616161616166e-06, "loss": 175.2781, "step": 8930 }, { "epoch": 0.07395458493609629, "grad_norm": 1081.16748046875, "learning_rate": 7.224242424242425e-06, "loss": 195.337, "step": 8940 }, { "epoch": 0.07403730818546553, "grad_norm": 1816.50634765625, "learning_rate": 7.232323232323233e-06, "loss": 164.159, "step": 8950 }, { "epoch": 0.07412003143483475, "grad_norm": 941.095947265625, "learning_rate": 7.240404040404041e-06, "loss": 167.4316, "step": 8960 }, { "epoch": 0.074202754684204, "grad_norm": 1430.9918212890625, "learning_rate": 7.2484848484848495e-06, "loss": 142.7207, "step": 8970 }, { "epoch": 0.07428547793357324, "grad_norm": 987.8670043945312, "learning_rate": 7.256565656565657e-06, "loss": 185.2262, "step": 8980 }, { "epoch": 0.07436820118294246, "grad_norm": 1886.202392578125, "learning_rate": 7.2646464646464656e-06, "loss": 201.7182, "step": 8990 }, { "epoch": 0.0744509244323117, "grad_norm": 6469.64501953125, "learning_rate": 7.272727272727273e-06, "loss": 242.3246, "step": 9000 }, { "epoch": 0.07453364768168094, "grad_norm": 1446.23388671875, "learning_rate": 7.280808080808082e-06, "loss": 185.7719, "step": 9010 }, { "epoch": 0.07461637093105017, "grad_norm": 1158.1019287109375, "learning_rate": 7.28888888888889e-06, "loss": 135.7039, "step": 9020 }, { "epoch": 0.07469909418041941, "grad_norm": 1710.8179931640625, "learning_rate": 7.296969696969698e-06, "loss": 214.3662, "step": 9030 }, { "epoch": 0.07478181742978864, "grad_norm": 693.0316772460938, "learning_rate": 7.305050505050506e-06, "loss": 168.4758, "step": 9040 }, { "epoch": 0.07486454067915788, "grad_norm": 1307.99267578125, "learning_rate": 7.3131313131313146e-06, "loss": 221.4772, "step": 9050 }, { "epoch": 0.07494726392852712, "grad_norm": 578.5460815429688, "learning_rate": 7.321212121212122e-06, "loss": 172.2857, "step": 9060 }, { "epoch": 0.07502998717789634, "grad_norm": 4154.9052734375, "learning_rate": 7.32929292929293e-06, "loss": 200.6511, "step": 9070 }, { "epoch": 0.07511271042726558, "grad_norm": 3818.542236328125, "learning_rate": 7.337373737373737e-06, "loss": 198.6826, "step": 9080 }, { "epoch": 0.07519543367663482, "grad_norm": 630.0006713867188, "learning_rate": 7.345454545454546e-06, "loss": 146.3737, "step": 9090 }, { "epoch": 0.07527815692600405, "grad_norm": 1059.06640625, "learning_rate": 7.353535353535353e-06, "loss": 163.5028, "step": 9100 }, { "epoch": 0.07536088017537329, "grad_norm": 668.64111328125, "learning_rate": 7.361616161616162e-06, "loss": 170.1274, "step": 9110 }, { "epoch": 0.07544360342474253, "grad_norm": 1227.2437744140625, "learning_rate": 7.36969696969697e-06, "loss": 167.1523, "step": 9120 }, { "epoch": 0.07552632667411176, "grad_norm": 989.3958740234375, "learning_rate": 7.377777777777778e-06, "loss": 179.0444, "step": 9130 }, { "epoch": 0.075609049923481, "grad_norm": 2812.70166015625, "learning_rate": 7.385858585858586e-06, "loss": 170.3505, "step": 9140 }, { "epoch": 0.07569177317285022, "grad_norm": 1429.5191650390625, "learning_rate": 7.393939393939395e-06, "loss": 198.7005, "step": 9150 }, { "epoch": 0.07577449642221946, "grad_norm": 937.4029541015625, "learning_rate": 7.402020202020202e-06, "loss": 149.7339, "step": 9160 }, { "epoch": 0.0758572196715887, "grad_norm": 6428.83642578125, "learning_rate": 7.410101010101011e-06, "loss": 224.5054, "step": 9170 }, { "epoch": 0.07593994292095793, "grad_norm": 1135.309814453125, "learning_rate": 7.4181818181818185e-06, "loss": 206.6003, "step": 9180 }, { "epoch": 0.07602266617032717, "grad_norm": 663.953125, "learning_rate": 7.426262626262627e-06, "loss": 148.6399, "step": 9190 }, { "epoch": 0.07610538941969641, "grad_norm": 1465.2401123046875, "learning_rate": 7.434343434343435e-06, "loss": 129.7046, "step": 9200 }, { "epoch": 0.07618811266906564, "grad_norm": 840.3555297851562, "learning_rate": 7.442424242424243e-06, "loss": 212.8563, "step": 9210 }, { "epoch": 0.07627083591843488, "grad_norm": 1056.4595947265625, "learning_rate": 7.450505050505051e-06, "loss": 219.987, "step": 9220 }, { "epoch": 0.0763535591678041, "grad_norm": 1585.2525634765625, "learning_rate": 7.45858585858586e-06, "loss": 254.3914, "step": 9230 }, { "epoch": 0.07643628241717335, "grad_norm": 1167.71728515625, "learning_rate": 7.4666666666666675e-06, "loss": 173.4369, "step": 9240 }, { "epoch": 0.07651900566654259, "grad_norm": 1241.547119140625, "learning_rate": 7.474747474747476e-06, "loss": 196.5241, "step": 9250 }, { "epoch": 0.07660172891591181, "grad_norm": 1076.36279296875, "learning_rate": 7.4828282828282835e-06, "loss": 141.0298, "step": 9260 }, { "epoch": 0.07668445216528105, "grad_norm": 894.4515991210938, "learning_rate": 7.490909090909092e-06, "loss": 182.4499, "step": 9270 }, { "epoch": 0.0767671754146503, "grad_norm": 1318.6171875, "learning_rate": 7.4989898989899e-06, "loss": 294.3835, "step": 9280 }, { "epoch": 0.07684989866401952, "grad_norm": 1835.936279296875, "learning_rate": 7.507070707070707e-06, "loss": 200.7481, "step": 9290 }, { "epoch": 0.07693262191338876, "grad_norm": 1094.5806884765625, "learning_rate": 7.515151515151516e-06, "loss": 155.6739, "step": 9300 }, { "epoch": 0.07701534516275799, "grad_norm": 2438.4599609375, "learning_rate": 7.523232323232323e-06, "loss": 168.196, "step": 9310 }, { "epoch": 0.07709806841212723, "grad_norm": 1409.369384765625, "learning_rate": 7.531313131313132e-06, "loss": 168.5339, "step": 9320 }, { "epoch": 0.07718079166149647, "grad_norm": 792.833740234375, "learning_rate": 7.53939393939394e-06, "loss": 204.8236, "step": 9330 }, { "epoch": 0.0772635149108657, "grad_norm": 4106.85107421875, "learning_rate": 7.547474747474748e-06, "loss": 206.0253, "step": 9340 }, { "epoch": 0.07734623816023493, "grad_norm": 1158.9046630859375, "learning_rate": 7.555555555555556e-06, "loss": 200.0595, "step": 9350 }, { "epoch": 0.07742896140960417, "grad_norm": 829.5303344726562, "learning_rate": 7.563636363636364e-06, "loss": 152.1526, "step": 9360 }, { "epoch": 0.0775116846589734, "grad_norm": 1672.3824462890625, "learning_rate": 7.571717171717172e-06, "loss": 196.9728, "step": 9370 }, { "epoch": 0.07759440790834264, "grad_norm": 1697.9437255859375, "learning_rate": 7.579797979797981e-06, "loss": 140.7818, "step": 9380 }, { "epoch": 0.07767713115771188, "grad_norm": 1467.42138671875, "learning_rate": 7.587878787878788e-06, "loss": 237.8365, "step": 9390 }, { "epoch": 0.07775985440708111, "grad_norm": 1372.5516357421875, "learning_rate": 7.595959595959597e-06, "loss": 201.537, "step": 9400 }, { "epoch": 0.07784257765645035, "grad_norm": 1280.147216796875, "learning_rate": 7.604040404040405e-06, "loss": 168.6732, "step": 9410 }, { "epoch": 0.07792530090581958, "grad_norm": 1045.321533203125, "learning_rate": 7.612121212121213e-06, "loss": 154.092, "step": 9420 }, { "epoch": 0.07800802415518882, "grad_norm": 6423.11962890625, "learning_rate": 7.620202020202021e-06, "loss": 251.8613, "step": 9430 }, { "epoch": 0.07809074740455806, "grad_norm": 1650.4332275390625, "learning_rate": 7.628282828282829e-06, "loss": 218.2032, "step": 9440 }, { "epoch": 0.07817347065392728, "grad_norm": 1520.5958251953125, "learning_rate": 7.636363636363638e-06, "loss": 194.5603, "step": 9450 }, { "epoch": 0.07825619390329652, "grad_norm": 1989.30126953125, "learning_rate": 7.644444444444445e-06, "loss": 167.2613, "step": 9460 }, { "epoch": 0.07833891715266576, "grad_norm": 1363.1075439453125, "learning_rate": 7.652525252525253e-06, "loss": 193.0324, "step": 9470 }, { "epoch": 0.07842164040203499, "grad_norm": 1044.422607421875, "learning_rate": 7.660606060606062e-06, "loss": 239.5535, "step": 9480 }, { "epoch": 0.07850436365140423, "grad_norm": 1506.1378173828125, "learning_rate": 7.66868686868687e-06, "loss": 157.3207, "step": 9490 }, { "epoch": 0.07858708690077346, "grad_norm": 1023.1807250976562, "learning_rate": 7.676767676767677e-06, "loss": 177.0086, "step": 9500 }, { "epoch": 0.0786698101501427, "grad_norm": 766.3860473632812, "learning_rate": 7.684848484848485e-06, "loss": 180.8027, "step": 9510 }, { "epoch": 0.07875253339951194, "grad_norm": 1220.765380859375, "learning_rate": 7.692929292929294e-06, "loss": 199.764, "step": 9520 }, { "epoch": 0.07883525664888116, "grad_norm": 798.783447265625, "learning_rate": 7.7010101010101e-06, "loss": 178.7332, "step": 9530 }, { "epoch": 0.0789179798982504, "grad_norm": 1135.2913818359375, "learning_rate": 7.709090909090909e-06, "loss": 138.7885, "step": 9540 }, { "epoch": 0.07900070314761964, "grad_norm": 820.0831298828125, "learning_rate": 7.717171717171717e-06, "loss": 192.7479, "step": 9550 }, { "epoch": 0.07908342639698887, "grad_norm": 1487.1383056640625, "learning_rate": 7.725252525252526e-06, "loss": 202.4578, "step": 9560 }, { "epoch": 0.07916614964635811, "grad_norm": 3294.249267578125, "learning_rate": 7.733333333333334e-06, "loss": 154.1931, "step": 9570 }, { "epoch": 0.07924887289572734, "grad_norm": 1593.376708984375, "learning_rate": 7.741414141414141e-06, "loss": 220.1035, "step": 9580 }, { "epoch": 0.07933159614509658, "grad_norm": 1254.923095703125, "learning_rate": 7.74949494949495e-06, "loss": 162.3905, "step": 9590 }, { "epoch": 0.07941431939446582, "grad_norm": 989.010009765625, "learning_rate": 7.757575757575758e-06, "loss": 150.6632, "step": 9600 }, { "epoch": 0.07949704264383504, "grad_norm": 668.603759765625, "learning_rate": 7.765656565656566e-06, "loss": 180.0927, "step": 9610 }, { "epoch": 0.07957976589320429, "grad_norm": 918.044677734375, "learning_rate": 7.773737373737375e-06, "loss": 216.1173, "step": 9620 }, { "epoch": 0.07966248914257353, "grad_norm": 1150.803955078125, "learning_rate": 7.781818181818183e-06, "loss": 165.5561, "step": 9630 }, { "epoch": 0.07974521239194275, "grad_norm": 1339.1085205078125, "learning_rate": 7.78989898989899e-06, "loss": 120.4266, "step": 9640 }, { "epoch": 0.07982793564131199, "grad_norm": 1119.98388671875, "learning_rate": 7.797979797979799e-06, "loss": 186.139, "step": 9650 }, { "epoch": 0.07991065889068123, "grad_norm": 845.5322875976562, "learning_rate": 7.806060606060607e-06, "loss": 147.2671, "step": 9660 }, { "epoch": 0.07999338214005046, "grad_norm": 1525.1080322265625, "learning_rate": 7.814141414141415e-06, "loss": 227.4823, "step": 9670 }, { "epoch": 0.0800761053894197, "grad_norm": 936.9368286132812, "learning_rate": 7.822222222222224e-06, "loss": 147.8492, "step": 9680 }, { "epoch": 0.08015882863878893, "grad_norm": 1528.34375, "learning_rate": 7.83030303030303e-06, "loss": 176.4171, "step": 9690 }, { "epoch": 0.08024155188815817, "grad_norm": 1028.9813232421875, "learning_rate": 7.838383838383839e-06, "loss": 168.1568, "step": 9700 }, { "epoch": 0.0803242751375274, "grad_norm": 3313.676513671875, "learning_rate": 7.846464646464646e-06, "loss": 211.1931, "step": 9710 }, { "epoch": 0.08040699838689663, "grad_norm": 698.055419921875, "learning_rate": 7.854545454545454e-06, "loss": 147.0731, "step": 9720 }, { "epoch": 0.08048972163626587, "grad_norm": 1256.8193359375, "learning_rate": 7.862626262626263e-06, "loss": 141.3686, "step": 9730 }, { "epoch": 0.08057244488563511, "grad_norm": 1118.2659912109375, "learning_rate": 7.870707070707071e-06, "loss": 191.6792, "step": 9740 }, { "epoch": 0.08065516813500434, "grad_norm": 1615.0177001953125, "learning_rate": 7.87878787878788e-06, "loss": 171.2617, "step": 9750 }, { "epoch": 0.08073789138437358, "grad_norm": 1359.4404296875, "learning_rate": 7.886868686868686e-06, "loss": 233.8179, "step": 9760 }, { "epoch": 0.08082061463374281, "grad_norm": 1080.4814453125, "learning_rate": 7.894949494949495e-06, "loss": 167.5727, "step": 9770 }, { "epoch": 0.08090333788311205, "grad_norm": 1568.5997314453125, "learning_rate": 7.903030303030303e-06, "loss": 180.408, "step": 9780 }, { "epoch": 0.08098606113248129, "grad_norm": 1154.7166748046875, "learning_rate": 7.911111111111112e-06, "loss": 141.3988, "step": 9790 }, { "epoch": 0.08106878438185051, "grad_norm": 1465.5701904296875, "learning_rate": 7.91919191919192e-06, "loss": 235.8258, "step": 9800 }, { "epoch": 0.08115150763121975, "grad_norm": 1399.992919921875, "learning_rate": 7.927272727272729e-06, "loss": 197.8094, "step": 9810 }, { "epoch": 0.081234230880589, "grad_norm": 1609.233154296875, "learning_rate": 7.935353535353535e-06, "loss": 169.6143, "step": 9820 }, { "epoch": 0.08131695412995822, "grad_norm": 771.2155151367188, "learning_rate": 7.943434343434344e-06, "loss": 174.0228, "step": 9830 }, { "epoch": 0.08139967737932746, "grad_norm": 1397.8433837890625, "learning_rate": 7.951515151515152e-06, "loss": 195.8236, "step": 9840 }, { "epoch": 0.08148240062869669, "grad_norm": 1942.0560302734375, "learning_rate": 7.95959595959596e-06, "loss": 174.9514, "step": 9850 }, { "epoch": 0.08156512387806593, "grad_norm": 881.597900390625, "learning_rate": 7.96767676767677e-06, "loss": 155.3664, "step": 9860 }, { "epoch": 0.08164784712743517, "grad_norm": 982.1563110351562, "learning_rate": 7.975757575757576e-06, "loss": 166.2256, "step": 9870 }, { "epoch": 0.0817305703768044, "grad_norm": 1786.5938720703125, "learning_rate": 7.983838383838384e-06, "loss": 179.014, "step": 9880 }, { "epoch": 0.08181329362617364, "grad_norm": 1422.998046875, "learning_rate": 7.991919191919193e-06, "loss": 185.8182, "step": 9890 }, { "epoch": 0.08189601687554288, "grad_norm": 1009.948486328125, "learning_rate": 8.000000000000001e-06, "loss": 153.7416, "step": 9900 }, { "epoch": 0.0819787401249121, "grad_norm": 929.018798828125, "learning_rate": 8.00808080808081e-06, "loss": 178.9259, "step": 9910 }, { "epoch": 0.08206146337428134, "grad_norm": 654.3829345703125, "learning_rate": 8.016161616161617e-06, "loss": 130.9901, "step": 9920 }, { "epoch": 0.08214418662365058, "grad_norm": 1169.20068359375, "learning_rate": 8.024242424242425e-06, "loss": 125.0886, "step": 9930 }, { "epoch": 0.08222690987301981, "grad_norm": 1203.2630615234375, "learning_rate": 8.032323232323232e-06, "loss": 191.309, "step": 9940 }, { "epoch": 0.08230963312238905, "grad_norm": 1353.7027587890625, "learning_rate": 8.04040404040404e-06, "loss": 239.4487, "step": 9950 }, { "epoch": 0.08239235637175828, "grad_norm": 1199.4378662109375, "learning_rate": 8.048484848484849e-06, "loss": 173.5184, "step": 9960 }, { "epoch": 0.08247507962112752, "grad_norm": 1027.1614990234375, "learning_rate": 8.056565656565657e-06, "loss": 236.1041, "step": 9970 }, { "epoch": 0.08255780287049676, "grad_norm": 2071.078125, "learning_rate": 8.064646464646466e-06, "loss": 210.167, "step": 9980 }, { "epoch": 0.08264052611986598, "grad_norm": 931.7273559570312, "learning_rate": 8.072727272727274e-06, "loss": 169.0378, "step": 9990 }, { "epoch": 0.08272324936923522, "grad_norm": 1780.4796142578125, "learning_rate": 8.08080808080808e-06, "loss": 182.981, "step": 10000 }, { "epoch": 0.08280597261860446, "grad_norm": 1484.16845703125, "learning_rate": 8.08888888888889e-06, "loss": 190.9732, "step": 10010 }, { "epoch": 0.08288869586797369, "grad_norm": 1517.7740478515625, "learning_rate": 8.096969696969698e-06, "loss": 214.248, "step": 10020 }, { "epoch": 0.08297141911734293, "grad_norm": 4507.2001953125, "learning_rate": 8.105050505050506e-06, "loss": 149.1075, "step": 10030 }, { "epoch": 0.08305414236671216, "grad_norm": 1202.4232177734375, "learning_rate": 8.113131313131315e-06, "loss": 167.8956, "step": 10040 }, { "epoch": 0.0831368656160814, "grad_norm": 3145.95556640625, "learning_rate": 8.121212121212121e-06, "loss": 218.0878, "step": 10050 }, { "epoch": 0.08321958886545064, "grad_norm": 833.5780029296875, "learning_rate": 8.12929292929293e-06, "loss": 183.5963, "step": 10060 }, { "epoch": 0.08330231211481987, "grad_norm": 1200.9332275390625, "learning_rate": 8.137373737373738e-06, "loss": 200.6742, "step": 10070 }, { "epoch": 0.0833850353641891, "grad_norm": 2536.378662109375, "learning_rate": 8.145454545454547e-06, "loss": 244.585, "step": 10080 }, { "epoch": 0.08346775861355835, "grad_norm": 0.0, "learning_rate": 8.153535353535355e-06, "loss": 151.4828, "step": 10090 }, { "epoch": 0.08355048186292757, "grad_norm": 1108.849853515625, "learning_rate": 8.161616161616162e-06, "loss": 178.3399, "step": 10100 }, { "epoch": 0.08363320511229681, "grad_norm": 809.9747314453125, "learning_rate": 8.16969696969697e-06, "loss": 201.301, "step": 10110 }, { "epoch": 0.08371592836166604, "grad_norm": 1331.628173828125, "learning_rate": 8.177777777777779e-06, "loss": 141.3669, "step": 10120 }, { "epoch": 0.08379865161103528, "grad_norm": 990.9824829101562, "learning_rate": 8.185858585858587e-06, "loss": 169.3207, "step": 10130 }, { "epoch": 0.08388137486040452, "grad_norm": 1191.9073486328125, "learning_rate": 8.193939393939394e-06, "loss": 136.0156, "step": 10140 }, { "epoch": 0.08396409810977375, "grad_norm": 829.3964233398438, "learning_rate": 8.202020202020202e-06, "loss": 165.8068, "step": 10150 }, { "epoch": 0.08404682135914299, "grad_norm": 781.4263305664062, "learning_rate": 8.21010101010101e-06, "loss": 186.8697, "step": 10160 }, { "epoch": 0.08412954460851223, "grad_norm": 815.2606811523438, "learning_rate": 8.21818181818182e-06, "loss": 257.8631, "step": 10170 }, { "epoch": 0.08421226785788145, "grad_norm": 1860.0218505859375, "learning_rate": 8.226262626262626e-06, "loss": 187.696, "step": 10180 }, { "epoch": 0.0842949911072507, "grad_norm": 2099.6728515625, "learning_rate": 8.234343434343434e-06, "loss": 205.9569, "step": 10190 }, { "epoch": 0.08437771435661993, "grad_norm": 1526.5322265625, "learning_rate": 8.242424242424243e-06, "loss": 200.3001, "step": 10200 }, { "epoch": 0.08446043760598916, "grad_norm": 1122.8441162109375, "learning_rate": 8.250505050505051e-06, "loss": 132.1188, "step": 10210 }, { "epoch": 0.0845431608553584, "grad_norm": 952.8866577148438, "learning_rate": 8.25858585858586e-06, "loss": 180.2814, "step": 10220 }, { "epoch": 0.08462588410472763, "grad_norm": 1098.6842041015625, "learning_rate": 8.266666666666667e-06, "loss": 186.863, "step": 10230 }, { "epoch": 0.08470860735409687, "grad_norm": 1201.576904296875, "learning_rate": 8.274747474747475e-06, "loss": 166.658, "step": 10240 }, { "epoch": 0.08479133060346611, "grad_norm": 3628.583984375, "learning_rate": 8.282828282828283e-06, "loss": 280.9522, "step": 10250 }, { "epoch": 0.08487405385283533, "grad_norm": 956.4686279296875, "learning_rate": 8.290909090909092e-06, "loss": 177.9082, "step": 10260 }, { "epoch": 0.08495677710220458, "grad_norm": 1213.8653564453125, "learning_rate": 8.2989898989899e-06, "loss": 176.0302, "step": 10270 }, { "epoch": 0.08503950035157382, "grad_norm": 1263.095458984375, "learning_rate": 8.307070707070707e-06, "loss": 171.6865, "step": 10280 }, { "epoch": 0.08512222360094304, "grad_norm": 1094.4754638671875, "learning_rate": 8.315151515151516e-06, "loss": 158.2062, "step": 10290 }, { "epoch": 0.08520494685031228, "grad_norm": 957.9495239257812, "learning_rate": 8.323232323232324e-06, "loss": 169.5506, "step": 10300 }, { "epoch": 0.08528767009968151, "grad_norm": 1073.7796630859375, "learning_rate": 8.331313131313132e-06, "loss": 147.6098, "step": 10310 }, { "epoch": 0.08537039334905075, "grad_norm": 1453.0513916015625, "learning_rate": 8.339393939393941e-06, "loss": 189.7265, "step": 10320 }, { "epoch": 0.08545311659841999, "grad_norm": 791.720703125, "learning_rate": 8.34747474747475e-06, "loss": 214.0726, "step": 10330 }, { "epoch": 0.08553583984778922, "grad_norm": 1202.690673828125, "learning_rate": 8.355555555555556e-06, "loss": 197.4867, "step": 10340 }, { "epoch": 0.08561856309715846, "grad_norm": 1175.0545654296875, "learning_rate": 8.363636363636365e-06, "loss": 153.0685, "step": 10350 }, { "epoch": 0.0857012863465277, "grad_norm": 745.9451293945312, "learning_rate": 8.371717171717171e-06, "loss": 170.8535, "step": 10360 }, { "epoch": 0.08578400959589692, "grad_norm": 1357.7061767578125, "learning_rate": 8.37979797979798e-06, "loss": 162.8026, "step": 10370 }, { "epoch": 0.08586673284526616, "grad_norm": 1293.4013671875, "learning_rate": 8.387878787878788e-06, "loss": 152.5479, "step": 10380 }, { "epoch": 0.0859494560946354, "grad_norm": 1522.9151611328125, "learning_rate": 8.395959595959597e-06, "loss": 180.1763, "step": 10390 }, { "epoch": 0.08603217934400463, "grad_norm": 2097.494140625, "learning_rate": 8.404040404040405e-06, "loss": 168.4439, "step": 10400 }, { "epoch": 0.08611490259337387, "grad_norm": 1112.0291748046875, "learning_rate": 8.412121212121212e-06, "loss": 193.0542, "step": 10410 }, { "epoch": 0.0861976258427431, "grad_norm": 927.9266357421875, "learning_rate": 8.42020202020202e-06, "loss": 211.676, "step": 10420 }, { "epoch": 0.08628034909211234, "grad_norm": 1505.004150390625, "learning_rate": 8.428282828282829e-06, "loss": 203.2798, "step": 10430 }, { "epoch": 0.08636307234148158, "grad_norm": 1962.483154296875, "learning_rate": 8.436363636363637e-06, "loss": 217.3728, "step": 10440 }, { "epoch": 0.0864457955908508, "grad_norm": 2074.048583984375, "learning_rate": 8.444444444444446e-06, "loss": 224.9844, "step": 10450 }, { "epoch": 0.08652851884022005, "grad_norm": 1316.5950927734375, "learning_rate": 8.452525252525252e-06, "loss": 165.5474, "step": 10460 }, { "epoch": 0.08661124208958929, "grad_norm": 1096.7896728515625, "learning_rate": 8.460606060606061e-06, "loss": 138.2374, "step": 10470 }, { "epoch": 0.08669396533895851, "grad_norm": 1451.124755859375, "learning_rate": 8.46868686868687e-06, "loss": 141.8237, "step": 10480 }, { "epoch": 0.08677668858832775, "grad_norm": 1239.64697265625, "learning_rate": 8.476767676767678e-06, "loss": 156.3808, "step": 10490 }, { "epoch": 0.08685941183769698, "grad_norm": 689.178955078125, "learning_rate": 8.484848484848486e-06, "loss": 143.1666, "step": 10500 }, { "epoch": 0.08694213508706622, "grad_norm": 684.1348266601562, "learning_rate": 8.492929292929295e-06, "loss": 182.6362, "step": 10510 }, { "epoch": 0.08702485833643546, "grad_norm": 1299.8443603515625, "learning_rate": 8.501010101010101e-06, "loss": 160.0256, "step": 10520 }, { "epoch": 0.08710758158580469, "grad_norm": 845.9354858398438, "learning_rate": 8.50909090909091e-06, "loss": 184.4307, "step": 10530 }, { "epoch": 0.08719030483517393, "grad_norm": 1035.8701171875, "learning_rate": 8.517171717171718e-06, "loss": 228.6084, "step": 10540 }, { "epoch": 0.08727302808454317, "grad_norm": 668.9830932617188, "learning_rate": 8.525252525252527e-06, "loss": 200.758, "step": 10550 }, { "epoch": 0.0873557513339124, "grad_norm": 1058.86181640625, "learning_rate": 8.533333333333335e-06, "loss": 204.7663, "step": 10560 }, { "epoch": 0.08743847458328163, "grad_norm": 1521.737548828125, "learning_rate": 8.541414141414142e-06, "loss": 192.2928, "step": 10570 }, { "epoch": 0.08752119783265086, "grad_norm": 1117.1220703125, "learning_rate": 8.54949494949495e-06, "loss": 142.4138, "step": 10580 }, { "epoch": 0.0876039210820201, "grad_norm": 1011.63525390625, "learning_rate": 8.557575757575757e-06, "loss": 193.8034, "step": 10590 }, { "epoch": 0.08768664433138934, "grad_norm": 2114.847900390625, "learning_rate": 8.565656565656566e-06, "loss": 184.3933, "step": 10600 }, { "epoch": 0.08776936758075857, "grad_norm": 823.4564819335938, "learning_rate": 8.573737373737374e-06, "loss": 186.983, "step": 10610 }, { "epoch": 0.08785209083012781, "grad_norm": 1262.8677978515625, "learning_rate": 8.581818181818183e-06, "loss": 145.9637, "step": 10620 }, { "epoch": 0.08793481407949705, "grad_norm": 1084.4609375, "learning_rate": 8.589898989898991e-06, "loss": 199.8638, "step": 10630 }, { "epoch": 0.08801753732886627, "grad_norm": 1483.7017822265625, "learning_rate": 8.597979797979798e-06, "loss": 183.0729, "step": 10640 }, { "epoch": 0.08810026057823551, "grad_norm": 1132.1685791015625, "learning_rate": 8.606060606060606e-06, "loss": 160.7905, "step": 10650 }, { "epoch": 0.08818298382760476, "grad_norm": 1912.802978515625, "learning_rate": 8.614141414141415e-06, "loss": 235.4324, "step": 10660 }, { "epoch": 0.08826570707697398, "grad_norm": 1315.4351806640625, "learning_rate": 8.622222222222223e-06, "loss": 186.3054, "step": 10670 }, { "epoch": 0.08834843032634322, "grad_norm": 884.4146118164062, "learning_rate": 8.630303030303032e-06, "loss": 157.2276, "step": 10680 }, { "epoch": 0.08843115357571245, "grad_norm": 1087.9329833984375, "learning_rate": 8.63838383838384e-06, "loss": 154.3768, "step": 10690 }, { "epoch": 0.08851387682508169, "grad_norm": 913.88818359375, "learning_rate": 8.646464646464647e-06, "loss": 185.6447, "step": 10700 }, { "epoch": 0.08859660007445093, "grad_norm": 2494.311279296875, "learning_rate": 8.654545454545455e-06, "loss": 195.7933, "step": 10710 }, { "epoch": 0.08867932332382016, "grad_norm": 1353.1029052734375, "learning_rate": 8.662626262626264e-06, "loss": 179.2149, "step": 10720 }, { "epoch": 0.0887620465731894, "grad_norm": 1235.08056640625, "learning_rate": 8.670707070707072e-06, "loss": 140.4072, "step": 10730 }, { "epoch": 0.08884476982255864, "grad_norm": 1513.452880859375, "learning_rate": 8.67878787878788e-06, "loss": 188.1772, "step": 10740 }, { "epoch": 0.08892749307192786, "grad_norm": 576.0892333984375, "learning_rate": 8.686868686868687e-06, "loss": 149.6942, "step": 10750 }, { "epoch": 0.0890102163212971, "grad_norm": 1103.5166015625, "learning_rate": 8.694949494949496e-06, "loss": 165.9619, "step": 10760 }, { "epoch": 0.08909293957066633, "grad_norm": 964.4625244140625, "learning_rate": 8.703030303030304e-06, "loss": 138.5367, "step": 10770 }, { "epoch": 0.08917566282003557, "grad_norm": 1614.7874755859375, "learning_rate": 8.711111111111111e-06, "loss": 175.2976, "step": 10780 }, { "epoch": 0.08925838606940481, "grad_norm": 1648.361328125, "learning_rate": 8.71919191919192e-06, "loss": 187.3021, "step": 10790 }, { "epoch": 0.08934110931877404, "grad_norm": 1707.2655029296875, "learning_rate": 8.727272727272728e-06, "loss": 258.8253, "step": 10800 }, { "epoch": 0.08942383256814328, "grad_norm": 1160.01806640625, "learning_rate": 8.735353535353536e-06, "loss": 194.5743, "step": 10810 }, { "epoch": 0.08950655581751252, "grad_norm": 1146.39990234375, "learning_rate": 8.743434343434343e-06, "loss": 203.5324, "step": 10820 }, { "epoch": 0.08958927906688174, "grad_norm": 3626.928955078125, "learning_rate": 8.751515151515151e-06, "loss": 253.2095, "step": 10830 }, { "epoch": 0.08967200231625098, "grad_norm": 728.5478515625, "learning_rate": 8.75959595959596e-06, "loss": 145.7956, "step": 10840 }, { "epoch": 0.08975472556562021, "grad_norm": 1002.9689331054688, "learning_rate": 8.767676767676768e-06, "loss": 181.9694, "step": 10850 }, { "epoch": 0.08983744881498945, "grad_norm": 1103.3865966796875, "learning_rate": 8.775757575757577e-06, "loss": 177.4934, "step": 10860 }, { "epoch": 0.08992017206435869, "grad_norm": 1987.7679443359375, "learning_rate": 8.783838383838385e-06, "loss": 190.5272, "step": 10870 }, { "epoch": 0.09000289531372792, "grad_norm": 1918.5572509765625, "learning_rate": 8.791919191919192e-06, "loss": 176.4213, "step": 10880 }, { "epoch": 0.09008561856309716, "grad_norm": 979.5933837890625, "learning_rate": 8.8e-06, "loss": 208.7932, "step": 10890 }, { "epoch": 0.0901683418124664, "grad_norm": 1254.18603515625, "learning_rate": 8.808080808080809e-06, "loss": 179.7351, "step": 10900 }, { "epoch": 0.09025106506183563, "grad_norm": 1393.46044921875, "learning_rate": 8.816161616161617e-06, "loss": 239.5334, "step": 10910 }, { "epoch": 0.09033378831120487, "grad_norm": 932.0426025390625, "learning_rate": 8.824242424242426e-06, "loss": 159.2398, "step": 10920 }, { "epoch": 0.0904165115605741, "grad_norm": 1175.749755859375, "learning_rate": 8.832323232323233e-06, "loss": 196.0824, "step": 10930 }, { "epoch": 0.09049923480994333, "grad_norm": 669.9056396484375, "learning_rate": 8.840404040404041e-06, "loss": 195.3154, "step": 10940 }, { "epoch": 0.09058195805931257, "grad_norm": 2161.71826171875, "learning_rate": 8.84848484848485e-06, "loss": 201.2208, "step": 10950 }, { "epoch": 0.0906646813086818, "grad_norm": 1083.76318359375, "learning_rate": 8.856565656565658e-06, "loss": 181.3205, "step": 10960 }, { "epoch": 0.09074740455805104, "grad_norm": 1360.2738037109375, "learning_rate": 8.864646464646466e-06, "loss": 173.5026, "step": 10970 }, { "epoch": 0.09083012780742028, "grad_norm": 1274.3690185546875, "learning_rate": 8.872727272727275e-06, "loss": 191.3013, "step": 10980 }, { "epoch": 0.0909128510567895, "grad_norm": 1275.59814453125, "learning_rate": 8.880808080808082e-06, "loss": 180.379, "step": 10990 }, { "epoch": 0.09099557430615875, "grad_norm": 1908.28125, "learning_rate": 8.888888888888888e-06, "loss": 226.928, "step": 11000 }, { "epoch": 0.09107829755552799, "grad_norm": 1636.098388671875, "learning_rate": 8.896969696969697e-06, "loss": 166.5137, "step": 11010 }, { "epoch": 0.09116102080489721, "grad_norm": 1362.2508544921875, "learning_rate": 8.905050505050505e-06, "loss": 162.6263, "step": 11020 }, { "epoch": 0.09124374405426645, "grad_norm": 958.8033447265625, "learning_rate": 8.913131313131314e-06, "loss": 126.4212, "step": 11030 }, { "epoch": 0.09132646730363568, "grad_norm": 4022.8896484375, "learning_rate": 8.921212121212122e-06, "loss": 171.8223, "step": 11040 }, { "epoch": 0.09140919055300492, "grad_norm": 1437.0355224609375, "learning_rate": 8.92929292929293e-06, "loss": 150.3858, "step": 11050 }, { "epoch": 0.09149191380237416, "grad_norm": 1457.9029541015625, "learning_rate": 8.937373737373737e-06, "loss": 191.1608, "step": 11060 }, { "epoch": 0.09157463705174339, "grad_norm": 1079.6673583984375, "learning_rate": 8.945454545454546e-06, "loss": 169.316, "step": 11070 }, { "epoch": 0.09165736030111263, "grad_norm": 1028.742431640625, "learning_rate": 8.953535353535354e-06, "loss": 141.8136, "step": 11080 }, { "epoch": 0.09174008355048187, "grad_norm": 1539.916259765625, "learning_rate": 8.961616161616163e-06, "loss": 196.1172, "step": 11090 }, { "epoch": 0.0918228067998511, "grad_norm": 1310.006591796875, "learning_rate": 8.969696969696971e-06, "loss": 153.3139, "step": 11100 }, { "epoch": 0.09190553004922034, "grad_norm": 3632.826416015625, "learning_rate": 8.977777777777778e-06, "loss": 274.3921, "step": 11110 }, { "epoch": 0.09198825329858956, "grad_norm": 1360.5455322265625, "learning_rate": 8.985858585858586e-06, "loss": 199.4904, "step": 11120 }, { "epoch": 0.0920709765479588, "grad_norm": 1219.487548828125, "learning_rate": 8.993939393939395e-06, "loss": 138.8559, "step": 11130 }, { "epoch": 0.09215369979732804, "grad_norm": 1320.4920654296875, "learning_rate": 9.002020202020203e-06, "loss": 179.0958, "step": 11140 }, { "epoch": 0.09223642304669727, "grad_norm": 754.0531616210938, "learning_rate": 9.010101010101012e-06, "loss": 183.8919, "step": 11150 }, { "epoch": 0.09231914629606651, "grad_norm": 1132.8018798828125, "learning_rate": 9.01818181818182e-06, "loss": 138.0276, "step": 11160 }, { "epoch": 0.09240186954543575, "grad_norm": 1015.0302124023438, "learning_rate": 9.026262626262627e-06, "loss": 169.833, "step": 11170 }, { "epoch": 0.09248459279480498, "grad_norm": 1256.841552734375, "learning_rate": 9.034343434343435e-06, "loss": 192.5117, "step": 11180 }, { "epoch": 0.09256731604417422, "grad_norm": 637.6029663085938, "learning_rate": 9.042424242424244e-06, "loss": 136.4471, "step": 11190 }, { "epoch": 0.09265003929354346, "grad_norm": 2063.305419921875, "learning_rate": 9.050505050505052e-06, "loss": 187.3035, "step": 11200 }, { "epoch": 0.09273276254291268, "grad_norm": 893.4026489257812, "learning_rate": 9.058585858585859e-06, "loss": 197.6645, "step": 11210 }, { "epoch": 0.09281548579228192, "grad_norm": 11727.92578125, "learning_rate": 9.066666666666667e-06, "loss": 184.4137, "step": 11220 }, { "epoch": 0.09289820904165115, "grad_norm": 939.4194946289062, "learning_rate": 9.074747474747476e-06, "loss": 163.7697, "step": 11230 }, { "epoch": 0.09298093229102039, "grad_norm": 1211.7532958984375, "learning_rate": 9.082828282828283e-06, "loss": 150.1024, "step": 11240 }, { "epoch": 0.09306365554038963, "grad_norm": 1020.1021118164062, "learning_rate": 9.090909090909091e-06, "loss": 176.2725, "step": 11250 }, { "epoch": 0.09314637878975886, "grad_norm": 1597.582763671875, "learning_rate": 9.0989898989899e-06, "loss": 161.5863, "step": 11260 }, { "epoch": 0.0932291020391281, "grad_norm": 1205.4434814453125, "learning_rate": 9.107070707070708e-06, "loss": 191.3436, "step": 11270 }, { "epoch": 0.09331182528849734, "grad_norm": 1347.427001953125, "learning_rate": 9.115151515151516e-06, "loss": 147.056, "step": 11280 }, { "epoch": 0.09339454853786656, "grad_norm": 1340.0323486328125, "learning_rate": 9.123232323232323e-06, "loss": 166.0659, "step": 11290 }, { "epoch": 0.0934772717872358, "grad_norm": 1243.469482421875, "learning_rate": 9.131313131313132e-06, "loss": 160.2771, "step": 11300 }, { "epoch": 0.09355999503660503, "grad_norm": 1073.79638671875, "learning_rate": 9.13939393939394e-06, "loss": 189.4957, "step": 11310 }, { "epoch": 0.09364271828597427, "grad_norm": 1185.810546875, "learning_rate": 9.147474747474748e-06, "loss": 169.7764, "step": 11320 }, { "epoch": 0.09372544153534351, "grad_norm": 1002.9879760742188, "learning_rate": 9.155555555555557e-06, "loss": 117.5662, "step": 11330 }, { "epoch": 0.09380816478471274, "grad_norm": 1063.5313720703125, "learning_rate": 9.163636363636365e-06, "loss": 199.0167, "step": 11340 }, { "epoch": 0.09389088803408198, "grad_norm": 975.7847290039062, "learning_rate": 9.171717171717172e-06, "loss": 178.3596, "step": 11350 }, { "epoch": 0.09397361128345122, "grad_norm": 1078.6475830078125, "learning_rate": 9.17979797979798e-06, "loss": 167.8156, "step": 11360 }, { "epoch": 0.09405633453282045, "grad_norm": 2150.571044921875, "learning_rate": 9.187878787878789e-06, "loss": 151.6369, "step": 11370 }, { "epoch": 0.09413905778218969, "grad_norm": 1107.6488037109375, "learning_rate": 9.195959595959597e-06, "loss": 138.8753, "step": 11380 }, { "epoch": 0.09422178103155891, "grad_norm": 2013.7427978515625, "learning_rate": 9.204040404040406e-06, "loss": 168.8607, "step": 11390 }, { "epoch": 0.09430450428092815, "grad_norm": 926.5513916015625, "learning_rate": 9.212121212121213e-06, "loss": 155.2859, "step": 11400 }, { "epoch": 0.0943872275302974, "grad_norm": 640.766357421875, "learning_rate": 9.220202020202021e-06, "loss": 170.4931, "step": 11410 }, { "epoch": 0.09446995077966662, "grad_norm": 2521.883544921875, "learning_rate": 9.228282828282828e-06, "loss": 163.8456, "step": 11420 }, { "epoch": 0.09455267402903586, "grad_norm": 2574.5419921875, "learning_rate": 9.236363636363636e-06, "loss": 192.8077, "step": 11430 }, { "epoch": 0.0946353972784051, "grad_norm": 1514.116943359375, "learning_rate": 9.244444444444445e-06, "loss": 190.4653, "step": 11440 }, { "epoch": 0.09471812052777433, "grad_norm": 1116.5655517578125, "learning_rate": 9.252525252525253e-06, "loss": 150.5008, "step": 11450 }, { "epoch": 0.09480084377714357, "grad_norm": 7808.6015625, "learning_rate": 9.260606060606062e-06, "loss": 282.1488, "step": 11460 }, { "epoch": 0.09488356702651281, "grad_norm": 1106.9825439453125, "learning_rate": 9.268686868686868e-06, "loss": 195.7642, "step": 11470 }, { "epoch": 0.09496629027588203, "grad_norm": 3581.72900390625, "learning_rate": 9.276767676767677e-06, "loss": 179.7523, "step": 11480 }, { "epoch": 0.09504901352525127, "grad_norm": 1101.411865234375, "learning_rate": 9.284848484848485e-06, "loss": 199.7394, "step": 11490 }, { "epoch": 0.0951317367746205, "grad_norm": 1929.9052734375, "learning_rate": 9.292929292929294e-06, "loss": 174.8564, "step": 11500 }, { "epoch": 0.09521446002398974, "grad_norm": 1121.5028076171875, "learning_rate": 9.301010101010102e-06, "loss": 134.3296, "step": 11510 }, { "epoch": 0.09529718327335898, "grad_norm": 1080.38671875, "learning_rate": 9.30909090909091e-06, "loss": 173.9918, "step": 11520 }, { "epoch": 0.09537990652272821, "grad_norm": 1371.1961669921875, "learning_rate": 9.317171717171717e-06, "loss": 213.5175, "step": 11530 }, { "epoch": 0.09546262977209745, "grad_norm": 1480.9495849609375, "learning_rate": 9.325252525252526e-06, "loss": 136.4908, "step": 11540 }, { "epoch": 0.09554535302146669, "grad_norm": 912.1797485351562, "learning_rate": 9.333333333333334e-06, "loss": 215.026, "step": 11550 }, { "epoch": 0.09562807627083592, "grad_norm": 2468.00341796875, "learning_rate": 9.341414141414143e-06, "loss": 175.5986, "step": 11560 }, { "epoch": 0.09571079952020516, "grad_norm": 1296.7786865234375, "learning_rate": 9.349494949494951e-06, "loss": 209.1408, "step": 11570 }, { "epoch": 0.09579352276957438, "grad_norm": 1264.5654296875, "learning_rate": 9.357575757575758e-06, "loss": 200.8274, "step": 11580 }, { "epoch": 0.09587624601894362, "grad_norm": 1155.8309326171875, "learning_rate": 9.365656565656566e-06, "loss": 164.7913, "step": 11590 }, { "epoch": 0.09595896926831286, "grad_norm": 1020.3081665039062, "learning_rate": 9.373737373737375e-06, "loss": 155.3867, "step": 11600 }, { "epoch": 0.09604169251768209, "grad_norm": 1648.6768798828125, "learning_rate": 9.381818181818183e-06, "loss": 150.0429, "step": 11610 }, { "epoch": 0.09612441576705133, "grad_norm": 1153.5174560546875, "learning_rate": 9.389898989898992e-06, "loss": 183.8605, "step": 11620 }, { "epoch": 0.09620713901642057, "grad_norm": 1894.8492431640625, "learning_rate": 9.397979797979799e-06, "loss": 178.0279, "step": 11630 }, { "epoch": 0.0962898622657898, "grad_norm": 1127.750244140625, "learning_rate": 9.406060606060607e-06, "loss": 162.6299, "step": 11640 }, { "epoch": 0.09637258551515904, "grad_norm": 1268.74267578125, "learning_rate": 9.414141414141414e-06, "loss": 155.4865, "step": 11650 }, { "epoch": 0.09645530876452828, "grad_norm": 1007.6961669921875, "learning_rate": 9.422222222222222e-06, "loss": 209.233, "step": 11660 }, { "epoch": 0.0965380320138975, "grad_norm": 1457.4285888671875, "learning_rate": 9.43030303030303e-06, "loss": 163.3144, "step": 11670 }, { "epoch": 0.09662075526326674, "grad_norm": 1042.603515625, "learning_rate": 9.438383838383839e-06, "loss": 182.3341, "step": 11680 }, { "epoch": 0.09670347851263597, "grad_norm": 1047.1702880859375, "learning_rate": 9.446464646464648e-06, "loss": 167.2329, "step": 11690 }, { "epoch": 0.09678620176200521, "grad_norm": 1840.010009765625, "learning_rate": 9.454545454545456e-06, "loss": 204.4755, "step": 11700 }, { "epoch": 0.09686892501137445, "grad_norm": 573.9185180664062, "learning_rate": 9.462626262626263e-06, "loss": 106.0682, "step": 11710 }, { "epoch": 0.09695164826074368, "grad_norm": 1605.5093994140625, "learning_rate": 9.470707070707071e-06, "loss": 214.7734, "step": 11720 }, { "epoch": 0.09703437151011292, "grad_norm": 2460.1376953125, "learning_rate": 9.47878787878788e-06, "loss": 180.7382, "step": 11730 }, { "epoch": 0.09711709475948216, "grad_norm": 1100.57080078125, "learning_rate": 9.486868686868688e-06, "loss": 220.1118, "step": 11740 }, { "epoch": 0.09719981800885139, "grad_norm": 937.7584838867188, "learning_rate": 9.494949494949497e-06, "loss": 176.8332, "step": 11750 }, { "epoch": 0.09728254125822063, "grad_norm": 1534.452880859375, "learning_rate": 9.503030303030303e-06, "loss": 173.9935, "step": 11760 }, { "epoch": 0.09736526450758985, "grad_norm": 1466.6376953125, "learning_rate": 9.511111111111112e-06, "loss": 196.6087, "step": 11770 }, { "epoch": 0.09744798775695909, "grad_norm": 1731.87841796875, "learning_rate": 9.51919191919192e-06, "loss": 179.0696, "step": 11780 }, { "epoch": 0.09753071100632833, "grad_norm": 1235.0560302734375, "learning_rate": 9.527272727272729e-06, "loss": 164.9756, "step": 11790 }, { "epoch": 0.09761343425569756, "grad_norm": 662.07568359375, "learning_rate": 9.535353535353537e-06, "loss": 155.2859, "step": 11800 }, { "epoch": 0.0976961575050668, "grad_norm": 642.6212158203125, "learning_rate": 9.543434343434344e-06, "loss": 183.2646, "step": 11810 }, { "epoch": 0.09777888075443604, "grad_norm": 861.9931030273438, "learning_rate": 9.551515151515152e-06, "loss": 140.6058, "step": 11820 }, { "epoch": 0.09786160400380527, "grad_norm": 2064.848388671875, "learning_rate": 9.55959595959596e-06, "loss": 175.8456, "step": 11830 }, { "epoch": 0.0979443272531745, "grad_norm": 1402.7391357421875, "learning_rate": 9.56767676767677e-06, "loss": 168.1851, "step": 11840 }, { "epoch": 0.09802705050254373, "grad_norm": 1274.8916015625, "learning_rate": 9.575757575757576e-06, "loss": 177.3613, "step": 11850 }, { "epoch": 0.09810977375191297, "grad_norm": 1175.4793701171875, "learning_rate": 9.583838383838384e-06, "loss": 155.9946, "step": 11860 }, { "epoch": 0.09819249700128221, "grad_norm": 1113.3656005859375, "learning_rate": 9.591919191919193e-06, "loss": 174.7684, "step": 11870 }, { "epoch": 0.09827522025065144, "grad_norm": 1227.9544677734375, "learning_rate": 9.600000000000001e-06, "loss": 173.7199, "step": 11880 }, { "epoch": 0.09835794350002068, "grad_norm": 1131.5029296875, "learning_rate": 9.608080808080808e-06, "loss": 179.6841, "step": 11890 }, { "epoch": 0.09844066674938992, "grad_norm": 1879.9422607421875, "learning_rate": 9.616161616161616e-06, "loss": 201.8642, "step": 11900 }, { "epoch": 0.09852338999875915, "grad_norm": 1383.9271240234375, "learning_rate": 9.624242424242425e-06, "loss": 155.471, "step": 11910 }, { "epoch": 0.09860611324812839, "grad_norm": 1202.215576171875, "learning_rate": 9.632323232323233e-06, "loss": 176.2691, "step": 11920 }, { "epoch": 0.09868883649749763, "grad_norm": 713.47216796875, "learning_rate": 9.640404040404042e-06, "loss": 129.0435, "step": 11930 }, { "epoch": 0.09877155974686685, "grad_norm": 1676.49658203125, "learning_rate": 9.648484848484849e-06, "loss": 158.0141, "step": 11940 }, { "epoch": 0.0988542829962361, "grad_norm": 822.8709716796875, "learning_rate": 9.656565656565657e-06, "loss": 128.7683, "step": 11950 }, { "epoch": 0.09893700624560532, "grad_norm": 2505.596923828125, "learning_rate": 9.664646464646465e-06, "loss": 173.4101, "step": 11960 }, { "epoch": 0.09901972949497456, "grad_norm": 764.0625610351562, "learning_rate": 9.672727272727274e-06, "loss": 186.7536, "step": 11970 }, { "epoch": 0.0991024527443438, "grad_norm": 1434.5181884765625, "learning_rate": 9.680808080808082e-06, "loss": 195.2917, "step": 11980 }, { "epoch": 0.09918517599371303, "grad_norm": 916.0157470703125, "learning_rate": 9.688888888888889e-06, "loss": 151.0252, "step": 11990 }, { "epoch": 0.09926789924308227, "grad_norm": 1034.74072265625, "learning_rate": 9.696969696969698e-06, "loss": 163.2282, "step": 12000 }, { "epoch": 0.09935062249245151, "grad_norm": 1380.8863525390625, "learning_rate": 9.705050505050506e-06, "loss": 139.9053, "step": 12010 }, { "epoch": 0.09943334574182074, "grad_norm": 911.8162231445312, "learning_rate": 9.713131313131314e-06, "loss": 159.4434, "step": 12020 }, { "epoch": 0.09951606899118998, "grad_norm": 1710.5338134765625, "learning_rate": 9.721212121212123e-06, "loss": 152.8905, "step": 12030 }, { "epoch": 0.0995987922405592, "grad_norm": 1827.9671630859375, "learning_rate": 9.729292929292931e-06, "loss": 195.7561, "step": 12040 }, { "epoch": 0.09968151548992844, "grad_norm": 1594.53759765625, "learning_rate": 9.737373737373738e-06, "loss": 172.4959, "step": 12050 }, { "epoch": 0.09976423873929768, "grad_norm": 985.9189453125, "learning_rate": 9.745454545454547e-06, "loss": 167.3074, "step": 12060 }, { "epoch": 0.09984696198866691, "grad_norm": 1075.2579345703125, "learning_rate": 9.753535353535353e-06, "loss": 163.0599, "step": 12070 }, { "epoch": 0.09992968523803615, "grad_norm": 2030.679443359375, "learning_rate": 9.761616161616162e-06, "loss": 163.872, "step": 12080 }, { "epoch": 0.10001240848740539, "grad_norm": 1216.58984375, "learning_rate": 9.76969696969697e-06, "loss": 158.4474, "step": 12090 }, { "epoch": 0.10009513173677462, "grad_norm": 1842.97998046875, "learning_rate": 9.777777777777779e-06, "loss": 225.2362, "step": 12100 }, { "epoch": 0.10017785498614386, "grad_norm": 1359.1461181640625, "learning_rate": 9.785858585858587e-06, "loss": 172.9057, "step": 12110 }, { "epoch": 0.10026057823551308, "grad_norm": 1055.8148193359375, "learning_rate": 9.793939393939394e-06, "loss": 153.7417, "step": 12120 }, { "epoch": 0.10034330148488232, "grad_norm": 1045.6163330078125, "learning_rate": 9.802020202020202e-06, "loss": 151.8674, "step": 12130 }, { "epoch": 0.10042602473425156, "grad_norm": 702.3483276367188, "learning_rate": 9.81010101010101e-06, "loss": 154.9376, "step": 12140 }, { "epoch": 0.10050874798362079, "grad_norm": 582.2290649414062, "learning_rate": 9.81818181818182e-06, "loss": 191.1368, "step": 12150 }, { "epoch": 0.10059147123299003, "grad_norm": 1067.549072265625, "learning_rate": 9.826262626262628e-06, "loss": 215.4936, "step": 12160 }, { "epoch": 0.10067419448235927, "grad_norm": 1477.08251953125, "learning_rate": 9.834343434343434e-06, "loss": 179.9154, "step": 12170 }, { "epoch": 0.1007569177317285, "grad_norm": 1615.7445068359375, "learning_rate": 9.842424242424243e-06, "loss": 151.6433, "step": 12180 }, { "epoch": 0.10083964098109774, "grad_norm": 1283.7108154296875, "learning_rate": 9.850505050505051e-06, "loss": 176.6817, "step": 12190 }, { "epoch": 0.10092236423046698, "grad_norm": 1741.5172119140625, "learning_rate": 9.85858585858586e-06, "loss": 148.4978, "step": 12200 }, { "epoch": 0.1010050874798362, "grad_norm": 956.2932739257812, "learning_rate": 9.866666666666668e-06, "loss": 178.0466, "step": 12210 }, { "epoch": 0.10108781072920545, "grad_norm": 771.7984008789062, "learning_rate": 9.874747474747477e-06, "loss": 177.3261, "step": 12220 }, { "epoch": 0.10117053397857467, "grad_norm": 703.4427490234375, "learning_rate": 9.882828282828283e-06, "loss": 175.3549, "step": 12230 }, { "epoch": 0.10125325722794391, "grad_norm": 931.7167358398438, "learning_rate": 9.890909090909092e-06, "loss": 158.6873, "step": 12240 }, { "epoch": 0.10133598047731315, "grad_norm": 1080.5570068359375, "learning_rate": 9.8989898989899e-06, "loss": 109.4355, "step": 12250 }, { "epoch": 0.10141870372668238, "grad_norm": 1163.09423828125, "learning_rate": 9.907070707070709e-06, "loss": 164.6523, "step": 12260 }, { "epoch": 0.10150142697605162, "grad_norm": 870.4569091796875, "learning_rate": 9.915151515151515e-06, "loss": 162.8174, "step": 12270 }, { "epoch": 0.10158415022542086, "grad_norm": 976.0827026367188, "learning_rate": 9.923232323232324e-06, "loss": 149.6927, "step": 12280 }, { "epoch": 0.10166687347479009, "grad_norm": 881.8626098632812, "learning_rate": 9.931313131313132e-06, "loss": 128.7362, "step": 12290 }, { "epoch": 0.10174959672415933, "grad_norm": 612.5805053710938, "learning_rate": 9.939393939393939e-06, "loss": 129.0437, "step": 12300 }, { "epoch": 0.10183231997352855, "grad_norm": 1372.064453125, "learning_rate": 9.947474747474748e-06, "loss": 188.9469, "step": 12310 }, { "epoch": 0.1019150432228978, "grad_norm": 1295.59375, "learning_rate": 9.955555555555556e-06, "loss": 167.0848, "step": 12320 }, { "epoch": 0.10199776647226703, "grad_norm": 1037.228515625, "learning_rate": 9.963636363636364e-06, "loss": 176.8787, "step": 12330 }, { "epoch": 0.10208048972163626, "grad_norm": 1227.047607421875, "learning_rate": 9.971717171717173e-06, "loss": 198.2504, "step": 12340 }, { "epoch": 0.1021632129710055, "grad_norm": 1416.220458984375, "learning_rate": 9.97979797979798e-06, "loss": 179.9958, "step": 12350 }, { "epoch": 0.10224593622037474, "grad_norm": 1424.654052734375, "learning_rate": 9.987878787878788e-06, "loss": 191.8388, "step": 12360 }, { "epoch": 0.10232865946974397, "grad_norm": 1317.3236083984375, "learning_rate": 9.995959595959597e-06, "loss": 167.6917, "step": 12370 }, { "epoch": 0.10241138271911321, "grad_norm": 868.8121337890625, "learning_rate": 9.99999995027162e-06, "loss": 156.1878, "step": 12380 }, { "epoch": 0.10249410596848244, "grad_norm": 889.690185546875, "learning_rate": 9.99999955244457e-06, "loss": 188.0309, "step": 12390 }, { "epoch": 0.10257682921785168, "grad_norm": 964.9615478515625, "learning_rate": 9.999998756790503e-06, "loss": 162.0736, "step": 12400 }, { "epoch": 0.10265955246722092, "grad_norm": 2292.847900390625, "learning_rate": 9.999997563309483e-06, "loss": 138.4502, "step": 12410 }, { "epoch": 0.10274227571659014, "grad_norm": 1073.1864013671875, "learning_rate": 9.999995972001602e-06, "loss": 183.7101, "step": 12420 }, { "epoch": 0.10282499896595938, "grad_norm": 1411.09521484375, "learning_rate": 9.99999398286699e-06, "loss": 172.5208, "step": 12430 }, { "epoch": 0.10290772221532862, "grad_norm": 1048.4619140625, "learning_rate": 9.999991595905803e-06, "loss": 169.999, "step": 12440 }, { "epoch": 0.10299044546469785, "grad_norm": 986.3778686523438, "learning_rate": 9.999988811118232e-06, "loss": 146.7089, "step": 12450 }, { "epoch": 0.10307316871406709, "grad_norm": 795.2116088867188, "learning_rate": 9.999985628504498e-06, "loss": 159.4188, "step": 12460 }, { "epoch": 0.10315589196343633, "grad_norm": 919.0172119140625, "learning_rate": 9.999982048064854e-06, "loss": 144.6598, "step": 12470 }, { "epoch": 0.10323861521280556, "grad_norm": 1297.25732421875, "learning_rate": 9.999978069799585e-06, "loss": 171.9091, "step": 12480 }, { "epoch": 0.1033213384621748, "grad_norm": 1352.2923583984375, "learning_rate": 9.999973693709008e-06, "loss": 151.0232, "step": 12490 }, { "epoch": 0.10340406171154402, "grad_norm": 1723.31640625, "learning_rate": 9.99996891979347e-06, "loss": 131.0583, "step": 12500 }, { "epoch": 0.10348678496091326, "grad_norm": 1077.4090576171875, "learning_rate": 9.999963748053354e-06, "loss": 155.0065, "step": 12510 }, { "epoch": 0.1035695082102825, "grad_norm": 881.5154418945312, "learning_rate": 9.999958178489069e-06, "loss": 150.2538, "step": 12520 }, { "epoch": 0.10365223145965173, "grad_norm": 1021.1143188476562, "learning_rate": 9.999952211101056e-06, "loss": 154.2661, "step": 12530 }, { "epoch": 0.10373495470902097, "grad_norm": 1220.9776611328125, "learning_rate": 9.999945845889795e-06, "loss": 154.2683, "step": 12540 }, { "epoch": 0.10381767795839021, "grad_norm": 1148.5760498046875, "learning_rate": 9.999939082855788e-06, "loss": 137.0497, "step": 12550 }, { "epoch": 0.10390040120775944, "grad_norm": 1300.0975341796875, "learning_rate": 9.999931921999575e-06, "loss": 169.4536, "step": 12560 }, { "epoch": 0.10398312445712868, "grad_norm": 1183.8826904296875, "learning_rate": 9.999924363321726e-06, "loss": 166.6497, "step": 12570 }, { "epoch": 0.1040658477064979, "grad_norm": 718.7395629882812, "learning_rate": 9.999916406822843e-06, "loss": 109.411, "step": 12580 }, { "epoch": 0.10414857095586715, "grad_norm": 841.6725463867188, "learning_rate": 9.999908052503557e-06, "loss": 146.8344, "step": 12590 }, { "epoch": 0.10423129420523639, "grad_norm": 918.6251831054688, "learning_rate": 9.999899300364534e-06, "loss": 162.9566, "step": 12600 }, { "epoch": 0.10431401745460561, "grad_norm": 1467.325927734375, "learning_rate": 9.99989015040647e-06, "loss": 189.8623, "step": 12610 }, { "epoch": 0.10439674070397485, "grad_norm": 2947.025146484375, "learning_rate": 9.999880602630092e-06, "loss": 141.2983, "step": 12620 }, { "epoch": 0.10447946395334409, "grad_norm": 1483.4896240234375, "learning_rate": 9.999870657036161e-06, "loss": 162.2035, "step": 12630 }, { "epoch": 0.10456218720271332, "grad_norm": 1846.94580078125, "learning_rate": 9.99986031362547e-06, "loss": 150.0733, "step": 12640 }, { "epoch": 0.10464491045208256, "grad_norm": 1538.964111328125, "learning_rate": 9.99984957239884e-06, "loss": 206.5021, "step": 12650 }, { "epoch": 0.1047276337014518, "grad_norm": 1100.305908203125, "learning_rate": 9.999838433357124e-06, "loss": 197.57, "step": 12660 }, { "epoch": 0.10481035695082103, "grad_norm": 594.6061401367188, "learning_rate": 9.99982689650121e-06, "loss": 179.5414, "step": 12670 }, { "epoch": 0.10489308020019027, "grad_norm": 737.6015014648438, "learning_rate": 9.999814961832018e-06, "loss": 170.4644, "step": 12680 }, { "epoch": 0.1049758034495595, "grad_norm": 1346.4503173828125, "learning_rate": 9.999802629350492e-06, "loss": 202.0369, "step": 12690 }, { "epoch": 0.10505852669892873, "grad_norm": 2559.85546875, "learning_rate": 9.99978989905762e-06, "loss": 221.5045, "step": 12700 }, { "epoch": 0.10514124994829797, "grad_norm": 1314.4476318359375, "learning_rate": 9.999776770954411e-06, "loss": 173.8742, "step": 12710 }, { "epoch": 0.1052239731976672, "grad_norm": 3551.19677734375, "learning_rate": 9.99976324504191e-06, "loss": 149.5013, "step": 12720 }, { "epoch": 0.10530669644703644, "grad_norm": 1428.34814453125, "learning_rate": 9.999749321321192e-06, "loss": 251.0179, "step": 12730 }, { "epoch": 0.10538941969640568, "grad_norm": 753.5355224609375, "learning_rate": 9.999734999793369e-06, "loss": 163.6853, "step": 12740 }, { "epoch": 0.10547214294577491, "grad_norm": 1578.79541015625, "learning_rate": 9.999720280459576e-06, "loss": 154.6416, "step": 12750 }, { "epoch": 0.10555486619514415, "grad_norm": 1111.734130859375, "learning_rate": 9.999705163320987e-06, "loss": 177.3941, "step": 12760 }, { "epoch": 0.10563758944451337, "grad_norm": 1722.9654541015625, "learning_rate": 9.999689648378801e-06, "loss": 179.8888, "step": 12770 }, { "epoch": 0.10572031269388261, "grad_norm": 1038.706298828125, "learning_rate": 9.999673735634259e-06, "loss": 127.1906, "step": 12780 }, { "epoch": 0.10580303594325186, "grad_norm": 11962.8779296875, "learning_rate": 9.99965742508862e-06, "loss": 213.939, "step": 12790 }, { "epoch": 0.10588575919262108, "grad_norm": 1482.868408203125, "learning_rate": 9.999640716743186e-06, "loss": 133.2547, "step": 12800 }, { "epoch": 0.10596848244199032, "grad_norm": 3282.25390625, "learning_rate": 9.999623610599287e-06, "loss": 160.013, "step": 12810 }, { "epoch": 0.10605120569135956, "grad_norm": 1836.9112548828125, "learning_rate": 9.999606106658282e-06, "loss": 201.4363, "step": 12820 }, { "epoch": 0.10613392894072879, "grad_norm": 816.5084838867188, "learning_rate": 9.999588204921562e-06, "loss": 174.8686, "step": 12830 }, { "epoch": 0.10621665219009803, "grad_norm": 1352.0396728515625, "learning_rate": 9.999569905390556e-06, "loss": 167.4276, "step": 12840 }, { "epoch": 0.10629937543946726, "grad_norm": 533.4453125, "learning_rate": 9.999551208066716e-06, "loss": 100.8425, "step": 12850 }, { "epoch": 0.1063820986888365, "grad_norm": 1191.3555908203125, "learning_rate": 9.99953211295153e-06, "loss": 147.7438, "step": 12860 }, { "epoch": 0.10646482193820574, "grad_norm": 1642.895263671875, "learning_rate": 9.999512620046523e-06, "loss": 170.0533, "step": 12870 }, { "epoch": 0.10654754518757496, "grad_norm": 1362.4990234375, "learning_rate": 9.999492729353238e-06, "loss": 177.1871, "step": 12880 }, { "epoch": 0.1066302684369442, "grad_norm": 879.8888549804688, "learning_rate": 9.999472440873261e-06, "loss": 160.7778, "step": 12890 }, { "epoch": 0.10671299168631344, "grad_norm": 1243.377685546875, "learning_rate": 9.999451754608208e-06, "loss": 150.4305, "step": 12900 }, { "epoch": 0.10679571493568267, "grad_norm": 984.1234130859375, "learning_rate": 9.999430670559723e-06, "loss": 139.6723, "step": 12910 }, { "epoch": 0.10687843818505191, "grad_norm": 852.8709716796875, "learning_rate": 9.999409188729484e-06, "loss": 134.8206, "step": 12920 }, { "epoch": 0.10696116143442115, "grad_norm": 886.4750366210938, "learning_rate": 9.999387309119198e-06, "loss": 150.4601, "step": 12930 }, { "epoch": 0.10704388468379038, "grad_norm": 1050.5213623046875, "learning_rate": 9.999365031730609e-06, "loss": 162.7591, "step": 12940 }, { "epoch": 0.10712660793315962, "grad_norm": 1679.367919921875, "learning_rate": 9.99934235656549e-06, "loss": 146.1199, "step": 12950 }, { "epoch": 0.10720933118252884, "grad_norm": 1546.12939453125, "learning_rate": 9.999319283625641e-06, "loss": 207.9114, "step": 12960 }, { "epoch": 0.10729205443189808, "grad_norm": 856.313720703125, "learning_rate": 9.999295812912902e-06, "loss": 168.0912, "step": 12970 }, { "epoch": 0.10737477768126732, "grad_norm": 1466.169677734375, "learning_rate": 9.999271944429139e-06, "loss": 202.6795, "step": 12980 }, { "epoch": 0.10745750093063655, "grad_norm": 1105.8492431640625, "learning_rate": 9.99924767817625e-06, "loss": 139.1426, "step": 12990 }, { "epoch": 0.10754022418000579, "grad_norm": 1072.4610595703125, "learning_rate": 9.999223014156167e-06, "loss": 196.2584, "step": 13000 }, { "epoch": 0.10762294742937503, "grad_norm": 820.6765747070312, "learning_rate": 9.999197952370851e-06, "loss": 177.7073, "step": 13010 }, { "epoch": 0.10770567067874426, "grad_norm": 788.7813720703125, "learning_rate": 9.9991724928223e-06, "loss": 157.0522, "step": 13020 }, { "epoch": 0.1077883939281135, "grad_norm": 1179.322265625, "learning_rate": 9.999146635512535e-06, "loss": 189.2783, "step": 13030 }, { "epoch": 0.10787111717748273, "grad_norm": 1035.255615234375, "learning_rate": 9.999120380443614e-06, "loss": 145.2693, "step": 13040 }, { "epoch": 0.10795384042685197, "grad_norm": 1015.682861328125, "learning_rate": 9.99909372761763e-06, "loss": 143.6347, "step": 13050 }, { "epoch": 0.1080365636762212, "grad_norm": 2936.619873046875, "learning_rate": 9.9990666770367e-06, "loss": 179.5399, "step": 13060 }, { "epoch": 0.10811928692559043, "grad_norm": 1349.863037109375, "learning_rate": 9.999039228702975e-06, "loss": 176.5338, "step": 13070 }, { "epoch": 0.10820201017495967, "grad_norm": 1154.5147705078125, "learning_rate": 9.999011382618644e-06, "loss": 166.4444, "step": 13080 }, { "epoch": 0.10828473342432891, "grad_norm": 1495.1844482421875, "learning_rate": 9.998983138785919e-06, "loss": 149.649, "step": 13090 }, { "epoch": 0.10836745667369814, "grad_norm": 1207.58984375, "learning_rate": 9.998954497207045e-06, "loss": 149.596, "step": 13100 }, { "epoch": 0.10845017992306738, "grad_norm": 1018.6047973632812, "learning_rate": 9.998925457884307e-06, "loss": 154.7165, "step": 13110 }, { "epoch": 0.1085329031724366, "grad_norm": 1084.2874755859375, "learning_rate": 9.99889602082001e-06, "loss": 145.5709, "step": 13120 }, { "epoch": 0.10861562642180585, "grad_norm": 1162.242919921875, "learning_rate": 9.998866186016501e-06, "loss": 168.6215, "step": 13130 }, { "epoch": 0.10869834967117509, "grad_norm": 1217.2177734375, "learning_rate": 9.99883595347615e-06, "loss": 150.9769, "step": 13140 }, { "epoch": 0.10878107292054431, "grad_norm": 884.3536376953125, "learning_rate": 9.998805323201364e-06, "loss": 161.5837, "step": 13150 }, { "epoch": 0.10886379616991355, "grad_norm": 1069.2374267578125, "learning_rate": 9.998774295194579e-06, "loss": 167.3784, "step": 13160 }, { "epoch": 0.1089465194192828, "grad_norm": 749.6088256835938, "learning_rate": 9.998742869458264e-06, "loss": 115.7084, "step": 13170 }, { "epoch": 0.10902924266865202, "grad_norm": 703.9533081054688, "learning_rate": 9.998711045994922e-06, "loss": 162.0743, "step": 13180 }, { "epoch": 0.10911196591802126, "grad_norm": 1521.7666015625, "learning_rate": 9.998678824807082e-06, "loss": 189.686, "step": 13190 }, { "epoch": 0.1091946891673905, "grad_norm": 1742.9715576171875, "learning_rate": 9.99864620589731e-06, "loss": 163.9251, "step": 13200 }, { "epoch": 0.10927741241675973, "grad_norm": 999.13623046875, "learning_rate": 9.998613189268197e-06, "loss": 142.0809, "step": 13210 }, { "epoch": 0.10936013566612897, "grad_norm": 1409.1549072265625, "learning_rate": 9.998579774922377e-06, "loss": 149.1874, "step": 13220 }, { "epoch": 0.1094428589154982, "grad_norm": 675.3158569335938, "learning_rate": 9.998545962862503e-06, "loss": 117.2426, "step": 13230 }, { "epoch": 0.10952558216486744, "grad_norm": 1737.0997314453125, "learning_rate": 9.998511753091267e-06, "loss": 155.6872, "step": 13240 }, { "epoch": 0.10960830541423668, "grad_norm": 4492.49853515625, "learning_rate": 9.998477145611389e-06, "loss": 195.1054, "step": 13250 }, { "epoch": 0.1096910286636059, "grad_norm": 1431.2005615234375, "learning_rate": 9.998442140425625e-06, "loss": 189.3633, "step": 13260 }, { "epoch": 0.10977375191297514, "grad_norm": 1214.457763671875, "learning_rate": 9.998406737536761e-06, "loss": 167.9152, "step": 13270 }, { "epoch": 0.10985647516234438, "grad_norm": 553.1201171875, "learning_rate": 9.998370936947614e-06, "loss": 154.5592, "step": 13280 }, { "epoch": 0.10993919841171361, "grad_norm": 1713.546875, "learning_rate": 9.998334738661028e-06, "loss": 178.8647, "step": 13290 }, { "epoch": 0.11002192166108285, "grad_norm": 1338.5965576171875, "learning_rate": 9.998298142679888e-06, "loss": 209.809, "step": 13300 }, { "epoch": 0.11010464491045208, "grad_norm": 1447.3448486328125, "learning_rate": 9.998261149007104e-06, "loss": 151.2987, "step": 13310 }, { "epoch": 0.11018736815982132, "grad_norm": 1176.512939453125, "learning_rate": 9.998223757645618e-06, "loss": 154.0995, "step": 13320 }, { "epoch": 0.11027009140919056, "grad_norm": 1491.28466796875, "learning_rate": 9.998185968598407e-06, "loss": 172.6219, "step": 13330 }, { "epoch": 0.11035281465855978, "grad_norm": 872.3587036132812, "learning_rate": 9.998147781868477e-06, "loss": 136.3148, "step": 13340 }, { "epoch": 0.11043553790792902, "grad_norm": 1718.0472412109375, "learning_rate": 9.998109197458865e-06, "loss": 147.2434, "step": 13350 }, { "epoch": 0.11051826115729826, "grad_norm": 1724.40966796875, "learning_rate": 9.998070215372645e-06, "loss": 155.5677, "step": 13360 }, { "epoch": 0.11060098440666749, "grad_norm": 1238.861572265625, "learning_rate": 9.998030835612914e-06, "loss": 177.9599, "step": 13370 }, { "epoch": 0.11068370765603673, "grad_norm": 1163.747802734375, "learning_rate": 9.997991058182807e-06, "loss": 159.718, "step": 13380 }, { "epoch": 0.11076643090540596, "grad_norm": 1202.32763671875, "learning_rate": 9.997950883085492e-06, "loss": 161.9838, "step": 13390 }, { "epoch": 0.1108491541547752, "grad_norm": 1815.2415771484375, "learning_rate": 9.99791031032416e-06, "loss": 187.6479, "step": 13400 }, { "epoch": 0.11093187740414444, "grad_norm": 1177.880126953125, "learning_rate": 9.997869339902043e-06, "loss": 180.2671, "step": 13410 }, { "epoch": 0.11101460065351366, "grad_norm": 1133.1861572265625, "learning_rate": 9.9978279718224e-06, "loss": 161.116, "step": 13420 }, { "epoch": 0.1110973239028829, "grad_norm": 1010.6656494140625, "learning_rate": 9.99778620608852e-06, "loss": 178.742, "step": 13430 }, { "epoch": 0.11118004715225215, "grad_norm": 1107.10986328125, "learning_rate": 9.997744042703731e-06, "loss": 139.692, "step": 13440 }, { "epoch": 0.11126277040162137, "grad_norm": 1005.2382202148438, "learning_rate": 9.997701481671384e-06, "loss": 178.2121, "step": 13450 }, { "epoch": 0.11134549365099061, "grad_norm": 2284.540283203125, "learning_rate": 9.997658522994867e-06, "loss": 156.5188, "step": 13460 }, { "epoch": 0.11142821690035985, "grad_norm": 5658.24658203125, "learning_rate": 9.997615166677597e-06, "loss": 146.769, "step": 13470 }, { "epoch": 0.11151094014972908, "grad_norm": 457.560302734375, "learning_rate": 9.997571412723024e-06, "loss": 119.6845, "step": 13480 }, { "epoch": 0.11159366339909832, "grad_norm": 1155.69677734375, "learning_rate": 9.99752726113463e-06, "loss": 126.1926, "step": 13490 }, { "epoch": 0.11167638664846755, "grad_norm": 1057.7230224609375, "learning_rate": 9.997482711915926e-06, "loss": 145.3562, "step": 13500 }, { "epoch": 0.11175910989783679, "grad_norm": 1107.236328125, "learning_rate": 9.99743776507046e-06, "loss": 189.204, "step": 13510 }, { "epoch": 0.11184183314720603, "grad_norm": 1068.25634765625, "learning_rate": 9.997392420601804e-06, "loss": 135.3788, "step": 13520 }, { "epoch": 0.11192455639657525, "grad_norm": 875.785888671875, "learning_rate": 9.99734667851357e-06, "loss": 152.927, "step": 13530 }, { "epoch": 0.1120072796459445, "grad_norm": 1772.181396484375, "learning_rate": 9.997300538809394e-06, "loss": 222.4996, "step": 13540 }, { "epoch": 0.11209000289531373, "grad_norm": 1264.271240234375, "learning_rate": 9.99725400149295e-06, "loss": 145.0414, "step": 13550 }, { "epoch": 0.11217272614468296, "grad_norm": 2198.46630859375, "learning_rate": 9.997207066567939e-06, "loss": 194.5429, "step": 13560 }, { "epoch": 0.1122554493940522, "grad_norm": 1130.449951171875, "learning_rate": 9.997159734038096e-06, "loss": 166.257, "step": 13570 }, { "epoch": 0.11233817264342143, "grad_norm": 1828.01513671875, "learning_rate": 9.997112003907186e-06, "loss": 152.8911, "step": 13580 }, { "epoch": 0.11242089589279067, "grad_norm": 1196.669677734375, "learning_rate": 9.997063876179007e-06, "loss": 129.7313, "step": 13590 }, { "epoch": 0.11250361914215991, "grad_norm": 1254.476806640625, "learning_rate": 9.997015350857391e-06, "loss": 169.0213, "step": 13600 }, { "epoch": 0.11258634239152913, "grad_norm": 1430.6507568359375, "learning_rate": 9.996966427946195e-06, "loss": 150.1627, "step": 13610 }, { "epoch": 0.11266906564089837, "grad_norm": 906.8787231445312, "learning_rate": 9.996917107449313e-06, "loss": 174.3134, "step": 13620 }, { "epoch": 0.11275178889026762, "grad_norm": 812.057373046875, "learning_rate": 9.99686738937067e-06, "loss": 138.5191, "step": 13630 }, { "epoch": 0.11283451213963684, "grad_norm": 1879.1136474609375, "learning_rate": 9.996817273714222e-06, "loss": 174.5974, "step": 13640 }, { "epoch": 0.11291723538900608, "grad_norm": 945.3467407226562, "learning_rate": 9.996766760483955e-06, "loss": 161.973, "step": 13650 }, { "epoch": 0.11299995863837531, "grad_norm": 862.3170776367188, "learning_rate": 9.996715849683889e-06, "loss": 137.5633, "step": 13660 }, { "epoch": 0.11308268188774455, "grad_norm": 690.8417358398438, "learning_rate": 9.996664541318076e-06, "loss": 141.2179, "step": 13670 }, { "epoch": 0.11316540513711379, "grad_norm": 972.900634765625, "learning_rate": 9.996612835390596e-06, "loss": 115.8736, "step": 13680 }, { "epoch": 0.11324812838648302, "grad_norm": 1904.7318115234375, "learning_rate": 9.996560731905565e-06, "loss": 154.7887, "step": 13690 }, { "epoch": 0.11333085163585226, "grad_norm": 851.8038940429688, "learning_rate": 9.996508230867126e-06, "loss": 137.4024, "step": 13700 }, { "epoch": 0.1134135748852215, "grad_norm": 1190.90380859375, "learning_rate": 9.996455332279458e-06, "loss": 153.5202, "step": 13710 }, { "epoch": 0.11349629813459072, "grad_norm": 2241.255126953125, "learning_rate": 9.99640203614677e-06, "loss": 173.7966, "step": 13720 }, { "epoch": 0.11357902138395996, "grad_norm": 1084.945068359375, "learning_rate": 9.996348342473304e-06, "loss": 159.4762, "step": 13730 }, { "epoch": 0.1136617446333292, "grad_norm": 1753.5206298828125, "learning_rate": 9.99629425126333e-06, "loss": 218.3281, "step": 13740 }, { "epoch": 0.11374446788269843, "grad_norm": 802.15576171875, "learning_rate": 9.996239762521152e-06, "loss": 153.0149, "step": 13750 }, { "epoch": 0.11382719113206767, "grad_norm": 2257.760986328125, "learning_rate": 9.996184876251105e-06, "loss": 134.656, "step": 13760 }, { "epoch": 0.1139099143814369, "grad_norm": 1485.7083740234375, "learning_rate": 9.996129592457558e-06, "loss": 119.6472, "step": 13770 }, { "epoch": 0.11399263763080614, "grad_norm": 1323.489990234375, "learning_rate": 9.996073911144907e-06, "loss": 135.8627, "step": 13780 }, { "epoch": 0.11407536088017538, "grad_norm": 1439.948974609375, "learning_rate": 9.996017832317583e-06, "loss": 105.2017, "step": 13790 }, { "epoch": 0.1141580841295446, "grad_norm": 755.0234375, "learning_rate": 9.995961355980052e-06, "loss": 119.7319, "step": 13800 }, { "epoch": 0.11424080737891384, "grad_norm": 1265.96826171875, "learning_rate": 9.995904482136803e-06, "loss": 144.368, "step": 13810 }, { "epoch": 0.11432353062828308, "grad_norm": 1014.328857421875, "learning_rate": 9.99584721079236e-06, "loss": 180.5596, "step": 13820 }, { "epoch": 0.11440625387765231, "grad_norm": 1355.7919921875, "learning_rate": 9.995789541951287e-06, "loss": 169.1609, "step": 13830 }, { "epoch": 0.11448897712702155, "grad_norm": 1709.610107421875, "learning_rate": 9.995731475618163e-06, "loss": 152.0147, "step": 13840 }, { "epoch": 0.11457170037639078, "grad_norm": 1426.723388671875, "learning_rate": 9.995673011797615e-06, "loss": 142.7122, "step": 13850 }, { "epoch": 0.11465442362576002, "grad_norm": 3322.473876953125, "learning_rate": 9.995614150494293e-06, "loss": 192.3159, "step": 13860 }, { "epoch": 0.11473714687512926, "grad_norm": 852.6958618164062, "learning_rate": 9.995554891712879e-06, "loss": 221.4455, "step": 13870 }, { "epoch": 0.11481987012449849, "grad_norm": 679.8392944335938, "learning_rate": 9.995495235458087e-06, "loss": 205.1969, "step": 13880 }, { "epoch": 0.11490259337386773, "grad_norm": 1818.237548828125, "learning_rate": 9.99543518173467e-06, "loss": 157.8227, "step": 13890 }, { "epoch": 0.11498531662323697, "grad_norm": 911.2511596679688, "learning_rate": 9.995374730547397e-06, "loss": 213.6541, "step": 13900 }, { "epoch": 0.11506803987260619, "grad_norm": 1822.821044921875, "learning_rate": 9.995313881901085e-06, "loss": 198.7188, "step": 13910 }, { "epoch": 0.11515076312197543, "grad_norm": 653.8102416992188, "learning_rate": 9.995252635800572e-06, "loss": 127.7723, "step": 13920 }, { "epoch": 0.11523348637134467, "grad_norm": 1713.5816650390625, "learning_rate": 9.995190992250732e-06, "loss": 225.4239, "step": 13930 }, { "epoch": 0.1153162096207139, "grad_norm": 1139.3336181640625, "learning_rate": 9.995128951256469e-06, "loss": 140.1807, "step": 13940 }, { "epoch": 0.11539893287008314, "grad_norm": 1414.932861328125, "learning_rate": 9.99506651282272e-06, "loss": 161.69, "step": 13950 }, { "epoch": 0.11548165611945237, "grad_norm": 1176.623291015625, "learning_rate": 9.995003676954454e-06, "loss": 151.0156, "step": 13960 }, { "epoch": 0.11556437936882161, "grad_norm": 784.6156616210938, "learning_rate": 9.994940443656668e-06, "loss": 198.3028, "step": 13970 }, { "epoch": 0.11564710261819085, "grad_norm": 862.885986328125, "learning_rate": 9.994876812934395e-06, "loss": 153.6012, "step": 13980 }, { "epoch": 0.11572982586756007, "grad_norm": 2193.662109375, "learning_rate": 9.994812784792698e-06, "loss": 165.4299, "step": 13990 }, { "epoch": 0.11581254911692931, "grad_norm": 1005.8052978515625, "learning_rate": 9.99474835923667e-06, "loss": 117.067, "step": 14000 }, { "epoch": 0.11589527236629855, "grad_norm": 870.8054809570312, "learning_rate": 9.994683536271437e-06, "loss": 177.513, "step": 14010 }, { "epoch": 0.11597799561566778, "grad_norm": 1938.4117431640625, "learning_rate": 9.994618315902161e-06, "loss": 147.8295, "step": 14020 }, { "epoch": 0.11606071886503702, "grad_norm": 1145.3619384765625, "learning_rate": 9.994552698134023e-06, "loss": 126.2492, "step": 14030 }, { "epoch": 0.11614344211440625, "grad_norm": 704.4757080078125, "learning_rate": 9.994486682972253e-06, "loss": 183.3489, "step": 14040 }, { "epoch": 0.11622616536377549, "grad_norm": 799.8057250976562, "learning_rate": 9.994420270422096e-06, "loss": 155.8286, "step": 14050 }, { "epoch": 0.11630888861314473, "grad_norm": 2168.39794921875, "learning_rate": 9.994353460488842e-06, "loss": 165.6206, "step": 14060 }, { "epoch": 0.11639161186251396, "grad_norm": 1048.3438720703125, "learning_rate": 9.994286253177803e-06, "loss": 196.4472, "step": 14070 }, { "epoch": 0.1164743351118832, "grad_norm": 1240.358642578125, "learning_rate": 9.994218648494327e-06, "loss": 169.1644, "step": 14080 }, { "epoch": 0.11655705836125244, "grad_norm": 1450.99169921875, "learning_rate": 9.994150646443793e-06, "loss": 119.286, "step": 14090 }, { "epoch": 0.11663978161062166, "grad_norm": 1026.7149658203125, "learning_rate": 9.994082247031613e-06, "loss": 166.7578, "step": 14100 }, { "epoch": 0.1167225048599909, "grad_norm": 1081.6656494140625, "learning_rate": 9.99401345026323e-06, "loss": 141.0757, "step": 14110 }, { "epoch": 0.11680522810936013, "grad_norm": 746.9979248046875, "learning_rate": 9.993944256144115e-06, "loss": 124.9759, "step": 14120 }, { "epoch": 0.11688795135872937, "grad_norm": 891.9210815429688, "learning_rate": 9.993874664679774e-06, "loss": 150.3685, "step": 14130 }, { "epoch": 0.11697067460809861, "grad_norm": 1401.4002685546875, "learning_rate": 9.993804675875744e-06, "loss": 168.8493, "step": 14140 }, { "epoch": 0.11705339785746784, "grad_norm": 1588.7640380859375, "learning_rate": 9.993734289737596e-06, "loss": 141.4464, "step": 14150 }, { "epoch": 0.11713612110683708, "grad_norm": 1859.66552734375, "learning_rate": 9.993663506270928e-06, "loss": 162.024, "step": 14160 }, { "epoch": 0.11721884435620632, "grad_norm": 1133.1839599609375, "learning_rate": 9.993592325481373e-06, "loss": 166.6096, "step": 14170 }, { "epoch": 0.11730156760557554, "grad_norm": 1811.849365234375, "learning_rate": 9.993520747374594e-06, "loss": 127.2197, "step": 14180 }, { "epoch": 0.11738429085494478, "grad_norm": 909.2362060546875, "learning_rate": 9.993448771956285e-06, "loss": 189.4919, "step": 14190 }, { "epoch": 0.11746701410431402, "grad_norm": 1350.44140625, "learning_rate": 9.993376399232175e-06, "loss": 142.4382, "step": 14200 }, { "epoch": 0.11754973735368325, "grad_norm": 1765.2679443359375, "learning_rate": 9.993303629208023e-06, "loss": 148.8411, "step": 14210 }, { "epoch": 0.11763246060305249, "grad_norm": 2343.818359375, "learning_rate": 9.993230461889616e-06, "loss": 212.7168, "step": 14220 }, { "epoch": 0.11771518385242172, "grad_norm": 683.275146484375, "learning_rate": 9.993156897282776e-06, "loss": 148.4446, "step": 14230 }, { "epoch": 0.11779790710179096, "grad_norm": 998.6349487304688, "learning_rate": 9.99308293539336e-06, "loss": 117.4103, "step": 14240 }, { "epoch": 0.1178806303511602, "grad_norm": 823.5191040039062, "learning_rate": 9.993008576227248e-06, "loss": 130.8048, "step": 14250 }, { "epoch": 0.11796335360052942, "grad_norm": 1420.8035888671875, "learning_rate": 9.992933819790358e-06, "loss": 163.5295, "step": 14260 }, { "epoch": 0.11804607684989867, "grad_norm": 1167.037109375, "learning_rate": 9.992858666088638e-06, "loss": 164.6194, "step": 14270 }, { "epoch": 0.1181288000992679, "grad_norm": 1515.985107421875, "learning_rate": 9.992783115128072e-06, "loss": 163.406, "step": 14280 }, { "epoch": 0.11821152334863713, "grad_norm": 1549.8917236328125, "learning_rate": 9.992707166914662e-06, "loss": 168.7726, "step": 14290 }, { "epoch": 0.11829424659800637, "grad_norm": 1190.7861328125, "learning_rate": 9.992630821454458e-06, "loss": 140.9276, "step": 14300 }, { "epoch": 0.1183769698473756, "grad_norm": 1568.7037353515625, "learning_rate": 9.992554078753534e-06, "loss": 147.5554, "step": 14310 }, { "epoch": 0.11845969309674484, "grad_norm": 1000.02880859375, "learning_rate": 9.992476938817994e-06, "loss": 180.1213, "step": 14320 }, { "epoch": 0.11854241634611408, "grad_norm": 1296.5947265625, "learning_rate": 9.992399401653976e-06, "loss": 137.781, "step": 14330 }, { "epoch": 0.1186251395954833, "grad_norm": 1144.0504150390625, "learning_rate": 9.99232146726765e-06, "loss": 140.0204, "step": 14340 }, { "epoch": 0.11870786284485255, "grad_norm": 1281.876708984375, "learning_rate": 9.992243135665217e-06, "loss": 154.8919, "step": 14350 }, { "epoch": 0.11879058609422179, "grad_norm": 871.8610229492188, "learning_rate": 9.992164406852908e-06, "loss": 186.1516, "step": 14360 }, { "epoch": 0.11887330934359101, "grad_norm": 1534.3536376953125, "learning_rate": 9.992085280836988e-06, "loss": 160.6092, "step": 14370 }, { "epoch": 0.11895603259296025, "grad_norm": 988.0948486328125, "learning_rate": 9.992005757623753e-06, "loss": 203.4977, "step": 14380 }, { "epoch": 0.11903875584232948, "grad_norm": 1357.1668701171875, "learning_rate": 9.991925837219532e-06, "loss": 160.5042, "step": 14390 }, { "epoch": 0.11912147909169872, "grad_norm": 636.4329223632812, "learning_rate": 9.991845519630679e-06, "loss": 137.9073, "step": 14400 }, { "epoch": 0.11920420234106796, "grad_norm": 1027.252197265625, "learning_rate": 9.991764804863588e-06, "loss": 122.9028, "step": 14410 }, { "epoch": 0.11928692559043719, "grad_norm": 2356.058837890625, "learning_rate": 9.991683692924682e-06, "loss": 155.1582, "step": 14420 }, { "epoch": 0.11936964883980643, "grad_norm": 743.142578125, "learning_rate": 9.991602183820412e-06, "loss": 142.9998, "step": 14430 }, { "epoch": 0.11945237208917567, "grad_norm": 1039.77978515625, "learning_rate": 9.991520277557266e-06, "loss": 156.9646, "step": 14440 }, { "epoch": 0.1195350953385449, "grad_norm": 1624.250732421875, "learning_rate": 9.991437974141759e-06, "loss": 165.5059, "step": 14450 }, { "epoch": 0.11961781858791413, "grad_norm": 589.4429321289062, "learning_rate": 9.99135527358044e-06, "loss": 119.63, "step": 14460 }, { "epoch": 0.11970054183728338, "grad_norm": 1199.8302001953125, "learning_rate": 9.991272175879888e-06, "loss": 175.7935, "step": 14470 }, { "epoch": 0.1197832650866526, "grad_norm": 1233.6771240234375, "learning_rate": 9.991188681046718e-06, "loss": 192.3081, "step": 14480 }, { "epoch": 0.11986598833602184, "grad_norm": 1209.5980224609375, "learning_rate": 9.991104789087568e-06, "loss": 139.1357, "step": 14490 }, { "epoch": 0.11994871158539107, "grad_norm": 1937.0008544921875, "learning_rate": 9.991020500009118e-06, "loss": 138.8762, "step": 14500 }, { "epoch": 0.12003143483476031, "grad_norm": 545.6674194335938, "learning_rate": 9.990935813818073e-06, "loss": 139.6014, "step": 14510 }, { "epoch": 0.12011415808412955, "grad_norm": 650.5000610351562, "learning_rate": 9.99085073052117e-06, "loss": 195.5592, "step": 14520 }, { "epoch": 0.12019688133349878, "grad_norm": 566.500732421875, "learning_rate": 9.990765250125179e-06, "loss": 154.2413, "step": 14530 }, { "epoch": 0.12027960458286802, "grad_norm": 1750.378662109375, "learning_rate": 9.990679372636902e-06, "loss": 161.8778, "step": 14540 }, { "epoch": 0.12036232783223726, "grad_norm": 1349.2432861328125, "learning_rate": 9.99059309806317e-06, "loss": 172.9573, "step": 14550 }, { "epoch": 0.12044505108160648, "grad_norm": 975.1179809570312, "learning_rate": 9.990506426410851e-06, "loss": 128.1013, "step": 14560 }, { "epoch": 0.12052777433097572, "grad_norm": 3406.892333984375, "learning_rate": 9.990419357686839e-06, "loss": 177.5993, "step": 14570 }, { "epoch": 0.12061049758034495, "grad_norm": 428.1646423339844, "learning_rate": 9.99033189189806e-06, "loss": 128.9519, "step": 14580 }, { "epoch": 0.12069322082971419, "grad_norm": 996.7293701171875, "learning_rate": 9.990244029051475e-06, "loss": 149.3833, "step": 14590 }, { "epoch": 0.12077594407908343, "grad_norm": 1458.9307861328125, "learning_rate": 9.990155769154077e-06, "loss": 160.3518, "step": 14600 }, { "epoch": 0.12085866732845266, "grad_norm": 714.3155517578125, "learning_rate": 9.990067112212884e-06, "loss": 128.5733, "step": 14610 }, { "epoch": 0.1209413905778219, "grad_norm": 629.7801513671875, "learning_rate": 9.989978058234952e-06, "loss": 141.1231, "step": 14620 }, { "epoch": 0.12102411382719114, "grad_norm": 1056.1544189453125, "learning_rate": 9.989888607227369e-06, "loss": 173.8705, "step": 14630 }, { "epoch": 0.12110683707656036, "grad_norm": 1272.472412109375, "learning_rate": 9.989798759197247e-06, "loss": 146.9385, "step": 14640 }, { "epoch": 0.1211895603259296, "grad_norm": 1054.3629150390625, "learning_rate": 9.989708514151739e-06, "loss": 164.1719, "step": 14650 }, { "epoch": 0.12127228357529883, "grad_norm": 659.7613525390625, "learning_rate": 9.989617872098026e-06, "loss": 149.6539, "step": 14660 }, { "epoch": 0.12135500682466807, "grad_norm": 815.8479614257812, "learning_rate": 9.989526833043316e-06, "loss": 140.1702, "step": 14670 }, { "epoch": 0.12143773007403731, "grad_norm": 1148.9129638671875, "learning_rate": 9.989435396994856e-06, "loss": 125.2471, "step": 14680 }, { "epoch": 0.12152045332340654, "grad_norm": 3006.224609375, "learning_rate": 9.989343563959919e-06, "loss": 150.3076, "step": 14690 }, { "epoch": 0.12160317657277578, "grad_norm": 1365.89892578125, "learning_rate": 9.989251333945813e-06, "loss": 179.2145, "step": 14700 }, { "epoch": 0.12168589982214502, "grad_norm": 982.8682861328125, "learning_rate": 9.989158706959875e-06, "loss": 137.9394, "step": 14710 }, { "epoch": 0.12176862307151425, "grad_norm": 1103.151123046875, "learning_rate": 9.989065683009477e-06, "loss": 150.3043, "step": 14720 }, { "epoch": 0.12185134632088349, "grad_norm": 1068.439208984375, "learning_rate": 9.988972262102018e-06, "loss": 115.0475, "step": 14730 }, { "epoch": 0.12193406957025273, "grad_norm": 1475.9112548828125, "learning_rate": 9.988878444244937e-06, "loss": 162.3183, "step": 14740 }, { "epoch": 0.12201679281962195, "grad_norm": 1475.7916259765625, "learning_rate": 9.988784229445689e-06, "loss": 132.9056, "step": 14750 }, { "epoch": 0.12209951606899119, "grad_norm": 1580.335205078125, "learning_rate": 9.988689617711777e-06, "loss": 180.2133, "step": 14760 }, { "epoch": 0.12218223931836042, "grad_norm": 1742.8638916015625, "learning_rate": 9.988594609050726e-06, "loss": 170.6644, "step": 14770 }, { "epoch": 0.12226496256772966, "grad_norm": 778.4093017578125, "learning_rate": 9.988499203470097e-06, "loss": 163.6835, "step": 14780 }, { "epoch": 0.1223476858170989, "grad_norm": 908.4758911132812, "learning_rate": 9.988403400977482e-06, "loss": 143.079, "step": 14790 }, { "epoch": 0.12243040906646813, "grad_norm": 1540.624755859375, "learning_rate": 9.9883072015805e-06, "loss": 160.3763, "step": 14800 }, { "epoch": 0.12251313231583737, "grad_norm": 919.294677734375, "learning_rate": 9.98821060528681e-06, "loss": 165.4283, "step": 14810 }, { "epoch": 0.12259585556520661, "grad_norm": 865.2339477539062, "learning_rate": 9.988113612104093e-06, "loss": 128.951, "step": 14820 }, { "epoch": 0.12267857881457583, "grad_norm": 2098.492919921875, "learning_rate": 9.988016222040067e-06, "loss": 151.1649, "step": 14830 }, { "epoch": 0.12276130206394507, "grad_norm": 847.621337890625, "learning_rate": 9.987918435102484e-06, "loss": 121.6645, "step": 14840 }, { "epoch": 0.1228440253133143, "grad_norm": 1472.7208251953125, "learning_rate": 9.987820251299121e-06, "loss": 140.8588, "step": 14850 }, { "epoch": 0.12292674856268354, "grad_norm": 1310.5726318359375, "learning_rate": 9.987721670637794e-06, "loss": 132.6207, "step": 14860 }, { "epoch": 0.12300947181205278, "grad_norm": 788.9578247070312, "learning_rate": 9.987622693126342e-06, "loss": 139.2334, "step": 14870 }, { "epoch": 0.12309219506142201, "grad_norm": 1761.3287353515625, "learning_rate": 9.987523318772644e-06, "loss": 156.3363, "step": 14880 }, { "epoch": 0.12317491831079125, "grad_norm": 1024.44140625, "learning_rate": 9.987423547584605e-06, "loss": 167.0266, "step": 14890 }, { "epoch": 0.12325764156016049, "grad_norm": 891.505126953125, "learning_rate": 9.987323379570161e-06, "loss": 144.4436, "step": 14900 }, { "epoch": 0.12334036480952972, "grad_norm": 1373.43359375, "learning_rate": 9.987222814737287e-06, "loss": 139.1032, "step": 14910 }, { "epoch": 0.12342308805889896, "grad_norm": 2005.6689453125, "learning_rate": 9.987121853093982e-06, "loss": 179.2018, "step": 14920 }, { "epoch": 0.12350581130826818, "grad_norm": 1276.0216064453125, "learning_rate": 9.987020494648279e-06, "loss": 157.102, "step": 14930 }, { "epoch": 0.12358853455763742, "grad_norm": 1294.2474365234375, "learning_rate": 9.986918739408241e-06, "loss": 176.7196, "step": 14940 }, { "epoch": 0.12367125780700666, "grad_norm": 1715.0291748046875, "learning_rate": 9.986816587381966e-06, "loss": 150.2139, "step": 14950 }, { "epoch": 0.12375398105637589, "grad_norm": 1020.9821166992188, "learning_rate": 9.986714038577582e-06, "loss": 145.5669, "step": 14960 }, { "epoch": 0.12383670430574513, "grad_norm": 969.8027954101562, "learning_rate": 9.986611093003249e-06, "loss": 129.0563, "step": 14970 }, { "epoch": 0.12391942755511437, "grad_norm": 1188.594482421875, "learning_rate": 9.986507750667157e-06, "loss": 130.371, "step": 14980 }, { "epoch": 0.1240021508044836, "grad_norm": 2070.416015625, "learning_rate": 9.986404011577525e-06, "loss": 164.5774, "step": 14990 }, { "epoch": 0.12408487405385284, "grad_norm": 1122.52587890625, "learning_rate": 9.986299875742612e-06, "loss": 187.0694, "step": 15000 }, { "epoch": 0.12416759730322208, "grad_norm": 1119.6961669921875, "learning_rate": 9.986195343170703e-06, "loss": 180.5289, "step": 15010 }, { "epoch": 0.1242503205525913, "grad_norm": 1574.2568359375, "learning_rate": 9.986090413870114e-06, "loss": 144.7522, "step": 15020 }, { "epoch": 0.12433304380196054, "grad_norm": 1006.7045288085938, "learning_rate": 9.985985087849193e-06, "loss": 143.7221, "step": 15030 }, { "epoch": 0.12441576705132977, "grad_norm": 1300.2181396484375, "learning_rate": 9.98587936511632e-06, "loss": 150.1932, "step": 15040 }, { "epoch": 0.12449849030069901, "grad_norm": 821.7041625976562, "learning_rate": 9.98577324567991e-06, "loss": 139.0086, "step": 15050 }, { "epoch": 0.12458121355006825, "grad_norm": 830.7269287109375, "learning_rate": 9.985666729548404e-06, "loss": 146.4651, "step": 15060 }, { "epoch": 0.12466393679943748, "grad_norm": 1310.355224609375, "learning_rate": 9.985559816730277e-06, "loss": 141.5489, "step": 15070 }, { "epoch": 0.12474666004880672, "grad_norm": 1190.0335693359375, "learning_rate": 9.985452507234037e-06, "loss": 144.9001, "step": 15080 }, { "epoch": 0.12482938329817596, "grad_norm": 2714.714599609375, "learning_rate": 9.98534480106822e-06, "loss": 154.9118, "step": 15090 }, { "epoch": 0.12491210654754518, "grad_norm": 792.223388671875, "learning_rate": 9.985236698241396e-06, "loss": 149.7406, "step": 15100 }, { "epoch": 0.12499482979691443, "grad_norm": 1287.8345947265625, "learning_rate": 9.985128198762168e-06, "loss": 171.4261, "step": 15110 }, { "epoch": 0.12507755304628365, "grad_norm": 957.4619140625, "learning_rate": 9.98501930263917e-06, "loss": 234.8733, "step": 15120 }, { "epoch": 0.1251602762956529, "grad_norm": 862.0460205078125, "learning_rate": 9.984910009881062e-06, "loss": 112.6332, "step": 15130 }, { "epoch": 0.12524299954502213, "grad_norm": 1100.07080078125, "learning_rate": 9.984800320496542e-06, "loss": 139.7673, "step": 15140 }, { "epoch": 0.12532572279439136, "grad_norm": 1111.9737548828125, "learning_rate": 9.984690234494338e-06, "loss": 106.7051, "step": 15150 }, { "epoch": 0.12540844604376059, "grad_norm": 744.8794555664062, "learning_rate": 9.98457975188321e-06, "loss": 142.1312, "step": 15160 }, { "epoch": 0.12549116929312984, "grad_norm": 923.189697265625, "learning_rate": 9.984468872671945e-06, "loss": 139.3656, "step": 15170 }, { "epoch": 0.12557389254249907, "grad_norm": 1322.687255859375, "learning_rate": 9.984357596869369e-06, "loss": 148.6495, "step": 15180 }, { "epoch": 0.1256566157918683, "grad_norm": 882.3487548828125, "learning_rate": 9.984245924484334e-06, "loss": 141.7766, "step": 15190 }, { "epoch": 0.12573933904123755, "grad_norm": 725.9840698242188, "learning_rate": 9.984133855525723e-06, "loss": 138.5364, "step": 15200 }, { "epoch": 0.12582206229060677, "grad_norm": 1547.522705078125, "learning_rate": 9.984021390002458e-06, "loss": 136.4458, "step": 15210 }, { "epoch": 0.125904785539976, "grad_norm": 1425.56494140625, "learning_rate": 9.983908527923486e-06, "loss": 222.0387, "step": 15220 }, { "epoch": 0.12598750878934525, "grad_norm": 911.7035522460938, "learning_rate": 9.983795269297782e-06, "loss": 169.7902, "step": 15230 }, { "epoch": 0.12607023203871448, "grad_norm": 1066.0264892578125, "learning_rate": 9.983681614134363e-06, "loss": 122.5573, "step": 15240 }, { "epoch": 0.1261529552880837, "grad_norm": 1829.2509765625, "learning_rate": 9.98356756244227e-06, "loss": 154.7958, "step": 15250 }, { "epoch": 0.12623567853745296, "grad_norm": 1402.93408203125, "learning_rate": 9.983453114230575e-06, "loss": 145.442, "step": 15260 }, { "epoch": 0.1263184017868222, "grad_norm": 990.7800903320312, "learning_rate": 9.98333826950839e-06, "loss": 138.916, "step": 15270 }, { "epoch": 0.12640112503619141, "grad_norm": 861.1292724609375, "learning_rate": 9.983223028284847e-06, "loss": 152.3527, "step": 15280 }, { "epoch": 0.12648384828556067, "grad_norm": 887.3511962890625, "learning_rate": 9.983107390569118e-06, "loss": 129.7973, "step": 15290 }, { "epoch": 0.1265665715349299, "grad_norm": 1043.2373046875, "learning_rate": 9.982991356370404e-06, "loss": 116.1451, "step": 15300 }, { "epoch": 0.12664929478429912, "grad_norm": 1244.5079345703125, "learning_rate": 9.982874925697937e-06, "loss": 221.0664, "step": 15310 }, { "epoch": 0.12673201803366838, "grad_norm": 1715.1995849609375, "learning_rate": 9.982758098560978e-06, "loss": 186.7455, "step": 15320 }, { "epoch": 0.1268147412830376, "grad_norm": 679.9988403320312, "learning_rate": 9.982640874968827e-06, "loss": 171.8672, "step": 15330 }, { "epoch": 0.12689746453240683, "grad_norm": 595.40625, "learning_rate": 9.98252325493081e-06, "loss": 130.3511, "step": 15340 }, { "epoch": 0.12698018778177605, "grad_norm": 915.3275146484375, "learning_rate": 9.982405238456281e-06, "loss": 153.7831, "step": 15350 }, { "epoch": 0.1270629110311453, "grad_norm": 1383.0423583984375, "learning_rate": 9.982286825554636e-06, "loss": 155.1486, "step": 15360 }, { "epoch": 0.12714563428051454, "grad_norm": 1527.0670166015625, "learning_rate": 9.982168016235292e-06, "loss": 235.3831, "step": 15370 }, { "epoch": 0.12722835752988376, "grad_norm": 1168.0416259765625, "learning_rate": 9.982048810507706e-06, "loss": 175.3166, "step": 15380 }, { "epoch": 0.12731108077925302, "grad_norm": 1577.9107666015625, "learning_rate": 9.98192920838136e-06, "loss": 136.4098, "step": 15390 }, { "epoch": 0.12739380402862224, "grad_norm": 2239.125244140625, "learning_rate": 9.98180920986577e-06, "loss": 162.4811, "step": 15400 }, { "epoch": 0.12747652727799147, "grad_norm": 1140.6561279296875, "learning_rate": 9.981688814970485e-06, "loss": 159.3877, "step": 15410 }, { "epoch": 0.12755925052736072, "grad_norm": 929.8948974609375, "learning_rate": 9.981568023705085e-06, "loss": 113.0717, "step": 15420 }, { "epoch": 0.12764197377672995, "grad_norm": 1146.56396484375, "learning_rate": 9.981446836079178e-06, "loss": 121.9914, "step": 15430 }, { "epoch": 0.12772469702609918, "grad_norm": 877.8550415039062, "learning_rate": 9.981325252102408e-06, "loss": 173.6141, "step": 15440 }, { "epoch": 0.12780742027546843, "grad_norm": 969.0079956054688, "learning_rate": 9.98120327178445e-06, "loss": 178.5706, "step": 15450 }, { "epoch": 0.12789014352483766, "grad_norm": 1263.2391357421875, "learning_rate": 9.981080895135007e-06, "loss": 180.7431, "step": 15460 }, { "epoch": 0.12797286677420688, "grad_norm": 742.5184326171875, "learning_rate": 9.980958122163818e-06, "loss": 111.0224, "step": 15470 }, { "epoch": 0.12805559002357614, "grad_norm": 1423.19873046875, "learning_rate": 9.980834952880652e-06, "loss": 128.3473, "step": 15480 }, { "epoch": 0.12813831327294536, "grad_norm": 1583.2940673828125, "learning_rate": 9.980711387295306e-06, "loss": 149.955, "step": 15490 }, { "epoch": 0.1282210365223146, "grad_norm": 1052.7265625, "learning_rate": 9.980587425417612e-06, "loss": 159.9205, "step": 15500 }, { "epoch": 0.12830375977168385, "grad_norm": 2138.17724609375, "learning_rate": 9.980463067257437e-06, "loss": 169.7366, "step": 15510 }, { "epoch": 0.12838648302105307, "grad_norm": 1006.1878662109375, "learning_rate": 9.980338312824672e-06, "loss": 193.1612, "step": 15520 }, { "epoch": 0.1284692062704223, "grad_norm": 1047.7593994140625, "learning_rate": 9.980213162129244e-06, "loss": 175.5892, "step": 15530 }, { "epoch": 0.12855192951979152, "grad_norm": 1267.4644775390625, "learning_rate": 9.980087615181111e-06, "loss": 149.4357, "step": 15540 }, { "epoch": 0.12863465276916078, "grad_norm": 1171.0859375, "learning_rate": 9.979961671990263e-06, "loss": 165.6414, "step": 15550 }, { "epoch": 0.12871737601853, "grad_norm": 911.0418701171875, "learning_rate": 9.979835332566719e-06, "loss": 155.2462, "step": 15560 }, { "epoch": 0.12880009926789923, "grad_norm": 1016.1674194335938, "learning_rate": 9.97970859692053e-06, "loss": 142.4974, "step": 15570 }, { "epoch": 0.12888282251726849, "grad_norm": 653.232421875, "learning_rate": 9.979581465061784e-06, "loss": 155.5012, "step": 15580 }, { "epoch": 0.1289655457666377, "grad_norm": 1058.5008544921875, "learning_rate": 9.979453937000594e-06, "loss": 101.9423, "step": 15590 }, { "epoch": 0.12904826901600694, "grad_norm": 820.3455200195312, "learning_rate": 9.979326012747106e-06, "loss": 117.5258, "step": 15600 }, { "epoch": 0.1291309922653762, "grad_norm": 1352.7105712890625, "learning_rate": 9.9791976923115e-06, "loss": 99.8209, "step": 15610 }, { "epoch": 0.12921371551474542, "grad_norm": 1026.9713134765625, "learning_rate": 9.979068975703984e-06, "loss": 166.7305, "step": 15620 }, { "epoch": 0.12929643876411465, "grad_norm": 1245.904296875, "learning_rate": 9.978939862934802e-06, "loss": 126.3938, "step": 15630 }, { "epoch": 0.1293791620134839, "grad_norm": 1658.7640380859375, "learning_rate": 9.978810354014223e-06, "loss": 135.5493, "step": 15640 }, { "epoch": 0.12946188526285313, "grad_norm": 1517.82373046875, "learning_rate": 9.978680448952556e-06, "loss": 139.2036, "step": 15650 }, { "epoch": 0.12954460851222235, "grad_norm": 1112.2469482421875, "learning_rate": 9.978550147760133e-06, "loss": 127.4167, "step": 15660 }, { "epoch": 0.1296273317615916, "grad_norm": 995.6966552734375, "learning_rate": 9.978419450447325e-06, "loss": 128.1456, "step": 15670 }, { "epoch": 0.12971005501096083, "grad_norm": 785.5421142578125, "learning_rate": 9.978288357024527e-06, "loss": 142.7447, "step": 15680 }, { "epoch": 0.12979277826033006, "grad_norm": 3418.008544921875, "learning_rate": 9.978156867502173e-06, "loss": 161.2918, "step": 15690 }, { "epoch": 0.12987550150969931, "grad_norm": 1545.32568359375, "learning_rate": 9.978024981890724e-06, "loss": 107.0028, "step": 15700 }, { "epoch": 0.12995822475906854, "grad_norm": 963.7340087890625, "learning_rate": 9.977892700200673e-06, "loss": 140.569, "step": 15710 }, { "epoch": 0.13004094800843777, "grad_norm": 692.7611694335938, "learning_rate": 9.977760022442545e-06, "loss": 110.664, "step": 15720 }, { "epoch": 0.130123671257807, "grad_norm": 1015.7322998046875, "learning_rate": 9.977626948626897e-06, "loss": 158.9243, "step": 15730 }, { "epoch": 0.13020639450717625, "grad_norm": 1334.6917724609375, "learning_rate": 9.977493478764316e-06, "loss": 152.3215, "step": 15740 }, { "epoch": 0.13028911775654548, "grad_norm": 963.1575927734375, "learning_rate": 9.977359612865424e-06, "loss": 137.1868, "step": 15750 }, { "epoch": 0.1303718410059147, "grad_norm": 1332.0909423828125, "learning_rate": 9.97722535094087e-06, "loss": 130.2153, "step": 15760 }, { "epoch": 0.13045456425528396, "grad_norm": 864.9472045898438, "learning_rate": 9.977090693001336e-06, "loss": 142.6017, "step": 15770 }, { "epoch": 0.13053728750465318, "grad_norm": 1091.3128662109375, "learning_rate": 9.976955639057539e-06, "loss": 126.0693, "step": 15780 }, { "epoch": 0.1306200107540224, "grad_norm": 1137.7115478515625, "learning_rate": 9.976820189120223e-06, "loss": 147.4185, "step": 15790 }, { "epoch": 0.13070273400339166, "grad_norm": 1658.2982177734375, "learning_rate": 9.976684343200164e-06, "loss": 135.441, "step": 15800 }, { "epoch": 0.1307854572527609, "grad_norm": 1823.3642578125, "learning_rate": 9.976548101308173e-06, "loss": 138.8229, "step": 15810 }, { "epoch": 0.13086818050213012, "grad_norm": 1511.6375732421875, "learning_rate": 9.976411463455088e-06, "loss": 140.3549, "step": 15820 }, { "epoch": 0.13095090375149937, "grad_norm": 970.9559326171875, "learning_rate": 9.976274429651783e-06, "loss": 188.4605, "step": 15830 }, { "epoch": 0.1310336270008686, "grad_norm": 1540.7110595703125, "learning_rate": 9.976136999909156e-06, "loss": 106.2589, "step": 15840 }, { "epoch": 0.13111635025023782, "grad_norm": 829.7328491210938, "learning_rate": 9.97599917423815e-06, "loss": 166.2885, "step": 15850 }, { "epoch": 0.13119907349960708, "grad_norm": 0.0, "learning_rate": 9.975860952649724e-06, "loss": 180.9173, "step": 15860 }, { "epoch": 0.1312817967489763, "grad_norm": 1011.6902465820312, "learning_rate": 9.975722335154876e-06, "loss": 161.2201, "step": 15870 }, { "epoch": 0.13136451999834553, "grad_norm": 1166.7960205078125, "learning_rate": 9.975583321764638e-06, "loss": 144.3113, "step": 15880 }, { "epoch": 0.13144724324771476, "grad_norm": 1041.4771728515625, "learning_rate": 9.975443912490073e-06, "loss": 149.5042, "step": 15890 }, { "epoch": 0.131529966497084, "grad_norm": 2316.5087890625, "learning_rate": 9.975304107342268e-06, "loss": 179.2303, "step": 15900 }, { "epoch": 0.13161268974645324, "grad_norm": 692.1578369140625, "learning_rate": 9.97516390633235e-06, "loss": 133.4318, "step": 15910 }, { "epoch": 0.13169541299582246, "grad_norm": 1125.5006103515625, "learning_rate": 9.975023309471473e-06, "loss": 156.1001, "step": 15920 }, { "epoch": 0.13177813624519172, "grad_norm": 1064.159423828125, "learning_rate": 9.974882316770823e-06, "loss": 147.876, "step": 15930 }, { "epoch": 0.13186085949456094, "grad_norm": 1456.9761962890625, "learning_rate": 9.974740928241617e-06, "loss": 146.098, "step": 15940 }, { "epoch": 0.13194358274393017, "grad_norm": 1191.022705078125, "learning_rate": 9.974599143895107e-06, "loss": 139.6693, "step": 15950 }, { "epoch": 0.13202630599329943, "grad_norm": 2010.4088134765625, "learning_rate": 9.974456963742573e-06, "loss": 152.4677, "step": 15960 }, { "epoch": 0.13210902924266865, "grad_norm": 1077.85205078125, "learning_rate": 9.97431438779533e-06, "loss": 199.6097, "step": 15970 }, { "epoch": 0.13219175249203788, "grad_norm": 975.5093994140625, "learning_rate": 9.974171416064719e-06, "loss": 110.029, "step": 15980 }, { "epoch": 0.13227447574140713, "grad_norm": 1180.7437744140625, "learning_rate": 9.974028048562118e-06, "loss": 136.7102, "step": 15990 }, { "epoch": 0.13235719899077636, "grad_norm": 1241.7110595703125, "learning_rate": 9.973884285298932e-06, "loss": 154.7749, "step": 16000 }, { "epoch": 0.13243992224014559, "grad_norm": 1181.715576171875, "learning_rate": 9.9737401262866e-06, "loss": 163.9049, "step": 16010 }, { "epoch": 0.13252264548951484, "grad_norm": 1441.7060546875, "learning_rate": 9.973595571536593e-06, "loss": 131.6654, "step": 16020 }, { "epoch": 0.13260536873888407, "grad_norm": 1810.145751953125, "learning_rate": 9.973450621060412e-06, "loss": 155.4361, "step": 16030 }, { "epoch": 0.1326880919882533, "grad_norm": 1024.084716796875, "learning_rate": 9.97330527486959e-06, "loss": 130.6234, "step": 16040 }, { "epoch": 0.13277081523762255, "grad_norm": 1294.561279296875, "learning_rate": 9.973159532975691e-06, "loss": 122.6079, "step": 16050 }, { "epoch": 0.13285353848699177, "grad_norm": 1282.573486328125, "learning_rate": 9.973013395390314e-06, "loss": 173.6021, "step": 16060 }, { "epoch": 0.132936261736361, "grad_norm": 1436.6795654296875, "learning_rate": 9.972866862125083e-06, "loss": 201.6667, "step": 16070 }, { "epoch": 0.13301898498573023, "grad_norm": 880.5997924804688, "learning_rate": 9.972719933191657e-06, "loss": 121.1312, "step": 16080 }, { "epoch": 0.13310170823509948, "grad_norm": 720.5911254882812, "learning_rate": 9.97257260860173e-06, "loss": 117.1484, "step": 16090 }, { "epoch": 0.1331844314844687, "grad_norm": 1505.3927001953125, "learning_rate": 9.972424888367019e-06, "loss": 146.7309, "step": 16100 }, { "epoch": 0.13326715473383793, "grad_norm": 958.6402587890625, "learning_rate": 9.972276772499281e-06, "loss": 156.9766, "step": 16110 }, { "epoch": 0.1333498779832072, "grad_norm": 877.50244140625, "learning_rate": 9.9721282610103e-06, "loss": 191.0899, "step": 16120 }, { "epoch": 0.13343260123257641, "grad_norm": 1021.2138671875, "learning_rate": 9.971979353911891e-06, "loss": 133.9165, "step": 16130 }, { "epoch": 0.13351532448194564, "grad_norm": 847.0870971679688, "learning_rate": 9.971830051215905e-06, "loss": 101.3374, "step": 16140 }, { "epoch": 0.1335980477313149, "grad_norm": 2785.597412109375, "learning_rate": 9.97168035293422e-06, "loss": 267.7292, "step": 16150 }, { "epoch": 0.13368077098068412, "grad_norm": 801.3421020507812, "learning_rate": 9.971530259078743e-06, "loss": 111.4734, "step": 16160 }, { "epoch": 0.13376349423005335, "grad_norm": 768.2542114257812, "learning_rate": 9.971379769661422e-06, "loss": 149.4196, "step": 16170 }, { "epoch": 0.1338462174794226, "grad_norm": 893.0291748046875, "learning_rate": 9.971228884694228e-06, "loss": 122.37, "step": 16180 }, { "epoch": 0.13392894072879183, "grad_norm": 1295.072509765625, "learning_rate": 9.971077604189166e-06, "loss": 156.3286, "step": 16190 }, { "epoch": 0.13401166397816106, "grad_norm": 998.2085571289062, "learning_rate": 9.970925928158275e-06, "loss": 122.403, "step": 16200 }, { "epoch": 0.1340943872275303, "grad_norm": 701.3370361328125, "learning_rate": 9.970773856613617e-06, "loss": 140.6802, "step": 16210 }, { "epoch": 0.13417711047689954, "grad_norm": 971.6983032226562, "learning_rate": 9.970621389567301e-06, "loss": 178.1052, "step": 16220 }, { "epoch": 0.13425983372626876, "grad_norm": 2665.119384765625, "learning_rate": 9.97046852703145e-06, "loss": 138.6044, "step": 16230 }, { "epoch": 0.13434255697563802, "grad_norm": 2127.31884765625, "learning_rate": 9.970315269018231e-06, "loss": 157.2493, "step": 16240 }, { "epoch": 0.13442528022500724, "grad_norm": 1778.2391357421875, "learning_rate": 9.970161615539837e-06, "loss": 134.0471, "step": 16250 }, { "epoch": 0.13450800347437647, "grad_norm": 993.4716796875, "learning_rate": 9.970007566608492e-06, "loss": 146.2506, "step": 16260 }, { "epoch": 0.1345907267237457, "grad_norm": 798.2664184570312, "learning_rate": 9.969853122236455e-06, "loss": 114.1296, "step": 16270 }, { "epoch": 0.13467344997311495, "grad_norm": 703.0869750976562, "learning_rate": 9.969698282436013e-06, "loss": 120.5299, "step": 16280 }, { "epoch": 0.13475617322248418, "grad_norm": 1201.6317138671875, "learning_rate": 9.969543047219487e-06, "loss": 125.8007, "step": 16290 }, { "epoch": 0.1348388964718534, "grad_norm": 1785.0177001953125, "learning_rate": 9.969387416599227e-06, "loss": 144.5029, "step": 16300 }, { "epoch": 0.13492161972122266, "grad_norm": 1228.9619140625, "learning_rate": 9.969231390587618e-06, "loss": 164.9693, "step": 16310 }, { "epoch": 0.13500434297059188, "grad_norm": 864.3604736328125, "learning_rate": 9.969074969197072e-06, "loss": 168.7043, "step": 16320 }, { "epoch": 0.1350870662199611, "grad_norm": 1214.023681640625, "learning_rate": 9.968918152440036e-06, "loss": 172.751, "step": 16330 }, { "epoch": 0.13516978946933036, "grad_norm": 928.501220703125, "learning_rate": 9.968760940328987e-06, "loss": 131.5311, "step": 16340 }, { "epoch": 0.1352525127186996, "grad_norm": 510.1147155761719, "learning_rate": 9.968603332876435e-06, "loss": 171.1721, "step": 16350 }, { "epoch": 0.13533523596806882, "grad_norm": 1110.3807373046875, "learning_rate": 9.968445330094915e-06, "loss": 169.255, "step": 16360 }, { "epoch": 0.13541795921743807, "grad_norm": 1672.8614501953125, "learning_rate": 9.968286931997004e-06, "loss": 112.5926, "step": 16370 }, { "epoch": 0.1355006824668073, "grad_norm": 1014.0128784179688, "learning_rate": 9.968128138595304e-06, "loss": 100.9882, "step": 16380 }, { "epoch": 0.13558340571617652, "grad_norm": 1446.147216796875, "learning_rate": 9.967968949902448e-06, "loss": 185.0402, "step": 16390 }, { "epoch": 0.13566612896554578, "grad_norm": 753.0343627929688, "learning_rate": 9.967809365931102e-06, "loss": 148.759, "step": 16400 }, { "epoch": 0.135748852214915, "grad_norm": 909.8871459960938, "learning_rate": 9.967649386693964e-06, "loss": 123.6662, "step": 16410 }, { "epoch": 0.13583157546428423, "grad_norm": 1223.7244873046875, "learning_rate": 9.967489012203765e-06, "loss": 132.6178, "step": 16420 }, { "epoch": 0.13591429871365346, "grad_norm": 1106.0858154296875, "learning_rate": 9.967328242473261e-06, "loss": 146.9553, "step": 16430 }, { "epoch": 0.1359970219630227, "grad_norm": 1789.8829345703125, "learning_rate": 9.967167077515246e-06, "loss": 133.0784, "step": 16440 }, { "epoch": 0.13607974521239194, "grad_norm": 741.1414184570312, "learning_rate": 9.967005517342544e-06, "loss": 143.1583, "step": 16450 }, { "epoch": 0.13616246846176117, "grad_norm": 1324.021240234375, "learning_rate": 9.966843561968005e-06, "loss": 108.1861, "step": 16460 }, { "epoch": 0.13624519171113042, "grad_norm": 866.0011596679688, "learning_rate": 9.966681211404521e-06, "loss": 138.6324, "step": 16470 }, { "epoch": 0.13632791496049965, "grad_norm": 520.3377685546875, "learning_rate": 9.966518465665007e-06, "loss": 113.3134, "step": 16480 }, { "epoch": 0.13641063820986887, "grad_norm": 883.1153564453125, "learning_rate": 9.966355324762412e-06, "loss": 163.313, "step": 16490 }, { "epoch": 0.13649336145923813, "grad_norm": 1007.1843872070312, "learning_rate": 9.966191788709716e-06, "loss": 140.2184, "step": 16500 }, { "epoch": 0.13657608470860735, "grad_norm": 1669.2816162109375, "learning_rate": 9.966027857519931e-06, "loss": 188.2176, "step": 16510 }, { "epoch": 0.13665880795797658, "grad_norm": 772.6116943359375, "learning_rate": 9.9658635312061e-06, "loss": 163.7544, "step": 16520 }, { "epoch": 0.13674153120734583, "grad_norm": 706.4850463867188, "learning_rate": 9.965698809781298e-06, "loss": 121.3989, "step": 16530 }, { "epoch": 0.13682425445671506, "grad_norm": 766.0828247070312, "learning_rate": 9.965533693258632e-06, "loss": 213.4713, "step": 16540 }, { "epoch": 0.1369069777060843, "grad_norm": 957.917724609375, "learning_rate": 9.965368181651239e-06, "loss": 183.1273, "step": 16550 }, { "epoch": 0.13698970095545354, "grad_norm": 696.8062744140625, "learning_rate": 9.965202274972288e-06, "loss": 112.6891, "step": 16560 }, { "epoch": 0.13707242420482277, "grad_norm": 902.3621215820312, "learning_rate": 9.965035973234977e-06, "loss": 113.6838, "step": 16570 }, { "epoch": 0.137155147454192, "grad_norm": 1020.390625, "learning_rate": 9.964869276452542e-06, "loss": 106.0109, "step": 16580 }, { "epoch": 0.13723787070356125, "grad_norm": 1181.8326416015625, "learning_rate": 9.964702184638244e-06, "loss": 139.7021, "step": 16590 }, { "epoch": 0.13732059395293048, "grad_norm": 629.9285278320312, "learning_rate": 9.964534697805377e-06, "loss": 193.1732, "step": 16600 }, { "epoch": 0.1374033172022997, "grad_norm": 1531.7962646484375, "learning_rate": 9.96436681596727e-06, "loss": 154.7776, "step": 16610 }, { "epoch": 0.13748604045166893, "grad_norm": 1220.1796875, "learning_rate": 9.964198539137277e-06, "loss": 191.2195, "step": 16620 }, { "epoch": 0.13756876370103818, "grad_norm": 0.0, "learning_rate": 9.964029867328791e-06, "loss": 112.8693, "step": 16630 }, { "epoch": 0.1376514869504074, "grad_norm": 1105.817626953125, "learning_rate": 9.963860800555228e-06, "loss": 103.0777, "step": 16640 }, { "epoch": 0.13773421019977664, "grad_norm": 472.4584655761719, "learning_rate": 9.963691338830045e-06, "loss": 123.1952, "step": 16650 }, { "epoch": 0.1378169334491459, "grad_norm": 990.940673828125, "learning_rate": 9.963521482166718e-06, "loss": 136.4567, "step": 16660 }, { "epoch": 0.13789965669851512, "grad_norm": 1503.9461669921875, "learning_rate": 9.96335123057877e-06, "loss": 136.2858, "step": 16670 }, { "epoch": 0.13798237994788434, "grad_norm": 1348.58740234375, "learning_rate": 9.963180584079741e-06, "loss": 137.5341, "step": 16680 }, { "epoch": 0.1380651031972536, "grad_norm": 1100.0037841796875, "learning_rate": 9.963009542683214e-06, "loss": 199.9709, "step": 16690 }, { "epoch": 0.13814782644662282, "grad_norm": 718.8609619140625, "learning_rate": 9.962838106402791e-06, "loss": 184.6782, "step": 16700 }, { "epoch": 0.13823054969599205, "grad_norm": 865.5576782226562, "learning_rate": 9.962666275252117e-06, "loss": 104.1854, "step": 16710 }, { "epoch": 0.1383132729453613, "grad_norm": 1161.63525390625, "learning_rate": 9.962494049244866e-06, "loss": 169.3983, "step": 16720 }, { "epoch": 0.13839599619473053, "grad_norm": 589.0774536132812, "learning_rate": 9.962321428394735e-06, "loss": 165.776, "step": 16730 }, { "epoch": 0.13847871944409976, "grad_norm": 2693.160888671875, "learning_rate": 9.962148412715464e-06, "loss": 154.1448, "step": 16740 }, { "epoch": 0.138561442693469, "grad_norm": 1310.2269287109375, "learning_rate": 9.961975002220816e-06, "loss": 166.3599, "step": 16750 }, { "epoch": 0.13864416594283824, "grad_norm": 1167.153076171875, "learning_rate": 9.96180119692459e-06, "loss": 171.0495, "step": 16760 }, { "epoch": 0.13872688919220746, "grad_norm": 1377.29833984375, "learning_rate": 9.961626996840613e-06, "loss": 102.7167, "step": 16770 }, { "epoch": 0.13880961244157672, "grad_norm": 977.5831909179688, "learning_rate": 9.961452401982748e-06, "loss": 136.4004, "step": 16780 }, { "epoch": 0.13889233569094595, "grad_norm": 1010.1982421875, "learning_rate": 9.961277412364884e-06, "loss": 146.971, "step": 16790 }, { "epoch": 0.13897505894031517, "grad_norm": 814.7576293945312, "learning_rate": 9.961102028000948e-06, "loss": 213.2676, "step": 16800 }, { "epoch": 0.1390577821896844, "grad_norm": 881.7014770507812, "learning_rate": 9.96092624890489e-06, "loss": 91.0271, "step": 16810 }, { "epoch": 0.13914050543905365, "grad_norm": 4899.205078125, "learning_rate": 9.960750075090698e-06, "loss": 166.8467, "step": 16820 }, { "epoch": 0.13922322868842288, "grad_norm": 1270.030029296875, "learning_rate": 9.960573506572391e-06, "loss": 186.535, "step": 16830 }, { "epoch": 0.1393059519377921, "grad_norm": 1338.3089599609375, "learning_rate": 9.960396543364013e-06, "loss": 192.4324, "step": 16840 }, { "epoch": 0.13938867518716136, "grad_norm": 1512.3917236328125, "learning_rate": 9.96021918547965e-06, "loss": 124.9194, "step": 16850 }, { "epoch": 0.13947139843653059, "grad_norm": 1637.7535400390625, "learning_rate": 9.96004143293341e-06, "loss": 131.2566, "step": 16860 }, { "epoch": 0.1395541216858998, "grad_norm": 1564.211669921875, "learning_rate": 9.959863285739436e-06, "loss": 124.8255, "step": 16870 }, { "epoch": 0.13963684493526907, "grad_norm": 720.8834228515625, "learning_rate": 9.959684743911904e-06, "loss": 140.7759, "step": 16880 }, { "epoch": 0.1397195681846383, "grad_norm": 796.6300659179688, "learning_rate": 9.959505807465018e-06, "loss": 120.1176, "step": 16890 }, { "epoch": 0.13980229143400752, "grad_norm": 1232.4276123046875, "learning_rate": 9.959326476413016e-06, "loss": 130.2664, "step": 16900 }, { "epoch": 0.13988501468337677, "grad_norm": 457.3919677734375, "learning_rate": 9.959146750770167e-06, "loss": 124.8512, "step": 16910 }, { "epoch": 0.139967737932746, "grad_norm": 708.2092895507812, "learning_rate": 9.95896663055077e-06, "loss": 120.5444, "step": 16920 }, { "epoch": 0.14005046118211523, "grad_norm": 995.7003784179688, "learning_rate": 9.958786115769157e-06, "loss": 114.9213, "step": 16930 }, { "epoch": 0.14013318443148448, "grad_norm": 1515.4827880859375, "learning_rate": 9.958605206439692e-06, "loss": 146.7894, "step": 16940 }, { "epoch": 0.1402159076808537, "grad_norm": 814.6317138671875, "learning_rate": 9.958423902576764e-06, "loss": 99.1024, "step": 16950 }, { "epoch": 0.14029863093022293, "grad_norm": 760.5602416992188, "learning_rate": 9.958242204194804e-06, "loss": 160.827, "step": 16960 }, { "epoch": 0.1403813541795922, "grad_norm": 738.33349609375, "learning_rate": 9.958060111308267e-06, "loss": 136.0457, "step": 16970 }, { "epoch": 0.14046407742896141, "grad_norm": 1149.28857421875, "learning_rate": 9.957877623931642e-06, "loss": 151.1577, "step": 16980 }, { "epoch": 0.14054680067833064, "grad_norm": 1362.2108154296875, "learning_rate": 9.95769474207945e-06, "loss": 173.5694, "step": 16990 }, { "epoch": 0.14062952392769987, "grad_norm": 1314.1846923828125, "learning_rate": 9.957511465766236e-06, "loss": 169.4035, "step": 17000 }, { "epoch": 0.14071224717706912, "grad_norm": 1065.3922119140625, "learning_rate": 9.957327795006589e-06, "loss": 169.1779, "step": 17010 }, { "epoch": 0.14079497042643835, "grad_norm": 1382.638427734375, "learning_rate": 9.95714372981512e-06, "loss": 145.6161, "step": 17020 }, { "epoch": 0.14087769367580757, "grad_norm": 1485.4481201171875, "learning_rate": 9.956959270206474e-06, "loss": 131.7884, "step": 17030 }, { "epoch": 0.14096041692517683, "grad_norm": 901.7747192382812, "learning_rate": 9.956774416195329e-06, "loss": 129.2612, "step": 17040 }, { "epoch": 0.14104314017454606, "grad_norm": 1346.950439453125, "learning_rate": 9.956589167796392e-06, "loss": 108.1172, "step": 17050 }, { "epoch": 0.14112586342391528, "grad_norm": 857.8418579101562, "learning_rate": 9.956403525024402e-06, "loss": 132.697, "step": 17060 }, { "epoch": 0.14120858667328454, "grad_norm": 1938.1868896484375, "learning_rate": 9.956217487894131e-06, "loss": 165.6452, "step": 17070 }, { "epoch": 0.14129130992265376, "grad_norm": 749.3518676757812, "learning_rate": 9.95603105642038e-06, "loss": 212.4321, "step": 17080 }, { "epoch": 0.141374033172023, "grad_norm": 709.408447265625, "learning_rate": 9.955844230617985e-06, "loss": 156.41, "step": 17090 }, { "epoch": 0.14145675642139224, "grad_norm": 1008.6261596679688, "learning_rate": 9.955657010501807e-06, "loss": 118.0272, "step": 17100 }, { "epoch": 0.14153947967076147, "grad_norm": 828.895751953125, "learning_rate": 9.955469396086743e-06, "loss": 138.8411, "step": 17110 }, { "epoch": 0.1416222029201307, "grad_norm": 1362.32421875, "learning_rate": 9.955281387387724e-06, "loss": 145.7589, "step": 17120 }, { "epoch": 0.14170492616949995, "grad_norm": 1597.079345703125, "learning_rate": 9.955092984419705e-06, "loss": 170.475, "step": 17130 }, { "epoch": 0.14178764941886918, "grad_norm": 1059.4306640625, "learning_rate": 9.954904187197679e-06, "loss": 158.0434, "step": 17140 }, { "epoch": 0.1418703726682384, "grad_norm": 694.0506591796875, "learning_rate": 9.954714995736667e-06, "loss": 142.6755, "step": 17150 }, { "epoch": 0.14195309591760763, "grad_norm": 1392.7862548828125, "learning_rate": 9.95452541005172e-06, "loss": 192.9698, "step": 17160 }, { "epoch": 0.14203581916697688, "grad_norm": 1239.712646484375, "learning_rate": 9.954335430157926e-06, "loss": 126.2119, "step": 17170 }, { "epoch": 0.1421185424163461, "grad_norm": 949.230712890625, "learning_rate": 9.9541450560704e-06, "loss": 76.8772, "step": 17180 }, { "epoch": 0.14220126566571534, "grad_norm": 1190.4364013671875, "learning_rate": 9.953954287804286e-06, "loss": 156.7768, "step": 17190 }, { "epoch": 0.1422839889150846, "grad_norm": 1422.4742431640625, "learning_rate": 9.953763125374767e-06, "loss": 107.7513, "step": 17200 }, { "epoch": 0.14236671216445382, "grad_norm": 1076.408935546875, "learning_rate": 9.953571568797049e-06, "loss": 136.0641, "step": 17210 }, { "epoch": 0.14244943541382304, "grad_norm": 930.828125, "learning_rate": 9.953379618086377e-06, "loss": 143.9599, "step": 17220 }, { "epoch": 0.1425321586631923, "grad_norm": 1367.8873291015625, "learning_rate": 9.95318727325802e-06, "loss": 128.7768, "step": 17230 }, { "epoch": 0.14261488191256153, "grad_norm": 1150.171875, "learning_rate": 9.952994534327283e-06, "loss": 124.427, "step": 17240 }, { "epoch": 0.14269760516193075, "grad_norm": 821.237548828125, "learning_rate": 9.952801401309504e-06, "loss": 137.096, "step": 17250 }, { "epoch": 0.1427803284113, "grad_norm": 1357.8616943359375, "learning_rate": 9.952607874220048e-06, "loss": 201.047, "step": 17260 }, { "epoch": 0.14286305166066923, "grad_norm": 1452.91650390625, "learning_rate": 9.952413953074312e-06, "loss": 199.8793, "step": 17270 }, { "epoch": 0.14294577491003846, "grad_norm": 965.8828125, "learning_rate": 9.952219637887725e-06, "loss": 129.7407, "step": 17280 }, { "epoch": 0.1430284981594077, "grad_norm": 1721.4344482421875, "learning_rate": 9.952024928675752e-06, "loss": 177.8543, "step": 17290 }, { "epoch": 0.14311122140877694, "grad_norm": 3541.317626953125, "learning_rate": 9.951829825453881e-06, "loss": 167.7698, "step": 17300 }, { "epoch": 0.14319394465814617, "grad_norm": 2036.2423095703125, "learning_rate": 9.951634328237635e-06, "loss": 141.8449, "step": 17310 }, { "epoch": 0.14327666790751542, "grad_norm": 880.5416870117188, "learning_rate": 9.951438437042572e-06, "loss": 198.8033, "step": 17320 }, { "epoch": 0.14335939115688465, "grad_norm": 807.236572265625, "learning_rate": 9.951242151884275e-06, "loss": 112.0078, "step": 17330 }, { "epoch": 0.14344211440625387, "grad_norm": 1530.7301025390625, "learning_rate": 9.951045472778365e-06, "loss": 133.3953, "step": 17340 }, { "epoch": 0.1435248376556231, "grad_norm": 1775.3485107421875, "learning_rate": 9.950848399740488e-06, "loss": 132.5112, "step": 17350 }, { "epoch": 0.14360756090499235, "grad_norm": 1216.1314697265625, "learning_rate": 9.950650932786325e-06, "loss": 150.7454, "step": 17360 }, { "epoch": 0.14369028415436158, "grad_norm": 756.1212158203125, "learning_rate": 9.95045307193159e-06, "loss": 114.4585, "step": 17370 }, { "epoch": 0.1437730074037308, "grad_norm": 987.248779296875, "learning_rate": 9.95025481719202e-06, "loss": 140.8504, "step": 17380 }, { "epoch": 0.14385573065310006, "grad_norm": 1126.249267578125, "learning_rate": 9.950056168583395e-06, "loss": 225.9696, "step": 17390 }, { "epoch": 0.1439384539024693, "grad_norm": 706.3463745117188, "learning_rate": 9.949857126121519e-06, "loss": 113.696, "step": 17400 }, { "epoch": 0.14402117715183851, "grad_norm": 892.3402099609375, "learning_rate": 9.949657689822226e-06, "loss": 162.9231, "step": 17410 }, { "epoch": 0.14410390040120777, "grad_norm": 856.6466674804688, "learning_rate": 9.949457859701388e-06, "loss": 99.4635, "step": 17420 }, { "epoch": 0.144186623650577, "grad_norm": 775.4996948242188, "learning_rate": 9.949257635774903e-06, "loss": 152.7363, "step": 17430 }, { "epoch": 0.14426934689994622, "grad_norm": 842.1768798828125, "learning_rate": 9.9490570180587e-06, "loss": 85.8346, "step": 17440 }, { "epoch": 0.14435207014931548, "grad_norm": 1798.95849609375, "learning_rate": 9.948856006568746e-06, "loss": 197.5757, "step": 17450 }, { "epoch": 0.1444347933986847, "grad_norm": 1381.5155029296875, "learning_rate": 9.94865460132103e-06, "loss": 150.2531, "step": 17460 }, { "epoch": 0.14451751664805393, "grad_norm": 997.7630004882812, "learning_rate": 9.948452802331578e-06, "loss": 133.1603, "step": 17470 }, { "epoch": 0.14460023989742318, "grad_norm": 1275.1690673828125, "learning_rate": 9.948250609616449e-06, "loss": 168.5733, "step": 17480 }, { "epoch": 0.1446829631467924, "grad_norm": 1112.8721923828125, "learning_rate": 9.948048023191728e-06, "loss": 182.301, "step": 17490 }, { "epoch": 0.14476568639616164, "grad_norm": 950.4414672851562, "learning_rate": 9.947845043073533e-06, "loss": 149.5477, "step": 17500 }, { "epoch": 0.1448484096455309, "grad_norm": 1122.95751953125, "learning_rate": 9.947641669278016e-06, "loss": 123.1119, "step": 17510 }, { "epoch": 0.14493113289490012, "grad_norm": 1148.9334716796875, "learning_rate": 9.947437901821358e-06, "loss": 128.3063, "step": 17520 }, { "epoch": 0.14501385614426934, "grad_norm": 1392.179443359375, "learning_rate": 9.947233740719772e-06, "loss": 139.3278, "step": 17530 }, { "epoch": 0.14509657939363857, "grad_norm": 604.5231323242188, "learning_rate": 9.947029185989501e-06, "loss": 163.2896, "step": 17540 }, { "epoch": 0.14517930264300782, "grad_norm": 1102.948486328125, "learning_rate": 9.946824237646823e-06, "loss": 153.8839, "step": 17550 }, { "epoch": 0.14526202589237705, "grad_norm": 2167.79638671875, "learning_rate": 9.946618895708043e-06, "loss": 172.0367, "step": 17560 }, { "epoch": 0.14534474914174628, "grad_norm": 1476.7362060546875, "learning_rate": 9.946413160189498e-06, "loss": 138.3295, "step": 17570 }, { "epoch": 0.14542747239111553, "grad_norm": 882.1810913085938, "learning_rate": 9.946207031107562e-06, "loss": 186.2194, "step": 17580 }, { "epoch": 0.14551019564048476, "grad_norm": 2111.673095703125, "learning_rate": 9.94600050847863e-06, "loss": 170.1872, "step": 17590 }, { "epoch": 0.14559291888985398, "grad_norm": 1099.032958984375, "learning_rate": 9.945793592319137e-06, "loss": 128.6498, "step": 17600 }, { "epoch": 0.14567564213922324, "grad_norm": 1059.4005126953125, "learning_rate": 9.945586282645545e-06, "loss": 134.5357, "step": 17610 }, { "epoch": 0.14575836538859246, "grad_norm": 1566.564208984375, "learning_rate": 9.945378579474351e-06, "loss": 164.359, "step": 17620 }, { "epoch": 0.1458410886379617, "grad_norm": 782.61279296875, "learning_rate": 9.945170482822079e-06, "loss": 106.899, "step": 17630 }, { "epoch": 0.14592381188733095, "grad_norm": 1026.7816162109375, "learning_rate": 9.944961992705288e-06, "loss": 142.0462, "step": 17640 }, { "epoch": 0.14600653513670017, "grad_norm": 817.039306640625, "learning_rate": 9.944753109140564e-06, "loss": 166.4367, "step": 17650 }, { "epoch": 0.1460892583860694, "grad_norm": 856.3842163085938, "learning_rate": 9.94454383214453e-06, "loss": 131.3289, "step": 17660 }, { "epoch": 0.14617198163543865, "grad_norm": 1656.999755859375, "learning_rate": 9.944334161733835e-06, "loss": 129.1978, "step": 17670 }, { "epoch": 0.14625470488480788, "grad_norm": 1338.8382568359375, "learning_rate": 9.944124097925161e-06, "loss": 184.4288, "step": 17680 }, { "epoch": 0.1463374281341771, "grad_norm": 1011.0686645507812, "learning_rate": 9.943913640735224e-06, "loss": 127.4451, "step": 17690 }, { "epoch": 0.14642015138354633, "grad_norm": 923.1884765625, "learning_rate": 9.94370279018077e-06, "loss": 120.5529, "step": 17700 }, { "epoch": 0.1465028746329156, "grad_norm": 1001.093505859375, "learning_rate": 9.94349154627857e-06, "loss": 129.3988, "step": 17710 }, { "epoch": 0.1465855978822848, "grad_norm": 1354.6356201171875, "learning_rate": 9.943279909045438e-06, "loss": 122.9835, "step": 17720 }, { "epoch": 0.14666832113165404, "grad_norm": 1260.7392578125, "learning_rate": 9.94306787849821e-06, "loss": 101.6319, "step": 17730 }, { "epoch": 0.1467510443810233, "grad_norm": 1424.63330078125, "learning_rate": 9.942855454653755e-06, "loss": 179.1118, "step": 17740 }, { "epoch": 0.14683376763039252, "grad_norm": 1053.8809814453125, "learning_rate": 9.942642637528977e-06, "loss": 167.5939, "step": 17750 }, { "epoch": 0.14691649087976175, "grad_norm": 936.3515014648438, "learning_rate": 9.942429427140807e-06, "loss": 154.7948, "step": 17760 }, { "epoch": 0.146999214129131, "grad_norm": 876.3916015625, "learning_rate": 9.942215823506211e-06, "loss": 114.5722, "step": 17770 }, { "epoch": 0.14708193737850023, "grad_norm": 1172.0423583984375, "learning_rate": 9.942001826642184e-06, "loss": 142.9646, "step": 17780 }, { "epoch": 0.14716466062786945, "grad_norm": 1635.97802734375, "learning_rate": 9.941787436565751e-06, "loss": 150.69, "step": 17790 }, { "epoch": 0.1472473838772387, "grad_norm": 728.3792724609375, "learning_rate": 9.941572653293974e-06, "loss": 97.5937, "step": 17800 }, { "epoch": 0.14733010712660793, "grad_norm": 935.0343627929688, "learning_rate": 9.941357476843938e-06, "loss": 135.0443, "step": 17810 }, { "epoch": 0.14741283037597716, "grad_norm": 583.3887329101562, "learning_rate": 9.941141907232766e-06, "loss": 134.4311, "step": 17820 }, { "epoch": 0.14749555362534642, "grad_norm": 1191.19677734375, "learning_rate": 9.940925944477608e-06, "loss": 129.727, "step": 17830 }, { "epoch": 0.14757827687471564, "grad_norm": 1111.1417236328125, "learning_rate": 9.940709588595649e-06, "loss": 171.4274, "step": 17840 }, { "epoch": 0.14766100012408487, "grad_norm": 2006.4134521484375, "learning_rate": 9.940492839604103e-06, "loss": 152.9817, "step": 17850 }, { "epoch": 0.14774372337345412, "grad_norm": 1163.596923828125, "learning_rate": 9.940275697520216e-06, "loss": 169.9584, "step": 17860 }, { "epoch": 0.14782644662282335, "grad_norm": 1189.015869140625, "learning_rate": 9.940058162361264e-06, "loss": 152.1794, "step": 17870 }, { "epoch": 0.14790916987219258, "grad_norm": 998.8855590820312, "learning_rate": 9.939840234144556e-06, "loss": 129.5204, "step": 17880 }, { "epoch": 0.1479918931215618, "grad_norm": 442.9149475097656, "learning_rate": 9.939621912887431e-06, "loss": 106.7805, "step": 17890 }, { "epoch": 0.14807461637093106, "grad_norm": 830.00927734375, "learning_rate": 9.93940319860726e-06, "loss": 139.6457, "step": 17900 }, { "epoch": 0.14815733962030028, "grad_norm": 1069.5220947265625, "learning_rate": 9.939184091321445e-06, "loss": 129.1493, "step": 17910 }, { "epoch": 0.1482400628696695, "grad_norm": 1180.868896484375, "learning_rate": 9.938964591047421e-06, "loss": 108.2578, "step": 17920 }, { "epoch": 0.14832278611903876, "grad_norm": 1095.6793212890625, "learning_rate": 9.938744697802651e-06, "loss": 145.4649, "step": 17930 }, { "epoch": 0.148405509368408, "grad_norm": 1292.62744140625, "learning_rate": 9.938524411604631e-06, "loss": 145.161, "step": 17940 }, { "epoch": 0.14848823261777722, "grad_norm": 1319.2213134765625, "learning_rate": 9.938303732470888e-06, "loss": 129.5037, "step": 17950 }, { "epoch": 0.14857095586714647, "grad_norm": 697.8318481445312, "learning_rate": 9.938082660418981e-06, "loss": 103.5571, "step": 17960 }, { "epoch": 0.1486536791165157, "grad_norm": 784.6300659179688, "learning_rate": 9.937861195466498e-06, "loss": 133.7046, "step": 17970 }, { "epoch": 0.14873640236588492, "grad_norm": 966.1806030273438, "learning_rate": 9.937639337631064e-06, "loss": 170.2544, "step": 17980 }, { "epoch": 0.14881912561525418, "grad_norm": 862.203857421875, "learning_rate": 9.937417086930328e-06, "loss": 129.5846, "step": 17990 }, { "epoch": 0.1489018488646234, "grad_norm": 3391.59716796875, "learning_rate": 9.937194443381972e-06, "loss": 195.0929, "step": 18000 }, { "epoch": 0.14898457211399263, "grad_norm": 737.4010009765625, "learning_rate": 9.936971407003714e-06, "loss": 110.9804, "step": 18010 }, { "epoch": 0.14906729536336188, "grad_norm": 1527.3822021484375, "learning_rate": 9.936747977813299e-06, "loss": 124.5241, "step": 18020 }, { "epoch": 0.1491500186127311, "grad_norm": 1383.10986328125, "learning_rate": 9.936524155828503e-06, "loss": 138.0007, "step": 18030 }, { "epoch": 0.14923274186210034, "grad_norm": 850.4631958007812, "learning_rate": 9.936299941067137e-06, "loss": 131.9197, "step": 18040 }, { "epoch": 0.1493154651114696, "grad_norm": 736.7586059570312, "learning_rate": 9.93607533354704e-06, "loss": 116.8003, "step": 18050 }, { "epoch": 0.14939818836083882, "grad_norm": 3558.953857421875, "learning_rate": 9.935850333286081e-06, "loss": 236.4352, "step": 18060 }, { "epoch": 0.14948091161020804, "grad_norm": 2170.6923828125, "learning_rate": 9.935624940302165e-06, "loss": 162.2385, "step": 18070 }, { "epoch": 0.14956363485957727, "grad_norm": 890.1776123046875, "learning_rate": 9.93539915461322e-06, "loss": 152.4946, "step": 18080 }, { "epoch": 0.14964635810894653, "grad_norm": 1055.08447265625, "learning_rate": 9.935172976237218e-06, "loss": 179.4581, "step": 18090 }, { "epoch": 0.14972908135831575, "grad_norm": 1069.735595703125, "learning_rate": 9.934946405192152e-06, "loss": 109.1896, "step": 18100 }, { "epoch": 0.14981180460768498, "grad_norm": 936.50048828125, "learning_rate": 9.934719441496048e-06, "loss": 170.2172, "step": 18110 }, { "epoch": 0.14989452785705423, "grad_norm": 1424.594970703125, "learning_rate": 9.934492085166965e-06, "loss": 120.5943, "step": 18120 }, { "epoch": 0.14997725110642346, "grad_norm": 1259.637939453125, "learning_rate": 9.934264336222992e-06, "loss": 141.8418, "step": 18130 }, { "epoch": 0.15005997435579269, "grad_norm": 1107.58447265625, "learning_rate": 9.934036194682253e-06, "loss": 132.8073, "step": 18140 }, { "epoch": 0.15014269760516194, "grad_norm": 1031.5169677734375, "learning_rate": 9.933807660562898e-06, "loss": 122.9906, "step": 18150 }, { "epoch": 0.15022542085453117, "grad_norm": 683.3692626953125, "learning_rate": 9.933578733883109e-06, "loss": 175.373, "step": 18160 }, { "epoch": 0.1503081441039004, "grad_norm": 841.3174438476562, "learning_rate": 9.933349414661103e-06, "loss": 143.8702, "step": 18170 }, { "epoch": 0.15039086735326965, "grad_norm": 1086.541015625, "learning_rate": 9.933119702915125e-06, "loss": 149.0898, "step": 18180 }, { "epoch": 0.15047359060263887, "grad_norm": 1380.8690185546875, "learning_rate": 9.932889598663452e-06, "loss": 142.0298, "step": 18190 }, { "epoch": 0.1505563138520081, "grad_norm": 800.4336547851562, "learning_rate": 9.932659101924393e-06, "loss": 169.3204, "step": 18200 }, { "epoch": 0.15063903710137735, "grad_norm": 883.3157348632812, "learning_rate": 9.932428212716287e-06, "loss": 183.8594, "step": 18210 }, { "epoch": 0.15072176035074658, "grad_norm": 922.6904907226562, "learning_rate": 9.932196931057505e-06, "loss": 157.8369, "step": 18220 }, { "epoch": 0.1508044836001158, "grad_norm": 1918.9375, "learning_rate": 9.931965256966449e-06, "loss": 143.9471, "step": 18230 }, { "epoch": 0.15088720684948506, "grad_norm": 1153.788818359375, "learning_rate": 9.931733190461552e-06, "loss": 167.4599, "step": 18240 }, { "epoch": 0.1509699300988543, "grad_norm": 1510.7779541015625, "learning_rate": 9.931500731561279e-06, "loss": 123.1982, "step": 18250 }, { "epoch": 0.15105265334822351, "grad_norm": 939.08447265625, "learning_rate": 9.931267880284124e-06, "loss": 128.6788, "step": 18260 }, { "epoch": 0.15113537659759274, "grad_norm": 521.10693359375, "learning_rate": 9.931034636648616e-06, "loss": 110.3548, "step": 18270 }, { "epoch": 0.151218099846962, "grad_norm": 1793.3514404296875, "learning_rate": 9.930801000673314e-06, "loss": 226.4601, "step": 18280 }, { "epoch": 0.15130082309633122, "grad_norm": 1056.244384765625, "learning_rate": 9.930566972376803e-06, "loss": 137.7991, "step": 18290 }, { "epoch": 0.15138354634570045, "grad_norm": 1053.8623046875, "learning_rate": 9.930332551777709e-06, "loss": 126.393, "step": 18300 }, { "epoch": 0.1514662695950697, "grad_norm": 466.3129577636719, "learning_rate": 9.930097738894679e-06, "loss": 142.9212, "step": 18310 }, { "epoch": 0.15154899284443893, "grad_norm": 1002.7549438476562, "learning_rate": 9.929862533746398e-06, "loss": 142.7721, "step": 18320 }, { "epoch": 0.15163171609380816, "grad_norm": 758.2431030273438, "learning_rate": 9.92962693635158e-06, "loss": 138.2474, "step": 18330 }, { "epoch": 0.1517144393431774, "grad_norm": 640.2601318359375, "learning_rate": 9.929390946728972e-06, "loss": 127.6863, "step": 18340 }, { "epoch": 0.15179716259254664, "grad_norm": 974.4703979492188, "learning_rate": 9.929154564897347e-06, "loss": 127.8559, "step": 18350 }, { "epoch": 0.15187988584191586, "grad_norm": 989.0883178710938, "learning_rate": 9.928917790875519e-06, "loss": 146.4885, "step": 18360 }, { "epoch": 0.15196260909128512, "grad_norm": 926.8894653320312, "learning_rate": 9.92868062468232e-06, "loss": 140.2718, "step": 18370 }, { "epoch": 0.15204533234065434, "grad_norm": 1093.3875732421875, "learning_rate": 9.928443066336624e-06, "loss": 156.4275, "step": 18380 }, { "epoch": 0.15212805559002357, "grad_norm": 1011.3756713867188, "learning_rate": 9.92820511585733e-06, "loss": 110.9518, "step": 18390 }, { "epoch": 0.15221077883939282, "grad_norm": 963.0042114257812, "learning_rate": 9.927966773263375e-06, "loss": 126.9806, "step": 18400 }, { "epoch": 0.15229350208876205, "grad_norm": 1338.420654296875, "learning_rate": 9.92772803857372e-06, "loss": 151.1979, "step": 18410 }, { "epoch": 0.15237622533813128, "grad_norm": 2231.09326171875, "learning_rate": 9.927488911807359e-06, "loss": 126.2641, "step": 18420 }, { "epoch": 0.1524589485875005, "grad_norm": 740.568603515625, "learning_rate": 9.927249392983319e-06, "loss": 161.8315, "step": 18430 }, { "epoch": 0.15254167183686976, "grad_norm": 1194.8526611328125, "learning_rate": 9.927009482120658e-06, "loss": 147.5258, "step": 18440 }, { "epoch": 0.15262439508623898, "grad_norm": 575.281005859375, "learning_rate": 9.926769179238467e-06, "loss": 123.2295, "step": 18450 }, { "epoch": 0.1527071183356082, "grad_norm": 1439.9266357421875, "learning_rate": 9.926528484355859e-06, "loss": 131.7167, "step": 18460 }, { "epoch": 0.15278984158497746, "grad_norm": 1190.1434326171875, "learning_rate": 9.926287397491992e-06, "loss": 147.6172, "step": 18470 }, { "epoch": 0.1528725648343467, "grad_norm": 1017.2939453125, "learning_rate": 9.926045918666045e-06, "loss": 144.2414, "step": 18480 }, { "epoch": 0.15295528808371592, "grad_norm": 1303.34814453125, "learning_rate": 9.925804047897231e-06, "loss": 202.542, "step": 18490 }, { "epoch": 0.15303801133308517, "grad_norm": 1302.6015625, "learning_rate": 9.925561785204797e-06, "loss": 150.7994, "step": 18500 }, { "epoch": 0.1531207345824544, "grad_norm": 1482.454345703125, "learning_rate": 9.925319130608015e-06, "loss": 160.8186, "step": 18510 }, { "epoch": 0.15320345783182363, "grad_norm": 1089.9215087890625, "learning_rate": 9.925076084126194e-06, "loss": 140.3311, "step": 18520 }, { "epoch": 0.15328618108119288, "grad_norm": 2586.4873046875, "learning_rate": 9.924832645778674e-06, "loss": 105.1053, "step": 18530 }, { "epoch": 0.1533689043305621, "grad_norm": 986.44775390625, "learning_rate": 9.924588815584822e-06, "loss": 146.6998, "step": 18540 }, { "epoch": 0.15345162757993133, "grad_norm": 1076.886474609375, "learning_rate": 9.924344593564038e-06, "loss": 167.1004, "step": 18550 }, { "epoch": 0.1535343508293006, "grad_norm": 576.464599609375, "learning_rate": 9.924099979735754e-06, "loss": 109.8678, "step": 18560 }, { "epoch": 0.1536170740786698, "grad_norm": 722.518310546875, "learning_rate": 9.923854974119434e-06, "loss": 125.0473, "step": 18570 }, { "epoch": 0.15369979732803904, "grad_norm": 1820.7373046875, "learning_rate": 9.92360957673457e-06, "loss": 141.7229, "step": 18580 }, { "epoch": 0.1537825205774083, "grad_norm": 1114.6781005859375, "learning_rate": 9.923363787600688e-06, "loss": 141.2934, "step": 18590 }, { "epoch": 0.15386524382677752, "grad_norm": 1381.0604248046875, "learning_rate": 9.923117606737347e-06, "loss": 116.5776, "step": 18600 }, { "epoch": 0.15394796707614675, "grad_norm": 1086.9346923828125, "learning_rate": 9.92287103416413e-06, "loss": 191.1018, "step": 18610 }, { "epoch": 0.15403069032551597, "grad_norm": 1572.17529296875, "learning_rate": 9.922624069900658e-06, "loss": 155.7499, "step": 18620 }, { "epoch": 0.15411341357488523, "grad_norm": 1132.931884765625, "learning_rate": 9.922376713966581e-06, "loss": 152.9908, "step": 18630 }, { "epoch": 0.15419613682425445, "grad_norm": 585.323486328125, "learning_rate": 9.92212896638158e-06, "loss": 152.2068, "step": 18640 }, { "epoch": 0.15427886007362368, "grad_norm": 595.2325439453125, "learning_rate": 9.921880827165367e-06, "loss": 118.7037, "step": 18650 }, { "epoch": 0.15436158332299293, "grad_norm": 4941.7626953125, "learning_rate": 9.921632296337683e-06, "loss": 153.8302, "step": 18660 }, { "epoch": 0.15444430657236216, "grad_norm": 1101.675048828125, "learning_rate": 9.921383373918305e-06, "loss": 180.7743, "step": 18670 }, { "epoch": 0.1545270298217314, "grad_norm": 1755.6380615234375, "learning_rate": 9.92113405992704e-06, "loss": 180.545, "step": 18680 }, { "epoch": 0.15460975307110064, "grad_norm": 925.7059326171875, "learning_rate": 9.92088435438372e-06, "loss": 134.731, "step": 18690 }, { "epoch": 0.15469247632046987, "grad_norm": 1003.4811401367188, "learning_rate": 9.920634257308217e-06, "loss": 123.1074, "step": 18700 }, { "epoch": 0.1547751995698391, "grad_norm": 853.6227416992188, "learning_rate": 9.920383768720429e-06, "loss": 150.079, "step": 18710 }, { "epoch": 0.15485792281920835, "grad_norm": 1113.7686767578125, "learning_rate": 9.920132888640286e-06, "loss": 155.0464, "step": 18720 }, { "epoch": 0.15494064606857758, "grad_norm": 1343.3956298828125, "learning_rate": 9.91988161708775e-06, "loss": 158.199, "step": 18730 }, { "epoch": 0.1550233693179468, "grad_norm": 704.8764038085938, "learning_rate": 9.919629954082813e-06, "loss": 153.2144, "step": 18740 }, { "epoch": 0.15510609256731606, "grad_norm": 550.6301879882812, "learning_rate": 9.919377899645497e-06, "loss": 141.5231, "step": 18750 }, { "epoch": 0.15518881581668528, "grad_norm": 658.1661376953125, "learning_rate": 9.91912545379586e-06, "loss": 138.1113, "step": 18760 }, { "epoch": 0.1552715390660545, "grad_norm": 1573.263916015625, "learning_rate": 9.918872616553986e-06, "loss": 129.6509, "step": 18770 }, { "epoch": 0.15535426231542376, "grad_norm": 1778.827392578125, "learning_rate": 9.918619387939991e-06, "loss": 155.8357, "step": 18780 }, { "epoch": 0.155436985564793, "grad_norm": 3056.092041015625, "learning_rate": 9.918365767974025e-06, "loss": 187.3279, "step": 18790 }, { "epoch": 0.15551970881416222, "grad_norm": 905.0897827148438, "learning_rate": 9.91811175667627e-06, "loss": 198.9183, "step": 18800 }, { "epoch": 0.15560243206353144, "grad_norm": 854.0653076171875, "learning_rate": 9.91785735406693e-06, "loss": 111.8965, "step": 18810 }, { "epoch": 0.1556851553129007, "grad_norm": 1693.5703125, "learning_rate": 9.917602560166253e-06, "loss": 138.9856, "step": 18820 }, { "epoch": 0.15576787856226992, "grad_norm": 1688.492431640625, "learning_rate": 9.917347374994507e-06, "loss": 118.313, "step": 18830 }, { "epoch": 0.15585060181163915, "grad_norm": 1094.8780517578125, "learning_rate": 9.917091798571998e-06, "loss": 122.0171, "step": 18840 }, { "epoch": 0.1559333250610084, "grad_norm": 1383.755859375, "learning_rate": 9.916835830919062e-06, "loss": 149.7231, "step": 18850 }, { "epoch": 0.15601604831037763, "grad_norm": 4172.68603515625, "learning_rate": 9.916579472056064e-06, "loss": 164.1563, "step": 18860 }, { "epoch": 0.15609877155974686, "grad_norm": 1112.282958984375, "learning_rate": 9.916322722003402e-06, "loss": 140.6031, "step": 18870 }, { "epoch": 0.1561814948091161, "grad_norm": 1334.9544677734375, "learning_rate": 9.916065580781504e-06, "loss": 125.8786, "step": 18880 }, { "epoch": 0.15626421805848534, "grad_norm": 759.095703125, "learning_rate": 9.91580804841083e-06, "loss": 123.02, "step": 18890 }, { "epoch": 0.15634694130785456, "grad_norm": 923.3683471679688, "learning_rate": 9.915550124911866e-06, "loss": 111.5178, "step": 18900 }, { "epoch": 0.15642966455722382, "grad_norm": 1883.232177734375, "learning_rate": 9.915291810305141e-06, "loss": 153.0945, "step": 18910 }, { "epoch": 0.15651238780659305, "grad_norm": 1571.4327392578125, "learning_rate": 9.915033104611204e-06, "loss": 152.6783, "step": 18920 }, { "epoch": 0.15659511105596227, "grad_norm": 842.9326171875, "learning_rate": 9.914774007850641e-06, "loss": 154.6972, "step": 18930 }, { "epoch": 0.15667783430533153, "grad_norm": 1248.7547607421875, "learning_rate": 9.914514520044065e-06, "loss": 169.4783, "step": 18940 }, { "epoch": 0.15676055755470075, "grad_norm": 976.1325073242188, "learning_rate": 9.914254641212124e-06, "loss": 114.911, "step": 18950 }, { "epoch": 0.15684328080406998, "grad_norm": 893.714111328125, "learning_rate": 9.913994371375494e-06, "loss": 81.8798, "step": 18960 }, { "epoch": 0.1569260040534392, "grad_norm": 1223.4085693359375, "learning_rate": 9.913733710554886e-06, "loss": 138.9431, "step": 18970 }, { "epoch": 0.15700872730280846, "grad_norm": 2076.80712890625, "learning_rate": 9.913472658771034e-06, "loss": 113.3516, "step": 18980 }, { "epoch": 0.15709145055217769, "grad_norm": 1146.1357421875, "learning_rate": 9.913211216044715e-06, "loss": 162.9254, "step": 18990 }, { "epoch": 0.1571741738015469, "grad_norm": 1230.30224609375, "learning_rate": 9.912949382396728e-06, "loss": 197.952, "step": 19000 }, { "epoch": 0.15725689705091617, "grad_norm": 1353.6494140625, "learning_rate": 9.912687157847905e-06, "loss": 137.2512, "step": 19010 }, { "epoch": 0.1573396203002854, "grad_norm": 1508.077392578125, "learning_rate": 9.91242454241911e-06, "loss": 133.2853, "step": 19020 }, { "epoch": 0.15742234354965462, "grad_norm": 935.4700927734375, "learning_rate": 9.912161536131242e-06, "loss": 126.4163, "step": 19030 }, { "epoch": 0.15750506679902387, "grad_norm": 920.0881958007812, "learning_rate": 9.911898139005222e-06, "loss": 106.0859, "step": 19040 }, { "epoch": 0.1575877900483931, "grad_norm": 1098.602783203125, "learning_rate": 9.91163435106201e-06, "loss": 113.0203, "step": 19050 }, { "epoch": 0.15767051329776233, "grad_norm": 529.7808227539062, "learning_rate": 9.911370172322595e-06, "loss": 100.4977, "step": 19060 }, { "epoch": 0.15775323654713158, "grad_norm": 1122.334228515625, "learning_rate": 9.911105602807996e-06, "loss": 147.1685, "step": 19070 }, { "epoch": 0.1578359597965008, "grad_norm": 1302.458740234375, "learning_rate": 9.910840642539261e-06, "loss": 138.237, "step": 19080 }, { "epoch": 0.15791868304587003, "grad_norm": 1324.593505859375, "learning_rate": 9.910575291537476e-06, "loss": 182.2281, "step": 19090 }, { "epoch": 0.1580014062952393, "grad_norm": 746.2387084960938, "learning_rate": 9.91030954982375e-06, "loss": 111.4794, "step": 19100 }, { "epoch": 0.15808412954460851, "grad_norm": 1335.113525390625, "learning_rate": 9.910043417419228e-06, "loss": 148.9087, "step": 19110 }, { "epoch": 0.15816685279397774, "grad_norm": 688.1320190429688, "learning_rate": 9.909776894345086e-06, "loss": 141.3004, "step": 19120 }, { "epoch": 0.158249576043347, "grad_norm": 1126.445068359375, "learning_rate": 9.909509980622532e-06, "loss": 112.016, "step": 19130 }, { "epoch": 0.15833229929271622, "grad_norm": 600.5185546875, "learning_rate": 9.909242676272797e-06, "loss": 114.159, "step": 19140 }, { "epoch": 0.15841502254208545, "grad_norm": 1174.3468017578125, "learning_rate": 9.908974981317155e-06, "loss": 171.2533, "step": 19150 }, { "epoch": 0.15849774579145468, "grad_norm": 795.6885986328125, "learning_rate": 9.9087068957769e-06, "loss": 136.8247, "step": 19160 }, { "epoch": 0.15858046904082393, "grad_norm": 1241.0509033203125, "learning_rate": 9.908438419673367e-06, "loss": 137.2768, "step": 19170 }, { "epoch": 0.15866319229019316, "grad_norm": 761.6776123046875, "learning_rate": 9.908169553027916e-06, "loss": 165.4491, "step": 19180 }, { "epoch": 0.15874591553956238, "grad_norm": 1572.6136474609375, "learning_rate": 9.90790029586194e-06, "loss": 124.7587, "step": 19190 }, { "epoch": 0.15882863878893164, "grad_norm": 732.7517700195312, "learning_rate": 9.907630648196857e-06, "loss": 142.462, "step": 19200 }, { "epoch": 0.15891136203830086, "grad_norm": 957.8698120117188, "learning_rate": 9.907360610054132e-06, "loss": 145.4445, "step": 19210 }, { "epoch": 0.1589940852876701, "grad_norm": 1933.4423828125, "learning_rate": 9.907090181455241e-06, "loss": 126.4228, "step": 19220 }, { "epoch": 0.15907680853703934, "grad_norm": 1341.31591796875, "learning_rate": 9.906819362421707e-06, "loss": 127.2506, "step": 19230 }, { "epoch": 0.15915953178640857, "grad_norm": 1401.2039794921875, "learning_rate": 9.906548152975076e-06, "loss": 142.5762, "step": 19240 }, { "epoch": 0.1592422550357778, "grad_norm": 1090.92578125, "learning_rate": 9.906276553136924e-06, "loss": 133.9682, "step": 19250 }, { "epoch": 0.15932497828514705, "grad_norm": 627.4381713867188, "learning_rate": 9.906004562928865e-06, "loss": 123.456, "step": 19260 }, { "epoch": 0.15940770153451628, "grad_norm": 1303.6290283203125, "learning_rate": 9.905732182372538e-06, "loss": 176.1459, "step": 19270 }, { "epoch": 0.1594904247838855, "grad_norm": 1230.2550048828125, "learning_rate": 9.905459411489617e-06, "loss": 150.8253, "step": 19280 }, { "epoch": 0.15957314803325476, "grad_norm": 557.597900390625, "learning_rate": 9.905186250301802e-06, "loss": 128.1924, "step": 19290 }, { "epoch": 0.15965587128262398, "grad_norm": 1013.2421875, "learning_rate": 9.904912698830828e-06, "loss": 148.6797, "step": 19300 }, { "epoch": 0.1597385945319932, "grad_norm": 1238.0384521484375, "learning_rate": 9.904638757098464e-06, "loss": 143.5567, "step": 19310 }, { "epoch": 0.15982131778136247, "grad_norm": 1117.36962890625, "learning_rate": 9.9043644251265e-06, "loss": 129.1499, "step": 19320 }, { "epoch": 0.1599040410307317, "grad_norm": 1082.633544921875, "learning_rate": 9.90408970293677e-06, "loss": 99.3771, "step": 19330 }, { "epoch": 0.15998676428010092, "grad_norm": 1239.34326171875, "learning_rate": 9.903814590551127e-06, "loss": 152.1191, "step": 19340 }, { "epoch": 0.16006948752947014, "grad_norm": 1026.2008056640625, "learning_rate": 9.903539087991462e-06, "loss": 138.5603, "step": 19350 }, { "epoch": 0.1601522107788394, "grad_norm": 716.8160400390625, "learning_rate": 9.903263195279698e-06, "loss": 121.6254, "step": 19360 }, { "epoch": 0.16023493402820863, "grad_norm": 1355.804931640625, "learning_rate": 9.902986912437784e-06, "loss": 121.697, "step": 19370 }, { "epoch": 0.16031765727757785, "grad_norm": 1001.9555053710938, "learning_rate": 9.902710239487702e-06, "loss": 123.8956, "step": 19380 }, { "epoch": 0.1604003805269471, "grad_norm": 929.7130737304688, "learning_rate": 9.902433176451466e-06, "loss": 108.8211, "step": 19390 }, { "epoch": 0.16048310377631633, "grad_norm": 1794.6314697265625, "learning_rate": 9.902155723351124e-06, "loss": 119.6667, "step": 19400 }, { "epoch": 0.16056582702568556, "grad_norm": 981.8839721679688, "learning_rate": 9.901877880208747e-06, "loss": 123.9001, "step": 19410 }, { "epoch": 0.1606485502750548, "grad_norm": 1454.4476318359375, "learning_rate": 9.901599647046443e-06, "loss": 131.0193, "step": 19420 }, { "epoch": 0.16073127352442404, "grad_norm": 1284.30224609375, "learning_rate": 9.901321023886351e-06, "loss": 169.1719, "step": 19430 }, { "epoch": 0.16081399677379327, "grad_norm": 1159.77783203125, "learning_rate": 9.901042010750641e-06, "loss": 100.9739, "step": 19440 }, { "epoch": 0.16089672002316252, "grad_norm": 1844.110107421875, "learning_rate": 9.900762607661509e-06, "loss": 153.9659, "step": 19450 }, { "epoch": 0.16097944327253175, "grad_norm": 701.9683227539062, "learning_rate": 9.900482814641188e-06, "loss": 109.9286, "step": 19460 }, { "epoch": 0.16106216652190097, "grad_norm": 1962.1533203125, "learning_rate": 9.90020263171194e-06, "loss": 158.6689, "step": 19470 }, { "epoch": 0.16114488977127023, "grad_norm": 1527.3931884765625, "learning_rate": 9.899922058896058e-06, "loss": 129.6219, "step": 19480 }, { "epoch": 0.16122761302063945, "grad_norm": 935.3746337890625, "learning_rate": 9.899641096215865e-06, "loss": 187.1026, "step": 19490 }, { "epoch": 0.16131033627000868, "grad_norm": 1236.936279296875, "learning_rate": 9.899359743693715e-06, "loss": 194.122, "step": 19500 }, { "epoch": 0.16139305951937793, "grad_norm": 2321.50439453125, "learning_rate": 9.899078001351996e-06, "loss": 164.5937, "step": 19510 }, { "epoch": 0.16147578276874716, "grad_norm": 1305.2666015625, "learning_rate": 9.898795869213125e-06, "loss": 149.4349, "step": 19520 }, { "epoch": 0.1615585060181164, "grad_norm": 1175.187744140625, "learning_rate": 9.898513347299549e-06, "loss": 142.7042, "step": 19530 }, { "epoch": 0.16164122926748561, "grad_norm": 1031.960693359375, "learning_rate": 9.898230435633747e-06, "loss": 151.7943, "step": 19540 }, { "epoch": 0.16172395251685487, "grad_norm": 1225.9884033203125, "learning_rate": 9.897947134238228e-06, "loss": 162.5945, "step": 19550 }, { "epoch": 0.1618066757662241, "grad_norm": 1006.8120727539062, "learning_rate": 9.897663443135534e-06, "loss": 127.9761, "step": 19560 }, { "epoch": 0.16188939901559332, "grad_norm": 2127.416748046875, "learning_rate": 9.897379362348239e-06, "loss": 171.9894, "step": 19570 }, { "epoch": 0.16197212226496258, "grad_norm": 745.5164794921875, "learning_rate": 9.897094891898942e-06, "loss": 150.5477, "step": 19580 }, { "epoch": 0.1620548455143318, "grad_norm": 2045.43896484375, "learning_rate": 9.89681003181028e-06, "loss": 143.4611, "step": 19590 }, { "epoch": 0.16213756876370103, "grad_norm": 1801.7623291015625, "learning_rate": 9.896524782104917e-06, "loss": 136.5524, "step": 19600 }, { "epoch": 0.16222029201307028, "grad_norm": 865.27001953125, "learning_rate": 9.89623914280555e-06, "loss": 135.0138, "step": 19610 }, { "epoch": 0.1623030152624395, "grad_norm": 1108.943359375, "learning_rate": 9.895953113934904e-06, "loss": 131.2855, "step": 19620 }, { "epoch": 0.16238573851180874, "grad_norm": 1284.2874755859375, "learning_rate": 9.895666695515739e-06, "loss": 158.5307, "step": 19630 }, { "epoch": 0.162468461761178, "grad_norm": 1160.2169189453125, "learning_rate": 9.895379887570842e-06, "loss": 146.816, "step": 19640 }, { "epoch": 0.16255118501054722, "grad_norm": 1387.5469970703125, "learning_rate": 9.895092690123036e-06, "loss": 130.8016, "step": 19650 }, { "epoch": 0.16263390825991644, "grad_norm": 1327.9295654296875, "learning_rate": 9.894805103195168e-06, "loss": 131.4063, "step": 19660 }, { "epoch": 0.1627166315092857, "grad_norm": 1514.6529541015625, "learning_rate": 9.894517126810122e-06, "loss": 209.5621, "step": 19670 }, { "epoch": 0.16279935475865492, "grad_norm": 2022.22021484375, "learning_rate": 9.894228760990811e-06, "loss": 152.1554, "step": 19680 }, { "epoch": 0.16288207800802415, "grad_norm": 1740.86767578125, "learning_rate": 9.893940005760181e-06, "loss": 154.0035, "step": 19690 }, { "epoch": 0.16296480125739338, "grad_norm": 1580.342529296875, "learning_rate": 9.893650861141204e-06, "loss": 157.6928, "step": 19700 }, { "epoch": 0.16304752450676263, "grad_norm": 1006.531494140625, "learning_rate": 9.893361327156887e-06, "loss": 127.0846, "step": 19710 }, { "epoch": 0.16313024775613186, "grad_norm": 1319.7847900390625, "learning_rate": 9.893071403830265e-06, "loss": 136.8425, "step": 19720 }, { "epoch": 0.16321297100550108, "grad_norm": 830.5723876953125, "learning_rate": 9.892781091184409e-06, "loss": 136.2878, "step": 19730 }, { "epoch": 0.16329569425487034, "grad_norm": 759.5004272460938, "learning_rate": 9.892490389242417e-06, "loss": 120.3061, "step": 19740 }, { "epoch": 0.16337841750423956, "grad_norm": 2213.310546875, "learning_rate": 9.892199298027416e-06, "loss": 143.1016, "step": 19750 }, { "epoch": 0.1634611407536088, "grad_norm": 1020.21337890625, "learning_rate": 9.891907817562572e-06, "loss": 116.1548, "step": 19760 }, { "epoch": 0.16354386400297805, "grad_norm": 810.1748657226562, "learning_rate": 9.891615947871072e-06, "loss": 141.0581, "step": 19770 }, { "epoch": 0.16362658725234727, "grad_norm": 810.9425048828125, "learning_rate": 9.89132368897614e-06, "loss": 130.3051, "step": 19780 }, { "epoch": 0.1637093105017165, "grad_norm": 1321.05908203125, "learning_rate": 9.891031040901031e-06, "loss": 154.2215, "step": 19790 }, { "epoch": 0.16379203375108575, "grad_norm": 1216.8099365234375, "learning_rate": 9.890738003669029e-06, "loss": 164.4314, "step": 19800 }, { "epoch": 0.16387475700045498, "grad_norm": 2961.08447265625, "learning_rate": 9.890444577303448e-06, "loss": 184.4128, "step": 19810 }, { "epoch": 0.1639574802498242, "grad_norm": 702.2813110351562, "learning_rate": 9.890150761827639e-06, "loss": 118.5094, "step": 19820 }, { "epoch": 0.16404020349919346, "grad_norm": 1224.574951171875, "learning_rate": 9.889856557264975e-06, "loss": 164.4189, "step": 19830 }, { "epoch": 0.1641229267485627, "grad_norm": 1510.7064208984375, "learning_rate": 9.889561963638866e-06, "loss": 168.8556, "step": 19840 }, { "epoch": 0.1642056499979319, "grad_norm": 1308.8349609375, "learning_rate": 9.889266980972752e-06, "loss": 157.53, "step": 19850 }, { "epoch": 0.16428837324730117, "grad_norm": 554.7841796875, "learning_rate": 9.888971609290103e-06, "loss": 123.5679, "step": 19860 }, { "epoch": 0.1643710964966704, "grad_norm": 1066.4405517578125, "learning_rate": 9.88867584861442e-06, "loss": 115.5183, "step": 19870 }, { "epoch": 0.16445381974603962, "grad_norm": 823.9727172851562, "learning_rate": 9.888379698969236e-06, "loss": 127.0505, "step": 19880 }, { "epoch": 0.16453654299540885, "grad_norm": 2073.93017578125, "learning_rate": 9.888083160378114e-06, "loss": 142.6533, "step": 19890 }, { "epoch": 0.1646192662447781, "grad_norm": 2434.051513671875, "learning_rate": 9.887786232864648e-06, "loss": 147.1622, "step": 19900 }, { "epoch": 0.16470198949414733, "grad_norm": 724.6605224609375, "learning_rate": 9.887488916452463e-06, "loss": 121.8898, "step": 19910 }, { "epoch": 0.16478471274351655, "grad_norm": 1281.83203125, "learning_rate": 9.887191211165217e-06, "loss": 151.6535, "step": 19920 }, { "epoch": 0.1648674359928858, "grad_norm": 1131.1641845703125, "learning_rate": 9.886893117026593e-06, "loss": 149.5577, "step": 19930 }, { "epoch": 0.16495015924225503, "grad_norm": 1077.4385986328125, "learning_rate": 9.886594634060314e-06, "loss": 203.1148, "step": 19940 }, { "epoch": 0.16503288249162426, "grad_norm": 1563.59228515625, "learning_rate": 9.886295762290125e-06, "loss": 156.8315, "step": 19950 }, { "epoch": 0.16511560574099352, "grad_norm": 728.146240234375, "learning_rate": 9.885996501739808e-06, "loss": 123.6347, "step": 19960 }, { "epoch": 0.16519832899036274, "grad_norm": 1174.09521484375, "learning_rate": 9.885696852433174e-06, "loss": 171.4022, "step": 19970 }, { "epoch": 0.16528105223973197, "grad_norm": 2437.55908203125, "learning_rate": 9.885396814394062e-06, "loss": 166.2973, "step": 19980 }, { "epoch": 0.16536377548910122, "grad_norm": 756.368896484375, "learning_rate": 9.885096387646346e-06, "loss": 102.5183, "step": 19990 }, { "epoch": 0.16544649873847045, "grad_norm": 1208.659423828125, "learning_rate": 9.88479557221393e-06, "loss": 146.3919, "step": 20000 }, { "epoch": 0.16552922198783968, "grad_norm": 793.578857421875, "learning_rate": 9.88449436812075e-06, "loss": 151.3374, "step": 20010 }, { "epoch": 0.16561194523720893, "grad_norm": 3023.392333984375, "learning_rate": 9.88419277539077e-06, "loss": 147.2389, "step": 20020 }, { "epoch": 0.16569466848657816, "grad_norm": 1020.14404296875, "learning_rate": 9.883890794047985e-06, "loss": 133.7473, "step": 20030 }, { "epoch": 0.16577739173594738, "grad_norm": 925.8684692382812, "learning_rate": 9.883588424116424e-06, "loss": 145.2095, "step": 20040 }, { "epoch": 0.16586011498531664, "grad_norm": 1657.950927734375, "learning_rate": 9.883285665620145e-06, "loss": 131.4692, "step": 20050 }, { "epoch": 0.16594283823468586, "grad_norm": 1638.5106201171875, "learning_rate": 9.882982518583238e-06, "loss": 120.6384, "step": 20060 }, { "epoch": 0.1660255614840551, "grad_norm": 994.8275146484375, "learning_rate": 9.882678983029819e-06, "loss": 191.7884, "step": 20070 }, { "epoch": 0.16610828473342432, "grad_norm": 1858.4609375, "learning_rate": 9.882375058984044e-06, "loss": 145.8128, "step": 20080 }, { "epoch": 0.16619100798279357, "grad_norm": 640.3125, "learning_rate": 9.882070746470092e-06, "loss": 113.2083, "step": 20090 }, { "epoch": 0.1662737312321628, "grad_norm": 1469.511474609375, "learning_rate": 9.881766045512176e-06, "loss": 189.0106, "step": 20100 }, { "epoch": 0.16635645448153202, "grad_norm": 740.4965209960938, "learning_rate": 9.88146095613454e-06, "loss": 130.8047, "step": 20110 }, { "epoch": 0.16643917773090128, "grad_norm": 683.9896240234375, "learning_rate": 9.881155478361459e-06, "loss": 175.5372, "step": 20120 }, { "epoch": 0.1665219009802705, "grad_norm": 938.4227905273438, "learning_rate": 9.880849612217238e-06, "loss": 108.5235, "step": 20130 }, { "epoch": 0.16660462422963973, "grad_norm": 841.4732666015625, "learning_rate": 9.880543357726214e-06, "loss": 142.9208, "step": 20140 }, { "epoch": 0.16668734747900898, "grad_norm": 764.7952880859375, "learning_rate": 9.880236714912754e-06, "loss": 136.3933, "step": 20150 }, { "epoch": 0.1667700707283782, "grad_norm": 1391.5673828125, "learning_rate": 9.879929683801254e-06, "loss": 138.4007, "step": 20160 }, { "epoch": 0.16685279397774744, "grad_norm": 1550.94873046875, "learning_rate": 9.879622264416147e-06, "loss": 147.9795, "step": 20170 }, { "epoch": 0.1669355172271167, "grad_norm": 879.2140502929688, "learning_rate": 9.87931445678189e-06, "loss": 155.5872, "step": 20180 }, { "epoch": 0.16701824047648592, "grad_norm": 584.1538696289062, "learning_rate": 9.879006260922975e-06, "loss": 98.7441, "step": 20190 }, { "epoch": 0.16710096372585515, "grad_norm": 719.3741455078125, "learning_rate": 9.878697676863922e-06, "loss": 126.3837, "step": 20200 }, { "epoch": 0.1671836869752244, "grad_norm": 1768.941162109375, "learning_rate": 9.878388704629286e-06, "loss": 181.439, "step": 20210 }, { "epoch": 0.16726641022459363, "grad_norm": 1062.1995849609375, "learning_rate": 9.87807934424365e-06, "loss": 155.1683, "step": 20220 }, { "epoch": 0.16734913347396285, "grad_norm": 1074.44482421875, "learning_rate": 9.877769595731629e-06, "loss": 149.4426, "step": 20230 }, { "epoch": 0.16743185672333208, "grad_norm": 766.1312866210938, "learning_rate": 9.877459459117864e-06, "loss": 133.7859, "step": 20240 }, { "epoch": 0.16751457997270133, "grad_norm": 1180.6207275390625, "learning_rate": 9.877148934427037e-06, "loss": 185.559, "step": 20250 }, { "epoch": 0.16759730322207056, "grad_norm": 936.2619018554688, "learning_rate": 9.87683802168385e-06, "loss": 153.8027, "step": 20260 }, { "epoch": 0.16768002647143979, "grad_norm": 1289.442626953125, "learning_rate": 9.876526720913045e-06, "loss": 146.8949, "step": 20270 }, { "epoch": 0.16776274972080904, "grad_norm": 1198.4373779296875, "learning_rate": 9.87621503213939e-06, "loss": 101.3234, "step": 20280 }, { "epoch": 0.16784547297017827, "grad_norm": 1139.7901611328125, "learning_rate": 9.875902955387682e-06, "loss": 105.7266, "step": 20290 }, { "epoch": 0.1679281962195475, "grad_norm": 885.1135864257812, "learning_rate": 9.875590490682754e-06, "loss": 139.6578, "step": 20300 }, { "epoch": 0.16801091946891675, "grad_norm": 1269.400146484375, "learning_rate": 9.875277638049466e-06, "loss": 148.561, "step": 20310 }, { "epoch": 0.16809364271828597, "grad_norm": 1671.2281494140625, "learning_rate": 9.87496439751271e-06, "loss": 157.8267, "step": 20320 }, { "epoch": 0.1681763659676552, "grad_norm": 913.9154663085938, "learning_rate": 9.87465076909741e-06, "loss": 141.2827, "step": 20330 }, { "epoch": 0.16825908921702445, "grad_norm": 1082.798583984375, "learning_rate": 9.874336752828523e-06, "loss": 150.5321, "step": 20340 }, { "epoch": 0.16834181246639368, "grad_norm": 967.8886108398438, "learning_rate": 9.87402234873103e-06, "loss": 142.4086, "step": 20350 }, { "epoch": 0.1684245357157629, "grad_norm": 1056.4305419921875, "learning_rate": 9.873707556829945e-06, "loss": 97.3924, "step": 20360 }, { "epoch": 0.16850725896513216, "grad_norm": 837.5071411132812, "learning_rate": 9.873392377150318e-06, "loss": 123.7359, "step": 20370 }, { "epoch": 0.1685899822145014, "grad_norm": 874.9818115234375, "learning_rate": 9.873076809717226e-06, "loss": 109.1823, "step": 20380 }, { "epoch": 0.16867270546387061, "grad_norm": 727.0296020507812, "learning_rate": 9.872760854555776e-06, "loss": 143.6749, "step": 20390 }, { "epoch": 0.16875542871323987, "grad_norm": 1192.26904296875, "learning_rate": 9.872444511691108e-06, "loss": 128.2298, "step": 20400 }, { "epoch": 0.1688381519626091, "grad_norm": 765.3291015625, "learning_rate": 9.872127781148392e-06, "loss": 140.1519, "step": 20410 }, { "epoch": 0.16892087521197832, "grad_norm": 1048.41064453125, "learning_rate": 9.871810662952828e-06, "loss": 153.647, "step": 20420 }, { "epoch": 0.16900359846134755, "grad_norm": 1050.804931640625, "learning_rate": 9.87149315712965e-06, "loss": 160.4528, "step": 20430 }, { "epoch": 0.1690863217107168, "grad_norm": 1345.5584716796875, "learning_rate": 9.871175263704116e-06, "loss": 119.833, "step": 20440 }, { "epoch": 0.16916904496008603, "grad_norm": 1076.057861328125, "learning_rate": 9.870856982701522e-06, "loss": 123.713, "step": 20450 }, { "epoch": 0.16925176820945526, "grad_norm": 1091.8779296875, "learning_rate": 9.870538314147194e-06, "loss": 157.7124, "step": 20460 }, { "epoch": 0.1693344914588245, "grad_norm": 1105.4384765625, "learning_rate": 9.870219258066485e-06, "loss": 165.7277, "step": 20470 }, { "epoch": 0.16941721470819374, "grad_norm": 1101.884033203125, "learning_rate": 9.86989981448478e-06, "loss": 93.6696, "step": 20480 }, { "epoch": 0.16949993795756296, "grad_norm": 1253.4547119140625, "learning_rate": 9.869579983427497e-06, "loss": 156.5219, "step": 20490 }, { "epoch": 0.16958266120693222, "grad_norm": 571.4030151367188, "learning_rate": 9.869259764920081e-06, "loss": 108.441, "step": 20500 }, { "epoch": 0.16966538445630144, "grad_norm": 1125.4053955078125, "learning_rate": 9.868939158988016e-06, "loss": 111.0379, "step": 20510 }, { "epoch": 0.16974810770567067, "grad_norm": 1643.4638671875, "learning_rate": 9.868618165656805e-06, "loss": 174.5399, "step": 20520 }, { "epoch": 0.16983083095503992, "grad_norm": 1219.7093505859375, "learning_rate": 9.868296784951992e-06, "loss": 143.0416, "step": 20530 }, { "epoch": 0.16991355420440915, "grad_norm": 716.170654296875, "learning_rate": 9.867975016899145e-06, "loss": 129.3315, "step": 20540 }, { "epoch": 0.16999627745377838, "grad_norm": 722.2046508789062, "learning_rate": 9.867652861523866e-06, "loss": 110.3743, "step": 20550 }, { "epoch": 0.17007900070314763, "grad_norm": 8400.3681640625, "learning_rate": 9.86733031885179e-06, "loss": 293.1446, "step": 20560 }, { "epoch": 0.17016172395251686, "grad_norm": 1231.3631591796875, "learning_rate": 9.867007388908579e-06, "loss": 158.7573, "step": 20570 }, { "epoch": 0.17024444720188608, "grad_norm": 813.4060668945312, "learning_rate": 9.866684071719926e-06, "loss": 114.0276, "step": 20580 }, { "epoch": 0.17032717045125534, "grad_norm": 691.1693115234375, "learning_rate": 9.866360367311557e-06, "loss": 124.2069, "step": 20590 }, { "epoch": 0.17040989370062457, "grad_norm": 1228.2000732421875, "learning_rate": 9.866036275709226e-06, "loss": 113.3982, "step": 20600 }, { "epoch": 0.1704926169499938, "grad_norm": 1183.33935546875, "learning_rate": 9.86571179693872e-06, "loss": 124.6813, "step": 20610 }, { "epoch": 0.17057534019936302, "grad_norm": 1132.4244384765625, "learning_rate": 9.865386931025858e-06, "loss": 109.3566, "step": 20620 }, { "epoch": 0.17065806344873227, "grad_norm": 536.39453125, "learning_rate": 9.865061677996487e-06, "loss": 94.1924, "step": 20630 }, { "epoch": 0.1707407866981015, "grad_norm": 1061.803955078125, "learning_rate": 9.864736037876487e-06, "loss": 96.9849, "step": 20640 }, { "epoch": 0.17082350994747073, "grad_norm": 1086.843505859375, "learning_rate": 9.864410010691766e-06, "loss": 158.8272, "step": 20650 }, { "epoch": 0.17090623319683998, "grad_norm": 1095.378662109375, "learning_rate": 9.864083596468263e-06, "loss": 165.8036, "step": 20660 }, { "epoch": 0.1709889564462092, "grad_norm": 773.133544921875, "learning_rate": 9.863756795231953e-06, "loss": 107.4877, "step": 20670 }, { "epoch": 0.17107167969557843, "grad_norm": 922.110107421875, "learning_rate": 9.863429607008837e-06, "loss": 152.2869, "step": 20680 }, { "epoch": 0.1711544029449477, "grad_norm": 828.44677734375, "learning_rate": 9.863102031824946e-06, "loss": 103.4225, "step": 20690 }, { "epoch": 0.1712371261943169, "grad_norm": 1600.447021484375, "learning_rate": 9.862774069706346e-06, "loss": 116.9802, "step": 20700 }, { "epoch": 0.17131984944368614, "grad_norm": 1819.4007568359375, "learning_rate": 9.86244572067913e-06, "loss": 193.74, "step": 20710 }, { "epoch": 0.1714025726930554, "grad_norm": 1109.385009765625, "learning_rate": 9.862116984769424e-06, "loss": 155.2172, "step": 20720 }, { "epoch": 0.17148529594242462, "grad_norm": 1088.8150634765625, "learning_rate": 9.861787862003384e-06, "loss": 102.3083, "step": 20730 }, { "epoch": 0.17156801919179385, "grad_norm": 863.4269409179688, "learning_rate": 9.861458352407196e-06, "loss": 163.3116, "step": 20740 }, { "epoch": 0.1716507424411631, "grad_norm": 985.338623046875, "learning_rate": 9.861128456007076e-06, "loss": 128.2076, "step": 20750 }, { "epoch": 0.17173346569053233, "grad_norm": 2616.19189453125, "learning_rate": 9.860798172829277e-06, "loss": 128.8503, "step": 20760 }, { "epoch": 0.17181618893990155, "grad_norm": 821.7667846679688, "learning_rate": 9.860467502900076e-06, "loss": 139.1303, "step": 20770 }, { "epoch": 0.1718989121892708, "grad_norm": 1093.295654296875, "learning_rate": 9.860136446245779e-06, "loss": 158.2073, "step": 20780 }, { "epoch": 0.17198163543864003, "grad_norm": 2161.525390625, "learning_rate": 9.859805002892733e-06, "loss": 159.4854, "step": 20790 }, { "epoch": 0.17206435868800926, "grad_norm": 936.6251831054688, "learning_rate": 9.859473172867304e-06, "loss": 150.7438, "step": 20800 }, { "epoch": 0.1721470819373785, "grad_norm": 735.8521118164062, "learning_rate": 9.859140956195898e-06, "loss": 166.0925, "step": 20810 }, { "epoch": 0.17222980518674774, "grad_norm": 1382.367431640625, "learning_rate": 9.858808352904946e-06, "loss": 163.85, "step": 20820 }, { "epoch": 0.17231252843611697, "grad_norm": 1131.4239501953125, "learning_rate": 9.858475363020913e-06, "loss": 138.0052, "step": 20830 }, { "epoch": 0.1723952516854862, "grad_norm": 1062.67431640625, "learning_rate": 9.858141986570294e-06, "loss": 127.1865, "step": 20840 }, { "epoch": 0.17247797493485545, "grad_norm": 790.3716430664062, "learning_rate": 9.85780822357961e-06, "loss": 99.19, "step": 20850 }, { "epoch": 0.17256069818422468, "grad_norm": 1042.801025390625, "learning_rate": 9.857474074075422e-06, "loss": 152.4688, "step": 20860 }, { "epoch": 0.1726434214335939, "grad_norm": 872.380126953125, "learning_rate": 9.857139538084313e-06, "loss": 180.6786, "step": 20870 }, { "epoch": 0.17272614468296316, "grad_norm": 873.7167358398438, "learning_rate": 9.856804615632904e-06, "loss": 153.8926, "step": 20880 }, { "epoch": 0.17280886793233238, "grad_norm": 1261.3304443359375, "learning_rate": 9.85646930674784e-06, "loss": 138.4761, "step": 20890 }, { "epoch": 0.1728915911817016, "grad_norm": 1224.0684814453125, "learning_rate": 9.856133611455802e-06, "loss": 116.0446, "step": 20900 }, { "epoch": 0.17297431443107086, "grad_norm": 1034.66552734375, "learning_rate": 9.855797529783499e-06, "loss": 143.5475, "step": 20910 }, { "epoch": 0.1730570376804401, "grad_norm": 1428.3978271484375, "learning_rate": 9.855461061757673e-06, "loss": 202.7229, "step": 20920 }, { "epoch": 0.17313976092980932, "grad_norm": 1692.571533203125, "learning_rate": 9.855124207405093e-06, "loss": 129.3957, "step": 20930 }, { "epoch": 0.17322248417917857, "grad_norm": 901.4539184570312, "learning_rate": 9.854786966752561e-06, "loss": 114.9998, "step": 20940 }, { "epoch": 0.1733052074285478, "grad_norm": 1734.4339599609375, "learning_rate": 9.854449339826912e-06, "loss": 126.6563, "step": 20950 }, { "epoch": 0.17338793067791702, "grad_norm": 936.6329956054688, "learning_rate": 9.854111326655006e-06, "loss": 148.8187, "step": 20960 }, { "epoch": 0.17347065392728625, "grad_norm": 792.8075561523438, "learning_rate": 9.85377292726374e-06, "loss": 140.2986, "step": 20970 }, { "epoch": 0.1735533771766555, "grad_norm": 852.9043579101562, "learning_rate": 9.85343414168004e-06, "loss": 142.9736, "step": 20980 }, { "epoch": 0.17363610042602473, "grad_norm": 615.2127685546875, "learning_rate": 9.853094969930857e-06, "loss": 131.7546, "step": 20990 }, { "epoch": 0.17371882367539396, "grad_norm": 1290.294921875, "learning_rate": 9.85275541204318e-06, "loss": 157.3864, "step": 21000 }, { "epoch": 0.1738015469247632, "grad_norm": 428.7129821777344, "learning_rate": 9.852415468044027e-06, "loss": 117.3043, "step": 21010 }, { "epoch": 0.17388427017413244, "grad_norm": 1278.5567626953125, "learning_rate": 9.852075137960446e-06, "loss": 136.686, "step": 21020 }, { "epoch": 0.17396699342350166, "grad_norm": 1619.6778564453125, "learning_rate": 9.851734421819511e-06, "loss": 136.3727, "step": 21030 }, { "epoch": 0.17404971667287092, "grad_norm": 1088.0845947265625, "learning_rate": 9.851393319648338e-06, "loss": 114.8988, "step": 21040 }, { "epoch": 0.17413243992224015, "grad_norm": 738.9354858398438, "learning_rate": 9.851051831474062e-06, "loss": 138.9047, "step": 21050 }, { "epoch": 0.17421516317160937, "grad_norm": 926.2405395507812, "learning_rate": 9.850709957323855e-06, "loss": 127.3761, "step": 21060 }, { "epoch": 0.17429788642097863, "grad_norm": 911.6777954101562, "learning_rate": 9.85036769722492e-06, "loss": 163.0244, "step": 21070 }, { "epoch": 0.17438060967034785, "grad_norm": 868.7709350585938, "learning_rate": 9.850025051204484e-06, "loss": 139.7337, "step": 21080 }, { "epoch": 0.17446333291971708, "grad_norm": 810.2794189453125, "learning_rate": 9.849682019289816e-06, "loss": 129.7191, "step": 21090 }, { "epoch": 0.17454605616908633, "grad_norm": 862.4880981445312, "learning_rate": 9.849338601508204e-06, "loss": 110.1159, "step": 21100 }, { "epoch": 0.17462877941845556, "grad_norm": 1751.4161376953125, "learning_rate": 9.848994797886978e-06, "loss": 162.2478, "step": 21110 }, { "epoch": 0.1747115026678248, "grad_norm": 961.6451416015625, "learning_rate": 9.84865060845349e-06, "loss": 151.0574, "step": 21120 }, { "epoch": 0.17479422591719404, "grad_norm": 984.0369873046875, "learning_rate": 9.848306033235123e-06, "loss": 114.3529, "step": 21130 }, { "epoch": 0.17487694916656327, "grad_norm": 1510.6654052734375, "learning_rate": 9.847961072259298e-06, "loss": 142.4745, "step": 21140 }, { "epoch": 0.1749596724159325, "grad_norm": 578.0482177734375, "learning_rate": 9.847615725553457e-06, "loss": 149.1546, "step": 21150 }, { "epoch": 0.17504239566530172, "grad_norm": 2596.45654296875, "learning_rate": 9.847269993145082e-06, "loss": 140.9354, "step": 21160 }, { "epoch": 0.17512511891467097, "grad_norm": 1056.8643798828125, "learning_rate": 9.84692387506168e-06, "loss": 138.6333, "step": 21170 }, { "epoch": 0.1752078421640402, "grad_norm": 543.871826171875, "learning_rate": 9.846577371330788e-06, "loss": 100.0174, "step": 21180 }, { "epoch": 0.17529056541340943, "grad_norm": 505.1315002441406, "learning_rate": 9.846230481979978e-06, "loss": 126.8892, "step": 21190 }, { "epoch": 0.17537328866277868, "grad_norm": 1152.1531982421875, "learning_rate": 9.84588320703685e-06, "loss": 137.2241, "step": 21200 }, { "epoch": 0.1754560119121479, "grad_norm": 2514.837646484375, "learning_rate": 9.845535546529036e-06, "loss": 150.1807, "step": 21210 }, { "epoch": 0.17553873516151713, "grad_norm": 1177.4676513671875, "learning_rate": 9.845187500484194e-06, "loss": 136.4538, "step": 21220 }, { "epoch": 0.1756214584108864, "grad_norm": 1041.1258544921875, "learning_rate": 9.844839068930021e-06, "loss": 131.5591, "step": 21230 }, { "epoch": 0.17570418166025562, "grad_norm": 907.1217651367188, "learning_rate": 9.844490251894237e-06, "loss": 129.923, "step": 21240 }, { "epoch": 0.17578690490962484, "grad_norm": 917.8026123046875, "learning_rate": 9.844141049404598e-06, "loss": 112.846, "step": 21250 }, { "epoch": 0.1758696281589941, "grad_norm": 1224.1368408203125, "learning_rate": 9.843791461488887e-06, "loss": 142.4482, "step": 21260 }, { "epoch": 0.17595235140836332, "grad_norm": 1229.176513671875, "learning_rate": 9.843441488174918e-06, "loss": 103.1861, "step": 21270 }, { "epoch": 0.17603507465773255, "grad_norm": 850.4046630859375, "learning_rate": 9.843091129490539e-06, "loss": 127.6695, "step": 21280 }, { "epoch": 0.1761177979071018, "grad_norm": 842.6237182617188, "learning_rate": 9.842740385463628e-06, "loss": 150.564, "step": 21290 }, { "epoch": 0.17620052115647103, "grad_norm": 1639.1712646484375, "learning_rate": 9.842389256122086e-06, "loss": 154.4747, "step": 21300 }, { "epoch": 0.17628324440584026, "grad_norm": 1027.3468017578125, "learning_rate": 9.842037741493856e-06, "loss": 138.2844, "step": 21310 }, { "epoch": 0.1763659676552095, "grad_norm": 1297.7607421875, "learning_rate": 9.841685841606905e-06, "loss": 171.9979, "step": 21320 }, { "epoch": 0.17644869090457874, "grad_norm": 650.8685302734375, "learning_rate": 9.841333556489232e-06, "loss": 138.841, "step": 21330 }, { "epoch": 0.17653141415394796, "grad_norm": 1357.2320556640625, "learning_rate": 9.840980886168866e-06, "loss": 151.9759, "step": 21340 }, { "epoch": 0.1766141374033172, "grad_norm": 938.8858032226562, "learning_rate": 9.840627830673867e-06, "loss": 141.0563, "step": 21350 }, { "epoch": 0.17669686065268644, "grad_norm": 1711.847412109375, "learning_rate": 9.84027439003233e-06, "loss": 172.4341, "step": 21360 }, { "epoch": 0.17677958390205567, "grad_norm": 854.195068359375, "learning_rate": 9.839920564272372e-06, "loss": 137.6896, "step": 21370 }, { "epoch": 0.1768623071514249, "grad_norm": 1087.6898193359375, "learning_rate": 9.839566353422148e-06, "loss": 117.6248, "step": 21380 }, { "epoch": 0.17694503040079415, "grad_norm": 2788.3251953125, "learning_rate": 9.839211757509838e-06, "loss": 144.8487, "step": 21390 }, { "epoch": 0.17702775365016338, "grad_norm": 1420.71875, "learning_rate": 9.83885677656366e-06, "loss": 122.336, "step": 21400 }, { "epoch": 0.1771104768995326, "grad_norm": 761.9530029296875, "learning_rate": 9.838501410611852e-06, "loss": 132.9662, "step": 21410 }, { "epoch": 0.17719320014890186, "grad_norm": 2090.32666015625, "learning_rate": 9.838145659682695e-06, "loss": 183.7629, "step": 21420 }, { "epoch": 0.17727592339827108, "grad_norm": 3058.696044921875, "learning_rate": 9.837789523804491e-06, "loss": 142.1503, "step": 21430 }, { "epoch": 0.1773586466476403, "grad_norm": 1059.2083740234375, "learning_rate": 9.837433003005578e-06, "loss": 137.1923, "step": 21440 }, { "epoch": 0.17744136989700957, "grad_norm": 1151.251708984375, "learning_rate": 9.83707609731432e-06, "loss": 89.3938, "step": 21450 }, { "epoch": 0.1775240931463788, "grad_norm": 749.4866333007812, "learning_rate": 9.836718806759119e-06, "loss": 145.9112, "step": 21460 }, { "epoch": 0.17760681639574802, "grad_norm": 1220.1741943359375, "learning_rate": 9.836361131368398e-06, "loss": 179.5797, "step": 21470 }, { "epoch": 0.17768953964511727, "grad_norm": 446.74859619140625, "learning_rate": 9.836003071170617e-06, "loss": 156.5806, "step": 21480 }, { "epoch": 0.1777722628944865, "grad_norm": 872.1563720703125, "learning_rate": 9.835644626194268e-06, "loss": 141.7276, "step": 21490 }, { "epoch": 0.17785498614385573, "grad_norm": 1689.736328125, "learning_rate": 9.835285796467867e-06, "loss": 133.7971, "step": 21500 }, { "epoch": 0.17793770939322495, "grad_norm": 1551.1575927734375, "learning_rate": 9.834926582019968e-06, "loss": 156.3386, "step": 21510 }, { "epoch": 0.1780204326425942, "grad_norm": 1045.95849609375, "learning_rate": 9.834566982879149e-06, "loss": 118.8335, "step": 21520 }, { "epoch": 0.17810315589196343, "grad_norm": 1034.3304443359375, "learning_rate": 9.83420699907402e-06, "loss": 136.8709, "step": 21530 }, { "epoch": 0.17818587914133266, "grad_norm": 870.5877075195312, "learning_rate": 9.83384663063323e-06, "loss": 125.3036, "step": 21540 }, { "epoch": 0.1782686023907019, "grad_norm": 3151.6611328125, "learning_rate": 9.833485877585447e-06, "loss": 230.3187, "step": 21550 }, { "epoch": 0.17835132564007114, "grad_norm": 1180.687255859375, "learning_rate": 9.833124739959375e-06, "loss": 128.1897, "step": 21560 }, { "epoch": 0.17843404888944037, "grad_norm": 1331.4376220703125, "learning_rate": 9.83276321778375e-06, "loss": 161.2605, "step": 21570 }, { "epoch": 0.17851677213880962, "grad_norm": 347.8481750488281, "learning_rate": 9.832401311087334e-06, "loss": 137.9425, "step": 21580 }, { "epoch": 0.17859949538817885, "grad_norm": 1399.0616455078125, "learning_rate": 9.832039019898922e-06, "loss": 141.6175, "step": 21590 }, { "epoch": 0.17868221863754807, "grad_norm": 1321.5462646484375, "learning_rate": 9.831676344247343e-06, "loss": 145.0367, "step": 21600 }, { "epoch": 0.17876494188691733, "grad_norm": 2181.245361328125, "learning_rate": 9.831313284161452e-06, "loss": 127.1829, "step": 21610 }, { "epoch": 0.17884766513628655, "grad_norm": 980.81201171875, "learning_rate": 9.830949839670134e-06, "loss": 160.9928, "step": 21620 }, { "epoch": 0.17893038838565578, "grad_norm": 1042.4224853515625, "learning_rate": 9.83058601080231e-06, "loss": 143.5391, "step": 21630 }, { "epoch": 0.17901311163502504, "grad_norm": 1248.8353271484375, "learning_rate": 9.830221797586925e-06, "loss": 125.7723, "step": 21640 }, { "epoch": 0.17909583488439426, "grad_norm": 765.4020385742188, "learning_rate": 9.829857200052961e-06, "loss": 145.4247, "step": 21650 }, { "epoch": 0.1791785581337635, "grad_norm": 1584.0721435546875, "learning_rate": 9.829492218229426e-06, "loss": 129.5774, "step": 21660 }, { "epoch": 0.17926128138313274, "grad_norm": 640.2371215820312, "learning_rate": 9.829126852145357e-06, "loss": 175.8682, "step": 21670 }, { "epoch": 0.17934400463250197, "grad_norm": 879.1619262695312, "learning_rate": 9.82876110182983e-06, "loss": 148.2943, "step": 21680 }, { "epoch": 0.1794267278818712, "grad_norm": 1808.1944580078125, "learning_rate": 9.82839496731194e-06, "loss": 161.7926, "step": 21690 }, { "epoch": 0.17950945113124042, "grad_norm": 1331.4281005859375, "learning_rate": 9.828028448620824e-06, "loss": 144.7912, "step": 21700 }, { "epoch": 0.17959217438060968, "grad_norm": 916.88134765625, "learning_rate": 9.827661545785641e-06, "loss": 111.8015, "step": 21710 }, { "epoch": 0.1796748976299789, "grad_norm": 945.744140625, "learning_rate": 9.827294258835584e-06, "loss": 135.7884, "step": 21720 }, { "epoch": 0.17975762087934813, "grad_norm": 843.4868774414062, "learning_rate": 9.82692658779988e-06, "loss": 166.0405, "step": 21730 }, { "epoch": 0.17984034412871738, "grad_norm": 866.8135375976562, "learning_rate": 9.826558532707777e-06, "loss": 131.8315, "step": 21740 }, { "epoch": 0.1799230673780866, "grad_norm": 1258.517822265625, "learning_rate": 9.826190093588564e-06, "loss": 192.2693, "step": 21750 }, { "epoch": 0.18000579062745584, "grad_norm": 755.1361694335938, "learning_rate": 9.825821270471555e-06, "loss": 110.0884, "step": 21760 }, { "epoch": 0.1800885138768251, "grad_norm": 505.46856689453125, "learning_rate": 9.825452063386094e-06, "loss": 117.8567, "step": 21770 }, { "epoch": 0.18017123712619432, "grad_norm": 1369.44287109375, "learning_rate": 9.825082472361558e-06, "loss": 135.8278, "step": 21780 }, { "epoch": 0.18025396037556354, "grad_norm": 917.852783203125, "learning_rate": 9.824712497427354e-06, "loss": 101.3896, "step": 21790 }, { "epoch": 0.1803366836249328, "grad_norm": 987.9921264648438, "learning_rate": 9.824342138612918e-06, "loss": 140.2833, "step": 21800 }, { "epoch": 0.18041940687430202, "grad_norm": 902.6442260742188, "learning_rate": 9.823971395947723e-06, "loss": 147.3234, "step": 21810 }, { "epoch": 0.18050213012367125, "grad_norm": 767.7576293945312, "learning_rate": 9.823600269461259e-06, "loss": 148.7868, "step": 21820 }, { "epoch": 0.1805848533730405, "grad_norm": 766.7611694335938, "learning_rate": 9.823228759183058e-06, "loss": 124.8973, "step": 21830 }, { "epoch": 0.18066757662240973, "grad_norm": 836.1171264648438, "learning_rate": 9.822856865142683e-06, "loss": 117.0898, "step": 21840 }, { "epoch": 0.18075029987177896, "grad_norm": 1208.75146484375, "learning_rate": 9.822484587369721e-06, "loss": 125.5648, "step": 21850 }, { "epoch": 0.1808330231211482, "grad_norm": 1158.348388671875, "learning_rate": 9.822111925893792e-06, "loss": 139.8755, "step": 21860 }, { "epoch": 0.18091574637051744, "grad_norm": 1267.1917724609375, "learning_rate": 9.821738880744549e-06, "loss": 135.5524, "step": 21870 }, { "epoch": 0.18099846961988666, "grad_norm": 2137.44580078125, "learning_rate": 9.82136545195167e-06, "loss": 159.7926, "step": 21880 }, { "epoch": 0.1810811928692559, "grad_norm": 955.8012084960938, "learning_rate": 9.82099163954487e-06, "loss": 121.1186, "step": 21890 }, { "epoch": 0.18116391611862515, "grad_norm": 1042.67724609375, "learning_rate": 9.820617443553889e-06, "loss": 132.7085, "step": 21900 }, { "epoch": 0.18124663936799437, "grad_norm": 988.7931518554688, "learning_rate": 9.820242864008503e-06, "loss": 141.3183, "step": 21910 }, { "epoch": 0.1813293626173636, "grad_norm": 1212.5155029296875, "learning_rate": 9.819867900938514e-06, "loss": 139.713, "step": 21920 }, { "epoch": 0.18141208586673285, "grad_norm": 714.6464233398438, "learning_rate": 9.819492554373758e-06, "loss": 122.3466, "step": 21930 }, { "epoch": 0.18149480911610208, "grad_norm": 1552.1221923828125, "learning_rate": 9.819116824344095e-06, "loss": 127.3137, "step": 21940 }, { "epoch": 0.1815775323654713, "grad_norm": 577.6259155273438, "learning_rate": 9.818740710879424e-06, "loss": 87.6241, "step": 21950 }, { "epoch": 0.18166025561484056, "grad_norm": 947.9591064453125, "learning_rate": 9.81836421400967e-06, "loss": 109.6651, "step": 21960 }, { "epoch": 0.1817429788642098, "grad_norm": 915.073486328125, "learning_rate": 9.81798733376479e-06, "loss": 112.4667, "step": 21970 }, { "epoch": 0.181825702113579, "grad_norm": 971.461181640625, "learning_rate": 9.817610070174768e-06, "loss": 159.3875, "step": 21980 }, { "epoch": 0.18190842536294827, "grad_norm": 839.3546142578125, "learning_rate": 9.817232423269622e-06, "loss": 141.2685, "step": 21990 }, { "epoch": 0.1819911486123175, "grad_norm": 901.919189453125, "learning_rate": 9.816854393079402e-06, "loss": 150.3302, "step": 22000 }, { "epoch": 0.18207387186168672, "grad_norm": 1099.916259765625, "learning_rate": 9.816475979634183e-06, "loss": 105.6784, "step": 22010 }, { "epoch": 0.18215659511105597, "grad_norm": 1539.3607177734375, "learning_rate": 9.816097182964076e-06, "loss": 150.3366, "step": 22020 }, { "epoch": 0.1822393183604252, "grad_norm": 1262.76513671875, "learning_rate": 9.81571800309922e-06, "loss": 205.2206, "step": 22030 }, { "epoch": 0.18232204160979443, "grad_norm": 1119.6072998046875, "learning_rate": 9.815338440069782e-06, "loss": 97.6272, "step": 22040 }, { "epoch": 0.18240476485916368, "grad_norm": 977.2727661132812, "learning_rate": 9.814958493905962e-06, "loss": 154.7452, "step": 22050 }, { "epoch": 0.1824874881085329, "grad_norm": 1467.2486572265625, "learning_rate": 9.814578164637996e-06, "loss": 116.3554, "step": 22060 }, { "epoch": 0.18257021135790213, "grad_norm": 1132.6796875, "learning_rate": 9.81419745229614e-06, "loss": 146.8583, "step": 22070 }, { "epoch": 0.18265293460727136, "grad_norm": 1301.7706298828125, "learning_rate": 9.813816356910685e-06, "loss": 150.6081, "step": 22080 }, { "epoch": 0.18273565785664062, "grad_norm": 786.4771728515625, "learning_rate": 9.813434878511956e-06, "loss": 98.5915, "step": 22090 }, { "epoch": 0.18281838110600984, "grad_norm": 1082.4107666015625, "learning_rate": 9.813053017130305e-06, "loss": 106.2249, "step": 22100 }, { "epoch": 0.18290110435537907, "grad_norm": 844.4359130859375, "learning_rate": 9.812670772796113e-06, "loss": 108.4317, "step": 22110 }, { "epoch": 0.18298382760474832, "grad_norm": 1356.7515869140625, "learning_rate": 9.812288145539796e-06, "loss": 156.1458, "step": 22120 }, { "epoch": 0.18306655085411755, "grad_norm": 571.9811401367188, "learning_rate": 9.811905135391796e-06, "loss": 128.8242, "step": 22130 }, { "epoch": 0.18314927410348678, "grad_norm": 996.9983520507812, "learning_rate": 9.81152174238259e-06, "loss": 121.8907, "step": 22140 }, { "epoch": 0.18323199735285603, "grad_norm": 1005.370361328125, "learning_rate": 9.81113796654268e-06, "loss": 130.7526, "step": 22150 }, { "epoch": 0.18331472060222526, "grad_norm": 522.5587158203125, "learning_rate": 9.810753807902603e-06, "loss": 167.8494, "step": 22160 }, { "epoch": 0.18339744385159448, "grad_norm": 1476.099853515625, "learning_rate": 9.81036926649292e-06, "loss": 125.2975, "step": 22170 }, { "epoch": 0.18348016710096374, "grad_norm": 864.9098510742188, "learning_rate": 9.809984342344234e-06, "loss": 114.3755, "step": 22180 }, { "epoch": 0.18356289035033296, "grad_norm": 1131.15966796875, "learning_rate": 9.80959903548717e-06, "loss": 130.3137, "step": 22190 }, { "epoch": 0.1836456135997022, "grad_norm": 2763.490234375, "learning_rate": 9.80921334595238e-06, "loss": 159.7871, "step": 22200 }, { "epoch": 0.18372833684907144, "grad_norm": 815.5933227539062, "learning_rate": 9.808827273770558e-06, "loss": 103.766, "step": 22210 }, { "epoch": 0.18381106009844067, "grad_norm": 1680.257568359375, "learning_rate": 9.80844081897242e-06, "loss": 162.1204, "step": 22220 }, { "epoch": 0.1838937833478099, "grad_norm": 934.7627563476562, "learning_rate": 9.808053981588712e-06, "loss": 144.453, "step": 22230 }, { "epoch": 0.18397650659717912, "grad_norm": 1144.36767578125, "learning_rate": 9.807666761650215e-06, "loss": 121.5437, "step": 22240 }, { "epoch": 0.18405922984654838, "grad_norm": 2383.975341796875, "learning_rate": 9.80727915918774e-06, "loss": 178.4958, "step": 22250 }, { "epoch": 0.1841419530959176, "grad_norm": 1073.094482421875, "learning_rate": 9.806891174232122e-06, "loss": 149.8745, "step": 22260 }, { "epoch": 0.18422467634528683, "grad_norm": 1233.2926025390625, "learning_rate": 9.806502806814236e-06, "loss": 128.9702, "step": 22270 }, { "epoch": 0.18430739959465609, "grad_norm": 837.9373168945312, "learning_rate": 9.806114056964977e-06, "loss": 139.9306, "step": 22280 }, { "epoch": 0.1843901228440253, "grad_norm": 628.1285400390625, "learning_rate": 9.805724924715283e-06, "loss": 123.0449, "step": 22290 }, { "epoch": 0.18447284609339454, "grad_norm": 561.8240356445312, "learning_rate": 9.80533541009611e-06, "loss": 105.3535, "step": 22300 }, { "epoch": 0.1845555693427638, "grad_norm": 1421.851806640625, "learning_rate": 9.804945513138454e-06, "loss": 219.4902, "step": 22310 }, { "epoch": 0.18463829259213302, "grad_norm": 825.07080078125, "learning_rate": 9.804555233873335e-06, "loss": 135.7106, "step": 22320 }, { "epoch": 0.18472101584150225, "grad_norm": 2163.25439453125, "learning_rate": 9.804164572331804e-06, "loss": 173.7582, "step": 22330 }, { "epoch": 0.1848037390908715, "grad_norm": 1122.6065673828125, "learning_rate": 9.80377352854495e-06, "loss": 91.7703, "step": 22340 }, { "epoch": 0.18488646234024073, "grad_norm": 909.8526000976562, "learning_rate": 9.80338210254388e-06, "loss": 124.849, "step": 22350 }, { "epoch": 0.18496918558960995, "grad_norm": 784.2378540039062, "learning_rate": 9.80299029435974e-06, "loss": 148.4874, "step": 22360 }, { "epoch": 0.1850519088389792, "grad_norm": 999.297119140625, "learning_rate": 9.802598104023706e-06, "loss": 159.1145, "step": 22370 }, { "epoch": 0.18513463208834843, "grad_norm": 753.5262451171875, "learning_rate": 9.80220553156698e-06, "loss": 113.2028, "step": 22380 }, { "epoch": 0.18521735533771766, "grad_norm": 1254.95947265625, "learning_rate": 9.801812577020802e-06, "loss": 139.2906, "step": 22390 }, { "epoch": 0.1853000785870869, "grad_norm": 1023.6962890625, "learning_rate": 9.801419240416432e-06, "loss": 144.157, "step": 22400 }, { "epoch": 0.18538280183645614, "grad_norm": 937.4168701171875, "learning_rate": 9.80102552178517e-06, "loss": 124.0214, "step": 22410 }, { "epoch": 0.18546552508582537, "grad_norm": 1038.947265625, "learning_rate": 9.800631421158341e-06, "loss": 134.8847, "step": 22420 }, { "epoch": 0.1855482483351946, "grad_norm": 839.5892333984375, "learning_rate": 9.800236938567302e-06, "loss": 114.9243, "step": 22430 }, { "epoch": 0.18563097158456385, "grad_norm": 1064.1595458984375, "learning_rate": 9.799842074043438e-06, "loss": 132.5262, "step": 22440 }, { "epoch": 0.18571369483393307, "grad_norm": 868.5515747070312, "learning_rate": 9.799446827618172e-06, "loss": 123.4177, "step": 22450 }, { "epoch": 0.1857964180833023, "grad_norm": 1418.9991455078125, "learning_rate": 9.799051199322944e-06, "loss": 134.293, "step": 22460 }, { "epoch": 0.18587914133267155, "grad_norm": 1264.387939453125, "learning_rate": 9.798655189189239e-06, "loss": 155.6345, "step": 22470 }, { "epoch": 0.18596186458204078, "grad_norm": 1341.15185546875, "learning_rate": 9.798258797248563e-06, "loss": 151.904, "step": 22480 }, { "epoch": 0.18604458783141, "grad_norm": 1191.142578125, "learning_rate": 9.797862023532457e-06, "loss": 143.8828, "step": 22490 }, { "epoch": 0.18612731108077926, "grad_norm": 1029.2672119140625, "learning_rate": 9.797464868072489e-06, "loss": 111.6996, "step": 22500 }, { "epoch": 0.1862100343301485, "grad_norm": 898.6533203125, "learning_rate": 9.797067330900256e-06, "loss": 123.9605, "step": 22510 }, { "epoch": 0.18629275757951771, "grad_norm": 1148.5517578125, "learning_rate": 9.796669412047392e-06, "loss": 107.9766, "step": 22520 }, { "epoch": 0.18637548082888697, "grad_norm": 1347.3740234375, "learning_rate": 9.796271111545559e-06, "loss": 132.5673, "step": 22530 }, { "epoch": 0.1864582040782562, "grad_norm": 732.031982421875, "learning_rate": 9.795872429426443e-06, "loss": 113.1994, "step": 22540 }, { "epoch": 0.18654092732762542, "grad_norm": 847.0791625976562, "learning_rate": 9.79547336572177e-06, "loss": 115.746, "step": 22550 }, { "epoch": 0.18662365057699468, "grad_norm": 897.2965698242188, "learning_rate": 9.795073920463289e-06, "loss": 123.8876, "step": 22560 }, { "epoch": 0.1867063738263639, "grad_norm": 1148.21875, "learning_rate": 9.794674093682781e-06, "loss": 133.7378, "step": 22570 }, { "epoch": 0.18678909707573313, "grad_norm": 1180.834716796875, "learning_rate": 9.79427388541206e-06, "loss": 100.9495, "step": 22580 }, { "epoch": 0.18687182032510238, "grad_norm": 681.8207397460938, "learning_rate": 9.79387329568297e-06, "loss": 108.7203, "step": 22590 }, { "epoch": 0.1869545435744716, "grad_norm": 859.6419067382812, "learning_rate": 9.793472324527383e-06, "loss": 179.532, "step": 22600 }, { "epoch": 0.18703726682384084, "grad_norm": 615.40380859375, "learning_rate": 9.793070971977203e-06, "loss": 103.2244, "step": 22610 }, { "epoch": 0.18711999007321006, "grad_norm": 896.5668334960938, "learning_rate": 9.79266923806436e-06, "loss": 135.8797, "step": 22620 }, { "epoch": 0.18720271332257932, "grad_norm": 685.52685546875, "learning_rate": 9.792267122820823e-06, "loss": 102.7591, "step": 22630 }, { "epoch": 0.18728543657194854, "grad_norm": 656.8187255859375, "learning_rate": 9.791864626278584e-06, "loss": 134.8737, "step": 22640 }, { "epoch": 0.18736815982131777, "grad_norm": 406.96820068359375, "learning_rate": 9.791461748469669e-06, "loss": 129.8552, "step": 22650 }, { "epoch": 0.18745088307068702, "grad_norm": 1129.7803955078125, "learning_rate": 9.791058489426134e-06, "loss": 133.4618, "step": 22660 }, { "epoch": 0.18753360632005625, "grad_norm": 1034.3385009765625, "learning_rate": 9.790654849180059e-06, "loss": 107.0448, "step": 22670 }, { "epoch": 0.18761632956942548, "grad_norm": 1405.0714111328125, "learning_rate": 9.790250827763566e-06, "loss": 121.3317, "step": 22680 }, { "epoch": 0.18769905281879473, "grad_norm": 1143.3116455078125, "learning_rate": 9.7898464252088e-06, "loss": 93.2796, "step": 22690 }, { "epoch": 0.18778177606816396, "grad_norm": 1161.796630859375, "learning_rate": 9.789441641547935e-06, "loss": 112.5664, "step": 22700 }, { "epoch": 0.18786449931753318, "grad_norm": 1665.2811279296875, "learning_rate": 9.789036476813178e-06, "loss": 162.8835, "step": 22710 }, { "epoch": 0.18794722256690244, "grad_norm": 885.5316772460938, "learning_rate": 9.788630931036769e-06, "loss": 127.6729, "step": 22720 }, { "epoch": 0.18802994581627167, "grad_norm": 1863.15673828125, "learning_rate": 9.788225004250974e-06, "loss": 136.1164, "step": 22730 }, { "epoch": 0.1881126690656409, "grad_norm": 770.5360717773438, "learning_rate": 9.78781869648809e-06, "loss": 145.4966, "step": 22740 }, { "epoch": 0.18819539231501015, "grad_norm": 1846.6016845703125, "learning_rate": 9.787412007780445e-06, "loss": 119.8534, "step": 22750 }, { "epoch": 0.18827811556437937, "grad_norm": 930.654052734375, "learning_rate": 9.787004938160398e-06, "loss": 112.2845, "step": 22760 }, { "epoch": 0.1883608388137486, "grad_norm": 3968.6298828125, "learning_rate": 9.786597487660336e-06, "loss": 166.1542, "step": 22770 }, { "epoch": 0.18844356206311783, "grad_norm": 687.5050659179688, "learning_rate": 9.78618965631268e-06, "loss": 118.5364, "step": 22780 }, { "epoch": 0.18852628531248708, "grad_norm": 1421.8592529296875, "learning_rate": 9.785781444149883e-06, "loss": 102.7248, "step": 22790 }, { "epoch": 0.1886090085618563, "grad_norm": 1723.80029296875, "learning_rate": 9.785372851204415e-06, "loss": 136.1481, "step": 22800 }, { "epoch": 0.18869173181122553, "grad_norm": 1364.1827392578125, "learning_rate": 9.784963877508794e-06, "loss": 135.9108, "step": 22810 }, { "epoch": 0.1887744550605948, "grad_norm": 2061.134765625, "learning_rate": 9.784554523095554e-06, "loss": 201.537, "step": 22820 }, { "epoch": 0.188857178309964, "grad_norm": 958.0075073242188, "learning_rate": 9.784144787997272e-06, "loss": 96.8345, "step": 22830 }, { "epoch": 0.18893990155933324, "grad_norm": 717.226318359375, "learning_rate": 9.783734672246545e-06, "loss": 130.6322, "step": 22840 }, { "epoch": 0.1890226248087025, "grad_norm": 716.5567016601562, "learning_rate": 9.783324175876004e-06, "loss": 105.1676, "step": 22850 }, { "epoch": 0.18910534805807172, "grad_norm": 1025.28173828125, "learning_rate": 9.782913298918311e-06, "loss": 151.713, "step": 22860 }, { "epoch": 0.18918807130744095, "grad_norm": 649.5341186523438, "learning_rate": 9.782502041406157e-06, "loss": 135.4802, "step": 22870 }, { "epoch": 0.1892707945568102, "grad_norm": 1117.214599609375, "learning_rate": 9.782090403372263e-06, "loss": 148.4325, "step": 22880 }, { "epoch": 0.18935351780617943, "grad_norm": 1014.591552734375, "learning_rate": 9.781678384849385e-06, "loss": 111.5626, "step": 22890 }, { "epoch": 0.18943624105554865, "grad_norm": 2505.06982421875, "learning_rate": 9.7812659858703e-06, "loss": 142.1111, "step": 22900 }, { "epoch": 0.1895189643049179, "grad_norm": 1313.8260498046875, "learning_rate": 9.780853206467826e-06, "loss": 133.5671, "step": 22910 }, { "epoch": 0.18960168755428713, "grad_norm": 984.4515380859375, "learning_rate": 9.780440046674803e-06, "loss": 167.4173, "step": 22920 }, { "epoch": 0.18968441080365636, "grad_norm": 893.4295043945312, "learning_rate": 9.780026506524106e-06, "loss": 159.9201, "step": 22930 }, { "epoch": 0.18976713405302562, "grad_norm": 818.2760620117188, "learning_rate": 9.779612586048635e-06, "loss": 132.297, "step": 22940 }, { "epoch": 0.18984985730239484, "grad_norm": 1028.792236328125, "learning_rate": 9.779198285281326e-06, "loss": 175.7389, "step": 22950 }, { "epoch": 0.18993258055176407, "grad_norm": 1137.0174560546875, "learning_rate": 9.778783604255145e-06, "loss": 120.6092, "step": 22960 }, { "epoch": 0.1900153038011333, "grad_norm": 2498.3369140625, "learning_rate": 9.778368543003083e-06, "loss": 144.8033, "step": 22970 }, { "epoch": 0.19009802705050255, "grad_norm": 1361.688232421875, "learning_rate": 9.777953101558164e-06, "loss": 133.7076, "step": 22980 }, { "epoch": 0.19018075029987178, "grad_norm": 1047.64453125, "learning_rate": 9.777537279953448e-06, "loss": 120.5423, "step": 22990 }, { "epoch": 0.190263473549241, "grad_norm": 982.462158203125, "learning_rate": 9.777121078222015e-06, "loss": 108.502, "step": 23000 }, { "epoch": 0.19034619679861026, "grad_norm": 1133.6990966796875, "learning_rate": 9.77670449639698e-06, "loss": 129.8804, "step": 23010 }, { "epoch": 0.19042892004797948, "grad_norm": 671.9098510742188, "learning_rate": 9.776287534511492e-06, "loss": 130.2723, "step": 23020 }, { "epoch": 0.1905116432973487, "grad_norm": 1220.64892578125, "learning_rate": 9.775870192598726e-06, "loss": 122.8446, "step": 23030 }, { "epoch": 0.19059436654671796, "grad_norm": 2241.37548828125, "learning_rate": 9.775452470691886e-06, "loss": 133.7913, "step": 23040 }, { "epoch": 0.1906770897960872, "grad_norm": 1148.03662109375, "learning_rate": 9.77503436882421e-06, "loss": 138.1318, "step": 23050 }, { "epoch": 0.19075981304545642, "grad_norm": 980.452880859375, "learning_rate": 9.774615887028964e-06, "loss": 147.464, "step": 23060 }, { "epoch": 0.19084253629482567, "grad_norm": 2212.347412109375, "learning_rate": 9.774197025339442e-06, "loss": 102.5226, "step": 23070 }, { "epoch": 0.1909252595441949, "grad_norm": 928.7798461914062, "learning_rate": 9.773777783788976e-06, "loss": 113.3667, "step": 23080 }, { "epoch": 0.19100798279356412, "grad_norm": 1282.0994873046875, "learning_rate": 9.77335816241092e-06, "loss": 144.8801, "step": 23090 }, { "epoch": 0.19109070604293338, "grad_norm": 837.4090576171875, "learning_rate": 9.77293816123866e-06, "loss": 104.0558, "step": 23100 }, { "epoch": 0.1911734292923026, "grad_norm": 881.4822998046875, "learning_rate": 9.772517780305618e-06, "loss": 121.4086, "step": 23110 }, { "epoch": 0.19125615254167183, "grad_norm": 880.7931518554688, "learning_rate": 9.772097019645236e-06, "loss": 139.5056, "step": 23120 }, { "epoch": 0.19133887579104109, "grad_norm": 776.1262817382812, "learning_rate": 9.771675879290998e-06, "loss": 146.6072, "step": 23130 }, { "epoch": 0.1914215990404103, "grad_norm": 904.765380859375, "learning_rate": 9.771254359276407e-06, "loss": 116.9324, "step": 23140 }, { "epoch": 0.19150432228977954, "grad_norm": 730.8990478515625, "learning_rate": 9.770832459635004e-06, "loss": 133.2764, "step": 23150 }, { "epoch": 0.19158704553914876, "grad_norm": 690.1975708007812, "learning_rate": 9.77041018040036e-06, "loss": 175.1648, "step": 23160 }, { "epoch": 0.19166976878851802, "grad_norm": 907.7679443359375, "learning_rate": 9.769987521606068e-06, "loss": 105.3016, "step": 23170 }, { "epoch": 0.19175249203788725, "grad_norm": 693.336181640625, "learning_rate": 9.769564483285761e-06, "loss": 192.6149, "step": 23180 }, { "epoch": 0.19183521528725647, "grad_norm": 557.3772583007812, "learning_rate": 9.769141065473099e-06, "loss": 115.1971, "step": 23190 }, { "epoch": 0.19191793853662573, "grad_norm": 858.6890869140625, "learning_rate": 9.768717268201768e-06, "loss": 145.7314, "step": 23200 }, { "epoch": 0.19200066178599495, "grad_norm": 2752.160888671875, "learning_rate": 9.768293091505491e-06, "loss": 128.7382, "step": 23210 }, { "epoch": 0.19208338503536418, "grad_norm": 971.1498413085938, "learning_rate": 9.767868535418014e-06, "loss": 126.5057, "step": 23220 }, { "epoch": 0.19216610828473343, "grad_norm": 793.6380615234375, "learning_rate": 9.767443599973122e-06, "loss": 108.5758, "step": 23230 }, { "epoch": 0.19224883153410266, "grad_norm": 1140.6827392578125, "learning_rate": 9.76701828520462e-06, "loss": 117.5244, "step": 23240 }, { "epoch": 0.1923315547834719, "grad_norm": 840.5177001953125, "learning_rate": 9.766592591146353e-06, "loss": 107.5938, "step": 23250 }, { "epoch": 0.19241427803284114, "grad_norm": 1344.9217529296875, "learning_rate": 9.766166517832188e-06, "loss": 120.1126, "step": 23260 }, { "epoch": 0.19249700128221037, "grad_norm": 1015.5147705078125, "learning_rate": 9.765740065296025e-06, "loss": 131.5013, "step": 23270 }, { "epoch": 0.1925797245315796, "grad_norm": 574.64306640625, "learning_rate": 9.765313233571798e-06, "loss": 131.5927, "step": 23280 }, { "epoch": 0.19266244778094885, "grad_norm": 1062.6119384765625, "learning_rate": 9.76488602269347e-06, "loss": 162.6192, "step": 23290 }, { "epoch": 0.19274517103031807, "grad_norm": 1031.8822021484375, "learning_rate": 9.764458432695026e-06, "loss": 91.6339, "step": 23300 }, { "epoch": 0.1928278942796873, "grad_norm": 715.5499877929688, "learning_rate": 9.76403046361049e-06, "loss": 98.5823, "step": 23310 }, { "epoch": 0.19291061752905656, "grad_norm": 707.9616088867188, "learning_rate": 9.763602115473914e-06, "loss": 129.4643, "step": 23320 }, { "epoch": 0.19299334077842578, "grad_norm": 1023.2615966796875, "learning_rate": 9.763173388319381e-06, "loss": 108.6447, "step": 23330 }, { "epoch": 0.193076064027795, "grad_norm": 745.7240600585938, "learning_rate": 9.762744282181e-06, "loss": 160.1926, "step": 23340 }, { "epoch": 0.19315878727716423, "grad_norm": 1169.117431640625, "learning_rate": 9.762314797092916e-06, "loss": 136.8101, "step": 23350 }, { "epoch": 0.1932415105265335, "grad_norm": 2356.3876953125, "learning_rate": 9.761884933089301e-06, "loss": 149.6885, "step": 23360 }, { "epoch": 0.19332423377590272, "grad_norm": 748.1530151367188, "learning_rate": 9.761454690204352e-06, "loss": 90.2527, "step": 23370 }, { "epoch": 0.19340695702527194, "grad_norm": 773.795654296875, "learning_rate": 9.76102406847231e-06, "loss": 133.7801, "step": 23380 }, { "epoch": 0.1934896802746412, "grad_norm": 785.1251831054688, "learning_rate": 9.760593067927428e-06, "loss": 92.1381, "step": 23390 }, { "epoch": 0.19357240352401042, "grad_norm": 987.9070434570312, "learning_rate": 9.760161688604008e-06, "loss": 152.8994, "step": 23400 }, { "epoch": 0.19365512677337965, "grad_norm": 1091.9166259765625, "learning_rate": 9.759729930536367e-06, "loss": 106.4156, "step": 23410 }, { "epoch": 0.1937378500227489, "grad_norm": 828.226806640625, "learning_rate": 9.75929779375886e-06, "loss": 127.5234, "step": 23420 }, { "epoch": 0.19382057327211813, "grad_norm": 1164.93359375, "learning_rate": 9.75886527830587e-06, "loss": 147.2189, "step": 23430 }, { "epoch": 0.19390329652148736, "grad_norm": 1556.861572265625, "learning_rate": 9.75843238421181e-06, "loss": 132.9836, "step": 23440 }, { "epoch": 0.1939860197708566, "grad_norm": 1154.7764892578125, "learning_rate": 9.757999111511121e-06, "loss": 131.7635, "step": 23450 }, { "epoch": 0.19406874302022584, "grad_norm": 1170.080078125, "learning_rate": 9.757565460238281e-06, "loss": 120.619, "step": 23460 }, { "epoch": 0.19415146626959506, "grad_norm": 1463.3924560546875, "learning_rate": 9.757131430427791e-06, "loss": 129.6818, "step": 23470 }, { "epoch": 0.19423418951896432, "grad_norm": 1100.9061279296875, "learning_rate": 9.756697022114185e-06, "loss": 108.3849, "step": 23480 }, { "epoch": 0.19431691276833354, "grad_norm": 1876.0504150390625, "learning_rate": 9.756262235332029e-06, "loss": 103.7323, "step": 23490 }, { "epoch": 0.19439963601770277, "grad_norm": 974.3872680664062, "learning_rate": 9.755827070115915e-06, "loss": 105.9175, "step": 23500 }, { "epoch": 0.194482359267072, "grad_norm": 888.02099609375, "learning_rate": 9.755391526500466e-06, "loss": 108.9482, "step": 23510 }, { "epoch": 0.19456508251644125, "grad_norm": 863.0517578125, "learning_rate": 9.75495560452034e-06, "loss": 131.4029, "step": 23520 }, { "epoch": 0.19464780576581048, "grad_norm": 1131.92431640625, "learning_rate": 9.754519304210214e-06, "loss": 122.4951, "step": 23530 }, { "epoch": 0.1947305290151797, "grad_norm": 2271.8134765625, "learning_rate": 9.754082625604812e-06, "loss": 121.6546, "step": 23540 }, { "epoch": 0.19481325226454896, "grad_norm": 919.2047119140625, "learning_rate": 9.753645568738872e-06, "loss": 138.8903, "step": 23550 }, { "epoch": 0.19489597551391818, "grad_norm": 840.5191650390625, "learning_rate": 9.75320813364717e-06, "loss": 91.0436, "step": 23560 }, { "epoch": 0.1949786987632874, "grad_norm": 621.6953125, "learning_rate": 9.752770320364512e-06, "loss": 128.6445, "step": 23570 }, { "epoch": 0.19506142201265667, "grad_norm": 754.2584838867188, "learning_rate": 9.752332128925732e-06, "loss": 111.4495, "step": 23580 }, { "epoch": 0.1951441452620259, "grad_norm": 1024.14501953125, "learning_rate": 9.751893559365693e-06, "loss": 170.7815, "step": 23590 }, { "epoch": 0.19522686851139512, "grad_norm": 1205.4876708984375, "learning_rate": 9.751454611719294e-06, "loss": 121.6351, "step": 23600 }, { "epoch": 0.19530959176076437, "grad_norm": 1503.17236328125, "learning_rate": 9.751015286021455e-06, "loss": 164.3172, "step": 23610 }, { "epoch": 0.1953923150101336, "grad_norm": 937.5809936523438, "learning_rate": 9.750575582307136e-06, "loss": 160.1191, "step": 23620 }, { "epoch": 0.19547503825950283, "grad_norm": 983.7006225585938, "learning_rate": 9.75013550061132e-06, "loss": 120.9869, "step": 23630 }, { "epoch": 0.19555776150887208, "grad_norm": 1772.2957763671875, "learning_rate": 9.749695040969022e-06, "loss": 114.3774, "step": 23640 }, { "epoch": 0.1956404847582413, "grad_norm": 924.7174072265625, "learning_rate": 9.749254203415288e-06, "loss": 124.1198, "step": 23650 }, { "epoch": 0.19572320800761053, "grad_norm": 972.820068359375, "learning_rate": 9.748812987985193e-06, "loss": 123.3391, "step": 23660 }, { "epoch": 0.1958059312569798, "grad_norm": 1330.291748046875, "learning_rate": 9.748371394713842e-06, "loss": 126.0948, "step": 23670 }, { "epoch": 0.195888654506349, "grad_norm": 1056.2666015625, "learning_rate": 9.747929423636372e-06, "loss": 128.4867, "step": 23680 }, { "epoch": 0.19597137775571824, "grad_norm": 670.9091186523438, "learning_rate": 9.74748707478795e-06, "loss": 140.6503, "step": 23690 }, { "epoch": 0.19605410100508747, "grad_norm": 1226.66162109375, "learning_rate": 9.747044348203766e-06, "loss": 130.9208, "step": 23700 }, { "epoch": 0.19613682425445672, "grad_norm": 907.8272705078125, "learning_rate": 9.74660124391905e-06, "loss": 127.5544, "step": 23710 }, { "epoch": 0.19621954750382595, "grad_norm": 898.1268920898438, "learning_rate": 9.746157761969058e-06, "loss": 164.2529, "step": 23720 }, { "epoch": 0.19630227075319517, "grad_norm": 1068.112060546875, "learning_rate": 9.745713902389074e-06, "loss": 143.5591, "step": 23730 }, { "epoch": 0.19638499400256443, "grad_norm": 1102.2633056640625, "learning_rate": 9.745269665214415e-06, "loss": 122.1263, "step": 23740 }, { "epoch": 0.19646771725193365, "grad_norm": 556.4323120117188, "learning_rate": 9.744825050480425e-06, "loss": 121.492, "step": 23750 }, { "epoch": 0.19655044050130288, "grad_norm": 1045.6256103515625, "learning_rate": 9.744380058222483e-06, "loss": 131.9083, "step": 23760 }, { "epoch": 0.19663316375067214, "grad_norm": 1136.8719482421875, "learning_rate": 9.743934688475994e-06, "loss": 115.8493, "step": 23770 }, { "epoch": 0.19671588700004136, "grad_norm": 962.9896850585938, "learning_rate": 9.743488941276394e-06, "loss": 117.902, "step": 23780 }, { "epoch": 0.1967986102494106, "grad_norm": 1086.8525390625, "learning_rate": 9.743042816659147e-06, "loss": 134.2704, "step": 23790 }, { "epoch": 0.19688133349877984, "grad_norm": 1227.1202392578125, "learning_rate": 9.742596314659751e-06, "loss": 127.5619, "step": 23800 }, { "epoch": 0.19696405674814907, "grad_norm": 1608.360595703125, "learning_rate": 9.742149435313732e-06, "loss": 120.5991, "step": 23810 }, { "epoch": 0.1970467799975183, "grad_norm": 664.271728515625, "learning_rate": 9.741702178656647e-06, "loss": 151.2022, "step": 23820 }, { "epoch": 0.19712950324688755, "grad_norm": 1345.0858154296875, "learning_rate": 9.74125454472408e-06, "loss": 136.8234, "step": 23830 }, { "epoch": 0.19721222649625678, "grad_norm": 732.8429565429688, "learning_rate": 9.740806533551647e-06, "loss": 234.7962, "step": 23840 }, { "epoch": 0.197294949745626, "grad_norm": 1542.8756103515625, "learning_rate": 9.740358145174999e-06, "loss": 130.3346, "step": 23850 }, { "epoch": 0.19737767299499526, "grad_norm": 821.8853149414062, "learning_rate": 9.739909379629805e-06, "loss": 139.0029, "step": 23860 }, { "epoch": 0.19746039624436448, "grad_norm": 812.029541015625, "learning_rate": 9.739460236951778e-06, "loss": 127.8701, "step": 23870 }, { "epoch": 0.1975431194937337, "grad_norm": 969.9833984375, "learning_rate": 9.739010717176649e-06, "loss": 99.5856, "step": 23880 }, { "epoch": 0.19762584274310294, "grad_norm": 1117.0540771484375, "learning_rate": 9.738560820340189e-06, "loss": 114.7225, "step": 23890 }, { "epoch": 0.1977085659924722, "grad_norm": 601.694091796875, "learning_rate": 9.738110546478188e-06, "loss": 123.2165, "step": 23900 }, { "epoch": 0.19779128924184142, "grad_norm": 1268.921630859375, "learning_rate": 9.737659895626478e-06, "loss": 130.8849, "step": 23910 }, { "epoch": 0.19787401249121064, "grad_norm": 1429.4453125, "learning_rate": 9.737208867820914e-06, "loss": 123.7755, "step": 23920 }, { "epoch": 0.1979567357405799, "grad_norm": 654.82080078125, "learning_rate": 9.736757463097378e-06, "loss": 152.5086, "step": 23930 }, { "epoch": 0.19803945898994912, "grad_norm": 852.7525024414062, "learning_rate": 9.736305681491792e-06, "loss": 116.6475, "step": 23940 }, { "epoch": 0.19812218223931835, "grad_norm": 1039.8662109375, "learning_rate": 9.735853523040098e-06, "loss": 138.9281, "step": 23950 }, { "epoch": 0.1982049054886876, "grad_norm": 1178.4395751953125, "learning_rate": 9.735400987778274e-06, "loss": 143.172, "step": 23960 }, { "epoch": 0.19828762873805683, "grad_norm": 1232.4547119140625, "learning_rate": 9.734948075742328e-06, "loss": 121.5333, "step": 23970 }, { "epoch": 0.19837035198742606, "grad_norm": 865.7545166015625, "learning_rate": 9.734494786968293e-06, "loss": 154.3453, "step": 23980 }, { "epoch": 0.1984530752367953, "grad_norm": 669.6997680664062, "learning_rate": 9.734041121492235e-06, "loss": 144.5734, "step": 23990 }, { "epoch": 0.19853579848616454, "grad_norm": 1144.2825927734375, "learning_rate": 9.733587079350254e-06, "loss": 107.7752, "step": 24000 }, { "epoch": 0.19861852173553377, "grad_norm": 2160.167236328125, "learning_rate": 9.73313266057847e-06, "loss": 129.0158, "step": 24010 }, { "epoch": 0.19870124498490302, "grad_norm": 1064.593994140625, "learning_rate": 9.732677865213044e-06, "loss": 141.8949, "step": 24020 }, { "epoch": 0.19878396823427225, "grad_norm": 1279.8077392578125, "learning_rate": 9.73222269329016e-06, "loss": 153.0952, "step": 24030 }, { "epoch": 0.19886669148364147, "grad_norm": 950.4849853515625, "learning_rate": 9.731767144846034e-06, "loss": 146.848, "step": 24040 }, { "epoch": 0.1989494147330107, "grad_norm": 1283.0186767578125, "learning_rate": 9.731311219916912e-06, "loss": 150.6841, "step": 24050 }, { "epoch": 0.19903213798237995, "grad_norm": 704.3133544921875, "learning_rate": 9.730854918539072e-06, "loss": 149.4796, "step": 24060 }, { "epoch": 0.19911486123174918, "grad_norm": 846.5486450195312, "learning_rate": 9.730398240748816e-06, "loss": 138.2696, "step": 24070 }, { "epoch": 0.1991975844811184, "grad_norm": 779.280029296875, "learning_rate": 9.729941186582482e-06, "loss": 114.9246, "step": 24080 }, { "epoch": 0.19928030773048766, "grad_norm": 1280.6629638671875, "learning_rate": 9.729483756076436e-06, "loss": 96.8995, "step": 24090 }, { "epoch": 0.1993630309798569, "grad_norm": 626.4833374023438, "learning_rate": 9.729025949267072e-06, "loss": 117.7161, "step": 24100 }, { "epoch": 0.1994457542292261, "grad_norm": 866.7782592773438, "learning_rate": 9.728567766190817e-06, "loss": 110.1413, "step": 24110 }, { "epoch": 0.19952847747859537, "grad_norm": 1430.4652099609375, "learning_rate": 9.728109206884125e-06, "loss": 122.4254, "step": 24120 }, { "epoch": 0.1996112007279646, "grad_norm": 1178.0247802734375, "learning_rate": 9.727650271383485e-06, "loss": 131.602, "step": 24130 }, { "epoch": 0.19969392397733382, "grad_norm": 944.5925903320312, "learning_rate": 9.727190959725407e-06, "loss": 91.2385, "step": 24140 }, { "epoch": 0.19977664722670307, "grad_norm": 1111.4093017578125, "learning_rate": 9.72673127194644e-06, "loss": 185.1948, "step": 24150 }, { "epoch": 0.1998593704760723, "grad_norm": 1125.5455322265625, "learning_rate": 9.72627120808316e-06, "loss": 157.4787, "step": 24160 }, { "epoch": 0.19994209372544153, "grad_norm": 1090.2935791015625, "learning_rate": 9.725810768172169e-06, "loss": 106.9974, "step": 24170 }, { "epoch": 0.20002481697481078, "grad_norm": 620.5934448242188, "learning_rate": 9.725349952250105e-06, "loss": 115.3673, "step": 24180 }, { "epoch": 0.20010754022418, "grad_norm": 1263.1217041015625, "learning_rate": 9.724888760353631e-06, "loss": 148.2358, "step": 24190 }, { "epoch": 0.20019026347354923, "grad_norm": 1106.949951171875, "learning_rate": 9.72442719251944e-06, "loss": 170.5474, "step": 24200 }, { "epoch": 0.2002729867229185, "grad_norm": 1334.36962890625, "learning_rate": 9.723965248784264e-06, "loss": 149.0543, "step": 24210 }, { "epoch": 0.20035570997228772, "grad_norm": 1211.2431640625, "learning_rate": 9.723502929184851e-06, "loss": 126.5367, "step": 24220 }, { "epoch": 0.20043843322165694, "grad_norm": 863.3626708984375, "learning_rate": 9.723040233757987e-06, "loss": 142.1387, "step": 24230 }, { "epoch": 0.20052115647102617, "grad_norm": 1154.0657958984375, "learning_rate": 9.722577162540489e-06, "loss": 123.5952, "step": 24240 }, { "epoch": 0.20060387972039542, "grad_norm": 1012.69140625, "learning_rate": 9.7221137155692e-06, "loss": 110.8877, "step": 24250 }, { "epoch": 0.20068660296976465, "grad_norm": 1386.55859375, "learning_rate": 9.721649892880995e-06, "loss": 170.2592, "step": 24260 }, { "epoch": 0.20076932621913388, "grad_norm": 1342.9095458984375, "learning_rate": 9.721185694512776e-06, "loss": 134.4591, "step": 24270 }, { "epoch": 0.20085204946850313, "grad_norm": 392.2961120605469, "learning_rate": 9.720721120501478e-06, "loss": 134.1603, "step": 24280 }, { "epoch": 0.20093477271787236, "grad_norm": 826.9261474609375, "learning_rate": 9.720256170884066e-06, "loss": 101.2419, "step": 24290 }, { "epoch": 0.20101749596724158, "grad_norm": 791.0903930664062, "learning_rate": 9.719790845697534e-06, "loss": 129.4301, "step": 24300 }, { "epoch": 0.20110021921661084, "grad_norm": 1188.9549560546875, "learning_rate": 9.719325144978907e-06, "loss": 145.1762, "step": 24310 }, { "epoch": 0.20118294246598006, "grad_norm": 1141.0123291015625, "learning_rate": 9.718859068765234e-06, "loss": 107.9242, "step": 24320 }, { "epoch": 0.2012656657153493, "grad_norm": 1065.128173828125, "learning_rate": 9.718392617093602e-06, "loss": 113.2393, "step": 24330 }, { "epoch": 0.20134838896471854, "grad_norm": 1253.3646240234375, "learning_rate": 9.717925790001125e-06, "loss": 77.6727, "step": 24340 }, { "epoch": 0.20143111221408777, "grad_norm": 1093.85498046875, "learning_rate": 9.717458587524946e-06, "loss": 112.4173, "step": 24350 }, { "epoch": 0.201513835463457, "grad_norm": 787.4635009765625, "learning_rate": 9.716991009702236e-06, "loss": 114.5591, "step": 24360 }, { "epoch": 0.20159655871282625, "grad_norm": 926.3908081054688, "learning_rate": 9.7165230565702e-06, "loss": 123.4194, "step": 24370 }, { "epoch": 0.20167928196219548, "grad_norm": 792.05859375, "learning_rate": 9.71605472816607e-06, "loss": 128.6131, "step": 24380 }, { "epoch": 0.2017620052115647, "grad_norm": 835.9586791992188, "learning_rate": 9.71558602452711e-06, "loss": 125.1441, "step": 24390 }, { "epoch": 0.20184472846093396, "grad_norm": 1247.4454345703125, "learning_rate": 9.71511694569061e-06, "loss": 166.359, "step": 24400 }, { "epoch": 0.20192745171030319, "grad_norm": 828.7860107421875, "learning_rate": 9.714647491693897e-06, "loss": 140.4719, "step": 24410 }, { "epoch": 0.2020101749596724, "grad_norm": 843.8282470703125, "learning_rate": 9.714177662574316e-06, "loss": 101.7997, "step": 24420 }, { "epoch": 0.20209289820904164, "grad_norm": 967.5045776367188, "learning_rate": 9.713707458369258e-06, "loss": 124.0601, "step": 24430 }, { "epoch": 0.2021756214584109, "grad_norm": 692.8041381835938, "learning_rate": 9.713236879116127e-06, "loss": 120.8918, "step": 24440 }, { "epoch": 0.20225834470778012, "grad_norm": 1022.600341796875, "learning_rate": 9.71276592485237e-06, "loss": 128.2274, "step": 24450 }, { "epoch": 0.20234106795714935, "grad_norm": 1309.6890869140625, "learning_rate": 9.712294595615458e-06, "loss": 134.2852, "step": 24460 }, { "epoch": 0.2024237912065186, "grad_norm": 596.5182495117188, "learning_rate": 9.711822891442887e-06, "loss": 141.3561, "step": 24470 }, { "epoch": 0.20250651445588783, "grad_norm": 1456.3428955078125, "learning_rate": 9.711350812372198e-06, "loss": 166.3525, "step": 24480 }, { "epoch": 0.20258923770525705, "grad_norm": 1364.7294921875, "learning_rate": 9.710878358440945e-06, "loss": 178.6096, "step": 24490 }, { "epoch": 0.2026719609546263, "grad_norm": 941.8969116210938, "learning_rate": 9.710405529686722e-06, "loss": 154.1988, "step": 24500 }, { "epoch": 0.20275468420399553, "grad_norm": 1231.3597412109375, "learning_rate": 9.709932326147147e-06, "loss": 109.8311, "step": 24510 }, { "epoch": 0.20283740745336476, "grad_norm": 981.5293579101562, "learning_rate": 9.709458747859874e-06, "loss": 106.3588, "step": 24520 }, { "epoch": 0.20292013070273401, "grad_norm": 882.2429809570312, "learning_rate": 9.708984794862581e-06, "loss": 106.4644, "step": 24530 }, { "epoch": 0.20300285395210324, "grad_norm": 1536.478271484375, "learning_rate": 9.708510467192981e-06, "loss": 115.3478, "step": 24540 }, { "epoch": 0.20308557720147247, "grad_norm": 480.1463928222656, "learning_rate": 9.70803576488881e-06, "loss": 131.8311, "step": 24550 }, { "epoch": 0.20316830045084172, "grad_norm": 751.3448486328125, "learning_rate": 9.707560687987843e-06, "loss": 100.0164, "step": 24560 }, { "epoch": 0.20325102370021095, "grad_norm": 983.8947143554688, "learning_rate": 9.707085236527873e-06, "loss": 124.7845, "step": 24570 }, { "epoch": 0.20333374694958017, "grad_norm": 778.719970703125, "learning_rate": 9.706609410546736e-06, "loss": 70.2131, "step": 24580 }, { "epoch": 0.20341647019894943, "grad_norm": 799.94580078125, "learning_rate": 9.706133210082288e-06, "loss": 107.5597, "step": 24590 }, { "epoch": 0.20349919344831865, "grad_norm": 775.4839477539062, "learning_rate": 9.705656635172418e-06, "loss": 123.562, "step": 24600 }, { "epoch": 0.20358191669768788, "grad_norm": 906.2435302734375, "learning_rate": 9.705179685855048e-06, "loss": 141.3586, "step": 24610 }, { "epoch": 0.2036646399470571, "grad_norm": 572.1375732421875, "learning_rate": 9.704702362168121e-06, "loss": 174.9464, "step": 24620 }, { "epoch": 0.20374736319642636, "grad_norm": 1068.9876708984375, "learning_rate": 9.704224664149621e-06, "loss": 122.0626, "step": 24630 }, { "epoch": 0.2038300864457956, "grad_norm": 606.3220825195312, "learning_rate": 9.703746591837552e-06, "loss": 109.6907, "step": 24640 }, { "epoch": 0.20391280969516482, "grad_norm": 1032.2161865234375, "learning_rate": 9.703268145269957e-06, "loss": 126.0382, "step": 24650 }, { "epoch": 0.20399553294453407, "grad_norm": 1022.0555419921875, "learning_rate": 9.702789324484898e-06, "loss": 188.598, "step": 24660 }, { "epoch": 0.2040782561939033, "grad_norm": 1047.5244140625, "learning_rate": 9.702310129520476e-06, "loss": 122.4435, "step": 24670 }, { "epoch": 0.20416097944327252, "grad_norm": 1051.2161865234375, "learning_rate": 9.701830560414817e-06, "loss": 144.9207, "step": 24680 }, { "epoch": 0.20424370269264178, "grad_norm": 1291.77099609375, "learning_rate": 9.701350617206081e-06, "loss": 141.4524, "step": 24690 }, { "epoch": 0.204326425942011, "grad_norm": 1048.1988525390625, "learning_rate": 9.700870299932453e-06, "loss": 133.1601, "step": 24700 }, { "epoch": 0.20440914919138023, "grad_norm": 1616.698486328125, "learning_rate": 9.700389608632146e-06, "loss": 159.5847, "step": 24710 }, { "epoch": 0.20449187244074948, "grad_norm": 863.6107788085938, "learning_rate": 9.699908543343413e-06, "loss": 135.6566, "step": 24720 }, { "epoch": 0.2045745956901187, "grad_norm": 1772.513916015625, "learning_rate": 9.699427104104525e-06, "loss": 202.6929, "step": 24730 }, { "epoch": 0.20465731893948794, "grad_norm": 822.7219848632812, "learning_rate": 9.698945290953789e-06, "loss": 140.5493, "step": 24740 }, { "epoch": 0.2047400421888572, "grad_norm": 811.2693481445312, "learning_rate": 9.698463103929542e-06, "loss": 81.8386, "step": 24750 }, { "epoch": 0.20482276543822642, "grad_norm": 530.8605346679688, "learning_rate": 9.69798054307015e-06, "loss": 106.7644, "step": 24760 }, { "epoch": 0.20490548868759564, "grad_norm": 2197.434326171875, "learning_rate": 9.697497608414007e-06, "loss": 189.2195, "step": 24770 }, { "epoch": 0.20498821193696487, "grad_norm": 1628.6551513671875, "learning_rate": 9.697014299999536e-06, "loss": 110.0945, "step": 24780 }, { "epoch": 0.20507093518633412, "grad_norm": 988.3732299804688, "learning_rate": 9.696530617865197e-06, "loss": 147.2918, "step": 24790 }, { "epoch": 0.20515365843570335, "grad_norm": 934.0083618164062, "learning_rate": 9.696046562049469e-06, "loss": 124.3846, "step": 24800 }, { "epoch": 0.20523638168507258, "grad_norm": 1064.4521484375, "learning_rate": 9.695562132590865e-06, "loss": 130.5005, "step": 24810 }, { "epoch": 0.20531910493444183, "grad_norm": 936.8753662109375, "learning_rate": 9.695077329527936e-06, "loss": 79.4409, "step": 24820 }, { "epoch": 0.20540182818381106, "grad_norm": 2023.431396484375, "learning_rate": 9.694592152899249e-06, "loss": 135.3671, "step": 24830 }, { "epoch": 0.20548455143318028, "grad_norm": 1526.9307861328125, "learning_rate": 9.694106602743411e-06, "loss": 141.677, "step": 24840 }, { "epoch": 0.20556727468254954, "grad_norm": 1296.6025390625, "learning_rate": 9.693620679099055e-06, "loss": 106.9513, "step": 24850 }, { "epoch": 0.20564999793191877, "grad_norm": 675.1536865234375, "learning_rate": 9.693134382004839e-06, "loss": 122.0934, "step": 24860 }, { "epoch": 0.205732721181288, "grad_norm": 1233.307373046875, "learning_rate": 9.69264771149946e-06, "loss": 152.0633, "step": 24870 }, { "epoch": 0.20581544443065725, "grad_norm": 2084.801025390625, "learning_rate": 9.692160667621639e-06, "loss": 191.7889, "step": 24880 }, { "epoch": 0.20589816768002647, "grad_norm": 1063.7586669921875, "learning_rate": 9.69167325041013e-06, "loss": 98.7458, "step": 24890 }, { "epoch": 0.2059808909293957, "grad_norm": 1160.4176025390625, "learning_rate": 9.69118545990371e-06, "loss": 136.4698, "step": 24900 }, { "epoch": 0.20606361417876495, "grad_norm": 1240.2447509765625, "learning_rate": 9.690697296141194e-06, "loss": 131.5934, "step": 24910 }, { "epoch": 0.20614633742813418, "grad_norm": 1285.3697509765625, "learning_rate": 9.690208759161418e-06, "loss": 143.4328, "step": 24920 }, { "epoch": 0.2062290606775034, "grad_norm": 899.1880493164062, "learning_rate": 9.689719849003261e-06, "loss": 150.501, "step": 24930 }, { "epoch": 0.20631178392687266, "grad_norm": 1172.1856689453125, "learning_rate": 9.689230565705617e-06, "loss": 88.1189, "step": 24940 }, { "epoch": 0.2063945071762419, "grad_norm": 940.1471557617188, "learning_rate": 9.688740909307416e-06, "loss": 126.9263, "step": 24950 }, { "epoch": 0.2064772304256111, "grad_norm": 1031.7501220703125, "learning_rate": 9.68825087984762e-06, "loss": 145.9387, "step": 24960 }, { "epoch": 0.20655995367498034, "grad_norm": 747.2893676757812, "learning_rate": 9.687760477365217e-06, "loss": 111.3116, "step": 24970 }, { "epoch": 0.2066426769243496, "grad_norm": 1009.5612182617188, "learning_rate": 9.687269701899228e-06, "loss": 122.0988, "step": 24980 }, { "epoch": 0.20672540017371882, "grad_norm": 1089.2601318359375, "learning_rate": 9.6867785534887e-06, "loss": 118.2881, "step": 24990 }, { "epoch": 0.20680812342308805, "grad_norm": 1918.05908203125, "learning_rate": 9.686287032172712e-06, "loss": 119.0276, "step": 25000 }, { "epoch": 0.2068908466724573, "grad_norm": 1754.23486328125, "learning_rate": 9.685795137990372e-06, "loss": 149.8688, "step": 25010 }, { "epoch": 0.20697356992182653, "grad_norm": 701.3511352539062, "learning_rate": 9.685302870980819e-06, "loss": 111.5625, "step": 25020 }, { "epoch": 0.20705629317119575, "grad_norm": 1214.149658203125, "learning_rate": 9.684810231183218e-06, "loss": 104.316, "step": 25030 }, { "epoch": 0.207139016420565, "grad_norm": 1371.7374267578125, "learning_rate": 9.684317218636767e-06, "loss": 137.7989, "step": 25040 }, { "epoch": 0.20722173966993424, "grad_norm": 976.9849243164062, "learning_rate": 9.683823833380692e-06, "loss": 107.9419, "step": 25050 }, { "epoch": 0.20730446291930346, "grad_norm": 2080.160400390625, "learning_rate": 9.683330075454252e-06, "loss": 134.3795, "step": 25060 }, { "epoch": 0.20738718616867272, "grad_norm": 1066.323974609375, "learning_rate": 9.68283594489673e-06, "loss": 113.7562, "step": 25070 }, { "epoch": 0.20746990941804194, "grad_norm": 1623.5059814453125, "learning_rate": 9.682341441747446e-06, "loss": 124.4294, "step": 25080 }, { "epoch": 0.20755263266741117, "grad_norm": 1231.952880859375, "learning_rate": 9.68184656604574e-06, "loss": 168.1638, "step": 25090 }, { "epoch": 0.20763535591678042, "grad_norm": 604.9308471679688, "learning_rate": 9.681351317830991e-06, "loss": 150.3729, "step": 25100 }, { "epoch": 0.20771807916614965, "grad_norm": 915.5381469726562, "learning_rate": 9.680855697142601e-06, "loss": 89.7146, "step": 25110 }, { "epoch": 0.20780080241551888, "grad_norm": 517.771484375, "learning_rate": 9.680359704020005e-06, "loss": 110.3232, "step": 25120 }, { "epoch": 0.20788352566488813, "grad_norm": 999.2185668945312, "learning_rate": 9.67986333850267e-06, "loss": 155.6853, "step": 25130 }, { "epoch": 0.20796624891425736, "grad_norm": 837.3745727539062, "learning_rate": 9.679366600630085e-06, "loss": 123.4482, "step": 25140 }, { "epoch": 0.20804897216362658, "grad_norm": 1082.25146484375, "learning_rate": 9.678869490441775e-06, "loss": 92.413, "step": 25150 }, { "epoch": 0.2081316954129958, "grad_norm": 1187.2119140625, "learning_rate": 9.678372007977292e-06, "loss": 130.2228, "step": 25160 }, { "epoch": 0.20821441866236506, "grad_norm": 1067.64306640625, "learning_rate": 9.67787415327622e-06, "loss": 106.1636, "step": 25170 }, { "epoch": 0.2082971419117343, "grad_norm": 1119.31640625, "learning_rate": 9.67737592637817e-06, "loss": 125.416, "step": 25180 }, { "epoch": 0.20837986516110352, "grad_norm": 1132.6695556640625, "learning_rate": 9.676877327322785e-06, "loss": 121.1855, "step": 25190 }, { "epoch": 0.20846258841047277, "grad_norm": 921.9434204101562, "learning_rate": 9.676378356149733e-06, "loss": 119.0814, "step": 25200 }, { "epoch": 0.208545311659842, "grad_norm": 1111.28857421875, "learning_rate": 9.675879012898719e-06, "loss": 112.7059, "step": 25210 }, { "epoch": 0.20862803490921122, "grad_norm": 952.7010498046875, "learning_rate": 9.67537929760947e-06, "loss": 135.5189, "step": 25220 }, { "epoch": 0.20871075815858048, "grad_norm": 668.4132080078125, "learning_rate": 9.674879210321747e-06, "loss": 126.6339, "step": 25230 }, { "epoch": 0.2087934814079497, "grad_norm": 1562.3443603515625, "learning_rate": 9.67437875107534e-06, "loss": 157.3701, "step": 25240 }, { "epoch": 0.20887620465731893, "grad_norm": 787.549072265625, "learning_rate": 9.673877919910069e-06, "loss": 144.3811, "step": 25250 }, { "epoch": 0.20895892790668819, "grad_norm": 1168.210693359375, "learning_rate": 9.673376716865781e-06, "loss": 147.3342, "step": 25260 }, { "epoch": 0.2090416511560574, "grad_norm": 922.6990356445312, "learning_rate": 9.672875141982358e-06, "loss": 134.1699, "step": 25270 }, { "epoch": 0.20912437440542664, "grad_norm": 920.8440551757812, "learning_rate": 9.672373195299704e-06, "loss": 90.3388, "step": 25280 }, { "epoch": 0.2092070976547959, "grad_norm": 742.2246704101562, "learning_rate": 9.67187087685776e-06, "loss": 127.1734, "step": 25290 }, { "epoch": 0.20928982090416512, "grad_norm": 1413.235107421875, "learning_rate": 9.671368186696488e-06, "loss": 121.8572, "step": 25300 }, { "epoch": 0.20937254415353435, "grad_norm": 1089.089111328125, "learning_rate": 9.670865124855889e-06, "loss": 155.726, "step": 25310 }, { "epoch": 0.2094552674029036, "grad_norm": 1251.3277587890625, "learning_rate": 9.67036169137599e-06, "loss": 134.8554, "step": 25320 }, { "epoch": 0.20953799065227283, "grad_norm": 975.4923706054688, "learning_rate": 9.669857886296842e-06, "loss": 137.952, "step": 25330 }, { "epoch": 0.20962071390164205, "grad_norm": 1131.67822265625, "learning_rate": 9.669353709658537e-06, "loss": 119.2049, "step": 25340 }, { "epoch": 0.20970343715101128, "grad_norm": 637.6780395507812, "learning_rate": 9.668849161501186e-06, "loss": 104.5548, "step": 25350 }, { "epoch": 0.20978616040038053, "grad_norm": 1121.5142822265625, "learning_rate": 9.668344241864934e-06, "loss": 115.9487, "step": 25360 }, { "epoch": 0.20986888364974976, "grad_norm": 937.0314331054688, "learning_rate": 9.667838950789957e-06, "loss": 112.8814, "step": 25370 }, { "epoch": 0.209951606899119, "grad_norm": 1264.4849853515625, "learning_rate": 9.667333288316454e-06, "loss": 135.4153, "step": 25380 }, { "epoch": 0.21003433014848824, "grad_norm": 495.7436828613281, "learning_rate": 9.666827254484663e-06, "loss": 111.5311, "step": 25390 }, { "epoch": 0.21011705339785747, "grad_norm": 853.3590087890625, "learning_rate": 9.666320849334846e-06, "loss": 102.2723, "step": 25400 }, { "epoch": 0.2101997766472267, "grad_norm": 1231.4791259765625, "learning_rate": 9.665814072907293e-06, "loss": 118.1443, "step": 25410 }, { "epoch": 0.21028249989659595, "grad_norm": 688.8372192382812, "learning_rate": 9.665306925242329e-06, "loss": 120.3173, "step": 25420 }, { "epoch": 0.21036522314596517, "grad_norm": 1288.0115966796875, "learning_rate": 9.664799406380302e-06, "loss": 129.2124, "step": 25430 }, { "epoch": 0.2104479463953344, "grad_norm": 734.3385009765625, "learning_rate": 9.664291516361597e-06, "loss": 141.6067, "step": 25440 }, { "epoch": 0.21053066964470366, "grad_norm": 808.9627685546875, "learning_rate": 9.663783255226622e-06, "loss": 128.5043, "step": 25450 }, { "epoch": 0.21061339289407288, "grad_norm": 2131.609619140625, "learning_rate": 9.663274623015816e-06, "loss": 130.3591, "step": 25460 }, { "epoch": 0.2106961161434421, "grad_norm": 1364.452880859375, "learning_rate": 9.662765619769651e-06, "loss": 186.9397, "step": 25470 }, { "epoch": 0.21077883939281136, "grad_norm": 1547.8067626953125, "learning_rate": 9.662256245528622e-06, "loss": 130.2646, "step": 25480 }, { "epoch": 0.2108615626421806, "grad_norm": 760.1724853515625, "learning_rate": 9.661746500333265e-06, "loss": 107.9425, "step": 25490 }, { "epoch": 0.21094428589154982, "grad_norm": 691.5214233398438, "learning_rate": 9.66123638422413e-06, "loss": 131.0554, "step": 25500 }, { "epoch": 0.21102700914091904, "grad_norm": 1053.4617919921875, "learning_rate": 9.66072589724181e-06, "loss": 139.2496, "step": 25510 }, { "epoch": 0.2111097323902883, "grad_norm": 903.3119506835938, "learning_rate": 9.66021503942692e-06, "loss": 103.5156, "step": 25520 }, { "epoch": 0.21119245563965752, "grad_norm": 1562.44482421875, "learning_rate": 9.659703810820105e-06, "loss": 154.1499, "step": 25530 }, { "epoch": 0.21127517888902675, "grad_norm": 724.2481079101562, "learning_rate": 9.659192211462043e-06, "loss": 122.7316, "step": 25540 }, { "epoch": 0.211357902138396, "grad_norm": 1171.6414794921875, "learning_rate": 9.658680241393441e-06, "loss": 112.3694, "step": 25550 }, { "epoch": 0.21144062538776523, "grad_norm": 687.9328002929688, "learning_rate": 9.658167900655032e-06, "loss": 85.1699, "step": 25560 }, { "epoch": 0.21152334863713446, "grad_norm": 1203.814208984375, "learning_rate": 9.657655189287582e-06, "loss": 111.8283, "step": 25570 }, { "epoch": 0.2116060718865037, "grad_norm": 2333.49365234375, "learning_rate": 9.657142107331883e-06, "loss": 168.9521, "step": 25580 }, { "epoch": 0.21168879513587294, "grad_norm": 919.4609375, "learning_rate": 9.65662865482876e-06, "loss": 110.3846, "step": 25590 }, { "epoch": 0.21177151838524216, "grad_norm": 1061.5003662109375, "learning_rate": 9.656114831819067e-06, "loss": 133.6754, "step": 25600 }, { "epoch": 0.21185424163461142, "grad_norm": 498.1741638183594, "learning_rate": 9.655600638343685e-06, "loss": 109.6762, "step": 25610 }, { "epoch": 0.21193696488398064, "grad_norm": 946.8280639648438, "learning_rate": 9.655086074443527e-06, "loss": 183.0106, "step": 25620 }, { "epoch": 0.21201968813334987, "grad_norm": 1837.9036865234375, "learning_rate": 9.654571140159534e-06, "loss": 122.9791, "step": 25630 }, { "epoch": 0.21210241138271912, "grad_norm": 1461.47900390625, "learning_rate": 9.654055835532676e-06, "loss": 191.4103, "step": 25640 }, { "epoch": 0.21218513463208835, "grad_norm": 820.8297119140625, "learning_rate": 9.653540160603956e-06, "loss": 115.1019, "step": 25650 }, { "epoch": 0.21226785788145758, "grad_norm": 1157.8956298828125, "learning_rate": 9.653024115414402e-06, "loss": 142.3736, "step": 25660 }, { "epoch": 0.21235058113082683, "grad_norm": 917.7392578125, "learning_rate": 9.652507700005072e-06, "loss": 130.4384, "step": 25670 }, { "epoch": 0.21243330438019606, "grad_norm": 1544.7633056640625, "learning_rate": 9.651990914417057e-06, "loss": 135.7345, "step": 25680 }, { "epoch": 0.21251602762956529, "grad_norm": 768.8818359375, "learning_rate": 9.651473758691477e-06, "loss": 99.1156, "step": 25690 }, { "epoch": 0.2125987508789345, "grad_norm": 1055.4837646484375, "learning_rate": 9.650956232869475e-06, "loss": 113.1447, "step": 25700 }, { "epoch": 0.21268147412830377, "grad_norm": 1085.100341796875, "learning_rate": 9.650438336992231e-06, "loss": 156.4204, "step": 25710 }, { "epoch": 0.212764197377673, "grad_norm": 535.8530883789062, "learning_rate": 9.64992007110095e-06, "loss": 113.1042, "step": 25720 }, { "epoch": 0.21284692062704222, "grad_norm": 1140.1822509765625, "learning_rate": 9.64940143523687e-06, "loss": 134.6167, "step": 25730 }, { "epoch": 0.21292964387641147, "grad_norm": 1358.8885498046875, "learning_rate": 9.648882429441258e-06, "loss": 133.8381, "step": 25740 }, { "epoch": 0.2130123671257807, "grad_norm": 1481.0440673828125, "learning_rate": 9.648363053755406e-06, "loss": 120.2028, "step": 25750 }, { "epoch": 0.21309509037514993, "grad_norm": 1080.2623291015625, "learning_rate": 9.647843308220636e-06, "loss": 105.3537, "step": 25760 }, { "epoch": 0.21317781362451918, "grad_norm": 712.189453125, "learning_rate": 9.647323192878306e-06, "loss": 101.1071, "step": 25770 }, { "epoch": 0.2132605368738884, "grad_norm": 1282.9239501953125, "learning_rate": 9.646802707769798e-06, "loss": 121.2276, "step": 25780 }, { "epoch": 0.21334326012325763, "grad_norm": 558.678955078125, "learning_rate": 9.646281852936525e-06, "loss": 92.6775, "step": 25790 }, { "epoch": 0.2134259833726269, "grad_norm": 994.920654296875, "learning_rate": 9.64576062841993e-06, "loss": 124.3649, "step": 25800 }, { "epoch": 0.2135087066219961, "grad_norm": 1420.953857421875, "learning_rate": 9.64523903426148e-06, "loss": 122.571, "step": 25810 }, { "epoch": 0.21359142987136534, "grad_norm": 859.2244873046875, "learning_rate": 9.64471707050268e-06, "loss": 126.6968, "step": 25820 }, { "epoch": 0.2136741531207346, "grad_norm": 1234.9310302734375, "learning_rate": 9.644194737185058e-06, "loss": 139.6012, "step": 25830 }, { "epoch": 0.21375687637010382, "grad_norm": 1804.944091796875, "learning_rate": 9.643672034350177e-06, "loss": 150.6442, "step": 25840 }, { "epoch": 0.21383959961947305, "grad_norm": 1387.7882080078125, "learning_rate": 9.643148962039622e-06, "loss": 93.8409, "step": 25850 }, { "epoch": 0.2139223228688423, "grad_norm": 1455.9395751953125, "learning_rate": 9.642625520295014e-06, "loss": 174.9808, "step": 25860 }, { "epoch": 0.21400504611821153, "grad_norm": 1184.424072265625, "learning_rate": 9.642101709158001e-06, "loss": 129.4004, "step": 25870 }, { "epoch": 0.21408776936758075, "grad_norm": 1059.5274658203125, "learning_rate": 9.641577528670257e-06, "loss": 129.7015, "step": 25880 }, { "epoch": 0.21417049261694998, "grad_norm": 753.4532470703125, "learning_rate": 9.641052978873494e-06, "loss": 109.8265, "step": 25890 }, { "epoch": 0.21425321586631924, "grad_norm": 676.7515258789062, "learning_rate": 9.640528059809442e-06, "loss": 74.3417, "step": 25900 }, { "epoch": 0.21433593911568846, "grad_norm": 2531.762939453125, "learning_rate": 9.640002771519872e-06, "loss": 133.8275, "step": 25910 }, { "epoch": 0.2144186623650577, "grad_norm": 1403.41943359375, "learning_rate": 9.639477114046575e-06, "loss": 164.8685, "step": 25920 }, { "epoch": 0.21450138561442694, "grad_norm": 922.8995971679688, "learning_rate": 9.638951087431376e-06, "loss": 114.9469, "step": 25930 }, { "epoch": 0.21458410886379617, "grad_norm": 1005.201416015625, "learning_rate": 9.638424691716129e-06, "loss": 144.4065, "step": 25940 }, { "epoch": 0.2146668321131654, "grad_norm": 917.942626953125, "learning_rate": 9.637897926942716e-06, "loss": 140.8008, "step": 25950 }, { "epoch": 0.21474955536253465, "grad_norm": 1009.6735229492188, "learning_rate": 9.637370793153051e-06, "loss": 132.449, "step": 25960 }, { "epoch": 0.21483227861190388, "grad_norm": 904.17529296875, "learning_rate": 9.636843290389076e-06, "loss": 114.3653, "step": 25970 }, { "epoch": 0.2149150018612731, "grad_norm": 1091.709716796875, "learning_rate": 9.636315418692759e-06, "loss": 108.0948, "step": 25980 }, { "epoch": 0.21499772511064236, "grad_norm": 2188.771240234375, "learning_rate": 9.635787178106102e-06, "loss": 107.5951, "step": 25990 }, { "epoch": 0.21508044836001158, "grad_norm": 1075.4591064453125, "learning_rate": 9.635258568671135e-06, "loss": 137.4553, "step": 26000 }, { "epoch": 0.2151631716093808, "grad_norm": 916.110107421875, "learning_rate": 9.634729590429917e-06, "loss": 123.9193, "step": 26010 }, { "epoch": 0.21524589485875006, "grad_norm": 1345.30615234375, "learning_rate": 9.634200243424535e-06, "loss": 139.9196, "step": 26020 }, { "epoch": 0.2153286181081193, "grad_norm": 1103.3697509765625, "learning_rate": 9.633670527697108e-06, "loss": 130.9413, "step": 26030 }, { "epoch": 0.21541134135748852, "grad_norm": 1355.6485595703125, "learning_rate": 9.633140443289784e-06, "loss": 190.8187, "step": 26040 }, { "epoch": 0.21549406460685774, "grad_norm": 575.96142578125, "learning_rate": 9.632609990244737e-06, "loss": 103.7051, "step": 26050 }, { "epoch": 0.215576787856227, "grad_norm": 779.9686889648438, "learning_rate": 9.632079168604175e-06, "loss": 109.2936, "step": 26060 }, { "epoch": 0.21565951110559622, "grad_norm": 1010.6686401367188, "learning_rate": 9.63154797841033e-06, "loss": 159.4392, "step": 26070 }, { "epoch": 0.21574223435496545, "grad_norm": 826.314208984375, "learning_rate": 9.63101641970547e-06, "loss": 129.3246, "step": 26080 }, { "epoch": 0.2158249576043347, "grad_norm": 898.4963989257812, "learning_rate": 9.630484492531886e-06, "loss": 129.947, "step": 26090 }, { "epoch": 0.21590768085370393, "grad_norm": 843.03857421875, "learning_rate": 9.629952196931902e-06, "loss": 113.0155, "step": 26100 }, { "epoch": 0.21599040410307316, "grad_norm": 660.1622314453125, "learning_rate": 9.629419532947872e-06, "loss": 156.4598, "step": 26110 }, { "epoch": 0.2160731273524424, "grad_norm": 1190.5133056640625, "learning_rate": 9.628886500622174e-06, "loss": 128.8638, "step": 26120 }, { "epoch": 0.21615585060181164, "grad_norm": 924.6058959960938, "learning_rate": 9.62835309999722e-06, "loss": 99.8876, "step": 26130 }, { "epoch": 0.21623857385118087, "grad_norm": 1131.3111572265625, "learning_rate": 9.627819331115453e-06, "loss": 126.6344, "step": 26140 }, { "epoch": 0.21632129710055012, "grad_norm": 784.7423706054688, "learning_rate": 9.627285194019342e-06, "loss": 102.5163, "step": 26150 }, { "epoch": 0.21640402034991935, "grad_norm": 1271.62890625, "learning_rate": 9.626750688751382e-06, "loss": 115.6172, "step": 26160 }, { "epoch": 0.21648674359928857, "grad_norm": 1133.4263916015625, "learning_rate": 9.626215815354104e-06, "loss": 98.0378, "step": 26170 }, { "epoch": 0.21656946684865783, "grad_norm": 822.8490600585938, "learning_rate": 9.625680573870067e-06, "loss": 123.6515, "step": 26180 }, { "epoch": 0.21665219009802705, "grad_norm": 796.753662109375, "learning_rate": 9.625144964341853e-06, "loss": 127.32, "step": 26190 }, { "epoch": 0.21673491334739628, "grad_norm": 976.165771484375, "learning_rate": 9.624608986812082e-06, "loss": 113.2206, "step": 26200 }, { "epoch": 0.21681763659676553, "grad_norm": 1549.4820556640625, "learning_rate": 9.624072641323398e-06, "loss": 121.0571, "step": 26210 }, { "epoch": 0.21690035984613476, "grad_norm": 1164.861328125, "learning_rate": 9.623535927918474e-06, "loss": 168.508, "step": 26220 }, { "epoch": 0.216983083095504, "grad_norm": 1254.7081298828125, "learning_rate": 9.622998846640018e-06, "loss": 114.7848, "step": 26230 }, { "epoch": 0.2170658063448732, "grad_norm": 744.2569580078125, "learning_rate": 9.62246139753076e-06, "loss": 125.7201, "step": 26240 }, { "epoch": 0.21714852959424247, "grad_norm": 1193.7801513671875, "learning_rate": 9.621923580633462e-06, "loss": 109.2309, "step": 26250 }, { "epoch": 0.2172312528436117, "grad_norm": 933.6427612304688, "learning_rate": 9.621385395990915e-06, "loss": 137.9964, "step": 26260 }, { "epoch": 0.21731397609298092, "grad_norm": 1021.1292724609375, "learning_rate": 9.620846843645944e-06, "loss": 105.2249, "step": 26270 }, { "epoch": 0.21739669934235017, "grad_norm": 1435.4403076171875, "learning_rate": 9.620307923641395e-06, "loss": 120.3464, "step": 26280 }, { "epoch": 0.2174794225917194, "grad_norm": 1647.5338134765625, "learning_rate": 9.61976863602015e-06, "loss": 115.3988, "step": 26290 }, { "epoch": 0.21756214584108863, "grad_norm": 729.9489135742188, "learning_rate": 9.619228980825114e-06, "loss": 171.7545, "step": 26300 }, { "epoch": 0.21764486909045788, "grad_norm": 1271.6209716796875, "learning_rate": 9.61868895809923e-06, "loss": 120.3744, "step": 26310 }, { "epoch": 0.2177275923398271, "grad_norm": 844.6718139648438, "learning_rate": 9.618148567885462e-06, "loss": 142.1199, "step": 26320 }, { "epoch": 0.21781031558919633, "grad_norm": 837.2586669921875, "learning_rate": 9.617607810226806e-06, "loss": 154.3331, "step": 26330 }, { "epoch": 0.2178930388385656, "grad_norm": 1400.4326171875, "learning_rate": 9.61706668516629e-06, "loss": 124.3974, "step": 26340 }, { "epoch": 0.21797576208793482, "grad_norm": 1115.8116455078125, "learning_rate": 9.616525192746965e-06, "loss": 105.8458, "step": 26350 }, { "epoch": 0.21805848533730404, "grad_norm": 1421.916748046875, "learning_rate": 9.61598333301192e-06, "loss": 156.9738, "step": 26360 }, { "epoch": 0.2181412085866733, "grad_norm": 798.7007446289062, "learning_rate": 9.615441106004264e-06, "loss": 111.3176, "step": 26370 }, { "epoch": 0.21822393183604252, "grad_norm": 1311.8187255859375, "learning_rate": 9.614898511767142e-06, "loss": 112.8957, "step": 26380 }, { "epoch": 0.21830665508541175, "grad_norm": 1265.518798828125, "learning_rate": 9.614355550343724e-06, "loss": 97.6749, "step": 26390 }, { "epoch": 0.218389378334781, "grad_norm": 1181.8995361328125, "learning_rate": 9.613812221777212e-06, "loss": 126.788, "step": 26400 }, { "epoch": 0.21847210158415023, "grad_norm": 921.5441284179688, "learning_rate": 9.613268526110838e-06, "loss": 126.4273, "step": 26410 }, { "epoch": 0.21855482483351946, "grad_norm": 2307.983642578125, "learning_rate": 9.612724463387857e-06, "loss": 124.0576, "step": 26420 }, { "epoch": 0.21863754808288868, "grad_norm": 1391.1617431640625, "learning_rate": 9.612180033651561e-06, "loss": 122.5418, "step": 26430 }, { "epoch": 0.21872027133225794, "grad_norm": 883.1130981445312, "learning_rate": 9.611635236945267e-06, "loss": 105.9835, "step": 26440 }, { "epoch": 0.21880299458162716, "grad_norm": 1083.864990234375, "learning_rate": 9.61109007331232e-06, "loss": 124.8244, "step": 26450 }, { "epoch": 0.2188857178309964, "grad_norm": 1159.39697265625, "learning_rate": 9.610544542796101e-06, "loss": 120.3728, "step": 26460 }, { "epoch": 0.21896844108036564, "grad_norm": 1433.8590087890625, "learning_rate": 9.609998645440011e-06, "loss": 132.6535, "step": 26470 }, { "epoch": 0.21905116432973487, "grad_norm": 1183.7196044921875, "learning_rate": 9.609452381287486e-06, "loss": 133.8586, "step": 26480 }, { "epoch": 0.2191338875791041, "grad_norm": 859.5003662109375, "learning_rate": 9.608905750381988e-06, "loss": 101.3727, "step": 26490 }, { "epoch": 0.21921661082847335, "grad_norm": 800.9340209960938, "learning_rate": 9.608358752767013e-06, "loss": 142.6461, "step": 26500 }, { "epoch": 0.21929933407784258, "grad_norm": 1471.0047607421875, "learning_rate": 9.60781138848608e-06, "loss": 110.0766, "step": 26510 }, { "epoch": 0.2193820573272118, "grad_norm": 954.5092163085938, "learning_rate": 9.607263657582744e-06, "loss": 110.3453, "step": 26520 }, { "epoch": 0.21946478057658106, "grad_norm": 847.3948974609375, "learning_rate": 9.60671556010058e-06, "loss": 119.9163, "step": 26530 }, { "epoch": 0.21954750382595029, "grad_norm": 559.4891357421875, "learning_rate": 9.606167096083205e-06, "loss": 106.7365, "step": 26540 }, { "epoch": 0.2196302270753195, "grad_norm": 1348.5836181640625, "learning_rate": 9.60561826557425e-06, "loss": 148.4306, "step": 26550 }, { "epoch": 0.21971295032468877, "grad_norm": 912.4396362304688, "learning_rate": 9.60506906861739e-06, "loss": 116.1079, "step": 26560 }, { "epoch": 0.219795673574058, "grad_norm": 939.4193115234375, "learning_rate": 9.604519505256316e-06, "loss": 127.2096, "step": 26570 }, { "epoch": 0.21987839682342722, "grad_norm": 807.13623046875, "learning_rate": 9.603969575534757e-06, "loss": 102.2194, "step": 26580 }, { "epoch": 0.21996112007279647, "grad_norm": 695.100830078125, "learning_rate": 9.60341927949647e-06, "loss": 113.9714, "step": 26590 }, { "epoch": 0.2200438433221657, "grad_norm": 456.3763122558594, "learning_rate": 9.602868617185238e-06, "loss": 105.249, "step": 26600 }, { "epoch": 0.22012656657153493, "grad_norm": 1128.632568359375, "learning_rate": 9.602317588644872e-06, "loss": 104.7491, "step": 26610 }, { "epoch": 0.22020928982090415, "grad_norm": 948.9160766601562, "learning_rate": 9.601766193919217e-06, "loss": 104.4173, "step": 26620 }, { "epoch": 0.2202920130702734, "grad_norm": 725.5731201171875, "learning_rate": 9.601214433052147e-06, "loss": 103.2853, "step": 26630 }, { "epoch": 0.22037473631964263, "grad_norm": 979.1326293945312, "learning_rate": 9.600662306087562e-06, "loss": 122.0349, "step": 26640 }, { "epoch": 0.22045745956901186, "grad_norm": 772.8959350585938, "learning_rate": 9.600109813069389e-06, "loss": 118.9232, "step": 26650 }, { "epoch": 0.22054018281838111, "grad_norm": 879.559814453125, "learning_rate": 9.599556954041591e-06, "loss": 154.7716, "step": 26660 }, { "epoch": 0.22062290606775034, "grad_norm": 852.1553344726562, "learning_rate": 9.599003729048157e-06, "loss": 115.1464, "step": 26670 }, { "epoch": 0.22070562931711957, "grad_norm": 1091.1187744140625, "learning_rate": 9.598450138133101e-06, "loss": 124.5991, "step": 26680 }, { "epoch": 0.22078835256648882, "grad_norm": 1003.1347045898438, "learning_rate": 9.597896181340471e-06, "loss": 133.6112, "step": 26690 }, { "epoch": 0.22087107581585805, "grad_norm": 1310.1240234375, "learning_rate": 9.597341858714344e-06, "loss": 120.8151, "step": 26700 }, { "epoch": 0.22095379906522727, "grad_norm": 973.50439453125, "learning_rate": 9.596787170298824e-06, "loss": 132.3573, "step": 26710 }, { "epoch": 0.22103652231459653, "grad_norm": 838.1239013671875, "learning_rate": 9.596232116138047e-06, "loss": 135.2263, "step": 26720 }, { "epoch": 0.22111924556396576, "grad_norm": 776.5439453125, "learning_rate": 9.595676696276173e-06, "loss": 140.2891, "step": 26730 }, { "epoch": 0.22120196881333498, "grad_norm": 898.1226806640625, "learning_rate": 9.595120910757396e-06, "loss": 156.5662, "step": 26740 }, { "epoch": 0.22128469206270424, "grad_norm": 988.5482788085938, "learning_rate": 9.594564759625936e-06, "loss": 119.9368, "step": 26750 }, { "epoch": 0.22136741531207346, "grad_norm": 2739.469482421875, "learning_rate": 9.594008242926046e-06, "loss": 117.0178, "step": 26760 }, { "epoch": 0.2214501385614427, "grad_norm": 1172.0687255859375, "learning_rate": 9.593451360702003e-06, "loss": 109.8631, "step": 26770 }, { "epoch": 0.22153286181081192, "grad_norm": 1231.6884765625, "learning_rate": 9.592894112998115e-06, "loss": 123.6822, "step": 26780 }, { "epoch": 0.22161558506018117, "grad_norm": 2576.349609375, "learning_rate": 9.592336499858721e-06, "loss": 134.2115, "step": 26790 }, { "epoch": 0.2216983083095504, "grad_norm": 851.02197265625, "learning_rate": 9.59177852132819e-06, "loss": 130.6942, "step": 26800 }, { "epoch": 0.22178103155891962, "grad_norm": 1379.3516845703125, "learning_rate": 9.591220177450912e-06, "loss": 148.0982, "step": 26810 }, { "epoch": 0.22186375480828888, "grad_norm": 1033.928955078125, "learning_rate": 9.590661468271319e-06, "loss": 99.2162, "step": 26820 }, { "epoch": 0.2219464780576581, "grad_norm": 987.3099365234375, "learning_rate": 9.59010239383386e-06, "loss": 138.9152, "step": 26830 }, { "epoch": 0.22202920130702733, "grad_norm": 687.860595703125, "learning_rate": 9.589542954183018e-06, "loss": 112.7026, "step": 26840 }, { "epoch": 0.22211192455639658, "grad_norm": 732.00537109375, "learning_rate": 9.588983149363307e-06, "loss": 123.2144, "step": 26850 }, { "epoch": 0.2221946478057658, "grad_norm": 1079.510009765625, "learning_rate": 9.588422979419267e-06, "loss": 87.7841, "step": 26860 }, { "epoch": 0.22227737105513504, "grad_norm": 1468.6038818359375, "learning_rate": 9.587862444395471e-06, "loss": 136.3903, "step": 26870 }, { "epoch": 0.2223600943045043, "grad_norm": 889.5414428710938, "learning_rate": 9.587301544336513e-06, "loss": 115.5707, "step": 26880 }, { "epoch": 0.22244281755387352, "grad_norm": 590.138916015625, "learning_rate": 9.586740279287024e-06, "loss": 117.9152, "step": 26890 }, { "epoch": 0.22252554080324274, "grad_norm": 1332.5377197265625, "learning_rate": 9.586178649291664e-06, "loss": 131.7125, "step": 26900 }, { "epoch": 0.222608264052612, "grad_norm": 1022.7705688476562, "learning_rate": 9.585616654395113e-06, "loss": 115.9927, "step": 26910 }, { "epoch": 0.22269098730198122, "grad_norm": 847.87255859375, "learning_rate": 9.585054294642093e-06, "loss": 169.3321, "step": 26920 }, { "epoch": 0.22277371055135045, "grad_norm": 1865.4990234375, "learning_rate": 9.584491570077343e-06, "loss": 128.1739, "step": 26930 }, { "epoch": 0.2228564338007197, "grad_norm": 865.5852661132812, "learning_rate": 9.58392848074564e-06, "loss": 119.8757, "step": 26940 }, { "epoch": 0.22293915705008893, "grad_norm": 1116.7584228515625, "learning_rate": 9.583365026691785e-06, "loss": 111.9066, "step": 26950 }, { "epoch": 0.22302188029945816, "grad_norm": 1089.7393798828125, "learning_rate": 9.58280120796061e-06, "loss": 123.0073, "step": 26960 }, { "epoch": 0.22310460354882738, "grad_norm": 854.9454956054688, "learning_rate": 9.582237024596974e-06, "loss": 131.7255, "step": 26970 }, { "epoch": 0.22318732679819664, "grad_norm": 840.7254028320312, "learning_rate": 9.581672476645768e-06, "loss": 134.3853, "step": 26980 }, { "epoch": 0.22327005004756587, "grad_norm": 1240.8424072265625, "learning_rate": 9.58110756415191e-06, "loss": 127.8951, "step": 26990 }, { "epoch": 0.2233527732969351, "grad_norm": 1180.210205078125, "learning_rate": 9.580542287160348e-06, "loss": 127.6457, "step": 27000 }, { "epoch": 0.22343549654630435, "grad_norm": 1015.6724853515625, "learning_rate": 9.579976645716058e-06, "loss": 130.7046, "step": 27010 }, { "epoch": 0.22351821979567357, "grad_norm": 1194.459228515625, "learning_rate": 9.579410639864046e-06, "loss": 133.5198, "step": 27020 }, { "epoch": 0.2236009430450428, "grad_norm": 1310.887451171875, "learning_rate": 9.578844269649345e-06, "loss": 123.0892, "step": 27030 }, { "epoch": 0.22368366629441205, "grad_norm": 764.6407470703125, "learning_rate": 9.578277535117022e-06, "loss": 118.5598, "step": 27040 }, { "epoch": 0.22376638954378128, "grad_norm": 1107.6348876953125, "learning_rate": 9.577710436312164e-06, "loss": 113.8774, "step": 27050 }, { "epoch": 0.2238491127931505, "grad_norm": 1522.5843505859375, "learning_rate": 9.577142973279896e-06, "loss": 137.2552, "step": 27060 }, { "epoch": 0.22393183604251976, "grad_norm": 984.351318359375, "learning_rate": 9.576575146065369e-06, "loss": 128.6748, "step": 27070 }, { "epoch": 0.224014559291889, "grad_norm": 642.086181640625, "learning_rate": 9.576006954713762e-06, "loss": 117.2645, "step": 27080 }, { "epoch": 0.2240972825412582, "grad_norm": 1214.22998046875, "learning_rate": 9.57543839927028e-06, "loss": 128.8348, "step": 27090 }, { "epoch": 0.22418000579062747, "grad_norm": 1050.501953125, "learning_rate": 9.574869479780165e-06, "loss": 147.3516, "step": 27100 }, { "epoch": 0.2242627290399967, "grad_norm": 1185.0849609375, "learning_rate": 9.57430019628868e-06, "loss": 147.9033, "step": 27110 }, { "epoch": 0.22434545228936592, "grad_norm": 1115.831298828125, "learning_rate": 9.573730548841122e-06, "loss": 111.629, "step": 27120 }, { "epoch": 0.22442817553873518, "grad_norm": 1058.506103515625, "learning_rate": 9.573160537482816e-06, "loss": 135.1445, "step": 27130 }, { "epoch": 0.2245108987881044, "grad_norm": 1066.4493408203125, "learning_rate": 9.572590162259112e-06, "loss": 171.7336, "step": 27140 }, { "epoch": 0.22459362203747363, "grad_norm": 735.5531005859375, "learning_rate": 9.572019423215395e-06, "loss": 110.0656, "step": 27150 }, { "epoch": 0.22467634528684285, "grad_norm": 1075.277587890625, "learning_rate": 9.571448320397076e-06, "loss": 102.259, "step": 27160 }, { "epoch": 0.2247590685362121, "grad_norm": 677.2155151367188, "learning_rate": 9.570876853849593e-06, "loss": 128.2644, "step": 27170 }, { "epoch": 0.22484179178558134, "grad_norm": 993.1212768554688, "learning_rate": 9.570305023618417e-06, "loss": 185.3893, "step": 27180 }, { "epoch": 0.22492451503495056, "grad_norm": 963.5702514648438, "learning_rate": 9.569732829749045e-06, "loss": 125.3146, "step": 27190 }, { "epoch": 0.22500723828431982, "grad_norm": 950.5331420898438, "learning_rate": 9.569160272287003e-06, "loss": 126.4869, "step": 27200 }, { "epoch": 0.22508996153368904, "grad_norm": 820.460205078125, "learning_rate": 9.56858735127785e-06, "loss": 76.8966, "step": 27210 }, { "epoch": 0.22517268478305827, "grad_norm": 572.8544311523438, "learning_rate": 9.568014066767166e-06, "loss": 126.8829, "step": 27220 }, { "epoch": 0.22525540803242752, "grad_norm": 998.837646484375, "learning_rate": 9.567440418800569e-06, "loss": 134.7057, "step": 27230 }, { "epoch": 0.22533813128179675, "grad_norm": 956.578857421875, "learning_rate": 9.566866407423698e-06, "loss": 143.3908, "step": 27240 }, { "epoch": 0.22542085453116598, "grad_norm": 1049.07861328125, "learning_rate": 9.566292032682228e-06, "loss": 137.0985, "step": 27250 }, { "epoch": 0.22550357778053523, "grad_norm": 870.1298217773438, "learning_rate": 9.565717294621856e-06, "loss": 114.6946, "step": 27260 }, { "epoch": 0.22558630102990446, "grad_norm": 784.1261596679688, "learning_rate": 9.565142193288313e-06, "loss": 150.3023, "step": 27270 }, { "epoch": 0.22566902427927368, "grad_norm": 721.9214477539062, "learning_rate": 9.564566728727358e-06, "loss": 92.1085, "step": 27280 }, { "epoch": 0.22575174752864294, "grad_norm": 819.4923706054688, "learning_rate": 9.563990900984775e-06, "loss": 103.506, "step": 27290 }, { "epoch": 0.22583447077801216, "grad_norm": 2060.950927734375, "learning_rate": 9.563414710106382e-06, "loss": 212.3363, "step": 27300 }, { "epoch": 0.2259171940273814, "grad_norm": 921.5272216796875, "learning_rate": 9.562838156138025e-06, "loss": 142.4072, "step": 27310 }, { "epoch": 0.22599991727675062, "grad_norm": 935.8705444335938, "learning_rate": 9.562261239125575e-06, "loss": 122.9581, "step": 27320 }, { "epoch": 0.22608264052611987, "grad_norm": 925.080810546875, "learning_rate": 9.561683959114938e-06, "loss": 127.3665, "step": 27330 }, { "epoch": 0.2261653637754891, "grad_norm": 1195.8099365234375, "learning_rate": 9.561106316152043e-06, "loss": 133.1869, "step": 27340 }, { "epoch": 0.22624808702485832, "grad_norm": 784.7255249023438, "learning_rate": 9.56052831028285e-06, "loss": 139.8995, "step": 27350 }, { "epoch": 0.22633081027422758, "grad_norm": 1093.8494873046875, "learning_rate": 9.559949941553351e-06, "loss": 119.8402, "step": 27360 }, { "epoch": 0.2264135335235968, "grad_norm": 973.4978637695312, "learning_rate": 9.559371210009562e-06, "loss": 124.6592, "step": 27370 }, { "epoch": 0.22649625677296603, "grad_norm": 869.2335815429688, "learning_rate": 9.55879211569753e-06, "loss": 94.1615, "step": 27380 }, { "epoch": 0.22657898002233529, "grad_norm": 1302.50244140625, "learning_rate": 9.55821265866333e-06, "loss": 123.9871, "step": 27390 }, { "epoch": 0.2266617032717045, "grad_norm": 1908.8446044921875, "learning_rate": 9.55763283895307e-06, "loss": 140.2897, "step": 27400 }, { "epoch": 0.22674442652107374, "grad_norm": 553.2281494140625, "learning_rate": 9.557052656612882e-06, "loss": 98.0197, "step": 27410 }, { "epoch": 0.226827149770443, "grad_norm": 693.7083740234375, "learning_rate": 9.556472111688928e-06, "loss": 121.6574, "step": 27420 }, { "epoch": 0.22690987301981222, "grad_norm": 1643.4599609375, "learning_rate": 9.555891204227399e-06, "loss": 107.1005, "step": 27430 }, { "epoch": 0.22699259626918145, "grad_norm": 792.270263671875, "learning_rate": 9.555309934274515e-06, "loss": 177.7701, "step": 27440 }, { "epoch": 0.2270753195185507, "grad_norm": 456.06939697265625, "learning_rate": 9.554728301876525e-06, "loss": 137.5959, "step": 27450 }, { "epoch": 0.22715804276791993, "grad_norm": 966.9349365234375, "learning_rate": 9.554146307079711e-06, "loss": 99.2951, "step": 27460 }, { "epoch": 0.22724076601728915, "grad_norm": 1171.660400390625, "learning_rate": 9.553563949930374e-06, "loss": 126.8152, "step": 27470 }, { "epoch": 0.2273234892666584, "grad_norm": 631.3641967773438, "learning_rate": 9.552981230474849e-06, "loss": 101.0075, "step": 27480 }, { "epoch": 0.22740621251602763, "grad_norm": 1808.03466796875, "learning_rate": 9.552398148759506e-06, "loss": 105.2634, "step": 27490 }, { "epoch": 0.22748893576539686, "grad_norm": 1841.3206787109375, "learning_rate": 9.551814704830734e-06, "loss": 124.4455, "step": 27500 }, { "epoch": 0.2275716590147661, "grad_norm": 2790.470947265625, "learning_rate": 9.551230898734955e-06, "loss": 143.8618, "step": 27510 }, { "epoch": 0.22765438226413534, "grad_norm": 916.5533447265625, "learning_rate": 9.550646730518623e-06, "loss": 105.8261, "step": 27520 }, { "epoch": 0.22773710551350457, "grad_norm": 1437.979248046875, "learning_rate": 9.550062200228214e-06, "loss": 101.92, "step": 27530 }, { "epoch": 0.2278198287628738, "grad_norm": 1014.4559326171875, "learning_rate": 9.549477307910238e-06, "loss": 126.4266, "step": 27540 }, { "epoch": 0.22790255201224305, "grad_norm": 1683.48779296875, "learning_rate": 9.548892053611232e-06, "loss": 133.186, "step": 27550 }, { "epoch": 0.22798527526161227, "grad_norm": 864.87744140625, "learning_rate": 9.54830643737776e-06, "loss": 137.8783, "step": 27560 }, { "epoch": 0.2280679985109815, "grad_norm": 1702.0152587890625, "learning_rate": 9.54772045925642e-06, "loss": 125.6688, "step": 27570 }, { "epoch": 0.22815072176035076, "grad_norm": 1634.5037841796875, "learning_rate": 9.547134119293835e-06, "loss": 126.7895, "step": 27580 }, { "epoch": 0.22823344500971998, "grad_norm": 1126.988525390625, "learning_rate": 9.546547417536656e-06, "loss": 117.5014, "step": 27590 }, { "epoch": 0.2283161682590892, "grad_norm": 774.8274536132812, "learning_rate": 9.545960354031564e-06, "loss": 110.8326, "step": 27600 }, { "epoch": 0.22839889150845846, "grad_norm": 920.1649780273438, "learning_rate": 9.545372928825271e-06, "loss": 85.081, "step": 27610 }, { "epoch": 0.2284816147578277, "grad_norm": 550.8583374023438, "learning_rate": 9.544785141964514e-06, "loss": 82.1225, "step": 27620 }, { "epoch": 0.22856433800719692, "grad_norm": 2031.975830078125, "learning_rate": 9.544196993496062e-06, "loss": 113.8713, "step": 27630 }, { "epoch": 0.22864706125656617, "grad_norm": 660.8304443359375, "learning_rate": 9.54360848346671e-06, "loss": 111.5653, "step": 27640 }, { "epoch": 0.2287297845059354, "grad_norm": 1158.63916015625, "learning_rate": 9.543019611923283e-06, "loss": 105.64, "step": 27650 }, { "epoch": 0.22881250775530462, "grad_norm": 1066.19970703125, "learning_rate": 9.542430378912634e-06, "loss": 112.1879, "step": 27660 }, { "epoch": 0.22889523100467388, "grad_norm": 1097.3199462890625, "learning_rate": 9.541840784481648e-06, "loss": 112.1237, "step": 27670 }, { "epoch": 0.2289779542540431, "grad_norm": 1081.3756103515625, "learning_rate": 9.541250828677235e-06, "loss": 144.3419, "step": 27680 }, { "epoch": 0.22906067750341233, "grad_norm": 899.7421264648438, "learning_rate": 9.540660511546335e-06, "loss": 129.8028, "step": 27690 }, { "epoch": 0.22914340075278156, "grad_norm": 835.4717407226562, "learning_rate": 9.540069833135917e-06, "loss": 131.3196, "step": 27700 }, { "epoch": 0.2292261240021508, "grad_norm": 603.4605712890625, "learning_rate": 9.539478793492978e-06, "loss": 94.5269, "step": 27710 }, { "epoch": 0.22930884725152004, "grad_norm": 827.4793701171875, "learning_rate": 9.538887392664544e-06, "loss": 97.6406, "step": 27720 }, { "epoch": 0.22939157050088926, "grad_norm": 947.7173461914062, "learning_rate": 9.53829563069767e-06, "loss": 134.4241, "step": 27730 }, { "epoch": 0.22947429375025852, "grad_norm": 1094.2747802734375, "learning_rate": 9.537703507639444e-06, "loss": 102.8811, "step": 27740 }, { "epoch": 0.22955701699962774, "grad_norm": 1054.0640869140625, "learning_rate": 9.537111023536973e-06, "loss": 110.9093, "step": 27750 }, { "epoch": 0.22963974024899697, "grad_norm": 902.435302734375, "learning_rate": 9.536518178437402e-06, "loss": 120.4104, "step": 27760 }, { "epoch": 0.22972246349836623, "grad_norm": 1348.26025390625, "learning_rate": 9.535924972387898e-06, "loss": 109.3034, "step": 27770 }, { "epoch": 0.22980518674773545, "grad_norm": 857.2482299804688, "learning_rate": 9.535331405435662e-06, "loss": 125.6188, "step": 27780 }, { "epoch": 0.22988790999710468, "grad_norm": 1263.4981689453125, "learning_rate": 9.534737477627918e-06, "loss": 152.1994, "step": 27790 }, { "epoch": 0.22997063324647393, "grad_norm": 1043.9830322265625, "learning_rate": 9.534143189011928e-06, "loss": 139.1974, "step": 27800 }, { "epoch": 0.23005335649584316, "grad_norm": 1181.6470947265625, "learning_rate": 9.533548539634971e-06, "loss": 124.807, "step": 27810 }, { "epoch": 0.23013607974521239, "grad_norm": 1061.00244140625, "learning_rate": 9.532953529544365e-06, "loss": 127.3019, "step": 27820 }, { "epoch": 0.23021880299458164, "grad_norm": 2072.89404296875, "learning_rate": 9.532358158787446e-06, "loss": 112.2069, "step": 27830 }, { "epoch": 0.23030152624395087, "grad_norm": 726.18505859375, "learning_rate": 9.531762427411592e-06, "loss": 126.4197, "step": 27840 }, { "epoch": 0.2303842494933201, "grad_norm": 1991.278564453125, "learning_rate": 9.531166335464198e-06, "loss": 257.9386, "step": 27850 }, { "epoch": 0.23046697274268935, "grad_norm": 648.2035522460938, "learning_rate": 9.530569882992698e-06, "loss": 153.0686, "step": 27860 }, { "epoch": 0.23054969599205857, "grad_norm": 913.1694946289062, "learning_rate": 9.52997307004454e-06, "loss": 122.5074, "step": 27870 }, { "epoch": 0.2306324192414278, "grad_norm": 1065.9097900390625, "learning_rate": 9.529375896667218e-06, "loss": 95.8606, "step": 27880 }, { "epoch": 0.23071514249079703, "grad_norm": 2741.90234375, "learning_rate": 9.528778362908241e-06, "loss": 141.1961, "step": 27890 }, { "epoch": 0.23079786574016628, "grad_norm": 1098.0302734375, "learning_rate": 9.528180468815155e-06, "loss": 103.5171, "step": 27900 }, { "epoch": 0.2308805889895355, "grad_norm": 899.1497192382812, "learning_rate": 9.527582214435531e-06, "loss": 143.412, "step": 27910 }, { "epoch": 0.23096331223890473, "grad_norm": 311.1416320800781, "learning_rate": 9.526983599816968e-06, "loss": 91.3562, "step": 27920 }, { "epoch": 0.231046035488274, "grad_norm": 1649.770263671875, "learning_rate": 9.526384625007096e-06, "loss": 124.463, "step": 27930 }, { "epoch": 0.23112875873764321, "grad_norm": 804.8977661132812, "learning_rate": 9.525785290053573e-06, "loss": 138.7314, "step": 27940 }, { "epoch": 0.23121148198701244, "grad_norm": 859.3971557617188, "learning_rate": 9.525185595004085e-06, "loss": 85.416, "step": 27950 }, { "epoch": 0.2312942052363817, "grad_norm": 901.3175659179688, "learning_rate": 9.524585539906345e-06, "loss": 93.4797, "step": 27960 }, { "epoch": 0.23137692848575092, "grad_norm": 1204.8818359375, "learning_rate": 9.523985124808102e-06, "loss": 140.1107, "step": 27970 }, { "epoch": 0.23145965173512015, "grad_norm": 1186.2174072265625, "learning_rate": 9.523384349757123e-06, "loss": 111.8204, "step": 27980 }, { "epoch": 0.2315423749844894, "grad_norm": 1178.4058837890625, "learning_rate": 9.522783214801213e-06, "loss": 164.7376, "step": 27990 }, { "epoch": 0.23162509823385863, "grad_norm": 864.9526977539062, "learning_rate": 9.522181719988196e-06, "loss": 112.4256, "step": 28000 }, { "epoch": 0.23170782148322785, "grad_norm": 1473.5499267578125, "learning_rate": 9.521579865365935e-06, "loss": 117.2831, "step": 28010 }, { "epoch": 0.2317905447325971, "grad_norm": 525.5335083007812, "learning_rate": 9.520977650982316e-06, "loss": 150.6173, "step": 28020 }, { "epoch": 0.23187326798196634, "grad_norm": 886.9190063476562, "learning_rate": 9.520375076885253e-06, "loss": 146.7385, "step": 28030 }, { "epoch": 0.23195599123133556, "grad_norm": 937.6173095703125, "learning_rate": 9.519772143122691e-06, "loss": 104.6981, "step": 28040 }, { "epoch": 0.2320387144807048, "grad_norm": 834.8984375, "learning_rate": 9.519168849742603e-06, "loss": 96.541, "step": 28050 }, { "epoch": 0.23212143773007404, "grad_norm": 863.10986328125, "learning_rate": 9.51856519679299e-06, "loss": 93.61, "step": 28060 }, { "epoch": 0.23220416097944327, "grad_norm": 683.4520263671875, "learning_rate": 9.517961184321882e-06, "loss": 131.8342, "step": 28070 }, { "epoch": 0.2322868842288125, "grad_norm": 2610.283203125, "learning_rate": 9.517356812377336e-06, "loss": 189.3643, "step": 28080 }, { "epoch": 0.23236960747818175, "grad_norm": 993.5272827148438, "learning_rate": 9.516752081007441e-06, "loss": 133.2566, "step": 28090 }, { "epoch": 0.23245233072755098, "grad_norm": 1212.4417724609375, "learning_rate": 9.51614699026031e-06, "loss": 129.3865, "step": 28100 }, { "epoch": 0.2325350539769202, "grad_norm": 812.443359375, "learning_rate": 9.515541540184093e-06, "loss": 95.0065, "step": 28110 }, { "epoch": 0.23261777722628946, "grad_norm": 1204.5474853515625, "learning_rate": 9.514935730826957e-06, "loss": 145.4519, "step": 28120 }, { "epoch": 0.23270050047565868, "grad_norm": 1555.590576171875, "learning_rate": 9.514329562237107e-06, "loss": 136.7933, "step": 28130 }, { "epoch": 0.2327832237250279, "grad_norm": 870.7056274414062, "learning_rate": 9.51372303446277e-06, "loss": 122.1039, "step": 28140 }, { "epoch": 0.23286594697439716, "grad_norm": 1071.2955322265625, "learning_rate": 9.513116147552207e-06, "loss": 102.6043, "step": 28150 }, { "epoch": 0.2329486702237664, "grad_norm": 797.454833984375, "learning_rate": 9.512508901553703e-06, "loss": 140.6481, "step": 28160 }, { "epoch": 0.23303139347313562, "grad_norm": 585.2443237304688, "learning_rate": 9.511901296515578e-06, "loss": 113.4713, "step": 28170 }, { "epoch": 0.23311411672250487, "grad_norm": 870.2528076171875, "learning_rate": 9.511293332486172e-06, "loss": 130.058, "step": 28180 }, { "epoch": 0.2331968399718741, "grad_norm": 1487.830322265625, "learning_rate": 9.51068500951386e-06, "loss": 132.6228, "step": 28190 }, { "epoch": 0.23327956322124332, "grad_norm": 3511.711181640625, "learning_rate": 9.510076327647043e-06, "loss": 113.2403, "step": 28200 }, { "epoch": 0.23336228647061258, "grad_norm": 1605.590087890625, "learning_rate": 9.509467286934151e-06, "loss": 150.1105, "step": 28210 }, { "epoch": 0.2334450097199818, "grad_norm": 1058.44189453125, "learning_rate": 9.508857887423644e-06, "loss": 122.3643, "step": 28220 }, { "epoch": 0.23352773296935103, "grad_norm": 820.7523803710938, "learning_rate": 9.508248129164006e-06, "loss": 80.4105, "step": 28230 }, { "epoch": 0.23361045621872026, "grad_norm": 912.558837890625, "learning_rate": 9.507638012203755e-06, "loss": 152.501, "step": 28240 }, { "epoch": 0.2336931794680895, "grad_norm": 1337.6898193359375, "learning_rate": 9.507027536591436e-06, "loss": 149.5806, "step": 28250 }, { "epoch": 0.23377590271745874, "grad_norm": 1526.1043701171875, "learning_rate": 9.506416702375618e-06, "loss": 153.4466, "step": 28260 }, { "epoch": 0.23385862596682797, "grad_norm": 933.4179077148438, "learning_rate": 9.505805509604906e-06, "loss": 106.969, "step": 28270 }, { "epoch": 0.23394134921619722, "grad_norm": 1189.154296875, "learning_rate": 9.505193958327927e-06, "loss": 129.8097, "step": 28280 }, { "epoch": 0.23402407246556645, "grad_norm": 691.6627197265625, "learning_rate": 9.504582048593343e-06, "loss": 99.3678, "step": 28290 }, { "epoch": 0.23410679571493567, "grad_norm": 1236.778076171875, "learning_rate": 9.503969780449838e-06, "loss": 119.0243, "step": 28300 }, { "epoch": 0.23418951896430493, "grad_norm": 1243.570068359375, "learning_rate": 9.503357153946126e-06, "loss": 104.1002, "step": 28310 }, { "epoch": 0.23427224221367415, "grad_norm": 1092.5941162109375, "learning_rate": 9.502744169130955e-06, "loss": 97.2079, "step": 28320 }, { "epoch": 0.23435496546304338, "grad_norm": 1051.38671875, "learning_rate": 9.502130826053095e-06, "loss": 132.0031, "step": 28330 }, { "epoch": 0.23443768871241263, "grad_norm": 1343.4345703125, "learning_rate": 9.501517124761347e-06, "loss": 112.9695, "step": 28340 }, { "epoch": 0.23452041196178186, "grad_norm": 637.9159545898438, "learning_rate": 9.50090306530454e-06, "loss": 85.6707, "step": 28350 }, { "epoch": 0.2346031352111511, "grad_norm": 976.9581909179688, "learning_rate": 9.500288647731533e-06, "loss": 127.3839, "step": 28360 }, { "epoch": 0.23468585846052034, "grad_norm": 1000.0506591796875, "learning_rate": 9.49967387209121e-06, "loss": 168.1098, "step": 28370 }, { "epoch": 0.23476858170988957, "grad_norm": 838.3327026367188, "learning_rate": 9.499058738432492e-06, "loss": 112.4135, "step": 28380 }, { "epoch": 0.2348513049592588, "grad_norm": 708.0794067382812, "learning_rate": 9.498443246804314e-06, "loss": 120.7116, "step": 28390 }, { "epoch": 0.23493402820862805, "grad_norm": 1063.5574951171875, "learning_rate": 9.497827397255655e-06, "loss": 99.907, "step": 28400 }, { "epoch": 0.23501675145799727, "grad_norm": 933.8026123046875, "learning_rate": 9.49721118983551e-06, "loss": 153.6268, "step": 28410 }, { "epoch": 0.2350994747073665, "grad_norm": 836.0083618164062, "learning_rate": 9.49659462459291e-06, "loss": 123.5795, "step": 28420 }, { "epoch": 0.23518219795673573, "grad_norm": 1091.86669921875, "learning_rate": 9.495977701576913e-06, "loss": 141.2293, "step": 28430 }, { "epoch": 0.23526492120610498, "grad_norm": 960.7584838867188, "learning_rate": 9.495360420836603e-06, "loss": 113.6143, "step": 28440 }, { "epoch": 0.2353476444554742, "grad_norm": 1572.40771484375, "learning_rate": 9.494742782421099e-06, "loss": 159.3734, "step": 28450 }, { "epoch": 0.23543036770484344, "grad_norm": 1193.2608642578125, "learning_rate": 9.494124786379535e-06, "loss": 128.3347, "step": 28460 }, { "epoch": 0.2355130909542127, "grad_norm": 1346.21630859375, "learning_rate": 9.49350643276109e-06, "loss": 105.2375, "step": 28470 }, { "epoch": 0.23559581420358192, "grad_norm": 1731.52001953125, "learning_rate": 9.49288772161496e-06, "loss": 134.3214, "step": 28480 }, { "epoch": 0.23567853745295114, "grad_norm": 982.36279296875, "learning_rate": 9.492268652990374e-06, "loss": 120.5295, "step": 28490 }, { "epoch": 0.2357612607023204, "grad_norm": 1203.948486328125, "learning_rate": 9.491649226936586e-06, "loss": 149.1518, "step": 28500 }, { "epoch": 0.23584398395168962, "grad_norm": 1445.0263671875, "learning_rate": 9.491029443502884e-06, "loss": 133.0336, "step": 28510 }, { "epoch": 0.23592670720105885, "grad_norm": 1120.74462890625, "learning_rate": 9.490409302738582e-06, "loss": 104.1838, "step": 28520 }, { "epoch": 0.2360094304504281, "grad_norm": 1055.50830078125, "learning_rate": 9.489788804693017e-06, "loss": 97.1542, "step": 28530 }, { "epoch": 0.23609215369979733, "grad_norm": 645.5867309570312, "learning_rate": 9.489167949415563e-06, "loss": 124.6525, "step": 28540 }, { "epoch": 0.23617487694916656, "grad_norm": 723.2225952148438, "learning_rate": 9.48854673695562e-06, "loss": 117.6365, "step": 28550 }, { "epoch": 0.2362576001985358, "grad_norm": 1657.0540771484375, "learning_rate": 9.48792516736261e-06, "loss": 131.2895, "step": 28560 }, { "epoch": 0.23634032344790504, "grad_norm": 967.8582763671875, "learning_rate": 9.487303240685992e-06, "loss": 100.9019, "step": 28570 }, { "epoch": 0.23642304669727426, "grad_norm": 2011.5921630859375, "learning_rate": 9.48668095697525e-06, "loss": 167.2856, "step": 28580 }, { "epoch": 0.2365057699466435, "grad_norm": 991.98291015625, "learning_rate": 9.486058316279894e-06, "loss": 158.8021, "step": 28590 }, { "epoch": 0.23658849319601274, "grad_norm": 905.6279296875, "learning_rate": 9.485435318649468e-06, "loss": 124.5288, "step": 28600 }, { "epoch": 0.23667121644538197, "grad_norm": 793.5623779296875, "learning_rate": 9.484811964133537e-06, "loss": 138.7439, "step": 28610 }, { "epoch": 0.2367539396947512, "grad_norm": 1176.435791015625, "learning_rate": 9.484188252781701e-06, "loss": 109.5845, "step": 28620 }, { "epoch": 0.23683666294412045, "grad_norm": 840.8705444335938, "learning_rate": 9.483564184643586e-06, "loss": 90.2001, "step": 28630 }, { "epoch": 0.23691938619348968, "grad_norm": 705.6690673828125, "learning_rate": 9.482939759768845e-06, "loss": 145.6554, "step": 28640 }, { "epoch": 0.2370021094428589, "grad_norm": 1252.253662109375, "learning_rate": 9.48231497820716e-06, "loss": 129.5605, "step": 28650 }, { "epoch": 0.23708483269222816, "grad_norm": 1174.8831787109375, "learning_rate": 9.481689840008246e-06, "loss": 121.5843, "step": 28660 }, { "epoch": 0.23716755594159739, "grad_norm": 1164.958740234375, "learning_rate": 9.481064345221838e-06, "loss": 130.8124, "step": 28670 }, { "epoch": 0.2372502791909666, "grad_norm": 1460.279052734375, "learning_rate": 9.480438493897707e-06, "loss": 186.2501, "step": 28680 }, { "epoch": 0.23733300244033587, "grad_norm": 1548.7734375, "learning_rate": 9.479812286085645e-06, "loss": 122.9342, "step": 28690 }, { "epoch": 0.2374157256897051, "grad_norm": 485.3841857910156, "learning_rate": 9.47918572183548e-06, "loss": 116.2105, "step": 28700 }, { "epoch": 0.23749844893907432, "grad_norm": 2227.711181640625, "learning_rate": 9.478558801197065e-06, "loss": 108.795, "step": 28710 }, { "epoch": 0.23758117218844357, "grad_norm": 1480.4208984375, "learning_rate": 9.47793152422028e-06, "loss": 103.7922, "step": 28720 }, { "epoch": 0.2376638954378128, "grad_norm": 845.8829956054688, "learning_rate": 9.477303890955032e-06, "loss": 112.599, "step": 28730 }, { "epoch": 0.23774661868718203, "grad_norm": 593.84619140625, "learning_rate": 9.476675901451264e-06, "loss": 124.1586, "step": 28740 }, { "epoch": 0.23782934193655128, "grad_norm": 636.6671752929688, "learning_rate": 9.476047555758938e-06, "loss": 172.5131, "step": 28750 }, { "epoch": 0.2379120651859205, "grad_norm": 4360.0341796875, "learning_rate": 9.475418853928051e-06, "loss": 191.1747, "step": 28760 }, { "epoch": 0.23799478843528973, "grad_norm": 483.2803649902344, "learning_rate": 9.474789796008625e-06, "loss": 138.2722, "step": 28770 }, { "epoch": 0.23807751168465896, "grad_norm": 1149.00048828125, "learning_rate": 9.474160382050711e-06, "loss": 126.3032, "step": 28780 }, { "epoch": 0.23816023493402821, "grad_norm": 1440.6688232421875, "learning_rate": 9.47353061210439e-06, "loss": 96.6904, "step": 28790 }, { "epoch": 0.23824295818339744, "grad_norm": 2017.08837890625, "learning_rate": 9.47290048621977e-06, "loss": 116.2174, "step": 28800 }, { "epoch": 0.23832568143276667, "grad_norm": 900.5809326171875, "learning_rate": 9.472270004446984e-06, "loss": 110.8572, "step": 28810 }, { "epoch": 0.23840840468213592, "grad_norm": 853.006591796875, "learning_rate": 9.4716391668362e-06, "loss": 115.7237, "step": 28820 }, { "epoch": 0.23849112793150515, "grad_norm": 1115.9259033203125, "learning_rate": 9.471007973437607e-06, "loss": 108.8435, "step": 28830 }, { "epoch": 0.23857385118087437, "grad_norm": 1762.0460205078125, "learning_rate": 9.470376424301432e-06, "loss": 148.8191, "step": 28840 }, { "epoch": 0.23865657443024363, "grad_norm": 1468.8228759765625, "learning_rate": 9.46974451947792e-06, "loss": 154.2129, "step": 28850 }, { "epoch": 0.23873929767961286, "grad_norm": 1209.044921875, "learning_rate": 9.469112259017349e-06, "loss": 107.4766, "step": 28860 }, { "epoch": 0.23882202092898208, "grad_norm": 1850.443603515625, "learning_rate": 9.468479642970027e-06, "loss": 117.6253, "step": 28870 }, { "epoch": 0.23890474417835134, "grad_norm": 1324.356689453125, "learning_rate": 9.467846671386287e-06, "loss": 178.3749, "step": 28880 }, { "epoch": 0.23898746742772056, "grad_norm": 1060.7711181640625, "learning_rate": 9.467213344316493e-06, "loss": 101.0151, "step": 28890 }, { "epoch": 0.2390701906770898, "grad_norm": 2596.78076171875, "learning_rate": 9.466579661811032e-06, "loss": 149.5662, "step": 28900 }, { "epoch": 0.23915291392645904, "grad_norm": 1160.0654296875, "learning_rate": 9.46594562392033e-06, "loss": 114.8278, "step": 28910 }, { "epoch": 0.23923563717582827, "grad_norm": 1474.780029296875, "learning_rate": 9.465311230694828e-06, "loss": 94.9893, "step": 28920 }, { "epoch": 0.2393183604251975, "grad_norm": 827.9722900390625, "learning_rate": 9.464676482185005e-06, "loss": 82.4494, "step": 28930 }, { "epoch": 0.23940108367456675, "grad_norm": 490.88360595703125, "learning_rate": 9.464041378441365e-06, "loss": 160.122, "step": 28940 }, { "epoch": 0.23948380692393598, "grad_norm": 2539.133544921875, "learning_rate": 9.46340591951444e-06, "loss": 135.2263, "step": 28950 }, { "epoch": 0.2395665301733052, "grad_norm": 620.4109497070312, "learning_rate": 9.462770105454789e-06, "loss": 130.8018, "step": 28960 }, { "epoch": 0.23964925342267443, "grad_norm": 575.00146484375, "learning_rate": 9.462133936313002e-06, "loss": 123.9707, "step": 28970 }, { "epoch": 0.23973197667204368, "grad_norm": 764.5715942382812, "learning_rate": 9.461497412139697e-06, "loss": 103.3378, "step": 28980 }, { "epoch": 0.2398146999214129, "grad_norm": 1179.3966064453125, "learning_rate": 9.46086053298552e-06, "loss": 139.9406, "step": 28990 }, { "epoch": 0.23989742317078214, "grad_norm": 1064.7276611328125, "learning_rate": 9.460223298901138e-06, "loss": 100.375, "step": 29000 }, { "epoch": 0.2399801464201514, "grad_norm": 833.2089233398438, "learning_rate": 9.459585709937262e-06, "loss": 120.6056, "step": 29010 }, { "epoch": 0.24006286966952062, "grad_norm": 1361.6549072265625, "learning_rate": 9.458947766144617e-06, "loss": 129.4685, "step": 29020 }, { "epoch": 0.24014559291888984, "grad_norm": 1131.559326171875, "learning_rate": 9.458309467573963e-06, "loss": 90.7656, "step": 29030 }, { "epoch": 0.2402283161682591, "grad_norm": 1216.7392578125, "learning_rate": 9.457670814276083e-06, "loss": 105.4316, "step": 29040 }, { "epoch": 0.24031103941762832, "grad_norm": 853.8714599609375, "learning_rate": 9.457031806301795e-06, "loss": 94.2898, "step": 29050 }, { "epoch": 0.24039376266699755, "grad_norm": 826.33837890625, "learning_rate": 9.456392443701943e-06, "loss": 118.5048, "step": 29060 }, { "epoch": 0.2404764859163668, "grad_norm": 969.9588012695312, "learning_rate": 9.455752726527395e-06, "loss": 158.1088, "step": 29070 }, { "epoch": 0.24055920916573603, "grad_norm": 591.8674926757812, "learning_rate": 9.45511265482905e-06, "loss": 106.0139, "step": 29080 }, { "epoch": 0.24064193241510526, "grad_norm": 1056.0914306640625, "learning_rate": 9.454472228657841e-06, "loss": 148.1635, "step": 29090 }, { "epoch": 0.2407246556644745, "grad_norm": 1000.041259765625, "learning_rate": 9.453831448064717e-06, "loss": 119.1304, "step": 29100 }, { "epoch": 0.24080737891384374, "grad_norm": 765.8133544921875, "learning_rate": 9.453190313100666e-06, "loss": 83.0749, "step": 29110 }, { "epoch": 0.24089010216321297, "grad_norm": 963.5242919921875, "learning_rate": 9.4525488238167e-06, "loss": 153.7064, "step": 29120 }, { "epoch": 0.24097282541258222, "grad_norm": 809.49267578125, "learning_rate": 9.451906980263857e-06, "loss": 122.1319, "step": 29130 }, { "epoch": 0.24105554866195145, "grad_norm": 800.9232788085938, "learning_rate": 9.451264782493208e-06, "loss": 101.5012, "step": 29140 }, { "epoch": 0.24113827191132067, "grad_norm": 1233.4398193359375, "learning_rate": 9.450622230555849e-06, "loss": 144.6246, "step": 29150 }, { "epoch": 0.2412209951606899, "grad_norm": 692.1824951171875, "learning_rate": 9.449979324502905e-06, "loss": 160.0062, "step": 29160 }, { "epoch": 0.24130371841005915, "grad_norm": 946.5017700195312, "learning_rate": 9.449336064385529e-06, "loss": 105.0953, "step": 29170 }, { "epoch": 0.24138644165942838, "grad_norm": 1230.076416015625, "learning_rate": 9.4486924502549e-06, "loss": 120.0082, "step": 29180 }, { "epoch": 0.2414691649087976, "grad_norm": 839.8562622070312, "learning_rate": 9.448048482162231e-06, "loss": 137.8697, "step": 29190 }, { "epoch": 0.24155188815816686, "grad_norm": 882.9497680664062, "learning_rate": 9.447404160158758e-06, "loss": 119.5869, "step": 29200 }, { "epoch": 0.2416346114075361, "grad_norm": 1333.1221923828125, "learning_rate": 9.446759484295745e-06, "loss": 116.6337, "step": 29210 }, { "epoch": 0.2417173346569053, "grad_norm": 955.5294799804688, "learning_rate": 9.44611445462449e-06, "loss": 134.2271, "step": 29220 }, { "epoch": 0.24180005790627457, "grad_norm": 1251.0631103515625, "learning_rate": 9.445469071196312e-06, "loss": 124.4641, "step": 29230 }, { "epoch": 0.2418827811556438, "grad_norm": 834.0491943359375, "learning_rate": 9.444823334062562e-06, "loss": 116.2968, "step": 29240 }, { "epoch": 0.24196550440501302, "grad_norm": 1349.356201171875, "learning_rate": 9.444177243274619e-06, "loss": 131.4607, "step": 29250 }, { "epoch": 0.24204822765438228, "grad_norm": 1118.03173828125, "learning_rate": 9.443530798883887e-06, "loss": 107.2266, "step": 29260 }, { "epoch": 0.2421309509037515, "grad_norm": 1354.8619384765625, "learning_rate": 9.442884000941803e-06, "loss": 129.6626, "step": 29270 }, { "epoch": 0.24221367415312073, "grad_norm": 1301.5723876953125, "learning_rate": 9.44223684949983e-06, "loss": 126.9563, "step": 29280 }, { "epoch": 0.24229639740248998, "grad_norm": 639.9192504882812, "learning_rate": 9.441589344609457e-06, "loss": 97.4439, "step": 29290 }, { "epoch": 0.2423791206518592, "grad_norm": 1386.3795166015625, "learning_rate": 9.440941486322205e-06, "loss": 150.2773, "step": 29300 }, { "epoch": 0.24246184390122844, "grad_norm": 1358.3714599609375, "learning_rate": 9.44029327468962e-06, "loss": 110.78, "step": 29310 }, { "epoch": 0.24254456715059766, "grad_norm": 757.8045043945312, "learning_rate": 9.439644709763276e-06, "loss": 112.67, "step": 29320 }, { "epoch": 0.24262729039996692, "grad_norm": 930.5925903320312, "learning_rate": 9.43899579159478e-06, "loss": 121.9743, "step": 29330 }, { "epoch": 0.24271001364933614, "grad_norm": 1049.4073486328125, "learning_rate": 9.438346520235759e-06, "loss": 100.5406, "step": 29340 }, { "epoch": 0.24279273689870537, "grad_norm": 1115.40966796875, "learning_rate": 9.437696895737876e-06, "loss": 121.6903, "step": 29350 }, { "epoch": 0.24287546014807462, "grad_norm": 698.0397338867188, "learning_rate": 9.437046918152817e-06, "loss": 88.6896, "step": 29360 }, { "epoch": 0.24295818339744385, "grad_norm": 824.7769165039062, "learning_rate": 9.436396587532297e-06, "loss": 126.8226, "step": 29370 }, { "epoch": 0.24304090664681308, "grad_norm": 1229.65869140625, "learning_rate": 9.435745903928062e-06, "loss": 113.3302, "step": 29380 }, { "epoch": 0.24312362989618233, "grad_norm": 845.088623046875, "learning_rate": 9.435094867391881e-06, "loss": 154.009, "step": 29390 }, { "epoch": 0.24320635314555156, "grad_norm": 961.1011962890625, "learning_rate": 9.434443477975557e-06, "loss": 103.9956, "step": 29400 }, { "epoch": 0.24328907639492078, "grad_norm": 757.7135009765625, "learning_rate": 9.433791735730917e-06, "loss": 98.1805, "step": 29410 }, { "epoch": 0.24337179964429004, "grad_norm": 1421.3348388671875, "learning_rate": 9.433139640709817e-06, "loss": 132.9433, "step": 29420 }, { "epoch": 0.24345452289365926, "grad_norm": 608.5304565429688, "learning_rate": 9.432487192964142e-06, "loss": 122.0067, "step": 29430 }, { "epoch": 0.2435372461430285, "grad_norm": 1166.598876953125, "learning_rate": 9.431834392545803e-06, "loss": 127.8436, "step": 29440 }, { "epoch": 0.24361996939239774, "grad_norm": 1254.7440185546875, "learning_rate": 9.43118123950674e-06, "loss": 124.5958, "step": 29450 }, { "epoch": 0.24370269264176697, "grad_norm": 924.8063354492188, "learning_rate": 9.430527733898922e-06, "loss": 102.3073, "step": 29460 }, { "epoch": 0.2437854158911362, "grad_norm": 957.7825927734375, "learning_rate": 9.429873875774344e-06, "loss": 112.2574, "step": 29470 }, { "epoch": 0.24386813914050545, "grad_norm": 867.5020141601562, "learning_rate": 9.429219665185034e-06, "loss": 109.0799, "step": 29480 }, { "epoch": 0.24395086238987468, "grad_norm": 1606.21435546875, "learning_rate": 9.428565102183043e-06, "loss": 114.3639, "step": 29490 }, { "epoch": 0.2440335856392439, "grad_norm": 782.0588989257812, "learning_rate": 9.42791018682045e-06, "loss": 102.8368, "step": 29500 }, { "epoch": 0.24411630888861313, "grad_norm": 1499.0159912109375, "learning_rate": 9.427254919149367e-06, "loss": 129.8493, "step": 29510 }, { "epoch": 0.24419903213798239, "grad_norm": 1734.2025146484375, "learning_rate": 9.426599299221925e-06, "loss": 118.0028, "step": 29520 }, { "epoch": 0.2442817553873516, "grad_norm": 1104.4652099609375, "learning_rate": 9.425943327090295e-06, "loss": 133.7769, "step": 29530 }, { "epoch": 0.24436447863672084, "grad_norm": 809.5218505859375, "learning_rate": 9.425287002806666e-06, "loss": 101.5154, "step": 29540 }, { "epoch": 0.2444472018860901, "grad_norm": 860.76123046875, "learning_rate": 9.42463032642326e-06, "loss": 126.5965, "step": 29550 }, { "epoch": 0.24452992513545932, "grad_norm": 1226.3048095703125, "learning_rate": 9.423973297992324e-06, "loss": 133.2678, "step": 29560 }, { "epoch": 0.24461264838482855, "grad_norm": 638.6930541992188, "learning_rate": 9.423315917566137e-06, "loss": 153.0996, "step": 29570 }, { "epoch": 0.2446953716341978, "grad_norm": 598.7249755859375, "learning_rate": 9.422658185197002e-06, "loss": 122.7943, "step": 29580 }, { "epoch": 0.24477809488356703, "grad_norm": 711.6947021484375, "learning_rate": 9.422000100937253e-06, "loss": 93.9475, "step": 29590 }, { "epoch": 0.24486081813293625, "grad_norm": 1122.1689453125, "learning_rate": 9.42134166483925e-06, "loss": 97.7506, "step": 29600 }, { "epoch": 0.2449435413823055, "grad_norm": 1915.1302490234375, "learning_rate": 9.420682876955382e-06, "loss": 115.1031, "step": 29610 }, { "epoch": 0.24502626463167473, "grad_norm": 659.7858276367188, "learning_rate": 9.420023737338065e-06, "loss": 120.2869, "step": 29620 }, { "epoch": 0.24510898788104396, "grad_norm": 834.023193359375, "learning_rate": 9.419364246039745e-06, "loss": 125.4224, "step": 29630 }, { "epoch": 0.24519171113041321, "grad_norm": 1009.0372924804688, "learning_rate": 9.418704403112894e-06, "loss": 109.2442, "step": 29640 }, { "epoch": 0.24527443437978244, "grad_norm": 2674.98193359375, "learning_rate": 9.418044208610013e-06, "loss": 156.5225, "step": 29650 }, { "epoch": 0.24535715762915167, "grad_norm": 1011.0217895507812, "learning_rate": 9.41738366258363e-06, "loss": 126.1811, "step": 29660 }, { "epoch": 0.24543988087852092, "grad_norm": 610.7017211914062, "learning_rate": 9.416722765086304e-06, "loss": 144.7449, "step": 29670 }, { "epoch": 0.24552260412789015, "grad_norm": 1031.1539306640625, "learning_rate": 9.416061516170615e-06, "loss": 108.1692, "step": 29680 }, { "epoch": 0.24560532737725937, "grad_norm": 1801.7032470703125, "learning_rate": 9.415399915889179e-06, "loss": 121.3443, "step": 29690 }, { "epoch": 0.2456880506266286, "grad_norm": 1428.9144287109375, "learning_rate": 9.414737964294636e-06, "loss": 116.9526, "step": 29700 }, { "epoch": 0.24577077387599786, "grad_norm": 937.070556640625, "learning_rate": 9.414075661439653e-06, "loss": 111.9231, "step": 29710 }, { "epoch": 0.24585349712536708, "grad_norm": 1381.5968017578125, "learning_rate": 9.413413007376928e-06, "loss": 163.5947, "step": 29720 }, { "epoch": 0.2459362203747363, "grad_norm": 1236.41552734375, "learning_rate": 9.412750002159186e-06, "loss": 110.7294, "step": 29730 }, { "epoch": 0.24601894362410556, "grad_norm": 757.4229125976562, "learning_rate": 9.412086645839177e-06, "loss": 88.9742, "step": 29740 }, { "epoch": 0.2461016668734748, "grad_norm": 1002.5419311523438, "learning_rate": 9.411422938469683e-06, "loss": 137.723, "step": 29750 }, { "epoch": 0.24618439012284402, "grad_norm": 636.2584228515625, "learning_rate": 9.41075888010351e-06, "loss": 108.3714, "step": 29760 }, { "epoch": 0.24626711337221327, "grad_norm": 737.2359008789062, "learning_rate": 9.410094470793497e-06, "loss": 135.6444, "step": 29770 }, { "epoch": 0.2463498366215825, "grad_norm": 1887.1973876953125, "learning_rate": 9.409429710592505e-06, "loss": 126.0426, "step": 29780 }, { "epoch": 0.24643255987095172, "grad_norm": 1311.354248046875, "learning_rate": 9.408764599553429e-06, "loss": 156.2838, "step": 29790 }, { "epoch": 0.24651528312032098, "grad_norm": 1149.238525390625, "learning_rate": 9.408099137729188e-06, "loss": 130.7976, "step": 29800 }, { "epoch": 0.2465980063696902, "grad_norm": 1126.7828369140625, "learning_rate": 9.407433325172727e-06, "loss": 153.4184, "step": 29810 }, { "epoch": 0.24668072961905943, "grad_norm": 1250.0152587890625, "learning_rate": 9.406767161937025e-06, "loss": 142.3581, "step": 29820 }, { "epoch": 0.24676345286842868, "grad_norm": 1395.9898681640625, "learning_rate": 9.406100648075084e-06, "loss": 122.4098, "step": 29830 }, { "epoch": 0.2468461761177979, "grad_norm": 1577.0543212890625, "learning_rate": 9.405433783639936e-06, "loss": 112.6034, "step": 29840 }, { "epoch": 0.24692889936716714, "grad_norm": 1184.2935791015625, "learning_rate": 9.40476656868464e-06, "loss": 148.0747, "step": 29850 }, { "epoch": 0.24701162261653636, "grad_norm": 805.5813598632812, "learning_rate": 9.404099003262282e-06, "loss": 155.5525, "step": 29860 }, { "epoch": 0.24709434586590562, "grad_norm": 1295.2099609375, "learning_rate": 9.40343108742598e-06, "loss": 149.4768, "step": 29870 }, { "epoch": 0.24717706911527484, "grad_norm": 995.8720092773438, "learning_rate": 9.402762821228875e-06, "loss": 140.2816, "step": 29880 }, { "epoch": 0.24725979236464407, "grad_norm": 866.3977661132812, "learning_rate": 9.402094204724138e-06, "loss": 129.4959, "step": 29890 }, { "epoch": 0.24734251561401333, "grad_norm": 1852.4481201171875, "learning_rate": 9.401425237964966e-06, "loss": 102.9619, "step": 29900 }, { "epoch": 0.24742523886338255, "grad_norm": 743.556640625, "learning_rate": 9.400755921004592e-06, "loss": 85.2109, "step": 29910 }, { "epoch": 0.24750796211275178, "grad_norm": 809.3905639648438, "learning_rate": 9.400086253896264e-06, "loss": 106.4736, "step": 29920 }, { "epoch": 0.24759068536212103, "grad_norm": 783.1993408203125, "learning_rate": 9.399416236693264e-06, "loss": 125.8943, "step": 29930 }, { "epoch": 0.24767340861149026, "grad_norm": 772.568115234375, "learning_rate": 9.398745869448909e-06, "loss": 123.8559, "step": 29940 }, { "epoch": 0.24775613186085949, "grad_norm": 1002.6859130859375, "learning_rate": 9.39807515221653e-06, "loss": 101.8871, "step": 29950 }, { "epoch": 0.24783885511022874, "grad_norm": 1258.36572265625, "learning_rate": 9.397404085049496e-06, "loss": 98.4138, "step": 29960 }, { "epoch": 0.24792157835959797, "grad_norm": 1145.0703125, "learning_rate": 9.3967326680012e-06, "loss": 118.3927, "step": 29970 }, { "epoch": 0.2480043016089672, "grad_norm": 1031.3804931640625, "learning_rate": 9.396060901125064e-06, "loss": 105.3649, "step": 29980 }, { "epoch": 0.24808702485833645, "grad_norm": 2366.289794921875, "learning_rate": 9.395388784474538e-06, "loss": 168.2479, "step": 29990 }, { "epoch": 0.24816974810770567, "grad_norm": 369.7084045410156, "learning_rate": 9.394716318103098e-06, "loss": 121.3149, "step": 30000 }, { "epoch": 0.2482524713570749, "grad_norm": 1409.655029296875, "learning_rate": 9.394043502064249e-06, "loss": 105.2097, "step": 30010 }, { "epoch": 0.24833519460644415, "grad_norm": 1403.9825439453125, "learning_rate": 9.393370336411527e-06, "loss": 147.3934, "step": 30020 }, { "epoch": 0.24841791785581338, "grad_norm": 929.0065307617188, "learning_rate": 9.392696821198488e-06, "loss": 124.7842, "step": 30030 }, { "epoch": 0.2485006411051826, "grad_norm": 828.625244140625, "learning_rate": 9.392022956478724e-06, "loss": 112.9368, "step": 30040 }, { "epoch": 0.24858336435455183, "grad_norm": 3000.644287109375, "learning_rate": 9.391348742305849e-06, "loss": 148.1125, "step": 30050 }, { "epoch": 0.2486660876039211, "grad_norm": 908.979248046875, "learning_rate": 9.390674178733508e-06, "loss": 109.6535, "step": 30060 }, { "epoch": 0.24874881085329031, "grad_norm": 1394.7421875, "learning_rate": 9.389999265815373e-06, "loss": 112.9092, "step": 30070 }, { "epoch": 0.24883153410265954, "grad_norm": 1016.4574584960938, "learning_rate": 9.389324003605144e-06, "loss": 168.127, "step": 30080 }, { "epoch": 0.2489142573520288, "grad_norm": 1174.79443359375, "learning_rate": 9.388648392156547e-06, "loss": 112.0588, "step": 30090 }, { "epoch": 0.24899698060139802, "grad_norm": 2049.481689453125, "learning_rate": 9.387972431523341e-06, "loss": 127.4066, "step": 30100 }, { "epoch": 0.24907970385076725, "grad_norm": 712.5939331054688, "learning_rate": 9.387296121759305e-06, "loss": 98.8517, "step": 30110 }, { "epoch": 0.2491624271001365, "grad_norm": 872.814208984375, "learning_rate": 9.386619462918254e-06, "loss": 100.3602, "step": 30120 }, { "epoch": 0.24924515034950573, "grad_norm": 649.6997680664062, "learning_rate": 9.385942455054022e-06, "loss": 119.4873, "step": 30130 }, { "epoch": 0.24932787359887496, "grad_norm": 655.9243774414062, "learning_rate": 9.385265098220478e-06, "loss": 124.5341, "step": 30140 }, { "epoch": 0.2494105968482442, "grad_norm": 822.2200927734375, "learning_rate": 9.384587392471516e-06, "loss": 162.9077, "step": 30150 }, { "epoch": 0.24949332009761344, "grad_norm": 940.898193359375, "learning_rate": 9.383909337861058e-06, "loss": 118.583, "step": 30160 }, { "epoch": 0.24957604334698266, "grad_norm": 1128.41943359375, "learning_rate": 9.383230934443053e-06, "loss": 136.6669, "step": 30170 }, { "epoch": 0.24965876659635192, "grad_norm": 631.8690795898438, "learning_rate": 9.382552182271478e-06, "loss": 97.5566, "step": 30180 }, { "epoch": 0.24974148984572114, "grad_norm": 1021.1989135742188, "learning_rate": 9.38187308140034e-06, "loss": 146.495, "step": 30190 }, { "epoch": 0.24982421309509037, "grad_norm": 1181.3828125, "learning_rate": 9.381193631883672e-06, "loss": 150.6252, "step": 30200 }, { "epoch": 0.24990693634445962, "grad_norm": 814.0835571289062, "learning_rate": 9.380513833775531e-06, "loss": 114.7124, "step": 30210 }, { "epoch": 0.24998965959382885, "grad_norm": 1297.4193115234375, "learning_rate": 9.37983368713001e-06, "loss": 97.1973, "step": 30220 }, { "epoch": 0.2500723828431981, "grad_norm": 800.78564453125, "learning_rate": 9.379153192001223e-06, "loss": 98.411, "step": 30230 }, { "epoch": 0.2501551060925673, "grad_norm": 1123.505859375, "learning_rate": 9.378472348443315e-06, "loss": 119.3296, "step": 30240 }, { "epoch": 0.25023782934193656, "grad_norm": 888.609375, "learning_rate": 9.377791156510456e-06, "loss": 74.0182, "step": 30250 }, { "epoch": 0.2503205525913058, "grad_norm": 713.3021240234375, "learning_rate": 9.377109616256846e-06, "loss": 147.7178, "step": 30260 }, { "epoch": 0.250403275840675, "grad_norm": 1878.27880859375, "learning_rate": 9.37642772773671e-06, "loss": 154.91, "step": 30270 }, { "epoch": 0.25048599909004426, "grad_norm": 625.2662353515625, "learning_rate": 9.375745491004307e-06, "loss": 90.2972, "step": 30280 }, { "epoch": 0.2505687223394135, "grad_norm": 621.6907958984375, "learning_rate": 9.375062906113916e-06, "loss": 126.9956, "step": 30290 }, { "epoch": 0.2506514455887827, "grad_norm": 1250.3077392578125, "learning_rate": 9.37437997311985e-06, "loss": 107.0872, "step": 30300 }, { "epoch": 0.25073416883815197, "grad_norm": 720.9534912109375, "learning_rate": 9.373696692076446e-06, "loss": 105.0815, "step": 30310 }, { "epoch": 0.25081689208752117, "grad_norm": 684.220703125, "learning_rate": 9.373013063038066e-06, "loss": 129.8487, "step": 30320 }, { "epoch": 0.2508996153368904, "grad_norm": 1063.6759033203125, "learning_rate": 9.372329086059108e-06, "loss": 135.9542, "step": 30330 }, { "epoch": 0.2509823385862597, "grad_norm": 1607.3919677734375, "learning_rate": 9.37164476119399e-06, "loss": 142.6617, "step": 30340 }, { "epoch": 0.2510650618356289, "grad_norm": 542.721435546875, "learning_rate": 9.370960088497162e-06, "loss": 106.839, "step": 30350 }, { "epoch": 0.25114778508499813, "grad_norm": 549.24560546875, "learning_rate": 9.370275068023097e-06, "loss": 129.0447, "step": 30360 }, { "epoch": 0.2512305083343674, "grad_norm": 960.9791259765625, "learning_rate": 9.369589699826306e-06, "loss": 140.4398, "step": 30370 }, { "epoch": 0.2513132315837366, "grad_norm": 1057.4302978515625, "learning_rate": 9.368903983961315e-06, "loss": 138.126, "step": 30380 }, { "epoch": 0.25139595483310584, "grad_norm": 1365.5726318359375, "learning_rate": 9.368217920482684e-06, "loss": 139.174, "step": 30390 }, { "epoch": 0.2514786780824751, "grad_norm": 887.7735595703125, "learning_rate": 9.367531509445001e-06, "loss": 129.102, "step": 30400 }, { "epoch": 0.2515614013318443, "grad_norm": 1251.561767578125, "learning_rate": 9.366844750902878e-06, "loss": 121.9665, "step": 30410 }, { "epoch": 0.25164412458121355, "grad_norm": 881.740234375, "learning_rate": 9.36615764491096e-06, "loss": 82.5869, "step": 30420 }, { "epoch": 0.2517268478305828, "grad_norm": 821.780029296875, "learning_rate": 9.365470191523917e-06, "loss": 146.2663, "step": 30430 }, { "epoch": 0.251809571079952, "grad_norm": 626.3407592773438, "learning_rate": 9.364782390796446e-06, "loss": 86.4238, "step": 30440 }, { "epoch": 0.25189229432932125, "grad_norm": 1124.6002197265625, "learning_rate": 9.364094242783272e-06, "loss": 146.8187, "step": 30450 }, { "epoch": 0.2519750175786905, "grad_norm": 631.0712890625, "learning_rate": 9.363405747539147e-06, "loss": 98.5037, "step": 30460 }, { "epoch": 0.2520577408280597, "grad_norm": 949.3443603515625, "learning_rate": 9.362716905118851e-06, "loss": 139.6968, "step": 30470 }, { "epoch": 0.25214046407742896, "grad_norm": 513.1497802734375, "learning_rate": 9.362027715577195e-06, "loss": 118.3806, "step": 30480 }, { "epoch": 0.2522231873267982, "grad_norm": 1057.8067626953125, "learning_rate": 9.361338178969012e-06, "loss": 108.9348, "step": 30490 }, { "epoch": 0.2523059105761674, "grad_norm": 903.6969604492188, "learning_rate": 9.360648295349165e-06, "loss": 105.4085, "step": 30500 }, { "epoch": 0.25238863382553667, "grad_norm": 2535.44189453125, "learning_rate": 9.359958064772547e-06, "loss": 161.6714, "step": 30510 }, { "epoch": 0.2524713570749059, "grad_norm": 1677.7100830078125, "learning_rate": 9.359267487294075e-06, "loss": 128.2102, "step": 30520 }, { "epoch": 0.2525540803242751, "grad_norm": 1912.9716796875, "learning_rate": 9.358576562968695e-06, "loss": 118.4899, "step": 30530 }, { "epoch": 0.2526368035736444, "grad_norm": 763.476318359375, "learning_rate": 9.357885291851382e-06, "loss": 124.9722, "step": 30540 }, { "epoch": 0.25271952682301363, "grad_norm": 1949.4473876953125, "learning_rate": 9.357193673997133e-06, "loss": 104.1943, "step": 30550 }, { "epoch": 0.25280225007238283, "grad_norm": 1901.4537353515625, "learning_rate": 9.356501709460984e-06, "loss": 108.5047, "step": 30560 }, { "epoch": 0.2528849733217521, "grad_norm": 773.7173461914062, "learning_rate": 9.355809398297986e-06, "loss": 95.4959, "step": 30570 }, { "epoch": 0.25296769657112134, "grad_norm": 1136.826171875, "learning_rate": 9.355116740563225e-06, "loss": 136.653, "step": 30580 }, { "epoch": 0.25305041982049054, "grad_norm": 826.9710693359375, "learning_rate": 9.354423736311813e-06, "loss": 119.1377, "step": 30590 }, { "epoch": 0.2531331430698598, "grad_norm": 1087.69677734375, "learning_rate": 9.353730385598887e-06, "loss": 101.276, "step": 30600 }, { "epoch": 0.25321586631922904, "grad_norm": 567.2242431640625, "learning_rate": 9.353036688479615e-06, "loss": 116.7849, "step": 30610 }, { "epoch": 0.25329858956859824, "grad_norm": 1647.7808837890625, "learning_rate": 9.352342645009193e-06, "loss": 142.3532, "step": 30620 }, { "epoch": 0.2533813128179675, "grad_norm": 1223.5712890625, "learning_rate": 9.35164825524284e-06, "loss": 106.8768, "step": 30630 }, { "epoch": 0.25346403606733675, "grad_norm": 1215.446044921875, "learning_rate": 9.350953519235807e-06, "loss": 142.7279, "step": 30640 }, { "epoch": 0.25354675931670595, "grad_norm": 1093.0865478515625, "learning_rate": 9.35025843704337e-06, "loss": 133.1846, "step": 30650 }, { "epoch": 0.2536294825660752, "grad_norm": 603.0365600585938, "learning_rate": 9.349563008720836e-06, "loss": 143.9578, "step": 30660 }, { "epoch": 0.25371220581544446, "grad_norm": 926.9697265625, "learning_rate": 9.348867234323534e-06, "loss": 115.379, "step": 30670 }, { "epoch": 0.25379492906481366, "grad_norm": 1196.4434814453125, "learning_rate": 9.348171113906826e-06, "loss": 128.1764, "step": 30680 }, { "epoch": 0.2538776523141829, "grad_norm": 750.9150390625, "learning_rate": 9.347474647526095e-06, "loss": 194.924, "step": 30690 }, { "epoch": 0.2539603755635521, "grad_norm": 1341.235595703125, "learning_rate": 9.34677783523676e-06, "loss": 137.8295, "step": 30700 }, { "epoch": 0.25404309881292136, "grad_norm": 1126.3736572265625, "learning_rate": 9.346080677094262e-06, "loss": 132.7227, "step": 30710 }, { "epoch": 0.2541258220622906, "grad_norm": 824.2003784179688, "learning_rate": 9.345383173154072e-06, "loss": 133.4808, "step": 30720 }, { "epoch": 0.2542085453116598, "grad_norm": 929.5851440429688, "learning_rate": 9.344685323471682e-06, "loss": 109.8865, "step": 30730 }, { "epoch": 0.25429126856102907, "grad_norm": 763.9591674804688, "learning_rate": 9.343987128102624e-06, "loss": 114.478, "step": 30740 }, { "epoch": 0.2543739918103983, "grad_norm": 896.4277954101562, "learning_rate": 9.343288587102444e-06, "loss": 139.7716, "step": 30750 }, { "epoch": 0.2544567150597675, "grad_norm": 890.5599975585938, "learning_rate": 9.342589700526725e-06, "loss": 119.8424, "step": 30760 }, { "epoch": 0.2545394383091368, "grad_norm": 954.55322265625, "learning_rate": 9.341890468431072e-06, "loss": 197.9463, "step": 30770 }, { "epoch": 0.25462216155850603, "grad_norm": 962.0372314453125, "learning_rate": 9.341190890871123e-06, "loss": 173.233, "step": 30780 }, { "epoch": 0.25470488480787523, "grad_norm": 1072.42724609375, "learning_rate": 9.340490967902535e-06, "loss": 114.3112, "step": 30790 }, { "epoch": 0.2547876080572445, "grad_norm": 927.2454223632812, "learning_rate": 9.339790699581004e-06, "loss": 98.9923, "step": 30800 }, { "epoch": 0.25487033130661374, "grad_norm": 485.0035400390625, "learning_rate": 9.339090085962244e-06, "loss": 109.2545, "step": 30810 }, { "epoch": 0.25495305455598294, "grad_norm": 1289.7406005859375, "learning_rate": 9.338389127101998e-06, "loss": 137.8362, "step": 30820 }, { "epoch": 0.2550357778053522, "grad_norm": 655.8922119140625, "learning_rate": 9.337687823056041e-06, "loss": 101.9889, "step": 30830 }, { "epoch": 0.25511850105472145, "grad_norm": 1242.1337890625, "learning_rate": 9.336986173880169e-06, "loss": 106.3836, "step": 30840 }, { "epoch": 0.25520122430409065, "grad_norm": 580.3970947265625, "learning_rate": 9.336284179630215e-06, "loss": 94.8493, "step": 30850 }, { "epoch": 0.2552839475534599, "grad_norm": 789.4066162109375, "learning_rate": 9.335581840362026e-06, "loss": 74.5354, "step": 30860 }, { "epoch": 0.25536667080282915, "grad_norm": 1426.3404541015625, "learning_rate": 9.33487915613149e-06, "loss": 108.0914, "step": 30870 }, { "epoch": 0.25544939405219835, "grad_norm": 826.1908569335938, "learning_rate": 9.334176126994512e-06, "loss": 109.946, "step": 30880 }, { "epoch": 0.2555321173015676, "grad_norm": 755.2938232421875, "learning_rate": 9.333472753007031e-06, "loss": 111.354, "step": 30890 }, { "epoch": 0.25561484055093686, "grad_norm": 780.1597290039062, "learning_rate": 9.332769034225012e-06, "loss": 142.2512, "step": 30900 }, { "epoch": 0.25569756380030606, "grad_norm": 731.4649047851562, "learning_rate": 9.332064970704445e-06, "loss": 156.2841, "step": 30910 }, { "epoch": 0.2557802870496753, "grad_norm": 1020.5748291015625, "learning_rate": 9.33136056250135e-06, "loss": 127.7295, "step": 30920 }, { "epoch": 0.25586301029904457, "grad_norm": 698.0821533203125, "learning_rate": 9.330655809671773e-06, "loss": 92.2535, "step": 30930 }, { "epoch": 0.25594573354841377, "grad_norm": 698.9208374023438, "learning_rate": 9.32995071227179e-06, "loss": 124.9385, "step": 30940 }, { "epoch": 0.256028456797783, "grad_norm": 1320.0196533203125, "learning_rate": 9.3292452703575e-06, "loss": 128.1827, "step": 30950 }, { "epoch": 0.2561111800471523, "grad_norm": 2012.84033203125, "learning_rate": 9.328539483985031e-06, "loss": 155.1285, "step": 30960 }, { "epoch": 0.2561939032965215, "grad_norm": 1151.90625, "learning_rate": 9.327833353210541e-06, "loss": 111.3364, "step": 30970 }, { "epoch": 0.25627662654589073, "grad_norm": 1791.881103515625, "learning_rate": 9.327126878090214e-06, "loss": 118.88, "step": 30980 }, { "epoch": 0.25635934979526, "grad_norm": 741.9896850585938, "learning_rate": 9.32642005868026e-06, "loss": 122.3429, "step": 30990 }, { "epoch": 0.2564420730446292, "grad_norm": 712.4248657226562, "learning_rate": 9.325712895036916e-06, "loss": 125.5105, "step": 31000 }, { "epoch": 0.25652479629399844, "grad_norm": 1130.374267578125, "learning_rate": 9.32500538721645e-06, "loss": 86.682, "step": 31010 }, { "epoch": 0.2566075195433677, "grad_norm": 1284.844970703125, "learning_rate": 9.324297535275156e-06, "loss": 114.092, "step": 31020 }, { "epoch": 0.2566902427927369, "grad_norm": 911.753173828125, "learning_rate": 9.323589339269352e-06, "loss": 106.8176, "step": 31030 }, { "epoch": 0.25677296604210614, "grad_norm": 3918.962646484375, "learning_rate": 9.322880799255385e-06, "loss": 160.4931, "step": 31040 }, { "epoch": 0.25685568929147534, "grad_norm": 835.4883422851562, "learning_rate": 9.322171915289635e-06, "loss": 108.218, "step": 31050 }, { "epoch": 0.2569384125408446, "grad_norm": 745.3687133789062, "learning_rate": 9.321462687428499e-06, "loss": 103.9572, "step": 31060 }, { "epoch": 0.25702113579021385, "grad_norm": 2105.262939453125, "learning_rate": 9.320753115728413e-06, "loss": 137.9624, "step": 31070 }, { "epoch": 0.25710385903958305, "grad_norm": 1638.893798828125, "learning_rate": 9.320043200245829e-06, "loss": 76.6734, "step": 31080 }, { "epoch": 0.2571865822889523, "grad_norm": 1251.9464111328125, "learning_rate": 9.319332941037235e-06, "loss": 128.9104, "step": 31090 }, { "epoch": 0.25726930553832156, "grad_norm": 946.0052490234375, "learning_rate": 9.31862233815914e-06, "loss": 83.5885, "step": 31100 }, { "epoch": 0.25735202878769076, "grad_norm": 613.86181640625, "learning_rate": 9.317911391668087e-06, "loss": 88.8766, "step": 31110 }, { "epoch": 0.25743475203706, "grad_norm": 980.6512451171875, "learning_rate": 9.317200101620641e-06, "loss": 111.9333, "step": 31120 }, { "epoch": 0.25751747528642926, "grad_norm": 959.6453857421875, "learning_rate": 9.316488468073397e-06, "loss": 87.5497, "step": 31130 }, { "epoch": 0.25760019853579846, "grad_norm": 784.59033203125, "learning_rate": 9.315776491082973e-06, "loss": 140.7631, "step": 31140 }, { "epoch": 0.2576829217851677, "grad_norm": 1294.2275390625, "learning_rate": 9.315064170706023e-06, "loss": 114.7354, "step": 31150 }, { "epoch": 0.25776564503453697, "grad_norm": 1613.03857421875, "learning_rate": 9.31435150699922e-06, "loss": 123.3567, "step": 31160 }, { "epoch": 0.25784836828390617, "grad_norm": 1174.7305908203125, "learning_rate": 9.313638500019267e-06, "loss": 133.3073, "step": 31170 }, { "epoch": 0.2579310915332754, "grad_norm": 587.86572265625, "learning_rate": 9.312925149822895e-06, "loss": 90.9177, "step": 31180 }, { "epoch": 0.2580138147826447, "grad_norm": 934.0054931640625, "learning_rate": 9.312211456466862e-06, "loss": 127.4864, "step": 31190 }, { "epoch": 0.2580965380320139, "grad_norm": 712.6873779296875, "learning_rate": 9.311497420007955e-06, "loss": 111.8241, "step": 31200 }, { "epoch": 0.25817926128138313, "grad_norm": 1259.027587890625, "learning_rate": 9.310783040502987e-06, "loss": 120.1594, "step": 31210 }, { "epoch": 0.2582619845307524, "grad_norm": 1388.41162109375, "learning_rate": 9.310068318008794e-06, "loss": 121.605, "step": 31220 }, { "epoch": 0.2583447077801216, "grad_norm": 1037.8282470703125, "learning_rate": 9.309353252582246e-06, "loss": 138.8729, "step": 31230 }, { "epoch": 0.25842743102949084, "grad_norm": 943.52490234375, "learning_rate": 9.308637844280236e-06, "loss": 132.2363, "step": 31240 }, { "epoch": 0.2585101542788601, "grad_norm": 1297.0338134765625, "learning_rate": 9.307922093159688e-06, "loss": 113.9879, "step": 31250 }, { "epoch": 0.2585928775282293, "grad_norm": 739.4756469726562, "learning_rate": 9.30720599927755e-06, "loss": 79.2995, "step": 31260 }, { "epoch": 0.25867560077759855, "grad_norm": 1136.6614990234375, "learning_rate": 9.306489562690797e-06, "loss": 148.8123, "step": 31270 }, { "epoch": 0.2587583240269678, "grad_norm": 1102.057861328125, "learning_rate": 9.305772783456435e-06, "loss": 126.115, "step": 31280 }, { "epoch": 0.258841047276337, "grad_norm": 1000.919677734375, "learning_rate": 9.305055661631493e-06, "loss": 128.0628, "step": 31290 }, { "epoch": 0.25892377052570625, "grad_norm": 1486.086669921875, "learning_rate": 9.304338197273029e-06, "loss": 141.9742, "step": 31300 }, { "epoch": 0.2590064937750755, "grad_norm": 1208.7861328125, "learning_rate": 9.303620390438128e-06, "loss": 119.3574, "step": 31310 }, { "epoch": 0.2590892170244447, "grad_norm": 1793.4461669921875, "learning_rate": 9.302902241183905e-06, "loss": 115.7504, "step": 31320 }, { "epoch": 0.25917194027381396, "grad_norm": 1034.2620849609375, "learning_rate": 9.302183749567498e-06, "loss": 104.3807, "step": 31330 }, { "epoch": 0.2592546635231832, "grad_norm": 1191.4996337890625, "learning_rate": 9.301464915646074e-06, "loss": 95.0326, "step": 31340 }, { "epoch": 0.2593373867725524, "grad_norm": 663.1774291992188, "learning_rate": 9.30074573947683e-06, "loss": 105.2758, "step": 31350 }, { "epoch": 0.25942011002192167, "grad_norm": 1111.489501953125, "learning_rate": 9.30002622111698e-06, "loss": 120.7122, "step": 31360 }, { "epoch": 0.2595028332712909, "grad_norm": 1140.2496337890625, "learning_rate": 9.299306360623782e-06, "loss": 111.4868, "step": 31370 }, { "epoch": 0.2595855565206601, "grad_norm": 1038.2596435546875, "learning_rate": 9.298586158054508e-06, "loss": 119.8149, "step": 31380 }, { "epoch": 0.2596682797700294, "grad_norm": 1008.992431640625, "learning_rate": 9.297865613466459e-06, "loss": 145.3494, "step": 31390 }, { "epoch": 0.25975100301939863, "grad_norm": 1030.713623046875, "learning_rate": 9.29714472691697e-06, "loss": 116.2307, "step": 31400 }, { "epoch": 0.25983372626876783, "grad_norm": 1515.673095703125, "learning_rate": 9.296423498463396e-06, "loss": 125.331, "step": 31410 }, { "epoch": 0.2599164495181371, "grad_norm": 1273.73486328125, "learning_rate": 9.29570192816312e-06, "loss": 140.6214, "step": 31420 }, { "epoch": 0.2599991727675063, "grad_norm": 710.17236328125, "learning_rate": 9.29498001607356e-06, "loss": 108.083, "step": 31430 }, { "epoch": 0.26008189601687554, "grad_norm": 900.10107421875, "learning_rate": 9.294257762252148e-06, "loss": 98.9134, "step": 31440 }, { "epoch": 0.2601646192662448, "grad_norm": 875.4248657226562, "learning_rate": 9.293535166756356e-06, "loss": 174.0914, "step": 31450 }, { "epoch": 0.260247342515614, "grad_norm": 1023.4577026367188, "learning_rate": 9.292812229643674e-06, "loss": 96.2018, "step": 31460 }, { "epoch": 0.26033006576498324, "grad_norm": 1871.7161865234375, "learning_rate": 9.292088950971624e-06, "loss": 135.2347, "step": 31470 }, { "epoch": 0.2604127890143525, "grad_norm": 1158.376953125, "learning_rate": 9.291365330797755e-06, "loss": 131.9809, "step": 31480 }, { "epoch": 0.2604955122637217, "grad_norm": 962.1968383789062, "learning_rate": 9.290641369179643e-06, "loss": 109.7965, "step": 31490 }, { "epoch": 0.26057823551309095, "grad_norm": 4627.09521484375, "learning_rate": 9.289917066174887e-06, "loss": 133.032, "step": 31500 }, { "epoch": 0.2606609587624602, "grad_norm": 1069.57177734375, "learning_rate": 9.289192421841116e-06, "loss": 114.866, "step": 31510 }, { "epoch": 0.2607436820118294, "grad_norm": 1533.0517578125, "learning_rate": 9.288467436235992e-06, "loss": 135.5069, "step": 31520 }, { "epoch": 0.26082640526119866, "grad_norm": 876.2843627929688, "learning_rate": 9.287742109417194e-06, "loss": 165.9743, "step": 31530 }, { "epoch": 0.2609091285105679, "grad_norm": 849.820556640625, "learning_rate": 9.287016441442435e-06, "loss": 113.8865, "step": 31540 }, { "epoch": 0.2609918517599371, "grad_norm": 1214.044189453125, "learning_rate": 9.28629043236945e-06, "loss": 129.5201, "step": 31550 }, { "epoch": 0.26107457500930636, "grad_norm": 974.0680541992188, "learning_rate": 9.285564082256011e-06, "loss": 106.2931, "step": 31560 }, { "epoch": 0.2611572982586756, "grad_norm": 591.6702270507812, "learning_rate": 9.284837391159904e-06, "loss": 77.5611, "step": 31570 }, { "epoch": 0.2612400215080448, "grad_norm": 995.6359252929688, "learning_rate": 9.284110359138951e-06, "loss": 169.7267, "step": 31580 }, { "epoch": 0.26132274475741407, "grad_norm": 993.3253784179688, "learning_rate": 9.283382986250997e-06, "loss": 117.8098, "step": 31590 }, { "epoch": 0.2614054680067833, "grad_norm": 720.0477905273438, "learning_rate": 9.282655272553917e-06, "loss": 164.0745, "step": 31600 }, { "epoch": 0.2614881912561525, "grad_norm": 1000.5869750976562, "learning_rate": 9.281927218105613e-06, "loss": 103.8817, "step": 31610 }, { "epoch": 0.2615709145055218, "grad_norm": 701.7263793945312, "learning_rate": 9.281198822964011e-06, "loss": 115.0276, "step": 31620 }, { "epoch": 0.26165363775489103, "grad_norm": 1523.617919921875, "learning_rate": 9.280470087187066e-06, "loss": 150.8629, "step": 31630 }, { "epoch": 0.26173636100426023, "grad_norm": 1352.073486328125, "learning_rate": 9.279741010832761e-06, "loss": 111.8819, "step": 31640 }, { "epoch": 0.2618190842536295, "grad_norm": 784.3294677734375, "learning_rate": 9.279011593959107e-06, "loss": 134.8354, "step": 31650 }, { "epoch": 0.26190180750299874, "grad_norm": 518.8139038085938, "learning_rate": 9.278281836624137e-06, "loss": 109.1452, "step": 31660 }, { "epoch": 0.26198453075236794, "grad_norm": 738.8187255859375, "learning_rate": 9.277551738885915e-06, "loss": 137.3162, "step": 31670 }, { "epoch": 0.2620672540017372, "grad_norm": 1180.13037109375, "learning_rate": 9.276821300802535e-06, "loss": 101.0768, "step": 31680 }, { "epoch": 0.26214997725110645, "grad_norm": 1431.0516357421875, "learning_rate": 9.276090522432109e-06, "loss": 99.368, "step": 31690 }, { "epoch": 0.26223270050047565, "grad_norm": 688.6452026367188, "learning_rate": 9.275359403832787e-06, "loss": 123.8677, "step": 31700 }, { "epoch": 0.2623154237498449, "grad_norm": 854.5988159179688, "learning_rate": 9.274627945062738e-06, "loss": 111.6149, "step": 31710 }, { "epoch": 0.26239814699921415, "grad_norm": 1075.212158203125, "learning_rate": 9.27389614618016e-06, "loss": 138.4687, "step": 31720 }, { "epoch": 0.26248087024858335, "grad_norm": 573.2298583984375, "learning_rate": 9.273164007243281e-06, "loss": 108.3331, "step": 31730 }, { "epoch": 0.2625635934979526, "grad_norm": 990.3570556640625, "learning_rate": 9.272431528310354e-06, "loss": 112.5479, "step": 31740 }, { "epoch": 0.26264631674732186, "grad_norm": 1721.805419921875, "learning_rate": 9.271698709439658e-06, "loss": 140.3488, "step": 31750 }, { "epoch": 0.26272903999669106, "grad_norm": 1286.5728759765625, "learning_rate": 9.2709655506895e-06, "loss": 168.1867, "step": 31760 }, { "epoch": 0.2628117632460603, "grad_norm": 582.025146484375, "learning_rate": 9.270232052118214e-06, "loss": 119.0196, "step": 31770 }, { "epoch": 0.2628944864954295, "grad_norm": 1799.970703125, "learning_rate": 9.26949821378416e-06, "loss": 130.6446, "step": 31780 }, { "epoch": 0.26297720974479877, "grad_norm": 810.4905395507812, "learning_rate": 9.268764035745727e-06, "loss": 123.3437, "step": 31790 }, { "epoch": 0.263059932994168, "grad_norm": 1139.095947265625, "learning_rate": 9.268029518061335e-06, "loss": 138.0163, "step": 31800 }, { "epoch": 0.2631426562435372, "grad_norm": 728.7420654296875, "learning_rate": 9.267294660789417e-06, "loss": 118.9001, "step": 31810 }, { "epoch": 0.2632253794929065, "grad_norm": 937.4639282226562, "learning_rate": 9.26655946398845e-06, "loss": 101.5111, "step": 31820 }, { "epoch": 0.26330810274227573, "grad_norm": 951.8093872070312, "learning_rate": 9.265823927716927e-06, "loss": 114.6193, "step": 31830 }, { "epoch": 0.26339082599164493, "grad_norm": 1131.0379638671875, "learning_rate": 9.26508805203337e-06, "loss": 164.4967, "step": 31840 }, { "epoch": 0.2634735492410142, "grad_norm": 916.825439453125, "learning_rate": 9.264351836996332e-06, "loss": 99.9893, "step": 31850 }, { "epoch": 0.26355627249038344, "grad_norm": 781.8618774414062, "learning_rate": 9.26361528266439e-06, "loss": 147.4806, "step": 31860 }, { "epoch": 0.26363899573975264, "grad_norm": 1081.302001953125, "learning_rate": 9.262878389096147e-06, "loss": 107.9612, "step": 31870 }, { "epoch": 0.2637217189891219, "grad_norm": 1674.4130859375, "learning_rate": 9.262141156350233e-06, "loss": 120.8496, "step": 31880 }, { "epoch": 0.26380444223849114, "grad_norm": 599.2373046875, "learning_rate": 9.261403584485308e-06, "loss": 130.4039, "step": 31890 }, { "epoch": 0.26388716548786034, "grad_norm": 931.727783203125, "learning_rate": 9.260665673560058e-06, "loss": 94.2291, "step": 31900 }, { "epoch": 0.2639698887372296, "grad_norm": 695.3705444335938, "learning_rate": 9.259927423633193e-06, "loss": 173.953, "step": 31910 }, { "epoch": 0.26405261198659885, "grad_norm": 643.5379638671875, "learning_rate": 9.259188834763455e-06, "loss": 91.6798, "step": 31920 }, { "epoch": 0.26413533523596805, "grad_norm": 1540.7181396484375, "learning_rate": 9.258449907009607e-06, "loss": 126.6724, "step": 31930 }, { "epoch": 0.2642180584853373, "grad_norm": 1498.8092041015625, "learning_rate": 9.257710640430444e-06, "loss": 110.8607, "step": 31940 }, { "epoch": 0.26430078173470656, "grad_norm": 626.6985473632812, "learning_rate": 9.256971035084786e-06, "loss": 85.7513, "step": 31950 }, { "epoch": 0.26438350498407576, "grad_norm": 1363.011962890625, "learning_rate": 9.256231091031477e-06, "loss": 101.1794, "step": 31960 }, { "epoch": 0.264466228233445, "grad_norm": 1134.742919921875, "learning_rate": 9.255490808329397e-06, "loss": 212.7933, "step": 31970 }, { "epoch": 0.26454895148281427, "grad_norm": 1176.7347412109375, "learning_rate": 9.254750187037443e-06, "loss": 122.8415, "step": 31980 }, { "epoch": 0.26463167473218346, "grad_norm": 843.20458984375, "learning_rate": 9.254009227214543e-06, "loss": 115.774, "step": 31990 }, { "epoch": 0.2647143979815527, "grad_norm": 990.8587646484375, "learning_rate": 9.253267928919652e-06, "loss": 141.8495, "step": 32000 }, { "epoch": 0.264797121230922, "grad_norm": 1063.32763671875, "learning_rate": 9.25252629221175e-06, "loss": 118.555, "step": 32010 }, { "epoch": 0.26487984448029117, "grad_norm": 1893.1072998046875, "learning_rate": 9.251784317149848e-06, "loss": 122.1342, "step": 32020 }, { "epoch": 0.2649625677296604, "grad_norm": 1173.83349609375, "learning_rate": 9.251042003792983e-06, "loss": 161.0942, "step": 32030 }, { "epoch": 0.2650452909790297, "grad_norm": 927.9155883789062, "learning_rate": 9.250299352200214e-06, "loss": 78.0564, "step": 32040 }, { "epoch": 0.2651280142283989, "grad_norm": 1478.896484375, "learning_rate": 9.249556362430631e-06, "loss": 129.6906, "step": 32050 }, { "epoch": 0.26521073747776813, "grad_norm": 772.536865234375, "learning_rate": 9.248813034543353e-06, "loss": 102.5596, "step": 32060 }, { "epoch": 0.2652934607271374, "grad_norm": 728.9833984375, "learning_rate": 9.24806936859752e-06, "loss": 111.6626, "step": 32070 }, { "epoch": 0.2653761839765066, "grad_norm": 1042.043701171875, "learning_rate": 9.247325364652304e-06, "loss": 132.3886, "step": 32080 }, { "epoch": 0.26545890722587584, "grad_norm": 490.30419921875, "learning_rate": 9.2465810227669e-06, "loss": 128.4408, "step": 32090 }, { "epoch": 0.2655416304752451, "grad_norm": 1035.492919921875, "learning_rate": 9.245836343000534e-06, "loss": 102.5217, "step": 32100 }, { "epoch": 0.2656243537246143, "grad_norm": 1005.1189575195312, "learning_rate": 9.245091325412456e-06, "loss": 112.0046, "step": 32110 }, { "epoch": 0.26570707697398355, "grad_norm": 864.4721069335938, "learning_rate": 9.244345970061944e-06, "loss": 109.3595, "step": 32120 }, { "epoch": 0.26578980022335275, "grad_norm": 803.18896484375, "learning_rate": 9.243600277008301e-06, "loss": 123.4932, "step": 32130 }, { "epoch": 0.265872523472722, "grad_norm": 1339.6492919921875, "learning_rate": 9.24285424631086e-06, "loss": 131.0302, "step": 32140 }, { "epoch": 0.26595524672209125, "grad_norm": 331.6441650390625, "learning_rate": 9.242107878028978e-06, "loss": 84.0776, "step": 32150 }, { "epoch": 0.26603796997146045, "grad_norm": 1174.8720703125, "learning_rate": 9.241361172222043e-06, "loss": 144.0584, "step": 32160 }, { "epoch": 0.2661206932208297, "grad_norm": 955.9444580078125, "learning_rate": 9.240614128949463e-06, "loss": 79.2642, "step": 32170 }, { "epoch": 0.26620341647019896, "grad_norm": 408.5090637207031, "learning_rate": 9.239866748270679e-06, "loss": 80.5909, "step": 32180 }, { "epoch": 0.26628613971956816, "grad_norm": 1054.4498291015625, "learning_rate": 9.239119030245156e-06, "loss": 105.1165, "step": 32190 }, { "epoch": 0.2663688629689374, "grad_norm": 1652.845703125, "learning_rate": 9.238370974932387e-06, "loss": 144.1475, "step": 32200 }, { "epoch": 0.26645158621830667, "grad_norm": 897.6002807617188, "learning_rate": 9.23762258239189e-06, "loss": 85.8102, "step": 32210 }, { "epoch": 0.26653430946767587, "grad_norm": 848.9139404296875, "learning_rate": 9.236873852683213e-06, "loss": 107.4219, "step": 32220 }, { "epoch": 0.2666170327170451, "grad_norm": 900.8207397460938, "learning_rate": 9.23612478586593e-06, "loss": 77.9687, "step": 32230 }, { "epoch": 0.2666997559664144, "grad_norm": 1326.0458984375, "learning_rate": 9.235375381999636e-06, "loss": 106.4991, "step": 32240 }, { "epoch": 0.2667824792157836, "grad_norm": 2422.508056640625, "learning_rate": 9.234625641143962e-06, "loss": 138.341, "step": 32250 }, { "epoch": 0.26686520246515283, "grad_norm": 964.7281494140625, "learning_rate": 9.233875563358559e-06, "loss": 122.4212, "step": 32260 }, { "epoch": 0.2669479257145221, "grad_norm": 1445.0108642578125, "learning_rate": 9.23312514870311e-06, "loss": 107.632, "step": 32270 }, { "epoch": 0.2670306489638913, "grad_norm": 628.1731567382812, "learning_rate": 9.232374397237318e-06, "loss": 118.4748, "step": 32280 }, { "epoch": 0.26711337221326054, "grad_norm": 1020.5704345703125, "learning_rate": 9.231623309020922e-06, "loss": 132.8099, "step": 32290 }, { "epoch": 0.2671960954626298, "grad_norm": 949.1611938476562, "learning_rate": 9.230871884113679e-06, "loss": 128.9596, "step": 32300 }, { "epoch": 0.267278818711999, "grad_norm": 853.702880859375, "learning_rate": 9.230120122575376e-06, "loss": 117.7804, "step": 32310 }, { "epoch": 0.26736154196136824, "grad_norm": 856.49462890625, "learning_rate": 9.22936802446583e-06, "loss": 179.9062, "step": 32320 }, { "epoch": 0.2674442652107375, "grad_norm": 847.5012817382812, "learning_rate": 9.228615589844879e-06, "loss": 84.1749, "step": 32330 }, { "epoch": 0.2675269884601067, "grad_norm": 1034.2694091796875, "learning_rate": 9.227862818772392e-06, "loss": 131.7186, "step": 32340 }, { "epoch": 0.26760971170947595, "grad_norm": 1057.1470947265625, "learning_rate": 9.227109711308265e-06, "loss": 94.973, "step": 32350 }, { "epoch": 0.2676924349588452, "grad_norm": 374.79473876953125, "learning_rate": 9.226356267512417e-06, "loss": 107.7693, "step": 32360 }, { "epoch": 0.2677751582082144, "grad_norm": 817.5911865234375, "learning_rate": 9.225602487444799e-06, "loss": 107.2883, "step": 32370 }, { "epoch": 0.26785788145758366, "grad_norm": 1157.15234375, "learning_rate": 9.224848371165382e-06, "loss": 170.9429, "step": 32380 }, { "epoch": 0.2679406047069529, "grad_norm": 1658.9010009765625, "learning_rate": 9.224093918734172e-06, "loss": 202.5666, "step": 32390 }, { "epoch": 0.2680233279563221, "grad_norm": 1403.2574462890625, "learning_rate": 9.223339130211194e-06, "loss": 113.1494, "step": 32400 }, { "epoch": 0.26810605120569136, "grad_norm": 939.0480346679688, "learning_rate": 9.222584005656501e-06, "loss": 114.9759, "step": 32410 }, { "epoch": 0.2681887744550606, "grad_norm": 578.2918701171875, "learning_rate": 9.22182854513018e-06, "loss": 94.9374, "step": 32420 }, { "epoch": 0.2682714977044298, "grad_norm": 2604.21484375, "learning_rate": 9.221072748692336e-06, "loss": 109.7514, "step": 32430 }, { "epoch": 0.26835422095379907, "grad_norm": 962.8878173828125, "learning_rate": 9.220316616403109e-06, "loss": 104.4484, "step": 32440 }, { "epoch": 0.2684369442031683, "grad_norm": 759.3588256835938, "learning_rate": 9.219560148322655e-06, "loss": 101.3538, "step": 32450 }, { "epoch": 0.2685196674525375, "grad_norm": 1004.7863159179688, "learning_rate": 9.218803344511165e-06, "loss": 134.46, "step": 32460 }, { "epoch": 0.2686023907019068, "grad_norm": 1546.0279541015625, "learning_rate": 9.218046205028854e-06, "loss": 102.795, "step": 32470 }, { "epoch": 0.26868511395127603, "grad_norm": 422.1183776855469, "learning_rate": 9.217288729935966e-06, "loss": 100.5324, "step": 32480 }, { "epoch": 0.26876783720064523, "grad_norm": 953.4854736328125, "learning_rate": 9.216530919292768e-06, "loss": 147.8428, "step": 32490 }, { "epoch": 0.2688505604500145, "grad_norm": 910.6680297851562, "learning_rate": 9.215772773159556e-06, "loss": 138.0076, "step": 32500 }, { "epoch": 0.2689332836993837, "grad_norm": 1041.43505859375, "learning_rate": 9.215014291596653e-06, "loss": 120.8348, "step": 32510 }, { "epoch": 0.26901600694875294, "grad_norm": 1151.9285888671875, "learning_rate": 9.214255474664405e-06, "loss": 150.9121, "step": 32520 }, { "epoch": 0.2690987301981222, "grad_norm": 921.2622680664062, "learning_rate": 9.213496322423193e-06, "loss": 83.8476, "step": 32530 }, { "epoch": 0.2691814534474914, "grad_norm": 411.46826171875, "learning_rate": 9.212736834933413e-06, "loss": 129.1243, "step": 32540 }, { "epoch": 0.26926417669686065, "grad_norm": 3256.450927734375, "learning_rate": 9.211977012255497e-06, "loss": 87.7072, "step": 32550 }, { "epoch": 0.2693468999462299, "grad_norm": 1524.965087890625, "learning_rate": 9.211216854449903e-06, "loss": 128.8632, "step": 32560 }, { "epoch": 0.2694296231955991, "grad_norm": 953.8908081054688, "learning_rate": 9.210456361577109e-06, "loss": 113.7588, "step": 32570 }, { "epoch": 0.26951234644496835, "grad_norm": 1008.9974975585938, "learning_rate": 9.209695533697624e-06, "loss": 122.1501, "step": 32580 }, { "epoch": 0.2695950696943376, "grad_norm": 805.004150390625, "learning_rate": 9.208934370871989e-06, "loss": 113.7434, "step": 32590 }, { "epoch": 0.2696777929437068, "grad_norm": 636.7761840820312, "learning_rate": 9.20817287316076e-06, "loss": 113.1953, "step": 32600 }, { "epoch": 0.26976051619307606, "grad_norm": 763.083984375, "learning_rate": 9.20741104062453e-06, "loss": 101.9306, "step": 32610 }, { "epoch": 0.2698432394424453, "grad_norm": 1067.39208984375, "learning_rate": 9.206648873323912e-06, "loss": 104.6595, "step": 32620 }, { "epoch": 0.2699259626918145, "grad_norm": 1282.15576171875, "learning_rate": 9.205886371319548e-06, "loss": 112.2921, "step": 32630 }, { "epoch": 0.27000868594118377, "grad_norm": 1269.3165283203125, "learning_rate": 9.20512353467211e-06, "loss": 101.4175, "step": 32640 }, { "epoch": 0.270091409190553, "grad_norm": 1416.7222900390625, "learning_rate": 9.204360363442288e-06, "loss": 94.3014, "step": 32650 }, { "epoch": 0.2701741324399222, "grad_norm": 577.2730712890625, "learning_rate": 9.20359685769081e-06, "loss": 178.5912, "step": 32660 }, { "epoch": 0.2702568556892915, "grad_norm": 976.02880859375, "learning_rate": 9.202833017478421e-06, "loss": 142.0586, "step": 32670 }, { "epoch": 0.27033957893866073, "grad_norm": 1126.535888671875, "learning_rate": 9.2020688428659e-06, "loss": 100.6238, "step": 32680 }, { "epoch": 0.27042230218802993, "grad_norm": 945.1123657226562, "learning_rate": 9.201304333914042e-06, "loss": 124.4394, "step": 32690 }, { "epoch": 0.2705050254373992, "grad_norm": 600.9089965820312, "learning_rate": 9.200539490683682e-06, "loss": 93.1028, "step": 32700 }, { "epoch": 0.27058774868676844, "grad_norm": 704.4324951171875, "learning_rate": 9.19977431323567e-06, "loss": 124.6708, "step": 32710 }, { "epoch": 0.27067047193613764, "grad_norm": 938.8729858398438, "learning_rate": 9.199008801630893e-06, "loss": 119.1146, "step": 32720 }, { "epoch": 0.2707531951855069, "grad_norm": 1016.2319946289062, "learning_rate": 9.198242955930257e-06, "loss": 126.7218, "step": 32730 }, { "epoch": 0.27083591843487614, "grad_norm": 832.0853881835938, "learning_rate": 9.197476776194693e-06, "loss": 102.6724, "step": 32740 }, { "epoch": 0.27091864168424534, "grad_norm": 1260.37548828125, "learning_rate": 9.196710262485168e-06, "loss": 107.5099, "step": 32750 }, { "epoch": 0.2710013649336146, "grad_norm": 840.14990234375, "learning_rate": 9.195943414862667e-06, "loss": 124.9764, "step": 32760 }, { "epoch": 0.27108408818298385, "grad_norm": 486.6063537597656, "learning_rate": 9.195176233388206e-06, "loss": 92.0499, "step": 32770 }, { "epoch": 0.27116681143235305, "grad_norm": 907.8724975585938, "learning_rate": 9.194408718122825e-06, "loss": 120.6719, "step": 32780 }, { "epoch": 0.2712495346817223, "grad_norm": 572.4683837890625, "learning_rate": 9.193640869127592e-06, "loss": 124.2721, "step": 32790 }, { "epoch": 0.27133225793109156, "grad_norm": 957.0466918945312, "learning_rate": 9.192872686463601e-06, "loss": 131.9941, "step": 32800 }, { "epoch": 0.27141498118046076, "grad_norm": 691.4140014648438, "learning_rate": 9.192104170191973e-06, "loss": 101.0082, "step": 32810 }, { "epoch": 0.27149770442983, "grad_norm": 791.294677734375, "learning_rate": 9.191335320373856e-06, "loss": 138.9451, "step": 32820 }, { "epoch": 0.27158042767919927, "grad_norm": 1051.594482421875, "learning_rate": 9.190566137070422e-06, "loss": 107.2597, "step": 32830 }, { "epoch": 0.27166315092856846, "grad_norm": 1549.462158203125, "learning_rate": 9.189796620342875e-06, "loss": 153.74, "step": 32840 }, { "epoch": 0.2717458741779377, "grad_norm": 751.6979370117188, "learning_rate": 9.189026770252437e-06, "loss": 108.3263, "step": 32850 }, { "epoch": 0.2718285974273069, "grad_norm": 4171.58203125, "learning_rate": 9.188256586860365e-06, "loss": 177.8506, "step": 32860 }, { "epoch": 0.27191132067667617, "grad_norm": 880.1036987304688, "learning_rate": 9.187486070227938e-06, "loss": 124.1478, "step": 32870 }, { "epoch": 0.2719940439260454, "grad_norm": 875.8194580078125, "learning_rate": 9.186715220416463e-06, "loss": 80.9601, "step": 32880 }, { "epoch": 0.2720767671754146, "grad_norm": 517.209716796875, "learning_rate": 9.185944037487271e-06, "loss": 105.6458, "step": 32890 }, { "epoch": 0.2721594904247839, "grad_norm": 783.9718627929688, "learning_rate": 9.185172521501723e-06, "loss": 103.928, "step": 32900 }, { "epoch": 0.27224221367415313, "grad_norm": 809.2305297851562, "learning_rate": 9.184400672521204e-06, "loss": 119.6438, "step": 32910 }, { "epoch": 0.27232493692352233, "grad_norm": 847.0447998046875, "learning_rate": 9.183628490607129e-06, "loss": 118.6409, "step": 32920 }, { "epoch": 0.2724076601728916, "grad_norm": 803.6466674804688, "learning_rate": 9.182855975820934e-06, "loss": 86.8706, "step": 32930 }, { "epoch": 0.27249038342226084, "grad_norm": 1112.55322265625, "learning_rate": 9.182083128224086e-06, "loss": 108.3938, "step": 32940 }, { "epoch": 0.27257310667163004, "grad_norm": 758.2708740234375, "learning_rate": 9.181309947878077e-06, "loss": 136.1542, "step": 32950 }, { "epoch": 0.2726558299209993, "grad_norm": 655.5614013671875, "learning_rate": 9.180536434844426e-06, "loss": 93.3358, "step": 32960 }, { "epoch": 0.27273855317036855, "grad_norm": 746.388671875, "learning_rate": 9.179762589184676e-06, "loss": 137.545, "step": 32970 }, { "epoch": 0.27282127641973775, "grad_norm": 919.367431640625, "learning_rate": 9.1789884109604e-06, "loss": 105.9232, "step": 32980 }, { "epoch": 0.272903999669107, "grad_norm": 1165.487548828125, "learning_rate": 9.178213900233193e-06, "loss": 119.8975, "step": 32990 }, { "epoch": 0.27298672291847625, "grad_norm": 1274.4398193359375, "learning_rate": 9.177439057064684e-06, "loss": 108.2796, "step": 33000 }, { "epoch": 0.27306944616784545, "grad_norm": 2099.3134765625, "learning_rate": 9.17666388151652e-06, "loss": 88.9919, "step": 33010 }, { "epoch": 0.2731521694172147, "grad_norm": 1373.3428955078125, "learning_rate": 9.175888373650377e-06, "loss": 109.9396, "step": 33020 }, { "epoch": 0.27323489266658396, "grad_norm": 1429.390869140625, "learning_rate": 9.175112533527963e-06, "loss": 94.565, "step": 33030 }, { "epoch": 0.27331761591595316, "grad_norm": 1118.853271484375, "learning_rate": 9.174336361211007e-06, "loss": 101.014, "step": 33040 }, { "epoch": 0.2734003391653224, "grad_norm": 2152.85107421875, "learning_rate": 9.173559856761262e-06, "loss": 153.9467, "step": 33050 }, { "epoch": 0.27348306241469167, "grad_norm": 909.071533203125, "learning_rate": 9.172783020240514e-06, "loss": 102.0454, "step": 33060 }, { "epoch": 0.27356578566406087, "grad_norm": 890.9006958007812, "learning_rate": 9.172005851710573e-06, "loss": 130.9717, "step": 33070 }, { "epoch": 0.2736485089134301, "grad_norm": 1000.61279296875, "learning_rate": 9.171228351233272e-06, "loss": 150.3027, "step": 33080 }, { "epoch": 0.2737312321627994, "grad_norm": 1430.8470458984375, "learning_rate": 9.170450518870475e-06, "loss": 149.3742, "step": 33090 }, { "epoch": 0.2738139554121686, "grad_norm": 1026.9654541015625, "learning_rate": 9.169672354684069e-06, "loss": 123.5882, "step": 33100 }, { "epoch": 0.27389667866153783, "grad_norm": 841.4974975585938, "learning_rate": 9.168893858735972e-06, "loss": 92.7002, "step": 33110 }, { "epoch": 0.2739794019109071, "grad_norm": 1173.48681640625, "learning_rate": 9.168115031088122e-06, "loss": 89.6682, "step": 33120 }, { "epoch": 0.2740621251602763, "grad_norm": 733.5807495117188, "learning_rate": 9.167335871802488e-06, "loss": 86.3547, "step": 33130 }, { "epoch": 0.27414484840964554, "grad_norm": 1434.056640625, "learning_rate": 9.166556380941063e-06, "loss": 125.5328, "step": 33140 }, { "epoch": 0.2742275716590148, "grad_norm": 820.1240844726562, "learning_rate": 9.16577655856587e-06, "loss": 130.4958, "step": 33150 }, { "epoch": 0.274310294908384, "grad_norm": 840.7511596679688, "learning_rate": 9.164996404738955e-06, "loss": 160.6511, "step": 33160 }, { "epoch": 0.27439301815775324, "grad_norm": 915.5698852539062, "learning_rate": 9.16421591952239e-06, "loss": 127.9161, "step": 33170 }, { "epoch": 0.2744757414071225, "grad_norm": 1336.1126708984375, "learning_rate": 9.163435102978276e-06, "loss": 122.7304, "step": 33180 }, { "epoch": 0.2745584646564917, "grad_norm": 982.8937377929688, "learning_rate": 9.162653955168739e-06, "loss": 118.9783, "step": 33190 }, { "epoch": 0.27464118790586095, "grad_norm": 756.2153930664062, "learning_rate": 9.161872476155929e-06, "loss": 101.5269, "step": 33200 }, { "epoch": 0.2747239111552302, "grad_norm": 732.1403198242188, "learning_rate": 9.161090666002029e-06, "loss": 124.4535, "step": 33210 }, { "epoch": 0.2748066344045994, "grad_norm": 753.6777954101562, "learning_rate": 9.16030852476924e-06, "loss": 109.3473, "step": 33220 }, { "epoch": 0.27488935765396866, "grad_norm": 822.023681640625, "learning_rate": 9.159526052519794e-06, "loss": 120.7444, "step": 33230 }, { "epoch": 0.27497208090333786, "grad_norm": 925.0752563476562, "learning_rate": 9.15874324931595e-06, "loss": 100.8344, "step": 33240 }, { "epoch": 0.2750548041527071, "grad_norm": 686.9509887695312, "learning_rate": 9.157960115219993e-06, "loss": 113.704, "step": 33250 }, { "epoch": 0.27513752740207637, "grad_norm": 996.56787109375, "learning_rate": 9.157176650294231e-06, "loss": 133.5279, "step": 33260 }, { "epoch": 0.27522025065144556, "grad_norm": 728.7750244140625, "learning_rate": 9.156392854601001e-06, "loss": 133.3526, "step": 33270 }, { "epoch": 0.2753029739008148, "grad_norm": 1302.504638671875, "learning_rate": 9.155608728202669e-06, "loss": 113.5402, "step": 33280 }, { "epoch": 0.2753856971501841, "grad_norm": 857.5109252929688, "learning_rate": 9.154824271161621e-06, "loss": 83.4826, "step": 33290 }, { "epoch": 0.27546842039955327, "grad_norm": 910.7625122070312, "learning_rate": 9.154039483540273e-06, "loss": 110.0397, "step": 33300 }, { "epoch": 0.2755511436489225, "grad_norm": 529.6849365234375, "learning_rate": 9.153254365401069e-06, "loss": 125.4888, "step": 33310 }, { "epoch": 0.2756338668982918, "grad_norm": 879.7425537109375, "learning_rate": 9.152468916806477e-06, "loss": 103.2796, "step": 33320 }, { "epoch": 0.275716590147661, "grad_norm": 1299.931884765625, "learning_rate": 9.151683137818989e-06, "loss": 119.4664, "step": 33330 }, { "epoch": 0.27579931339703023, "grad_norm": 698.8023681640625, "learning_rate": 9.150897028501126e-06, "loss": 100.8363, "step": 33340 }, { "epoch": 0.2758820366463995, "grad_norm": 1195.03466796875, "learning_rate": 9.15011058891544e-06, "loss": 119.996, "step": 33350 }, { "epoch": 0.2759647598957687, "grad_norm": 1072.2655029296875, "learning_rate": 9.149323819124498e-06, "loss": 113.2403, "step": 33360 }, { "epoch": 0.27604748314513794, "grad_norm": 897.378662109375, "learning_rate": 9.148536719190904e-06, "loss": 131.1827, "step": 33370 }, { "epoch": 0.2761302063945072, "grad_norm": 900.53955078125, "learning_rate": 9.147749289177282e-06, "loss": 141.6734, "step": 33380 }, { "epoch": 0.2762129296438764, "grad_norm": 809.6741333007812, "learning_rate": 9.146961529146285e-06, "loss": 99.829, "step": 33390 }, { "epoch": 0.27629565289324565, "grad_norm": 757.1586303710938, "learning_rate": 9.146173439160591e-06, "loss": 117.2545, "step": 33400 }, { "epoch": 0.2763783761426149, "grad_norm": 763.6544799804688, "learning_rate": 9.145385019282904e-06, "loss": 135.9243, "step": 33410 }, { "epoch": 0.2764610993919841, "grad_norm": 1427.904296875, "learning_rate": 9.144596269575957e-06, "loss": 125.229, "step": 33420 }, { "epoch": 0.27654382264135335, "grad_norm": 1012.0225219726562, "learning_rate": 9.143807190102504e-06, "loss": 126.2279, "step": 33430 }, { "epoch": 0.2766265458907226, "grad_norm": 2056.858154296875, "learning_rate": 9.143017780925331e-06, "loss": 153.6504, "step": 33440 }, { "epoch": 0.2767092691400918, "grad_norm": 1273.417236328125, "learning_rate": 9.142228042107248e-06, "loss": 109.7093, "step": 33450 }, { "epoch": 0.27679199238946106, "grad_norm": 1077.05126953125, "learning_rate": 9.141437973711092e-06, "loss": 106.108, "step": 33460 }, { "epoch": 0.2768747156388303, "grad_norm": 1240.419189453125, "learning_rate": 9.14064757579972e-06, "loss": 95.5216, "step": 33470 }, { "epoch": 0.2769574388881995, "grad_norm": 1002.42724609375, "learning_rate": 9.139856848436023e-06, "loss": 117.1653, "step": 33480 }, { "epoch": 0.27704016213756877, "grad_norm": 1525.692626953125, "learning_rate": 9.139065791682916e-06, "loss": 161.3095, "step": 33490 }, { "epoch": 0.277122885386938, "grad_norm": 773.1032104492188, "learning_rate": 9.138274405603342e-06, "loss": 168.1776, "step": 33500 }, { "epoch": 0.2772056086363072, "grad_norm": 768.6693115234375, "learning_rate": 9.137482690260265e-06, "loss": 156.8843, "step": 33510 }, { "epoch": 0.2772883318856765, "grad_norm": 307.3286437988281, "learning_rate": 9.13669064571668e-06, "loss": 109.4442, "step": 33520 }, { "epoch": 0.27737105513504573, "grad_norm": 3717.694580078125, "learning_rate": 9.135898272035601e-06, "loss": 167.7826, "step": 33530 }, { "epoch": 0.27745377838441493, "grad_norm": 986.0308227539062, "learning_rate": 9.13510556928008e-06, "loss": 87.5478, "step": 33540 }, { "epoch": 0.2775365016337842, "grad_norm": 652.1424560546875, "learning_rate": 9.134312537513188e-06, "loss": 106.2764, "step": 33550 }, { "epoch": 0.27761922488315344, "grad_norm": 720.5858154296875, "learning_rate": 9.133519176798021e-06, "loss": 152.1906, "step": 33560 }, { "epoch": 0.27770194813252264, "grad_norm": 1833.9097900390625, "learning_rate": 9.132725487197701e-06, "loss": 116.5092, "step": 33570 }, { "epoch": 0.2777846713818919, "grad_norm": 1374.428955078125, "learning_rate": 9.131931468775382e-06, "loss": 132.6865, "step": 33580 }, { "epoch": 0.2778673946312611, "grad_norm": 724.4324340820312, "learning_rate": 9.131137121594239e-06, "loss": 127.3931, "step": 33590 }, { "epoch": 0.27795011788063034, "grad_norm": 594.5848999023438, "learning_rate": 9.130342445717474e-06, "loss": 117.9793, "step": 33600 }, { "epoch": 0.2780328411299996, "grad_norm": 1347.4599609375, "learning_rate": 9.129547441208317e-06, "loss": 123.5553, "step": 33610 }, { "epoch": 0.2781155643793688, "grad_norm": 825.2183837890625, "learning_rate": 9.128752108130022e-06, "loss": 109.409, "step": 33620 }, { "epoch": 0.27819828762873805, "grad_norm": 952.2738037109375, "learning_rate": 9.12795644654587e-06, "loss": 149.2746, "step": 33630 }, { "epoch": 0.2782810108781073, "grad_norm": 6804.1904296875, "learning_rate": 9.127160456519168e-06, "loss": 132.9842, "step": 33640 }, { "epoch": 0.2783637341274765, "grad_norm": 953.0177612304688, "learning_rate": 9.126364138113251e-06, "loss": 119.1077, "step": 33650 }, { "epoch": 0.27844645737684576, "grad_norm": 771.2464599609375, "learning_rate": 9.125567491391476e-06, "loss": 117.123, "step": 33660 }, { "epoch": 0.278529180626215, "grad_norm": 1822.6439208984375, "learning_rate": 9.12477051641723e-06, "loss": 151.1186, "step": 33670 }, { "epoch": 0.2786119038755842, "grad_norm": 872.4586791992188, "learning_rate": 9.123973213253923e-06, "loss": 112.0873, "step": 33680 }, { "epoch": 0.27869462712495346, "grad_norm": 494.63134765625, "learning_rate": 9.123175581964995e-06, "loss": 100.9059, "step": 33690 }, { "epoch": 0.2787773503743227, "grad_norm": 959.332275390625, "learning_rate": 9.122377622613909e-06, "loss": 106.5335, "step": 33700 }, { "epoch": 0.2788600736236919, "grad_norm": 886.007568359375, "learning_rate": 9.121579335264155e-06, "loss": 128.5588, "step": 33710 }, { "epoch": 0.27894279687306117, "grad_norm": 1174.72265625, "learning_rate": 9.120780719979248e-06, "loss": 92.5889, "step": 33720 }, { "epoch": 0.2790255201224304, "grad_norm": 2207.4111328125, "learning_rate": 9.11998177682273e-06, "loss": 132.5646, "step": 33730 }, { "epoch": 0.2791082433717996, "grad_norm": 730.7549438476562, "learning_rate": 9.11918250585817e-06, "loss": 82.5129, "step": 33740 }, { "epoch": 0.2791909666211689, "grad_norm": 876.688232421875, "learning_rate": 9.118382907149164e-06, "loss": 109.9412, "step": 33750 }, { "epoch": 0.27927368987053813, "grad_norm": 831.474609375, "learning_rate": 9.117582980759332e-06, "loss": 124.5468, "step": 33760 }, { "epoch": 0.27935641311990733, "grad_norm": 820.8474731445312, "learning_rate": 9.116782726752317e-06, "loss": 126.4644, "step": 33770 }, { "epoch": 0.2794391363692766, "grad_norm": 921.0737915039062, "learning_rate": 9.115982145191796e-06, "loss": 116.8273, "step": 33780 }, { "epoch": 0.27952185961864584, "grad_norm": 1426.6785888671875, "learning_rate": 9.115181236141463e-06, "loss": 127.0457, "step": 33790 }, { "epoch": 0.27960458286801504, "grad_norm": 1093.164794921875, "learning_rate": 9.114379999665047e-06, "loss": 85.9147, "step": 33800 }, { "epoch": 0.2796873061173843, "grad_norm": 1163.352783203125, "learning_rate": 9.113578435826295e-06, "loss": 140.9147, "step": 33810 }, { "epoch": 0.27977002936675355, "grad_norm": 412.13787841796875, "learning_rate": 9.112776544688988e-06, "loss": 127.3203, "step": 33820 }, { "epoch": 0.27985275261612275, "grad_norm": 785.0323486328125, "learning_rate": 9.111974326316926e-06, "loss": 103.5417, "step": 33830 }, { "epoch": 0.279935475865492, "grad_norm": 1045.812255859375, "learning_rate": 9.111171780773938e-06, "loss": 120.1187, "step": 33840 }, { "epoch": 0.28001819911486125, "grad_norm": 916.0906372070312, "learning_rate": 9.110368908123878e-06, "loss": 139.0241, "step": 33850 }, { "epoch": 0.28010092236423045, "grad_norm": 1398.31982421875, "learning_rate": 9.10956570843063e-06, "loss": 132.5252, "step": 33860 }, { "epoch": 0.2801836456135997, "grad_norm": 1527.2587890625, "learning_rate": 9.108762181758096e-06, "loss": 130.4855, "step": 33870 }, { "epoch": 0.28026636886296896, "grad_norm": 751.3243408203125, "learning_rate": 9.107958328170215e-06, "loss": 106.5942, "step": 33880 }, { "epoch": 0.28034909211233816, "grad_norm": 1024.8853759765625, "learning_rate": 9.10715414773094e-06, "loss": 109.6669, "step": 33890 }, { "epoch": 0.2804318153617074, "grad_norm": 1099.5294189453125, "learning_rate": 9.10634964050426e-06, "loss": 119.2499, "step": 33900 }, { "epoch": 0.28051453861107667, "grad_norm": 775.4506225585938, "learning_rate": 9.105544806554184e-06, "loss": 157.9587, "step": 33910 }, { "epoch": 0.28059726186044587, "grad_norm": 1175.566162109375, "learning_rate": 9.104739645944752e-06, "loss": 179.6702, "step": 33920 }, { "epoch": 0.2806799851098151, "grad_norm": 1368.4671630859375, "learning_rate": 9.103934158740023e-06, "loss": 129.1513, "step": 33930 }, { "epoch": 0.2807627083591844, "grad_norm": 1385.238037109375, "learning_rate": 9.10312834500409e-06, "loss": 150.3397, "step": 33940 }, { "epoch": 0.2808454316085536, "grad_norm": 1050.946044921875, "learning_rate": 9.102322204801062e-06, "loss": 118.2614, "step": 33950 }, { "epoch": 0.28092815485792283, "grad_norm": 2881.64013671875, "learning_rate": 9.101515738195084e-06, "loss": 100.6495, "step": 33960 }, { "epoch": 0.28101087810729203, "grad_norm": 394.7975769042969, "learning_rate": 9.100708945250322e-06, "loss": 81.3734, "step": 33970 }, { "epoch": 0.2810936013566613, "grad_norm": 1190.456298828125, "learning_rate": 9.099901826030969e-06, "loss": 130.64, "step": 33980 }, { "epoch": 0.28117632460603054, "grad_norm": 1374.1256103515625, "learning_rate": 9.099094380601244e-06, "loss": 119.4305, "step": 33990 }, { "epoch": 0.28125904785539974, "grad_norm": 1412.7530517578125, "learning_rate": 9.098286609025392e-06, "loss": 106.0938, "step": 34000 }, { "epoch": 0.281341771104769, "grad_norm": 617.6422119140625, "learning_rate": 9.097478511367682e-06, "loss": 119.852, "step": 34010 }, { "epoch": 0.28142449435413824, "grad_norm": 857.2781982421875, "learning_rate": 9.096670087692413e-06, "loss": 168.8287, "step": 34020 }, { "epoch": 0.28150721760350744, "grad_norm": 0.0, "learning_rate": 9.095861338063906e-06, "loss": 99.5146, "step": 34030 }, { "epoch": 0.2815899408528767, "grad_norm": 1164.4888916015625, "learning_rate": 9.09505226254651e-06, "loss": 139.5455, "step": 34040 }, { "epoch": 0.28167266410224595, "grad_norm": 1813.4522705078125, "learning_rate": 9.094242861204598e-06, "loss": 153.4502, "step": 34050 }, { "epoch": 0.28175538735161515, "grad_norm": 918.6724243164062, "learning_rate": 9.093433134102572e-06, "loss": 95.0513, "step": 34060 }, { "epoch": 0.2818381106009844, "grad_norm": 758.8914794921875, "learning_rate": 9.09262308130486e-06, "loss": 127.6234, "step": 34070 }, { "epoch": 0.28192083385035366, "grad_norm": 1214.897705078125, "learning_rate": 9.091812702875908e-06, "loss": 135.7131, "step": 34080 }, { "epoch": 0.28200355709972286, "grad_norm": 1762.4208984375, "learning_rate": 9.0910019988802e-06, "loss": 116.575, "step": 34090 }, { "epoch": 0.2820862803490921, "grad_norm": 1027.57958984375, "learning_rate": 9.09019096938224e-06, "loss": 115.0406, "step": 34100 }, { "epoch": 0.28216900359846137, "grad_norm": 549.1436157226562, "learning_rate": 9.089379614446554e-06, "loss": 101.1465, "step": 34110 }, { "epoch": 0.28225172684783056, "grad_norm": 1149.4486083984375, "learning_rate": 9.0885679341377e-06, "loss": 111.7543, "step": 34120 }, { "epoch": 0.2823344500971998, "grad_norm": 827.5548095703125, "learning_rate": 9.08775592852026e-06, "loss": 84.8386, "step": 34130 }, { "epoch": 0.2824171733465691, "grad_norm": 2282.6884765625, "learning_rate": 9.08694359765884e-06, "loss": 122.2059, "step": 34140 }, { "epoch": 0.28249989659593827, "grad_norm": 1563.5760498046875, "learning_rate": 9.086130941618075e-06, "loss": 127.1692, "step": 34150 }, { "epoch": 0.2825826198453075, "grad_norm": 546.21337890625, "learning_rate": 9.085317960462625e-06, "loss": 91.0043, "step": 34160 }, { "epoch": 0.2826653430946768, "grad_norm": 1092.4716796875, "learning_rate": 9.084504654257173e-06, "loss": 126.0462, "step": 34170 }, { "epoch": 0.282748066344046, "grad_norm": 1006.257568359375, "learning_rate": 9.08369102306643e-06, "loss": 104.315, "step": 34180 }, { "epoch": 0.28283078959341523, "grad_norm": 1649.3040771484375, "learning_rate": 9.082877066955135e-06, "loss": 101.4608, "step": 34190 }, { "epoch": 0.2829135128427845, "grad_norm": 1023.9491577148438, "learning_rate": 9.08206278598805e-06, "loss": 133.0885, "step": 34200 }, { "epoch": 0.2829962360921537, "grad_norm": 1013.92041015625, "learning_rate": 9.081248180229963e-06, "loss": 93.8945, "step": 34210 }, { "epoch": 0.28307895934152294, "grad_norm": 655.7772827148438, "learning_rate": 9.080433249745688e-06, "loss": 104.3141, "step": 34220 }, { "epoch": 0.2831616825908922, "grad_norm": 1191.8131103515625, "learning_rate": 9.079617994600066e-06, "loss": 147.7726, "step": 34230 }, { "epoch": 0.2832444058402614, "grad_norm": 628.782470703125, "learning_rate": 9.078802414857963e-06, "loss": 96.9786, "step": 34240 }, { "epoch": 0.28332712908963065, "grad_norm": 695.0119018554688, "learning_rate": 9.077986510584273e-06, "loss": 111.2695, "step": 34250 }, { "epoch": 0.2834098523389999, "grad_norm": 1148.8243408203125, "learning_rate": 9.07717028184391e-06, "loss": 104.8876, "step": 34260 }, { "epoch": 0.2834925755883691, "grad_norm": 766.49072265625, "learning_rate": 9.07635372870182e-06, "loss": 84.2609, "step": 34270 }, { "epoch": 0.28357529883773835, "grad_norm": 1150.672119140625, "learning_rate": 9.07553685122297e-06, "loss": 146.0944, "step": 34280 }, { "epoch": 0.2836580220871076, "grad_norm": 1328.8944091796875, "learning_rate": 9.074719649472358e-06, "loss": 115.0404, "step": 34290 }, { "epoch": 0.2837407453364768, "grad_norm": 1080.066650390625, "learning_rate": 9.073902123515005e-06, "loss": 144.9194, "step": 34300 }, { "epoch": 0.28382346858584606, "grad_norm": 676.450439453125, "learning_rate": 9.073084273415956e-06, "loss": 79.431, "step": 34310 }, { "epoch": 0.28390619183521526, "grad_norm": 1445.7745361328125, "learning_rate": 9.072266099240286e-06, "loss": 99.8113, "step": 34320 }, { "epoch": 0.2839889150845845, "grad_norm": 739.0885009765625, "learning_rate": 9.07144760105309e-06, "loss": 142.6431, "step": 34330 }, { "epoch": 0.28407163833395377, "grad_norm": 920.0739135742188, "learning_rate": 9.070628778919493e-06, "loss": 93.1577, "step": 34340 }, { "epoch": 0.28415436158332297, "grad_norm": 1188.843017578125, "learning_rate": 9.069809632904647e-06, "loss": 111.0898, "step": 34350 }, { "epoch": 0.2842370848326922, "grad_norm": 991.0109252929688, "learning_rate": 9.068990163073726e-06, "loss": 87.4462, "step": 34360 }, { "epoch": 0.2843198080820615, "grad_norm": 845.0187377929688, "learning_rate": 9.068170369491932e-06, "loss": 95.4965, "step": 34370 }, { "epoch": 0.2844025313314307, "grad_norm": 861.9247436523438, "learning_rate": 9.067350252224491e-06, "loss": 76.6258, "step": 34380 }, { "epoch": 0.28448525458079993, "grad_norm": 1371.8582763671875, "learning_rate": 9.066529811336658e-06, "loss": 117.6796, "step": 34390 }, { "epoch": 0.2845679778301692, "grad_norm": 909.2398681640625, "learning_rate": 9.06570904689371e-06, "loss": 96.8476, "step": 34400 }, { "epoch": 0.2846507010795384, "grad_norm": 941.552978515625, "learning_rate": 9.064887958960953e-06, "loss": 89.4982, "step": 34410 }, { "epoch": 0.28473342432890764, "grad_norm": 2689.89404296875, "learning_rate": 9.064066547603716e-06, "loss": 116.0555, "step": 34420 }, { "epoch": 0.2848161475782769, "grad_norm": 1277.172607421875, "learning_rate": 9.063244812887357e-06, "loss": 111.9663, "step": 34430 }, { "epoch": 0.2848988708276461, "grad_norm": 872.6428833007812, "learning_rate": 9.062422754877253e-06, "loss": 124.443, "step": 34440 }, { "epoch": 0.28498159407701534, "grad_norm": 698.310546875, "learning_rate": 9.061600373638816e-06, "loss": 127.376, "step": 34450 }, { "epoch": 0.2850643173263846, "grad_norm": 914.8101806640625, "learning_rate": 9.06077766923748e-06, "loss": 113.9157, "step": 34460 }, { "epoch": 0.2851470405757538, "grad_norm": 1122.4351806640625, "learning_rate": 9.059954641738697e-06, "loss": 126.3374, "step": 34470 }, { "epoch": 0.28522976382512305, "grad_norm": 1123.8736572265625, "learning_rate": 9.059131291207958e-06, "loss": 105.3611, "step": 34480 }, { "epoch": 0.2853124870744923, "grad_norm": 656.4722290039062, "learning_rate": 9.058307617710771e-06, "loss": 142.5772, "step": 34490 }, { "epoch": 0.2853952103238615, "grad_norm": 608.4116821289062, "learning_rate": 9.057483621312671e-06, "loss": 115.3732, "step": 34500 }, { "epoch": 0.28547793357323076, "grad_norm": 581.153564453125, "learning_rate": 9.056659302079222e-06, "loss": 118.1142, "step": 34510 }, { "epoch": 0.2855606568226, "grad_norm": 1226.16552734375, "learning_rate": 9.055834660076008e-06, "loss": 110.4029, "step": 34520 }, { "epoch": 0.2856433800719692, "grad_norm": 1013.7431640625, "learning_rate": 9.055009695368646e-06, "loss": 135.0033, "step": 34530 }, { "epoch": 0.28572610332133846, "grad_norm": 749.7244262695312, "learning_rate": 9.054184408022772e-06, "loss": 157.7157, "step": 34540 }, { "epoch": 0.2858088265707077, "grad_norm": 456.33026123046875, "learning_rate": 9.05335879810405e-06, "loss": 145.5047, "step": 34550 }, { "epoch": 0.2858915498200769, "grad_norm": 622.3046264648438, "learning_rate": 9.052532865678171e-06, "loss": 123.2549, "step": 34560 }, { "epoch": 0.28597427306944617, "grad_norm": 1478.7266845703125, "learning_rate": 9.05170661081085e-06, "loss": 127.2245, "step": 34570 }, { "epoch": 0.2860569963188154, "grad_norm": 619.5127563476562, "learning_rate": 9.050880033567831e-06, "loss": 132.0401, "step": 34580 }, { "epoch": 0.2861397195681846, "grad_norm": 1351.7064208984375, "learning_rate": 9.050053134014878e-06, "loss": 100.6227, "step": 34590 }, { "epoch": 0.2862224428175539, "grad_norm": 1285.1185302734375, "learning_rate": 9.049225912217782e-06, "loss": 102.2231, "step": 34600 }, { "epoch": 0.28630516606692313, "grad_norm": 1479.285888671875, "learning_rate": 9.048398368242365e-06, "loss": 93.523, "step": 34610 }, { "epoch": 0.28638788931629233, "grad_norm": 845.21826171875, "learning_rate": 9.047570502154471e-06, "loss": 97.2673, "step": 34620 }, { "epoch": 0.2864706125656616, "grad_norm": 1318.1756591796875, "learning_rate": 9.046742314019968e-06, "loss": 136.1264, "step": 34630 }, { "epoch": 0.28655333581503084, "grad_norm": 1689.4859619140625, "learning_rate": 9.045913803904748e-06, "loss": 127.8525, "step": 34640 }, { "epoch": 0.28663605906440004, "grad_norm": 759.2603149414062, "learning_rate": 9.045084971874738e-06, "loss": 119.6263, "step": 34650 }, { "epoch": 0.2867187823137693, "grad_norm": 1472.703857421875, "learning_rate": 9.04425581799588e-06, "loss": 122.0518, "step": 34660 }, { "epoch": 0.2868015055631385, "grad_norm": 903.3499755859375, "learning_rate": 9.043426342334147e-06, "loss": 104.4996, "step": 34670 }, { "epoch": 0.28688422881250775, "grad_norm": 726.6737670898438, "learning_rate": 9.042596544955538e-06, "loss": 93.7296, "step": 34680 }, { "epoch": 0.286966952061877, "grad_norm": 1467.8472900390625, "learning_rate": 9.041766425926073e-06, "loss": 109.8673, "step": 34690 }, { "epoch": 0.2870496753112462, "grad_norm": 1312.5286865234375, "learning_rate": 9.040935985311804e-06, "loss": 83.5798, "step": 34700 }, { "epoch": 0.28713239856061545, "grad_norm": 1040.613525390625, "learning_rate": 9.040105223178803e-06, "loss": 105.7686, "step": 34710 }, { "epoch": 0.2872151218099847, "grad_norm": 737.3902587890625, "learning_rate": 9.039274139593173e-06, "loss": 100.6217, "step": 34720 }, { "epoch": 0.2872978450593539, "grad_norm": 1900.810302734375, "learning_rate": 9.038442734621034e-06, "loss": 108.4988, "step": 34730 }, { "epoch": 0.28738056830872316, "grad_norm": 823.1213989257812, "learning_rate": 9.037611008328544e-06, "loss": 87.0847, "step": 34740 }, { "epoch": 0.2874632915580924, "grad_norm": 1027.710693359375, "learning_rate": 9.036778960781874e-06, "loss": 96.1623, "step": 34750 }, { "epoch": 0.2875460148074616, "grad_norm": 1012.5924682617188, "learning_rate": 9.03594659204723e-06, "loss": 99.4886, "step": 34760 }, { "epoch": 0.28762873805683087, "grad_norm": 1981.74609375, "learning_rate": 9.035113902190838e-06, "loss": 131.8146, "step": 34770 }, { "epoch": 0.2877114613062001, "grad_norm": 838.7272338867188, "learning_rate": 9.03428089127895e-06, "loss": 138.8772, "step": 34780 }, { "epoch": 0.2877941845555693, "grad_norm": 614.944091796875, "learning_rate": 9.033447559377847e-06, "loss": 102.8104, "step": 34790 }, { "epoch": 0.2878769078049386, "grad_norm": 1577.7645263671875, "learning_rate": 9.032613906553833e-06, "loss": 128.1048, "step": 34800 }, { "epoch": 0.28795963105430783, "grad_norm": 1021.2303466796875, "learning_rate": 9.031779932873238e-06, "loss": 107.6142, "step": 34810 }, { "epoch": 0.28804235430367703, "grad_norm": 644.7843017578125, "learning_rate": 9.030945638402415e-06, "loss": 103.4213, "step": 34820 }, { "epoch": 0.2881250775530463, "grad_norm": 873.067138671875, "learning_rate": 9.030111023207751e-06, "loss": 93.0126, "step": 34830 }, { "epoch": 0.28820780080241554, "grad_norm": 1150.903564453125, "learning_rate": 9.029276087355646e-06, "loss": 121.5422, "step": 34840 }, { "epoch": 0.28829052405178474, "grad_norm": 573.4194946289062, "learning_rate": 9.028440830912536e-06, "loss": 120.833, "step": 34850 }, { "epoch": 0.288373247301154, "grad_norm": 951.9305419921875, "learning_rate": 9.027605253944874e-06, "loss": 146.5991, "step": 34860 }, { "epoch": 0.28845597055052324, "grad_norm": 646.8169555664062, "learning_rate": 9.026769356519149e-06, "loss": 73.8264, "step": 34870 }, { "epoch": 0.28853869379989244, "grad_norm": 2599.574462890625, "learning_rate": 9.025933138701865e-06, "loss": 121.7481, "step": 34880 }, { "epoch": 0.2886214170492617, "grad_norm": 964.8515625, "learning_rate": 9.02509660055956e-06, "loss": 98.844, "step": 34890 }, { "epoch": 0.28870414029863095, "grad_norm": 665.4266357421875, "learning_rate": 9.02425974215879e-06, "loss": 126.3558, "step": 34900 }, { "epoch": 0.28878686354800015, "grad_norm": 747.7141723632812, "learning_rate": 9.02342256356614e-06, "loss": 94.1504, "step": 34910 }, { "epoch": 0.2888695867973694, "grad_norm": 1255.60302734375, "learning_rate": 9.022585064848222e-06, "loss": 101.1276, "step": 34920 }, { "epoch": 0.28895231004673866, "grad_norm": 1393.7789306640625, "learning_rate": 9.021747246071673e-06, "loss": 106.2699, "step": 34930 }, { "epoch": 0.28903503329610786, "grad_norm": 860.56689453125, "learning_rate": 9.020909107303152e-06, "loss": 124.7073, "step": 34940 }, { "epoch": 0.2891177565454771, "grad_norm": 982.3703002929688, "learning_rate": 9.020070648609347e-06, "loss": 112.9233, "step": 34950 }, { "epoch": 0.28920047979484637, "grad_norm": 1855.6322021484375, "learning_rate": 9.01923187005697e-06, "loss": 108.5488, "step": 34960 }, { "epoch": 0.28928320304421556, "grad_norm": 904.826416015625, "learning_rate": 9.018392771712758e-06, "loss": 125.191, "step": 34970 }, { "epoch": 0.2893659262935848, "grad_norm": 704.2764892578125, "learning_rate": 9.017553353643479e-06, "loss": 126.2956, "step": 34980 }, { "epoch": 0.2894486495429541, "grad_norm": 956.4945678710938, "learning_rate": 9.016713615915913e-06, "loss": 81.5747, "step": 34990 }, { "epoch": 0.28953137279232327, "grad_norm": 783.7576293945312, "learning_rate": 9.01587355859688e-06, "loss": 92.5522, "step": 35000 }, { "epoch": 0.2896140960416925, "grad_norm": 1208.607421875, "learning_rate": 9.015033181753219e-06, "loss": 106.08, "step": 35010 }, { "epoch": 0.2896968192910618, "grad_norm": 852.6314697265625, "learning_rate": 9.014192485451794e-06, "loss": 116.7134, "step": 35020 }, { "epoch": 0.289779542540431, "grad_norm": 1022.0242309570312, "learning_rate": 9.013351469759497e-06, "loss": 144.6814, "step": 35030 }, { "epoch": 0.28986226578980023, "grad_norm": 1243.4373779296875, "learning_rate": 9.01251013474324e-06, "loss": 104.1561, "step": 35040 }, { "epoch": 0.28994498903916943, "grad_norm": 960.918701171875, "learning_rate": 9.011668480469969e-06, "loss": 105.9182, "step": 35050 }, { "epoch": 0.2900277122885387, "grad_norm": 601.8880004882812, "learning_rate": 9.010826507006644e-06, "loss": 94.2774, "step": 35060 }, { "epoch": 0.29011043553790794, "grad_norm": 642.360107421875, "learning_rate": 9.009984214420265e-06, "loss": 142.8863, "step": 35070 }, { "epoch": 0.29019315878727714, "grad_norm": 1095.9666748046875, "learning_rate": 9.009141602777845e-06, "loss": 128.7609, "step": 35080 }, { "epoch": 0.2902758820366464, "grad_norm": 1230.556396484375, "learning_rate": 9.008298672146425e-06, "loss": 111.0792, "step": 35090 }, { "epoch": 0.29035860528601565, "grad_norm": 1034.6533203125, "learning_rate": 9.007455422593077e-06, "loss": 111.8609, "step": 35100 }, { "epoch": 0.29044132853538485, "grad_norm": 509.1571960449219, "learning_rate": 9.006611854184893e-06, "loss": 88.8575, "step": 35110 }, { "epoch": 0.2905240517847541, "grad_norm": 832.6845703125, "learning_rate": 9.00576796698899e-06, "loss": 113.1765, "step": 35120 }, { "epoch": 0.29060677503412335, "grad_norm": 1436.23388671875, "learning_rate": 9.004923761072515e-06, "loss": 103.5024, "step": 35130 }, { "epoch": 0.29068949828349255, "grad_norm": 1610.103759765625, "learning_rate": 9.004079236502636e-06, "loss": 113.4215, "step": 35140 }, { "epoch": 0.2907722215328618, "grad_norm": 881.2527465820312, "learning_rate": 9.00323439334655e-06, "loss": 123.1393, "step": 35150 }, { "epoch": 0.29085494478223106, "grad_norm": 734.708740234375, "learning_rate": 9.002389231671474e-06, "loss": 121.4382, "step": 35160 }, { "epoch": 0.29093766803160026, "grad_norm": 787.5501708984375, "learning_rate": 9.001543751544654e-06, "loss": 107.295, "step": 35170 }, { "epoch": 0.2910203912809695, "grad_norm": 1131.2176513671875, "learning_rate": 9.000697953033364e-06, "loss": 107.077, "step": 35180 }, { "epoch": 0.29110311453033877, "grad_norm": 1214.8603515625, "learning_rate": 8.999851836204901e-06, "loss": 103.6586, "step": 35190 }, { "epoch": 0.29118583777970797, "grad_norm": 895.4462280273438, "learning_rate": 8.99900540112658e-06, "loss": 83.9514, "step": 35200 }, { "epoch": 0.2912685610290772, "grad_norm": 543.6385498046875, "learning_rate": 8.998158647865753e-06, "loss": 107.0998, "step": 35210 }, { "epoch": 0.2913512842784465, "grad_norm": 1422.5147705078125, "learning_rate": 8.997311576489793e-06, "loss": 127.3802, "step": 35220 }, { "epoch": 0.2914340075278157, "grad_norm": 1031.476806640625, "learning_rate": 8.996464187066096e-06, "loss": 132.6045, "step": 35230 }, { "epoch": 0.29151673077718493, "grad_norm": 962.57568359375, "learning_rate": 8.995616479662084e-06, "loss": 76.9806, "step": 35240 }, { "epoch": 0.2915994540265542, "grad_norm": 888.2576293945312, "learning_rate": 8.994768454345207e-06, "loss": 88.927, "step": 35250 }, { "epoch": 0.2916821772759234, "grad_norm": 991.271484375, "learning_rate": 8.993920111182937e-06, "loss": 116.7842, "step": 35260 }, { "epoch": 0.29176490052529264, "grad_norm": 2302.41064453125, "learning_rate": 8.993071450242775e-06, "loss": 99.9801, "step": 35270 }, { "epoch": 0.2918476237746619, "grad_norm": 729.2933349609375, "learning_rate": 8.99222247159224e-06, "loss": 108.2754, "step": 35280 }, { "epoch": 0.2919303470240311, "grad_norm": 940.2220458984375, "learning_rate": 8.991373175298887e-06, "loss": 104.497, "step": 35290 }, { "epoch": 0.29201307027340034, "grad_norm": 893.2850952148438, "learning_rate": 8.99052356143029e-06, "loss": 104.8119, "step": 35300 }, { "epoch": 0.2920957935227696, "grad_norm": 1792.8408203125, "learning_rate": 8.989673630054044e-06, "loss": 134.8155, "step": 35310 }, { "epoch": 0.2921785167721388, "grad_norm": 823.2114868164062, "learning_rate": 8.988823381237778e-06, "loss": 91.5063, "step": 35320 }, { "epoch": 0.29226124002150805, "grad_norm": 1062.8973388671875, "learning_rate": 8.987972815049144e-06, "loss": 130.8569, "step": 35330 }, { "epoch": 0.2923439632708773, "grad_norm": 676.7745361328125, "learning_rate": 8.987121931555814e-06, "loss": 124.0575, "step": 35340 }, { "epoch": 0.2924266865202465, "grad_norm": 1002.80029296875, "learning_rate": 8.986270730825489e-06, "loss": 101.8287, "step": 35350 }, { "epoch": 0.29250940976961576, "grad_norm": 1145.8223876953125, "learning_rate": 8.985419212925898e-06, "loss": 110.8668, "step": 35360 }, { "epoch": 0.292592133018985, "grad_norm": 992.4735717773438, "learning_rate": 8.98456737792479e-06, "loss": 80.9995, "step": 35370 }, { "epoch": 0.2926748562683542, "grad_norm": 488.52618408203125, "learning_rate": 8.983715225889942e-06, "loss": 111.5234, "step": 35380 }, { "epoch": 0.29275757951772347, "grad_norm": 1136.89794921875, "learning_rate": 8.982862756889158e-06, "loss": 162.2252, "step": 35390 }, { "epoch": 0.29284030276709266, "grad_norm": 1113.3115234375, "learning_rate": 8.982009970990262e-06, "loss": 89.9033, "step": 35400 }, { "epoch": 0.2929230260164619, "grad_norm": 1392.5189208984375, "learning_rate": 8.98115686826111e-06, "loss": 131.7283, "step": 35410 }, { "epoch": 0.2930057492658312, "grad_norm": 617.0336303710938, "learning_rate": 8.980303448769574e-06, "loss": 99.9001, "step": 35420 }, { "epoch": 0.29308847251520037, "grad_norm": 2442.5810546875, "learning_rate": 8.979449712583562e-06, "loss": 112.8064, "step": 35430 }, { "epoch": 0.2931711957645696, "grad_norm": 544.6372680664062, "learning_rate": 8.978595659770997e-06, "loss": 109.6494, "step": 35440 }, { "epoch": 0.2932539190139389, "grad_norm": 860.6021118164062, "learning_rate": 8.977741290399836e-06, "loss": 106.3515, "step": 35450 }, { "epoch": 0.2933366422633081, "grad_norm": 1385.670654296875, "learning_rate": 8.976886604538055e-06, "loss": 117.2203, "step": 35460 }, { "epoch": 0.29341936551267733, "grad_norm": 653.8483276367188, "learning_rate": 8.976031602253661e-06, "loss": 91.5749, "step": 35470 }, { "epoch": 0.2935020887620466, "grad_norm": 707.4151000976562, "learning_rate": 8.975176283614677e-06, "loss": 137.304, "step": 35480 }, { "epoch": 0.2935848120114158, "grad_norm": 724.526123046875, "learning_rate": 8.97432064868916e-06, "loss": 117.3225, "step": 35490 }, { "epoch": 0.29366753526078504, "grad_norm": 684.9421997070312, "learning_rate": 8.973464697545191e-06, "loss": 118.9565, "step": 35500 }, { "epoch": 0.2937502585101543, "grad_norm": 1034.4615478515625, "learning_rate": 8.97260843025087e-06, "loss": 117.1274, "step": 35510 }, { "epoch": 0.2938329817595235, "grad_norm": 808.4039916992188, "learning_rate": 8.971751846874329e-06, "loss": 109.1277, "step": 35520 }, { "epoch": 0.29391570500889275, "grad_norm": 793.2233276367188, "learning_rate": 8.97089494748372e-06, "loss": 150.5428, "step": 35530 }, { "epoch": 0.293998428258262, "grad_norm": 1373.2613525390625, "learning_rate": 8.970037732147226e-06, "loss": 144.7835, "step": 35540 }, { "epoch": 0.2940811515076312, "grad_norm": 876.5359497070312, "learning_rate": 8.969180200933048e-06, "loss": 175.0752, "step": 35550 }, { "epoch": 0.29416387475700045, "grad_norm": 1215.04443359375, "learning_rate": 8.968322353909417e-06, "loss": 105.4971, "step": 35560 }, { "epoch": 0.2942465980063697, "grad_norm": 1001.4893188476562, "learning_rate": 8.96746419114459e-06, "loss": 109.2281, "step": 35570 }, { "epoch": 0.2943293212557389, "grad_norm": 1082.677001953125, "learning_rate": 8.966605712706844e-06, "loss": 90.3954, "step": 35580 }, { "epoch": 0.29441204450510816, "grad_norm": 962.15380859375, "learning_rate": 8.965746918664486e-06, "loss": 96.3644, "step": 35590 }, { "epoch": 0.2944947677544774, "grad_norm": 1004.3434448242188, "learning_rate": 8.964887809085846e-06, "loss": 128.4367, "step": 35600 }, { "epoch": 0.2945774910038466, "grad_norm": 535.0782470703125, "learning_rate": 8.96402838403928e-06, "loss": 101.0775, "step": 35610 }, { "epoch": 0.29466021425321587, "grad_norm": 451.95941162109375, "learning_rate": 8.96316864359317e-06, "loss": 103.9834, "step": 35620 }, { "epoch": 0.2947429375025851, "grad_norm": 861.1315307617188, "learning_rate": 8.962308587815916e-06, "loss": 95.3887, "step": 35630 }, { "epoch": 0.2948256607519543, "grad_norm": 873.6254272460938, "learning_rate": 8.961448216775955e-06, "loss": 106.6663, "step": 35640 }, { "epoch": 0.2949083840013236, "grad_norm": 647.5992431640625, "learning_rate": 8.960587530541737e-06, "loss": 113.9285, "step": 35650 }, { "epoch": 0.29499110725069283, "grad_norm": 818.12744140625, "learning_rate": 8.959726529181748e-06, "loss": 93.2586, "step": 35660 }, { "epoch": 0.29507383050006203, "grad_norm": 2233.892578125, "learning_rate": 8.95886521276449e-06, "loss": 99.4201, "step": 35670 }, { "epoch": 0.2951565537494313, "grad_norm": 1236.3218994140625, "learning_rate": 8.958003581358498e-06, "loss": 122.5037, "step": 35680 }, { "epoch": 0.29523927699880054, "grad_norm": 1387.99462890625, "learning_rate": 8.957141635032325e-06, "loss": 103.0061, "step": 35690 }, { "epoch": 0.29532200024816974, "grad_norm": 363.5679626464844, "learning_rate": 8.956279373854553e-06, "loss": 297.6621, "step": 35700 }, { "epoch": 0.295404723497539, "grad_norm": 848.06884765625, "learning_rate": 8.955416797893787e-06, "loss": 133.0075, "step": 35710 }, { "epoch": 0.29548744674690824, "grad_norm": 767.8993530273438, "learning_rate": 8.95455390721866e-06, "loss": 86.9913, "step": 35720 }, { "epoch": 0.29557016999627744, "grad_norm": 947.8824462890625, "learning_rate": 8.953690701897827e-06, "loss": 126.0984, "step": 35730 }, { "epoch": 0.2956528932456467, "grad_norm": 866.94970703125, "learning_rate": 8.952827181999973e-06, "loss": 100.0804, "step": 35740 }, { "epoch": 0.29573561649501595, "grad_norm": 997.9763793945312, "learning_rate": 8.951963347593797e-06, "loss": 158.5358, "step": 35750 }, { "epoch": 0.29581833974438515, "grad_norm": 682.8516235351562, "learning_rate": 8.951099198748036e-06, "loss": 101.2486, "step": 35760 }, { "epoch": 0.2959010629937544, "grad_norm": 804.0764770507812, "learning_rate": 8.950234735531445e-06, "loss": 98.5626, "step": 35770 }, { "epoch": 0.2959837862431236, "grad_norm": 10388.48046875, "learning_rate": 8.949369958012806e-06, "loss": 155.4089, "step": 35780 }, { "epoch": 0.29606650949249286, "grad_norm": 886.81298828125, "learning_rate": 8.948504866260924e-06, "loss": 113.0202, "step": 35790 }, { "epoch": 0.2961492327418621, "grad_norm": 897.130859375, "learning_rate": 8.94763946034463e-06, "loss": 142.2092, "step": 35800 }, { "epoch": 0.2962319559912313, "grad_norm": 966.3784790039062, "learning_rate": 8.946773740332781e-06, "loss": 157.32, "step": 35810 }, { "epoch": 0.29631467924060056, "grad_norm": 1265.8228759765625, "learning_rate": 8.945907706294262e-06, "loss": 87.6832, "step": 35820 }, { "epoch": 0.2963974024899698, "grad_norm": 434.8976135253906, "learning_rate": 8.945041358297973e-06, "loss": 115.3741, "step": 35830 }, { "epoch": 0.296480125739339, "grad_norm": 1037.9794921875, "learning_rate": 8.94417469641285e-06, "loss": 101.735, "step": 35840 }, { "epoch": 0.29656284898870827, "grad_norm": 1543.5999755859375, "learning_rate": 8.943307720707846e-06, "loss": 149.339, "step": 35850 }, { "epoch": 0.2966455722380775, "grad_norm": 851.0203247070312, "learning_rate": 8.942440431251947e-06, "loss": 126.8035, "step": 35860 }, { "epoch": 0.2967282954874467, "grad_norm": 1087.814697265625, "learning_rate": 8.941572828114154e-06, "loss": 154.3589, "step": 35870 }, { "epoch": 0.296811018736816, "grad_norm": 1085.237548828125, "learning_rate": 8.9407049113635e-06, "loss": 133.6983, "step": 35880 }, { "epoch": 0.29689374198618523, "grad_norm": 657.667724609375, "learning_rate": 8.939836681069042e-06, "loss": 77.88, "step": 35890 }, { "epoch": 0.29697646523555443, "grad_norm": 937.2635498046875, "learning_rate": 8.938968137299861e-06, "loss": 121.5767, "step": 35900 }, { "epoch": 0.2970591884849237, "grad_norm": 692.5191650390625, "learning_rate": 8.938099280125064e-06, "loss": 110.6443, "step": 35910 }, { "epoch": 0.29714191173429294, "grad_norm": 1191.0335693359375, "learning_rate": 8.937230109613778e-06, "loss": 125.6926, "step": 35920 }, { "epoch": 0.29722463498366214, "grad_norm": 600.857177734375, "learning_rate": 8.936360625835164e-06, "loss": 114.9589, "step": 35930 }, { "epoch": 0.2973073582330314, "grad_norm": 786.0075073242188, "learning_rate": 8.935490828858399e-06, "loss": 156.1116, "step": 35940 }, { "epoch": 0.29739008148240065, "grad_norm": 923.1871948242188, "learning_rate": 8.934620718752691e-06, "loss": 102.9856, "step": 35950 }, { "epoch": 0.29747280473176985, "grad_norm": 785.8792114257812, "learning_rate": 8.933750295587269e-06, "loss": 121.2862, "step": 35960 }, { "epoch": 0.2975555279811391, "grad_norm": 1262.131591796875, "learning_rate": 8.932879559431392e-06, "loss": 144.9996, "step": 35970 }, { "epoch": 0.29763825123050835, "grad_norm": 1094.29296875, "learning_rate": 8.932008510354336e-06, "loss": 99.4907, "step": 35980 }, { "epoch": 0.29772097447987755, "grad_norm": 1515.66748046875, "learning_rate": 8.931137148425407e-06, "loss": 117.0325, "step": 35990 }, { "epoch": 0.2978036977292468, "grad_norm": 702.0181274414062, "learning_rate": 8.930265473713939e-06, "loss": 89.8551, "step": 36000 }, { "epoch": 0.29788642097861606, "grad_norm": 924.1445922851562, "learning_rate": 8.929393486289283e-06, "loss": 91.7574, "step": 36010 }, { "epoch": 0.29796914422798526, "grad_norm": 886.2630615234375, "learning_rate": 8.928521186220822e-06, "loss": 134.8864, "step": 36020 }, { "epoch": 0.2980518674773545, "grad_norm": 571.9080200195312, "learning_rate": 8.92764857357796e-06, "loss": 166.6288, "step": 36030 }, { "epoch": 0.29813459072672377, "grad_norm": 1017.0745239257812, "learning_rate": 8.926775648430124e-06, "loss": 97.3446, "step": 36040 }, { "epoch": 0.29821731397609297, "grad_norm": 1384.15869140625, "learning_rate": 8.925902410846774e-06, "loss": 102.3454, "step": 36050 }, { "epoch": 0.2983000372254622, "grad_norm": 627.1011962890625, "learning_rate": 8.925028860897384e-06, "loss": 99.8053, "step": 36060 }, { "epoch": 0.2983827604748315, "grad_norm": 1164.1707763671875, "learning_rate": 8.924154998651461e-06, "loss": 150.2465, "step": 36070 }, { "epoch": 0.2984654837242007, "grad_norm": 806.7046508789062, "learning_rate": 8.923280824178538e-06, "loss": 127.2189, "step": 36080 }, { "epoch": 0.29854820697356993, "grad_norm": 936.9692993164062, "learning_rate": 8.922406337548162e-06, "loss": 106.9401, "step": 36090 }, { "epoch": 0.2986309302229392, "grad_norm": 861.8311157226562, "learning_rate": 8.921531538829917e-06, "loss": 92.577, "step": 36100 }, { "epoch": 0.2987136534723084, "grad_norm": 901.859375, "learning_rate": 8.920656428093403e-06, "loss": 83.8378, "step": 36110 }, { "epoch": 0.29879637672167764, "grad_norm": 1293.734375, "learning_rate": 8.919781005408251e-06, "loss": 114.6592, "step": 36120 }, { "epoch": 0.29887909997104684, "grad_norm": 697.5694580078125, "learning_rate": 8.918905270844113e-06, "loss": 141.8754, "step": 36130 }, { "epoch": 0.2989618232204161, "grad_norm": 822.4385986328125, "learning_rate": 8.918029224470671e-06, "loss": 101.8231, "step": 36140 }, { "epoch": 0.29904454646978534, "grad_norm": 886.5821533203125, "learning_rate": 8.917152866357621e-06, "loss": 97.08, "step": 36150 }, { "epoch": 0.29912726971915454, "grad_norm": 1727.11279296875, "learning_rate": 8.916276196574698e-06, "loss": 124.1994, "step": 36160 }, { "epoch": 0.2992099929685238, "grad_norm": 649.4053955078125, "learning_rate": 8.91539921519165e-06, "loss": 113.0979, "step": 36170 }, { "epoch": 0.29929271621789305, "grad_norm": 762.2130737304688, "learning_rate": 8.914521922278255e-06, "loss": 118.0666, "step": 36180 }, { "epoch": 0.29937543946726225, "grad_norm": 995.956787109375, "learning_rate": 8.913644317904317e-06, "loss": 125.7407, "step": 36190 }, { "epoch": 0.2994581627166315, "grad_norm": 1194.2001953125, "learning_rate": 8.912766402139662e-06, "loss": 124.1992, "step": 36200 }, { "epoch": 0.29954088596600076, "grad_norm": 1277.1484375, "learning_rate": 8.91188817505414e-06, "loss": 146.9492, "step": 36210 }, { "epoch": 0.29962360921536996, "grad_norm": 992.379638671875, "learning_rate": 8.91100963671763e-06, "loss": 107.3992, "step": 36220 }, { "epoch": 0.2997063324647392, "grad_norm": 820.5222778320312, "learning_rate": 8.910130787200032e-06, "loss": 93.3464, "step": 36230 }, { "epoch": 0.29978905571410847, "grad_norm": 2073.3251953125, "learning_rate": 8.909251626571273e-06, "loss": 101.3619, "step": 36240 }, { "epoch": 0.29987177896347766, "grad_norm": 767.80615234375, "learning_rate": 8.908372154901302e-06, "loss": 89.5982, "step": 36250 }, { "epoch": 0.2999545022128469, "grad_norm": 1339.1114501953125, "learning_rate": 8.907492372260096e-06, "loss": 118.8273, "step": 36260 }, { "epoch": 0.3000372254622162, "grad_norm": 1395.037353515625, "learning_rate": 8.906612278717657e-06, "loss": 114.5038, "step": 36270 }, { "epoch": 0.30011994871158537, "grad_norm": 1279.881591796875, "learning_rate": 8.905731874344005e-06, "loss": 110.8277, "step": 36280 }, { "epoch": 0.3002026719609546, "grad_norm": 1153.6810302734375, "learning_rate": 8.904851159209193e-06, "loss": 111.3379, "step": 36290 }, { "epoch": 0.3002853952103239, "grad_norm": 910.88623046875, "learning_rate": 8.903970133383297e-06, "loss": 83.3806, "step": 36300 }, { "epoch": 0.3003681184596931, "grad_norm": 993.2054443359375, "learning_rate": 8.903088796936414e-06, "loss": 117.134, "step": 36310 }, { "epoch": 0.30045084170906233, "grad_norm": 1444.7095947265625, "learning_rate": 8.902207149938667e-06, "loss": 118.84, "step": 36320 }, { "epoch": 0.3005335649584316, "grad_norm": 722.3483276367188, "learning_rate": 8.901325192460206e-06, "loss": 100.5878, "step": 36330 }, { "epoch": 0.3006162882078008, "grad_norm": 2218.97119140625, "learning_rate": 8.900442924571204e-06, "loss": 125.5503, "step": 36340 }, { "epoch": 0.30069901145717004, "grad_norm": 933.83984375, "learning_rate": 8.89956034634186e-06, "loss": 100.4451, "step": 36350 }, { "epoch": 0.3007817347065393, "grad_norm": 1003.083251953125, "learning_rate": 8.898677457842394e-06, "loss": 97.8874, "step": 36360 }, { "epoch": 0.3008644579559085, "grad_norm": 1543.9967041015625, "learning_rate": 8.897794259143057e-06, "loss": 144.2935, "step": 36370 }, { "epoch": 0.30094718120527775, "grad_norm": 1334.9381103515625, "learning_rate": 8.896910750314118e-06, "loss": 91.8307, "step": 36380 }, { "epoch": 0.301029904454647, "grad_norm": 1186.411865234375, "learning_rate": 8.896026931425876e-06, "loss": 131.1232, "step": 36390 }, { "epoch": 0.3011126277040162, "grad_norm": 1868.9034423828125, "learning_rate": 8.895142802548653e-06, "loss": 124.4849, "step": 36400 }, { "epoch": 0.30119535095338545, "grad_norm": 840.4161987304688, "learning_rate": 8.89425836375279e-06, "loss": 81.0873, "step": 36410 }, { "epoch": 0.3012780742027547, "grad_norm": 1163.7984619140625, "learning_rate": 8.893373615108663e-06, "loss": 136.477, "step": 36420 }, { "epoch": 0.3013607974521239, "grad_norm": 1086.72216796875, "learning_rate": 8.892488556686665e-06, "loss": 126.1113, "step": 36430 }, { "epoch": 0.30144352070149316, "grad_norm": 879.4309692382812, "learning_rate": 8.891603188557218e-06, "loss": 96.8926, "step": 36440 }, { "epoch": 0.3015262439508624, "grad_norm": 1197.5047607421875, "learning_rate": 8.890717510790763e-06, "loss": 110.1916, "step": 36450 }, { "epoch": 0.3016089672002316, "grad_norm": 349.7181701660156, "learning_rate": 8.889831523457773e-06, "loss": 154.8354, "step": 36460 }, { "epoch": 0.30169169044960087, "grad_norm": 1460.5762939453125, "learning_rate": 8.888945226628742e-06, "loss": 140.0803, "step": 36470 }, { "epoch": 0.3017744136989701, "grad_norm": 665.8645629882812, "learning_rate": 8.888058620374185e-06, "loss": 92.3529, "step": 36480 }, { "epoch": 0.3018571369483393, "grad_norm": 879.9156494140625, "learning_rate": 8.887171704764647e-06, "loss": 129.1003, "step": 36490 }, { "epoch": 0.3019398601977086, "grad_norm": 815.3402709960938, "learning_rate": 8.8862844798707e-06, "loss": 118.2584, "step": 36500 }, { "epoch": 0.3020225834470778, "grad_norm": 1314.0321044921875, "learning_rate": 8.885396945762928e-06, "loss": 126.3761, "step": 36510 }, { "epoch": 0.30210530669644703, "grad_norm": 858.8338012695312, "learning_rate": 8.884509102511956e-06, "loss": 98.5212, "step": 36520 }, { "epoch": 0.3021880299458163, "grad_norm": 1700.220458984375, "learning_rate": 8.883620950188422e-06, "loss": 106.6714, "step": 36530 }, { "epoch": 0.3022707531951855, "grad_norm": 696.0690307617188, "learning_rate": 8.882732488862988e-06, "loss": 118.2453, "step": 36540 }, { "epoch": 0.30235347644455474, "grad_norm": 793.1904907226562, "learning_rate": 8.881843718606353e-06, "loss": 125.0236, "step": 36550 }, { "epoch": 0.302436199693924, "grad_norm": 970.5441284179688, "learning_rate": 8.880954639489227e-06, "loss": 108.3671, "step": 36560 }, { "epoch": 0.3025189229432932, "grad_norm": 1777.4910888671875, "learning_rate": 8.880065251582354e-06, "loss": 160.0988, "step": 36570 }, { "epoch": 0.30260164619266244, "grad_norm": 1037.1177978515625, "learning_rate": 8.879175554956495e-06, "loss": 104.0029, "step": 36580 }, { "epoch": 0.3026843694420317, "grad_norm": 806.9476928710938, "learning_rate": 8.87828554968244e-06, "loss": 88.7504, "step": 36590 }, { "epoch": 0.3027670926914009, "grad_norm": 1243.7362060546875, "learning_rate": 8.877395235831002e-06, "loss": 111.0507, "step": 36600 }, { "epoch": 0.30284981594077015, "grad_norm": 1789.92529296875, "learning_rate": 8.876504613473019e-06, "loss": 154.0786, "step": 36610 }, { "epoch": 0.3029325391901394, "grad_norm": 718.7664184570312, "learning_rate": 8.875613682679356e-06, "loss": 108.9329, "step": 36620 }, { "epoch": 0.3030152624395086, "grad_norm": 748.9815063476562, "learning_rate": 8.874722443520898e-06, "loss": 123.7988, "step": 36630 }, { "epoch": 0.30309798568887786, "grad_norm": 606.9013671875, "learning_rate": 8.873830896068559e-06, "loss": 107.3505, "step": 36640 }, { "epoch": 0.3031807089382471, "grad_norm": 878.5547485351562, "learning_rate": 8.872939040393274e-06, "loss": 113.0779, "step": 36650 }, { "epoch": 0.3032634321876163, "grad_norm": 1300.362060546875, "learning_rate": 8.872046876566003e-06, "loss": 130.3682, "step": 36660 }, { "epoch": 0.30334615543698557, "grad_norm": 886.26025390625, "learning_rate": 8.871154404657734e-06, "loss": 106.1408, "step": 36670 }, { "epoch": 0.3034288786863548, "grad_norm": 1735.70947265625, "learning_rate": 8.870261624739474e-06, "loss": 120.6958, "step": 36680 }, { "epoch": 0.303511601935724, "grad_norm": 886.688232421875, "learning_rate": 8.869368536882258e-06, "loss": 102.5698, "step": 36690 }, { "epoch": 0.3035943251850933, "grad_norm": 632.3603515625, "learning_rate": 8.868475141157146e-06, "loss": 88.5606, "step": 36700 }, { "epoch": 0.3036770484344625, "grad_norm": 764.1319580078125, "learning_rate": 8.867581437635221e-06, "loss": 107.0108, "step": 36710 }, { "epoch": 0.3037597716838317, "grad_norm": 1273.7620849609375, "learning_rate": 8.866687426387592e-06, "loss": 159.5809, "step": 36720 }, { "epoch": 0.303842494933201, "grad_norm": 685.6810302734375, "learning_rate": 8.86579310748539e-06, "loss": 99.7657, "step": 36730 }, { "epoch": 0.30392521818257023, "grad_norm": 777.0698852539062, "learning_rate": 8.86489848099977e-06, "loss": 120.2746, "step": 36740 }, { "epoch": 0.30400794143193943, "grad_norm": 746.861572265625, "learning_rate": 8.864003547001916e-06, "loss": 106.8211, "step": 36750 }, { "epoch": 0.3040906646813087, "grad_norm": 853.1625366210938, "learning_rate": 8.863108305563035e-06, "loss": 91.7284, "step": 36760 }, { "epoch": 0.30417338793067794, "grad_norm": 1231.1922607421875, "learning_rate": 8.862212756754354e-06, "loss": 155.4766, "step": 36770 }, { "epoch": 0.30425611118004714, "grad_norm": 1215.257568359375, "learning_rate": 8.861316900647129e-06, "loss": 149.8827, "step": 36780 }, { "epoch": 0.3043388344294164, "grad_norm": 3567.228515625, "learning_rate": 8.860420737312638e-06, "loss": 121.5637, "step": 36790 }, { "epoch": 0.30442155767878565, "grad_norm": 694.6704711914062, "learning_rate": 8.859524266822188e-06, "loss": 101.825, "step": 36800 }, { "epoch": 0.30450428092815485, "grad_norm": 1621.055908203125, "learning_rate": 8.858627489247105e-06, "loss": 128.4847, "step": 36810 }, { "epoch": 0.3045870041775241, "grad_norm": 974.5038452148438, "learning_rate": 8.85773040465874e-06, "loss": 157.9853, "step": 36820 }, { "epoch": 0.30466972742689336, "grad_norm": 1332.6234130859375, "learning_rate": 8.856833013128472e-06, "loss": 107.964, "step": 36830 }, { "epoch": 0.30475245067626255, "grad_norm": 1167.7923583984375, "learning_rate": 8.855935314727702e-06, "loss": 109.8553, "step": 36840 }, { "epoch": 0.3048351739256318, "grad_norm": 704.8397216796875, "learning_rate": 8.855037309527854e-06, "loss": 115.8193, "step": 36850 }, { "epoch": 0.304917897175001, "grad_norm": 883.3186645507812, "learning_rate": 8.854138997600382e-06, "loss": 91.2245, "step": 36860 }, { "epoch": 0.30500062042437026, "grad_norm": 660.802001953125, "learning_rate": 8.853240379016757e-06, "loss": 88.3629, "step": 36870 }, { "epoch": 0.3050833436737395, "grad_norm": 1194.5704345703125, "learning_rate": 8.852341453848477e-06, "loss": 113.7338, "step": 36880 }, { "epoch": 0.3051660669231087, "grad_norm": 1196.6671142578125, "learning_rate": 8.851442222167068e-06, "loss": 96.392, "step": 36890 }, { "epoch": 0.30524879017247797, "grad_norm": 1096.315673828125, "learning_rate": 8.850542684044078e-06, "loss": 132.9981, "step": 36900 }, { "epoch": 0.3053315134218472, "grad_norm": 843.752685546875, "learning_rate": 8.849642839551079e-06, "loss": 120.9463, "step": 36910 }, { "epoch": 0.3054142366712164, "grad_norm": 1700.408203125, "learning_rate": 8.848742688759666e-06, "loss": 106.4173, "step": 36920 }, { "epoch": 0.3054969599205857, "grad_norm": 1714.5902099609375, "learning_rate": 8.847842231741462e-06, "loss": 119.7494, "step": 36930 }, { "epoch": 0.30557968316995493, "grad_norm": 998.3058471679688, "learning_rate": 8.846941468568108e-06, "loss": 104.204, "step": 36940 }, { "epoch": 0.30566240641932413, "grad_norm": 1132.1826171875, "learning_rate": 8.846040399311278e-06, "loss": 112.0986, "step": 36950 }, { "epoch": 0.3057451296686934, "grad_norm": 661.6875, "learning_rate": 8.845139024042664e-06, "loss": 81.744, "step": 36960 }, { "epoch": 0.30582785291806264, "grad_norm": 932.6510009765625, "learning_rate": 8.844237342833985e-06, "loss": 127.1373, "step": 36970 }, { "epoch": 0.30591057616743184, "grad_norm": 780.3368530273438, "learning_rate": 8.843335355756983e-06, "loss": 75.523, "step": 36980 }, { "epoch": 0.3059932994168011, "grad_norm": 754.1832275390625, "learning_rate": 8.842433062883427e-06, "loss": 89.4337, "step": 36990 }, { "epoch": 0.30607602266617034, "grad_norm": 764.4425659179688, "learning_rate": 8.841530464285105e-06, "loss": 129.0274, "step": 37000 }, { "epoch": 0.30615874591553954, "grad_norm": 861.7986450195312, "learning_rate": 8.840627560033833e-06, "loss": 82.3673, "step": 37010 }, { "epoch": 0.3062414691649088, "grad_norm": 988.38916015625, "learning_rate": 8.839724350201452e-06, "loss": 114.8146, "step": 37020 }, { "epoch": 0.30632419241427805, "grad_norm": 530.607421875, "learning_rate": 8.838820834859829e-06, "loss": 103.93, "step": 37030 }, { "epoch": 0.30640691566364725, "grad_norm": 941.2070922851562, "learning_rate": 8.837917014080849e-06, "loss": 89.9074, "step": 37040 }, { "epoch": 0.3064896389130165, "grad_norm": 688.3717651367188, "learning_rate": 8.837012887936426e-06, "loss": 111.2304, "step": 37050 }, { "epoch": 0.30657236216238576, "grad_norm": 1092.7115478515625, "learning_rate": 8.836108456498497e-06, "loss": 93.2177, "step": 37060 }, { "epoch": 0.30665508541175496, "grad_norm": 970.1156616210938, "learning_rate": 8.835203719839024e-06, "loss": 113.6382, "step": 37070 }, { "epoch": 0.3067378086611242, "grad_norm": 1119.5540771484375, "learning_rate": 8.834298678029988e-06, "loss": 120.2044, "step": 37080 }, { "epoch": 0.30682053191049347, "grad_norm": 840.0438842773438, "learning_rate": 8.833393331143409e-06, "loss": 103.9367, "step": 37090 }, { "epoch": 0.30690325515986266, "grad_norm": 1232.6182861328125, "learning_rate": 8.832487679251311e-06, "loss": 107.9942, "step": 37100 }, { "epoch": 0.3069859784092319, "grad_norm": 702.6771850585938, "learning_rate": 8.831581722425761e-06, "loss": 107.5534, "step": 37110 }, { "epoch": 0.3070687016586012, "grad_norm": 933.55615234375, "learning_rate": 8.830675460738835e-06, "loss": 92.3703, "step": 37120 }, { "epoch": 0.30715142490797037, "grad_norm": 1156.6966552734375, "learning_rate": 8.829768894262644e-06, "loss": 118.0975, "step": 37130 }, { "epoch": 0.3072341481573396, "grad_norm": 766.1238403320312, "learning_rate": 8.82886202306932e-06, "loss": 91.8914, "step": 37140 }, { "epoch": 0.3073168714067089, "grad_norm": 1076.5308837890625, "learning_rate": 8.827954847231016e-06, "loss": 115.0902, "step": 37150 }, { "epoch": 0.3073995946560781, "grad_norm": 867.0706176757812, "learning_rate": 8.82704736681991e-06, "loss": 128.4827, "step": 37160 }, { "epoch": 0.30748231790544733, "grad_norm": 963.2921752929688, "learning_rate": 8.826139581908211e-06, "loss": 112.9323, "step": 37170 }, { "epoch": 0.3075650411548166, "grad_norm": 735.8433227539062, "learning_rate": 8.825231492568146e-06, "loss": 114.1932, "step": 37180 }, { "epoch": 0.3076477644041858, "grad_norm": 1258.63818359375, "learning_rate": 8.824323098871966e-06, "loss": 136.7632, "step": 37190 }, { "epoch": 0.30773048765355504, "grad_norm": 1213.376220703125, "learning_rate": 8.823414400891948e-06, "loss": 140.9363, "step": 37200 }, { "epoch": 0.30781321090292424, "grad_norm": 816.0035400390625, "learning_rate": 8.822505398700395e-06, "loss": 97.0494, "step": 37210 }, { "epoch": 0.3078959341522935, "grad_norm": 908.9609375, "learning_rate": 8.821596092369627e-06, "loss": 112.1852, "step": 37220 }, { "epoch": 0.30797865740166275, "grad_norm": 1214.3897705078125, "learning_rate": 8.820686481971998e-06, "loss": 133.1782, "step": 37230 }, { "epoch": 0.30806138065103195, "grad_norm": 1155.935546875, "learning_rate": 8.81977656757988e-06, "loss": 91.5354, "step": 37240 }, { "epoch": 0.3081441039004012, "grad_norm": 1258.32275390625, "learning_rate": 8.81886634926567e-06, "loss": 108.436, "step": 37250 }, { "epoch": 0.30822682714977045, "grad_norm": 1130.4833984375, "learning_rate": 8.817955827101794e-06, "loss": 133.1508, "step": 37260 }, { "epoch": 0.30830955039913965, "grad_norm": 738.8419799804688, "learning_rate": 8.817045001160693e-06, "loss": 122.5803, "step": 37270 }, { "epoch": 0.3083922736485089, "grad_norm": 913.4530639648438, "learning_rate": 8.816133871514838e-06, "loss": 108.7282, "step": 37280 }, { "epoch": 0.30847499689787816, "grad_norm": 600.1754150390625, "learning_rate": 8.815222438236726e-06, "loss": 91.7117, "step": 37290 }, { "epoch": 0.30855772014724736, "grad_norm": 987.5612182617188, "learning_rate": 8.814310701398873e-06, "loss": 111.6003, "step": 37300 }, { "epoch": 0.3086404433966166, "grad_norm": 718.8853759765625, "learning_rate": 8.813398661073823e-06, "loss": 120.6641, "step": 37310 }, { "epoch": 0.30872316664598587, "grad_norm": 969.3789672851562, "learning_rate": 8.812486317334145e-06, "loss": 112.9521, "step": 37320 }, { "epoch": 0.30880588989535507, "grad_norm": 1069.70849609375, "learning_rate": 8.811573670252426e-06, "loss": 102.9678, "step": 37330 }, { "epoch": 0.3088886131447243, "grad_norm": 1160.0357666015625, "learning_rate": 8.810660719901283e-06, "loss": 142.8662, "step": 37340 }, { "epoch": 0.3089713363940936, "grad_norm": 923.1897583007812, "learning_rate": 8.809747466353356e-06, "loss": 134.5727, "step": 37350 }, { "epoch": 0.3090540596434628, "grad_norm": 1227.94580078125, "learning_rate": 8.808833909681305e-06, "loss": 144.0661, "step": 37360 }, { "epoch": 0.30913678289283203, "grad_norm": 636.2891235351562, "learning_rate": 8.80792004995782e-06, "loss": 120.0885, "step": 37370 }, { "epoch": 0.3092195061422013, "grad_norm": 1097.7366943359375, "learning_rate": 8.807005887255615e-06, "loss": 138.4748, "step": 37380 }, { "epoch": 0.3093022293915705, "grad_norm": 814.5217895507812, "learning_rate": 8.806091421647423e-06, "loss": 113.6995, "step": 37390 }, { "epoch": 0.30938495264093974, "grad_norm": 737.350341796875, "learning_rate": 8.805176653206004e-06, "loss": 116.3498, "step": 37400 }, { "epoch": 0.309467675890309, "grad_norm": 2945.936279296875, "learning_rate": 8.80426158200414e-06, "loss": 109.3581, "step": 37410 }, { "epoch": 0.3095503991396782, "grad_norm": 1349.6761474609375, "learning_rate": 8.803346208114643e-06, "loss": 117.0218, "step": 37420 }, { "epoch": 0.30963312238904744, "grad_norm": 744.024658203125, "learning_rate": 8.802430531610344e-06, "loss": 101.151, "step": 37430 }, { "epoch": 0.3097158456384167, "grad_norm": 1006.9130859375, "learning_rate": 8.801514552564097e-06, "loss": 87.0184, "step": 37440 }, { "epoch": 0.3097985688877859, "grad_norm": 1106.60693359375, "learning_rate": 8.800598271048784e-06, "loss": 159.6884, "step": 37450 }, { "epoch": 0.30988129213715515, "grad_norm": 1054.00244140625, "learning_rate": 8.799681687137309e-06, "loss": 97.845, "step": 37460 }, { "epoch": 0.3099640153865244, "grad_norm": 551.2701416015625, "learning_rate": 8.7987648009026e-06, "loss": 87.5821, "step": 37470 }, { "epoch": 0.3100467386358936, "grad_norm": 2352.765869140625, "learning_rate": 8.79784761241761e-06, "loss": 144.6846, "step": 37480 }, { "epoch": 0.31012946188526286, "grad_norm": 899.6213989257812, "learning_rate": 8.796930121755315e-06, "loss": 110.2097, "step": 37490 }, { "epoch": 0.3102121851346321, "grad_norm": 527.784912109375, "learning_rate": 8.796012328988716e-06, "loss": 98.5166, "step": 37500 }, { "epoch": 0.3102949083840013, "grad_norm": 1046.642822265625, "learning_rate": 8.795094234190837e-06, "loss": 94.5682, "step": 37510 }, { "epoch": 0.31037763163337057, "grad_norm": 1084.1600341796875, "learning_rate": 8.794175837434729e-06, "loss": 141.1946, "step": 37520 }, { "epoch": 0.3104603548827398, "grad_norm": 761.6773071289062, "learning_rate": 8.79325713879346e-06, "loss": 87.328, "step": 37530 }, { "epoch": 0.310543078132109, "grad_norm": 1374.055908203125, "learning_rate": 8.792338138340131e-06, "loss": 102.905, "step": 37540 }, { "epoch": 0.3106258013814783, "grad_norm": 935.9447631835938, "learning_rate": 8.791418836147858e-06, "loss": 146.4921, "step": 37550 }, { "epoch": 0.3107085246308475, "grad_norm": 1152.4400634765625, "learning_rate": 8.790499232289793e-06, "loss": 128.4351, "step": 37560 }, { "epoch": 0.3107912478802167, "grad_norm": 658.3630981445312, "learning_rate": 8.789579326839097e-06, "loss": 121.7294, "step": 37570 }, { "epoch": 0.310873971129586, "grad_norm": 830.0960083007812, "learning_rate": 8.788659119868966e-06, "loss": 133.9257, "step": 37580 }, { "epoch": 0.3109566943789552, "grad_norm": 866.0687255859375, "learning_rate": 8.787738611452616e-06, "loss": 116.4662, "step": 37590 }, { "epoch": 0.31103941762832443, "grad_norm": 1266.32177734375, "learning_rate": 8.78681780166329e-06, "loss": 120.4589, "step": 37600 }, { "epoch": 0.3111221408776937, "grad_norm": 876.0301513671875, "learning_rate": 8.785896690574248e-06, "loss": 103.7038, "step": 37610 }, { "epoch": 0.3112048641270629, "grad_norm": 822.327880859375, "learning_rate": 8.784975278258783e-06, "loss": 146.5088, "step": 37620 }, { "epoch": 0.31128758737643214, "grad_norm": 976.1931762695312, "learning_rate": 8.784053564790205e-06, "loss": 110.9248, "step": 37630 }, { "epoch": 0.3113703106258014, "grad_norm": 1754.4827880859375, "learning_rate": 8.783131550241853e-06, "loss": 132.1888, "step": 37640 }, { "epoch": 0.3114530338751706, "grad_norm": 889.1416015625, "learning_rate": 8.782209234687083e-06, "loss": 98.5607, "step": 37650 }, { "epoch": 0.31153575712453985, "grad_norm": 578.06689453125, "learning_rate": 8.781286618199285e-06, "loss": 93.0681, "step": 37660 }, { "epoch": 0.3116184803739091, "grad_norm": 625.0341796875, "learning_rate": 8.780363700851863e-06, "loss": 84.4234, "step": 37670 }, { "epoch": 0.3117012036232783, "grad_norm": 1180.2471923828125, "learning_rate": 8.779440482718251e-06, "loss": 126.0896, "step": 37680 }, { "epoch": 0.31178392687264755, "grad_norm": 1036.988525390625, "learning_rate": 8.778516963871904e-06, "loss": 109.445, "step": 37690 }, { "epoch": 0.3118666501220168, "grad_norm": 771.7061157226562, "learning_rate": 8.777593144386305e-06, "loss": 106.4233, "step": 37700 }, { "epoch": 0.311949373371386, "grad_norm": 853.5861206054688, "learning_rate": 8.776669024334955e-06, "loss": 149.2146, "step": 37710 }, { "epoch": 0.31203209662075526, "grad_norm": 1038.280029296875, "learning_rate": 8.775744603791385e-06, "loss": 87.6942, "step": 37720 }, { "epoch": 0.3121148198701245, "grad_norm": 785.39892578125, "learning_rate": 8.774819882829144e-06, "loss": 101.3138, "step": 37730 }, { "epoch": 0.3121975431194937, "grad_norm": 810.301513671875, "learning_rate": 8.77389486152181e-06, "loss": 104.7605, "step": 37740 }, { "epoch": 0.31228026636886297, "grad_norm": 907.918212890625, "learning_rate": 8.772969539942981e-06, "loss": 93.2778, "step": 37750 }, { "epoch": 0.3123629896182322, "grad_norm": 1630.78955078125, "learning_rate": 8.772043918166282e-06, "loss": 118.6197, "step": 37760 }, { "epoch": 0.3124457128676014, "grad_norm": 792.9736328125, "learning_rate": 8.771117996265358e-06, "loss": 110.336, "step": 37770 }, { "epoch": 0.3125284361169707, "grad_norm": 515.8519287109375, "learning_rate": 8.770191774313883e-06, "loss": 98.6496, "step": 37780 }, { "epoch": 0.31261115936633993, "grad_norm": 889.776611328125, "learning_rate": 8.769265252385552e-06, "loss": 165.6621, "step": 37790 }, { "epoch": 0.31269388261570913, "grad_norm": 780.7835693359375, "learning_rate": 8.768338430554083e-06, "loss": 91.5842, "step": 37800 }, { "epoch": 0.3127766058650784, "grad_norm": 1138.0804443359375, "learning_rate": 8.76741130889322e-06, "loss": 100.7134, "step": 37810 }, { "epoch": 0.31285932911444764, "grad_norm": 942.5914916992188, "learning_rate": 8.766483887476727e-06, "loss": 105.6115, "step": 37820 }, { "epoch": 0.31294205236381684, "grad_norm": 618.5468139648438, "learning_rate": 8.7655561663784e-06, "loss": 75.4478, "step": 37830 }, { "epoch": 0.3130247756131861, "grad_norm": 1693.069580078125, "learning_rate": 8.764628145672048e-06, "loss": 127.7167, "step": 37840 }, { "epoch": 0.31310749886255534, "grad_norm": 1026.7965087890625, "learning_rate": 8.763699825431513e-06, "loss": 138.911, "step": 37850 }, { "epoch": 0.31319022211192454, "grad_norm": 1648.09765625, "learning_rate": 8.762771205730656e-06, "loss": 117.1393, "step": 37860 }, { "epoch": 0.3132729453612938, "grad_norm": 933.2096557617188, "learning_rate": 8.761842286643362e-06, "loss": 96.2922, "step": 37870 }, { "epoch": 0.31335566861066305, "grad_norm": 802.694580078125, "learning_rate": 8.760913068243542e-06, "loss": 144.4842, "step": 37880 }, { "epoch": 0.31343839186003225, "grad_norm": 1243.0850830078125, "learning_rate": 8.759983550605132e-06, "loss": 128.3055, "step": 37890 }, { "epoch": 0.3135211151094015, "grad_norm": 1257.254638671875, "learning_rate": 8.759053733802083e-06, "loss": 100.2831, "step": 37900 }, { "epoch": 0.31360383835877076, "grad_norm": 934.783935546875, "learning_rate": 8.758123617908383e-06, "loss": 100.1143, "step": 37910 }, { "epoch": 0.31368656160813996, "grad_norm": 989.8101196289062, "learning_rate": 8.757193202998033e-06, "loss": 127.9963, "step": 37920 }, { "epoch": 0.3137692848575092, "grad_norm": 1011.3324584960938, "learning_rate": 8.756262489145061e-06, "loss": 112.5696, "step": 37930 }, { "epoch": 0.3138520081068784, "grad_norm": 925.4760131835938, "learning_rate": 8.755331476423526e-06, "loss": 79.9976, "step": 37940 }, { "epoch": 0.31393473135624766, "grad_norm": 932.2153930664062, "learning_rate": 8.754400164907496e-06, "loss": 118.1142, "step": 37950 }, { "epoch": 0.3140174546056169, "grad_norm": 869.6834106445312, "learning_rate": 8.753468554671078e-06, "loss": 122.9429, "step": 37960 }, { "epoch": 0.3141001778549861, "grad_norm": 1097.6431884765625, "learning_rate": 8.752536645788391e-06, "loss": 116.1235, "step": 37970 }, { "epoch": 0.31418290110435537, "grad_norm": 943.837158203125, "learning_rate": 8.751604438333587e-06, "loss": 120.1827, "step": 37980 }, { "epoch": 0.3142656243537246, "grad_norm": 965.2254028320312, "learning_rate": 8.750671932380834e-06, "loss": 111.8385, "step": 37990 }, { "epoch": 0.3143483476030938, "grad_norm": 2054.935546875, "learning_rate": 8.749739128004329e-06, "loss": 105.5353, "step": 38000 }, { "epoch": 0.3144310708524631, "grad_norm": 885.7263793945312, "learning_rate": 8.748806025278292e-06, "loss": 113.4429, "step": 38010 }, { "epoch": 0.31451379410183233, "grad_norm": 1407.1341552734375, "learning_rate": 8.747872624276963e-06, "loss": 99.107, "step": 38020 }, { "epoch": 0.31459651735120153, "grad_norm": 1092.1849365234375, "learning_rate": 8.746938925074609e-06, "loss": 130.1728, "step": 38030 }, { "epoch": 0.3146792406005708, "grad_norm": 977.42578125, "learning_rate": 8.746004927745522e-06, "loss": 116.9955, "step": 38040 }, { "epoch": 0.31476196384994004, "grad_norm": 813.2716064453125, "learning_rate": 8.745070632364014e-06, "loss": 103.2874, "step": 38050 }, { "epoch": 0.31484468709930924, "grad_norm": 823.4459838867188, "learning_rate": 8.744136039004422e-06, "loss": 122.7185, "step": 38060 }, { "epoch": 0.3149274103486785, "grad_norm": 675.5857543945312, "learning_rate": 8.743201147741112e-06, "loss": 117.528, "step": 38070 }, { "epoch": 0.31501013359804775, "grad_norm": 1719.105224609375, "learning_rate": 8.742265958648464e-06, "loss": 110.0581, "step": 38080 }, { "epoch": 0.31509285684741695, "grad_norm": 1066.7659912109375, "learning_rate": 8.741330471800888e-06, "loss": 89.2473, "step": 38090 }, { "epoch": 0.3151755800967862, "grad_norm": 776.9163818359375, "learning_rate": 8.740394687272817e-06, "loss": 124.1178, "step": 38100 }, { "epoch": 0.31525830334615546, "grad_norm": 760.9815063476562, "learning_rate": 8.739458605138706e-06, "loss": 119.1256, "step": 38110 }, { "epoch": 0.31534102659552465, "grad_norm": 974.4657592773438, "learning_rate": 8.738522225473036e-06, "loss": 105.6252, "step": 38120 }, { "epoch": 0.3154237498448939, "grad_norm": 634.0014038085938, "learning_rate": 8.737585548350312e-06, "loss": 119.6853, "step": 38130 }, { "epoch": 0.31550647309426316, "grad_norm": 765.5140380859375, "learning_rate": 8.736648573845057e-06, "loss": 99.3297, "step": 38140 }, { "epoch": 0.31558919634363236, "grad_norm": 1316.4825439453125, "learning_rate": 8.735711302031824e-06, "loss": 123.088, "step": 38150 }, { "epoch": 0.3156719195930016, "grad_norm": 1433.5438232421875, "learning_rate": 8.734773732985186e-06, "loss": 116.7357, "step": 38160 }, { "epoch": 0.31575464284237087, "grad_norm": 826.6362915039062, "learning_rate": 8.733835866779745e-06, "loss": 93.5203, "step": 38170 }, { "epoch": 0.31583736609174007, "grad_norm": 1368.983642578125, "learning_rate": 8.73289770349012e-06, "loss": 90.895, "step": 38180 }, { "epoch": 0.3159200893411093, "grad_norm": 764.3739013671875, "learning_rate": 8.731959243190955e-06, "loss": 115.1358, "step": 38190 }, { "epoch": 0.3160028125904786, "grad_norm": 1166.0986328125, "learning_rate": 8.73102048595692e-06, "loss": 110.7284, "step": 38200 }, { "epoch": 0.3160855358398478, "grad_norm": 900.78759765625, "learning_rate": 8.730081431862709e-06, "loss": 114.7286, "step": 38210 }, { "epoch": 0.31616825908921703, "grad_norm": 694.7711791992188, "learning_rate": 8.729142080983037e-06, "loss": 99.7621, "step": 38220 }, { "epoch": 0.3162509823385863, "grad_norm": 1478.10546875, "learning_rate": 8.728202433392645e-06, "loss": 103.1368, "step": 38230 }, { "epoch": 0.3163337055879555, "grad_norm": 739.2174682617188, "learning_rate": 8.727262489166295e-06, "loss": 91.7107, "step": 38240 }, { "epoch": 0.31641642883732474, "grad_norm": 1041.38525390625, "learning_rate": 8.726322248378775e-06, "loss": 133.2948, "step": 38250 }, { "epoch": 0.316499152086694, "grad_norm": 709.4502563476562, "learning_rate": 8.725381711104894e-06, "loss": 134.3007, "step": 38260 }, { "epoch": 0.3165818753360632, "grad_norm": 1081.0482177734375, "learning_rate": 8.724440877419487e-06, "loss": 107.7189, "step": 38270 }, { "epoch": 0.31666459858543244, "grad_norm": 1248.0484619140625, "learning_rate": 8.723499747397415e-06, "loss": 105.8039, "step": 38280 }, { "epoch": 0.3167473218348017, "grad_norm": 1431.8619384765625, "learning_rate": 8.722558321113555e-06, "loss": 108.0174, "step": 38290 }, { "epoch": 0.3168300450841709, "grad_norm": 972.2454223632812, "learning_rate": 8.721616598642812e-06, "loss": 124.3465, "step": 38300 }, { "epoch": 0.31691276833354015, "grad_norm": 1106.1824951171875, "learning_rate": 8.720674580060117e-06, "loss": 92.7966, "step": 38310 }, { "epoch": 0.31699549158290935, "grad_norm": 1259.550048828125, "learning_rate": 8.719732265440423e-06, "loss": 91.808, "step": 38320 }, { "epoch": 0.3170782148322786, "grad_norm": 1003.4591064453125, "learning_rate": 8.718789654858702e-06, "loss": 97.9086, "step": 38330 }, { "epoch": 0.31716093808164786, "grad_norm": 1087.510498046875, "learning_rate": 8.717846748389956e-06, "loss": 190.7288, "step": 38340 }, { "epoch": 0.31724366133101706, "grad_norm": 1307.7303466796875, "learning_rate": 8.716903546109208e-06, "loss": 100.8898, "step": 38350 }, { "epoch": 0.3173263845803863, "grad_norm": 389.17926025390625, "learning_rate": 8.715960048091502e-06, "loss": 105.3628, "step": 38360 }, { "epoch": 0.31740910782975557, "grad_norm": 634.9244995117188, "learning_rate": 8.715016254411908e-06, "loss": 93.6207, "step": 38370 }, { "epoch": 0.31749183107912476, "grad_norm": 1184.2578125, "learning_rate": 8.714072165145521e-06, "loss": 135.4022, "step": 38380 }, { "epoch": 0.317574554328494, "grad_norm": 619.5433349609375, "learning_rate": 8.713127780367458e-06, "loss": 82.4095, "step": 38390 }, { "epoch": 0.3176572775778633, "grad_norm": 1421.2760009765625, "learning_rate": 8.712183100152858e-06, "loss": 104.6683, "step": 38400 }, { "epoch": 0.31774000082723247, "grad_norm": 569.2383422851562, "learning_rate": 8.711238124576884e-06, "loss": 100.9128, "step": 38410 }, { "epoch": 0.3178227240766017, "grad_norm": 486.7202453613281, "learning_rate": 8.710292853714726e-06, "loss": 102.4042, "step": 38420 }, { "epoch": 0.317905447325971, "grad_norm": 1279.0054931640625, "learning_rate": 8.709347287641593e-06, "loss": 107.4948, "step": 38430 }, { "epoch": 0.3179881705753402, "grad_norm": 777.2363891601562, "learning_rate": 8.70840142643272e-06, "loss": 126.2411, "step": 38440 }, { "epoch": 0.31807089382470943, "grad_norm": 680.6844482421875, "learning_rate": 8.707455270163365e-06, "loss": 98.6142, "step": 38450 }, { "epoch": 0.3181536170740787, "grad_norm": 1288.6944580078125, "learning_rate": 8.70650881890881e-06, "loss": 125.0557, "step": 38460 }, { "epoch": 0.3182363403234479, "grad_norm": 760.6031494140625, "learning_rate": 8.705562072744358e-06, "loss": 138.251, "step": 38470 }, { "epoch": 0.31831906357281714, "grad_norm": 883.0706176757812, "learning_rate": 8.704615031745337e-06, "loss": 111.4153, "step": 38480 }, { "epoch": 0.3184017868221864, "grad_norm": 1131.5177001953125, "learning_rate": 8.703667695987102e-06, "loss": 113.8998, "step": 38490 }, { "epoch": 0.3184845100715556, "grad_norm": 973.4132690429688, "learning_rate": 8.702720065545024e-06, "loss": 106.239, "step": 38500 }, { "epoch": 0.31856723332092485, "grad_norm": 1401.8857421875, "learning_rate": 8.701772140494504e-06, "loss": 131.2176, "step": 38510 }, { "epoch": 0.3186499565702941, "grad_norm": 735.4816284179688, "learning_rate": 8.700823920910964e-06, "loss": 124.5568, "step": 38520 }, { "epoch": 0.3187326798196633, "grad_norm": 1129.6258544921875, "learning_rate": 8.699875406869848e-06, "loss": 103.7197, "step": 38530 }, { "epoch": 0.31881540306903255, "grad_norm": 802.9367065429688, "learning_rate": 8.69892659844663e-06, "loss": 129.3652, "step": 38540 }, { "epoch": 0.3188981263184018, "grad_norm": 1019.4291381835938, "learning_rate": 8.697977495716794e-06, "loss": 113.1963, "step": 38550 }, { "epoch": 0.318980849567771, "grad_norm": 1141.768798828125, "learning_rate": 8.697028098755863e-06, "loss": 75.0446, "step": 38560 }, { "epoch": 0.31906357281714026, "grad_norm": 561.0872192382812, "learning_rate": 8.69607840763937e-06, "loss": 107.2292, "step": 38570 }, { "epoch": 0.3191462960665095, "grad_norm": 567.6842651367188, "learning_rate": 8.695128422442882e-06, "loss": 105.8062, "step": 38580 }, { "epoch": 0.3192290193158787, "grad_norm": 1812.7880859375, "learning_rate": 8.694178143241984e-06, "loss": 116.8599, "step": 38590 }, { "epoch": 0.31931174256524797, "grad_norm": 821.3114013671875, "learning_rate": 8.693227570112285e-06, "loss": 113.9192, "step": 38600 }, { "epoch": 0.3193944658146172, "grad_norm": 1383.909423828125, "learning_rate": 8.692276703129421e-06, "loss": 123.0948, "step": 38610 }, { "epoch": 0.3194771890639864, "grad_norm": 559.3286743164062, "learning_rate": 8.691325542369041e-06, "loss": 81.4486, "step": 38620 }, { "epoch": 0.3195599123133557, "grad_norm": 1106.412109375, "learning_rate": 8.69037408790683e-06, "loss": 119.6945, "step": 38630 }, { "epoch": 0.31964263556272493, "grad_norm": 1364.677490234375, "learning_rate": 8.689422339818489e-06, "loss": 140.9282, "step": 38640 }, { "epoch": 0.31972535881209413, "grad_norm": 0.0, "learning_rate": 8.688470298179746e-06, "loss": 140.5661, "step": 38650 }, { "epoch": 0.3198080820614634, "grad_norm": 859.6204833984375, "learning_rate": 8.687517963066347e-06, "loss": 110.4718, "step": 38660 }, { "epoch": 0.3198908053108326, "grad_norm": 1258.269775390625, "learning_rate": 8.686565334554069e-06, "loss": 126.0004, "step": 38670 }, { "epoch": 0.31997352856020184, "grad_norm": 1039.6004638671875, "learning_rate": 8.685612412718704e-06, "loss": 119.8658, "step": 38680 }, { "epoch": 0.3200562518095711, "grad_norm": 923.8244018554688, "learning_rate": 8.684659197636076e-06, "loss": 124.2017, "step": 38690 }, { "epoch": 0.3201389750589403, "grad_norm": 1117.4451904296875, "learning_rate": 8.683705689382025e-06, "loss": 107.7295, "step": 38700 }, { "epoch": 0.32022169830830954, "grad_norm": 1363.1929931640625, "learning_rate": 8.682751888032419e-06, "loss": 99.3945, "step": 38710 }, { "epoch": 0.3203044215576788, "grad_norm": 701.418212890625, "learning_rate": 8.681797793663147e-06, "loss": 120.6914, "step": 38720 }, { "epoch": 0.320387144807048, "grad_norm": 1962.092041015625, "learning_rate": 8.680843406350122e-06, "loss": 105.0907, "step": 38730 }, { "epoch": 0.32046986805641725, "grad_norm": 1574.8798828125, "learning_rate": 8.679888726169277e-06, "loss": 123.2075, "step": 38740 }, { "epoch": 0.3205525913057865, "grad_norm": 979.2647094726562, "learning_rate": 8.678933753196577e-06, "loss": 117.9523, "step": 38750 }, { "epoch": 0.3206353145551557, "grad_norm": 961.4496459960938, "learning_rate": 8.677978487508002e-06, "loss": 130.495, "step": 38760 }, { "epoch": 0.32071803780452496, "grad_norm": 1239.4903564453125, "learning_rate": 8.677022929179558e-06, "loss": 116.15, "step": 38770 }, { "epoch": 0.3208007610538942, "grad_norm": 936.8592529296875, "learning_rate": 8.676067078287276e-06, "loss": 102.5058, "step": 38780 }, { "epoch": 0.3208834843032634, "grad_norm": 1102.019775390625, "learning_rate": 8.675110934907206e-06, "loss": 105.1739, "step": 38790 }, { "epoch": 0.32096620755263267, "grad_norm": 1137.979736328125, "learning_rate": 8.674154499115426e-06, "loss": 103.8995, "step": 38800 }, { "epoch": 0.3210489308020019, "grad_norm": 983.4441528320312, "learning_rate": 8.673197770988034e-06, "loss": 100.5983, "step": 38810 }, { "epoch": 0.3211316540513711, "grad_norm": 601.077392578125, "learning_rate": 8.672240750601152e-06, "loss": 100.5274, "step": 38820 }, { "epoch": 0.3212143773007404, "grad_norm": 897.4487915039062, "learning_rate": 8.67128343803093e-06, "loss": 99.9841, "step": 38830 }, { "epoch": 0.3212971005501096, "grad_norm": 1025.739013671875, "learning_rate": 8.670325833353532e-06, "loss": 89.5816, "step": 38840 }, { "epoch": 0.3213798237994788, "grad_norm": 677.2406005859375, "learning_rate": 8.669367936645152e-06, "loss": 105.6764, "step": 38850 }, { "epoch": 0.3214625470488481, "grad_norm": 912.070068359375, "learning_rate": 8.668409747982005e-06, "loss": 129.2276, "step": 38860 }, { "epoch": 0.32154527029821733, "grad_norm": 909.7315673828125, "learning_rate": 8.667451267440332e-06, "loss": 98.6507, "step": 38870 }, { "epoch": 0.32162799354758653, "grad_norm": 934.35400390625, "learning_rate": 8.666492495096391e-06, "loss": 121.8479, "step": 38880 }, { "epoch": 0.3217107167969558, "grad_norm": 1550.7830810546875, "learning_rate": 8.66553343102647e-06, "loss": 124.7869, "step": 38890 }, { "epoch": 0.32179344004632504, "grad_norm": 1913.998291015625, "learning_rate": 8.664574075306876e-06, "loss": 109.4713, "step": 38900 }, { "epoch": 0.32187616329569424, "grad_norm": 665.229736328125, "learning_rate": 8.66361442801394e-06, "loss": 94.5389, "step": 38910 }, { "epoch": 0.3219588865450635, "grad_norm": 762.2039794921875, "learning_rate": 8.662654489224018e-06, "loss": 94.5307, "step": 38920 }, { "epoch": 0.32204160979443275, "grad_norm": 1010.405029296875, "learning_rate": 8.661694259013489e-06, "loss": 143.722, "step": 38930 }, { "epoch": 0.32212433304380195, "grad_norm": 808.5379638671875, "learning_rate": 8.660733737458751e-06, "loss": 134.8724, "step": 38940 }, { "epoch": 0.3222070562931712, "grad_norm": 964.7113037109375, "learning_rate": 8.659772924636232e-06, "loss": 122.8288, "step": 38950 }, { "epoch": 0.32228977954254046, "grad_norm": 908.0009765625, "learning_rate": 8.658811820622376e-06, "loss": 90.802, "step": 38960 }, { "epoch": 0.32237250279190965, "grad_norm": 1564.2470703125, "learning_rate": 8.657850425493656e-06, "loss": 129.7668, "step": 38970 }, { "epoch": 0.3224552260412789, "grad_norm": 1255.0084228515625, "learning_rate": 8.656888739326564e-06, "loss": 96.6529, "step": 38980 }, { "epoch": 0.32253794929064816, "grad_norm": 960.5479125976562, "learning_rate": 8.65592676219762e-06, "loss": 103.2335, "step": 38990 }, { "epoch": 0.32262067254001736, "grad_norm": 815.9246826171875, "learning_rate": 8.65496449418336e-06, "loss": 94.21, "step": 39000 }, { "epoch": 0.3227033957893866, "grad_norm": 942.2803344726562, "learning_rate": 8.654001935360349e-06, "loss": 108.4447, "step": 39010 }, { "epoch": 0.32278611903875587, "grad_norm": 1212.7076416015625, "learning_rate": 8.653039085805174e-06, "loss": 97.4576, "step": 39020 }, { "epoch": 0.32286884228812507, "grad_norm": 1259.3065185546875, "learning_rate": 8.652075945594444e-06, "loss": 96.3901, "step": 39030 }, { "epoch": 0.3229515655374943, "grad_norm": 673.8052368164062, "learning_rate": 8.651112514804793e-06, "loss": 94.6694, "step": 39040 }, { "epoch": 0.3230342887868635, "grad_norm": 1086.0980224609375, "learning_rate": 8.650148793512874e-06, "loss": 161.4135, "step": 39050 }, { "epoch": 0.3231170120362328, "grad_norm": 1190.9241943359375, "learning_rate": 8.649184781795367e-06, "loss": 122.2091, "step": 39060 }, { "epoch": 0.32319973528560203, "grad_norm": 750.9871215820312, "learning_rate": 8.648220479728976e-06, "loss": 129.1647, "step": 39070 }, { "epoch": 0.32328245853497123, "grad_norm": 675.1224365234375, "learning_rate": 8.647255887390425e-06, "loss": 87.7561, "step": 39080 }, { "epoch": 0.3233651817843405, "grad_norm": 960.1796875, "learning_rate": 8.64629100485646e-06, "loss": 107.4958, "step": 39090 }, { "epoch": 0.32344790503370974, "grad_norm": 1386.735595703125, "learning_rate": 8.645325832203855e-06, "loss": 135.6421, "step": 39100 }, { "epoch": 0.32353062828307894, "grad_norm": 589.93896484375, "learning_rate": 8.644360369509403e-06, "loss": 102.7022, "step": 39110 }, { "epoch": 0.3236133515324482, "grad_norm": 1394.318603515625, "learning_rate": 8.64339461684992e-06, "loss": 109.0257, "step": 39120 }, { "epoch": 0.32369607478181744, "grad_norm": 1033.558349609375, "learning_rate": 8.64242857430225e-06, "loss": 120.0752, "step": 39130 }, { "epoch": 0.32377879803118664, "grad_norm": 877.220947265625, "learning_rate": 8.641462241943255e-06, "loss": 149.2554, "step": 39140 }, { "epoch": 0.3238615212805559, "grad_norm": 1799.2706298828125, "learning_rate": 8.640495619849821e-06, "loss": 106.8699, "step": 39150 }, { "epoch": 0.32394424452992515, "grad_norm": 354.8163146972656, "learning_rate": 8.639528708098858e-06, "loss": 138.8774, "step": 39160 }, { "epoch": 0.32402696777929435, "grad_norm": 734.3604125976562, "learning_rate": 8.6385615067673e-06, "loss": 113.2349, "step": 39170 }, { "epoch": 0.3241096910286636, "grad_norm": 1038.3038330078125, "learning_rate": 8.6375940159321e-06, "loss": 80.891, "step": 39180 }, { "epoch": 0.32419241427803286, "grad_norm": 1265.29638671875, "learning_rate": 8.63662623567024e-06, "loss": 100.5664, "step": 39190 }, { "epoch": 0.32427513752740206, "grad_norm": 975.9793701171875, "learning_rate": 8.63565816605872e-06, "loss": 98.3975, "step": 39200 }, { "epoch": 0.3243578607767713, "grad_norm": 1260.0736083984375, "learning_rate": 8.634689807174564e-06, "loss": 122.2016, "step": 39210 }, { "epoch": 0.32444058402614057, "grad_norm": 745.4339599609375, "learning_rate": 8.633721159094823e-06, "loss": 118.936, "step": 39220 }, { "epoch": 0.32452330727550976, "grad_norm": 803.7348022460938, "learning_rate": 8.632752221896562e-06, "loss": 76.6836, "step": 39230 }, { "epoch": 0.324606030524879, "grad_norm": 901.3794555664062, "learning_rate": 8.631782995656884e-06, "loss": 114.3698, "step": 39240 }, { "epoch": 0.3246887537742483, "grad_norm": 2110.507080078125, "learning_rate": 8.630813480452898e-06, "loss": 109.549, "step": 39250 }, { "epoch": 0.32477147702361747, "grad_norm": 868.3207397460938, "learning_rate": 8.629843676361747e-06, "loss": 147.7418, "step": 39260 }, { "epoch": 0.3248542002729867, "grad_norm": 776.3412475585938, "learning_rate": 8.628873583460593e-06, "loss": 126.7695, "step": 39270 }, { "epoch": 0.324936923522356, "grad_norm": 1359.6387939453125, "learning_rate": 8.627903201826622e-06, "loss": 120.8187, "step": 39280 }, { "epoch": 0.3250196467717252, "grad_norm": 1282.950439453125, "learning_rate": 8.626932531537042e-06, "loss": 123.0786, "step": 39290 }, { "epoch": 0.32510237002109443, "grad_norm": 530.9898071289062, "learning_rate": 8.625961572669087e-06, "loss": 120.1885, "step": 39300 }, { "epoch": 0.3251850932704637, "grad_norm": 997.0206298828125, "learning_rate": 8.62499032530001e-06, "loss": 120.9154, "step": 39310 }, { "epoch": 0.3252678165198329, "grad_norm": 676.1962890625, "learning_rate": 8.624018789507091e-06, "loss": 97.3104, "step": 39320 }, { "epoch": 0.32535053976920214, "grad_norm": 692.87255859375, "learning_rate": 8.62304696536763e-06, "loss": 118.6817, "step": 39330 }, { "epoch": 0.3254332630185714, "grad_norm": 980.8681030273438, "learning_rate": 8.622074852958946e-06, "loss": 112.1015, "step": 39340 }, { "epoch": 0.3255159862679406, "grad_norm": 1021.6939697265625, "learning_rate": 8.621102452358393e-06, "loss": 190.7402, "step": 39350 }, { "epoch": 0.32559870951730985, "grad_norm": 1223.068115234375, "learning_rate": 8.620129763643333e-06, "loss": 128.2917, "step": 39360 }, { "epoch": 0.3256814327666791, "grad_norm": 1219.437744140625, "learning_rate": 8.619156786891162e-06, "loss": 136.7339, "step": 39370 }, { "epoch": 0.3257641560160483, "grad_norm": 1412.034912109375, "learning_rate": 8.618183522179295e-06, "loss": 131.6702, "step": 39380 }, { "epoch": 0.32584687926541755, "grad_norm": 1203.2330322265625, "learning_rate": 8.617209969585171e-06, "loss": 88.7958, "step": 39390 }, { "epoch": 0.32592960251478675, "grad_norm": 809.6597290039062, "learning_rate": 8.616236129186252e-06, "loss": 102.6644, "step": 39400 }, { "epoch": 0.326012325764156, "grad_norm": 3100.604736328125, "learning_rate": 8.615262001060019e-06, "loss": 176.8819, "step": 39410 }, { "epoch": 0.32609504901352526, "grad_norm": 1474.5286865234375, "learning_rate": 8.61428758528398e-06, "loss": 123.6517, "step": 39420 }, { "epoch": 0.32617777226289446, "grad_norm": 893.0943603515625, "learning_rate": 8.613312881935667e-06, "loss": 118.6461, "step": 39430 }, { "epoch": 0.3262604955122637, "grad_norm": 1108.7327880859375, "learning_rate": 8.61233789109263e-06, "loss": 129.676, "step": 39440 }, { "epoch": 0.32634321876163297, "grad_norm": 799.1575317382812, "learning_rate": 8.611362612832445e-06, "loss": 109.5865, "step": 39450 }, { "epoch": 0.32642594201100217, "grad_norm": 1402.5484619140625, "learning_rate": 8.610387047232711e-06, "loss": 103.1031, "step": 39460 }, { "epoch": 0.3265086652603714, "grad_norm": 1590.6834716796875, "learning_rate": 8.609411194371049e-06, "loss": 114.6393, "step": 39470 }, { "epoch": 0.3265913885097407, "grad_norm": 1389.5260009765625, "learning_rate": 8.608435054325103e-06, "loss": 100.0405, "step": 39480 }, { "epoch": 0.3266741117591099, "grad_norm": 1510.9293212890625, "learning_rate": 8.60745862717254e-06, "loss": 78.9952, "step": 39490 }, { "epoch": 0.32675683500847913, "grad_norm": 2515.405029296875, "learning_rate": 8.606481912991052e-06, "loss": 125.8343, "step": 39500 }, { "epoch": 0.3268395582578484, "grad_norm": 1044.6246337890625, "learning_rate": 8.605504911858347e-06, "loss": 95.9947, "step": 39510 }, { "epoch": 0.3269222815072176, "grad_norm": 742.9393920898438, "learning_rate": 8.604527623852165e-06, "loss": 129.3403, "step": 39520 }, { "epoch": 0.32700500475658684, "grad_norm": 843.3123779296875, "learning_rate": 8.603550049050262e-06, "loss": 124.6452, "step": 39530 }, { "epoch": 0.3270877280059561, "grad_norm": 629.9082641601562, "learning_rate": 8.602572187530421e-06, "loss": 103.7542, "step": 39540 }, { "epoch": 0.3271704512553253, "grad_norm": 1079.95556640625, "learning_rate": 8.601594039370441e-06, "loss": 117.5058, "step": 39550 }, { "epoch": 0.32725317450469454, "grad_norm": 785.5013427734375, "learning_rate": 8.600615604648155e-06, "loss": 67.8067, "step": 39560 }, { "epoch": 0.3273358977540638, "grad_norm": 831.0355224609375, "learning_rate": 8.599636883441408e-06, "loss": 123.9131, "step": 39570 }, { "epoch": 0.327418621003433, "grad_norm": 2647.583251953125, "learning_rate": 8.598657875828078e-06, "loss": 107.162, "step": 39580 }, { "epoch": 0.32750134425280225, "grad_norm": 2026.9219970703125, "learning_rate": 8.597678581886055e-06, "loss": 130.6936, "step": 39590 }, { "epoch": 0.3275840675021715, "grad_norm": 991.1618041992188, "learning_rate": 8.596699001693257e-06, "loss": 107.1374, "step": 39600 }, { "epoch": 0.3276667907515407, "grad_norm": 1521.5604248046875, "learning_rate": 8.595719135327627e-06, "loss": 98.5977, "step": 39610 }, { "epoch": 0.32774951400090996, "grad_norm": 613.502685546875, "learning_rate": 8.594738982867126e-06, "loss": 100.9653, "step": 39620 }, { "epoch": 0.3278322372502792, "grad_norm": 1424.407470703125, "learning_rate": 8.593758544389743e-06, "loss": 119.1369, "step": 39630 }, { "epoch": 0.3279149604996484, "grad_norm": 519.1179809570312, "learning_rate": 8.592777819973486e-06, "loss": 121.2218, "step": 39640 }, { "epoch": 0.32799768374901767, "grad_norm": 1449.661865234375, "learning_rate": 8.591796809696386e-06, "loss": 114.1455, "step": 39650 }, { "epoch": 0.3280804069983869, "grad_norm": 986.8948364257812, "learning_rate": 8.590815513636498e-06, "loss": 111.6402, "step": 39660 }, { "epoch": 0.3281631302477561, "grad_norm": 839.0859375, "learning_rate": 8.5898339318719e-06, "loss": 85.4794, "step": 39670 }, { "epoch": 0.3282458534971254, "grad_norm": 1216.8238525390625, "learning_rate": 8.58885206448069e-06, "loss": 126.5229, "step": 39680 }, { "epoch": 0.3283285767464946, "grad_norm": 1210.4658203125, "learning_rate": 8.587869911540993e-06, "loss": 131.2425, "step": 39690 }, { "epoch": 0.3284112999958638, "grad_norm": 638.7323608398438, "learning_rate": 8.586887473130951e-06, "loss": 117.0074, "step": 39700 }, { "epoch": 0.3284940232452331, "grad_norm": 1674.9326171875, "learning_rate": 8.585904749328736e-06, "loss": 101.3178, "step": 39710 }, { "epoch": 0.32857674649460233, "grad_norm": 730.9718627929688, "learning_rate": 8.584921740212537e-06, "loss": 79.7682, "step": 39720 }, { "epoch": 0.32865946974397153, "grad_norm": 849.2908325195312, "learning_rate": 8.583938445860569e-06, "loss": 134.0528, "step": 39730 }, { "epoch": 0.3287421929933408, "grad_norm": 1976.713134765625, "learning_rate": 8.582954866351065e-06, "loss": 109.2086, "step": 39740 }, { "epoch": 0.32882491624271004, "grad_norm": 1140.69140625, "learning_rate": 8.581971001762287e-06, "loss": 115.7576, "step": 39750 }, { "epoch": 0.32890763949207924, "grad_norm": 1253.4771728515625, "learning_rate": 8.580986852172514e-06, "loss": 99.3701, "step": 39760 }, { "epoch": 0.3289903627414485, "grad_norm": 1577.8370361328125, "learning_rate": 8.580002417660054e-06, "loss": 137.5488, "step": 39770 }, { "epoch": 0.3290730859908177, "grad_norm": 759.9320068359375, "learning_rate": 8.579017698303228e-06, "loss": 110.6118, "step": 39780 }, { "epoch": 0.32915580924018695, "grad_norm": 807.0444946289062, "learning_rate": 8.578032694180394e-06, "loss": 108.5404, "step": 39790 }, { "epoch": 0.3292385324895562, "grad_norm": 901.5609741210938, "learning_rate": 8.577047405369916e-06, "loss": 92.3528, "step": 39800 }, { "epoch": 0.3293212557389254, "grad_norm": 815.9768676757812, "learning_rate": 8.576061831950193e-06, "loss": 116.8808, "step": 39810 }, { "epoch": 0.32940397898829465, "grad_norm": 676.6227416992188, "learning_rate": 8.575075973999642e-06, "loss": 104.0332, "step": 39820 }, { "epoch": 0.3294867022376639, "grad_norm": 655.098876953125, "learning_rate": 8.574089831596703e-06, "loss": 114.4098, "step": 39830 }, { "epoch": 0.3295694254870331, "grad_norm": 1169.68359375, "learning_rate": 8.57310340481984e-06, "loss": 88.758, "step": 39840 }, { "epoch": 0.32965214873640236, "grad_norm": 635.2750244140625, "learning_rate": 8.572116693747537e-06, "loss": 98.1875, "step": 39850 }, { "epoch": 0.3297348719857716, "grad_norm": 797.3588256835938, "learning_rate": 8.571129698458302e-06, "loss": 101.033, "step": 39860 }, { "epoch": 0.3298175952351408, "grad_norm": 1276.1683349609375, "learning_rate": 8.570142419030668e-06, "loss": 111.7359, "step": 39870 }, { "epoch": 0.32990031848451007, "grad_norm": 700.5169677734375, "learning_rate": 8.569154855543184e-06, "loss": 101.829, "step": 39880 }, { "epoch": 0.3299830417338793, "grad_norm": 1665.83984375, "learning_rate": 8.56816700807443e-06, "loss": 120.2223, "step": 39890 }, { "epoch": 0.3300657649832485, "grad_norm": 619.46435546875, "learning_rate": 8.567178876703002e-06, "loss": 101.4117, "step": 39900 }, { "epoch": 0.3301484882326178, "grad_norm": 1413.1790771484375, "learning_rate": 8.566190461507521e-06, "loss": 108.3938, "step": 39910 }, { "epoch": 0.33023121148198703, "grad_norm": 917.2999877929688, "learning_rate": 8.565201762566632e-06, "loss": 80.6623, "step": 39920 }, { "epoch": 0.33031393473135623, "grad_norm": 787.8756713867188, "learning_rate": 8.564212779959003e-06, "loss": 130.3724, "step": 39930 }, { "epoch": 0.3303966579807255, "grad_norm": 649.1900634765625, "learning_rate": 8.563223513763319e-06, "loss": 107.5673, "step": 39940 }, { "epoch": 0.33047938123009474, "grad_norm": 656.0354614257812, "learning_rate": 8.562233964058294e-06, "loss": 138.9998, "step": 39950 }, { "epoch": 0.33056210447946394, "grad_norm": 1630.0479736328125, "learning_rate": 8.561244130922658e-06, "loss": 79.6873, "step": 39960 }, { "epoch": 0.3306448277288332, "grad_norm": 908.9981689453125, "learning_rate": 8.560254014435172e-06, "loss": 124.0382, "step": 39970 }, { "epoch": 0.33072755097820244, "grad_norm": 1071.929931640625, "learning_rate": 8.559263614674615e-06, "loss": 102.3747, "step": 39980 }, { "epoch": 0.33081027422757164, "grad_norm": 827.8428344726562, "learning_rate": 8.558272931719785e-06, "loss": 100.2324, "step": 39990 }, { "epoch": 0.3308929974769409, "grad_norm": 2593.23779296875, "learning_rate": 8.557281965649508e-06, "loss": 107.4415, "step": 40000 }, { "epoch": 0.33097572072631015, "grad_norm": 1010.6577758789062, "learning_rate": 8.556290716542632e-06, "loss": 84.2611, "step": 40010 }, { "epoch": 0.33105844397567935, "grad_norm": 787.0457153320312, "learning_rate": 8.555299184478026e-06, "loss": 129.8781, "step": 40020 }, { "epoch": 0.3311411672250486, "grad_norm": 953.2777099609375, "learning_rate": 8.554307369534577e-06, "loss": 103.5916, "step": 40030 }, { "epoch": 0.33122389047441786, "grad_norm": 1413.6817626953125, "learning_rate": 8.553315271791207e-06, "loss": 92.5186, "step": 40040 }, { "epoch": 0.33130661372378706, "grad_norm": 1188.9342041015625, "learning_rate": 8.552322891326846e-06, "loss": 98.2379, "step": 40050 }, { "epoch": 0.3313893369731563, "grad_norm": 1236.0517578125, "learning_rate": 8.551330228220454e-06, "loss": 107.0516, "step": 40060 }, { "epoch": 0.33147206022252557, "grad_norm": 919.5791015625, "learning_rate": 8.550337282551016e-06, "loss": 101.3186, "step": 40070 }, { "epoch": 0.33155478347189477, "grad_norm": 1503.684326171875, "learning_rate": 8.549344054397533e-06, "loss": 108.6517, "step": 40080 }, { "epoch": 0.331637506721264, "grad_norm": 419.35028076171875, "learning_rate": 8.548350543839034e-06, "loss": 85.9801, "step": 40090 }, { "epoch": 0.3317202299706333, "grad_norm": 1511.6007080078125, "learning_rate": 8.547356750954568e-06, "loss": 109.9655, "step": 40100 }, { "epoch": 0.3318029532200025, "grad_norm": 811.8706665039062, "learning_rate": 8.546362675823204e-06, "loss": 186.2005, "step": 40110 }, { "epoch": 0.3318856764693717, "grad_norm": 910.4181518554688, "learning_rate": 8.545368318524036e-06, "loss": 113.1569, "step": 40120 }, { "epoch": 0.3319683997187409, "grad_norm": 841.266357421875, "learning_rate": 8.544373679136184e-06, "loss": 102.7385, "step": 40130 }, { "epoch": 0.3320511229681102, "grad_norm": 1087.2705078125, "learning_rate": 8.543378757738785e-06, "loss": 83.0132, "step": 40140 }, { "epoch": 0.33213384621747943, "grad_norm": 844.1511840820312, "learning_rate": 8.542383554411e-06, "loss": 97.4727, "step": 40150 }, { "epoch": 0.33221656946684863, "grad_norm": 1138.1463623046875, "learning_rate": 8.541388069232012e-06, "loss": 96.1207, "step": 40160 }, { "epoch": 0.3322992927162179, "grad_norm": 1386.313232421875, "learning_rate": 8.54039230228103e-06, "loss": 121.9133, "step": 40170 }, { "epoch": 0.33238201596558714, "grad_norm": 1644.196044921875, "learning_rate": 8.53939625363728e-06, "loss": 110.2846, "step": 40180 }, { "epoch": 0.33246473921495634, "grad_norm": 663.9628295898438, "learning_rate": 8.538399923380011e-06, "loss": 147.1378, "step": 40190 }, { "epoch": 0.3325474624643256, "grad_norm": 921.0543212890625, "learning_rate": 8.537403311588502e-06, "loss": 94.7127, "step": 40200 }, { "epoch": 0.33263018571369485, "grad_norm": 1512.567626953125, "learning_rate": 8.536406418342044e-06, "loss": 87.8837, "step": 40210 }, { "epoch": 0.33271290896306405, "grad_norm": 1098.78369140625, "learning_rate": 8.53540924371996e-06, "loss": 108.6445, "step": 40220 }, { "epoch": 0.3327956322124333, "grad_norm": 1846.2921142578125, "learning_rate": 8.534411787801586e-06, "loss": 95.6519, "step": 40230 }, { "epoch": 0.33287835546180256, "grad_norm": 1103.0107421875, "learning_rate": 8.533414050666287e-06, "loss": 109.1561, "step": 40240 }, { "epoch": 0.33296107871117175, "grad_norm": 903.0642700195312, "learning_rate": 8.532416032393447e-06, "loss": 99.2833, "step": 40250 }, { "epoch": 0.333043801960541, "grad_norm": 1340.9583740234375, "learning_rate": 8.531417733062476e-06, "loss": 116.9413, "step": 40260 }, { "epoch": 0.33312652520991026, "grad_norm": 1119.4525146484375, "learning_rate": 8.530419152752804e-06, "loss": 124.4811, "step": 40270 }, { "epoch": 0.33320924845927946, "grad_norm": 1566.3560791015625, "learning_rate": 8.529420291543882e-06, "loss": 127.6215, "step": 40280 }, { "epoch": 0.3332919717086487, "grad_norm": 1316.3895263671875, "learning_rate": 8.528421149515185e-06, "loss": 107.4906, "step": 40290 }, { "epoch": 0.33337469495801797, "grad_norm": 825.5098266601562, "learning_rate": 8.52742172674621e-06, "loss": 129.04, "step": 40300 }, { "epoch": 0.33345741820738717, "grad_norm": 1096.2099609375, "learning_rate": 8.526422023316478e-06, "loss": 91.2496, "step": 40310 }, { "epoch": 0.3335401414567564, "grad_norm": 781.985107421875, "learning_rate": 8.525422039305529e-06, "loss": 124.4936, "step": 40320 }, { "epoch": 0.3336228647061257, "grad_norm": 1158.1458740234375, "learning_rate": 8.524421774792926e-06, "loss": 84.0795, "step": 40330 }, { "epoch": 0.3337055879554949, "grad_norm": 1207.7098388671875, "learning_rate": 8.52342122985826e-06, "loss": 91.0112, "step": 40340 }, { "epoch": 0.33378831120486413, "grad_norm": 847.1415405273438, "learning_rate": 8.522420404581135e-06, "loss": 101.6437, "step": 40350 }, { "epoch": 0.3338710344542334, "grad_norm": 995.0086669921875, "learning_rate": 8.521419299041185e-06, "loss": 93.0817, "step": 40360 }, { "epoch": 0.3339537577036026, "grad_norm": 892.8756103515625, "learning_rate": 8.520417913318065e-06, "loss": 110.2036, "step": 40370 }, { "epoch": 0.33403648095297184, "grad_norm": 1183.2650146484375, "learning_rate": 8.519416247491445e-06, "loss": 126.0844, "step": 40380 }, { "epoch": 0.3341192042023411, "grad_norm": 605.9612426757812, "learning_rate": 8.518414301641027e-06, "loss": 69.7784, "step": 40390 }, { "epoch": 0.3342019274517103, "grad_norm": 1190.9295654296875, "learning_rate": 8.517412075846529e-06, "loss": 138.7514, "step": 40400 }, { "epoch": 0.33428465070107954, "grad_norm": 792.2731323242188, "learning_rate": 8.516409570187698e-06, "loss": 98.212, "step": 40410 }, { "epoch": 0.3343673739504488, "grad_norm": 820.3512573242188, "learning_rate": 8.515406784744294e-06, "loss": 87.1192, "step": 40420 }, { "epoch": 0.334450097199818, "grad_norm": 1030.4779052734375, "learning_rate": 8.514403719596104e-06, "loss": 112.4568, "step": 40430 }, { "epoch": 0.33453282044918725, "grad_norm": 649.7733764648438, "learning_rate": 8.513400374822942e-06, "loss": 129.4392, "step": 40440 }, { "epoch": 0.3346155436985565, "grad_norm": 1169.7542724609375, "learning_rate": 8.512396750504635e-06, "loss": 96.2116, "step": 40450 }, { "epoch": 0.3346982669479257, "grad_norm": 1340.5599365234375, "learning_rate": 8.511392846721037e-06, "loss": 130.6511, "step": 40460 }, { "epoch": 0.33478099019729496, "grad_norm": 1084.3248291015625, "learning_rate": 8.510388663552027e-06, "loss": 96.2522, "step": 40470 }, { "epoch": 0.33486371344666416, "grad_norm": 940.4993896484375, "learning_rate": 8.509384201077502e-06, "loss": 182.6661, "step": 40480 }, { "epoch": 0.3349464366960334, "grad_norm": 473.4619445800781, "learning_rate": 8.508379459377381e-06, "loss": 98.2326, "step": 40490 }, { "epoch": 0.33502915994540267, "grad_norm": 915.9678344726562, "learning_rate": 8.507374438531606e-06, "loss": 90.029, "step": 40500 }, { "epoch": 0.33511188319477186, "grad_norm": 644.4666748046875, "learning_rate": 8.506369138620148e-06, "loss": 160.186, "step": 40510 }, { "epoch": 0.3351946064441411, "grad_norm": 771.171875, "learning_rate": 8.505363559722985e-06, "loss": 96.5032, "step": 40520 }, { "epoch": 0.3352773296935104, "grad_norm": 575.1885375976562, "learning_rate": 8.504357701920134e-06, "loss": 78.7146, "step": 40530 }, { "epoch": 0.33536005294287957, "grad_norm": 728.8525390625, "learning_rate": 8.503351565291622e-06, "loss": 130.1776, "step": 40540 }, { "epoch": 0.3354427761922488, "grad_norm": 1049.326416015625, "learning_rate": 8.502345149917506e-06, "loss": 91.4142, "step": 40550 }, { "epoch": 0.3355254994416181, "grad_norm": 1567.1068115234375, "learning_rate": 8.501338455877859e-06, "loss": 128.1109, "step": 40560 }, { "epoch": 0.3356082226909873, "grad_norm": 639.6390380859375, "learning_rate": 8.50033148325278e-06, "loss": 89.2162, "step": 40570 }, { "epoch": 0.33569094594035653, "grad_norm": 1309.4464111328125, "learning_rate": 8.499324232122389e-06, "loss": 119.5868, "step": 40580 }, { "epoch": 0.3357736691897258, "grad_norm": 961.0704345703125, "learning_rate": 8.498316702566828e-06, "loss": 108.5671, "step": 40590 }, { "epoch": 0.335856392439095, "grad_norm": 1294.3653564453125, "learning_rate": 8.497308894666263e-06, "loss": 114.0025, "step": 40600 }, { "epoch": 0.33593911568846424, "grad_norm": 1210.564697265625, "learning_rate": 8.496300808500878e-06, "loss": 122.3642, "step": 40610 }, { "epoch": 0.3360218389378335, "grad_norm": 1512.3568115234375, "learning_rate": 8.495292444150887e-06, "loss": 146.3031, "step": 40620 }, { "epoch": 0.3361045621872027, "grad_norm": 956.16162109375, "learning_rate": 8.494283801696514e-06, "loss": 140.3855, "step": 40630 }, { "epoch": 0.33618728543657195, "grad_norm": 862.5502319335938, "learning_rate": 8.493274881218017e-06, "loss": 92.3681, "step": 40640 }, { "epoch": 0.3362700086859412, "grad_norm": 715.0096435546875, "learning_rate": 8.49226568279567e-06, "loss": 93.8356, "step": 40650 }, { "epoch": 0.3363527319353104, "grad_norm": 858.4218139648438, "learning_rate": 8.49125620650977e-06, "loss": 89.4689, "step": 40660 }, { "epoch": 0.33643545518467965, "grad_norm": 797.36328125, "learning_rate": 8.490246452440636e-06, "loss": 131.3191, "step": 40670 }, { "epoch": 0.3365181784340489, "grad_norm": 887.1189575195312, "learning_rate": 8.48923642066861e-06, "loss": 143.5478, "step": 40680 }, { "epoch": 0.3366009016834181, "grad_norm": 688.75732421875, "learning_rate": 8.488226111274055e-06, "loss": 129.7013, "step": 40690 }, { "epoch": 0.33668362493278736, "grad_norm": 770.3273315429688, "learning_rate": 8.487215524337357e-06, "loss": 77.0376, "step": 40700 }, { "epoch": 0.3367663481821566, "grad_norm": 960.6439819335938, "learning_rate": 8.486204659938924e-06, "loss": 127.0703, "step": 40710 }, { "epoch": 0.3368490714315258, "grad_norm": 764.961669921875, "learning_rate": 8.485193518159186e-06, "loss": 111.9176, "step": 40720 }, { "epoch": 0.33693179468089507, "grad_norm": 900.38134765625, "learning_rate": 8.484182099078596e-06, "loss": 112.1536, "step": 40730 }, { "epoch": 0.3370145179302643, "grad_norm": 960.113525390625, "learning_rate": 8.483170402777624e-06, "loss": 127.3206, "step": 40740 }, { "epoch": 0.3370972411796335, "grad_norm": 1646.762939453125, "learning_rate": 8.482158429336769e-06, "loss": 143.5467, "step": 40750 }, { "epoch": 0.3371799644290028, "grad_norm": 985.1450805664062, "learning_rate": 8.48114617883655e-06, "loss": 114.6298, "step": 40760 }, { "epoch": 0.33726268767837203, "grad_norm": 759.6780395507812, "learning_rate": 8.480133651357507e-06, "loss": 116.8154, "step": 40770 }, { "epoch": 0.33734541092774123, "grad_norm": 885.9656982421875, "learning_rate": 8.479120846980197e-06, "loss": 108.7685, "step": 40780 }, { "epoch": 0.3374281341771105, "grad_norm": 575.7235717773438, "learning_rate": 8.478107765785212e-06, "loss": 88.1911, "step": 40790 }, { "epoch": 0.33751085742647974, "grad_norm": 666.4598388671875, "learning_rate": 8.477094407853153e-06, "loss": 69.0146, "step": 40800 }, { "epoch": 0.33759358067584894, "grad_norm": 645.81201171875, "learning_rate": 8.47608077326465e-06, "loss": 94.4932, "step": 40810 }, { "epoch": 0.3376763039252182, "grad_norm": 994.3779907226562, "learning_rate": 8.475066862100352e-06, "loss": 88.9415, "step": 40820 }, { "epoch": 0.33775902717458745, "grad_norm": 759.6505737304688, "learning_rate": 8.474052674440934e-06, "loss": 117.1598, "step": 40830 }, { "epoch": 0.33784175042395664, "grad_norm": 869.6111450195312, "learning_rate": 8.473038210367086e-06, "loss": 78.2449, "step": 40840 }, { "epoch": 0.3379244736733259, "grad_norm": 947.123046875, "learning_rate": 8.47202346995953e-06, "loss": 93.2359, "step": 40850 }, { "epoch": 0.3380071969226951, "grad_norm": 1361.4940185546875, "learning_rate": 8.471008453298998e-06, "loss": 134.8301, "step": 40860 }, { "epoch": 0.33808992017206435, "grad_norm": 729.060546875, "learning_rate": 8.469993160466254e-06, "loss": 94.2659, "step": 40870 }, { "epoch": 0.3381726434214336, "grad_norm": 643.9381713867188, "learning_rate": 8.46897759154208e-06, "loss": 88.6729, "step": 40880 }, { "epoch": 0.3382553666708028, "grad_norm": 927.8340454101562, "learning_rate": 8.467961746607279e-06, "loss": 127.7991, "step": 40890 }, { "epoch": 0.33833808992017206, "grad_norm": 604.8875122070312, "learning_rate": 8.466945625742678e-06, "loss": 76.509, "step": 40900 }, { "epoch": 0.3384208131695413, "grad_norm": 672.4412841796875, "learning_rate": 8.465929229029124e-06, "loss": 165.3088, "step": 40910 }, { "epoch": 0.3385035364189105, "grad_norm": 649.953369140625, "learning_rate": 8.464912556547486e-06, "loss": 112.9105, "step": 40920 }, { "epoch": 0.33858625966827977, "grad_norm": 1930.8785400390625, "learning_rate": 8.46389560837866e-06, "loss": 108.5322, "step": 40930 }, { "epoch": 0.338668982917649, "grad_norm": 622.740966796875, "learning_rate": 8.462878384603558e-06, "loss": 117.5824, "step": 40940 }, { "epoch": 0.3387517061670182, "grad_norm": 474.0765686035156, "learning_rate": 8.461860885303116e-06, "loss": 99.5456, "step": 40950 }, { "epoch": 0.3388344294163875, "grad_norm": 952.2894287109375, "learning_rate": 8.460843110558287e-06, "loss": 124.9169, "step": 40960 }, { "epoch": 0.3389171526657567, "grad_norm": 654.9668579101562, "learning_rate": 8.459825060450058e-06, "loss": 90.4174, "step": 40970 }, { "epoch": 0.3389998759151259, "grad_norm": 1069.80029296875, "learning_rate": 8.458806735059428e-06, "loss": 134.0334, "step": 40980 }, { "epoch": 0.3390825991644952, "grad_norm": 657.5958862304688, "learning_rate": 8.45778813446742e-06, "loss": 97.927, "step": 40990 }, { "epoch": 0.33916532241386443, "grad_norm": 1148.0079345703125, "learning_rate": 8.456769258755078e-06, "loss": 111.504, "step": 41000 }, { "epoch": 0.33924804566323363, "grad_norm": 1073.7718505859375, "learning_rate": 8.455750108003468e-06, "loss": 78.7796, "step": 41010 }, { "epoch": 0.3393307689126029, "grad_norm": 530.935302734375, "learning_rate": 8.454730682293686e-06, "loss": 76.4729, "step": 41020 }, { "epoch": 0.33941349216197214, "grad_norm": 584.3358764648438, "learning_rate": 8.453710981706838e-06, "loss": 100.3047, "step": 41030 }, { "epoch": 0.33949621541134134, "grad_norm": 1019.848388671875, "learning_rate": 8.452691006324055e-06, "loss": 101.324, "step": 41040 }, { "epoch": 0.3395789386607106, "grad_norm": 991.2681274414062, "learning_rate": 8.451670756226496e-06, "loss": 75.0817, "step": 41050 }, { "epoch": 0.33966166191007985, "grad_norm": 730.0400390625, "learning_rate": 8.450650231495336e-06, "loss": 85.305, "step": 41060 }, { "epoch": 0.33974438515944905, "grad_norm": 929.1596069335938, "learning_rate": 8.449629432211774e-06, "loss": 92.8536, "step": 41070 }, { "epoch": 0.3398271084088183, "grad_norm": 1377.75146484375, "learning_rate": 8.44860835845703e-06, "loss": 103.3764, "step": 41080 }, { "epoch": 0.33990983165818756, "grad_norm": 522.6357421875, "learning_rate": 8.447587010312343e-06, "loss": 107.57, "step": 41090 }, { "epoch": 0.33999255490755675, "grad_norm": 2292.6142578125, "learning_rate": 8.44656538785898e-06, "loss": 135.6061, "step": 41100 }, { "epoch": 0.340075278156926, "grad_norm": 419.8533630371094, "learning_rate": 8.44554349117823e-06, "loss": 80.6014, "step": 41110 }, { "epoch": 0.34015800140629526, "grad_norm": 1163.7928466796875, "learning_rate": 8.444521320351397e-06, "loss": 110.4075, "step": 41120 }, { "epoch": 0.34024072465566446, "grad_norm": 942.8880615234375, "learning_rate": 8.44349887545981e-06, "loss": 118.3985, "step": 41130 }, { "epoch": 0.3403234479050337, "grad_norm": 509.7681884765625, "learning_rate": 8.442476156584818e-06, "loss": 133.2833, "step": 41140 }, { "epoch": 0.34040617115440297, "grad_norm": 1343.72705078125, "learning_rate": 8.4414531638078e-06, "loss": 134.3201, "step": 41150 }, { "epoch": 0.34048889440377217, "grad_norm": 1334.9111328125, "learning_rate": 8.440429897210148e-06, "loss": 94.3114, "step": 41160 }, { "epoch": 0.3405716176531414, "grad_norm": 918.8824462890625, "learning_rate": 8.439406356873279e-06, "loss": 105.6756, "step": 41170 }, { "epoch": 0.3406543409025107, "grad_norm": 435.05010986328125, "learning_rate": 8.43838254287863e-06, "loss": 86.3829, "step": 41180 }, { "epoch": 0.3407370641518799, "grad_norm": 934.824462890625, "learning_rate": 8.43735845530766e-06, "loss": 122.4491, "step": 41190 }, { "epoch": 0.34081978740124913, "grad_norm": 1095.854248046875, "learning_rate": 8.436334094241855e-06, "loss": 110.3371, "step": 41200 }, { "epoch": 0.34090251065061833, "grad_norm": 1048.58154296875, "learning_rate": 8.435309459762718e-06, "loss": 135.8438, "step": 41210 }, { "epoch": 0.3409852338999876, "grad_norm": 722.0227661132812, "learning_rate": 8.434284551951772e-06, "loss": 86.2307, "step": 41220 }, { "epoch": 0.34106795714935684, "grad_norm": 835.3677368164062, "learning_rate": 8.433259370890565e-06, "loss": 79.5151, "step": 41230 }, { "epoch": 0.34115068039872604, "grad_norm": 1341.2940673828125, "learning_rate": 8.432233916660669e-06, "loss": 102.6455, "step": 41240 }, { "epoch": 0.3412334036480953, "grad_norm": 972.4404907226562, "learning_rate": 8.43120818934367e-06, "loss": 105.9913, "step": 41250 }, { "epoch": 0.34131612689746454, "grad_norm": 752.3978881835938, "learning_rate": 8.43018218902118e-06, "loss": 97.6527, "step": 41260 }, { "epoch": 0.34139885014683374, "grad_norm": 744.5557250976562, "learning_rate": 8.429155915774839e-06, "loss": 98.6538, "step": 41270 }, { "epoch": 0.341481573396203, "grad_norm": 1084.3350830078125, "learning_rate": 8.428129369686299e-06, "loss": 96.4803, "step": 41280 }, { "epoch": 0.34156429664557225, "grad_norm": 887.330078125, "learning_rate": 8.427102550837238e-06, "loss": 111.9868, "step": 41290 }, { "epoch": 0.34164701989494145, "grad_norm": 869.30908203125, "learning_rate": 8.426075459309356e-06, "loss": 120.3469, "step": 41300 }, { "epoch": 0.3417297431443107, "grad_norm": 2174.565673828125, "learning_rate": 8.42504809518437e-06, "loss": 115.0281, "step": 41310 }, { "epoch": 0.34181246639367996, "grad_norm": 939.2781372070312, "learning_rate": 8.42402045854403e-06, "loss": 118.3146, "step": 41320 }, { "epoch": 0.34189518964304916, "grad_norm": 962.0004272460938, "learning_rate": 8.422992549470094e-06, "loss": 111.6336, "step": 41330 }, { "epoch": 0.3419779128924184, "grad_norm": 856.4796142578125, "learning_rate": 8.42196436804435e-06, "loss": 89.8677, "step": 41340 }, { "epoch": 0.34206063614178767, "grad_norm": 882.47509765625, "learning_rate": 8.420935914348607e-06, "loss": 109.2613, "step": 41350 }, { "epoch": 0.34214335939115686, "grad_norm": 1152.5184326171875, "learning_rate": 8.419907188464691e-06, "loss": 83.5429, "step": 41360 }, { "epoch": 0.3422260826405261, "grad_norm": 1086.84521484375, "learning_rate": 8.418878190474459e-06, "loss": 107.8546, "step": 41370 }, { "epoch": 0.3423088058898954, "grad_norm": 1217.004150390625, "learning_rate": 8.417848920459778e-06, "loss": 137.2482, "step": 41380 }, { "epoch": 0.34239152913926457, "grad_norm": 706.2305297851562, "learning_rate": 8.416819378502543e-06, "loss": 74.3434, "step": 41390 }, { "epoch": 0.3424742523886338, "grad_norm": 1614.1566162109375, "learning_rate": 8.415789564684673e-06, "loss": 142.1887, "step": 41400 }, { "epoch": 0.3425569756380031, "grad_norm": 875.3898315429688, "learning_rate": 8.414759479088102e-06, "loss": 97.1488, "step": 41410 }, { "epoch": 0.3426396988873723, "grad_norm": 1026.1566162109375, "learning_rate": 8.413729121794794e-06, "loss": 130.0628, "step": 41420 }, { "epoch": 0.34272242213674153, "grad_norm": 977.263427734375, "learning_rate": 8.412698492886723e-06, "loss": 78.8849, "step": 41430 }, { "epoch": 0.3428051453861108, "grad_norm": 1244.2081298828125, "learning_rate": 8.411667592445898e-06, "loss": 142.179, "step": 41440 }, { "epoch": 0.34288786863548, "grad_norm": 751.5814819335938, "learning_rate": 8.410636420554337e-06, "loss": 95.5533, "step": 41450 }, { "epoch": 0.34297059188484924, "grad_norm": 786.3610229492188, "learning_rate": 8.409604977294093e-06, "loss": 103.3417, "step": 41460 }, { "epoch": 0.3430533151342185, "grad_norm": 931.486572265625, "learning_rate": 8.408573262747225e-06, "loss": 141.6174, "step": 41470 }, { "epoch": 0.3431360383835877, "grad_norm": 838.9139404296875, "learning_rate": 8.407541276995828e-06, "loss": 105.8545, "step": 41480 }, { "epoch": 0.34321876163295695, "grad_norm": 654.8321533203125, "learning_rate": 8.40650902012201e-06, "loss": 107.5087, "step": 41490 }, { "epoch": 0.3433014848823262, "grad_norm": 766.4340209960938, "learning_rate": 8.405476492207902e-06, "loss": 113.7733, "step": 41500 }, { "epoch": 0.3433842081316954, "grad_norm": 1260.985595703125, "learning_rate": 8.404443693335658e-06, "loss": 108.2389, "step": 41510 }, { "epoch": 0.34346693138106466, "grad_norm": 777.0344848632812, "learning_rate": 8.403410623587454e-06, "loss": 112.1793, "step": 41520 }, { "epoch": 0.3435496546304339, "grad_norm": 836.8817749023438, "learning_rate": 8.402377283045487e-06, "loss": 121.241, "step": 41530 }, { "epoch": 0.3436323778798031, "grad_norm": 675.0514526367188, "learning_rate": 8.401343671791974e-06, "loss": 121.9953, "step": 41540 }, { "epoch": 0.34371510112917236, "grad_norm": 1098.513916015625, "learning_rate": 8.400309789909155e-06, "loss": 129.7842, "step": 41550 }, { "epoch": 0.3437978243785416, "grad_norm": 728.864013671875, "learning_rate": 8.399275637479291e-06, "loss": 99.4059, "step": 41560 }, { "epoch": 0.3438805476279108, "grad_norm": 744.5280151367188, "learning_rate": 8.398241214584666e-06, "loss": 115.1609, "step": 41570 }, { "epoch": 0.34396327087728007, "grad_norm": 938.07080078125, "learning_rate": 8.397206521307584e-06, "loss": 102.9427, "step": 41580 }, { "epoch": 0.34404599412664927, "grad_norm": 1658.468994140625, "learning_rate": 8.396171557730369e-06, "loss": 115.574, "step": 41590 }, { "epoch": 0.3441287173760185, "grad_norm": 634.4130859375, "learning_rate": 8.39513632393537e-06, "loss": 93.7718, "step": 41600 }, { "epoch": 0.3442114406253878, "grad_norm": 579.6549072265625, "learning_rate": 8.394100820004954e-06, "loss": 92.9008, "step": 41610 }, { "epoch": 0.344294163874757, "grad_norm": 450.8374938964844, "learning_rate": 8.393065046021513e-06, "loss": 90.1996, "step": 41620 }, { "epoch": 0.34437688712412623, "grad_norm": 572.2847290039062, "learning_rate": 8.39202900206746e-06, "loss": 147.2724, "step": 41630 }, { "epoch": 0.3444596103734955, "grad_norm": 2148.787353515625, "learning_rate": 8.390992688225226e-06, "loss": 132.4656, "step": 41640 }, { "epoch": 0.3445423336228647, "grad_norm": 707.6249389648438, "learning_rate": 8.389956104577265e-06, "loss": 90.7987, "step": 41650 }, { "epoch": 0.34462505687223394, "grad_norm": 1235.2393798828125, "learning_rate": 8.388919251206054e-06, "loss": 120.5997, "step": 41660 }, { "epoch": 0.3447077801216032, "grad_norm": 802.1920166015625, "learning_rate": 8.387882128194094e-06, "loss": 127.8519, "step": 41670 }, { "epoch": 0.3447905033709724, "grad_norm": 1075.386962890625, "learning_rate": 8.3868447356239e-06, "loss": 95.1565, "step": 41680 }, { "epoch": 0.34487322662034164, "grad_norm": 1524.40283203125, "learning_rate": 8.385807073578014e-06, "loss": 125.0543, "step": 41690 }, { "epoch": 0.3449559498697109, "grad_norm": 887.3607177734375, "learning_rate": 8.384769142138998e-06, "loss": 86.6021, "step": 41700 }, { "epoch": 0.3450386731190801, "grad_norm": 932.5927124023438, "learning_rate": 8.383730941389434e-06, "loss": 96.8807, "step": 41710 }, { "epoch": 0.34512139636844935, "grad_norm": 956.2900390625, "learning_rate": 8.382692471411931e-06, "loss": 97.0625, "step": 41720 }, { "epoch": 0.3452041196178186, "grad_norm": 646.645263671875, "learning_rate": 8.38165373228911e-06, "loss": 95.7504, "step": 41730 }, { "epoch": 0.3452868428671878, "grad_norm": 1114.9278564453125, "learning_rate": 8.380614724103622e-06, "loss": 120.9078, "step": 41740 }, { "epoch": 0.34536956611655706, "grad_norm": 693.3104248046875, "learning_rate": 8.379575446938136e-06, "loss": 106.318, "step": 41750 }, { "epoch": 0.3454522893659263, "grad_norm": 8694.8857421875, "learning_rate": 8.37853590087534e-06, "loss": 133.577, "step": 41760 }, { "epoch": 0.3455350126152955, "grad_norm": 645.8015747070312, "learning_rate": 8.377496085997949e-06, "loss": 124.9107, "step": 41770 }, { "epoch": 0.34561773586466477, "grad_norm": 2443.57421875, "learning_rate": 8.376456002388695e-06, "loss": 115.6421, "step": 41780 }, { "epoch": 0.345700459114034, "grad_norm": 1185.3404541015625, "learning_rate": 8.375415650130332e-06, "loss": 103.3099, "step": 41790 }, { "epoch": 0.3457831823634032, "grad_norm": 1205.232666015625, "learning_rate": 8.37437502930564e-06, "loss": 100.6235, "step": 41800 }, { "epoch": 0.3458659056127725, "grad_norm": 1242.7322998046875, "learning_rate": 8.373334139997409e-06, "loss": 131.52, "step": 41810 }, { "epoch": 0.3459486288621417, "grad_norm": 747.3458862304688, "learning_rate": 8.372292982288463e-06, "loss": 118.5125, "step": 41820 }, { "epoch": 0.3460313521115109, "grad_norm": 1096.74462890625, "learning_rate": 8.371251556261642e-06, "loss": 136.9112, "step": 41830 }, { "epoch": 0.3461140753608802, "grad_norm": 801.9876708984375, "learning_rate": 8.370209861999807e-06, "loss": 106.0218, "step": 41840 }, { "epoch": 0.34619679861024943, "grad_norm": 909.0194091796875, "learning_rate": 8.36916789958584e-06, "loss": 103.4507, "step": 41850 }, { "epoch": 0.34627952185961863, "grad_norm": 1055.92041015625, "learning_rate": 8.368125669102645e-06, "loss": 94.5659, "step": 41860 }, { "epoch": 0.3463622451089879, "grad_norm": 867.0262451171875, "learning_rate": 8.36708317063315e-06, "loss": 135.4051, "step": 41870 }, { "epoch": 0.34644496835835714, "grad_norm": 871.2109985351562, "learning_rate": 8.366040404260298e-06, "loss": 99.1085, "step": 41880 }, { "epoch": 0.34652769160772634, "grad_norm": 830.19384765625, "learning_rate": 8.36499737006706e-06, "loss": 76.3817, "step": 41890 }, { "epoch": 0.3466104148570956, "grad_norm": 951.84326171875, "learning_rate": 8.363954068136424e-06, "loss": 119.5681, "step": 41900 }, { "epoch": 0.34669313810646485, "grad_norm": 731.0087890625, "learning_rate": 8.362910498551402e-06, "loss": 114.4868, "step": 41910 }, { "epoch": 0.34677586135583405, "grad_norm": 635.1656494140625, "learning_rate": 8.361866661395024e-06, "loss": 116.8612, "step": 41920 }, { "epoch": 0.3468585846052033, "grad_norm": 1068.5445556640625, "learning_rate": 8.360822556750345e-06, "loss": 91.3164, "step": 41930 }, { "epoch": 0.3469413078545725, "grad_norm": 949.19873046875, "learning_rate": 8.35977818470044e-06, "loss": 104.9618, "step": 41940 }, { "epoch": 0.34702403110394175, "grad_norm": 429.62677001953125, "learning_rate": 8.358733545328404e-06, "loss": 93.7747, "step": 41950 }, { "epoch": 0.347106754353311, "grad_norm": 893.44189453125, "learning_rate": 8.357688638717354e-06, "loss": 106.5521, "step": 41960 }, { "epoch": 0.3471894776026802, "grad_norm": 585.1188354492188, "learning_rate": 8.356643464950428e-06, "loss": 80.4151, "step": 41970 }, { "epoch": 0.34727220085204946, "grad_norm": 1215.0146484375, "learning_rate": 8.355598024110789e-06, "loss": 181.5081, "step": 41980 }, { "epoch": 0.3473549241014187, "grad_norm": 1458.9388427734375, "learning_rate": 8.354552316281613e-06, "loss": 141.7899, "step": 41990 }, { "epoch": 0.3474376473507879, "grad_norm": 915.29443359375, "learning_rate": 8.353506341546106e-06, "loss": 108.375, "step": 42000 }, { "epoch": 0.34752037060015717, "grad_norm": 1018.5140991210938, "learning_rate": 8.352460099987488e-06, "loss": 118.4601, "step": 42010 }, { "epoch": 0.3476030938495264, "grad_norm": 1653.6846923828125, "learning_rate": 8.351413591689007e-06, "loss": 127.7061, "step": 42020 }, { "epoch": 0.3476858170988956, "grad_norm": 1303.524169921875, "learning_rate": 8.350366816733927e-06, "loss": 109.428, "step": 42030 }, { "epoch": 0.3477685403482649, "grad_norm": 1039.2083740234375, "learning_rate": 8.349319775205536e-06, "loss": 120.8401, "step": 42040 }, { "epoch": 0.34785126359763413, "grad_norm": 1214.731689453125, "learning_rate": 8.34827246718714e-06, "loss": 157.5093, "step": 42050 }, { "epoch": 0.34793398684700333, "grad_norm": 832.408203125, "learning_rate": 8.347224892762072e-06, "loss": 106.25, "step": 42060 }, { "epoch": 0.3480167100963726, "grad_norm": 1899.1883544921875, "learning_rate": 8.346177052013681e-06, "loss": 128.2392, "step": 42070 }, { "epoch": 0.34809943334574184, "grad_norm": 475.4049072265625, "learning_rate": 8.345128945025338e-06, "loss": 128.0041, "step": 42080 }, { "epoch": 0.34818215659511104, "grad_norm": 483.9809875488281, "learning_rate": 8.344080571880438e-06, "loss": 92.6426, "step": 42090 }, { "epoch": 0.3482648798444803, "grad_norm": 1139.35302734375, "learning_rate": 8.343031932662394e-06, "loss": 89.6336, "step": 42100 }, { "epoch": 0.34834760309384954, "grad_norm": 998.1423950195312, "learning_rate": 8.341983027454641e-06, "loss": 148.1835, "step": 42110 }, { "epoch": 0.34843032634321874, "grad_norm": 1257.747802734375, "learning_rate": 8.340933856340637e-06, "loss": 116.7931, "step": 42120 }, { "epoch": 0.348513049592588, "grad_norm": 1050.6527099609375, "learning_rate": 8.339884419403857e-06, "loss": 124.4426, "step": 42130 }, { "epoch": 0.34859577284195725, "grad_norm": 935.9532470703125, "learning_rate": 8.338834716727801e-06, "loss": 100.9498, "step": 42140 }, { "epoch": 0.34867849609132645, "grad_norm": 1319.49365234375, "learning_rate": 8.337784748395992e-06, "loss": 89.4219, "step": 42150 }, { "epoch": 0.3487612193406957, "grad_norm": 583.6215209960938, "learning_rate": 8.336734514491968e-06, "loss": 101.7242, "step": 42160 }, { "epoch": 0.34884394259006496, "grad_norm": 1093.5703125, "learning_rate": 8.335684015099294e-06, "loss": 100.3031, "step": 42170 }, { "epoch": 0.34892666583943416, "grad_norm": 7167.025390625, "learning_rate": 8.33463325030155e-06, "loss": 142.965, "step": 42180 }, { "epoch": 0.3490093890888034, "grad_norm": 1053.3721923828125, "learning_rate": 8.333582220182344e-06, "loss": 118.1564, "step": 42190 }, { "epoch": 0.34909211233817267, "grad_norm": 1960.47705078125, "learning_rate": 8.332530924825297e-06, "loss": 119.301, "step": 42200 }, { "epoch": 0.34917483558754187, "grad_norm": 973.1529541015625, "learning_rate": 8.33147936431406e-06, "loss": 131.6417, "step": 42210 }, { "epoch": 0.3492575588369111, "grad_norm": 925.6248168945312, "learning_rate": 8.3304275387323e-06, "loss": 116.9732, "step": 42220 }, { "epoch": 0.3493402820862804, "grad_norm": 0.0, "learning_rate": 8.329375448163703e-06, "loss": 87.3547, "step": 42230 }, { "epoch": 0.3494230053356496, "grad_norm": 1005.3858032226562, "learning_rate": 8.328323092691985e-06, "loss": 104.9806, "step": 42240 }, { "epoch": 0.3495057285850188, "grad_norm": 790.18603515625, "learning_rate": 8.32727047240087e-06, "loss": 141.6189, "step": 42250 }, { "epoch": 0.3495884518343881, "grad_norm": 885.5327758789062, "learning_rate": 8.326217587374115e-06, "loss": 95.2874, "step": 42260 }, { "epoch": 0.3496711750837573, "grad_norm": 1333.3345947265625, "learning_rate": 8.325164437695493e-06, "loss": 110.1591, "step": 42270 }, { "epoch": 0.34975389833312653, "grad_norm": 444.2149963378906, "learning_rate": 8.324111023448795e-06, "loss": 89.4089, "step": 42280 }, { "epoch": 0.3498366215824958, "grad_norm": 712.4427490234375, "learning_rate": 8.32305734471784e-06, "loss": 104.8016, "step": 42290 }, { "epoch": 0.349919344831865, "grad_norm": 825.5255737304688, "learning_rate": 8.322003401586463e-06, "loss": 109.8285, "step": 42300 }, { "epoch": 0.35000206808123424, "grad_norm": 674.3945922851562, "learning_rate": 8.32094919413852e-06, "loss": 118.0758, "step": 42310 }, { "epoch": 0.35008479133060344, "grad_norm": 1092.6353759765625, "learning_rate": 8.319894722457892e-06, "loss": 100.6579, "step": 42320 }, { "epoch": 0.3501675145799727, "grad_norm": 791.6581420898438, "learning_rate": 8.318839986628477e-06, "loss": 96.1517, "step": 42330 }, { "epoch": 0.35025023782934195, "grad_norm": 483.9101867675781, "learning_rate": 8.317784986734194e-06, "loss": 86.3806, "step": 42340 }, { "epoch": 0.35033296107871115, "grad_norm": 982.6383056640625, "learning_rate": 8.316729722858987e-06, "loss": 142.3889, "step": 42350 }, { "epoch": 0.3504156843280804, "grad_norm": 1031.0849609375, "learning_rate": 8.31567419508682e-06, "loss": 89.9149, "step": 42360 }, { "epoch": 0.35049840757744966, "grad_norm": 1122.879150390625, "learning_rate": 8.31461840350167e-06, "loss": 103.2544, "step": 42370 }, { "epoch": 0.35058113082681885, "grad_norm": 1027.361328125, "learning_rate": 8.313562348187549e-06, "loss": 90.0591, "step": 42380 }, { "epoch": 0.3506638540761881, "grad_norm": 789.521240234375, "learning_rate": 8.312506029228478e-06, "loss": 90.2937, "step": 42390 }, { "epoch": 0.35074657732555736, "grad_norm": 616.3732299804688, "learning_rate": 8.311449446708506e-06, "loss": 129.6206, "step": 42400 }, { "epoch": 0.35082930057492656, "grad_norm": 1325.065185546875, "learning_rate": 8.310392600711698e-06, "loss": 114.9959, "step": 42410 }, { "epoch": 0.3509120238242958, "grad_norm": 976.2049560546875, "learning_rate": 8.309335491322143e-06, "loss": 95.8001, "step": 42420 }, { "epoch": 0.35099474707366507, "grad_norm": 1060.470947265625, "learning_rate": 8.30827811862395e-06, "loss": 101.8484, "step": 42430 }, { "epoch": 0.35107747032303427, "grad_norm": 813.407470703125, "learning_rate": 8.307220482701251e-06, "loss": 119.0382, "step": 42440 }, { "epoch": 0.3511601935724035, "grad_norm": 769.6190795898438, "learning_rate": 8.306162583638197e-06, "loss": 116.1655, "step": 42450 }, { "epoch": 0.3512429168217728, "grad_norm": 1182.1982421875, "learning_rate": 8.305104421518959e-06, "loss": 116.6159, "step": 42460 }, { "epoch": 0.351325640071142, "grad_norm": 1151.38671875, "learning_rate": 8.30404599642773e-06, "loss": 97.3802, "step": 42470 }, { "epoch": 0.35140836332051123, "grad_norm": 0.0, "learning_rate": 8.302987308448724e-06, "loss": 81.0704, "step": 42480 }, { "epoch": 0.3514910865698805, "grad_norm": 728.1912231445312, "learning_rate": 8.301928357666178e-06, "loss": 92.2258, "step": 42490 }, { "epoch": 0.3515738098192497, "grad_norm": 793.99267578125, "learning_rate": 8.300869144164346e-06, "loss": 110.2738, "step": 42500 }, { "epoch": 0.35165653306861894, "grad_norm": 993.7640991210938, "learning_rate": 8.299809668027505e-06, "loss": 131.7156, "step": 42510 }, { "epoch": 0.3517392563179882, "grad_norm": 606.397705078125, "learning_rate": 8.298749929339953e-06, "loss": 104.0031, "step": 42520 }, { "epoch": 0.3518219795673574, "grad_norm": 827.4598388671875, "learning_rate": 8.297689928186009e-06, "loss": 110.1917, "step": 42530 }, { "epoch": 0.35190470281672664, "grad_norm": 2409.9150390625, "learning_rate": 8.29662966465001e-06, "loss": 135.4566, "step": 42540 }, { "epoch": 0.3519874260660959, "grad_norm": 613.3368530273438, "learning_rate": 8.295569138816319e-06, "loss": 80.187, "step": 42550 }, { "epoch": 0.3520701493154651, "grad_norm": 843.9635009765625, "learning_rate": 8.294508350769315e-06, "loss": 109.0294, "step": 42560 }, { "epoch": 0.35215287256483435, "grad_norm": 622.92578125, "learning_rate": 8.293447300593402e-06, "loss": 127.0855, "step": 42570 }, { "epoch": 0.3522355958142036, "grad_norm": 657.6416625976562, "learning_rate": 8.292385988373005e-06, "loss": 108.607, "step": 42580 }, { "epoch": 0.3523183190635728, "grad_norm": 1057.9915771484375, "learning_rate": 8.29132441419256e-06, "loss": 117.3586, "step": 42590 }, { "epoch": 0.35240104231294206, "grad_norm": 704.32421875, "learning_rate": 8.290262578136541e-06, "loss": 92.2817, "step": 42600 }, { "epoch": 0.3524837655623113, "grad_norm": 391.53265380859375, "learning_rate": 8.289200480289426e-06, "loss": 91.4089, "step": 42610 }, { "epoch": 0.3525664888116805, "grad_norm": 1248.0340576171875, "learning_rate": 8.288138120735726e-06, "loss": 94.6713, "step": 42620 }, { "epoch": 0.35264921206104977, "grad_norm": 1576.25, "learning_rate": 8.287075499559965e-06, "loss": 120.6687, "step": 42630 }, { "epoch": 0.352731935310419, "grad_norm": 850.7510375976562, "learning_rate": 8.286012616846693e-06, "loss": 68.9104, "step": 42640 }, { "epoch": 0.3528146585597882, "grad_norm": 1018.4036254882812, "learning_rate": 8.284949472680477e-06, "loss": 96.3005, "step": 42650 }, { "epoch": 0.3528973818091575, "grad_norm": 769.276611328125, "learning_rate": 8.283886067145908e-06, "loss": 101.5941, "step": 42660 }, { "epoch": 0.35298010505852667, "grad_norm": 1009.304931640625, "learning_rate": 8.282822400327595e-06, "loss": 100.8695, "step": 42670 }, { "epoch": 0.3530628283078959, "grad_norm": 965.6119384765625, "learning_rate": 8.28175847231017e-06, "loss": 112.0248, "step": 42680 }, { "epoch": 0.3531455515572652, "grad_norm": 862.6695556640625, "learning_rate": 8.280694283178285e-06, "loss": 97.2944, "step": 42690 }, { "epoch": 0.3532282748066344, "grad_norm": 698.0172119140625, "learning_rate": 8.27962983301661e-06, "loss": 66.0908, "step": 42700 }, { "epoch": 0.35331099805600363, "grad_norm": 680.3596801757812, "learning_rate": 8.278565121909845e-06, "loss": 56.0012, "step": 42710 }, { "epoch": 0.3533937213053729, "grad_norm": 979.0956420898438, "learning_rate": 8.277500149942697e-06, "loss": 111.6114, "step": 42720 }, { "epoch": 0.3534764445547421, "grad_norm": 839.169921875, "learning_rate": 8.276434917199904e-06, "loss": 126.5481, "step": 42730 }, { "epoch": 0.35355916780411134, "grad_norm": 1183.0323486328125, "learning_rate": 8.275369423766222e-06, "loss": 97.7488, "step": 42740 }, { "epoch": 0.3536418910534806, "grad_norm": 1349.1572265625, "learning_rate": 8.274303669726427e-06, "loss": 110.0713, "step": 42750 }, { "epoch": 0.3537246143028498, "grad_norm": 916.60107421875, "learning_rate": 8.273237655165314e-06, "loss": 106.8208, "step": 42760 }, { "epoch": 0.35380733755221905, "grad_norm": 800.146484375, "learning_rate": 8.272171380167705e-06, "loss": 83.875, "step": 42770 }, { "epoch": 0.3538900608015883, "grad_norm": 1539.4642333984375, "learning_rate": 8.271104844818436e-06, "loss": 130.0894, "step": 42780 }, { "epoch": 0.3539727840509575, "grad_norm": 1235.24560546875, "learning_rate": 8.270038049202366e-06, "loss": 131.9467, "step": 42790 }, { "epoch": 0.35405550730032675, "grad_norm": 962.0623168945312, "learning_rate": 8.268970993404377e-06, "loss": 100.8265, "step": 42800 }, { "epoch": 0.354138230549696, "grad_norm": 836.7759399414062, "learning_rate": 8.267903677509368e-06, "loss": 119.7814, "step": 42810 }, { "epoch": 0.3542209537990652, "grad_norm": 1205.632080078125, "learning_rate": 8.266836101602263e-06, "loss": 107.5889, "step": 42820 }, { "epoch": 0.35430367704843446, "grad_norm": 1180.9609375, "learning_rate": 8.265768265767999e-06, "loss": 146.9007, "step": 42830 }, { "epoch": 0.3543864002978037, "grad_norm": 784.8786010742188, "learning_rate": 8.264700170091543e-06, "loss": 93.1217, "step": 42840 }, { "epoch": 0.3544691235471729, "grad_norm": 583.6641845703125, "learning_rate": 8.263631814657879e-06, "loss": 127.952, "step": 42850 }, { "epoch": 0.35455184679654217, "grad_norm": 1015.6278076171875, "learning_rate": 8.262563199552007e-06, "loss": 109.5995, "step": 42860 }, { "epoch": 0.3546345700459114, "grad_norm": 1008.2879638671875, "learning_rate": 8.261494324858956e-06, "loss": 83.4442, "step": 42870 }, { "epoch": 0.3547172932952806, "grad_norm": 934.4283447265625, "learning_rate": 8.26042519066377e-06, "loss": 81.4526, "step": 42880 }, { "epoch": 0.3548000165446499, "grad_norm": 602.0402221679688, "learning_rate": 8.259355797051515e-06, "loss": 87.8508, "step": 42890 }, { "epoch": 0.35488273979401913, "grad_norm": 952.8324584960938, "learning_rate": 8.258286144107277e-06, "loss": 101.1536, "step": 42900 }, { "epoch": 0.35496546304338833, "grad_norm": 924.0335693359375, "learning_rate": 8.257216231916162e-06, "loss": 113.0049, "step": 42910 }, { "epoch": 0.3550481862927576, "grad_norm": 684.9584350585938, "learning_rate": 8.256146060563304e-06, "loss": 117.6641, "step": 42920 }, { "epoch": 0.35513090954212684, "grad_norm": 1416.010009765625, "learning_rate": 8.255075630133847e-06, "loss": 93.2686, "step": 42930 }, { "epoch": 0.35521363279149604, "grad_norm": 1231.5831298828125, "learning_rate": 8.254004940712958e-06, "loss": 111.2918, "step": 42940 }, { "epoch": 0.3552963560408653, "grad_norm": 1368.33935546875, "learning_rate": 8.252933992385833e-06, "loss": 101.9154, "step": 42950 }, { "epoch": 0.35537907929023455, "grad_norm": 754.49609375, "learning_rate": 8.251862785237676e-06, "loss": 88.8121, "step": 42960 }, { "epoch": 0.35546180253960374, "grad_norm": 1223.3607177734375, "learning_rate": 8.250791319353723e-06, "loss": 111.0358, "step": 42970 }, { "epoch": 0.355544525788973, "grad_norm": 1084.2374267578125, "learning_rate": 8.249719594819225e-06, "loss": 107.8028, "step": 42980 }, { "epoch": 0.35562724903834225, "grad_norm": 1002.8360595703125, "learning_rate": 8.248647611719452e-06, "loss": 87.2639, "step": 42990 }, { "epoch": 0.35570997228771145, "grad_norm": 882.518310546875, "learning_rate": 8.247575370139695e-06, "loss": 120.7826, "step": 43000 }, { "epoch": 0.3557926955370807, "grad_norm": 710.4674682617188, "learning_rate": 8.246502870165273e-06, "loss": 130.1348, "step": 43010 }, { "epoch": 0.3558754187864499, "grad_norm": 705.7244262695312, "learning_rate": 8.245430111881519e-06, "loss": 82.2953, "step": 43020 }, { "epoch": 0.35595814203581916, "grad_norm": 1012.916748046875, "learning_rate": 8.244357095373783e-06, "loss": 103.8642, "step": 43030 }, { "epoch": 0.3560408652851884, "grad_norm": 823.5924682617188, "learning_rate": 8.243283820727441e-06, "loss": 120.169, "step": 43040 }, { "epoch": 0.3561235885345576, "grad_norm": 506.7729187011719, "learning_rate": 8.242210288027893e-06, "loss": 106.6605, "step": 43050 }, { "epoch": 0.35620631178392687, "grad_norm": 573.303955078125, "learning_rate": 8.241136497360552e-06, "loss": 96.0248, "step": 43060 }, { "epoch": 0.3562890350332961, "grad_norm": 1349.73974609375, "learning_rate": 8.240062448810853e-06, "loss": 150.305, "step": 43070 }, { "epoch": 0.3563717582826653, "grad_norm": 898.83349609375, "learning_rate": 8.238988142464254e-06, "loss": 99.9782, "step": 43080 }, { "epoch": 0.3564544815320346, "grad_norm": 1892.2440185546875, "learning_rate": 8.237913578406236e-06, "loss": 127.4689, "step": 43090 }, { "epoch": 0.3565372047814038, "grad_norm": 1101.9765625, "learning_rate": 8.236838756722294e-06, "loss": 99.9242, "step": 43100 }, { "epoch": 0.356619928030773, "grad_norm": 785.5177001953125, "learning_rate": 8.235763677497945e-06, "loss": 102.3918, "step": 43110 }, { "epoch": 0.3567026512801423, "grad_norm": 1374.7742919921875, "learning_rate": 8.234688340818732e-06, "loss": 113.4264, "step": 43120 }, { "epoch": 0.35678537452951153, "grad_norm": 606.4083862304688, "learning_rate": 8.233612746770214e-06, "loss": 93.4949, "step": 43130 }, { "epoch": 0.35686809777888073, "grad_norm": 922.9923095703125, "learning_rate": 8.232536895437968e-06, "loss": 85.8221, "step": 43140 }, { "epoch": 0.35695082102825, "grad_norm": 790.5139770507812, "learning_rate": 8.231460786907597e-06, "loss": 117.3538, "step": 43150 }, { "epoch": 0.35703354427761924, "grad_norm": 417.8067626953125, "learning_rate": 8.230384421264722e-06, "loss": 87.6937, "step": 43160 }, { "epoch": 0.35711626752698844, "grad_norm": 1458.1962890625, "learning_rate": 8.229307798594985e-06, "loss": 133.923, "step": 43170 }, { "epoch": 0.3571989907763577, "grad_norm": 987.9276123046875, "learning_rate": 8.228230918984046e-06, "loss": 111.6961, "step": 43180 }, { "epoch": 0.35728171402572695, "grad_norm": 1103.388427734375, "learning_rate": 8.22715378251759e-06, "loss": 93.5427, "step": 43190 }, { "epoch": 0.35736443727509615, "grad_norm": 697.0567626953125, "learning_rate": 8.226076389281316e-06, "loss": 117.8876, "step": 43200 }, { "epoch": 0.3574471605244654, "grad_norm": 1062.5294189453125, "learning_rate": 8.22499873936095e-06, "loss": 85.2779, "step": 43210 }, { "epoch": 0.35752988377383466, "grad_norm": 997.5350341796875, "learning_rate": 8.223920832842236e-06, "loss": 127.3359, "step": 43220 }, { "epoch": 0.35761260702320385, "grad_norm": 839.9952392578125, "learning_rate": 8.222842669810936e-06, "loss": 112.6368, "step": 43230 }, { "epoch": 0.3576953302725731, "grad_norm": 1012.621337890625, "learning_rate": 8.221764250352835e-06, "loss": 108.84, "step": 43240 }, { "epoch": 0.35777805352194236, "grad_norm": 918.2972412109375, "learning_rate": 8.220685574553739e-06, "loss": 85.8282, "step": 43250 }, { "epoch": 0.35786077677131156, "grad_norm": 776.9376220703125, "learning_rate": 8.219606642499474e-06, "loss": 96.5936, "step": 43260 }, { "epoch": 0.3579435000206808, "grad_norm": 868.3795166015625, "learning_rate": 8.218527454275884e-06, "loss": 91.7565, "step": 43270 }, { "epoch": 0.35802622327005007, "grad_norm": 958.1405029296875, "learning_rate": 8.217448009968834e-06, "loss": 110.3028, "step": 43280 }, { "epoch": 0.35810894651941927, "grad_norm": 1635.0120849609375, "learning_rate": 8.216368309664213e-06, "loss": 115.6983, "step": 43290 }, { "epoch": 0.3581916697687885, "grad_norm": 495.8813781738281, "learning_rate": 8.215288353447927e-06, "loss": 125.6738, "step": 43300 }, { "epoch": 0.3582743930181578, "grad_norm": 1934.987548828125, "learning_rate": 8.214208141405903e-06, "loss": 96.0109, "step": 43310 }, { "epoch": 0.358357116267527, "grad_norm": 1259.3150634765625, "learning_rate": 8.213127673624088e-06, "loss": 98.7009, "step": 43320 }, { "epoch": 0.35843983951689623, "grad_norm": 985.3812255859375, "learning_rate": 8.212046950188451e-06, "loss": 119.01, "step": 43330 }, { "epoch": 0.3585225627662655, "grad_norm": 657.3631591796875, "learning_rate": 8.21096597118498e-06, "loss": 102.065, "step": 43340 }, { "epoch": 0.3586052860156347, "grad_norm": 808.63232421875, "learning_rate": 8.209884736699681e-06, "loss": 86.2247, "step": 43350 }, { "epoch": 0.35868800926500394, "grad_norm": 963.5828857421875, "learning_rate": 8.208803246818586e-06, "loss": 99.5541, "step": 43360 }, { "epoch": 0.3587707325143732, "grad_norm": 356.1247253417969, "learning_rate": 8.207721501627743e-06, "loss": 132.9291, "step": 43370 }, { "epoch": 0.3588534557637424, "grad_norm": 1359.7357177734375, "learning_rate": 8.20663950121322e-06, "loss": 106.6871, "step": 43380 }, { "epoch": 0.35893617901311164, "grad_norm": 741.9186401367188, "learning_rate": 8.20555724566111e-06, "loss": 110.5265, "step": 43390 }, { "epoch": 0.35901890226248084, "grad_norm": 804.3803100585938, "learning_rate": 8.204474735057522e-06, "loss": 89.7678, "step": 43400 }, { "epoch": 0.3591016255118501, "grad_norm": 986.8497924804688, "learning_rate": 8.203391969488586e-06, "loss": 76.5805, "step": 43410 }, { "epoch": 0.35918434876121935, "grad_norm": 981.0845947265625, "learning_rate": 8.20230894904045e-06, "loss": 108.0519, "step": 43420 }, { "epoch": 0.35926707201058855, "grad_norm": 967.68310546875, "learning_rate": 8.20122567379929e-06, "loss": 144.2405, "step": 43430 }, { "epoch": 0.3593497952599578, "grad_norm": 1253.8336181640625, "learning_rate": 8.200142143851295e-06, "loss": 85.0357, "step": 43440 }, { "epoch": 0.35943251850932706, "grad_norm": 660.7843627929688, "learning_rate": 8.199058359282675e-06, "loss": 110.5242, "step": 43450 }, { "epoch": 0.35951524175869626, "grad_norm": 876.0247802734375, "learning_rate": 8.197974320179664e-06, "loss": 143.5727, "step": 43460 }, { "epoch": 0.3595979650080655, "grad_norm": 1184.567626953125, "learning_rate": 8.19689002662851e-06, "loss": 87.2803, "step": 43470 }, { "epoch": 0.35968068825743477, "grad_norm": 899.4736938476562, "learning_rate": 8.195805478715492e-06, "loss": 94.5841, "step": 43480 }, { "epoch": 0.35976341150680397, "grad_norm": 1176.9891357421875, "learning_rate": 8.194720676526898e-06, "loss": 105.5688, "step": 43490 }, { "epoch": 0.3598461347561732, "grad_norm": 1430.493896484375, "learning_rate": 8.193635620149041e-06, "loss": 120.161, "step": 43500 }, { "epoch": 0.3599288580055425, "grad_norm": 1398.2462158203125, "learning_rate": 8.192550309668254e-06, "loss": 153.4543, "step": 43510 }, { "epoch": 0.3600115812549117, "grad_norm": 978.09033203125, "learning_rate": 8.191464745170892e-06, "loss": 97.1732, "step": 43520 }, { "epoch": 0.3600943045042809, "grad_norm": 1013.3282470703125, "learning_rate": 8.190378926743327e-06, "loss": 101.2923, "step": 43530 }, { "epoch": 0.3601770277536502, "grad_norm": 881.2088012695312, "learning_rate": 8.189292854471953e-06, "loss": 148.854, "step": 43540 }, { "epoch": 0.3602597510030194, "grad_norm": 870.5662841796875, "learning_rate": 8.188206528443182e-06, "loss": 92.8082, "step": 43550 }, { "epoch": 0.36034247425238863, "grad_norm": 776.38916015625, "learning_rate": 8.18711994874345e-06, "loss": 82.2745, "step": 43560 }, { "epoch": 0.3604251975017579, "grad_norm": 904.0106811523438, "learning_rate": 8.186033115459211e-06, "loss": 97.3916, "step": 43570 }, { "epoch": 0.3605079207511271, "grad_norm": 1215.83349609375, "learning_rate": 8.184946028676937e-06, "loss": 106.3127, "step": 43580 }, { "epoch": 0.36059064400049634, "grad_norm": 794.0036010742188, "learning_rate": 8.183858688483126e-06, "loss": 91.0681, "step": 43590 }, { "epoch": 0.3606733672498656, "grad_norm": 1268.9676513671875, "learning_rate": 8.182771094964292e-06, "loss": 123.4264, "step": 43600 }, { "epoch": 0.3607560904992348, "grad_norm": 777.9105224609375, "learning_rate": 8.181683248206968e-06, "loss": 111.6841, "step": 43610 }, { "epoch": 0.36083881374860405, "grad_norm": 854.5106201171875, "learning_rate": 8.180595148297709e-06, "loss": 113.4441, "step": 43620 }, { "epoch": 0.3609215369979733, "grad_norm": 850.8244018554688, "learning_rate": 8.179506795323092e-06, "loss": 135.2171, "step": 43630 }, { "epoch": 0.3610042602473425, "grad_norm": 1269.437255859375, "learning_rate": 8.17841818936971e-06, "loss": 155.0749, "step": 43640 }, { "epoch": 0.36108698349671176, "grad_norm": 1103.447509765625, "learning_rate": 8.177329330524182e-06, "loss": 85.1156, "step": 43650 }, { "epoch": 0.361169706746081, "grad_norm": 800.2313232421875, "learning_rate": 8.17624021887314e-06, "loss": 103.1669, "step": 43660 }, { "epoch": 0.3612524299954502, "grad_norm": 662.4274291992188, "learning_rate": 8.17515085450324e-06, "loss": 98.6518, "step": 43670 }, { "epoch": 0.36133515324481946, "grad_norm": 975.4820556640625, "learning_rate": 8.174061237501159e-06, "loss": 120.7466, "step": 43680 }, { "epoch": 0.3614178764941887, "grad_norm": 1076.44873046875, "learning_rate": 8.172971367953593e-06, "loss": 80.2128, "step": 43690 }, { "epoch": 0.3615005997435579, "grad_norm": 1007.4608764648438, "learning_rate": 8.171881245947257e-06, "loss": 62.0215, "step": 43700 }, { "epoch": 0.36158332299292717, "grad_norm": 1051.32177734375, "learning_rate": 8.170790871568887e-06, "loss": 157.7504, "step": 43710 }, { "epoch": 0.3616660462422964, "grad_norm": 1688.2108154296875, "learning_rate": 8.169700244905239e-06, "loss": 123.6984, "step": 43720 }, { "epoch": 0.3617487694916656, "grad_norm": 1116.3714599609375, "learning_rate": 8.168609366043089e-06, "loss": 92.2827, "step": 43730 }, { "epoch": 0.3618314927410349, "grad_norm": 845.093505859375, "learning_rate": 8.167518235069234e-06, "loss": 77.9922, "step": 43740 }, { "epoch": 0.3619142159904041, "grad_norm": 2042.611572265625, "learning_rate": 8.16642685207049e-06, "loss": 141.047, "step": 43750 }, { "epoch": 0.36199693923977333, "grad_norm": 602.3466186523438, "learning_rate": 8.165335217133695e-06, "loss": 122.7751, "step": 43760 }, { "epoch": 0.3620796624891426, "grad_norm": 998.31005859375, "learning_rate": 8.164243330345702e-06, "loss": 95.6849, "step": 43770 }, { "epoch": 0.3621623857385118, "grad_norm": 939.9814453125, "learning_rate": 8.16315119179339e-06, "loss": 88.2418, "step": 43780 }, { "epoch": 0.36224510898788104, "grad_norm": 2236.7392578125, "learning_rate": 8.162058801563652e-06, "loss": 116.7937, "step": 43790 }, { "epoch": 0.3623278322372503, "grad_norm": 973.8187255859375, "learning_rate": 8.160966159743411e-06, "loss": 94.5988, "step": 43800 }, { "epoch": 0.3624105554866195, "grad_norm": 640.3901977539062, "learning_rate": 8.159873266419598e-06, "loss": 103.685, "step": 43810 }, { "epoch": 0.36249327873598874, "grad_norm": 1432.554931640625, "learning_rate": 8.15878012167917e-06, "loss": 106.0658, "step": 43820 }, { "epoch": 0.362576001985358, "grad_norm": 859.1890258789062, "learning_rate": 8.157686725609105e-06, "loss": 87.8233, "step": 43830 }, { "epoch": 0.3626587252347272, "grad_norm": 1776.3751220703125, "learning_rate": 8.1565930782964e-06, "loss": 114.2181, "step": 43840 }, { "epoch": 0.36274144848409645, "grad_norm": 926.181884765625, "learning_rate": 8.155499179828068e-06, "loss": 114.7968, "step": 43850 }, { "epoch": 0.3628241717334657, "grad_norm": 959.3102416992188, "learning_rate": 8.15440503029115e-06, "loss": 105.2323, "step": 43860 }, { "epoch": 0.3629068949828349, "grad_norm": 1929.3040771484375, "learning_rate": 8.153310629772702e-06, "loss": 131.4064, "step": 43870 }, { "epoch": 0.36298961823220416, "grad_norm": 1120.273193359375, "learning_rate": 8.152215978359796e-06, "loss": 92.3281, "step": 43880 }, { "epoch": 0.3630723414815734, "grad_norm": 830.0736694335938, "learning_rate": 8.151121076139534e-06, "loss": 91.5073, "step": 43890 }, { "epoch": 0.3631550647309426, "grad_norm": 1354.7823486328125, "learning_rate": 8.150025923199027e-06, "loss": 201.7689, "step": 43900 }, { "epoch": 0.36323778798031187, "grad_norm": 1229.7574462890625, "learning_rate": 8.148930519625417e-06, "loss": 116.0604, "step": 43910 }, { "epoch": 0.3633205112296811, "grad_norm": 1157.707763671875, "learning_rate": 8.147834865505855e-06, "loss": 118.8252, "step": 43920 }, { "epoch": 0.3634032344790503, "grad_norm": 391.3431396484375, "learning_rate": 8.14673896092752e-06, "loss": 94.0042, "step": 43930 }, { "epoch": 0.3634859577284196, "grad_norm": 963.4109497070312, "learning_rate": 8.145642805977608e-06, "loss": 94.306, "step": 43940 }, { "epoch": 0.3635686809777888, "grad_norm": 680.21826171875, "learning_rate": 8.144546400743334e-06, "loss": 121.9921, "step": 43950 }, { "epoch": 0.363651404227158, "grad_norm": 763.9702758789062, "learning_rate": 8.143449745311934e-06, "loss": 105.5048, "step": 43960 }, { "epoch": 0.3637341274765273, "grad_norm": 706.5518188476562, "learning_rate": 8.142352839770663e-06, "loss": 112.0056, "step": 43970 }, { "epoch": 0.36381685072589653, "grad_norm": 1533.0765380859375, "learning_rate": 8.1412556842068e-06, "loss": 109.1279, "step": 43980 }, { "epoch": 0.36389957397526573, "grad_norm": 784.7587890625, "learning_rate": 8.140158278707637e-06, "loss": 121.243, "step": 43990 }, { "epoch": 0.363982297224635, "grad_norm": 886.728759765625, "learning_rate": 8.139060623360494e-06, "loss": 131.5882, "step": 44000 }, { "epoch": 0.36406502047400424, "grad_norm": 990.3309936523438, "learning_rate": 8.1379627182527e-06, "loss": 95.6152, "step": 44010 }, { "epoch": 0.36414774372337344, "grad_norm": 1236.8133544921875, "learning_rate": 8.136864563471617e-06, "loss": 104.638, "step": 44020 }, { "epoch": 0.3642304669727427, "grad_norm": 560.0664672851562, "learning_rate": 8.135766159104615e-06, "loss": 90.362, "step": 44030 }, { "epoch": 0.36431319022211195, "grad_norm": 796.653076171875, "learning_rate": 8.134667505239092e-06, "loss": 99.4829, "step": 44040 }, { "epoch": 0.36439591347148115, "grad_norm": 777.6304321289062, "learning_rate": 8.133568601962462e-06, "loss": 106.3731, "step": 44050 }, { "epoch": 0.3644786367208504, "grad_norm": 746.3777465820312, "learning_rate": 8.132469449362158e-06, "loss": 101.6638, "step": 44060 }, { "epoch": 0.36456135997021966, "grad_norm": 824.9530029296875, "learning_rate": 8.131370047525637e-06, "loss": 144.5076, "step": 44070 }, { "epoch": 0.36464408321958885, "grad_norm": 1749.48095703125, "learning_rate": 8.130270396540372e-06, "loss": 107.8925, "step": 44080 }, { "epoch": 0.3647268064689581, "grad_norm": 1590.762451171875, "learning_rate": 8.129170496493857e-06, "loss": 129.4328, "step": 44090 }, { "epoch": 0.36480952971832736, "grad_norm": 854.1180419921875, "learning_rate": 8.128070347473609e-06, "loss": 97.3397, "step": 44100 }, { "epoch": 0.36489225296769656, "grad_norm": 920.0262451171875, "learning_rate": 8.126969949567157e-06, "loss": 90.8626, "step": 44110 }, { "epoch": 0.3649749762170658, "grad_norm": 802.9644775390625, "learning_rate": 8.125869302862058e-06, "loss": 101.2598, "step": 44120 }, { "epoch": 0.365057699466435, "grad_norm": 721.7289428710938, "learning_rate": 8.124768407445883e-06, "loss": 91.293, "step": 44130 }, { "epoch": 0.36514042271580427, "grad_norm": 452.2812805175781, "learning_rate": 8.123667263406228e-06, "loss": 115.1237, "step": 44140 }, { "epoch": 0.3652231459651735, "grad_norm": 755.5316162109375, "learning_rate": 8.122565870830704e-06, "loss": 95.5803, "step": 44150 }, { "epoch": 0.3653058692145427, "grad_norm": 583.1863403320312, "learning_rate": 8.121464229806944e-06, "loss": 91.2347, "step": 44160 }, { "epoch": 0.365388592463912, "grad_norm": 894.2572631835938, "learning_rate": 8.120362340422601e-06, "loss": 101.122, "step": 44170 }, { "epoch": 0.36547131571328123, "grad_norm": 1000.3782958984375, "learning_rate": 8.119260202765347e-06, "loss": 104.094, "step": 44180 }, { "epoch": 0.36555403896265043, "grad_norm": 1205.3262939453125, "learning_rate": 8.118157816922874e-06, "loss": 107.1193, "step": 44190 }, { "epoch": 0.3656367622120197, "grad_norm": 1496.134033203125, "learning_rate": 8.117055182982895e-06, "loss": 84.7695, "step": 44200 }, { "epoch": 0.36571948546138894, "grad_norm": 1437.298095703125, "learning_rate": 8.115952301033141e-06, "loss": 117.0865, "step": 44210 }, { "epoch": 0.36580220871075814, "grad_norm": 1193.9093017578125, "learning_rate": 8.11484917116136e-06, "loss": 104.2912, "step": 44220 }, { "epoch": 0.3658849319601274, "grad_norm": 912.3463745117188, "learning_rate": 8.113745793455328e-06, "loss": 105.9879, "step": 44230 }, { "epoch": 0.36596765520949665, "grad_norm": 1175.63134765625, "learning_rate": 8.112642168002831e-06, "loss": 95.1146, "step": 44240 }, { "epoch": 0.36605037845886584, "grad_norm": 957.2208862304688, "learning_rate": 8.111538294891684e-06, "loss": 133.3301, "step": 44250 }, { "epoch": 0.3661331017082351, "grad_norm": 780.9307861328125, "learning_rate": 8.110434174209714e-06, "loss": 112.0869, "step": 44260 }, { "epoch": 0.36621582495760435, "grad_norm": 1046.54052734375, "learning_rate": 8.109329806044772e-06, "loss": 122.4268, "step": 44270 }, { "epoch": 0.36629854820697355, "grad_norm": 593.833984375, "learning_rate": 8.108225190484728e-06, "loss": 134.9322, "step": 44280 }, { "epoch": 0.3663812714563428, "grad_norm": 1084.296630859375, "learning_rate": 8.107120327617469e-06, "loss": 108.1544, "step": 44290 }, { "epoch": 0.36646399470571206, "grad_norm": 795.716552734375, "learning_rate": 8.106015217530906e-06, "loss": 104.3661, "step": 44300 }, { "epoch": 0.36654671795508126, "grad_norm": 969.0634765625, "learning_rate": 8.104909860312968e-06, "loss": 118.4515, "step": 44310 }, { "epoch": 0.3666294412044505, "grad_norm": 1363.8905029296875, "learning_rate": 8.1038042560516e-06, "loss": 109.1337, "step": 44320 }, { "epoch": 0.36671216445381977, "grad_norm": 715.2791748046875, "learning_rate": 8.102698404834773e-06, "loss": 79.2838, "step": 44330 }, { "epoch": 0.36679488770318897, "grad_norm": 1469.4854736328125, "learning_rate": 8.101592306750472e-06, "loss": 110.5569, "step": 44340 }, { "epoch": 0.3668776109525582, "grad_norm": 992.7304077148438, "learning_rate": 8.100485961886707e-06, "loss": 97.823, "step": 44350 }, { "epoch": 0.3669603342019275, "grad_norm": 1063.874755859375, "learning_rate": 8.099379370331502e-06, "loss": 112.1215, "step": 44360 }, { "epoch": 0.3670430574512967, "grad_norm": 726.4131469726562, "learning_rate": 8.098272532172906e-06, "loss": 135.1273, "step": 44370 }, { "epoch": 0.3671257807006659, "grad_norm": 1017.98193359375, "learning_rate": 8.097165447498985e-06, "loss": 102.8711, "step": 44380 }, { "epoch": 0.3672085039500352, "grad_norm": 1006.7951049804688, "learning_rate": 8.09605811639782e-06, "loss": 120.6296, "step": 44390 }, { "epoch": 0.3672912271994044, "grad_norm": 1553.7777099609375, "learning_rate": 8.094950538957523e-06, "loss": 116.9153, "step": 44400 }, { "epoch": 0.36737395044877363, "grad_norm": 578.3179931640625, "learning_rate": 8.093842715266214e-06, "loss": 86.2257, "step": 44410 }, { "epoch": 0.3674566736981429, "grad_norm": 1568.3966064453125, "learning_rate": 8.092734645412037e-06, "loss": 103.4828, "step": 44420 }, { "epoch": 0.3675393969475121, "grad_norm": 1186.5718994140625, "learning_rate": 8.09162632948316e-06, "loss": 117.7764, "step": 44430 }, { "epoch": 0.36762212019688134, "grad_norm": 1135.4630126953125, "learning_rate": 8.090517767567765e-06, "loss": 95.9603, "step": 44440 }, { "epoch": 0.3677048434462506, "grad_norm": 959.4806518554688, "learning_rate": 8.089408959754055e-06, "loss": 99.0822, "step": 44450 }, { "epoch": 0.3677875666956198, "grad_norm": 758.1735229492188, "learning_rate": 8.088299906130252e-06, "loss": 149.9401, "step": 44460 }, { "epoch": 0.36787028994498905, "grad_norm": 791.1016845703125, "learning_rate": 8.087190606784598e-06, "loss": 79.0925, "step": 44470 }, { "epoch": 0.36795301319435825, "grad_norm": 640.9752807617188, "learning_rate": 8.086081061805357e-06, "loss": 95.8233, "step": 44480 }, { "epoch": 0.3680357364437275, "grad_norm": 1189.6053466796875, "learning_rate": 8.084971271280808e-06, "loss": 122.035, "step": 44490 }, { "epoch": 0.36811845969309676, "grad_norm": 1168.3837890625, "learning_rate": 8.083861235299253e-06, "loss": 100.2019, "step": 44500 }, { "epoch": 0.36820118294246595, "grad_norm": 891.6553955078125, "learning_rate": 8.082750953949015e-06, "loss": 115.5503, "step": 44510 }, { "epoch": 0.3682839061918352, "grad_norm": 884.3746337890625, "learning_rate": 8.081640427318429e-06, "loss": 94.7234, "step": 44520 }, { "epoch": 0.36836662944120446, "grad_norm": 760.7252807617188, "learning_rate": 8.080529655495856e-06, "loss": 126.0468, "step": 44530 }, { "epoch": 0.36844935269057366, "grad_norm": 831.2738647460938, "learning_rate": 8.079418638569679e-06, "loss": 92.954, "step": 44540 }, { "epoch": 0.3685320759399429, "grad_norm": 460.5909118652344, "learning_rate": 8.078307376628292e-06, "loss": 83.0053, "step": 44550 }, { "epoch": 0.36861479918931217, "grad_norm": 795.8884887695312, "learning_rate": 8.077195869760114e-06, "loss": 86.7317, "step": 44560 }, { "epoch": 0.36869752243868137, "grad_norm": 683.8246459960938, "learning_rate": 8.076084118053584e-06, "loss": 90.0042, "step": 44570 }, { "epoch": 0.3687802456880506, "grad_norm": 1078.8155517578125, "learning_rate": 8.074972121597158e-06, "loss": 140.497, "step": 44580 }, { "epoch": 0.3688629689374199, "grad_norm": 447.01214599609375, "learning_rate": 8.073859880479314e-06, "loss": 76.7604, "step": 44590 }, { "epoch": 0.3689456921867891, "grad_norm": 948.76416015625, "learning_rate": 8.072747394788545e-06, "loss": 83.8023, "step": 44600 }, { "epoch": 0.36902841543615833, "grad_norm": 1048.3646240234375, "learning_rate": 8.071634664613367e-06, "loss": 102.7217, "step": 44610 }, { "epoch": 0.3691111386855276, "grad_norm": 859.9012451171875, "learning_rate": 8.070521690042317e-06, "loss": 103.1552, "step": 44620 }, { "epoch": 0.3691938619348968, "grad_norm": 1066.72314453125, "learning_rate": 8.069408471163947e-06, "loss": 121.4919, "step": 44630 }, { "epoch": 0.36927658518426604, "grad_norm": 969.5633544921875, "learning_rate": 8.068295008066832e-06, "loss": 128.1989, "step": 44640 }, { "epoch": 0.3693593084336353, "grad_norm": 1198.6636962890625, "learning_rate": 8.067181300839565e-06, "loss": 161.2369, "step": 44650 }, { "epoch": 0.3694420316830045, "grad_norm": 700.932861328125, "learning_rate": 8.066067349570757e-06, "loss": 86.4731, "step": 44660 }, { "epoch": 0.36952475493237374, "grad_norm": 1984.68017578125, "learning_rate": 8.064953154349042e-06, "loss": 85.2434, "step": 44670 }, { "epoch": 0.369607478181743, "grad_norm": 951.8640747070312, "learning_rate": 8.063838715263072e-06, "loss": 92.2627, "step": 44680 }, { "epoch": 0.3696902014311122, "grad_norm": 991.0081176757812, "learning_rate": 8.062724032401515e-06, "loss": 82.2411, "step": 44690 }, { "epoch": 0.36977292468048145, "grad_norm": 915.6588134765625, "learning_rate": 8.061609105853062e-06, "loss": 123.3313, "step": 44700 }, { "epoch": 0.3698556479298507, "grad_norm": 648.7603759765625, "learning_rate": 8.060493935706425e-06, "loss": 86.3172, "step": 44710 }, { "epoch": 0.3699383711792199, "grad_norm": 731.9179077148438, "learning_rate": 8.059378522050332e-06, "loss": 142.6297, "step": 44720 }, { "epoch": 0.37002109442858916, "grad_norm": 738.2081909179688, "learning_rate": 8.05826286497353e-06, "loss": 100.2493, "step": 44730 }, { "epoch": 0.3701038176779584, "grad_norm": 961.7689208984375, "learning_rate": 8.057146964564786e-06, "loss": 108.2104, "step": 44740 }, { "epoch": 0.3701865409273276, "grad_norm": 1143.8248291015625, "learning_rate": 8.05603082091289e-06, "loss": 102.1352, "step": 44750 }, { "epoch": 0.37026926417669687, "grad_norm": 968.6405639648438, "learning_rate": 8.054914434106647e-06, "loss": 89.4334, "step": 44760 }, { "epoch": 0.3703519874260661, "grad_norm": 800.0516967773438, "learning_rate": 8.053797804234882e-06, "loss": 100.4505, "step": 44770 }, { "epoch": 0.3704347106754353, "grad_norm": 928.8748168945312, "learning_rate": 8.052680931386441e-06, "loss": 88.3113, "step": 44780 }, { "epoch": 0.3705174339248046, "grad_norm": 680.3980102539062, "learning_rate": 8.051563815650187e-06, "loss": 93.7541, "step": 44790 }, { "epoch": 0.3706001571741738, "grad_norm": 988.8343505859375, "learning_rate": 8.050446457115005e-06, "loss": 138.652, "step": 44800 }, { "epoch": 0.370682880423543, "grad_norm": 5196.66015625, "learning_rate": 8.0493288558698e-06, "loss": 161.6813, "step": 44810 }, { "epoch": 0.3707656036729123, "grad_norm": 1195.2532958984375, "learning_rate": 8.04821101200349e-06, "loss": 104.2872, "step": 44820 }, { "epoch": 0.37084832692228153, "grad_norm": 916.4719848632812, "learning_rate": 8.047092925605022e-06, "loss": 72.5952, "step": 44830 }, { "epoch": 0.37093105017165073, "grad_norm": 1935.22119140625, "learning_rate": 8.045974596763352e-06, "loss": 124.3693, "step": 44840 }, { "epoch": 0.37101377342102, "grad_norm": 660.0169677734375, "learning_rate": 8.044856025567464e-06, "loss": 118.5706, "step": 44850 }, { "epoch": 0.3710964966703892, "grad_norm": 1358.344482421875, "learning_rate": 8.043737212106356e-06, "loss": 116.8551, "step": 44860 }, { "epoch": 0.37117921991975844, "grad_norm": 963.0214233398438, "learning_rate": 8.042618156469045e-06, "loss": 88.5938, "step": 44870 }, { "epoch": 0.3712619431691277, "grad_norm": 718.8231811523438, "learning_rate": 8.041498858744572e-06, "loss": 93.5438, "step": 44880 }, { "epoch": 0.3713446664184969, "grad_norm": 884.6295776367188, "learning_rate": 8.040379319021994e-06, "loss": 98.0878, "step": 44890 }, { "epoch": 0.37142738966786615, "grad_norm": 818.326171875, "learning_rate": 8.039259537390388e-06, "loss": 106.2791, "step": 44900 }, { "epoch": 0.3715101129172354, "grad_norm": 722.222900390625, "learning_rate": 8.038139513938847e-06, "loss": 101.4262, "step": 44910 }, { "epoch": 0.3715928361666046, "grad_norm": 706.7052001953125, "learning_rate": 8.037019248756488e-06, "loss": 103.0186, "step": 44920 }, { "epoch": 0.37167555941597386, "grad_norm": 783.2863159179688, "learning_rate": 8.035898741932447e-06, "loss": 101.9469, "step": 44930 }, { "epoch": 0.3717582826653431, "grad_norm": 1484.9461669921875, "learning_rate": 8.034777993555875e-06, "loss": 140.325, "step": 44940 }, { "epoch": 0.3718410059147123, "grad_norm": 669.56640625, "learning_rate": 8.033657003715945e-06, "loss": 116.9738, "step": 44950 }, { "epoch": 0.37192372916408156, "grad_norm": 705.1654663085938, "learning_rate": 8.032535772501851e-06, "loss": 102.4115, "step": 44960 }, { "epoch": 0.3720064524134508, "grad_norm": 1263.1771240234375, "learning_rate": 8.031414300002802e-06, "loss": 77.7587, "step": 44970 }, { "epoch": 0.37208917566282, "grad_norm": 1212.223876953125, "learning_rate": 8.03029258630803e-06, "loss": 112.9885, "step": 44980 }, { "epoch": 0.37217189891218927, "grad_norm": 951.40185546875, "learning_rate": 8.029170631506785e-06, "loss": 95.1384, "step": 44990 }, { "epoch": 0.3722546221615585, "grad_norm": 1134.679443359375, "learning_rate": 8.028048435688333e-06, "loss": 106.5822, "step": 45000 }, { "epoch": 0.3723373454109277, "grad_norm": 932.267822265625, "learning_rate": 8.026925998941965e-06, "loss": 110.1278, "step": 45010 }, { "epoch": 0.372420068660297, "grad_norm": 457.553955078125, "learning_rate": 8.025803321356989e-06, "loss": 95.6591, "step": 45020 }, { "epoch": 0.37250279190966623, "grad_norm": 655.8544921875, "learning_rate": 8.024680403022726e-06, "loss": 89.4721, "step": 45030 }, { "epoch": 0.37258551515903543, "grad_norm": 893.6047973632812, "learning_rate": 8.023557244028526e-06, "loss": 101.3761, "step": 45040 }, { "epoch": 0.3726682384084047, "grad_norm": 765.0892333984375, "learning_rate": 8.022433844463752e-06, "loss": 85.2344, "step": 45050 }, { "epoch": 0.37275096165777394, "grad_norm": 955.836181640625, "learning_rate": 8.02131020441779e-06, "loss": 105.1853, "step": 45060 }, { "epoch": 0.37283368490714314, "grad_norm": 1159.584228515625, "learning_rate": 8.02018632398004e-06, "loss": 82.6549, "step": 45070 }, { "epoch": 0.3729164081565124, "grad_norm": 827.6931762695312, "learning_rate": 8.019062203239923e-06, "loss": 145.6601, "step": 45080 }, { "epoch": 0.37299913140588165, "grad_norm": 1674.2239990234375, "learning_rate": 8.017937842286882e-06, "loss": 104.1544, "step": 45090 }, { "epoch": 0.37308185465525084, "grad_norm": 1010.4392700195312, "learning_rate": 8.01681324121038e-06, "loss": 102.7155, "step": 45100 }, { "epoch": 0.3731645779046201, "grad_norm": 812.0460205078125, "learning_rate": 8.015688400099893e-06, "loss": 80.9767, "step": 45110 }, { "epoch": 0.37324730115398935, "grad_norm": 676.9262084960938, "learning_rate": 8.014563319044919e-06, "loss": 107.2131, "step": 45120 }, { "epoch": 0.37333002440335855, "grad_norm": 1148.935302734375, "learning_rate": 8.013437998134978e-06, "loss": 96.8717, "step": 45130 }, { "epoch": 0.3734127476527278, "grad_norm": 2210.831298828125, "learning_rate": 8.012312437459604e-06, "loss": 115.9179, "step": 45140 }, { "epoch": 0.37349547090209706, "grad_norm": 740.3560180664062, "learning_rate": 8.011186637108354e-06, "loss": 95.7675, "step": 45150 }, { "epoch": 0.37357819415146626, "grad_norm": 1315.1302490234375, "learning_rate": 8.010060597170805e-06, "loss": 108.1328, "step": 45160 }, { "epoch": 0.3736609174008355, "grad_norm": 1144.85400390625, "learning_rate": 8.008934317736546e-06, "loss": 104.8319, "step": 45170 }, { "epoch": 0.37374364065020477, "grad_norm": 642.925048828125, "learning_rate": 8.007807798895195e-06, "loss": 114.467, "step": 45180 }, { "epoch": 0.37382636389957397, "grad_norm": 1433.939208984375, "learning_rate": 8.00668104073638e-06, "loss": 142.9769, "step": 45190 }, { "epoch": 0.3739090871489432, "grad_norm": 615.9909057617188, "learning_rate": 8.005554043349753e-06, "loss": 77.9182, "step": 45200 }, { "epoch": 0.3739918103983124, "grad_norm": 1355.69287109375, "learning_rate": 8.004426806824985e-06, "loss": 98.0649, "step": 45210 }, { "epoch": 0.3740745336476817, "grad_norm": 716.5189208984375, "learning_rate": 8.003299331251764e-06, "loss": 113.2009, "step": 45220 }, { "epoch": 0.3741572568970509, "grad_norm": 860.948974609375, "learning_rate": 8.002171616719798e-06, "loss": 89.8221, "step": 45230 }, { "epoch": 0.3742399801464201, "grad_norm": 1081.593994140625, "learning_rate": 8.001043663318815e-06, "loss": 148.4487, "step": 45240 }, { "epoch": 0.3743227033957894, "grad_norm": 714.3857421875, "learning_rate": 7.999915471138562e-06, "loss": 111.338, "step": 45250 }, { "epoch": 0.37440542664515863, "grad_norm": 863.04052734375, "learning_rate": 7.9987870402688e-06, "loss": 123.2091, "step": 45260 }, { "epoch": 0.37448814989452783, "grad_norm": 541.889404296875, "learning_rate": 7.997658370799318e-06, "loss": 122.7542, "step": 45270 }, { "epoch": 0.3745708731438971, "grad_norm": 948.584716796875, "learning_rate": 7.996529462819915e-06, "loss": 119.426, "step": 45280 }, { "epoch": 0.37465359639326634, "grad_norm": 736.9681396484375, "learning_rate": 7.995400316420416e-06, "loss": 64.0782, "step": 45290 }, { "epoch": 0.37473631964263554, "grad_norm": 1551.1883544921875, "learning_rate": 7.994270931690662e-06, "loss": 136.7847, "step": 45300 }, { "epoch": 0.3748190428920048, "grad_norm": 804.4282836914062, "learning_rate": 7.993141308720511e-06, "loss": 136.6521, "step": 45310 }, { "epoch": 0.37490176614137405, "grad_norm": 778.236083984375, "learning_rate": 7.99201144759984e-06, "loss": 95.8208, "step": 45320 }, { "epoch": 0.37498448939074325, "grad_norm": 2299.00048828125, "learning_rate": 7.990881348418554e-06, "loss": 124.4509, "step": 45330 }, { "epoch": 0.3750672126401125, "grad_norm": 973.8239135742188, "learning_rate": 7.989751011266565e-06, "loss": 112.5193, "step": 45340 }, { "epoch": 0.37514993588948176, "grad_norm": 745.3870239257812, "learning_rate": 7.988620436233806e-06, "loss": 112.1299, "step": 45350 }, { "epoch": 0.37523265913885095, "grad_norm": 2419.01953125, "learning_rate": 7.987489623410236e-06, "loss": 130.317, "step": 45360 }, { "epoch": 0.3753153823882202, "grad_norm": 2003.9739990234375, "learning_rate": 7.986358572885828e-06, "loss": 118.5811, "step": 45370 }, { "epoch": 0.37539810563758946, "grad_norm": 1251.0047607421875, "learning_rate": 7.985227284750574e-06, "loss": 106.9141, "step": 45380 }, { "epoch": 0.37548082888695866, "grad_norm": 879.7947998046875, "learning_rate": 7.984095759094485e-06, "loss": 93.4843, "step": 45390 }, { "epoch": 0.3755635521363279, "grad_norm": 852.9107055664062, "learning_rate": 7.982963996007591e-06, "loss": 141.075, "step": 45400 }, { "epoch": 0.37564627538569717, "grad_norm": 952.68798828125, "learning_rate": 7.981831995579943e-06, "loss": 91.4658, "step": 45410 }, { "epoch": 0.37572899863506637, "grad_norm": 1191.1885986328125, "learning_rate": 7.980699757901607e-06, "loss": 80.4354, "step": 45420 }, { "epoch": 0.3758117218844356, "grad_norm": 813.9268188476562, "learning_rate": 7.97956728306267e-06, "loss": 90.7331, "step": 45430 }, { "epoch": 0.3758944451338049, "grad_norm": 697.1983032226562, "learning_rate": 7.97843457115324e-06, "loss": 98.3303, "step": 45440 }, { "epoch": 0.3759771683831741, "grad_norm": 628.31982421875, "learning_rate": 7.97730162226344e-06, "loss": 103.7881, "step": 45450 }, { "epoch": 0.37605989163254333, "grad_norm": 2688.026123046875, "learning_rate": 7.976168436483415e-06, "loss": 97.9229, "step": 45460 }, { "epoch": 0.3761426148819126, "grad_norm": 813.2411499023438, "learning_rate": 7.975035013903326e-06, "loss": 94.9314, "step": 45470 }, { "epoch": 0.3762253381312818, "grad_norm": 712.8557739257812, "learning_rate": 7.973901354613353e-06, "loss": 85.9302, "step": 45480 }, { "epoch": 0.37630806138065104, "grad_norm": 1530.84033203125, "learning_rate": 7.972767458703697e-06, "loss": 128.0701, "step": 45490 }, { "epoch": 0.3763907846300203, "grad_norm": 1192.7733154296875, "learning_rate": 7.971633326264581e-06, "loss": 99.9536, "step": 45500 }, { "epoch": 0.3764735078793895, "grad_norm": 854.33203125, "learning_rate": 7.970498957386237e-06, "loss": 121.3529, "step": 45510 }, { "epoch": 0.37655623112875874, "grad_norm": 673.5110473632812, "learning_rate": 7.969364352158922e-06, "loss": 102.1387, "step": 45520 }, { "epoch": 0.376638954378128, "grad_norm": 868.6480712890625, "learning_rate": 7.968229510672915e-06, "loss": 97.5747, "step": 45530 }, { "epoch": 0.3767216776274972, "grad_norm": 1399.73779296875, "learning_rate": 7.967094433018508e-06, "loss": 125.0937, "step": 45540 }, { "epoch": 0.37680440087686645, "grad_norm": 1239.8349609375, "learning_rate": 7.965959119286013e-06, "loss": 113.5951, "step": 45550 }, { "epoch": 0.37688712412623565, "grad_norm": 1359.953125, "learning_rate": 7.964823569565765e-06, "loss": 103.3041, "step": 45560 }, { "epoch": 0.3769698473756049, "grad_norm": 720.0075073242188, "learning_rate": 7.963687783948111e-06, "loss": 81.487, "step": 45570 }, { "epoch": 0.37705257062497416, "grad_norm": 1454.417724609375, "learning_rate": 7.96255176252342e-06, "loss": 83.4532, "step": 45580 }, { "epoch": 0.37713529387434336, "grad_norm": 1103.7496337890625, "learning_rate": 7.961415505382083e-06, "loss": 125.8114, "step": 45590 }, { "epoch": 0.3772180171237126, "grad_norm": 948.4056396484375, "learning_rate": 7.960279012614508e-06, "loss": 90.2628, "step": 45600 }, { "epoch": 0.37730074037308187, "grad_norm": 1128.2615966796875, "learning_rate": 7.959142284311115e-06, "loss": 113.3847, "step": 45610 }, { "epoch": 0.37738346362245107, "grad_norm": 921.497802734375, "learning_rate": 7.958005320562349e-06, "loss": 125.8096, "step": 45620 }, { "epoch": 0.3774661868718203, "grad_norm": 1018.8650512695312, "learning_rate": 7.95686812145868e-06, "loss": 122.7865, "step": 45630 }, { "epoch": 0.3775489101211896, "grad_norm": 979.5180053710938, "learning_rate": 7.955730687090582e-06, "loss": 125.0732, "step": 45640 }, { "epoch": 0.3776316333705588, "grad_norm": 978.5775756835938, "learning_rate": 7.954593017548557e-06, "loss": 87.2099, "step": 45650 }, { "epoch": 0.377714356619928, "grad_norm": 978.8362426757812, "learning_rate": 7.953455112923127e-06, "loss": 134.838, "step": 45660 }, { "epoch": 0.3777970798692973, "grad_norm": 961.3007202148438, "learning_rate": 7.952316973304828e-06, "loss": 109.7581, "step": 45670 }, { "epoch": 0.3778798031186665, "grad_norm": 872.6028442382812, "learning_rate": 7.951178598784217e-06, "loss": 94.058, "step": 45680 }, { "epoch": 0.37796252636803573, "grad_norm": 603.7721557617188, "learning_rate": 7.950039989451868e-06, "loss": 142.0334, "step": 45690 }, { "epoch": 0.378045249617405, "grad_norm": 769.1307373046875, "learning_rate": 7.948901145398376e-06, "loss": 101.5725, "step": 45700 }, { "epoch": 0.3781279728667742, "grad_norm": 523.3740844726562, "learning_rate": 7.947762066714353e-06, "loss": 99.7676, "step": 45710 }, { "epoch": 0.37821069611614344, "grad_norm": 569.120361328125, "learning_rate": 7.946622753490433e-06, "loss": 73.9602, "step": 45720 }, { "epoch": 0.3782934193655127, "grad_norm": 850.3909912109375, "learning_rate": 7.945483205817262e-06, "loss": 99.4327, "step": 45730 }, { "epoch": 0.3783761426148819, "grad_norm": 1302.0482177734375, "learning_rate": 7.94434342378551e-06, "loss": 132.1461, "step": 45740 }, { "epoch": 0.37845886586425115, "grad_norm": 698.3201293945312, "learning_rate": 7.943203407485864e-06, "loss": 69.9039, "step": 45750 }, { "epoch": 0.3785415891136204, "grad_norm": 911.8252563476562, "learning_rate": 7.942063157009033e-06, "loss": 102.4791, "step": 45760 }, { "epoch": 0.3786243123629896, "grad_norm": 991.9946899414062, "learning_rate": 7.940922672445737e-06, "loss": 113.1581, "step": 45770 }, { "epoch": 0.37870703561235886, "grad_norm": 1038.773681640625, "learning_rate": 7.939781953886722e-06, "loss": 128.2211, "step": 45780 }, { "epoch": 0.3787897588617281, "grad_norm": 1063.1624755859375, "learning_rate": 7.938641001422747e-06, "loss": 106.1225, "step": 45790 }, { "epoch": 0.3788724821110973, "grad_norm": 897.7556762695312, "learning_rate": 7.937499815144597e-06, "loss": 102.7701, "step": 45800 }, { "epoch": 0.37895520536046656, "grad_norm": 454.84552001953125, "learning_rate": 7.936358395143065e-06, "loss": 133.0522, "step": 45810 }, { "epoch": 0.3790379286098358, "grad_norm": 491.5146484375, "learning_rate": 7.935216741508971e-06, "loss": 131.0702, "step": 45820 }, { "epoch": 0.379120651859205, "grad_norm": 816.953125, "learning_rate": 7.934074854333153e-06, "loss": 102.6913, "step": 45830 }, { "epoch": 0.37920337510857427, "grad_norm": 991.1273193359375, "learning_rate": 7.932932733706467e-06, "loss": 96.2913, "step": 45840 }, { "epoch": 0.3792860983579435, "grad_norm": 2715.989501953125, "learning_rate": 7.931790379719781e-06, "loss": 145.7241, "step": 45850 }, { "epoch": 0.3793688216073127, "grad_norm": 733.826416015625, "learning_rate": 7.93064779246399e-06, "loss": 79.5076, "step": 45860 }, { "epoch": 0.379451544856682, "grad_norm": 1925.5067138671875, "learning_rate": 7.929504972030003e-06, "loss": 129.3025, "step": 45870 }, { "epoch": 0.37953426810605123, "grad_norm": 723.109130859375, "learning_rate": 7.928361918508752e-06, "loss": 124.5502, "step": 45880 }, { "epoch": 0.37961699135542043, "grad_norm": 942.7910766601562, "learning_rate": 7.927218631991182e-06, "loss": 87.0695, "step": 45890 }, { "epoch": 0.3796997146047897, "grad_norm": 1327.8336181640625, "learning_rate": 7.92607511256826e-06, "loss": 90.3665, "step": 45900 }, { "epoch": 0.37978243785415894, "grad_norm": 1204.0654296875, "learning_rate": 7.924931360330968e-06, "loss": 120.6505, "step": 45910 }, { "epoch": 0.37986516110352814, "grad_norm": 643.5101928710938, "learning_rate": 7.92378737537031e-06, "loss": 84.0929, "step": 45920 }, { "epoch": 0.3799478843528974, "grad_norm": 1710.7091064453125, "learning_rate": 7.922643157777314e-06, "loss": 94.1093, "step": 45930 }, { "epoch": 0.3800306076022666, "grad_norm": 1306.8468017578125, "learning_rate": 7.921498707643011e-06, "loss": 110.1609, "step": 45940 }, { "epoch": 0.38011333085163584, "grad_norm": 1266.775146484375, "learning_rate": 7.920354025058467e-06, "loss": 94.9035, "step": 45950 }, { "epoch": 0.3801960541010051, "grad_norm": 1168.3138427734375, "learning_rate": 7.919209110114752e-06, "loss": 165.2224, "step": 45960 }, { "epoch": 0.3802787773503743, "grad_norm": 760.196044921875, "learning_rate": 7.918063962902968e-06, "loss": 118.3697, "step": 45970 }, { "epoch": 0.38036150059974355, "grad_norm": 1145.4422607421875, "learning_rate": 7.916918583514227e-06, "loss": 102.1873, "step": 45980 }, { "epoch": 0.3804442238491128, "grad_norm": 523.3143310546875, "learning_rate": 7.91577297203966e-06, "loss": 94.5064, "step": 45990 }, { "epoch": 0.380526947098482, "grad_norm": 777.9324951171875, "learning_rate": 7.91462712857042e-06, "loss": 99.4327, "step": 46000 }, { "epoch": 0.38060967034785126, "grad_norm": 1038.9837646484375, "learning_rate": 7.913481053197673e-06, "loss": 120.6933, "step": 46010 }, { "epoch": 0.3806923935972205, "grad_norm": 702.442138671875, "learning_rate": 7.912334746012613e-06, "loss": 141.1689, "step": 46020 }, { "epoch": 0.3807751168465897, "grad_norm": 910.6521606445312, "learning_rate": 7.911188207106442e-06, "loss": 84.091, "step": 46030 }, { "epoch": 0.38085784009595897, "grad_norm": 836.0477905273438, "learning_rate": 7.910041436570386e-06, "loss": 92.7803, "step": 46040 }, { "epoch": 0.3809405633453282, "grad_norm": 796.7591552734375, "learning_rate": 7.90889443449569e-06, "loss": 93.9601, "step": 46050 }, { "epoch": 0.3810232865946974, "grad_norm": 1023.9012451171875, "learning_rate": 7.90774720097361e-06, "loss": 125.3338, "step": 46060 }, { "epoch": 0.3811060098440667, "grad_norm": 1169.42822265625, "learning_rate": 7.906599736095433e-06, "loss": 89.7407, "step": 46070 }, { "epoch": 0.3811887330934359, "grad_norm": 924.0548706054688, "learning_rate": 7.905452039952453e-06, "loss": 86.3304, "step": 46080 }, { "epoch": 0.3812714563428051, "grad_norm": 595.2965087890625, "learning_rate": 7.904304112635987e-06, "loss": 106.9174, "step": 46090 }, { "epoch": 0.3813541795921744, "grad_norm": 472.991943359375, "learning_rate": 7.903155954237375e-06, "loss": 120.8275, "step": 46100 }, { "epoch": 0.38143690284154363, "grad_norm": 1009.1423950195312, "learning_rate": 7.902007564847967e-06, "loss": 105.188, "step": 46110 }, { "epoch": 0.38151962609091283, "grad_norm": 851.7573852539062, "learning_rate": 7.900858944559133e-06, "loss": 111.3603, "step": 46120 }, { "epoch": 0.3816023493402821, "grad_norm": 981.0646362304688, "learning_rate": 7.899710093462267e-06, "loss": 86.6864, "step": 46130 }, { "epoch": 0.38168507258965134, "grad_norm": 864.4039306640625, "learning_rate": 7.898561011648777e-06, "loss": 155.4255, "step": 46140 }, { "epoch": 0.38176779583902054, "grad_norm": 1161.7957763671875, "learning_rate": 7.89741169921009e-06, "loss": 92.5417, "step": 46150 }, { "epoch": 0.3818505190883898, "grad_norm": 1559.1328125, "learning_rate": 7.896262156237652e-06, "loss": 109.0317, "step": 46160 }, { "epoch": 0.38193324233775905, "grad_norm": 839.4773559570312, "learning_rate": 7.895112382822925e-06, "loss": 124.5884, "step": 46170 }, { "epoch": 0.38201596558712825, "grad_norm": 1135.74658203125, "learning_rate": 7.893962379057393e-06, "loss": 115.562, "step": 46180 }, { "epoch": 0.3820986888364975, "grad_norm": 979.9491577148438, "learning_rate": 7.892812145032557e-06, "loss": 118.3164, "step": 46190 }, { "epoch": 0.38218141208586676, "grad_norm": 4294.3427734375, "learning_rate": 7.891661680839932e-06, "loss": 125.0205, "step": 46200 }, { "epoch": 0.38226413533523596, "grad_norm": 933.6452026367188, "learning_rate": 7.89051098657106e-06, "loss": 121.8078, "step": 46210 }, { "epoch": 0.3823468585846052, "grad_norm": 527.7791137695312, "learning_rate": 7.889360062317495e-06, "loss": 96.7531, "step": 46220 }, { "epoch": 0.38242958183397446, "grad_norm": 777.3706665039062, "learning_rate": 7.888208908170812e-06, "loss": 134.1928, "step": 46230 }, { "epoch": 0.38251230508334366, "grad_norm": 805.9109497070312, "learning_rate": 7.887057524222596e-06, "loss": 101.3832, "step": 46240 }, { "epoch": 0.3825950283327129, "grad_norm": 1473.904296875, "learning_rate": 7.885905910564466e-06, "loss": 87.6336, "step": 46250 }, { "epoch": 0.38267775158208217, "grad_norm": 784.258056640625, "learning_rate": 7.884754067288047e-06, "loss": 94.394, "step": 46260 }, { "epoch": 0.38276047483145137, "grad_norm": 1091.794677734375, "learning_rate": 7.883601994484986e-06, "loss": 106.1401, "step": 46270 }, { "epoch": 0.3828431980808206, "grad_norm": 1147.5867919921875, "learning_rate": 7.882449692246948e-06, "loss": 104.8446, "step": 46280 }, { "epoch": 0.3829259213301898, "grad_norm": 1361.256591796875, "learning_rate": 7.881297160665616e-06, "loss": 97.2336, "step": 46290 }, { "epoch": 0.3830086445795591, "grad_norm": 1118.20556640625, "learning_rate": 7.880144399832693e-06, "loss": 109.2898, "step": 46300 }, { "epoch": 0.38309136782892833, "grad_norm": 1180.1666259765625, "learning_rate": 7.878991409839897e-06, "loss": 107.2897, "step": 46310 }, { "epoch": 0.38317409107829753, "grad_norm": 976.9520874023438, "learning_rate": 7.87783819077897e-06, "loss": 127.2354, "step": 46320 }, { "epoch": 0.3832568143276668, "grad_norm": 569.0108032226562, "learning_rate": 7.876684742741665e-06, "loss": 89.219, "step": 46330 }, { "epoch": 0.38333953757703604, "grad_norm": 1490.4720458984375, "learning_rate": 7.875531065819755e-06, "loss": 88.2369, "step": 46340 }, { "epoch": 0.38342226082640524, "grad_norm": 1101.564697265625, "learning_rate": 7.874377160105037e-06, "loss": 104.7651, "step": 46350 }, { "epoch": 0.3835049840757745, "grad_norm": 1799.0706787109375, "learning_rate": 7.873223025689319e-06, "loss": 138.4782, "step": 46360 }, { "epoch": 0.38358770732514375, "grad_norm": 1268.860595703125, "learning_rate": 7.872068662664432e-06, "loss": 91.4783, "step": 46370 }, { "epoch": 0.38367043057451294, "grad_norm": 1431.305419921875, "learning_rate": 7.870914071122222e-06, "loss": 100.8851, "step": 46380 }, { "epoch": 0.3837531538238822, "grad_norm": 1034.6007080078125, "learning_rate": 7.869759251154554e-06, "loss": 94.5458, "step": 46390 }, { "epoch": 0.38383587707325145, "grad_norm": 1329.562255859375, "learning_rate": 7.868604202853314e-06, "loss": 95.9945, "step": 46400 }, { "epoch": 0.38391860032262065, "grad_norm": 755.7239379882812, "learning_rate": 7.867448926310403e-06, "loss": 106.9036, "step": 46410 }, { "epoch": 0.3840013235719899, "grad_norm": 1034.0926513671875, "learning_rate": 7.866293421617741e-06, "loss": 94.1212, "step": 46420 }, { "epoch": 0.38408404682135916, "grad_norm": 1169.0289306640625, "learning_rate": 7.865137688867264e-06, "loss": 76.2746, "step": 46430 }, { "epoch": 0.38416677007072836, "grad_norm": 1374.6610107421875, "learning_rate": 7.86398172815093e-06, "loss": 141.711, "step": 46440 }, { "epoch": 0.3842494933200976, "grad_norm": 715.2680053710938, "learning_rate": 7.862825539560716e-06, "loss": 127.8841, "step": 46450 }, { "epoch": 0.38433221656946687, "grad_norm": 1387.7430419921875, "learning_rate": 7.861669123188613e-06, "loss": 124.0174, "step": 46460 }, { "epoch": 0.38441493981883607, "grad_norm": 826.4485473632812, "learning_rate": 7.86051247912663e-06, "loss": 110.8221, "step": 46470 }, { "epoch": 0.3844976630682053, "grad_norm": 807.095703125, "learning_rate": 7.859355607466797e-06, "loss": 87.1465, "step": 46480 }, { "epoch": 0.3845803863175746, "grad_norm": 1193.6893310546875, "learning_rate": 7.858198508301161e-06, "loss": 133.6182, "step": 46490 }, { "epoch": 0.3846631095669438, "grad_norm": 527.4124755859375, "learning_rate": 7.857041181721788e-06, "loss": 97.8456, "step": 46500 }, { "epoch": 0.384745832816313, "grad_norm": 862.8912963867188, "learning_rate": 7.855883627820757e-06, "loss": 134.22, "step": 46510 }, { "epoch": 0.3848285560656823, "grad_norm": 548.6507568359375, "learning_rate": 7.854725846690175e-06, "loss": 83.1777, "step": 46520 }, { "epoch": 0.3849112793150515, "grad_norm": 902.584228515625, "learning_rate": 7.85356783842216e-06, "loss": 106.9863, "step": 46530 }, { "epoch": 0.38499400256442073, "grad_norm": 535.5631103515625, "learning_rate": 7.852409603108845e-06, "loss": 113.9048, "step": 46540 }, { "epoch": 0.38507672581379, "grad_norm": 938.954345703125, "learning_rate": 7.85125114084239e-06, "loss": 105.1793, "step": 46550 }, { "epoch": 0.3851594490631592, "grad_norm": 1107.5294189453125, "learning_rate": 7.850092451714967e-06, "loss": 115.7462, "step": 46560 }, { "epoch": 0.38524217231252844, "grad_norm": 1338.5953369140625, "learning_rate": 7.84893353581877e-06, "loss": 93.1394, "step": 46570 }, { "epoch": 0.3853248955618977, "grad_norm": 1115.7308349609375, "learning_rate": 7.847774393246005e-06, "loss": 117.0743, "step": 46580 }, { "epoch": 0.3854076188112669, "grad_norm": 712.0968017578125, "learning_rate": 7.8466150240889e-06, "loss": 92.4161, "step": 46590 }, { "epoch": 0.38549034206063615, "grad_norm": 1130.7666015625, "learning_rate": 7.845455428439703e-06, "loss": 90.2969, "step": 46600 }, { "epoch": 0.3855730653100054, "grad_norm": 694.2098388671875, "learning_rate": 7.844295606390675e-06, "loss": 106.4844, "step": 46610 }, { "epoch": 0.3856557885593746, "grad_norm": 758.242431640625, "learning_rate": 7.843135558034101e-06, "loss": 85.5782, "step": 46620 }, { "epoch": 0.38573851180874386, "grad_norm": 847.0222778320312, "learning_rate": 7.841975283462278e-06, "loss": 83.5226, "step": 46630 }, { "epoch": 0.3858212350581131, "grad_norm": 828.2359619140625, "learning_rate": 7.840814782767525e-06, "loss": 74.5017, "step": 46640 }, { "epoch": 0.3859039583074823, "grad_norm": 575.1474609375, "learning_rate": 7.839654056042176e-06, "loss": 84.7638, "step": 46650 }, { "epoch": 0.38598668155685156, "grad_norm": 961.8069458007812, "learning_rate": 7.838493103378588e-06, "loss": 105.9289, "step": 46660 }, { "epoch": 0.38606940480622076, "grad_norm": 323.90765380859375, "learning_rate": 7.83733192486913e-06, "loss": 134.9893, "step": 46670 }, { "epoch": 0.38615212805559, "grad_norm": 906.62060546875, "learning_rate": 7.836170520606191e-06, "loss": 132.9467, "step": 46680 }, { "epoch": 0.38623485130495927, "grad_norm": 1547.6925048828125, "learning_rate": 7.83500889068218e-06, "loss": 145.7491, "step": 46690 }, { "epoch": 0.38631757455432847, "grad_norm": 1164.046142578125, "learning_rate": 7.833847035189524e-06, "loss": 110.9907, "step": 46700 }, { "epoch": 0.3864002978036977, "grad_norm": 1105.657470703125, "learning_rate": 7.832684954220664e-06, "loss": 124.7499, "step": 46710 }, { "epoch": 0.386483021053067, "grad_norm": 508.67803955078125, "learning_rate": 7.831522647868064e-06, "loss": 95.8649, "step": 46720 }, { "epoch": 0.3865657443024362, "grad_norm": 674.171875, "learning_rate": 7.8303601162242e-06, "loss": 99.7405, "step": 46730 }, { "epoch": 0.38664846755180543, "grad_norm": 1009.1093139648438, "learning_rate": 7.829197359381571e-06, "loss": 111.1325, "step": 46740 }, { "epoch": 0.3867311908011747, "grad_norm": 1165.2833251953125, "learning_rate": 7.828034377432694e-06, "loss": 111.5451, "step": 46750 }, { "epoch": 0.3868139140505439, "grad_norm": 542.6980590820312, "learning_rate": 7.826871170470099e-06, "loss": 93.1879, "step": 46760 }, { "epoch": 0.38689663729991314, "grad_norm": 1133.044921875, "learning_rate": 7.82570773858634e-06, "loss": 90.8988, "step": 46770 }, { "epoch": 0.3869793605492824, "grad_norm": 684.3965454101562, "learning_rate": 7.824544081873984e-06, "loss": 79.4068, "step": 46780 }, { "epoch": 0.3870620837986516, "grad_norm": 1281.01513671875, "learning_rate": 7.823380200425618e-06, "loss": 132.8948, "step": 46790 }, { "epoch": 0.38714480704802084, "grad_norm": 1106.5760498046875, "learning_rate": 7.822216094333847e-06, "loss": 109.8843, "step": 46800 }, { "epoch": 0.3872275302973901, "grad_norm": 736.9039306640625, "learning_rate": 7.821051763691293e-06, "loss": 95.4937, "step": 46810 }, { "epoch": 0.3873102535467593, "grad_norm": 983.0117797851562, "learning_rate": 7.819887208590597e-06, "loss": 106.8212, "step": 46820 }, { "epoch": 0.38739297679612855, "grad_norm": 900.6527099609375, "learning_rate": 7.818722429124418e-06, "loss": 97.8815, "step": 46830 }, { "epoch": 0.3874757000454978, "grad_norm": 850.8548583984375, "learning_rate": 7.817557425385433e-06, "loss": 81.9495, "step": 46840 }, { "epoch": 0.387558423294867, "grad_norm": 855.3139038085938, "learning_rate": 7.816392197466333e-06, "loss": 89.8393, "step": 46850 }, { "epoch": 0.38764114654423626, "grad_norm": 587.1494750976562, "learning_rate": 7.815226745459831e-06, "loss": 106.3622, "step": 46860 }, { "epoch": 0.3877238697936055, "grad_norm": 3245.526123046875, "learning_rate": 7.814061069458657e-06, "loss": 111.3302, "step": 46870 }, { "epoch": 0.3878065930429747, "grad_norm": 547.8377685546875, "learning_rate": 7.81289516955556e-06, "loss": 78.6426, "step": 46880 }, { "epoch": 0.38788931629234397, "grad_norm": 1064.7379150390625, "learning_rate": 7.811729045843303e-06, "loss": 90.7304, "step": 46890 }, { "epoch": 0.3879720395417132, "grad_norm": 616.6322631835938, "learning_rate": 7.81056269841467e-06, "loss": 107.5491, "step": 46900 }, { "epoch": 0.3880547627910824, "grad_norm": 1086.90185546875, "learning_rate": 7.80939612736246e-06, "loss": 112.952, "step": 46910 }, { "epoch": 0.3881374860404517, "grad_norm": 762.38916015625, "learning_rate": 7.808229332779496e-06, "loss": 93.649, "step": 46920 }, { "epoch": 0.38822020928982093, "grad_norm": 1226.2991943359375, "learning_rate": 7.807062314758612e-06, "loss": 122.8837, "step": 46930 }, { "epoch": 0.3883029325391901, "grad_norm": 980.0850219726562, "learning_rate": 7.80589507339266e-06, "loss": 118.3508, "step": 46940 }, { "epoch": 0.3883856557885594, "grad_norm": 899.0253295898438, "learning_rate": 7.804727608774516e-06, "loss": 95.3277, "step": 46950 }, { "epoch": 0.38846837903792864, "grad_norm": 992.0533447265625, "learning_rate": 7.803559920997067e-06, "loss": 100.9585, "step": 46960 }, { "epoch": 0.38855110228729783, "grad_norm": 1150.515869140625, "learning_rate": 7.802392010153223e-06, "loss": 106.7416, "step": 46970 }, { "epoch": 0.3886338255366671, "grad_norm": 392.8724670410156, "learning_rate": 7.801223876335907e-06, "loss": 81.8418, "step": 46980 }, { "epoch": 0.38871654878603634, "grad_norm": 958.8024291992188, "learning_rate": 7.800055519638064e-06, "loss": 104.2003, "step": 46990 }, { "epoch": 0.38879927203540554, "grad_norm": 1033.255859375, "learning_rate": 7.798886940152654e-06, "loss": 111.1013, "step": 47000 }, { "epoch": 0.3888819952847748, "grad_norm": 670.1970825195312, "learning_rate": 7.797718137972654e-06, "loss": 108.9194, "step": 47010 }, { "epoch": 0.388964718534144, "grad_norm": 927.21630859375, "learning_rate": 7.79654911319106e-06, "loss": 107.1055, "step": 47020 }, { "epoch": 0.38904744178351325, "grad_norm": 1264.6552734375, "learning_rate": 7.795379865900892e-06, "loss": 131.0568, "step": 47030 }, { "epoch": 0.3891301650328825, "grad_norm": 1989.842529296875, "learning_rate": 7.794210396195175e-06, "loss": 94.6299, "step": 47040 }, { "epoch": 0.3892128882822517, "grad_norm": 530.0291748046875, "learning_rate": 7.79304070416696e-06, "loss": 121.8401, "step": 47050 }, { "epoch": 0.38929561153162096, "grad_norm": 936.9892578125, "learning_rate": 7.791870789909315e-06, "loss": 89.362, "step": 47060 }, { "epoch": 0.3893783347809902, "grad_norm": 2093.591796875, "learning_rate": 7.790700653515324e-06, "loss": 90.1807, "step": 47070 }, { "epoch": 0.3894610580303594, "grad_norm": 549.9674682617188, "learning_rate": 7.789530295078089e-06, "loss": 100.1859, "step": 47080 }, { "epoch": 0.38954378127972866, "grad_norm": 882.615966796875, "learning_rate": 7.788359714690732e-06, "loss": 117.6557, "step": 47090 }, { "epoch": 0.3896265045290979, "grad_norm": 798.0199584960938, "learning_rate": 7.787188912446389e-06, "loss": 127.0244, "step": 47100 }, { "epoch": 0.3897092277784671, "grad_norm": 776.5193481445312, "learning_rate": 7.786017888438214e-06, "loss": 126.0215, "step": 47110 }, { "epoch": 0.38979195102783637, "grad_norm": 654.182373046875, "learning_rate": 7.784846642759383e-06, "loss": 82.0503, "step": 47120 }, { "epoch": 0.3898746742772056, "grad_norm": 822.113525390625, "learning_rate": 7.783675175503087e-06, "loss": 89.8243, "step": 47130 }, { "epoch": 0.3899573975265748, "grad_norm": 865.35546875, "learning_rate": 7.78250348676253e-06, "loss": 98.9063, "step": 47140 }, { "epoch": 0.3900401207759441, "grad_norm": 1154.8814697265625, "learning_rate": 7.781331576630941e-06, "loss": 146.3992, "step": 47150 }, { "epoch": 0.39012284402531333, "grad_norm": 1378.4947509765625, "learning_rate": 7.780159445201562e-06, "loss": 130.5315, "step": 47160 }, { "epoch": 0.39020556727468253, "grad_norm": 1242.33251953125, "learning_rate": 7.778987092567658e-06, "loss": 119.4341, "step": 47170 }, { "epoch": 0.3902882905240518, "grad_norm": 797.1754760742188, "learning_rate": 7.777814518822504e-06, "loss": 67.0835, "step": 47180 }, { "epoch": 0.39037101377342104, "grad_norm": 607.6759033203125, "learning_rate": 7.776641724059398e-06, "loss": 93.4001, "step": 47190 }, { "epoch": 0.39045373702279024, "grad_norm": 819.6945190429688, "learning_rate": 7.77546870837165e-06, "loss": 74.3229, "step": 47200 }, { "epoch": 0.3905364602721595, "grad_norm": 903.440185546875, "learning_rate": 7.774295471852596e-06, "loss": 108.8114, "step": 47210 }, { "epoch": 0.39061918352152875, "grad_norm": 785.1321411132812, "learning_rate": 7.773122014595584e-06, "loss": 169.5685, "step": 47220 }, { "epoch": 0.39070190677089794, "grad_norm": 1930.775146484375, "learning_rate": 7.771948336693983e-06, "loss": 108.4483, "step": 47230 }, { "epoch": 0.3907846300202672, "grad_norm": 515.0844116210938, "learning_rate": 7.770774438241168e-06, "loss": 67.2212, "step": 47240 }, { "epoch": 0.39086735326963645, "grad_norm": 1046.875, "learning_rate": 7.769600319330553e-06, "loss": 122.4751, "step": 47250 }, { "epoch": 0.39095007651900565, "grad_norm": 516.0284423828125, "learning_rate": 7.768425980055548e-06, "loss": 87.5364, "step": 47260 }, { "epoch": 0.3910327997683749, "grad_norm": 1055.4755859375, "learning_rate": 7.767251420509593e-06, "loss": 127.459, "step": 47270 }, { "epoch": 0.39111552301774416, "grad_norm": 1103.45361328125, "learning_rate": 7.766076640786145e-06, "loss": 91.529, "step": 47280 }, { "epoch": 0.39119824626711336, "grad_norm": 751.03759765625, "learning_rate": 7.764901640978671e-06, "loss": 97.0965, "step": 47290 }, { "epoch": 0.3912809695164826, "grad_norm": 849.8720703125, "learning_rate": 7.763726421180664e-06, "loss": 100.8384, "step": 47300 }, { "epoch": 0.39136369276585187, "grad_norm": 1227.1131591796875, "learning_rate": 7.762550981485629e-06, "loss": 85.1875, "step": 47310 }, { "epoch": 0.39144641601522107, "grad_norm": 1593.5885009765625, "learning_rate": 7.76137532198709e-06, "loss": 117.0101, "step": 47320 }, { "epoch": 0.3915291392645903, "grad_norm": 901.8778686523438, "learning_rate": 7.76019944277859e-06, "loss": 132.7024, "step": 47330 }, { "epoch": 0.3916118625139596, "grad_norm": 851.2544555664062, "learning_rate": 7.759023343953689e-06, "loss": 89.3048, "step": 47340 }, { "epoch": 0.3916945857633288, "grad_norm": 605.6761474609375, "learning_rate": 7.757847025605963e-06, "loss": 103.3425, "step": 47350 }, { "epoch": 0.391777309012698, "grad_norm": 1666.5958251953125, "learning_rate": 7.756670487829005e-06, "loss": 112.8522, "step": 47360 }, { "epoch": 0.3918600322620673, "grad_norm": 928.7994384765625, "learning_rate": 7.755493730716428e-06, "loss": 139.6545, "step": 47370 }, { "epoch": 0.3919427555114365, "grad_norm": 1118.8592529296875, "learning_rate": 7.75431675436186e-06, "loss": 101.6983, "step": 47380 }, { "epoch": 0.39202547876080573, "grad_norm": 753.40185546875, "learning_rate": 7.753139558858949e-06, "loss": 111.5847, "step": 47390 }, { "epoch": 0.39210820201017493, "grad_norm": 1253.97509765625, "learning_rate": 7.751962144301359e-06, "loss": 101.4553, "step": 47400 }, { "epoch": 0.3921909252595442, "grad_norm": 1033.0867919921875, "learning_rate": 7.75078451078277e-06, "loss": 92.0257, "step": 47410 }, { "epoch": 0.39227364850891344, "grad_norm": 747.4989624023438, "learning_rate": 7.749606658396883e-06, "loss": 100.6043, "step": 47420 }, { "epoch": 0.39235637175828264, "grad_norm": 822.0686645507812, "learning_rate": 7.748428587237412e-06, "loss": 80.0977, "step": 47430 }, { "epoch": 0.3924390950076519, "grad_norm": 1061.1436767578125, "learning_rate": 7.747250297398092e-06, "loss": 120.7229, "step": 47440 }, { "epoch": 0.39252181825702115, "grad_norm": 497.0961608886719, "learning_rate": 7.746071788972675e-06, "loss": 92.1028, "step": 47450 }, { "epoch": 0.39260454150639035, "grad_norm": 747.8739624023438, "learning_rate": 7.744893062054928e-06, "loss": 84.091, "step": 47460 }, { "epoch": 0.3926872647557596, "grad_norm": 1407.6231689453125, "learning_rate": 7.743714116738636e-06, "loss": 96.2375, "step": 47470 }, { "epoch": 0.39276998800512886, "grad_norm": 1389.636962890625, "learning_rate": 7.742534953117607e-06, "loss": 83.5301, "step": 47480 }, { "epoch": 0.39285271125449805, "grad_norm": 827.8368530273438, "learning_rate": 7.741355571285656e-06, "loss": 98.6002, "step": 47490 }, { "epoch": 0.3929354345038673, "grad_norm": 1032.63330078125, "learning_rate": 7.740175971336624e-06, "loss": 111.3695, "step": 47500 }, { "epoch": 0.39301815775323656, "grad_norm": 938.416015625, "learning_rate": 7.738996153364364e-06, "loss": 128.9613, "step": 47510 }, { "epoch": 0.39310088100260576, "grad_norm": 960.8037719726562, "learning_rate": 7.737816117462752e-06, "loss": 96.7483, "step": 47520 }, { "epoch": 0.393183604251975, "grad_norm": 880.8233642578125, "learning_rate": 7.736635863725677e-06, "loss": 112.1702, "step": 47530 }, { "epoch": 0.39326632750134427, "grad_norm": 643.0890502929688, "learning_rate": 7.735455392247044e-06, "loss": 172.7514, "step": 47540 }, { "epoch": 0.39334905075071347, "grad_norm": 481.9482421875, "learning_rate": 7.73427470312078e-06, "loss": 98.0854, "step": 47550 }, { "epoch": 0.3934317740000827, "grad_norm": 7659.08837890625, "learning_rate": 7.733093796440828e-06, "loss": 94.3791, "step": 47560 }, { "epoch": 0.393514497249452, "grad_norm": 611.7446899414062, "learning_rate": 7.731912672301145e-06, "loss": 121.1342, "step": 47570 }, { "epoch": 0.3935972204988212, "grad_norm": 1074.301513671875, "learning_rate": 7.730731330795707e-06, "loss": 107.5944, "step": 47580 }, { "epoch": 0.39367994374819043, "grad_norm": 1130.7880859375, "learning_rate": 7.72954977201851e-06, "loss": 88.7905, "step": 47590 }, { "epoch": 0.3937626669975597, "grad_norm": 427.6048278808594, "learning_rate": 7.728367996063566e-06, "loss": 95.4467, "step": 47600 }, { "epoch": 0.3938453902469289, "grad_norm": 1087.6966552734375, "learning_rate": 7.727186003024902e-06, "loss": 116.7486, "step": 47610 }, { "epoch": 0.39392811349629814, "grad_norm": 677.6015014648438, "learning_rate": 7.726003792996562e-06, "loss": 112.2149, "step": 47620 }, { "epoch": 0.3940108367456674, "grad_norm": 1281.115478515625, "learning_rate": 7.724821366072612e-06, "loss": 112.6385, "step": 47630 }, { "epoch": 0.3940935599950366, "grad_norm": 1624.0828857421875, "learning_rate": 7.723638722347132e-06, "loss": 114.2262, "step": 47640 }, { "epoch": 0.39417628324440585, "grad_norm": 835.2601318359375, "learning_rate": 7.722455861914218e-06, "loss": 87.17, "step": 47650 }, { "epoch": 0.3942590064937751, "grad_norm": 721.5057983398438, "learning_rate": 7.721272784867983e-06, "loss": 112.5632, "step": 47660 }, { "epoch": 0.3943417297431443, "grad_norm": 909.7614135742188, "learning_rate": 7.720089491302565e-06, "loss": 105.2768, "step": 47670 }, { "epoch": 0.39442445299251355, "grad_norm": 641.9271240234375, "learning_rate": 7.718905981312108e-06, "loss": 93.7019, "step": 47680 }, { "epoch": 0.3945071762418828, "grad_norm": 585.551025390625, "learning_rate": 7.71772225499078e-06, "loss": 80.3911, "step": 47690 }, { "epoch": 0.394589899491252, "grad_norm": 783.8123168945312, "learning_rate": 7.716538312432767e-06, "loss": 115.101, "step": 47700 }, { "epoch": 0.39467262274062126, "grad_norm": 644.3832397460938, "learning_rate": 7.715354153732265e-06, "loss": 99.6165, "step": 47710 }, { "epoch": 0.3947553459899905, "grad_norm": 1999.86572265625, "learning_rate": 7.714169778983496e-06, "loss": 117.3061, "step": 47720 }, { "epoch": 0.3948380692393597, "grad_norm": 1040.9405517578125, "learning_rate": 7.712985188280694e-06, "loss": 101.7906, "step": 47730 }, { "epoch": 0.39492079248872897, "grad_norm": 1040.4888916015625, "learning_rate": 7.711800381718111e-06, "loss": 110.293, "step": 47740 }, { "epoch": 0.39500351573809817, "grad_norm": 1044.0440673828125, "learning_rate": 7.710615359390018e-06, "loss": 94.9559, "step": 47750 }, { "epoch": 0.3950862389874674, "grad_norm": 749.12939453125, "learning_rate": 7.7094301213907e-06, "loss": 92.2077, "step": 47760 }, { "epoch": 0.3951689622368367, "grad_norm": 1238.37158203125, "learning_rate": 7.708244667814463e-06, "loss": 100.6183, "step": 47770 }, { "epoch": 0.3952516854862059, "grad_norm": 821.638916015625, "learning_rate": 7.707058998755626e-06, "loss": 109.0208, "step": 47780 }, { "epoch": 0.3953344087355751, "grad_norm": 767.8424072265625, "learning_rate": 7.705873114308529e-06, "loss": 85.0486, "step": 47790 }, { "epoch": 0.3954171319849444, "grad_norm": 756.989501953125, "learning_rate": 7.704687014567524e-06, "loss": 89.8211, "step": 47800 }, { "epoch": 0.3954998552343136, "grad_norm": 1021.226806640625, "learning_rate": 7.703500699626988e-06, "loss": 90.8781, "step": 47810 }, { "epoch": 0.39558257848368283, "grad_norm": 749.4614868164062, "learning_rate": 7.702314169581311e-06, "loss": 103.451, "step": 47820 }, { "epoch": 0.3956653017330521, "grad_norm": 954.7346801757812, "learning_rate": 7.701127424524894e-06, "loss": 97.5412, "step": 47830 }, { "epoch": 0.3957480249824213, "grad_norm": 704.9465942382812, "learning_rate": 7.699940464552166e-06, "loss": 128.5169, "step": 47840 }, { "epoch": 0.39583074823179054, "grad_norm": 926.2518310546875, "learning_rate": 7.698753289757565e-06, "loss": 103.6893, "step": 47850 }, { "epoch": 0.3959134714811598, "grad_norm": 903.693115234375, "learning_rate": 7.69756590023555e-06, "loss": 117.7347, "step": 47860 }, { "epoch": 0.395996194730529, "grad_norm": 576.5755004882812, "learning_rate": 7.696378296080598e-06, "loss": 77.2486, "step": 47870 }, { "epoch": 0.39607891797989825, "grad_norm": 1016.8932495117188, "learning_rate": 7.6951904773872e-06, "loss": 102.5683, "step": 47880 }, { "epoch": 0.3961616412292675, "grad_norm": 649.89013671875, "learning_rate": 7.694002444249863e-06, "loss": 101.7743, "step": 47890 }, { "epoch": 0.3962443644786367, "grad_norm": 2596.306396484375, "learning_rate": 7.692814196763118e-06, "loss": 125.352, "step": 47900 }, { "epoch": 0.39632708772800596, "grad_norm": 1052.6181640625, "learning_rate": 7.691625735021505e-06, "loss": 109.4487, "step": 47910 }, { "epoch": 0.3964098109773752, "grad_norm": 909.6773071289062, "learning_rate": 7.690437059119584e-06, "loss": 132.6711, "step": 47920 }, { "epoch": 0.3964925342267444, "grad_norm": 468.37310791015625, "learning_rate": 7.689248169151935e-06, "loss": 89.8137, "step": 47930 }, { "epoch": 0.39657525747611366, "grad_norm": 1061.165771484375, "learning_rate": 7.68805906521315e-06, "loss": 117.4328, "step": 47940 }, { "epoch": 0.3966579807254829, "grad_norm": 757.1315307617188, "learning_rate": 7.686869747397843e-06, "loss": 101.1938, "step": 47950 }, { "epoch": 0.3967407039748521, "grad_norm": 811.54052734375, "learning_rate": 7.685680215800639e-06, "loss": 98.3037, "step": 47960 }, { "epoch": 0.39682342722422137, "grad_norm": 880.4942626953125, "learning_rate": 7.684490470516185e-06, "loss": 105.742, "step": 47970 }, { "epoch": 0.3969061504735906, "grad_norm": 3485.52685546875, "learning_rate": 7.683300511639149e-06, "loss": 121.3876, "step": 47980 }, { "epoch": 0.3969888737229598, "grad_norm": 794.6721801757812, "learning_rate": 7.682110339264203e-06, "loss": 99.216, "step": 47990 }, { "epoch": 0.3970715969723291, "grad_norm": 1388.62255859375, "learning_rate": 7.680919953486047e-06, "loss": 114.4334, "step": 48000 }, { "epoch": 0.39715432022169833, "grad_norm": 617.267578125, "learning_rate": 7.679729354399395e-06, "loss": 96.2605, "step": 48010 }, { "epoch": 0.39723704347106753, "grad_norm": 577.44189453125, "learning_rate": 7.678538542098974e-06, "loss": 106.764, "step": 48020 }, { "epoch": 0.3973197667204368, "grad_norm": 922.736083984375, "learning_rate": 7.677347516679536e-06, "loss": 94.4455, "step": 48030 }, { "epoch": 0.39740248996980604, "grad_norm": 1115.3370361328125, "learning_rate": 7.676156278235845e-06, "loss": 124.8899, "step": 48040 }, { "epoch": 0.39748521321917524, "grad_norm": 956.1436157226562, "learning_rate": 7.674964826862679e-06, "loss": 114.437, "step": 48050 }, { "epoch": 0.3975679364685445, "grad_norm": 1833.231201171875, "learning_rate": 7.673773162654836e-06, "loss": 127.1646, "step": 48060 }, { "epoch": 0.39765065971791375, "grad_norm": 424.2684631347656, "learning_rate": 7.672581285707135e-06, "loss": 88.6944, "step": 48070 }, { "epoch": 0.39773338296728294, "grad_norm": 1856.982421875, "learning_rate": 7.67138919611441e-06, "loss": 154.6424, "step": 48080 }, { "epoch": 0.3978161062166522, "grad_norm": 907.585205078125, "learning_rate": 7.670196893971502e-06, "loss": 121.6254, "step": 48090 }, { "epoch": 0.3978988294660214, "grad_norm": 1184.2100830078125, "learning_rate": 7.669004379373284e-06, "loss": 86.6673, "step": 48100 }, { "epoch": 0.39798155271539065, "grad_norm": 1260.71875, "learning_rate": 7.667811652414637e-06, "loss": 87.2874, "step": 48110 }, { "epoch": 0.3980642759647599, "grad_norm": 986.4711303710938, "learning_rate": 7.666618713190459e-06, "loss": 98.885, "step": 48120 }, { "epoch": 0.3981469992141291, "grad_norm": 1163.977783203125, "learning_rate": 7.665425561795669e-06, "loss": 86.0785, "step": 48130 }, { "epoch": 0.39822972246349836, "grad_norm": 851.3588256835938, "learning_rate": 7.664232198325198e-06, "loss": 88.372, "step": 48140 }, { "epoch": 0.3983124457128676, "grad_norm": 786.80419921875, "learning_rate": 7.663038622873999e-06, "loss": 83.9941, "step": 48150 }, { "epoch": 0.3983951689622368, "grad_norm": 1034.7119140625, "learning_rate": 7.66184483553704e-06, "loss": 91.0622, "step": 48160 }, { "epoch": 0.39847789221160607, "grad_norm": 1714.0181884765625, "learning_rate": 7.660650836409302e-06, "loss": 88.6052, "step": 48170 }, { "epoch": 0.3985606154609753, "grad_norm": 1282.2288818359375, "learning_rate": 7.65945662558579e-06, "loss": 102.4522, "step": 48180 }, { "epoch": 0.3986433387103445, "grad_norm": 1070.42626953125, "learning_rate": 7.658262203161517e-06, "loss": 120.5579, "step": 48190 }, { "epoch": 0.3987260619597138, "grad_norm": 1412.6171875, "learning_rate": 7.65706756923152e-06, "loss": 88.6738, "step": 48200 }, { "epoch": 0.398808785209083, "grad_norm": 745.8485107421875, "learning_rate": 7.655872723890854e-06, "loss": 116.865, "step": 48210 }, { "epoch": 0.3988915084584522, "grad_norm": 938.5888671875, "learning_rate": 7.654677667234582e-06, "loss": 112.7861, "step": 48220 }, { "epoch": 0.3989742317078215, "grad_norm": 692.1904296875, "learning_rate": 7.65348239935779e-06, "loss": 93.3395, "step": 48230 }, { "epoch": 0.39905695495719073, "grad_norm": 892.9244384765625, "learning_rate": 7.652286920355583e-06, "loss": 123.322, "step": 48240 }, { "epoch": 0.39913967820655993, "grad_norm": 353.6568603515625, "learning_rate": 7.651091230323079e-06, "loss": 75.8525, "step": 48250 }, { "epoch": 0.3992224014559292, "grad_norm": 1054.718017578125, "learning_rate": 7.649895329355411e-06, "loss": 144.2353, "step": 48260 }, { "epoch": 0.39930512470529844, "grad_norm": 854.1619873046875, "learning_rate": 7.648699217547733e-06, "loss": 89.6152, "step": 48270 }, { "epoch": 0.39938784795466764, "grad_norm": 1015.1759033203125, "learning_rate": 7.647502894995215e-06, "loss": 97.5599, "step": 48280 }, { "epoch": 0.3994705712040369, "grad_norm": 691.9119873046875, "learning_rate": 7.646306361793042e-06, "loss": 95.9693, "step": 48290 }, { "epoch": 0.39955329445340615, "grad_norm": 545.0184936523438, "learning_rate": 7.645109618036416e-06, "loss": 89.2513, "step": 48300 }, { "epoch": 0.39963601770277535, "grad_norm": 1180.732666015625, "learning_rate": 7.643912663820559e-06, "loss": 107.0819, "step": 48310 }, { "epoch": 0.3997187409521446, "grad_norm": 634.8629760742188, "learning_rate": 7.642715499240702e-06, "loss": 93.8869, "step": 48320 }, { "epoch": 0.39980146420151386, "grad_norm": 878.6117553710938, "learning_rate": 7.641518124392105e-06, "loss": 117.2257, "step": 48330 }, { "epoch": 0.39988418745088306, "grad_norm": 741.9995727539062, "learning_rate": 7.640320539370032e-06, "loss": 92.1223, "step": 48340 }, { "epoch": 0.3999669107002523, "grad_norm": 693.643310546875, "learning_rate": 7.63912274426977e-06, "loss": 102.7763, "step": 48350 }, { "epoch": 0.40004963394962156, "grad_norm": 747.3237915039062, "learning_rate": 7.637924739186624e-06, "loss": 83.4088, "step": 48360 }, { "epoch": 0.40013235719899076, "grad_norm": 1104.4271240234375, "learning_rate": 7.636726524215913e-06, "loss": 96.2126, "step": 48370 }, { "epoch": 0.40021508044836, "grad_norm": 1066.0506591796875, "learning_rate": 7.635528099452974e-06, "loss": 137.2111, "step": 48380 }, { "epoch": 0.40029780369772927, "grad_norm": 764.3858032226562, "learning_rate": 7.634329464993158e-06, "loss": 99.6301, "step": 48390 }, { "epoch": 0.40038052694709847, "grad_norm": 619.43017578125, "learning_rate": 7.633130620931837e-06, "loss": 95.3814, "step": 48400 }, { "epoch": 0.4004632501964677, "grad_norm": 576.7614135742188, "learning_rate": 7.631931567364398e-06, "loss": 115.1573, "step": 48410 }, { "epoch": 0.400545973445837, "grad_norm": 1004.3753662109375, "learning_rate": 7.630732304386244e-06, "loss": 90.6397, "step": 48420 }, { "epoch": 0.4006286966952062, "grad_norm": 1928.7547607421875, "learning_rate": 7.629532832092792e-06, "loss": 98.6528, "step": 48430 }, { "epoch": 0.40071141994457543, "grad_norm": 1009.5800170898438, "learning_rate": 7.62833315057948e-06, "loss": 105.3359, "step": 48440 }, { "epoch": 0.4007941431939447, "grad_norm": 573.4387817382812, "learning_rate": 7.627133259941762e-06, "loss": 95.6264, "step": 48450 }, { "epoch": 0.4008768664433139, "grad_norm": 1055.6937255859375, "learning_rate": 7.625933160275109e-06, "loss": 98.1826, "step": 48460 }, { "epoch": 0.40095958969268314, "grad_norm": 636.2737426757812, "learning_rate": 7.6247328516750055e-06, "loss": 106.1883, "step": 48470 }, { "epoch": 0.40104231294205234, "grad_norm": 624.5562744140625, "learning_rate": 7.623532334236954e-06, "loss": 115.2045, "step": 48480 }, { "epoch": 0.4011250361914216, "grad_norm": 1036.740234375, "learning_rate": 7.622331608056474e-06, "loss": 100.7731, "step": 48490 }, { "epoch": 0.40120775944079085, "grad_norm": 1890.8753662109375, "learning_rate": 7.621130673229105e-06, "loss": 98.8333, "step": 48500 }, { "epoch": 0.40129048269016004, "grad_norm": 967.46044921875, "learning_rate": 7.619929529850397e-06, "loss": 62.4726, "step": 48510 }, { "epoch": 0.4013732059395293, "grad_norm": 861.5704956054688, "learning_rate": 7.618728178015919e-06, "loss": 94.6595, "step": 48520 }, { "epoch": 0.40145592918889855, "grad_norm": 1017.3840942382812, "learning_rate": 7.617526617821259e-06, "loss": 107.7875, "step": 48530 }, { "epoch": 0.40153865243826775, "grad_norm": 2595.431640625, "learning_rate": 7.616324849362019e-06, "loss": 104.2326, "step": 48540 }, { "epoch": 0.401621375687637, "grad_norm": 767.1851196289062, "learning_rate": 7.615122872733819e-06, "loss": 126.0866, "step": 48550 }, { "epoch": 0.40170409893700626, "grad_norm": 901.1397705078125, "learning_rate": 7.613920688032293e-06, "loss": 120.4315, "step": 48560 }, { "epoch": 0.40178682218637546, "grad_norm": 1408.295166015625, "learning_rate": 7.612718295353094e-06, "loss": 104.612, "step": 48570 }, { "epoch": 0.4018695454357447, "grad_norm": 746.8675537109375, "learning_rate": 7.61151569479189e-06, "loss": 84.4078, "step": 48580 }, { "epoch": 0.40195226868511397, "grad_norm": 925.1431884765625, "learning_rate": 7.610312886444369e-06, "loss": 89.8368, "step": 48590 }, { "epoch": 0.40203499193448317, "grad_norm": 757.10693359375, "learning_rate": 7.60910987040623e-06, "loss": 92.3049, "step": 48600 }, { "epoch": 0.4021177151838524, "grad_norm": 777.216552734375, "learning_rate": 7.607906646773195e-06, "loss": 105.3716, "step": 48610 }, { "epoch": 0.4022004384332217, "grad_norm": 751.5682983398438, "learning_rate": 7.606703215640995e-06, "loss": 92.2909, "step": 48620 }, { "epoch": 0.4022831616825909, "grad_norm": 1239.3206787109375, "learning_rate": 7.605499577105382e-06, "loss": 106.9025, "step": 48630 }, { "epoch": 0.4023658849319601, "grad_norm": 750.9842529296875, "learning_rate": 7.604295731262128e-06, "loss": 115.5005, "step": 48640 }, { "epoch": 0.4024486081813294, "grad_norm": 1378.44677734375, "learning_rate": 7.603091678207013e-06, "loss": 95.4831, "step": 48650 }, { "epoch": 0.4025313314306986, "grad_norm": 883.4937133789062, "learning_rate": 7.60188741803584e-06, "loss": 94.2378, "step": 48660 }, { "epoch": 0.40261405468006783, "grad_norm": 927.4861450195312, "learning_rate": 7.600682950844428e-06, "loss": 97.9098, "step": 48670 }, { "epoch": 0.4026967779294371, "grad_norm": 999.6773071289062, "learning_rate": 7.599478276728607e-06, "loss": 88.1278, "step": 48680 }, { "epoch": 0.4027795011788063, "grad_norm": 981.5151977539062, "learning_rate": 7.5982733957842304e-06, "loss": 96.1045, "step": 48690 }, { "epoch": 0.40286222442817554, "grad_norm": 779.521728515625, "learning_rate": 7.597068308107165e-06, "loss": 96.5194, "step": 48700 }, { "epoch": 0.4029449476775448, "grad_norm": 723.9231567382812, "learning_rate": 7.595863013793292e-06, "loss": 105.2723, "step": 48710 }, { "epoch": 0.403027670926914, "grad_norm": 688.5134887695312, "learning_rate": 7.594657512938513e-06, "loss": 73.8923, "step": 48720 }, { "epoch": 0.40311039417628325, "grad_norm": 1107.6732177734375, "learning_rate": 7.593451805638743e-06, "loss": 84.0132, "step": 48730 }, { "epoch": 0.4031931174256525, "grad_norm": 971.2855834960938, "learning_rate": 7.592245891989914e-06, "loss": 126.4593, "step": 48740 }, { "epoch": 0.4032758406750217, "grad_norm": 906.73388671875, "learning_rate": 7.5910397720879785e-06, "loss": 112.5872, "step": 48750 }, { "epoch": 0.40335856392439096, "grad_norm": 976.5882568359375, "learning_rate": 7.589833446028898e-06, "loss": 113.1635, "step": 48760 }, { "epoch": 0.4034412871737602, "grad_norm": 517.5700073242188, "learning_rate": 7.5886269139086565e-06, "loss": 95.9183, "step": 48770 }, { "epoch": 0.4035240104231294, "grad_norm": 1049.484375, "learning_rate": 7.587420175823252e-06, "loss": 114.5341, "step": 48780 }, { "epoch": 0.40360673367249866, "grad_norm": 1442.658447265625, "learning_rate": 7.586213231868699e-06, "loss": 89.2601, "step": 48790 }, { "epoch": 0.4036894569218679, "grad_norm": 923.6640625, "learning_rate": 7.585006082141028e-06, "loss": 100.5833, "step": 48800 }, { "epoch": 0.4037721801712371, "grad_norm": 882.6138916015625, "learning_rate": 7.583798726736286e-06, "loss": 111.9895, "step": 48810 }, { "epoch": 0.40385490342060637, "grad_norm": 841.87548828125, "learning_rate": 7.5825911657505365e-06, "loss": 110.3644, "step": 48820 }, { "epoch": 0.40393762666997557, "grad_norm": 686.5569458007812, "learning_rate": 7.581383399279863e-06, "loss": 113.7908, "step": 48830 }, { "epoch": 0.4040203499193448, "grad_norm": 1229.239501953125, "learning_rate": 7.580175427420358e-06, "loss": 94.2136, "step": 48840 }, { "epoch": 0.4041030731687141, "grad_norm": 1013.9456176757812, "learning_rate": 7.578967250268137e-06, "loss": 112.4359, "step": 48850 }, { "epoch": 0.4041857964180833, "grad_norm": 843.6847534179688, "learning_rate": 7.577758867919325e-06, "loss": 119.3482, "step": 48860 }, { "epoch": 0.40426851966745253, "grad_norm": 660.8492431640625, "learning_rate": 7.576550280470072e-06, "loss": 105.5132, "step": 48870 }, { "epoch": 0.4043512429168218, "grad_norm": 1110.703125, "learning_rate": 7.5753414880165365e-06, "loss": 117.6248, "step": 48880 }, { "epoch": 0.404433966166191, "grad_norm": 971.7891235351562, "learning_rate": 7.5741324906548996e-06, "loss": 84.8425, "step": 48890 }, { "epoch": 0.40451668941556024, "grad_norm": 674.7775268554688, "learning_rate": 7.572923288481355e-06, "loss": 93.9152, "step": 48900 }, { "epoch": 0.4045994126649295, "grad_norm": 1152.0150146484375, "learning_rate": 7.571713881592109e-06, "loss": 97.321, "step": 48910 }, { "epoch": 0.4046821359142987, "grad_norm": 813.1100463867188, "learning_rate": 7.570504270083394e-06, "loss": 107.887, "step": 48920 }, { "epoch": 0.40476485916366794, "grad_norm": 860.7267456054688, "learning_rate": 7.569294454051452e-06, "loss": 93.7367, "step": 48930 }, { "epoch": 0.4048475824130372, "grad_norm": 641.8156127929688, "learning_rate": 7.568084433592542e-06, "loss": 103.0248, "step": 48940 }, { "epoch": 0.4049303056624064, "grad_norm": 1134.04736328125, "learning_rate": 7.566874208802939e-06, "loss": 101.0284, "step": 48950 }, { "epoch": 0.40501302891177565, "grad_norm": 596.593017578125, "learning_rate": 7.5656637797789335e-06, "loss": 80.7004, "step": 48960 }, { "epoch": 0.4050957521611449, "grad_norm": 791.4570922851562, "learning_rate": 7.564453146616837e-06, "loss": 109.8588, "step": 48970 }, { "epoch": 0.4051784754105141, "grad_norm": 681.3819580078125, "learning_rate": 7.563242309412975e-06, "loss": 105.6946, "step": 48980 }, { "epoch": 0.40526119865988336, "grad_norm": 881.9998168945312, "learning_rate": 7.562031268263686e-06, "loss": 104.3682, "step": 48990 }, { "epoch": 0.4053439219092526, "grad_norm": 985.5377807617188, "learning_rate": 7.5608200232653254e-06, "loss": 146.3788, "step": 49000 }, { "epoch": 0.4054266451586218, "grad_norm": 547.89697265625, "learning_rate": 7.5596085745142654e-06, "loss": 126.0642, "step": 49010 }, { "epoch": 0.40550936840799107, "grad_norm": 1244.3304443359375, "learning_rate": 7.558396922106903e-06, "loss": 105.9497, "step": 49020 }, { "epoch": 0.4055920916573603, "grad_norm": 1159.8284912109375, "learning_rate": 7.557185066139638e-06, "loss": 123.0697, "step": 49030 }, { "epoch": 0.4056748149067295, "grad_norm": 1442.5506591796875, "learning_rate": 7.555973006708892e-06, "loss": 124.4241, "step": 49040 }, { "epoch": 0.4057575381560988, "grad_norm": 1344.17236328125, "learning_rate": 7.554760743911104e-06, "loss": 98.3027, "step": 49050 }, { "epoch": 0.40584026140546803, "grad_norm": 733.8927001953125, "learning_rate": 7.553548277842729e-06, "loss": 90.8391, "step": 49060 }, { "epoch": 0.4059229846548372, "grad_norm": 1737.365966796875, "learning_rate": 7.5523356086002364e-06, "loss": 128.2919, "step": 49070 }, { "epoch": 0.4060057079042065, "grad_norm": 1121.77783203125, "learning_rate": 7.551122736280113e-06, "loss": 114.6872, "step": 49080 }, { "epoch": 0.40608843115357574, "grad_norm": 830.7010498046875, "learning_rate": 7.549909660978863e-06, "loss": 103.0037, "step": 49090 }, { "epoch": 0.40617115440294493, "grad_norm": 950.9368896484375, "learning_rate": 7.548696382793002e-06, "loss": 76.3594, "step": 49100 }, { "epoch": 0.4062538776523142, "grad_norm": 721.6538696289062, "learning_rate": 7.547482901819066e-06, "loss": 101.0682, "step": 49110 }, { "epoch": 0.40633660090168344, "grad_norm": 1179.335693359375, "learning_rate": 7.5462692181536094e-06, "loss": 113.5078, "step": 49120 }, { "epoch": 0.40641932415105264, "grad_norm": 1521.753173828125, "learning_rate": 7.545055331893195e-06, "loss": 127.0566, "step": 49130 }, { "epoch": 0.4065020474004219, "grad_norm": 718.9688720703125, "learning_rate": 7.543841243134409e-06, "loss": 77.3431, "step": 49140 }, { "epoch": 0.40658477064979115, "grad_norm": 1070.0787353515625, "learning_rate": 7.5426269519738495e-06, "loss": 108.8352, "step": 49150 }, { "epoch": 0.40666749389916035, "grad_norm": 514.2880859375, "learning_rate": 7.541412458508133e-06, "loss": 104.5422, "step": 49160 }, { "epoch": 0.4067502171485296, "grad_norm": 774.6307983398438, "learning_rate": 7.54019776283389e-06, "loss": 100.5812, "step": 49170 }, { "epoch": 0.40683294039789886, "grad_norm": 600.9510498046875, "learning_rate": 7.53898286504777e-06, "loss": 75.3506, "step": 49180 }, { "epoch": 0.40691566364726806, "grad_norm": 843.1857299804688, "learning_rate": 7.537767765246436e-06, "loss": 110.9489, "step": 49190 }, { "epoch": 0.4069983868966373, "grad_norm": 957.2750854492188, "learning_rate": 7.536552463526565e-06, "loss": 86.3647, "step": 49200 }, { "epoch": 0.4070811101460065, "grad_norm": 1061.4150390625, "learning_rate": 7.535336959984858e-06, "loss": 127.3531, "step": 49210 }, { "epoch": 0.40716383339537576, "grad_norm": 2062.2001953125, "learning_rate": 7.5341212547180246e-06, "loss": 111.2139, "step": 49220 }, { "epoch": 0.407246556644745, "grad_norm": 768.780517578125, "learning_rate": 7.532905347822792e-06, "loss": 116.6353, "step": 49230 }, { "epoch": 0.4073292798941142, "grad_norm": 780.7527465820312, "learning_rate": 7.5316892393959064e-06, "loss": 91.1807, "step": 49240 }, { "epoch": 0.40741200314348347, "grad_norm": 487.50665283203125, "learning_rate": 7.530472929534126e-06, "loss": 110.9754, "step": 49250 }, { "epoch": 0.4074947263928527, "grad_norm": 578.7011108398438, "learning_rate": 7.529256418334228e-06, "loss": 131.2194, "step": 49260 }, { "epoch": 0.4075774496422219, "grad_norm": 818.4442749023438, "learning_rate": 7.528039705893006e-06, "loss": 98.4813, "step": 49270 }, { "epoch": 0.4076601728915912, "grad_norm": 555.372314453125, "learning_rate": 7.5268227923072665e-06, "loss": 100.5567, "step": 49280 }, { "epoch": 0.40774289614096043, "grad_norm": 1478.378173828125, "learning_rate": 7.525605677673831e-06, "loss": 94.0006, "step": 49290 }, { "epoch": 0.40782561939032963, "grad_norm": 1054.407958984375, "learning_rate": 7.524388362089545e-06, "loss": 110.3638, "step": 49300 }, { "epoch": 0.4079083426396989, "grad_norm": 936.5081176757812, "learning_rate": 7.523170845651263e-06, "loss": 108.1984, "step": 49310 }, { "epoch": 0.40799106588906814, "grad_norm": 714.961181640625, "learning_rate": 7.521953128455856e-06, "loss": 109.1958, "step": 49320 }, { "epoch": 0.40807378913843734, "grad_norm": 850.507080078125, "learning_rate": 7.520735210600213e-06, "loss": 117.9377, "step": 49330 }, { "epoch": 0.4081565123878066, "grad_norm": 815.2796020507812, "learning_rate": 7.519517092181237e-06, "loss": 128.2985, "step": 49340 }, { "epoch": 0.40823923563717585, "grad_norm": 1209.8961181640625, "learning_rate": 7.518298773295849e-06, "loss": 108.5808, "step": 49350 }, { "epoch": 0.40832195888654504, "grad_norm": 1075.5911865234375, "learning_rate": 7.517080254040985e-06, "loss": 87.4483, "step": 49360 }, { "epoch": 0.4084046821359143, "grad_norm": 952.4523315429688, "learning_rate": 7.5158615345136e-06, "loss": 85.5586, "step": 49370 }, { "epoch": 0.40848740538528355, "grad_norm": 688.7626342773438, "learning_rate": 7.514642614810655e-06, "loss": 113.6158, "step": 49380 }, { "epoch": 0.40857012863465275, "grad_norm": 754.0592651367188, "learning_rate": 7.51342349502914e-06, "loss": 95.794, "step": 49390 }, { "epoch": 0.408652851884022, "grad_norm": 1221.88818359375, "learning_rate": 7.512204175266052e-06, "loss": 99.691, "step": 49400 }, { "epoch": 0.40873557513339126, "grad_norm": 781.3279418945312, "learning_rate": 7.510984655618407e-06, "loss": 105.6204, "step": 49410 }, { "epoch": 0.40881829838276046, "grad_norm": 593.7680053710938, "learning_rate": 7.509764936183237e-06, "loss": 82.4356, "step": 49420 }, { "epoch": 0.4089010216321297, "grad_norm": 2055.203369140625, "learning_rate": 7.5085450170575876e-06, "loss": 120.1463, "step": 49430 }, { "epoch": 0.40898374488149897, "grad_norm": 1296.7412109375, "learning_rate": 7.5073248983385265e-06, "loss": 130.0576, "step": 49440 }, { "epoch": 0.40906646813086817, "grad_norm": 754.3132934570312, "learning_rate": 7.50610458012313e-06, "loss": 88.8123, "step": 49450 }, { "epoch": 0.4091491913802374, "grad_norm": 827.0701904296875, "learning_rate": 7.504884062508493e-06, "loss": 85.6176, "step": 49460 }, { "epoch": 0.4092319146296067, "grad_norm": 533.4575805664062, "learning_rate": 7.503663345591726e-06, "loss": 99.3742, "step": 49470 }, { "epoch": 0.4093146378789759, "grad_norm": 914.8536376953125, "learning_rate": 7.502442429469956e-06, "loss": 123.0487, "step": 49480 }, { "epoch": 0.4093973611283451, "grad_norm": 1090.47998046875, "learning_rate": 7.501221314240329e-06, "loss": 110.9421, "step": 49490 }, { "epoch": 0.4094800843777144, "grad_norm": 969.5048828125, "learning_rate": 7.500000000000001e-06, "loss": 132.2819, "step": 49500 }, { "epoch": 0.4095628076270836, "grad_norm": 1003.5017700195312, "learning_rate": 7.4987784868461455e-06, "loss": 125.4059, "step": 49510 }, { "epoch": 0.40964553087645283, "grad_norm": 1233.950927734375, "learning_rate": 7.497556774875953e-06, "loss": 100.2119, "step": 49520 }, { "epoch": 0.4097282541258221, "grad_norm": 638.7094116210938, "learning_rate": 7.496334864186632e-06, "loss": 133.3458, "step": 49530 }, { "epoch": 0.4098109773751913, "grad_norm": 448.5832214355469, "learning_rate": 7.4951127548754025e-06, "loss": 95.0754, "step": 49540 }, { "epoch": 0.40989370062456054, "grad_norm": 1257.3818359375, "learning_rate": 7.4938904470395e-06, "loss": 91.5809, "step": 49550 }, { "epoch": 0.40997642387392974, "grad_norm": 1006.02294921875, "learning_rate": 7.492667940776182e-06, "loss": 102.9834, "step": 49560 }, { "epoch": 0.410059147123299, "grad_norm": 1712.72607421875, "learning_rate": 7.491445236182715e-06, "loss": 103.0404, "step": 49570 }, { "epoch": 0.41014187037266825, "grad_norm": 799.1417236328125, "learning_rate": 7.490222333356384e-06, "loss": 106.2483, "step": 49580 }, { "epoch": 0.41022459362203745, "grad_norm": 1142.0809326171875, "learning_rate": 7.488999232394492e-06, "loss": 151.4166, "step": 49590 }, { "epoch": 0.4103073168714067, "grad_norm": 1388.60693359375, "learning_rate": 7.487775933394353e-06, "loss": 103.9434, "step": 49600 }, { "epoch": 0.41039004012077596, "grad_norm": 835.6099853515625, "learning_rate": 7.4865524364533e-06, "loss": 104.3207, "step": 49610 }, { "epoch": 0.41047276337014516, "grad_norm": 653.7705688476562, "learning_rate": 7.485328741668683e-06, "loss": 83.743, "step": 49620 }, { "epoch": 0.4105554866195144, "grad_norm": 870.46484375, "learning_rate": 7.484104849137862e-06, "loss": 108.1679, "step": 49630 }, { "epoch": 0.41063820986888366, "grad_norm": 839.9007568359375, "learning_rate": 7.482880758958219e-06, "loss": 83.2425, "step": 49640 }, { "epoch": 0.41072093311825286, "grad_norm": 931.4784545898438, "learning_rate": 7.48165647122715e-06, "loss": 114.039, "step": 49650 }, { "epoch": 0.4108036563676221, "grad_norm": 776.007080078125, "learning_rate": 7.480431986042065e-06, "loss": 138.1686, "step": 49660 }, { "epoch": 0.41088637961699137, "grad_norm": 890.8333129882812, "learning_rate": 7.47920730350039e-06, "loss": 129.4647, "step": 49670 }, { "epoch": 0.41096910286636057, "grad_norm": 1296.5220947265625, "learning_rate": 7.477982423699568e-06, "loss": 120.0857, "step": 49680 }, { "epoch": 0.4110518261157298, "grad_norm": 1131.2828369140625, "learning_rate": 7.476757346737057e-06, "loss": 112.3677, "step": 49690 }, { "epoch": 0.4111345493650991, "grad_norm": 1283.157470703125, "learning_rate": 7.47553207271033e-06, "loss": 114.4162, "step": 49700 }, { "epoch": 0.4112172726144683, "grad_norm": 309.3968200683594, "learning_rate": 7.474306601716877e-06, "loss": 89.349, "step": 49710 }, { "epoch": 0.41129999586383753, "grad_norm": 961.7044677734375, "learning_rate": 7.473080933854205e-06, "loss": 81.6981, "step": 49720 }, { "epoch": 0.4113827191132068, "grad_norm": 427.75469970703125, "learning_rate": 7.471855069219831e-06, "loss": 111.079, "step": 49730 }, { "epoch": 0.411465442362576, "grad_norm": 973.0006103515625, "learning_rate": 7.470629007911294e-06, "loss": 99.5809, "step": 49740 }, { "epoch": 0.41154816561194524, "grad_norm": 1471.537109375, "learning_rate": 7.469402750026147e-06, "loss": 130.2198, "step": 49750 }, { "epoch": 0.4116308888613145, "grad_norm": 765.6078491210938, "learning_rate": 7.468176295661955e-06, "loss": 124.7598, "step": 49760 }, { "epoch": 0.4117136121106837, "grad_norm": 918.6041259765625, "learning_rate": 7.466949644916301e-06, "loss": 103.558, "step": 49770 }, { "epoch": 0.41179633536005295, "grad_norm": 933.6361694335938, "learning_rate": 7.465722797886788e-06, "loss": 94.5863, "step": 49780 }, { "epoch": 0.4118790586094222, "grad_norm": 623.9163818359375, "learning_rate": 7.464495754671027e-06, "loss": 86.9486, "step": 49790 }, { "epoch": 0.4119617818587914, "grad_norm": 592.259033203125, "learning_rate": 7.4632685153666505e-06, "loss": 111.5722, "step": 49800 }, { "epoch": 0.41204450510816065, "grad_norm": 430.714599609375, "learning_rate": 7.462041080071301e-06, "loss": 81.418, "step": 49810 }, { "epoch": 0.4121272283575299, "grad_norm": 1098.07373046875, "learning_rate": 7.460813448882643e-06, "loss": 145.5894, "step": 49820 }, { "epoch": 0.4122099516068991, "grad_norm": 630.3162841796875, "learning_rate": 7.459585621898353e-06, "loss": 88.393, "step": 49830 }, { "epoch": 0.41229267485626836, "grad_norm": 783.9848022460938, "learning_rate": 7.4583575992161235e-06, "loss": 73.975, "step": 49840 }, { "epoch": 0.4123753981056376, "grad_norm": 645.27392578125, "learning_rate": 7.457129380933662e-06, "loss": 108.0057, "step": 49850 }, { "epoch": 0.4124581213550068, "grad_norm": 522.0549926757812, "learning_rate": 7.4559009671486906e-06, "loss": 65.7118, "step": 49860 }, { "epoch": 0.41254084460437607, "grad_norm": 706.9224853515625, "learning_rate": 7.454672357958951e-06, "loss": 127.6793, "step": 49870 }, { "epoch": 0.4126235678537453, "grad_norm": 1543.4708251953125, "learning_rate": 7.453443553462198e-06, "loss": 138.2186, "step": 49880 }, { "epoch": 0.4127062911031145, "grad_norm": 1025.1986083984375, "learning_rate": 7.4522145537562015e-06, "loss": 117.5677, "step": 49890 }, { "epoch": 0.4127890143524838, "grad_norm": 611.4989013671875, "learning_rate": 7.450985358938747e-06, "loss": 106.0235, "step": 49900 }, { "epoch": 0.41287173760185303, "grad_norm": 1154.43701171875, "learning_rate": 7.449755969107635e-06, "loss": 113.3347, "step": 49910 }, { "epoch": 0.4129544608512222, "grad_norm": 1076.0106201171875, "learning_rate": 7.4485263843606835e-06, "loss": 104.9749, "step": 49920 }, { "epoch": 0.4130371841005915, "grad_norm": 1215.836669921875, "learning_rate": 7.447296604795726e-06, "loss": 119.1512, "step": 49930 }, { "epoch": 0.4131199073499607, "grad_norm": 1207.3167724609375, "learning_rate": 7.4460666305106084e-06, "loss": 101.0483, "step": 49940 }, { "epoch": 0.41320263059932993, "grad_norm": 1147.7183837890625, "learning_rate": 7.444836461603195e-06, "loss": 101.2153, "step": 49950 }, { "epoch": 0.4132853538486992, "grad_norm": 784.3466796875, "learning_rate": 7.443606098171363e-06, "loss": 86.7002, "step": 49960 }, { "epoch": 0.4133680770980684, "grad_norm": 452.4320068359375, "learning_rate": 7.442375540313012e-06, "loss": 94.4948, "step": 49970 }, { "epoch": 0.41345080034743764, "grad_norm": 911.47021484375, "learning_rate": 7.441144788126045e-06, "loss": 110.6758, "step": 49980 }, { "epoch": 0.4135335235968069, "grad_norm": 1147.6248779296875, "learning_rate": 7.4399138417083925e-06, "loss": 95.634, "step": 49990 }, { "epoch": 0.4136162468461761, "grad_norm": 1253.6624755859375, "learning_rate": 7.438682701157993e-06, "loss": 141.2699, "step": 50000 }, { "epoch": 0.41369897009554535, "grad_norm": 1440.07080078125, "learning_rate": 7.437451366572803e-06, "loss": 109.6948, "step": 50010 }, { "epoch": 0.4137816933449146, "grad_norm": 833.8212280273438, "learning_rate": 7.436219838050793e-06, "loss": 106.7741, "step": 50020 }, { "epoch": 0.4138644165942838, "grad_norm": 999.9931640625, "learning_rate": 7.4349881156899525e-06, "loss": 110.1778, "step": 50030 }, { "epoch": 0.41394713984365306, "grad_norm": 1517.33251953125, "learning_rate": 7.433756199588282e-06, "loss": 106.7463, "step": 50040 }, { "epoch": 0.4140298630930223, "grad_norm": 749.0157470703125, "learning_rate": 7.4325240898438e-06, "loss": 86.2277, "step": 50050 }, { "epoch": 0.4141125863423915, "grad_norm": 1004.859619140625, "learning_rate": 7.4312917865545406e-06, "loss": 101.3041, "step": 50060 }, { "epoch": 0.41419530959176076, "grad_norm": 2136.435546875, "learning_rate": 7.430059289818552e-06, "loss": 109.2253, "step": 50070 }, { "epoch": 0.41427803284113, "grad_norm": 1305.9910888671875, "learning_rate": 7.4288265997338985e-06, "loss": 116.3073, "step": 50080 }, { "epoch": 0.4143607560904992, "grad_norm": 1031.19921875, "learning_rate": 7.427593716398658e-06, "loss": 136.2479, "step": 50090 }, { "epoch": 0.41444347933986847, "grad_norm": 539.9341430664062, "learning_rate": 7.426360639910927e-06, "loss": 86.8462, "step": 50100 }, { "epoch": 0.4145262025892377, "grad_norm": 527.5219116210938, "learning_rate": 7.425127370368815e-06, "loss": 104.5311, "step": 50110 }, { "epoch": 0.4146089258386069, "grad_norm": 836.485107421875, "learning_rate": 7.423893907870449e-06, "loss": 125.8744, "step": 50120 }, { "epoch": 0.4146916490879762, "grad_norm": 514.3257446289062, "learning_rate": 7.422660252513969e-06, "loss": 100.2241, "step": 50130 }, { "epoch": 0.41477437233734543, "grad_norm": 773.184814453125, "learning_rate": 7.421426404397531e-06, "loss": 82.5671, "step": 50140 }, { "epoch": 0.41485709558671463, "grad_norm": 1118.6546630859375, "learning_rate": 7.420192363619305e-06, "loss": 116.5798, "step": 50150 }, { "epoch": 0.4149398188360839, "grad_norm": 1059.586669921875, "learning_rate": 7.418958130277483e-06, "loss": 94.7955, "step": 50160 }, { "epoch": 0.41502254208545314, "grad_norm": 468.1589660644531, "learning_rate": 7.417723704470261e-06, "loss": 92.9759, "step": 50170 }, { "epoch": 0.41510526533482234, "grad_norm": 2084.325927734375, "learning_rate": 7.4164890862958615e-06, "loss": 128.8667, "step": 50180 }, { "epoch": 0.4151879885841916, "grad_norm": 834.3111572265625, "learning_rate": 7.415254275852515e-06, "loss": 103.7669, "step": 50190 }, { "epoch": 0.41527071183356085, "grad_norm": 869.2041625976562, "learning_rate": 7.414019273238471e-06, "loss": 88.4897, "step": 50200 }, { "epoch": 0.41535343508293004, "grad_norm": 1044.08056640625, "learning_rate": 7.4127840785519915e-06, "loss": 98.155, "step": 50210 }, { "epoch": 0.4154361583322993, "grad_norm": 967.443603515625, "learning_rate": 7.411548691891357e-06, "loss": 115.068, "step": 50220 }, { "epoch": 0.41551888158166855, "grad_norm": 534.2567138671875, "learning_rate": 7.41031311335486e-06, "loss": 106.5501, "step": 50230 }, { "epoch": 0.41560160483103775, "grad_norm": 1074.7396240234375, "learning_rate": 7.409077343040809e-06, "loss": 153.6957, "step": 50240 }, { "epoch": 0.415684328080407, "grad_norm": 657.9391479492188, "learning_rate": 7.407841381047533e-06, "loss": 99.6328, "step": 50250 }, { "epoch": 0.41576705132977626, "grad_norm": 750.8782958984375, "learning_rate": 7.406605227473367e-06, "loss": 88.5056, "step": 50260 }, { "epoch": 0.41584977457914546, "grad_norm": 912.8604125976562, "learning_rate": 7.405368882416668e-06, "loss": 127.2815, "step": 50270 }, { "epoch": 0.4159324978285147, "grad_norm": 722.0275268554688, "learning_rate": 7.404132345975806e-06, "loss": 89.6634, "step": 50280 }, { "epoch": 0.4160152210778839, "grad_norm": 503.6108093261719, "learning_rate": 7.4028956182491665e-06, "loss": 65.2768, "step": 50290 }, { "epoch": 0.41609794432725317, "grad_norm": 1528.93212890625, "learning_rate": 7.401658699335151e-06, "loss": 110.4871, "step": 50300 }, { "epoch": 0.4161806675766224, "grad_norm": 742.7732543945312, "learning_rate": 7.400421589332175e-06, "loss": 85.2059, "step": 50310 }, { "epoch": 0.4162633908259916, "grad_norm": 1395.8526611328125, "learning_rate": 7.39918428833867e-06, "loss": 94.5206, "step": 50320 }, { "epoch": 0.4163461140753609, "grad_norm": 978.8078002929688, "learning_rate": 7.397946796453081e-06, "loss": 122.651, "step": 50330 }, { "epoch": 0.41642883732473013, "grad_norm": 738.7698974609375, "learning_rate": 7.39670911377387e-06, "loss": 128.4523, "step": 50340 }, { "epoch": 0.4165115605740993, "grad_norm": 924.0703735351562, "learning_rate": 7.395471240399515e-06, "loss": 100.5796, "step": 50350 }, { "epoch": 0.4165942838234686, "grad_norm": 1406.5579833984375, "learning_rate": 7.394233176428508e-06, "loss": 84.9948, "step": 50360 }, { "epoch": 0.41667700707283784, "grad_norm": 692.1464233398438, "learning_rate": 7.3929949219593545e-06, "loss": 108.0571, "step": 50370 }, { "epoch": 0.41675973032220703, "grad_norm": 935.2982177734375, "learning_rate": 7.391756477090577e-06, "loss": 106.4006, "step": 50380 }, { "epoch": 0.4168424535715763, "grad_norm": 803.556396484375, "learning_rate": 7.3905178419207126e-06, "loss": 91.8345, "step": 50390 }, { "epoch": 0.41692517682094554, "grad_norm": 170.64480590820312, "learning_rate": 7.3892790165483164e-06, "loss": 86.5613, "step": 50400 }, { "epoch": 0.41700790007031474, "grad_norm": 846.9085693359375, "learning_rate": 7.388040001071953e-06, "loss": 116.5208, "step": 50410 }, { "epoch": 0.417090623319684, "grad_norm": 488.96343994140625, "learning_rate": 7.386800795590208e-06, "loss": 84.3048, "step": 50420 }, { "epoch": 0.41717334656905325, "grad_norm": 2128.975830078125, "learning_rate": 7.385561400201675e-06, "loss": 101.9137, "step": 50430 }, { "epoch": 0.41725606981842245, "grad_norm": 886.4940795898438, "learning_rate": 7.384321815004971e-06, "loss": 122.7343, "step": 50440 }, { "epoch": 0.4173387930677917, "grad_norm": 1239.8963623046875, "learning_rate": 7.383082040098723e-06, "loss": 118.5375, "step": 50450 }, { "epoch": 0.41742151631716096, "grad_norm": 598.2474365234375, "learning_rate": 7.381842075581573e-06, "loss": 82.4002, "step": 50460 }, { "epoch": 0.41750423956653016, "grad_norm": 927.5972900390625, "learning_rate": 7.380601921552181e-06, "loss": 96.2848, "step": 50470 }, { "epoch": 0.4175869628158994, "grad_norm": 1360.900146484375, "learning_rate": 7.379361578109218e-06, "loss": 99.3107, "step": 50480 }, { "epoch": 0.41766968606526866, "grad_norm": 637.6759643554688, "learning_rate": 7.378121045351378e-06, "loss": 115.3234, "step": 50490 }, { "epoch": 0.41775240931463786, "grad_norm": 1148.364990234375, "learning_rate": 7.376880323377357e-06, "loss": 106.6024, "step": 50500 }, { "epoch": 0.4178351325640071, "grad_norm": 567.2054443359375, "learning_rate": 7.375639412285877e-06, "loss": 94.4765, "step": 50510 }, { "epoch": 0.41791785581337637, "grad_norm": 742.42236328125, "learning_rate": 7.374398312175674e-06, "loss": 103.2163, "step": 50520 }, { "epoch": 0.41800057906274557, "grad_norm": 698.6530151367188, "learning_rate": 7.373157023145493e-06, "loss": 86.0616, "step": 50530 }, { "epoch": 0.4180833023121148, "grad_norm": 1061.374755859375, "learning_rate": 7.371915545294098e-06, "loss": 74.492, "step": 50540 }, { "epoch": 0.4181660255614841, "grad_norm": 748.8624877929688, "learning_rate": 7.37067387872027e-06, "loss": 75.0185, "step": 50550 }, { "epoch": 0.4182487488108533, "grad_norm": 315.02374267578125, "learning_rate": 7.369432023522801e-06, "loss": 77.1597, "step": 50560 }, { "epoch": 0.41833147206022253, "grad_norm": 981.3681030273438, "learning_rate": 7.3681899798005006e-06, "loss": 118.4615, "step": 50570 }, { "epoch": 0.4184141953095918, "grad_norm": 1849.541259765625, "learning_rate": 7.366947747652191e-06, "loss": 104.9723, "step": 50580 }, { "epoch": 0.418496918558961, "grad_norm": 1334.4669189453125, "learning_rate": 7.365705327176713e-06, "loss": 100.1431, "step": 50590 }, { "epoch": 0.41857964180833024, "grad_norm": 1951.8896484375, "learning_rate": 7.364462718472919e-06, "loss": 142.2957, "step": 50600 }, { "epoch": 0.4186623650576995, "grad_norm": 659.5794067382812, "learning_rate": 7.363219921639677e-06, "loss": 96.1194, "step": 50610 }, { "epoch": 0.4187450883070687, "grad_norm": 960.1915893554688, "learning_rate": 7.361976936775872e-06, "loss": 117.1287, "step": 50620 }, { "epoch": 0.41882781155643795, "grad_norm": 940.3717651367188, "learning_rate": 7.360733763980404e-06, "loss": 110.9005, "step": 50630 }, { "epoch": 0.4189105348058072, "grad_norm": 741.2780151367188, "learning_rate": 7.3594904033521815e-06, "loss": 86.0149, "step": 50640 }, { "epoch": 0.4189932580551764, "grad_norm": 1676.6136474609375, "learning_rate": 7.358246854990138e-06, "loss": 119.8522, "step": 50650 }, { "epoch": 0.41907598130454565, "grad_norm": 1362.4383544921875, "learning_rate": 7.357003118993215e-06, "loss": 125.9308, "step": 50660 }, { "epoch": 0.41915870455391485, "grad_norm": 1395.822998046875, "learning_rate": 7.355759195460371e-06, "loss": 125.4457, "step": 50670 }, { "epoch": 0.4192414278032841, "grad_norm": 754.3294067382812, "learning_rate": 7.354515084490579e-06, "loss": 105.6307, "step": 50680 }, { "epoch": 0.41932415105265336, "grad_norm": 1079.152587890625, "learning_rate": 7.353270786182828e-06, "loss": 95.9782, "step": 50690 }, { "epoch": 0.41940687430202256, "grad_norm": 713.3341674804688, "learning_rate": 7.352026300636121e-06, "loss": 104.3826, "step": 50700 }, { "epoch": 0.4194895975513918, "grad_norm": 879.9957275390625, "learning_rate": 7.350781627949475e-06, "loss": 86.2512, "step": 50710 }, { "epoch": 0.41957232080076107, "grad_norm": 1013.7462158203125, "learning_rate": 7.3495367682219236e-06, "loss": 110.0644, "step": 50720 }, { "epoch": 0.41965504405013027, "grad_norm": 948.6749877929688, "learning_rate": 7.348291721552514e-06, "loss": 145.048, "step": 50730 }, { "epoch": 0.4197377672994995, "grad_norm": 2777.333251953125, "learning_rate": 7.3470464880403105e-06, "loss": 116.8931, "step": 50740 }, { "epoch": 0.4198204905488688, "grad_norm": 1682.1556396484375, "learning_rate": 7.345801067784388e-06, "loss": 94.5308, "step": 50750 }, { "epoch": 0.419903213798238, "grad_norm": 988.3783569335938, "learning_rate": 7.34455546088384e-06, "loss": 100.0454, "step": 50760 }, { "epoch": 0.4199859370476072, "grad_norm": 721.0875244140625, "learning_rate": 7.343309667437775e-06, "loss": 116.8833, "step": 50770 }, { "epoch": 0.4200686602969765, "grad_norm": 832.2625122070312, "learning_rate": 7.3420636875453135e-06, "loss": 77.9705, "step": 50780 }, { "epoch": 0.4201513835463457, "grad_norm": 995.8257446289062, "learning_rate": 7.340817521305595e-06, "loss": 101.0271, "step": 50790 }, { "epoch": 0.42023410679571493, "grad_norm": 935.89404296875, "learning_rate": 7.3395711688177676e-06, "loss": 123.7885, "step": 50800 }, { "epoch": 0.4203168300450842, "grad_norm": 957.3890380859375, "learning_rate": 7.3383246301809985e-06, "loss": 79.4234, "step": 50810 }, { "epoch": 0.4203995532944534, "grad_norm": 1143.3251953125, "learning_rate": 7.337077905494472e-06, "loss": 79.35, "step": 50820 }, { "epoch": 0.42048227654382264, "grad_norm": 617.7476806640625, "learning_rate": 7.335830994857382e-06, "loss": 83.5682, "step": 50830 }, { "epoch": 0.4205649997931919, "grad_norm": 923.8682861328125, "learning_rate": 7.334583898368939e-06, "loss": 83.5606, "step": 50840 }, { "epoch": 0.4206477230425611, "grad_norm": 456.18902587890625, "learning_rate": 7.333336616128369e-06, "loss": 110.9043, "step": 50850 }, { "epoch": 0.42073044629193035, "grad_norm": 629.4192504882812, "learning_rate": 7.332089148234913e-06, "loss": 84.0271, "step": 50860 }, { "epoch": 0.4208131695412996, "grad_norm": 777.4642333984375, "learning_rate": 7.330841494787828e-06, "loss": 94.1915, "step": 50870 }, { "epoch": 0.4208958927906688, "grad_norm": 1021.780029296875, "learning_rate": 7.329593655886382e-06, "loss": 86.0737, "step": 50880 }, { "epoch": 0.42097861604003806, "grad_norm": 1032.098876953125, "learning_rate": 7.3283456316298595e-06, "loss": 83.7801, "step": 50890 }, { "epoch": 0.4210613392894073, "grad_norm": 987.5896606445312, "learning_rate": 7.32709742211756e-06, "loss": 86.3313, "step": 50900 }, { "epoch": 0.4211440625387765, "grad_norm": 799.8026733398438, "learning_rate": 7.325849027448799e-06, "loss": 71.7253, "step": 50910 }, { "epoch": 0.42122678578814576, "grad_norm": 675.6878051757812, "learning_rate": 7.324600447722907e-06, "loss": 79.754, "step": 50920 }, { "epoch": 0.421309509037515, "grad_norm": 578.5625, "learning_rate": 7.323351683039224e-06, "loss": 103.9349, "step": 50930 }, { "epoch": 0.4213922322868842, "grad_norm": 1150.8740234375, "learning_rate": 7.32210273349711e-06, "loss": 101.4098, "step": 50940 }, { "epoch": 0.42147495553625347, "grad_norm": 1209.810546875, "learning_rate": 7.32085359919594e-06, "loss": 112.2176, "step": 50950 }, { "epoch": 0.4215576787856227, "grad_norm": 1046.1680908203125, "learning_rate": 7.3196042802350995e-06, "loss": 81.5384, "step": 50960 }, { "epoch": 0.4216404020349919, "grad_norm": 1647.843017578125, "learning_rate": 7.3183547767139916e-06, "loss": 106.6991, "step": 50970 }, { "epoch": 0.4217231252843612, "grad_norm": 1031.4130859375, "learning_rate": 7.317105088732035e-06, "loss": 102.2982, "step": 50980 }, { "epoch": 0.42180584853373043, "grad_norm": 739.594482421875, "learning_rate": 7.31585521638866e-06, "loss": 83.5706, "step": 50990 }, { "epoch": 0.42188857178309963, "grad_norm": 1355.9227294921875, "learning_rate": 7.314605159783313e-06, "loss": 155.9577, "step": 51000 }, { "epoch": 0.4219712950324689, "grad_norm": 752.1953735351562, "learning_rate": 7.313354919015457e-06, "loss": 87.4393, "step": 51010 }, { "epoch": 0.4220540182818381, "grad_norm": 1794.4256591796875, "learning_rate": 7.312104494184566e-06, "loss": 140.2643, "step": 51020 }, { "epoch": 0.42213674153120734, "grad_norm": 1220.9268798828125, "learning_rate": 7.310853885390133e-06, "loss": 98.7091, "step": 51030 }, { "epoch": 0.4222194647805766, "grad_norm": 2305.75, "learning_rate": 7.309603092731661e-06, "loss": 112.4157, "step": 51040 }, { "epoch": 0.4223021880299458, "grad_norm": 1095.7064208984375, "learning_rate": 7.30835211630867e-06, "loss": 102.6219, "step": 51050 }, { "epoch": 0.42238491127931505, "grad_norm": 678.8933715820312, "learning_rate": 7.3071009562206965e-06, "loss": 88.1793, "step": 51060 }, { "epoch": 0.4224676345286843, "grad_norm": 809.6482543945312, "learning_rate": 7.305849612567287e-06, "loss": 99.8888, "step": 51070 }, { "epoch": 0.4225503577780535, "grad_norm": 1535.9327392578125, "learning_rate": 7.304598085448007e-06, "loss": 119.08, "step": 51080 }, { "epoch": 0.42263308102742275, "grad_norm": 953.1244506835938, "learning_rate": 7.303346374962433e-06, "loss": 86.5312, "step": 51090 }, { "epoch": 0.422715804276792, "grad_norm": 1713.26318359375, "learning_rate": 7.302094481210159e-06, "loss": 103.2178, "step": 51100 }, { "epoch": 0.4227985275261612, "grad_norm": 826.4808959960938, "learning_rate": 7.300842404290792e-06, "loss": 96.0054, "step": 51110 }, { "epoch": 0.42288125077553046, "grad_norm": 1028.5643310546875, "learning_rate": 7.2995901443039554e-06, "loss": 128.5591, "step": 51120 }, { "epoch": 0.4229639740248997, "grad_norm": 840.199462890625, "learning_rate": 7.298337701349285e-06, "loss": 116.7384, "step": 51130 }, { "epoch": 0.4230466972742689, "grad_norm": 672.873779296875, "learning_rate": 7.29708507552643e-06, "loss": 68.3276, "step": 51140 }, { "epoch": 0.42312942052363817, "grad_norm": 383.77313232421875, "learning_rate": 7.295832266935059e-06, "loss": 84.0062, "step": 51150 }, { "epoch": 0.4232121437730074, "grad_norm": 1001.8519287109375, "learning_rate": 7.2945792756748505e-06, "loss": 140.7101, "step": 51160 }, { "epoch": 0.4232948670223766, "grad_norm": 709.9981689453125, "learning_rate": 7.2933261018455005e-06, "loss": 75.1831, "step": 51170 }, { "epoch": 0.4233775902717459, "grad_norm": 675.74658203125, "learning_rate": 7.292072745546716e-06, "loss": 73.5509, "step": 51180 }, { "epoch": 0.42346031352111513, "grad_norm": 886.9816284179688, "learning_rate": 7.290819206878223e-06, "loss": 92.8137, "step": 51190 }, { "epoch": 0.4235430367704843, "grad_norm": 688.6138305664062, "learning_rate": 7.289565485939759e-06, "loss": 109.6997, "step": 51200 }, { "epoch": 0.4236257600198536, "grad_norm": 1372.497802734375, "learning_rate": 7.288311582831078e-06, "loss": 86.5049, "step": 51210 }, { "epoch": 0.42370848326922284, "grad_norm": 632.5243530273438, "learning_rate": 7.2870574976519455e-06, "loss": 73.3826, "step": 51220 }, { "epoch": 0.42379120651859203, "grad_norm": 564.6524658203125, "learning_rate": 7.2858032305021455e-06, "loss": 123.0075, "step": 51230 }, { "epoch": 0.4238739297679613, "grad_norm": 758.579833984375, "learning_rate": 7.28454878148147e-06, "loss": 97.95, "step": 51240 }, { "epoch": 0.42395665301733054, "grad_norm": 1073.8216552734375, "learning_rate": 7.283294150689735e-06, "loss": 88.5506, "step": 51250 }, { "epoch": 0.42403937626669974, "grad_norm": 621.6570434570312, "learning_rate": 7.282039338226763e-06, "loss": 106.7351, "step": 51260 }, { "epoch": 0.424122099516069, "grad_norm": 1591.994384765625, "learning_rate": 7.280784344192393e-06, "loss": 117.9606, "step": 51270 }, { "epoch": 0.42420482276543825, "grad_norm": 811.947265625, "learning_rate": 7.279529168686481e-06, "loss": 83.5623, "step": 51280 }, { "epoch": 0.42428754601480745, "grad_norm": 939.7516479492188, "learning_rate": 7.278273811808894e-06, "loss": 91.11, "step": 51290 }, { "epoch": 0.4243702692641767, "grad_norm": 1210.847900390625, "learning_rate": 7.2770182736595164e-06, "loss": 130.9754, "step": 51300 }, { "epoch": 0.42445299251354596, "grad_norm": 1220.7769775390625, "learning_rate": 7.275762554338244e-06, "loss": 76.7486, "step": 51310 }, { "epoch": 0.42453571576291516, "grad_norm": 1265.53271484375, "learning_rate": 7.2745066539449905e-06, "loss": 95.9141, "step": 51320 }, { "epoch": 0.4246184390122844, "grad_norm": 875.6475219726562, "learning_rate": 7.27325057257968e-06, "loss": 97.6407, "step": 51330 }, { "epoch": 0.42470116226165366, "grad_norm": 693.2019653320312, "learning_rate": 7.271994310342254e-06, "loss": 99.6023, "step": 51340 }, { "epoch": 0.42478388551102286, "grad_norm": 897.9375, "learning_rate": 7.270737867332669e-06, "loss": 104.1033, "step": 51350 }, { "epoch": 0.4248666087603921, "grad_norm": 1381.2633056640625, "learning_rate": 7.2694812436508934e-06, "loss": 98.0639, "step": 51360 }, { "epoch": 0.4249493320097613, "grad_norm": 825.0390625, "learning_rate": 7.268224439396909e-06, "loss": 109.9973, "step": 51370 }, { "epoch": 0.42503205525913057, "grad_norm": 997.9774780273438, "learning_rate": 7.266967454670717e-06, "loss": 115.4315, "step": 51380 }, { "epoch": 0.4251147785084998, "grad_norm": 1014.7069702148438, "learning_rate": 7.265710289572328e-06, "loss": 122.6427, "step": 51390 }, { "epoch": 0.425197501757869, "grad_norm": 1186.5941162109375, "learning_rate": 7.264452944201771e-06, "loss": 79.164, "step": 51400 }, { "epoch": 0.4252802250072383, "grad_norm": 896.6864013671875, "learning_rate": 7.263195418659083e-06, "loss": 94.9668, "step": 51410 }, { "epoch": 0.42536294825660753, "grad_norm": 1269.0845947265625, "learning_rate": 7.261937713044325e-06, "loss": 111.4878, "step": 51420 }, { "epoch": 0.42544567150597673, "grad_norm": 737.5647583007812, "learning_rate": 7.260679827457562e-06, "loss": 144.4097, "step": 51430 }, { "epoch": 0.425528394755346, "grad_norm": 1106.5899658203125, "learning_rate": 7.259421761998881e-06, "loss": 97.2455, "step": 51440 }, { "epoch": 0.42561111800471524, "grad_norm": 867.489013671875, "learning_rate": 7.2581635167683805e-06, "loss": 105.3224, "step": 51450 }, { "epoch": 0.42569384125408444, "grad_norm": 693.6383666992188, "learning_rate": 7.256905091866171e-06, "loss": 101.9842, "step": 51460 }, { "epoch": 0.4257765645034537, "grad_norm": 611.8115844726562, "learning_rate": 7.255646487392382e-06, "loss": 115.6671, "step": 51470 }, { "epoch": 0.42585928775282295, "grad_norm": 1504.69775390625, "learning_rate": 7.254387703447154e-06, "loss": 95.3933, "step": 51480 }, { "epoch": 0.42594201100219214, "grad_norm": 600.2556762695312, "learning_rate": 7.2531287401306435e-06, "loss": 92.5239, "step": 51490 }, { "epoch": 0.4260247342515614, "grad_norm": 407.8628845214844, "learning_rate": 7.251869597543019e-06, "loss": 82.7194, "step": 51500 }, { "epoch": 0.42610745750093065, "grad_norm": 614.1116943359375, "learning_rate": 7.250610275784464e-06, "loss": 93.6232, "step": 51510 }, { "epoch": 0.42619018075029985, "grad_norm": 898.9727172851562, "learning_rate": 7.2493507749551795e-06, "loss": 85.4422, "step": 51520 }, { "epoch": 0.4262729039996691, "grad_norm": 1155.338623046875, "learning_rate": 7.248091095155378e-06, "loss": 118.6169, "step": 51530 }, { "epoch": 0.42635562724903836, "grad_norm": 1159.13916015625, "learning_rate": 7.246831236485283e-06, "loss": 135.8041, "step": 51540 }, { "epoch": 0.42643835049840756, "grad_norm": 928.2463989257812, "learning_rate": 7.245571199045139e-06, "loss": 108.4106, "step": 51550 }, { "epoch": 0.4265210737477768, "grad_norm": 767.41259765625, "learning_rate": 7.244310982935202e-06, "loss": 74.6865, "step": 51560 }, { "epoch": 0.42660379699714607, "grad_norm": 709.4650268554688, "learning_rate": 7.243050588255738e-06, "loss": 76.4944, "step": 51570 }, { "epoch": 0.42668652024651527, "grad_norm": 1164.441162109375, "learning_rate": 7.241790015107034e-06, "loss": 88.2155, "step": 51580 }, { "epoch": 0.4267692434958845, "grad_norm": 1503.4273681640625, "learning_rate": 7.240529263589386e-06, "loss": 126.4441, "step": 51590 }, { "epoch": 0.4268519667452538, "grad_norm": 760.6532592773438, "learning_rate": 7.239268333803109e-06, "loss": 105.8668, "step": 51600 }, { "epoch": 0.426934689994623, "grad_norm": 1101.9364013671875, "learning_rate": 7.2380072258485265e-06, "loss": 92.2242, "step": 51610 }, { "epoch": 0.4270174132439922, "grad_norm": 554.7161254882812, "learning_rate": 7.2367459398259795e-06, "loss": 83.6779, "step": 51620 }, { "epoch": 0.4271001364933615, "grad_norm": 972.8421020507812, "learning_rate": 7.2354844758358234e-06, "loss": 123.7181, "step": 51630 }, { "epoch": 0.4271828597427307, "grad_norm": 1161.73828125, "learning_rate": 7.234222833978427e-06, "loss": 106.6433, "step": 51640 }, { "epoch": 0.42726558299209993, "grad_norm": 1067.09765625, "learning_rate": 7.232961014354175e-06, "loss": 104.9934, "step": 51650 }, { "epoch": 0.4273483062414692, "grad_norm": 1101.327392578125, "learning_rate": 7.23169901706346e-06, "loss": 106.5562, "step": 51660 }, { "epoch": 0.4274310294908384, "grad_norm": 1341.46044921875, "learning_rate": 7.2304368422067e-06, "loss": 134.7537, "step": 51670 }, { "epoch": 0.42751375274020764, "grad_norm": 1436.9295654296875, "learning_rate": 7.2291744898843145e-06, "loss": 98.4549, "step": 51680 }, { "epoch": 0.4275964759895769, "grad_norm": 540.67236328125, "learning_rate": 7.227911960196746e-06, "loss": 86.389, "step": 51690 }, { "epoch": 0.4276791992389461, "grad_norm": 1108.2845458984375, "learning_rate": 7.226649253244448e-06, "loss": 74.8376, "step": 51700 }, { "epoch": 0.42776192248831535, "grad_norm": 964.5218505859375, "learning_rate": 7.225386369127886e-06, "loss": 112.0215, "step": 51710 }, { "epoch": 0.4278446457376846, "grad_norm": 768.393310546875, "learning_rate": 7.224123307947545e-06, "loss": 94.5367, "step": 51720 }, { "epoch": 0.4279273689870538, "grad_norm": 708.0656127929688, "learning_rate": 7.2228600698039205e-06, "loss": 108.4423, "step": 51730 }, { "epoch": 0.42801009223642306, "grad_norm": 1345.224853515625, "learning_rate": 7.221596654797522e-06, "loss": 91.7173, "step": 51740 }, { "epoch": 0.42809281548579226, "grad_norm": 1059.3037109375, "learning_rate": 7.2203330630288714e-06, "loss": 109.0566, "step": 51750 }, { "epoch": 0.4281755387351615, "grad_norm": 975.2772216796875, "learning_rate": 7.21906929459851e-06, "loss": 140.6817, "step": 51760 }, { "epoch": 0.42825826198453076, "grad_norm": 1177.9395751953125, "learning_rate": 7.217805349606988e-06, "loss": 91.187, "step": 51770 }, { "epoch": 0.42834098523389996, "grad_norm": 685.8370971679688, "learning_rate": 7.216541228154875e-06, "loss": 93.8095, "step": 51780 }, { "epoch": 0.4284237084832692, "grad_norm": 1138.31689453125, "learning_rate": 7.215276930342747e-06, "loss": 104.4566, "step": 51790 }, { "epoch": 0.42850643173263847, "grad_norm": 601.1682739257812, "learning_rate": 7.214012456271202e-06, "loss": 130.8603, "step": 51800 }, { "epoch": 0.42858915498200767, "grad_norm": 1000.6312866210938, "learning_rate": 7.212747806040845e-06, "loss": 123.3161, "step": 51810 }, { "epoch": 0.4286718782313769, "grad_norm": 1142.5823974609375, "learning_rate": 7.211482979752302e-06, "loss": 85.4368, "step": 51820 }, { "epoch": 0.4287546014807462, "grad_norm": 816.385986328125, "learning_rate": 7.210217977506207e-06, "loss": 109.9975, "step": 51830 }, { "epoch": 0.4288373247301154, "grad_norm": 944.0142822265625, "learning_rate": 7.208952799403211e-06, "loss": 108.7334, "step": 51840 }, { "epoch": 0.42892004797948463, "grad_norm": 824.8801879882812, "learning_rate": 7.207687445543977e-06, "loss": 82.7929, "step": 51850 }, { "epoch": 0.4290027712288539, "grad_norm": 964.5783081054688, "learning_rate": 7.206421916029187e-06, "loss": 121.5998, "step": 51860 }, { "epoch": 0.4290854944782231, "grad_norm": 1063.656982421875, "learning_rate": 7.205156210959529e-06, "loss": 90.583, "step": 51870 }, { "epoch": 0.42916821772759234, "grad_norm": 3292.513671875, "learning_rate": 7.203890330435715e-06, "loss": 105.8095, "step": 51880 }, { "epoch": 0.4292509409769616, "grad_norm": 703.033447265625, "learning_rate": 7.202624274558458e-06, "loss": 106.7044, "step": 51890 }, { "epoch": 0.4293336642263308, "grad_norm": 669.6672973632812, "learning_rate": 7.201358043428499e-06, "loss": 89.5573, "step": 51900 }, { "epoch": 0.42941638747570005, "grad_norm": 647.8473510742188, "learning_rate": 7.200091637146582e-06, "loss": 99.5425, "step": 51910 }, { "epoch": 0.4294991107250693, "grad_norm": 1656.5313720703125, "learning_rate": 7.198825055813471e-06, "loss": 120.4202, "step": 51920 }, { "epoch": 0.4295818339744385, "grad_norm": 755.7421875, "learning_rate": 7.197558299529941e-06, "loss": 74.5723, "step": 51930 }, { "epoch": 0.42966455722380775, "grad_norm": 1194.8021240234375, "learning_rate": 7.196291368396784e-06, "loss": 104.8849, "step": 51940 }, { "epoch": 0.429747280473177, "grad_norm": 810.8287353515625, "learning_rate": 7.1950242625148e-06, "loss": 109.7433, "step": 51950 }, { "epoch": 0.4298300037225462, "grad_norm": 5541.013671875, "learning_rate": 7.1937569819848115e-06, "loss": 117.9274, "step": 51960 }, { "epoch": 0.42991272697191546, "grad_norm": 1193.3897705078125, "learning_rate": 7.192489526907646e-06, "loss": 112.9372, "step": 51970 }, { "epoch": 0.4299954502212847, "grad_norm": 1110.0302734375, "learning_rate": 7.191221897384153e-06, "loss": 120.9369, "step": 51980 }, { "epoch": 0.4300781734706539, "grad_norm": 1447.90771484375, "learning_rate": 7.189954093515189e-06, "loss": 142.4958, "step": 51990 }, { "epoch": 0.43016089672002317, "grad_norm": 1462.6444091796875, "learning_rate": 7.188686115401628e-06, "loss": 127.5024, "step": 52000 } ], "logging_steps": 10, "max_steps": 123750, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }