{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999888074318652, "eval_steps": 500, "global_step": 11168, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.954054507806816e-05, "grad_norm": 6.8605176727777835, "learning_rate": 5.952380952380953e-08, "loss": 1.3694, "step": 1 }, { "epoch": 0.00017908109015613632, "grad_norm": 7.691125126718495, "learning_rate": 1.1904761904761906e-07, "loss": 1.4215, "step": 2 }, { "epoch": 0.0002686216352342045, "grad_norm": 5.680496943421626, "learning_rate": 1.7857142857142858e-07, "loss": 1.3723, "step": 3 }, { "epoch": 0.00035816218031227263, "grad_norm": 5.401625404885626, "learning_rate": 2.3809523809523811e-07, "loss": 1.3616, "step": 4 }, { "epoch": 0.0004477027253903408, "grad_norm": 4.967220662606621, "learning_rate": 2.9761904761904765e-07, "loss": 1.3153, "step": 5 }, { "epoch": 0.000537243270468409, "grad_norm": 6.1817393765434865, "learning_rate": 3.5714285714285716e-07, "loss": 1.3156, "step": 6 }, { "epoch": 0.0006267838155464771, "grad_norm": 6.276733593598593, "learning_rate": 4.1666666666666667e-07, "loss": 1.3896, "step": 7 }, { "epoch": 0.0007163243606245453, "grad_norm": 7.256473178370257, "learning_rate": 4.7619047619047623e-07, "loss": 1.4456, "step": 8 }, { "epoch": 0.0008058649057026135, "grad_norm": 3.884565476184392, "learning_rate": 5.357142857142857e-07, "loss": 1.356, "step": 9 }, { "epoch": 0.0008954054507806816, "grad_norm": 8.322504183965533, "learning_rate": 5.952380952380953e-07, "loss": 1.4909, "step": 10 }, { "epoch": 0.0009849459958587497, "grad_norm": 6.815961533452488, "learning_rate": 6.547619047619048e-07, "loss": 1.4052, "step": 11 }, { "epoch": 0.001074486540936818, "grad_norm": 5.162486223457818, "learning_rate": 7.142857142857143e-07, "loss": 1.3826, "step": 12 }, { "epoch": 0.0011640270860148862, "grad_norm": 4.065629083093374, "learning_rate": 7.738095238095239e-07, "loss": 1.3456, "step": 13 }, { "epoch": 0.0012535676310929543, "grad_norm": 5.967045434829423, "learning_rate": 8.333333333333333e-07, "loss": 1.314, "step": 14 }, { "epoch": 0.0013431081761710224, "grad_norm": 6.255988604020014, "learning_rate": 8.928571428571429e-07, "loss": 1.2564, "step": 15 }, { "epoch": 0.0014326487212490905, "grad_norm": 5.005226032915486, "learning_rate": 9.523809523809525e-07, "loss": 1.3065, "step": 16 }, { "epoch": 0.0015221892663271589, "grad_norm": 5.42562394704774, "learning_rate": 1.011904761904762e-06, "loss": 1.3002, "step": 17 }, { "epoch": 0.001611729811405227, "grad_norm": 4.772711542442117, "learning_rate": 1.0714285714285714e-06, "loss": 1.2308, "step": 18 }, { "epoch": 0.001701270356483295, "grad_norm": 3.1174431394341227, "learning_rate": 1.130952380952381e-06, "loss": 1.27, "step": 19 }, { "epoch": 0.0017908109015613632, "grad_norm": 4.0990299111949655, "learning_rate": 1.1904761904761906e-06, "loss": 1.2408, "step": 20 }, { "epoch": 0.0018803514466394313, "grad_norm": 3.4441565958301013, "learning_rate": 1.25e-06, "loss": 1.2137, "step": 21 }, { "epoch": 0.0019698919917174994, "grad_norm": 3.85140860102152, "learning_rate": 1.3095238095238096e-06, "loss": 1.1605, "step": 22 }, { "epoch": 0.0020594325367955676, "grad_norm": 3.2980804912499093, "learning_rate": 1.3690476190476193e-06, "loss": 1.1968, "step": 23 }, { "epoch": 0.002148973081873636, "grad_norm": 2.8501123951614105, "learning_rate": 1.4285714285714286e-06, "loss": 1.2073, "step": 24 }, { "epoch": 0.0022385136269517042, "grad_norm": 1.9955484371820171, "learning_rate": 1.4880952380952381e-06, "loss": 1.1405, "step": 25 }, { "epoch": 0.0023280541720297723, "grad_norm": 2.0796096933942554, "learning_rate": 1.5476190476190479e-06, "loss": 1.2414, "step": 26 }, { "epoch": 0.0024175947171078405, "grad_norm": 1.9258413112172945, "learning_rate": 1.6071428571428574e-06, "loss": 1.1795, "step": 27 }, { "epoch": 0.0025071352621859086, "grad_norm": 2.219156059146595, "learning_rate": 1.6666666666666667e-06, "loss": 1.1385, "step": 28 }, { "epoch": 0.0025966758072639767, "grad_norm": 1.9782264643890668, "learning_rate": 1.7261904761904764e-06, "loss": 1.2193, "step": 29 }, { "epoch": 0.002686216352342045, "grad_norm": 1.7202534544729537, "learning_rate": 1.7857142857142859e-06, "loss": 1.1381, "step": 30 }, { "epoch": 0.002775756897420113, "grad_norm": 1.9188370445046954, "learning_rate": 1.8452380952380954e-06, "loss": 1.1534, "step": 31 }, { "epoch": 0.002865297442498181, "grad_norm": 1.6628023890548072, "learning_rate": 1.904761904761905e-06, "loss": 1.111, "step": 32 }, { "epoch": 0.002954837987576249, "grad_norm": 1.687574299864664, "learning_rate": 1.9642857142857144e-06, "loss": 1.1263, "step": 33 }, { "epoch": 0.0030443785326543177, "grad_norm": 1.9568607394885413, "learning_rate": 2.023809523809524e-06, "loss": 1.117, "step": 34 }, { "epoch": 0.003133919077732386, "grad_norm": 1.4443616128675203, "learning_rate": 2.0833333333333334e-06, "loss": 1.1181, "step": 35 }, { "epoch": 0.003223459622810454, "grad_norm": 1.5247390243321048, "learning_rate": 2.1428571428571427e-06, "loss": 1.1419, "step": 36 }, { "epoch": 0.003313000167888522, "grad_norm": 1.8050294358262133, "learning_rate": 2.2023809523809525e-06, "loss": 1.05, "step": 37 }, { "epoch": 0.00340254071296659, "grad_norm": 1.659186619613239, "learning_rate": 2.261904761904762e-06, "loss": 1.0356, "step": 38 }, { "epoch": 0.0034920812580446583, "grad_norm": 1.6504799209044219, "learning_rate": 2.321428571428572e-06, "loss": 1.0876, "step": 39 }, { "epoch": 0.0035816218031227264, "grad_norm": 2.1681526883367717, "learning_rate": 2.380952380952381e-06, "loss": 1.0765, "step": 40 }, { "epoch": 0.0036711623482007945, "grad_norm": 1.5610114447711314, "learning_rate": 2.4404761904761905e-06, "loss": 1.0003, "step": 41 }, { "epoch": 0.0037607028932788627, "grad_norm": 1.8664840413808228, "learning_rate": 2.5e-06, "loss": 1.0805, "step": 42 }, { "epoch": 0.003850243438356931, "grad_norm": 1.6464834387199405, "learning_rate": 2.5595238095238095e-06, "loss": 1.024, "step": 43 }, { "epoch": 0.003939783983434999, "grad_norm": 1.832559754455683, "learning_rate": 2.6190476190476192e-06, "loss": 1.1215, "step": 44 }, { "epoch": 0.0040293245285130674, "grad_norm": 1.5690875950142695, "learning_rate": 2.6785714285714285e-06, "loss": 1.07, "step": 45 }, { "epoch": 0.004118865073591135, "grad_norm": 1.7754511705781226, "learning_rate": 2.7380952380952387e-06, "loss": 1.0774, "step": 46 }, { "epoch": 0.004208405618669204, "grad_norm": 1.7547670602622754, "learning_rate": 2.797619047619048e-06, "loss": 1.1511, "step": 47 }, { "epoch": 0.004297946163747272, "grad_norm": 1.3840214830218103, "learning_rate": 2.8571428571428573e-06, "loss": 1.0025, "step": 48 }, { "epoch": 0.00438748670882534, "grad_norm": 1.479028052166697, "learning_rate": 2.916666666666667e-06, "loss": 1.0388, "step": 49 }, { "epoch": 0.0044770272539034085, "grad_norm": 1.3895404661557715, "learning_rate": 2.9761904761904763e-06, "loss": 1.0936, "step": 50 }, { "epoch": 0.004566567798981476, "grad_norm": 1.5593391246124038, "learning_rate": 3.0357142857142856e-06, "loss": 1.1312, "step": 51 }, { "epoch": 0.004656108344059545, "grad_norm": 1.381786521648719, "learning_rate": 3.0952380952380957e-06, "loss": 1.0383, "step": 52 }, { "epoch": 0.004745648889137612, "grad_norm": 1.6108949600051055, "learning_rate": 3.154761904761905e-06, "loss": 1.1068, "step": 53 }, { "epoch": 0.004835189434215681, "grad_norm": 1.3335728546559724, "learning_rate": 3.2142857142857147e-06, "loss": 1.0878, "step": 54 }, { "epoch": 0.004924729979293749, "grad_norm": 1.3893733824914392, "learning_rate": 3.273809523809524e-06, "loss": 1.0441, "step": 55 }, { "epoch": 0.005014270524371817, "grad_norm": 1.591666225031461, "learning_rate": 3.3333333333333333e-06, "loss": 1.1077, "step": 56 }, { "epoch": 0.005103811069449885, "grad_norm": 1.4876988140406033, "learning_rate": 3.3928571428571435e-06, "loss": 1.0835, "step": 57 }, { "epoch": 0.005193351614527953, "grad_norm": 1.612063957917511, "learning_rate": 3.4523809523809528e-06, "loss": 1.0452, "step": 58 }, { "epoch": 0.005282892159606022, "grad_norm": 1.6995990850825666, "learning_rate": 3.511904761904762e-06, "loss": 1.0326, "step": 59 }, { "epoch": 0.00537243270468409, "grad_norm": 1.5465428512000507, "learning_rate": 3.5714285714285718e-06, "loss": 1.0428, "step": 60 }, { "epoch": 0.005461973249762158, "grad_norm": 1.4221354638775416, "learning_rate": 3.630952380952381e-06, "loss": 1.0711, "step": 61 }, { "epoch": 0.005551513794840226, "grad_norm": 1.5346769717891102, "learning_rate": 3.690476190476191e-06, "loss": 1.0837, "step": 62 }, { "epoch": 0.005641054339918294, "grad_norm": 1.4159568033864736, "learning_rate": 3.7500000000000005e-06, "loss": 1.0506, "step": 63 }, { "epoch": 0.005730594884996362, "grad_norm": 1.5690530218920364, "learning_rate": 3.80952380952381e-06, "loss": 1.0556, "step": 64 }, { "epoch": 0.005820135430074431, "grad_norm": 1.5697504797030712, "learning_rate": 3.869047619047619e-06, "loss": 1.108, "step": 65 }, { "epoch": 0.005909675975152498, "grad_norm": 1.8258299131084181, "learning_rate": 3.928571428571429e-06, "loss": 1.0756, "step": 66 }, { "epoch": 0.005999216520230567, "grad_norm": 1.3728995166901874, "learning_rate": 3.9880952380952386e-06, "loss": 1.0429, "step": 67 }, { "epoch": 0.006088757065308635, "grad_norm": 1.6939658639097959, "learning_rate": 4.047619047619048e-06, "loss": 1.0502, "step": 68 }, { "epoch": 0.006178297610386703, "grad_norm": 1.4741960493495423, "learning_rate": 4.107142857142857e-06, "loss": 1.0528, "step": 69 }, { "epoch": 0.006267838155464772, "grad_norm": 1.2853134828284618, "learning_rate": 4.166666666666667e-06, "loss": 1.0087, "step": 70 }, { "epoch": 0.006357378700542839, "grad_norm": 1.4131068074499027, "learning_rate": 4.226190476190477e-06, "loss": 1.0035, "step": 71 }, { "epoch": 0.006446919245620908, "grad_norm": 1.732671970636036, "learning_rate": 4.2857142857142855e-06, "loss": 1.0427, "step": 72 }, { "epoch": 0.006536459790698976, "grad_norm": 1.6582392452806265, "learning_rate": 4.345238095238096e-06, "loss": 1.0438, "step": 73 }, { "epoch": 0.006626000335777044, "grad_norm": 1.3182809895287744, "learning_rate": 4.404761904761905e-06, "loss": 1.0354, "step": 74 }, { "epoch": 0.006715540880855112, "grad_norm": 1.3635845321776408, "learning_rate": 4.464285714285715e-06, "loss": 1.0494, "step": 75 }, { "epoch": 0.00680508142593318, "grad_norm": 1.360498693077617, "learning_rate": 4.523809523809524e-06, "loss": 0.9935, "step": 76 }, { "epoch": 0.006894621971011249, "grad_norm": 1.342450847470953, "learning_rate": 4.583333333333333e-06, "loss": 0.9944, "step": 77 }, { "epoch": 0.006984162516089317, "grad_norm": 1.5740992165930958, "learning_rate": 4.642857142857144e-06, "loss": 0.9559, "step": 78 }, { "epoch": 0.007073703061167385, "grad_norm": 1.3305886574115868, "learning_rate": 4.702380952380953e-06, "loss": 0.9313, "step": 79 }, { "epoch": 0.007163243606245453, "grad_norm": 1.4627139160234712, "learning_rate": 4.761904761904762e-06, "loss": 1.0191, "step": 80 }, { "epoch": 0.007252784151323521, "grad_norm": 1.5236143264045536, "learning_rate": 4.821428571428572e-06, "loss": 1.0295, "step": 81 }, { "epoch": 0.007342324696401589, "grad_norm": 1.3070050715842225, "learning_rate": 4.880952380952381e-06, "loss": 1.0189, "step": 82 }, { "epoch": 0.007431865241479658, "grad_norm": 1.33321070796309, "learning_rate": 4.940476190476191e-06, "loss": 1.0472, "step": 83 }, { "epoch": 0.007521405786557725, "grad_norm": 1.2816371085172078, "learning_rate": 5e-06, "loss": 1.0327, "step": 84 }, { "epoch": 0.007610946331635794, "grad_norm": 1.5235193144001336, "learning_rate": 5.05952380952381e-06, "loss": 1.0228, "step": 85 }, { "epoch": 0.007700486876713862, "grad_norm": 1.3640744753358327, "learning_rate": 5.119047619047619e-06, "loss": 1.0084, "step": 86 }, { "epoch": 0.00779002742179193, "grad_norm": 1.348787452974458, "learning_rate": 5.1785714285714296e-06, "loss": 0.9735, "step": 87 }, { "epoch": 0.007879567966869998, "grad_norm": 1.276205787658554, "learning_rate": 5.2380952380952384e-06, "loss": 1.0244, "step": 88 }, { "epoch": 0.007969108511948066, "grad_norm": 1.3052753646594772, "learning_rate": 5.297619047619048e-06, "loss": 0.9838, "step": 89 }, { "epoch": 0.008058649057026135, "grad_norm": 1.3457428597610344, "learning_rate": 5.357142857142857e-06, "loss": 0.9628, "step": 90 }, { "epoch": 0.008148189602104203, "grad_norm": 1.2868020672550435, "learning_rate": 5.416666666666667e-06, "loss": 0.937, "step": 91 }, { "epoch": 0.00823773014718227, "grad_norm": 2.081956527692941, "learning_rate": 5.476190476190477e-06, "loss": 0.9857, "step": 92 }, { "epoch": 0.008327270692260339, "grad_norm": 1.2536316087509864, "learning_rate": 5.535714285714286e-06, "loss": 1.0148, "step": 93 }, { "epoch": 0.008416811237338407, "grad_norm": 1.6015988253413718, "learning_rate": 5.595238095238096e-06, "loss": 1.0322, "step": 94 }, { "epoch": 0.008506351782416476, "grad_norm": 1.6548495158042813, "learning_rate": 5.654761904761905e-06, "loss": 1.0035, "step": 95 }, { "epoch": 0.008595892327494544, "grad_norm": 1.4078823723272547, "learning_rate": 5.7142857142857145e-06, "loss": 0.9758, "step": 96 }, { "epoch": 0.008685432872572611, "grad_norm": 1.5218636021557224, "learning_rate": 5.773809523809523e-06, "loss": 0.9634, "step": 97 }, { "epoch": 0.00877497341765068, "grad_norm": 1.4411111232736358, "learning_rate": 5.833333333333334e-06, "loss": 0.9968, "step": 98 }, { "epoch": 0.008864513962728748, "grad_norm": 1.4845544049865678, "learning_rate": 5.892857142857144e-06, "loss": 0.9755, "step": 99 }, { "epoch": 0.008954054507806817, "grad_norm": 1.4805450172448043, "learning_rate": 5.9523809523809525e-06, "loss": 0.954, "step": 100 }, { "epoch": 0.009043595052884884, "grad_norm": 1.3168462755841288, "learning_rate": 6.011904761904762e-06, "loss": 0.9991, "step": 101 }, { "epoch": 0.009133135597962952, "grad_norm": 1.4411969742896522, "learning_rate": 6.071428571428571e-06, "loss": 0.9616, "step": 102 }, { "epoch": 0.00922267614304102, "grad_norm": 1.4503808055083731, "learning_rate": 6.130952380952382e-06, "loss": 0.9989, "step": 103 }, { "epoch": 0.00931221668811909, "grad_norm": 1.441168610056887, "learning_rate": 6.1904761904761914e-06, "loss": 1.0268, "step": 104 }, { "epoch": 0.009401757233197158, "grad_norm": 1.390408253303954, "learning_rate": 6.25e-06, "loss": 0.9328, "step": 105 }, { "epoch": 0.009491297778275225, "grad_norm": 1.3895700800718895, "learning_rate": 6.30952380952381e-06, "loss": 1.008, "step": 106 }, { "epoch": 0.009580838323353293, "grad_norm": 1.4775664954059724, "learning_rate": 6.369047619047619e-06, "loss": 1.0632, "step": 107 }, { "epoch": 0.009670378868431362, "grad_norm": 1.550361713976216, "learning_rate": 6.4285714285714295e-06, "loss": 0.9601, "step": 108 }, { "epoch": 0.00975991941350943, "grad_norm": 1.3078283659340315, "learning_rate": 6.488095238095239e-06, "loss": 1.0247, "step": 109 }, { "epoch": 0.009849459958587497, "grad_norm": 1.322044169233702, "learning_rate": 6.547619047619048e-06, "loss": 0.9589, "step": 110 }, { "epoch": 0.009939000503665566, "grad_norm": 1.403959589278741, "learning_rate": 6.607142857142858e-06, "loss": 1.0003, "step": 111 }, { "epoch": 0.010028541048743634, "grad_norm": 1.4502321635121647, "learning_rate": 6.666666666666667e-06, "loss": 1.005, "step": 112 }, { "epoch": 0.010118081593821703, "grad_norm": 1.4420768567846465, "learning_rate": 6.726190476190477e-06, "loss": 0.9415, "step": 113 }, { "epoch": 0.01020762213889977, "grad_norm": 1.4698692117387229, "learning_rate": 6.785714285714287e-06, "loss": 1.0095, "step": 114 }, { "epoch": 0.010297162683977838, "grad_norm": 1.3241258190333822, "learning_rate": 6.845238095238096e-06, "loss": 0.9819, "step": 115 }, { "epoch": 0.010386703229055907, "grad_norm": 1.5308240430048938, "learning_rate": 6.9047619047619055e-06, "loss": 1.0196, "step": 116 }, { "epoch": 0.010476243774133975, "grad_norm": 1.3400065057088364, "learning_rate": 6.964285714285714e-06, "loss": 0.9778, "step": 117 }, { "epoch": 0.010565784319212044, "grad_norm": 1.3984498071873686, "learning_rate": 7.023809523809524e-06, "loss": 0.9846, "step": 118 }, { "epoch": 0.01065532486429011, "grad_norm": 1.425113759601797, "learning_rate": 7.083333333333335e-06, "loss": 0.9965, "step": 119 }, { "epoch": 0.01074486540936818, "grad_norm": 1.3770492080848322, "learning_rate": 7.1428571428571436e-06, "loss": 1.024, "step": 120 }, { "epoch": 0.010834405954446248, "grad_norm": 1.4459962866429865, "learning_rate": 7.202380952380953e-06, "loss": 1.0129, "step": 121 }, { "epoch": 0.010923946499524316, "grad_norm": 1.3100912174599022, "learning_rate": 7.261904761904762e-06, "loss": 0.9614, "step": 122 }, { "epoch": 0.011013487044602383, "grad_norm": 1.5305042076623463, "learning_rate": 7.321428571428572e-06, "loss": 0.981, "step": 123 }, { "epoch": 0.011103027589680452, "grad_norm": 1.2433333731355567, "learning_rate": 7.380952380952382e-06, "loss": 0.9791, "step": 124 }, { "epoch": 0.01119256813475852, "grad_norm": 1.3889845259093059, "learning_rate": 7.440476190476191e-06, "loss": 0.9765, "step": 125 }, { "epoch": 0.011282108679836589, "grad_norm": 1.502651313820656, "learning_rate": 7.500000000000001e-06, "loss": 1.0094, "step": 126 }, { "epoch": 0.011371649224914657, "grad_norm": 1.4771171422373752, "learning_rate": 7.55952380952381e-06, "loss": 0.992, "step": 127 }, { "epoch": 0.011461189769992724, "grad_norm": 1.2753017174252659, "learning_rate": 7.61904761904762e-06, "loss": 1.0145, "step": 128 }, { "epoch": 0.011550730315070793, "grad_norm": 1.3288798388981164, "learning_rate": 7.67857142857143e-06, "loss": 0.965, "step": 129 }, { "epoch": 0.011640270860148861, "grad_norm": 1.3896000789531564, "learning_rate": 7.738095238095238e-06, "loss": 0.9574, "step": 130 }, { "epoch": 0.01172981140522693, "grad_norm": 1.400636887292073, "learning_rate": 7.797619047619049e-06, "loss": 0.9677, "step": 131 }, { "epoch": 0.011819351950304997, "grad_norm": 1.4834782043503996, "learning_rate": 7.857142857142858e-06, "loss": 0.9613, "step": 132 }, { "epoch": 0.011908892495383065, "grad_norm": 1.4226787998801091, "learning_rate": 7.916666666666667e-06, "loss": 0.9504, "step": 133 }, { "epoch": 0.011998433040461134, "grad_norm": 1.5098653254615386, "learning_rate": 7.976190476190477e-06, "loss": 0.9955, "step": 134 }, { "epoch": 0.012087973585539202, "grad_norm": 1.2705414690639203, "learning_rate": 8.035714285714286e-06, "loss": 0.9821, "step": 135 }, { "epoch": 0.01217751413061727, "grad_norm": 1.3407959556633955, "learning_rate": 8.095238095238097e-06, "loss": 0.9363, "step": 136 }, { "epoch": 0.012267054675695338, "grad_norm": 1.4441549278470907, "learning_rate": 8.154761904761905e-06, "loss": 0.9111, "step": 137 }, { "epoch": 0.012356595220773406, "grad_norm": 1.4028581785408012, "learning_rate": 8.214285714285714e-06, "loss": 0.9877, "step": 138 }, { "epoch": 0.012446135765851475, "grad_norm": 1.398569645765137, "learning_rate": 8.273809523809523e-06, "loss": 1.0216, "step": 139 }, { "epoch": 0.012535676310929543, "grad_norm": 1.2531219966896094, "learning_rate": 8.333333333333334e-06, "loss": 0.9097, "step": 140 }, { "epoch": 0.01262521685600761, "grad_norm": 1.3940632443651146, "learning_rate": 8.392857142857144e-06, "loss": 0.9529, "step": 141 }, { "epoch": 0.012714757401085679, "grad_norm": 1.505737845025738, "learning_rate": 8.452380952380953e-06, "loss": 0.9739, "step": 142 }, { "epoch": 0.012804297946163747, "grad_norm": 1.4344630058806271, "learning_rate": 8.511904761904762e-06, "loss": 1.04, "step": 143 }, { "epoch": 0.012893838491241816, "grad_norm": 1.361451985668616, "learning_rate": 8.571428571428571e-06, "loss": 1.006, "step": 144 }, { "epoch": 0.012983379036319884, "grad_norm": 1.3657036535038112, "learning_rate": 8.630952380952381e-06, "loss": 0.9894, "step": 145 }, { "epoch": 0.013072919581397951, "grad_norm": 1.4027313732363873, "learning_rate": 8.690476190476192e-06, "loss": 0.9422, "step": 146 }, { "epoch": 0.01316246012647602, "grad_norm": 1.339486554692635, "learning_rate": 8.750000000000001e-06, "loss": 0.946, "step": 147 }, { "epoch": 0.013252000671554088, "grad_norm": 1.2207375784735799, "learning_rate": 8.80952380952381e-06, "loss": 0.9685, "step": 148 }, { "epoch": 0.013341541216632157, "grad_norm": 1.2898935942769207, "learning_rate": 8.869047619047619e-06, "loss": 0.9228, "step": 149 }, { "epoch": 0.013431081761710224, "grad_norm": 1.4123772858280599, "learning_rate": 8.92857142857143e-06, "loss": 1.0187, "step": 150 }, { "epoch": 0.013520622306788292, "grad_norm": 1.2596251148002016, "learning_rate": 8.98809523809524e-06, "loss": 0.9466, "step": 151 }, { "epoch": 0.01361016285186636, "grad_norm": 1.3755666142514171, "learning_rate": 9.047619047619049e-06, "loss": 0.9445, "step": 152 }, { "epoch": 0.01369970339694443, "grad_norm": 1.391555104218961, "learning_rate": 9.107142857142858e-06, "loss": 1.0195, "step": 153 }, { "epoch": 0.013789243942022498, "grad_norm": 1.2783454670190193, "learning_rate": 9.166666666666666e-06, "loss": 1.0155, "step": 154 }, { "epoch": 0.013878784487100565, "grad_norm": 1.3063039982513691, "learning_rate": 9.226190476190477e-06, "loss": 0.9396, "step": 155 }, { "epoch": 0.013968325032178633, "grad_norm": 1.2938757442437152, "learning_rate": 9.285714285714288e-06, "loss": 1.0104, "step": 156 }, { "epoch": 0.014057865577256702, "grad_norm": 1.3808606773264782, "learning_rate": 9.345238095238096e-06, "loss": 0.9891, "step": 157 }, { "epoch": 0.01414740612233477, "grad_norm": 1.2614854087003178, "learning_rate": 9.404761904761905e-06, "loss": 0.9717, "step": 158 }, { "epoch": 0.014236946667412837, "grad_norm": 1.2642808998858541, "learning_rate": 9.464285714285714e-06, "loss": 0.9344, "step": 159 }, { "epoch": 0.014326487212490906, "grad_norm": 1.2302383357152784, "learning_rate": 9.523809523809525e-06, "loss": 1.0114, "step": 160 }, { "epoch": 0.014416027757568974, "grad_norm": 1.255141844023732, "learning_rate": 9.583333333333335e-06, "loss": 0.9564, "step": 161 }, { "epoch": 0.014505568302647043, "grad_norm": 1.3698214664865263, "learning_rate": 9.642857142857144e-06, "loss": 0.9686, "step": 162 }, { "epoch": 0.014595108847725111, "grad_norm": 1.3419018411191843, "learning_rate": 9.702380952380953e-06, "loss": 0.9748, "step": 163 }, { "epoch": 0.014684649392803178, "grad_norm": 1.1614847734170037, "learning_rate": 9.761904761904762e-06, "loss": 0.9161, "step": 164 }, { "epoch": 0.014774189937881247, "grad_norm": 1.368116245539826, "learning_rate": 9.821428571428573e-06, "loss": 1.0298, "step": 165 }, { "epoch": 0.014863730482959315, "grad_norm": 1.4199667158707043, "learning_rate": 9.880952380952381e-06, "loss": 0.9031, "step": 166 }, { "epoch": 0.014953271028037384, "grad_norm": 1.4252433399103757, "learning_rate": 9.940476190476192e-06, "loss": 0.9912, "step": 167 }, { "epoch": 0.01504281157311545, "grad_norm": 1.3896954573715155, "learning_rate": 1e-05, "loss": 0.938, "step": 168 }, { "epoch": 0.01513235211819352, "grad_norm": 1.3013901404188426, "learning_rate": 1.005952380952381e-05, "loss": 0.9786, "step": 169 }, { "epoch": 0.015221892663271588, "grad_norm": 1.3296804047070019, "learning_rate": 1.011904761904762e-05, "loss": 1.0077, "step": 170 }, { "epoch": 0.015311433208349656, "grad_norm": 1.3918359833476754, "learning_rate": 1.0178571428571429e-05, "loss": 0.9493, "step": 171 }, { "epoch": 0.015400973753427725, "grad_norm": 1.283405187122454, "learning_rate": 1.0238095238095238e-05, "loss": 0.9433, "step": 172 }, { "epoch": 0.015490514298505792, "grad_norm": 1.3750965752510205, "learning_rate": 1.0297619047619047e-05, "loss": 0.991, "step": 173 }, { "epoch": 0.01558005484358386, "grad_norm": 1.300250340319829, "learning_rate": 1.0357142857142859e-05, "loss": 0.9566, "step": 174 }, { "epoch": 0.015669595388661927, "grad_norm": 1.1949688314302664, "learning_rate": 1.0416666666666668e-05, "loss": 0.8441, "step": 175 }, { "epoch": 0.015759135933739996, "grad_norm": 1.321743869216069, "learning_rate": 1.0476190476190477e-05, "loss": 0.909, "step": 176 }, { "epoch": 0.015848676478818064, "grad_norm": 1.3363798807219291, "learning_rate": 1.0535714285714287e-05, "loss": 0.905, "step": 177 }, { "epoch": 0.015938217023896133, "grad_norm": 1.4593971998537487, "learning_rate": 1.0595238095238096e-05, "loss": 0.938, "step": 178 }, { "epoch": 0.0160277575689742, "grad_norm": 1.445421099859946, "learning_rate": 1.0654761904761905e-05, "loss": 0.9366, "step": 179 }, { "epoch": 0.01611729811405227, "grad_norm": 1.3614058430879477, "learning_rate": 1.0714285714285714e-05, "loss": 1.0041, "step": 180 }, { "epoch": 0.01620683865913034, "grad_norm": 1.5686440239591053, "learning_rate": 1.0773809523809525e-05, "loss": 0.9215, "step": 181 }, { "epoch": 0.016296379204208407, "grad_norm": 1.2934528124033098, "learning_rate": 1.0833333333333334e-05, "loss": 0.9991, "step": 182 }, { "epoch": 0.016385919749286475, "grad_norm": 1.2021664875612033, "learning_rate": 1.0892857142857142e-05, "loss": 0.9283, "step": 183 }, { "epoch": 0.01647546029436454, "grad_norm": 1.3767762471746128, "learning_rate": 1.0952380952380955e-05, "loss": 0.9849, "step": 184 }, { "epoch": 0.01656500083944261, "grad_norm": 1.2164130350107607, "learning_rate": 1.1011904761904764e-05, "loss": 0.9246, "step": 185 }, { "epoch": 0.016654541384520678, "grad_norm": 1.368032712333836, "learning_rate": 1.1071428571428572e-05, "loss": 0.9453, "step": 186 }, { "epoch": 0.016744081929598746, "grad_norm": 1.3469222988219791, "learning_rate": 1.1130952380952383e-05, "loss": 1.0545, "step": 187 }, { "epoch": 0.016833622474676815, "grad_norm": 1.8300489265056759, "learning_rate": 1.1190476190476192e-05, "loss": 0.8654, "step": 188 }, { "epoch": 0.016923163019754883, "grad_norm": 1.3505891905752436, "learning_rate": 1.125e-05, "loss": 0.9516, "step": 189 }, { "epoch": 0.017012703564832952, "grad_norm": 1.4346732544846332, "learning_rate": 1.130952380952381e-05, "loss": 0.9323, "step": 190 }, { "epoch": 0.01710224410991102, "grad_norm": 1.4145336071053909, "learning_rate": 1.136904761904762e-05, "loss": 0.9982, "step": 191 }, { "epoch": 0.01719178465498909, "grad_norm": 1.3729621826328517, "learning_rate": 1.1428571428571429e-05, "loss": 0.9534, "step": 192 }, { "epoch": 0.017281325200067154, "grad_norm": 1.3545358955770777, "learning_rate": 1.1488095238095238e-05, "loss": 0.9654, "step": 193 }, { "epoch": 0.017370865745145223, "grad_norm": 1.2958480407714497, "learning_rate": 1.1547619047619047e-05, "loss": 0.9709, "step": 194 }, { "epoch": 0.01746040629022329, "grad_norm": 1.417419616391308, "learning_rate": 1.1607142857142859e-05, "loss": 0.9933, "step": 195 }, { "epoch": 0.01754994683530136, "grad_norm": 1.2187334395358913, "learning_rate": 1.1666666666666668e-05, "loss": 0.9824, "step": 196 }, { "epoch": 0.017639487380379428, "grad_norm": 1.382354810818814, "learning_rate": 1.1726190476190478e-05, "loss": 0.9633, "step": 197 }, { "epoch": 0.017729027925457497, "grad_norm": 1.4095044078409356, "learning_rate": 1.1785714285714287e-05, "loss": 0.947, "step": 198 }, { "epoch": 0.017818568470535565, "grad_norm": 1.3356052977273825, "learning_rate": 1.1845238095238096e-05, "loss": 0.994, "step": 199 }, { "epoch": 0.017908109015613634, "grad_norm": 1.4655877002540365, "learning_rate": 1.1904761904761905e-05, "loss": 0.9436, "step": 200 }, { "epoch": 0.017997649560691702, "grad_norm": 1.488756592776806, "learning_rate": 1.1964285714285716e-05, "loss": 0.9966, "step": 201 }, { "epoch": 0.018087190105769767, "grad_norm": 1.3525701096076792, "learning_rate": 1.2023809523809525e-05, "loss": 0.9324, "step": 202 }, { "epoch": 0.018176730650847836, "grad_norm": 1.4121435139307525, "learning_rate": 1.2083333333333333e-05, "loss": 0.9448, "step": 203 }, { "epoch": 0.018266271195925905, "grad_norm": 1.3258960013505587, "learning_rate": 1.2142857142857142e-05, "loss": 1.0262, "step": 204 }, { "epoch": 0.018355811741003973, "grad_norm": 1.1839792345048852, "learning_rate": 1.2202380952380955e-05, "loss": 0.8914, "step": 205 }, { "epoch": 0.01844535228608204, "grad_norm": 1.3564771867906358, "learning_rate": 1.2261904761904763e-05, "loss": 0.8948, "step": 206 }, { "epoch": 0.01853489283116011, "grad_norm": 1.6777434640215823, "learning_rate": 1.2321428571428572e-05, "loss": 0.9361, "step": 207 }, { "epoch": 0.01862443337623818, "grad_norm": 1.2414949636325754, "learning_rate": 1.2380952380952383e-05, "loss": 0.981, "step": 208 }, { "epoch": 0.018713973921316247, "grad_norm": 1.350636107950117, "learning_rate": 1.2440476190476192e-05, "loss": 0.9689, "step": 209 }, { "epoch": 0.018803514466394316, "grad_norm": 1.33427149141748, "learning_rate": 1.25e-05, "loss": 0.9206, "step": 210 }, { "epoch": 0.01889305501147238, "grad_norm": 1.1998127387502966, "learning_rate": 1.2559523809523811e-05, "loss": 0.9281, "step": 211 }, { "epoch": 0.01898259555655045, "grad_norm": 1.3476198056947557, "learning_rate": 1.261904761904762e-05, "loss": 0.934, "step": 212 }, { "epoch": 0.019072136101628518, "grad_norm": 1.3741871404916675, "learning_rate": 1.2678571428571429e-05, "loss": 1.0093, "step": 213 }, { "epoch": 0.019161676646706587, "grad_norm": 1.2270861175633545, "learning_rate": 1.2738095238095238e-05, "loss": 0.9155, "step": 214 }, { "epoch": 0.019251217191784655, "grad_norm": 1.2452907840956695, "learning_rate": 1.2797619047619048e-05, "loss": 0.9528, "step": 215 }, { "epoch": 0.019340757736862724, "grad_norm": 1.3856294183070534, "learning_rate": 1.2857142857142859e-05, "loss": 0.957, "step": 216 }, { "epoch": 0.019430298281940792, "grad_norm": 1.4644668358733208, "learning_rate": 1.2916666666666668e-05, "loss": 0.9731, "step": 217 }, { "epoch": 0.01951983882701886, "grad_norm": 1.2869545198770278, "learning_rate": 1.2976190476190478e-05, "loss": 0.9313, "step": 218 }, { "epoch": 0.01960937937209693, "grad_norm": 1.233394188597046, "learning_rate": 1.3035714285714287e-05, "loss": 0.9386, "step": 219 }, { "epoch": 0.019698919917174994, "grad_norm": 1.1779294605283495, "learning_rate": 1.3095238095238096e-05, "loss": 0.8988, "step": 220 }, { "epoch": 0.019788460462253063, "grad_norm": 1.3649114963818465, "learning_rate": 1.3154761904761905e-05, "loss": 0.9694, "step": 221 }, { "epoch": 0.01987800100733113, "grad_norm": 1.2929423505249116, "learning_rate": 1.3214285714285716e-05, "loss": 0.9131, "step": 222 }, { "epoch": 0.0199675415524092, "grad_norm": 1.2651829321222632, "learning_rate": 1.3273809523809524e-05, "loss": 1.0312, "step": 223 }, { "epoch": 0.02005708209748727, "grad_norm": 1.4679051518959685, "learning_rate": 1.3333333333333333e-05, "loss": 0.9725, "step": 224 }, { "epoch": 0.020146622642565337, "grad_norm": 1.2523234262550003, "learning_rate": 1.3392857142857142e-05, "loss": 0.9389, "step": 225 }, { "epoch": 0.020236163187643406, "grad_norm": 1.2768673189458624, "learning_rate": 1.3452380952380954e-05, "loss": 0.9746, "step": 226 }, { "epoch": 0.020325703732721474, "grad_norm": 1.3282583630031086, "learning_rate": 1.3511904761904763e-05, "loss": 0.9726, "step": 227 }, { "epoch": 0.02041524427779954, "grad_norm": 1.245528315258455, "learning_rate": 1.3571428571428574e-05, "loss": 0.9756, "step": 228 }, { "epoch": 0.020504784822877608, "grad_norm": 1.2482999509375599, "learning_rate": 1.3630952380952383e-05, "loss": 0.9426, "step": 229 }, { "epoch": 0.020594325367955676, "grad_norm": 1.2269661387783157, "learning_rate": 1.3690476190476192e-05, "loss": 0.8852, "step": 230 }, { "epoch": 0.020683865913033745, "grad_norm": 1.2918128081032816, "learning_rate": 1.375e-05, "loss": 0.8946, "step": 231 }, { "epoch": 0.020773406458111814, "grad_norm": 1.294616303481284, "learning_rate": 1.3809523809523811e-05, "loss": 0.9958, "step": 232 }, { "epoch": 0.020862947003189882, "grad_norm": 1.3451743251615973, "learning_rate": 1.386904761904762e-05, "loss": 0.9277, "step": 233 }, { "epoch": 0.02095248754826795, "grad_norm": 1.3030767633167066, "learning_rate": 1.3928571428571429e-05, "loss": 0.9886, "step": 234 }, { "epoch": 0.02104202809334602, "grad_norm": 1.5956175984393084, "learning_rate": 1.3988095238095238e-05, "loss": 0.9578, "step": 235 }, { "epoch": 0.021131568638424088, "grad_norm": 1.6042283010809089, "learning_rate": 1.4047619047619048e-05, "loss": 0.9404, "step": 236 }, { "epoch": 0.021221109183502153, "grad_norm": 1.4714817421979074, "learning_rate": 1.4107142857142859e-05, "loss": 0.906, "step": 237 }, { "epoch": 0.02131064972858022, "grad_norm": 1.1734285774248108, "learning_rate": 1.416666666666667e-05, "loss": 0.938, "step": 238 }, { "epoch": 0.02140019027365829, "grad_norm": 1.2504049411129798, "learning_rate": 1.4226190476190478e-05, "loss": 0.964, "step": 239 }, { "epoch": 0.02148973081873636, "grad_norm": 1.348650134376146, "learning_rate": 1.4285714285714287e-05, "loss": 0.9242, "step": 240 }, { "epoch": 0.021579271363814427, "grad_norm": 1.125410541920277, "learning_rate": 1.4345238095238096e-05, "loss": 0.9829, "step": 241 }, { "epoch": 0.021668811908892496, "grad_norm": 1.2425939596990612, "learning_rate": 1.4404761904761907e-05, "loss": 0.9329, "step": 242 }, { "epoch": 0.021758352453970564, "grad_norm": 1.2947312519312828, "learning_rate": 1.4464285714285715e-05, "loss": 0.9172, "step": 243 }, { "epoch": 0.021847892999048633, "grad_norm": 1.2421461784241594, "learning_rate": 1.4523809523809524e-05, "loss": 0.938, "step": 244 }, { "epoch": 0.0219374335441267, "grad_norm": 1.2572211370448974, "learning_rate": 1.4583333333333333e-05, "loss": 0.9676, "step": 245 }, { "epoch": 0.022026974089204766, "grad_norm": 1.254642676133443, "learning_rate": 1.4642857142857144e-05, "loss": 0.9531, "step": 246 }, { "epoch": 0.022116514634282835, "grad_norm": 1.3851200771479248, "learning_rate": 1.4702380952380954e-05, "loss": 1.0124, "step": 247 }, { "epoch": 0.022206055179360903, "grad_norm": 1.4017617822521042, "learning_rate": 1.4761904761904763e-05, "loss": 0.8903, "step": 248 }, { "epoch": 0.022295595724438972, "grad_norm": 1.1560384690623424, "learning_rate": 1.4821428571428574e-05, "loss": 0.9086, "step": 249 }, { "epoch": 0.02238513626951704, "grad_norm": 1.4586151690902063, "learning_rate": 1.4880952380952383e-05, "loss": 0.9175, "step": 250 }, { "epoch": 0.02247467681459511, "grad_norm": 1.2338910970784036, "learning_rate": 1.4940476190476192e-05, "loss": 0.9879, "step": 251 }, { "epoch": 0.022564217359673178, "grad_norm": 1.3922914075241761, "learning_rate": 1.5000000000000002e-05, "loss": 0.9513, "step": 252 }, { "epoch": 0.022653757904751246, "grad_norm": 1.3300603379010327, "learning_rate": 1.5059523809523811e-05, "loss": 1.0006, "step": 253 }, { "epoch": 0.022743298449829315, "grad_norm": 1.5273512053589549, "learning_rate": 1.511904761904762e-05, "loss": 0.9738, "step": 254 }, { "epoch": 0.02283283899490738, "grad_norm": 1.396678207050215, "learning_rate": 1.5178571428571429e-05, "loss": 0.8837, "step": 255 }, { "epoch": 0.02292237953998545, "grad_norm": 1.2861620665630484, "learning_rate": 1.523809523809524e-05, "loss": 0.9543, "step": 256 }, { "epoch": 0.023011920085063517, "grad_norm": 1.4691272216286264, "learning_rate": 1.5297619047619046e-05, "loss": 0.9201, "step": 257 }, { "epoch": 0.023101460630141585, "grad_norm": 1.2754063399808757, "learning_rate": 1.535714285714286e-05, "loss": 0.9442, "step": 258 }, { "epoch": 0.023191001175219654, "grad_norm": 1.3005351288923879, "learning_rate": 1.5416666666666668e-05, "loss": 1.001, "step": 259 }, { "epoch": 0.023280541720297723, "grad_norm": 1.2515896550362557, "learning_rate": 1.5476190476190476e-05, "loss": 0.9559, "step": 260 }, { "epoch": 0.02337008226537579, "grad_norm": 1.237838957465389, "learning_rate": 1.553571428571429e-05, "loss": 0.9448, "step": 261 }, { "epoch": 0.02345962281045386, "grad_norm": 1.2897409321131819, "learning_rate": 1.5595238095238098e-05, "loss": 0.9845, "step": 262 }, { "epoch": 0.023549163355531928, "grad_norm": 1.17906683487718, "learning_rate": 1.5654761904761906e-05, "loss": 1.0076, "step": 263 }, { "epoch": 0.023638703900609993, "grad_norm": 1.2490508619903085, "learning_rate": 1.5714285714285715e-05, "loss": 0.9666, "step": 264 }, { "epoch": 0.023728244445688062, "grad_norm": 1.223240675225147, "learning_rate": 1.5773809523809524e-05, "loss": 0.9171, "step": 265 }, { "epoch": 0.02381778499076613, "grad_norm": 1.1985738977030302, "learning_rate": 1.5833333333333333e-05, "loss": 0.9431, "step": 266 }, { "epoch": 0.0239073255358442, "grad_norm": 1.3688562148592254, "learning_rate": 1.5892857142857142e-05, "loss": 0.9309, "step": 267 }, { "epoch": 0.023996866080922268, "grad_norm": 1.3354654095343694, "learning_rate": 1.5952380952380954e-05, "loss": 0.9838, "step": 268 }, { "epoch": 0.024086406626000336, "grad_norm": 1.3708472732213348, "learning_rate": 1.6011904761904763e-05, "loss": 1.0132, "step": 269 }, { "epoch": 0.024175947171078405, "grad_norm": 1.268922384284169, "learning_rate": 1.6071428571428572e-05, "loss": 0.9706, "step": 270 }, { "epoch": 0.024265487716156473, "grad_norm": 1.2617017544944156, "learning_rate": 1.6130952380952384e-05, "loss": 0.8745, "step": 271 }, { "epoch": 0.02435502826123454, "grad_norm": 1.139790716312867, "learning_rate": 1.6190476190476193e-05, "loss": 0.9588, "step": 272 }, { "epoch": 0.024444568806312607, "grad_norm": 1.1818596979066731, "learning_rate": 1.6250000000000002e-05, "loss": 0.9524, "step": 273 }, { "epoch": 0.024534109351390675, "grad_norm": 1.2738117528692605, "learning_rate": 1.630952380952381e-05, "loss": 0.9619, "step": 274 }, { "epoch": 0.024623649896468744, "grad_norm": 1.1467449114397317, "learning_rate": 1.636904761904762e-05, "loss": 0.9172, "step": 275 }, { "epoch": 0.024713190441546812, "grad_norm": 1.2632158063398984, "learning_rate": 1.642857142857143e-05, "loss": 0.992, "step": 276 }, { "epoch": 0.02480273098662488, "grad_norm": 1.2015540600116177, "learning_rate": 1.6488095238095237e-05, "loss": 0.9638, "step": 277 }, { "epoch": 0.02489227153170295, "grad_norm": 1.6194864119335934, "learning_rate": 1.6547619047619046e-05, "loss": 0.9563, "step": 278 }, { "epoch": 0.024981812076781018, "grad_norm": 1.317110222923093, "learning_rate": 1.660714285714286e-05, "loss": 0.9467, "step": 279 }, { "epoch": 0.025071352621859087, "grad_norm": 1.2084077377335762, "learning_rate": 1.6666666666666667e-05, "loss": 0.9559, "step": 280 }, { "epoch": 0.025160893166937155, "grad_norm": 1.2177578314772777, "learning_rate": 1.672619047619048e-05, "loss": 0.9208, "step": 281 }, { "epoch": 0.02525043371201522, "grad_norm": 1.3396968157893492, "learning_rate": 1.678571428571429e-05, "loss": 0.929, "step": 282 }, { "epoch": 0.02533997425709329, "grad_norm": 1.2709315475510545, "learning_rate": 1.6845238095238097e-05, "loss": 0.9717, "step": 283 }, { "epoch": 0.025429514802171357, "grad_norm": 1.1736756823003907, "learning_rate": 1.6904761904761906e-05, "loss": 0.9442, "step": 284 }, { "epoch": 0.025519055347249426, "grad_norm": 1.2819378880729473, "learning_rate": 1.6964285714285715e-05, "loss": 0.9784, "step": 285 }, { "epoch": 0.025608595892327495, "grad_norm": 1.2072524557125448, "learning_rate": 1.7023809523809524e-05, "loss": 0.9367, "step": 286 }, { "epoch": 0.025698136437405563, "grad_norm": 1.1931206254308466, "learning_rate": 1.7083333333333333e-05, "loss": 0.928, "step": 287 }, { "epoch": 0.02578767698248363, "grad_norm": 1.2451944086669584, "learning_rate": 1.7142857142857142e-05, "loss": 0.9856, "step": 288 }, { "epoch": 0.0258772175275617, "grad_norm": 1.186333557285608, "learning_rate": 1.7202380952380954e-05, "loss": 0.9207, "step": 289 }, { "epoch": 0.02596675807263977, "grad_norm": 1.2859414579975315, "learning_rate": 1.7261904761904763e-05, "loss": 1.0336, "step": 290 }, { "epoch": 0.026056298617717834, "grad_norm": 1.2692773660377794, "learning_rate": 1.7321428571428572e-05, "loss": 0.9622, "step": 291 }, { "epoch": 0.026145839162795902, "grad_norm": 1.2009944656927962, "learning_rate": 1.7380952380952384e-05, "loss": 0.8634, "step": 292 }, { "epoch": 0.02623537970787397, "grad_norm": 1.192861316510647, "learning_rate": 1.7440476190476193e-05, "loss": 0.9012, "step": 293 }, { "epoch": 0.02632492025295204, "grad_norm": 1.2344980831774923, "learning_rate": 1.7500000000000002e-05, "loss": 0.9481, "step": 294 }, { "epoch": 0.026414460798030108, "grad_norm": 1.3273083647356205, "learning_rate": 1.755952380952381e-05, "loss": 0.9185, "step": 295 }, { "epoch": 0.026504001343108177, "grad_norm": 1.2967623142399103, "learning_rate": 1.761904761904762e-05, "loss": 0.939, "step": 296 }, { "epoch": 0.026593541888186245, "grad_norm": 1.2671348071058701, "learning_rate": 1.767857142857143e-05, "loss": 0.9737, "step": 297 }, { "epoch": 0.026683082433264314, "grad_norm": 1.4884601300030078, "learning_rate": 1.7738095238095237e-05, "loss": 0.9058, "step": 298 }, { "epoch": 0.026772622978342382, "grad_norm": 1.3585149324752888, "learning_rate": 1.779761904761905e-05, "loss": 0.9579, "step": 299 }, { "epoch": 0.026862163523420447, "grad_norm": 1.251343435632289, "learning_rate": 1.785714285714286e-05, "loss": 0.9218, "step": 300 }, { "epoch": 0.026951704068498516, "grad_norm": 1.1879850640137528, "learning_rate": 1.7916666666666667e-05, "loss": 0.8826, "step": 301 }, { "epoch": 0.027041244613576584, "grad_norm": 1.1686705636610717, "learning_rate": 1.797619047619048e-05, "loss": 0.8943, "step": 302 }, { "epoch": 0.027130785158654653, "grad_norm": 1.3269898001103417, "learning_rate": 1.803571428571429e-05, "loss": 0.9295, "step": 303 }, { "epoch": 0.02722032570373272, "grad_norm": 1.188519023547051, "learning_rate": 1.8095238095238097e-05, "loss": 0.9809, "step": 304 }, { "epoch": 0.02730986624881079, "grad_norm": 1.3206352421494414, "learning_rate": 1.8154761904761906e-05, "loss": 0.9565, "step": 305 }, { "epoch": 0.02739940679388886, "grad_norm": 1.2560526345765262, "learning_rate": 1.8214285714285715e-05, "loss": 0.9757, "step": 306 }, { "epoch": 0.027488947338966927, "grad_norm": 1.4476339312774624, "learning_rate": 1.8273809523809524e-05, "loss": 0.9555, "step": 307 }, { "epoch": 0.027578487884044996, "grad_norm": 1.3209041933174963, "learning_rate": 1.8333333333333333e-05, "loss": 0.893, "step": 308 }, { "epoch": 0.02766802842912306, "grad_norm": 1.278921184328932, "learning_rate": 1.8392857142857142e-05, "loss": 0.9433, "step": 309 }, { "epoch": 0.02775756897420113, "grad_norm": 1.238903595474599, "learning_rate": 1.8452380952380954e-05, "loss": 0.9346, "step": 310 }, { "epoch": 0.027847109519279198, "grad_norm": 1.2846775825747934, "learning_rate": 1.8511904761904763e-05, "loss": 0.9268, "step": 311 }, { "epoch": 0.027936650064357266, "grad_norm": 1.1542758792293397, "learning_rate": 1.8571428571428575e-05, "loss": 0.9662, "step": 312 }, { "epoch": 0.028026190609435335, "grad_norm": 1.2940399503698272, "learning_rate": 1.8630952380952384e-05, "loss": 0.9071, "step": 313 }, { "epoch": 0.028115731154513404, "grad_norm": 1.1780693686095078, "learning_rate": 1.8690476190476193e-05, "loss": 0.8983, "step": 314 }, { "epoch": 0.028205271699591472, "grad_norm": 1.2731633775610305, "learning_rate": 1.8750000000000002e-05, "loss": 0.9516, "step": 315 }, { "epoch": 0.02829481224466954, "grad_norm": 1.337023007007858, "learning_rate": 1.880952380952381e-05, "loss": 0.9505, "step": 316 }, { "epoch": 0.02838435278974761, "grad_norm": 1.2936786958242825, "learning_rate": 1.886904761904762e-05, "loss": 0.9611, "step": 317 }, { "epoch": 0.028473893334825674, "grad_norm": 1.3612811004646033, "learning_rate": 1.892857142857143e-05, "loss": 0.9937, "step": 318 }, { "epoch": 0.028563433879903743, "grad_norm": 1.1658265192952217, "learning_rate": 1.8988095238095237e-05, "loss": 0.9922, "step": 319 }, { "epoch": 0.02865297442498181, "grad_norm": 1.2044117714956288, "learning_rate": 1.904761904761905e-05, "loss": 0.8805, "step": 320 }, { "epoch": 0.02874251497005988, "grad_norm": 1.2259452468459933, "learning_rate": 1.910714285714286e-05, "loss": 0.9154, "step": 321 }, { "epoch": 0.02883205551513795, "grad_norm": 1.1179903436614513, "learning_rate": 1.916666666666667e-05, "loss": 0.941, "step": 322 }, { "epoch": 0.028921596060216017, "grad_norm": 1.3082458991850165, "learning_rate": 1.922619047619048e-05, "loss": 0.8364, "step": 323 }, { "epoch": 0.029011136605294086, "grad_norm": 1.103997232008849, "learning_rate": 1.928571428571429e-05, "loss": 0.9284, "step": 324 }, { "epoch": 0.029100677150372154, "grad_norm": 1.2402651208124122, "learning_rate": 1.9345238095238097e-05, "loss": 0.9415, "step": 325 }, { "epoch": 0.029190217695450223, "grad_norm": 1.1946287678518863, "learning_rate": 1.9404761904761906e-05, "loss": 0.861, "step": 326 }, { "epoch": 0.029279758240528288, "grad_norm": 1.2913893706035076, "learning_rate": 1.9464285714285715e-05, "loss": 0.945, "step": 327 }, { "epoch": 0.029369298785606356, "grad_norm": 1.2357971553362492, "learning_rate": 1.9523809523809524e-05, "loss": 0.9335, "step": 328 }, { "epoch": 0.029458839330684425, "grad_norm": 1.3450287990312693, "learning_rate": 1.9583333333333333e-05, "loss": 0.9363, "step": 329 }, { "epoch": 0.029548379875762493, "grad_norm": 1.3454130192674454, "learning_rate": 1.9642857142857145e-05, "loss": 0.903, "step": 330 }, { "epoch": 0.029637920420840562, "grad_norm": 1.3951306355796904, "learning_rate": 1.9702380952380954e-05, "loss": 0.9393, "step": 331 }, { "epoch": 0.02972746096591863, "grad_norm": 1.2117680774838118, "learning_rate": 1.9761904761904763e-05, "loss": 0.9326, "step": 332 }, { "epoch": 0.0298170015109967, "grad_norm": 1.2228522050442563, "learning_rate": 1.9821428571428575e-05, "loss": 0.9054, "step": 333 }, { "epoch": 0.029906542056074768, "grad_norm": 1.1940677033805458, "learning_rate": 1.9880952380952384e-05, "loss": 1.0374, "step": 334 }, { "epoch": 0.029996082601152836, "grad_norm": 1.445666161448832, "learning_rate": 1.9940476190476193e-05, "loss": 0.8591, "step": 335 }, { "epoch": 0.0300856231462309, "grad_norm": 1.175121282559073, "learning_rate": 2e-05, "loss": 0.9034, "step": 336 }, { "epoch": 0.03017516369130897, "grad_norm": 1.1840759609844393, "learning_rate": 1.9999999579416295e-05, "loss": 0.9852, "step": 337 }, { "epoch": 0.03026470423638704, "grad_norm": 1.2972577877784766, "learning_rate": 1.999999831766521e-05, "loss": 0.9217, "step": 338 }, { "epoch": 0.030354244781465107, "grad_norm": 1.3410857496455018, "learning_rate": 1.9999996214746854e-05, "loss": 0.922, "step": 339 }, { "epoch": 0.030443785326543175, "grad_norm": 1.2858331779234244, "learning_rate": 1.9999993270661405e-05, "loss": 0.9902, "step": 340 }, { "epoch": 0.030533325871621244, "grad_norm": 1.2906061274610103, "learning_rate": 1.9999989485409108e-05, "loss": 0.9497, "step": 341 }, { "epoch": 0.030622866416699313, "grad_norm": 1.3438050934807848, "learning_rate": 1.9999984858990286e-05, "loss": 0.9774, "step": 342 }, { "epoch": 0.03071240696177738, "grad_norm": 1.122789551808367, "learning_rate": 1.9999979391405317e-05, "loss": 0.9693, "step": 343 }, { "epoch": 0.03080194750685545, "grad_norm": 1.2958773216664197, "learning_rate": 1.9999973082654672e-05, "loss": 0.9157, "step": 344 }, { "epoch": 0.030891488051933515, "grad_norm": 1.1076076237587629, "learning_rate": 1.999996593273888e-05, "loss": 0.9138, "step": 345 }, { "epoch": 0.030981028597011583, "grad_norm": 1.274050445689624, "learning_rate": 1.9999957941658542e-05, "loss": 0.9782, "step": 346 }, { "epoch": 0.031070569142089652, "grad_norm": 1.1724707586484096, "learning_rate": 1.9999949109414324e-05, "loss": 0.9915, "step": 347 }, { "epoch": 0.03116010968716772, "grad_norm": 1.2030585442294828, "learning_rate": 1.9999939436006975e-05, "loss": 0.9435, "step": 348 }, { "epoch": 0.03124965023224579, "grad_norm": 1.260596168675043, "learning_rate": 1.9999928921437312e-05, "loss": 0.9314, "step": 349 }, { "epoch": 0.031339190777323854, "grad_norm": 1.130308218986913, "learning_rate": 1.9999917565706212e-05, "loss": 0.9528, "step": 350 }, { "epoch": 0.031428731322401926, "grad_norm": 1.2635135167153189, "learning_rate": 1.999990536881463e-05, "loss": 0.9238, "step": 351 }, { "epoch": 0.03151827186747999, "grad_norm": 1.2419529359399137, "learning_rate": 1.99998923307636e-05, "loss": 0.9508, "step": 352 }, { "epoch": 0.03160781241255806, "grad_norm": 1.3089890933275954, "learning_rate": 1.999987845155421e-05, "loss": 0.9375, "step": 353 }, { "epoch": 0.03169735295763613, "grad_norm": 1.1855978529504216, "learning_rate": 1.9999863731187633e-05, "loss": 1.0427, "step": 354 }, { "epoch": 0.0317868935027142, "grad_norm": 1.24608020740071, "learning_rate": 1.9999848169665106e-05, "loss": 0.9625, "step": 355 }, { "epoch": 0.031876434047792265, "grad_norm": 1.0665556967385843, "learning_rate": 1.9999831766987937e-05, "loss": 0.9446, "step": 356 }, { "epoch": 0.03196597459287034, "grad_norm": 1.2824843635764926, "learning_rate": 1.999981452315751e-05, "loss": 0.8955, "step": 357 }, { "epoch": 0.0320555151379484, "grad_norm": 1.1380499628928542, "learning_rate": 1.9999796438175267e-05, "loss": 0.9579, "step": 358 }, { "epoch": 0.03214505568302647, "grad_norm": 1.1681159772783272, "learning_rate": 1.9999777512042735e-05, "loss": 1.0003, "step": 359 }, { "epoch": 0.03223459622810454, "grad_norm": 1.159237296775598, "learning_rate": 1.99997577447615e-05, "loss": 0.9512, "step": 360 }, { "epoch": 0.032324136773182605, "grad_norm": 1.1051170969030417, "learning_rate": 1.9999737136333238e-05, "loss": 0.9539, "step": 361 }, { "epoch": 0.03241367731826068, "grad_norm": 1.2179667737810091, "learning_rate": 1.9999715686759672e-05, "loss": 0.9592, "step": 362 }, { "epoch": 0.03250321786333874, "grad_norm": 1.3549356438467903, "learning_rate": 1.9999693396042606e-05, "loss": 0.9131, "step": 363 }, { "epoch": 0.032592758408416814, "grad_norm": 1.1247588026228426, "learning_rate": 1.999967026418392e-05, "loss": 0.9621, "step": 364 }, { "epoch": 0.03268229895349488, "grad_norm": 1.110894022484044, "learning_rate": 1.9999646291185556e-05, "loss": 0.8561, "step": 365 }, { "epoch": 0.03277183949857295, "grad_norm": 1.1168106343953137, "learning_rate": 1.9999621477049533e-05, "loss": 0.9586, "step": 366 }, { "epoch": 0.032861380043651016, "grad_norm": 1.3079644833146529, "learning_rate": 1.999959582177794e-05, "loss": 0.8862, "step": 367 }, { "epoch": 0.03295092058872908, "grad_norm": 2.341257425754997, "learning_rate": 1.9999569325372924e-05, "loss": 0.9556, "step": 368 }, { "epoch": 0.03304046113380715, "grad_norm": 1.2118853822143971, "learning_rate": 1.999954198783673e-05, "loss": 0.9515, "step": 369 }, { "epoch": 0.03313000167888522, "grad_norm": 1.2054431047537317, "learning_rate": 1.9999513809171645e-05, "loss": 0.9037, "step": 370 }, { "epoch": 0.03321954222396329, "grad_norm": 1.1627706090098924, "learning_rate": 1.9999484789380043e-05, "loss": 0.9678, "step": 371 }, { "epoch": 0.033309082769041355, "grad_norm": 1.317538657224851, "learning_rate": 1.999945492846437e-05, "loss": 0.9004, "step": 372 }, { "epoch": 0.03339862331411943, "grad_norm": 1.0334149458465276, "learning_rate": 1.9999424226427132e-05, "loss": 0.8923, "step": 373 }, { "epoch": 0.03348816385919749, "grad_norm": 1.1182486507943536, "learning_rate": 1.9999392683270913e-05, "loss": 0.9031, "step": 374 }, { "epoch": 0.033577704404275564, "grad_norm": 1.1021130166795021, "learning_rate": 1.9999360298998366e-05, "loss": 0.9194, "step": 375 }, { "epoch": 0.03366724494935363, "grad_norm": 1.1480514714426844, "learning_rate": 1.9999327073612215e-05, "loss": 0.95, "step": 376 }, { "epoch": 0.033756785494431694, "grad_norm": 1.2102737349523482, "learning_rate": 1.999929300711526e-05, "loss": 0.9607, "step": 377 }, { "epoch": 0.033846326039509766, "grad_norm": 1.1460299125442661, "learning_rate": 1.9999258099510358e-05, "loss": 0.959, "step": 378 }, { "epoch": 0.03393586658458783, "grad_norm": 1.340156983358306, "learning_rate": 1.9999222350800447e-05, "loss": 0.9048, "step": 379 }, { "epoch": 0.034025407129665904, "grad_norm": 1.1394376365295866, "learning_rate": 1.999918576098854e-05, "loss": 0.9143, "step": 380 }, { "epoch": 0.03411494767474397, "grad_norm": 1.239318983026435, "learning_rate": 1.999914833007771e-05, "loss": 0.9614, "step": 381 }, { "epoch": 0.03420448821982204, "grad_norm": 1.08302823697214, "learning_rate": 1.999911005807111e-05, "loss": 0.936, "step": 382 }, { "epoch": 0.034294028764900106, "grad_norm": 1.0989866494677702, "learning_rate": 1.999907094497195e-05, "loss": 0.9845, "step": 383 }, { "epoch": 0.03438356930997818, "grad_norm": 1.2556845899582212, "learning_rate": 1.999903099078353e-05, "loss": 0.9983, "step": 384 }, { "epoch": 0.03447310985505624, "grad_norm": 1.1480845483728792, "learning_rate": 1.9998990195509206e-05, "loss": 0.9746, "step": 385 }, { "epoch": 0.03456265040013431, "grad_norm": 1.2305798693471501, "learning_rate": 1.999894855915241e-05, "loss": 0.8903, "step": 386 }, { "epoch": 0.03465219094521238, "grad_norm": 1.125735318482672, "learning_rate": 1.9998906081716645e-05, "loss": 0.9234, "step": 387 }, { "epoch": 0.034741731490290445, "grad_norm": 1.294810748801314, "learning_rate": 1.9998862763205483e-05, "loss": 0.9278, "step": 388 }, { "epoch": 0.03483127203536852, "grad_norm": 1.1759301483100835, "learning_rate": 1.9998818603622575e-05, "loss": 0.9174, "step": 389 }, { "epoch": 0.03492081258044658, "grad_norm": 1.260059086574787, "learning_rate": 1.999877360297162e-05, "loss": 1.0049, "step": 390 }, { "epoch": 0.035010353125524654, "grad_norm": 1.3752136311451377, "learning_rate": 1.999872776125642e-05, "loss": 0.9573, "step": 391 }, { "epoch": 0.03509989367060272, "grad_norm": 1.147360042768058, "learning_rate": 1.9998681078480818e-05, "loss": 0.9478, "step": 392 }, { "epoch": 0.03518943421568079, "grad_norm": 1.2233832039373622, "learning_rate": 1.999863355464875e-05, "loss": 0.9409, "step": 393 }, { "epoch": 0.035278974760758856, "grad_norm": 1.15056592762095, "learning_rate": 1.9998585189764207e-05, "loss": 0.9302, "step": 394 }, { "epoch": 0.03536851530583692, "grad_norm": 1.2384668684871656, "learning_rate": 1.9998535983831263e-05, "loss": 0.9427, "step": 395 }, { "epoch": 0.03545805585091499, "grad_norm": 1.3431795917504665, "learning_rate": 1.9998485936854056e-05, "loss": 0.9102, "step": 396 }, { "epoch": 0.03554759639599306, "grad_norm": 1.0871331326251104, "learning_rate": 1.999843504883679e-05, "loss": 0.8903, "step": 397 }, { "epoch": 0.03563713694107113, "grad_norm": 1.2290120067211536, "learning_rate": 1.9998383319783752e-05, "loss": 0.9418, "step": 398 }, { "epoch": 0.035726677486149196, "grad_norm": 1.1409857583770284, "learning_rate": 1.9998330749699287e-05, "loss": 0.9443, "step": 399 }, { "epoch": 0.03581621803122727, "grad_norm": 1.0803116597017157, "learning_rate": 1.9998277338587826e-05, "loss": 0.9996, "step": 400 }, { "epoch": 0.03590575857630533, "grad_norm": 1.2605290135576785, "learning_rate": 1.9998223086453855e-05, "loss": 0.9771, "step": 401 }, { "epoch": 0.035995299121383405, "grad_norm": 1.1815088737776223, "learning_rate": 1.9998167993301938e-05, "loss": 0.9321, "step": 402 }, { "epoch": 0.03608483966646147, "grad_norm": 1.17998901759651, "learning_rate": 1.999811205913671e-05, "loss": 0.883, "step": 403 }, { "epoch": 0.036174380211539535, "grad_norm": 1.116737020520798, "learning_rate": 1.999805528396288e-05, "loss": 0.918, "step": 404 }, { "epoch": 0.03626392075661761, "grad_norm": 1.2038238201377083, "learning_rate": 1.999799766778522e-05, "loss": 1.0135, "step": 405 }, { "epoch": 0.03635346130169567, "grad_norm": 1.2055240672593202, "learning_rate": 1.9997939210608573e-05, "loss": 0.9373, "step": 406 }, { "epoch": 0.036443001846773744, "grad_norm": 1.138475667606657, "learning_rate": 1.999787991243786e-05, "loss": 0.9546, "step": 407 }, { "epoch": 0.03653254239185181, "grad_norm": 1.1026782481638375, "learning_rate": 1.9997819773278074e-05, "loss": 0.9716, "step": 408 }, { "epoch": 0.03662208293692988, "grad_norm": 1.18258582282077, "learning_rate": 1.9997758793134264e-05, "loss": 0.936, "step": 409 }, { "epoch": 0.036711623482007946, "grad_norm": 1.2339445059329697, "learning_rate": 1.9997696972011563e-05, "loss": 0.9309, "step": 410 }, { "epoch": 0.03680116402708602, "grad_norm": 1.1089366234467675, "learning_rate": 1.9997634309915175e-05, "loss": 0.9657, "step": 411 }, { "epoch": 0.03689070457216408, "grad_norm": 1.2797706811043954, "learning_rate": 1.999757080685037e-05, "loss": 0.9765, "step": 412 }, { "epoch": 0.03698024511724215, "grad_norm": 1.0922420262255677, "learning_rate": 1.9997506462822485e-05, "loss": 0.9832, "step": 413 }, { "epoch": 0.03706978566232022, "grad_norm": 1.3469308175814345, "learning_rate": 1.9997441277836935e-05, "loss": 0.9861, "step": 414 }, { "epoch": 0.037159326207398286, "grad_norm": 1.2351600276828343, "learning_rate": 1.9997375251899204e-05, "loss": 0.9614, "step": 415 }, { "epoch": 0.03724886675247636, "grad_norm": 1.130633680767699, "learning_rate": 1.9997308385014843e-05, "loss": 0.9451, "step": 416 }, { "epoch": 0.03733840729755442, "grad_norm": 1.3925859902896762, "learning_rate": 1.9997240677189484e-05, "loss": 1.0413, "step": 417 }, { "epoch": 0.037427947842632495, "grad_norm": 1.123328941203412, "learning_rate": 1.9997172128428815e-05, "loss": 0.9286, "step": 418 }, { "epoch": 0.03751748838771056, "grad_norm": 1.2204541169927308, "learning_rate": 1.9997102738738607e-05, "loss": 0.952, "step": 419 }, { "epoch": 0.03760702893278863, "grad_norm": 1.3518948043649528, "learning_rate": 1.9997032508124687e-05, "loss": 0.9437, "step": 420 }, { "epoch": 0.0376965694778667, "grad_norm": 1.2385395750726995, "learning_rate": 1.9996961436592977e-05, "loss": 0.9578, "step": 421 }, { "epoch": 0.03778611002294476, "grad_norm": 1.1959920011410816, "learning_rate": 1.9996889524149444e-05, "loss": 0.9175, "step": 422 }, { "epoch": 0.037875650568022834, "grad_norm": 1.2159878915973947, "learning_rate": 1.9996816770800143e-05, "loss": 0.9322, "step": 423 }, { "epoch": 0.0379651911131009, "grad_norm": 1.0299544045858229, "learning_rate": 1.999674317655119e-05, "loss": 0.8602, "step": 424 }, { "epoch": 0.03805473165817897, "grad_norm": 1.113403302132828, "learning_rate": 1.999666874140878e-05, "loss": 0.9484, "step": 425 }, { "epoch": 0.038144272203257036, "grad_norm": 1.2025517736216589, "learning_rate": 1.9996593465379168e-05, "loss": 0.9388, "step": 426 }, { "epoch": 0.03823381274833511, "grad_norm": 1.138031720481696, "learning_rate": 1.9996517348468694e-05, "loss": 0.943, "step": 427 }, { "epoch": 0.03832335329341317, "grad_norm": 1.1108921533296434, "learning_rate": 1.9996440390683752e-05, "loss": 0.979, "step": 428 }, { "epoch": 0.038412893838491245, "grad_norm": 1.024459313852225, "learning_rate": 1.9996362592030822e-05, "loss": 0.9139, "step": 429 }, { "epoch": 0.03850243438356931, "grad_norm": 1.1076151056767962, "learning_rate": 1.9996283952516448e-05, "loss": 0.9639, "step": 430 }, { "epoch": 0.038591974928647375, "grad_norm": 1.0879548490443949, "learning_rate": 1.9996204472147238e-05, "loss": 0.9499, "step": 431 }, { "epoch": 0.03868151547372545, "grad_norm": 1.1733192906965282, "learning_rate": 1.9996124150929886e-05, "loss": 0.9239, "step": 432 }, { "epoch": 0.03877105601880351, "grad_norm": 1.241422221144721, "learning_rate": 1.9996042988871146e-05, "loss": 0.8901, "step": 433 }, { "epoch": 0.038860596563881585, "grad_norm": 1.1091807248142462, "learning_rate": 1.9995960985977844e-05, "loss": 0.9268, "step": 434 }, { "epoch": 0.03895013710895965, "grad_norm": 1.2636147143991028, "learning_rate": 1.9995878142256872e-05, "loss": 0.9633, "step": 435 }, { "epoch": 0.03903967765403772, "grad_norm": 1.0162106523467713, "learning_rate": 1.9995794457715208e-05, "loss": 0.9258, "step": 436 }, { "epoch": 0.03912921819911579, "grad_norm": 1.098656229656379, "learning_rate": 1.999570993235989e-05, "loss": 0.8883, "step": 437 }, { "epoch": 0.03921875874419386, "grad_norm": 1.0691834365895647, "learning_rate": 1.9995624566198023e-05, "loss": 0.8965, "step": 438 }, { "epoch": 0.039308299289271924, "grad_norm": 1.1137901476993821, "learning_rate": 1.999553835923679e-05, "loss": 0.9311, "step": 439 }, { "epoch": 0.03939783983434999, "grad_norm": 1.3594161594027432, "learning_rate": 1.9995451311483442e-05, "loss": 0.9408, "step": 440 }, { "epoch": 0.03948738037942806, "grad_norm": 1.103002573666054, "learning_rate": 1.9995363422945303e-05, "loss": 0.9071, "step": 441 }, { "epoch": 0.039576920924506126, "grad_norm": 1.0808630142789442, "learning_rate": 1.9995274693629765e-05, "loss": 0.9343, "step": 442 }, { "epoch": 0.0396664614695842, "grad_norm": 1.193375569151024, "learning_rate": 1.9995185123544292e-05, "loss": 0.9647, "step": 443 }, { "epoch": 0.03975600201466226, "grad_norm": 1.0958279624064393, "learning_rate": 1.9995094712696413e-05, "loss": 0.9862, "step": 444 }, { "epoch": 0.039845542559740335, "grad_norm": 1.0094831712069403, "learning_rate": 1.9995003461093744e-05, "loss": 0.9528, "step": 445 }, { "epoch": 0.0399350831048184, "grad_norm": 1.0360560736768474, "learning_rate": 1.9994911368743953e-05, "loss": 0.9518, "step": 446 }, { "epoch": 0.04002462364989647, "grad_norm": 1.0161465901307165, "learning_rate": 1.9994818435654787e-05, "loss": 0.9509, "step": 447 }, { "epoch": 0.04011416419497454, "grad_norm": 1.112758987143763, "learning_rate": 1.9994724661834065e-05, "loss": 0.9917, "step": 448 }, { "epoch": 0.0402037047400526, "grad_norm": 1.1811256418627025, "learning_rate": 1.9994630047289675e-05, "loss": 0.9369, "step": 449 }, { "epoch": 0.040293245285130674, "grad_norm": 1.2142042607316923, "learning_rate": 1.9994534592029575e-05, "loss": 0.8877, "step": 450 }, { "epoch": 0.04038278583020874, "grad_norm": 1.2873978374756583, "learning_rate": 1.9994438296061793e-05, "loss": 0.8962, "step": 451 }, { "epoch": 0.04047232637528681, "grad_norm": 1.1547875572102886, "learning_rate": 1.999434115939443e-05, "loss": 0.904, "step": 452 }, { "epoch": 0.04056186692036488, "grad_norm": 1.2216695070885337, "learning_rate": 1.9994243182035658e-05, "loss": 0.9964, "step": 453 }, { "epoch": 0.04065140746544295, "grad_norm": 1.0418262682528086, "learning_rate": 1.999414436399372e-05, "loss": 0.9159, "step": 454 }, { "epoch": 0.040740948010521014, "grad_norm": 1.0003786758740296, "learning_rate": 1.9994044705276924e-05, "loss": 0.9723, "step": 455 }, { "epoch": 0.04083048855559908, "grad_norm": 1.0678157331913944, "learning_rate": 1.9993944205893654e-05, "loss": 0.929, "step": 456 }, { "epoch": 0.04092002910067715, "grad_norm": 1.0590530498465156, "learning_rate": 1.9993842865852366e-05, "loss": 0.9192, "step": 457 }, { "epoch": 0.041009569645755216, "grad_norm": 1.1364089137580078, "learning_rate": 1.999374068516158e-05, "loss": 0.9362, "step": 458 }, { "epoch": 0.04109911019083329, "grad_norm": 1.150935142690887, "learning_rate": 1.9993637663829898e-05, "loss": 0.9626, "step": 459 }, { "epoch": 0.04118865073591135, "grad_norm": 1.1161011977567339, "learning_rate": 1.9993533801865984e-05, "loss": 0.9452, "step": 460 }, { "epoch": 0.041278191280989425, "grad_norm": 1.0916024650943628, "learning_rate": 1.9993429099278567e-05, "loss": 0.939, "step": 461 }, { "epoch": 0.04136773182606749, "grad_norm": 1.1049388674486635, "learning_rate": 1.9993323556076466e-05, "loss": 0.9046, "step": 462 }, { "epoch": 0.04145727237114556, "grad_norm": 1.1582655424392185, "learning_rate": 1.9993217172268548e-05, "loss": 0.9352, "step": 463 }, { "epoch": 0.04154681291622363, "grad_norm": 1.2105801488656158, "learning_rate": 1.9993109947863768e-05, "loss": 0.9388, "step": 464 }, { "epoch": 0.04163635346130169, "grad_norm": 1.192649949701616, "learning_rate": 1.9993001882871144e-05, "loss": 1.0016, "step": 465 }, { "epoch": 0.041725894006379764, "grad_norm": 1.110099893030217, "learning_rate": 1.9992892977299765e-05, "loss": 0.9141, "step": 466 }, { "epoch": 0.04181543455145783, "grad_norm": 1.210350536116226, "learning_rate": 1.999278323115879e-05, "loss": 0.9627, "step": 467 }, { "epoch": 0.0419049750965359, "grad_norm": 1.1038053999985182, "learning_rate": 1.9992672644457455e-05, "loss": 0.9471, "step": 468 }, { "epoch": 0.041994515641613966, "grad_norm": 1.1373982218123397, "learning_rate": 1.9992561217205064e-05, "loss": 0.9586, "step": 469 }, { "epoch": 0.04208405618669204, "grad_norm": 1.0604068313149375, "learning_rate": 1.9992448949410984e-05, "loss": 0.9341, "step": 470 }, { "epoch": 0.042173596731770104, "grad_norm": 1.034261570588313, "learning_rate": 1.999233584108466e-05, "loss": 0.8946, "step": 471 }, { "epoch": 0.042263137276848176, "grad_norm": 1.0552614129053202, "learning_rate": 1.9992221892235605e-05, "loss": 0.923, "step": 472 }, { "epoch": 0.04235267782192624, "grad_norm": 1.2337679200552256, "learning_rate": 1.999210710287341e-05, "loss": 0.959, "step": 473 }, { "epoch": 0.042442218367004306, "grad_norm": 1.1658435350844465, "learning_rate": 1.9991991473007724e-05, "loss": 0.9643, "step": 474 }, { "epoch": 0.04253175891208238, "grad_norm": 1.069903998029625, "learning_rate": 1.999187500264828e-05, "loss": 0.8971, "step": 475 }, { "epoch": 0.04262129945716044, "grad_norm": 1.2237395659698547, "learning_rate": 1.9991757691804866e-05, "loss": 0.9618, "step": 476 }, { "epoch": 0.042710840002238515, "grad_norm": 1.1178284750387897, "learning_rate": 1.999163954048736e-05, "loss": 0.983, "step": 477 }, { "epoch": 0.04280038054731658, "grad_norm": 1.0711916917599673, "learning_rate": 1.9991520548705695e-05, "loss": 0.9555, "step": 478 }, { "epoch": 0.04288992109239465, "grad_norm": 1.0089883890975022, "learning_rate": 1.999140071646988e-05, "loss": 0.9253, "step": 479 }, { "epoch": 0.04297946163747272, "grad_norm": 0.9530952959533482, "learning_rate": 1.9991280043789992e-05, "loss": 0.9168, "step": 480 }, { "epoch": 0.04306900218255079, "grad_norm": 1.0543192038478133, "learning_rate": 1.9991158530676192e-05, "loss": 0.9382, "step": 481 }, { "epoch": 0.043158542727628854, "grad_norm": 1.0660027262009064, "learning_rate": 1.999103617713869e-05, "loss": 0.9, "step": 482 }, { "epoch": 0.04324808327270692, "grad_norm": 1.0153999928167032, "learning_rate": 1.9990912983187786e-05, "loss": 0.863, "step": 483 }, { "epoch": 0.04333762381778499, "grad_norm": 1.1310027815228523, "learning_rate": 1.9990788948833835e-05, "loss": 0.9149, "step": 484 }, { "epoch": 0.043427164362863056, "grad_norm": 1.145060884011434, "learning_rate": 1.9990664074087278e-05, "loss": 0.901, "step": 485 }, { "epoch": 0.04351670490794113, "grad_norm": 1.0882347830609251, "learning_rate": 1.9990538358958616e-05, "loss": 0.9194, "step": 486 }, { "epoch": 0.04360624545301919, "grad_norm": 1.1157463077907417, "learning_rate": 1.999041180345842e-05, "loss": 0.9135, "step": 487 }, { "epoch": 0.043695785998097265, "grad_norm": 1.1289838198075401, "learning_rate": 1.9990284407597343e-05, "loss": 0.9185, "step": 488 }, { "epoch": 0.04378532654317533, "grad_norm": 1.0537068780705858, "learning_rate": 1.9990156171386092e-05, "loss": 0.9711, "step": 489 }, { "epoch": 0.0438748670882534, "grad_norm": 1.1472070588947672, "learning_rate": 1.9990027094835463e-05, "loss": 0.9498, "step": 490 }, { "epoch": 0.04396440763333147, "grad_norm": 1.2653321553280341, "learning_rate": 1.9989897177956308e-05, "loss": 0.8896, "step": 491 }, { "epoch": 0.04405394817840953, "grad_norm": 1.0291960991493623, "learning_rate": 1.9989766420759554e-05, "loss": 0.9283, "step": 492 }, { "epoch": 0.044143488723487605, "grad_norm": 1.2134748302201406, "learning_rate": 1.9989634823256206e-05, "loss": 0.9518, "step": 493 }, { "epoch": 0.04423302926856567, "grad_norm": 1.1918645895354567, "learning_rate": 1.998950238545733e-05, "loss": 0.8942, "step": 494 }, { "epoch": 0.04432256981364374, "grad_norm": 1.1747911685732364, "learning_rate": 1.9989369107374064e-05, "loss": 0.993, "step": 495 }, { "epoch": 0.04441211035872181, "grad_norm": 1.0404512175154763, "learning_rate": 1.9989234989017622e-05, "loss": 0.8671, "step": 496 }, { "epoch": 0.04450165090379988, "grad_norm": 1.1566238142038705, "learning_rate": 1.9989100030399285e-05, "loss": 0.9272, "step": 497 }, { "epoch": 0.044591191448877944, "grad_norm": 1.3440675808590532, "learning_rate": 1.9988964231530404e-05, "loss": 0.9688, "step": 498 }, { "epoch": 0.044680731993956016, "grad_norm": 1.1060168723939328, "learning_rate": 1.9988827592422404e-05, "loss": 0.8904, "step": 499 }, { "epoch": 0.04477027253903408, "grad_norm": 1.0456481612834965, "learning_rate": 1.9988690113086776e-05, "loss": 0.8563, "step": 500 }, { "epoch": 0.044859813084112146, "grad_norm": 1.0684988092141845, "learning_rate": 1.9988551793535088e-05, "loss": 0.9535, "step": 501 }, { "epoch": 0.04494935362919022, "grad_norm": 1.086717744123546, "learning_rate": 1.998841263377897e-05, "loss": 0.9526, "step": 502 }, { "epoch": 0.04503889417426828, "grad_norm": 1.211186497271187, "learning_rate": 1.9988272633830136e-05, "loss": 0.9543, "step": 503 }, { "epoch": 0.045128434719346355, "grad_norm": 1.6278515277901342, "learning_rate": 1.9988131793700352e-05, "loss": 0.9347, "step": 504 }, { "epoch": 0.04521797526442442, "grad_norm": 1.139406994339983, "learning_rate": 1.998799011340147e-05, "loss": 0.9104, "step": 505 }, { "epoch": 0.04530751580950249, "grad_norm": 1.4280525627471077, "learning_rate": 1.9987847592945412e-05, "loss": 0.9899, "step": 506 }, { "epoch": 0.04539705635458056, "grad_norm": 1.1584947776891752, "learning_rate": 1.9987704232344156e-05, "loss": 0.9458, "step": 507 }, { "epoch": 0.04548659689965863, "grad_norm": 1.1226407021941534, "learning_rate": 1.998756003160977e-05, "loss": 0.9451, "step": 508 }, { "epoch": 0.045576137444736695, "grad_norm": 1.3863609193402193, "learning_rate": 1.998741499075438e-05, "loss": 0.9927, "step": 509 }, { "epoch": 0.04566567798981476, "grad_norm": 1.1968426322900878, "learning_rate": 1.998726910979019e-05, "loss": 0.8881, "step": 510 }, { "epoch": 0.04575521853489283, "grad_norm": 1.08556114964565, "learning_rate": 1.998712238872946e-05, "loss": 0.8783, "step": 511 }, { "epoch": 0.0458447590799709, "grad_norm": 1.1971729440195378, "learning_rate": 1.998697482758455e-05, "loss": 0.9201, "step": 512 }, { "epoch": 0.04593429962504897, "grad_norm": 1.1125170942528069, "learning_rate": 1.998682642636786e-05, "loss": 0.9597, "step": 513 }, { "epoch": 0.046023840170127034, "grad_norm": 1.0745580226459157, "learning_rate": 1.9986677185091868e-05, "loss": 0.897, "step": 514 }, { "epoch": 0.046113380715205106, "grad_norm": 1.157707969839321, "learning_rate": 1.998652710376914e-05, "loss": 0.9256, "step": 515 }, { "epoch": 0.04620292126028317, "grad_norm": 1.062027978298153, "learning_rate": 1.9986376182412296e-05, "loss": 0.9083, "step": 516 }, { "epoch": 0.04629246180536124, "grad_norm": 1.2015544741184867, "learning_rate": 1.9986224421034028e-05, "loss": 0.9583, "step": 517 }, { "epoch": 0.04638200235043931, "grad_norm": 1.3945234898694425, "learning_rate": 1.9986071819647104e-05, "loss": 0.9219, "step": 518 }, { "epoch": 0.04647154289551737, "grad_norm": 1.3935398855835808, "learning_rate": 1.998591837826436e-05, "loss": 0.9184, "step": 519 }, { "epoch": 0.046561083440595445, "grad_norm": 1.1685740438852183, "learning_rate": 1.9985764096898705e-05, "loss": 0.9721, "step": 520 }, { "epoch": 0.04665062398567351, "grad_norm": 1.2563967857846845, "learning_rate": 1.998560897556311e-05, "loss": 1.0154, "step": 521 }, { "epoch": 0.04674016453075158, "grad_norm": 1.3228718360054317, "learning_rate": 1.9985453014270633e-05, "loss": 0.8816, "step": 522 }, { "epoch": 0.04682970507582965, "grad_norm": 1.1379330285449913, "learning_rate": 1.9985296213034386e-05, "loss": 0.8987, "step": 523 }, { "epoch": 0.04691924562090772, "grad_norm": 1.106124654091436, "learning_rate": 1.9985138571867562e-05, "loss": 0.9433, "step": 524 }, { "epoch": 0.047008786165985784, "grad_norm": 1.0288504965094727, "learning_rate": 1.998498009078342e-05, "loss": 0.9271, "step": 525 }, { "epoch": 0.047098326711063856, "grad_norm": 1.2236402273226397, "learning_rate": 1.998482076979529e-05, "loss": 0.9362, "step": 526 }, { "epoch": 0.04718786725614192, "grad_norm": 1.1421657543249277, "learning_rate": 1.9984660608916572e-05, "loss": 0.9194, "step": 527 }, { "epoch": 0.04727740780121999, "grad_norm": 1.347421957195768, "learning_rate": 1.9984499608160744e-05, "loss": 0.843, "step": 528 }, { "epoch": 0.04736694834629806, "grad_norm": 1.260130347357029, "learning_rate": 1.9984337767541347e-05, "loss": 1.0104, "step": 529 }, { "epoch": 0.047456488891376124, "grad_norm": 1.1557217809639428, "learning_rate": 1.998417508707199e-05, "loss": 0.8651, "step": 530 }, { "epoch": 0.047546029436454196, "grad_norm": 1.038526136282594, "learning_rate": 1.998401156676636e-05, "loss": 0.9021, "step": 531 }, { "epoch": 0.04763556998153226, "grad_norm": 1.1454247279704335, "learning_rate": 1.9983847206638213e-05, "loss": 1.0541, "step": 532 }, { "epoch": 0.04772511052661033, "grad_norm": 1.2248960104339464, "learning_rate": 1.9983682006701373e-05, "loss": 0.95, "step": 533 }, { "epoch": 0.0478146510716884, "grad_norm": 1.3004474112049667, "learning_rate": 1.9983515966969737e-05, "loss": 0.9257, "step": 534 }, { "epoch": 0.04790419161676647, "grad_norm": 1.2110056095798232, "learning_rate": 1.998334908745727e-05, "loss": 0.9363, "step": 535 }, { "epoch": 0.047993732161844535, "grad_norm": 1.0385118312856492, "learning_rate": 1.9983181368178012e-05, "loss": 0.8833, "step": 536 }, { "epoch": 0.0480832727069226, "grad_norm": 1.125943127368221, "learning_rate": 1.998301280914607e-05, "loss": 0.936, "step": 537 }, { "epoch": 0.04817281325200067, "grad_norm": 1.1319848892973925, "learning_rate": 1.998284341037562e-05, "loss": 0.9937, "step": 538 }, { "epoch": 0.04826235379707874, "grad_norm": 1.0607842578107185, "learning_rate": 1.9982673171880912e-05, "loss": 0.9319, "step": 539 }, { "epoch": 0.04835189434215681, "grad_norm": 1.1458161108848985, "learning_rate": 1.9982502093676273e-05, "loss": 0.9875, "step": 540 }, { "epoch": 0.048441434887234874, "grad_norm": 1.107451632698255, "learning_rate": 1.9982330175776083e-05, "loss": 0.9711, "step": 541 }, { "epoch": 0.048530975432312946, "grad_norm": 1.1904523680192518, "learning_rate": 1.9982157418194812e-05, "loss": 0.9501, "step": 542 }, { "epoch": 0.04862051597739101, "grad_norm": 1.0940295640504563, "learning_rate": 1.9981983820946985e-05, "loss": 0.8758, "step": 543 }, { "epoch": 0.04871005652246908, "grad_norm": 1.0502987545970643, "learning_rate": 1.998180938404721e-05, "loss": 0.9263, "step": 544 }, { "epoch": 0.04879959706754715, "grad_norm": 1.242706689006851, "learning_rate": 1.9981634107510153e-05, "loss": 0.9758, "step": 545 }, { "epoch": 0.048889137612625214, "grad_norm": 1.3864003614126603, "learning_rate": 1.9981457991350567e-05, "loss": 0.9059, "step": 546 }, { "epoch": 0.048978678157703286, "grad_norm": 1.1293650869323646, "learning_rate": 1.9981281035583254e-05, "loss": 0.9336, "step": 547 }, { "epoch": 0.04906821870278135, "grad_norm": 1.0280092484012466, "learning_rate": 1.9981103240223113e-05, "loss": 0.896, "step": 548 }, { "epoch": 0.04915775924785942, "grad_norm": 1.1709194702693853, "learning_rate": 1.998092460528509e-05, "loss": 0.9523, "step": 549 }, { "epoch": 0.04924729979293749, "grad_norm": 1.1476814546059773, "learning_rate": 1.9980745130784214e-05, "loss": 0.9635, "step": 550 }, { "epoch": 0.04933684033801556, "grad_norm": 1.0333891967525806, "learning_rate": 1.9980564816735586e-05, "loss": 0.8626, "step": 551 }, { "epoch": 0.049426380883093625, "grad_norm": 1.3634541348571476, "learning_rate": 1.9980383663154366e-05, "loss": 0.9291, "step": 552 }, { "epoch": 0.0495159214281717, "grad_norm": 1.208095092028711, "learning_rate": 1.9980201670055796e-05, "loss": 0.9755, "step": 553 }, { "epoch": 0.04960546197324976, "grad_norm": 1.0221694100166283, "learning_rate": 1.9980018837455184e-05, "loss": 0.9257, "step": 554 }, { "epoch": 0.04969500251832783, "grad_norm": 1.0478036866932399, "learning_rate": 1.997983516536791e-05, "loss": 0.9699, "step": 555 }, { "epoch": 0.0497845430634059, "grad_norm": 1.2212610955837915, "learning_rate": 1.997965065380942e-05, "loss": 0.9573, "step": 556 }, { "epoch": 0.049874083608483964, "grad_norm": 1.087699437566681, "learning_rate": 1.9979465302795242e-05, "loss": 0.8721, "step": 557 }, { "epoch": 0.049963624153562036, "grad_norm": 1.2123012027101887, "learning_rate": 1.9979279112340963e-05, "loss": 0.872, "step": 558 }, { "epoch": 0.0500531646986401, "grad_norm": 1.0614116504270437, "learning_rate": 1.997909208246224e-05, "loss": 0.9204, "step": 559 }, { "epoch": 0.05014270524371817, "grad_norm": 1.0127235021587757, "learning_rate": 1.9978904213174812e-05, "loss": 0.9754, "step": 560 }, { "epoch": 0.05023224578879624, "grad_norm": 1.0843901874857937, "learning_rate": 1.997871550449448e-05, "loss": 0.9378, "step": 561 }, { "epoch": 0.05032178633387431, "grad_norm": 1.2064090720729816, "learning_rate": 1.997852595643712e-05, "loss": 0.919, "step": 562 }, { "epoch": 0.050411326878952376, "grad_norm": 1.1005671262672223, "learning_rate": 1.997833556901867e-05, "loss": 0.958, "step": 563 }, { "epoch": 0.05050086742403044, "grad_norm": 1.1649992322011302, "learning_rate": 1.997814434225515e-05, "loss": 0.9456, "step": 564 }, { "epoch": 0.05059040796910851, "grad_norm": 1.1739740175270943, "learning_rate": 1.997795227616264e-05, "loss": 0.9093, "step": 565 }, { "epoch": 0.05067994851418658, "grad_norm": 1.1671784388675037, "learning_rate": 1.9977759370757304e-05, "loss": 0.9211, "step": 566 }, { "epoch": 0.05076948905926465, "grad_norm": 1.1333127285191198, "learning_rate": 1.9977565626055364e-05, "loss": 0.9349, "step": 567 }, { "epoch": 0.050859029604342715, "grad_norm": 1.0997562688467957, "learning_rate": 1.9977371042073116e-05, "loss": 0.9142, "step": 568 }, { "epoch": 0.05094857014942079, "grad_norm": 1.0953041818107725, "learning_rate": 1.997717561882693e-05, "loss": 0.8733, "step": 569 }, { "epoch": 0.05103811069449885, "grad_norm": 1.18009258753543, "learning_rate": 1.9976979356333247e-05, "loss": 0.9188, "step": 570 }, { "epoch": 0.051127651239576924, "grad_norm": 1.0726950014500436, "learning_rate": 1.997678225460857e-05, "loss": 0.9342, "step": 571 }, { "epoch": 0.05121719178465499, "grad_norm": 1.2158227422112158, "learning_rate": 1.9976584313669478e-05, "loss": 0.8607, "step": 572 }, { "epoch": 0.051306732329733054, "grad_norm": 1.1215672193741464, "learning_rate": 1.9976385533532626e-05, "loss": 0.988, "step": 573 }, { "epoch": 0.051396272874811126, "grad_norm": 1.2408152351441124, "learning_rate": 1.9976185914214734e-05, "loss": 0.9906, "step": 574 }, { "epoch": 0.05148581341988919, "grad_norm": 1.131612251100872, "learning_rate": 1.997598545573259e-05, "loss": 0.9769, "step": 575 }, { "epoch": 0.05157535396496726, "grad_norm": 1.2431275805880857, "learning_rate": 1.9975784158103062e-05, "loss": 0.9896, "step": 576 }, { "epoch": 0.05166489451004533, "grad_norm": 0.9744941604639711, "learning_rate": 1.997558202134308e-05, "loss": 0.9318, "step": 577 }, { "epoch": 0.0517544350551234, "grad_norm": 1.0712919163391104, "learning_rate": 1.997537904546964e-05, "loss": 0.9492, "step": 578 }, { "epoch": 0.051843975600201465, "grad_norm": 1.1767193915535439, "learning_rate": 1.9975175230499823e-05, "loss": 0.9981, "step": 579 }, { "epoch": 0.05193351614527954, "grad_norm": 1.0550438416242316, "learning_rate": 1.9974970576450775e-05, "loss": 0.9399, "step": 580 }, { "epoch": 0.0520230566903576, "grad_norm": 1.4142653171545716, "learning_rate": 1.9974765083339705e-05, "loss": 0.9455, "step": 581 }, { "epoch": 0.05211259723543567, "grad_norm": 1.0749929344780913, "learning_rate": 1.9974558751183904e-05, "loss": 0.9818, "step": 582 }, { "epoch": 0.05220213778051374, "grad_norm": 1.1463017384115077, "learning_rate": 1.997435158000072e-05, "loss": 0.9401, "step": 583 }, { "epoch": 0.052291678325591805, "grad_norm": 1.1325236638007723, "learning_rate": 1.9974143569807588e-05, "loss": 0.8954, "step": 584 }, { "epoch": 0.05238121887066988, "grad_norm": 1.1119954022615302, "learning_rate": 1.9973934720622003e-05, "loss": 0.9505, "step": 585 }, { "epoch": 0.05247075941574794, "grad_norm": 1.1654191722717122, "learning_rate": 1.9973725032461528e-05, "loss": 0.9031, "step": 586 }, { "epoch": 0.052560299960826014, "grad_norm": 1.1713497526622438, "learning_rate": 1.9973514505343806e-05, "loss": 0.9457, "step": 587 }, { "epoch": 0.05264984050590408, "grad_norm": 1.0933210606777743, "learning_rate": 1.9973303139286547e-05, "loss": 0.9663, "step": 588 }, { "epoch": 0.05273938105098215, "grad_norm": 1.114316259574633, "learning_rate": 1.9973090934307522e-05, "loss": 0.9503, "step": 589 }, { "epoch": 0.052828921596060216, "grad_norm": 1.1087547414253103, "learning_rate": 1.9972877890424592e-05, "loss": 0.9185, "step": 590 }, { "epoch": 0.05291846214113828, "grad_norm": 1.1276566435567281, "learning_rate": 1.997266400765567e-05, "loss": 0.9153, "step": 591 }, { "epoch": 0.05300800268621635, "grad_norm": 1.108682136901737, "learning_rate": 1.997244928601875e-05, "loss": 0.9203, "step": 592 }, { "epoch": 0.05309754323129442, "grad_norm": 1.1168038060334835, "learning_rate": 1.9972233725531897e-05, "loss": 0.9608, "step": 593 }, { "epoch": 0.05318708377637249, "grad_norm": 1.0767143358634241, "learning_rate": 1.9972017326213233e-05, "loss": 0.9466, "step": 594 }, { "epoch": 0.053276624321450555, "grad_norm": 1.040012073149749, "learning_rate": 1.997180008808097e-05, "loss": 0.877, "step": 595 }, { "epoch": 0.05336616486652863, "grad_norm": 1.101845324693482, "learning_rate": 1.9971582011153382e-05, "loss": 0.8476, "step": 596 }, { "epoch": 0.05345570541160669, "grad_norm": 1.0410640849956347, "learning_rate": 1.9971363095448808e-05, "loss": 0.91, "step": 597 }, { "epoch": 0.053545245956684764, "grad_norm": 1.0700556692464638, "learning_rate": 1.9971143340985663e-05, "loss": 0.9779, "step": 598 }, { "epoch": 0.05363478650176283, "grad_norm": 1.2281299288441592, "learning_rate": 1.9970922747782433e-05, "loss": 0.9262, "step": 599 }, { "epoch": 0.053724327046840895, "grad_norm": 1.485882965567142, "learning_rate": 1.9970701315857676e-05, "loss": 0.8884, "step": 600 }, { "epoch": 0.05381386759191897, "grad_norm": 1.0429377141618017, "learning_rate": 1.9970479045230015e-05, "loss": 0.952, "step": 601 }, { "epoch": 0.05390340813699703, "grad_norm": 0.9372628392739595, "learning_rate": 1.9970255935918143e-05, "loss": 0.8715, "step": 602 }, { "epoch": 0.053992948682075104, "grad_norm": 1.0449426674025424, "learning_rate": 1.9970031987940837e-05, "loss": 0.9774, "step": 603 }, { "epoch": 0.05408248922715317, "grad_norm": 1.2112614408094393, "learning_rate": 1.996980720131693e-05, "loss": 0.9425, "step": 604 }, { "epoch": 0.05417202977223124, "grad_norm": 1.1378702326987429, "learning_rate": 1.9969581576065326e-05, "loss": 0.9648, "step": 605 }, { "epoch": 0.054261570317309306, "grad_norm": 1.179717432847686, "learning_rate": 1.996935511220501e-05, "loss": 0.9532, "step": 606 }, { "epoch": 0.05435111086238738, "grad_norm": 1.0957510025405282, "learning_rate": 1.996912780975503e-05, "loss": 0.9289, "step": 607 }, { "epoch": 0.05444065140746544, "grad_norm": 1.0479000160328913, "learning_rate": 1.9968899668734503e-05, "loss": 0.8953, "step": 608 }, { "epoch": 0.05453019195254351, "grad_norm": 1.0589172649202057, "learning_rate": 1.996867068916262e-05, "loss": 0.8829, "step": 609 }, { "epoch": 0.05461973249762158, "grad_norm": 1.0656660635624877, "learning_rate": 1.9968440871058647e-05, "loss": 0.9562, "step": 610 }, { "epoch": 0.054709273042699645, "grad_norm": 1.1521771886233159, "learning_rate": 1.9968210214441912e-05, "loss": 0.9341, "step": 611 }, { "epoch": 0.05479881358777772, "grad_norm": 0.9914658075430345, "learning_rate": 1.9967978719331816e-05, "loss": 0.883, "step": 612 }, { "epoch": 0.05488835413285578, "grad_norm": 1.126233882712661, "learning_rate": 1.996774638574783e-05, "loss": 0.9475, "step": 613 }, { "epoch": 0.054977894677933854, "grad_norm": 1.048059636538473, "learning_rate": 1.9967513213709506e-05, "loss": 0.9232, "step": 614 }, { "epoch": 0.05506743522301192, "grad_norm": 1.3257402549838415, "learning_rate": 1.996727920323645e-05, "loss": 0.9704, "step": 615 }, { "epoch": 0.05515697576808999, "grad_norm": 1.0793772383901195, "learning_rate": 1.9967044354348347e-05, "loss": 0.9737, "step": 616 }, { "epoch": 0.055246516313168056, "grad_norm": 1.0297255467555129, "learning_rate": 1.9966808667064953e-05, "loss": 0.9414, "step": 617 }, { "epoch": 0.05533605685824612, "grad_norm": 1.1534164261942317, "learning_rate": 1.9966572141406092e-05, "loss": 0.9009, "step": 618 }, { "epoch": 0.055425597403324194, "grad_norm": 0.9709791088401495, "learning_rate": 1.996633477739166e-05, "loss": 0.8876, "step": 619 }, { "epoch": 0.05551513794840226, "grad_norm": 1.124398555405512, "learning_rate": 1.9966096575041622e-05, "loss": 0.9383, "step": 620 }, { "epoch": 0.05560467849348033, "grad_norm": 1.0490222700760865, "learning_rate": 1.996585753437602e-05, "loss": 0.9308, "step": 621 }, { "epoch": 0.055694219038558396, "grad_norm": 1.3896934558159915, "learning_rate": 1.9965617655414956e-05, "loss": 0.9643, "step": 622 }, { "epoch": 0.05578375958363647, "grad_norm": 1.075250567179632, "learning_rate": 1.996537693817861e-05, "loss": 0.9255, "step": 623 }, { "epoch": 0.05587330012871453, "grad_norm": 1.0535140508053211, "learning_rate": 1.9965135382687235e-05, "loss": 0.9371, "step": 624 }, { "epoch": 0.055962840673792605, "grad_norm": 1.1622469969808344, "learning_rate": 1.996489298896114e-05, "loss": 0.9383, "step": 625 }, { "epoch": 0.05605238121887067, "grad_norm": 1.1868976653596344, "learning_rate": 1.996464975702072e-05, "loss": 0.9276, "step": 626 }, { "epoch": 0.056141921763948735, "grad_norm": 1.1593603029995556, "learning_rate": 1.9964405686886436e-05, "loss": 0.9202, "step": 627 }, { "epoch": 0.05623146230902681, "grad_norm": 1.0989889823901322, "learning_rate": 1.9964160778578812e-05, "loss": 1.0104, "step": 628 }, { "epoch": 0.05632100285410487, "grad_norm": 1.1852552873018605, "learning_rate": 1.996391503211846e-05, "loss": 0.9384, "step": 629 }, { "epoch": 0.056410543399182944, "grad_norm": 1.089308515529538, "learning_rate": 1.9963668447526042e-05, "loss": 0.8921, "step": 630 }, { "epoch": 0.05650008394426101, "grad_norm": 1.073263054756886, "learning_rate": 1.9963421024822304e-05, "loss": 0.9205, "step": 631 }, { "epoch": 0.05658962448933908, "grad_norm": 1.1009873472869982, "learning_rate": 1.9963172764028054e-05, "loss": 0.9565, "step": 632 }, { "epoch": 0.056679165034417146, "grad_norm": 1.1318445024988277, "learning_rate": 1.996292366516418e-05, "loss": 0.9097, "step": 633 }, { "epoch": 0.05676870557949522, "grad_norm": 1.0442891198225548, "learning_rate": 1.9962673728251634e-05, "loss": 0.9093, "step": 634 }, { "epoch": 0.05685824612457328, "grad_norm": 1.1082866731928176, "learning_rate": 1.996242295331144e-05, "loss": 0.9177, "step": 635 }, { "epoch": 0.05694778666965135, "grad_norm": 1.0202709197207074, "learning_rate": 1.996217134036469e-05, "loss": 0.9412, "step": 636 }, { "epoch": 0.05703732721472942, "grad_norm": 1.144559414301597, "learning_rate": 1.9961918889432552e-05, "loss": 0.9646, "step": 637 }, { "epoch": 0.057126867759807486, "grad_norm": 1.1896197317023043, "learning_rate": 1.996166560053626e-05, "loss": 0.9464, "step": 638 }, { "epoch": 0.05721640830488556, "grad_norm": 1.1887641692600903, "learning_rate": 1.996141147369712e-05, "loss": 1.0243, "step": 639 }, { "epoch": 0.05730594884996362, "grad_norm": 1.3190908728534263, "learning_rate": 1.9961156508936505e-05, "loss": 0.8771, "step": 640 }, { "epoch": 0.057395489395041695, "grad_norm": 1.289480233669961, "learning_rate": 1.996090070627587e-05, "loss": 0.9248, "step": 641 }, { "epoch": 0.05748502994011976, "grad_norm": 1.0006480549984968, "learning_rate": 1.9960644065736723e-05, "loss": 0.9403, "step": 642 }, { "epoch": 0.05757457048519783, "grad_norm": 1.04256239080739, "learning_rate": 1.9960386587340656e-05, "loss": 0.8683, "step": 643 }, { "epoch": 0.0576641110302759, "grad_norm": 1.0619556320844847, "learning_rate": 1.9960128271109326e-05, "loss": 0.8949, "step": 644 }, { "epoch": 0.05775365157535396, "grad_norm": 1.2410313721954176, "learning_rate": 1.9959869117064467e-05, "loss": 0.9752, "step": 645 }, { "epoch": 0.057843192120432034, "grad_norm": 1.1187897684133061, "learning_rate": 1.9959609125227874e-05, "loss": 0.9362, "step": 646 }, { "epoch": 0.0579327326655101, "grad_norm": 1.0739149709563547, "learning_rate": 1.9959348295621416e-05, "loss": 0.9509, "step": 647 }, { "epoch": 0.05802227321058817, "grad_norm": 1.082878174473754, "learning_rate": 1.9959086628267032e-05, "loss": 0.9332, "step": 648 }, { "epoch": 0.058111813755666236, "grad_norm": 1.0371676170664081, "learning_rate": 1.9958824123186734e-05, "loss": 0.9046, "step": 649 }, { "epoch": 0.05820135430074431, "grad_norm": 1.0430717449528382, "learning_rate": 1.9958560780402608e-05, "loss": 0.934, "step": 650 }, { "epoch": 0.05829089484582237, "grad_norm": 1.08412250412559, "learning_rate": 1.9958296599936798e-05, "loss": 0.8913, "step": 651 }, { "epoch": 0.058380435390900445, "grad_norm": 1.089961350342515, "learning_rate": 1.995803158181153e-05, "loss": 0.8941, "step": 652 }, { "epoch": 0.05846997593597851, "grad_norm": 1.054212999722257, "learning_rate": 1.9957765726049095e-05, "loss": 0.9501, "step": 653 }, { "epoch": 0.058559516481056575, "grad_norm": 1.019834205471067, "learning_rate": 1.995749903267186e-05, "loss": 0.9476, "step": 654 }, { "epoch": 0.05864905702613465, "grad_norm": 1.0765348926529934, "learning_rate": 1.9957231501702254e-05, "loss": 0.929, "step": 655 }, { "epoch": 0.05873859757121271, "grad_norm": 1.1482856973975353, "learning_rate": 1.9956963133162776e-05, "loss": 0.9268, "step": 656 }, { "epoch": 0.058828138116290785, "grad_norm": 1.2888721925077, "learning_rate": 1.995669392707601e-05, "loss": 0.931, "step": 657 }, { "epoch": 0.05891767866136885, "grad_norm": 1.1179859099571603, "learning_rate": 1.9956423883464597e-05, "loss": 0.8722, "step": 658 }, { "epoch": 0.05900721920644692, "grad_norm": 1.0914660764876054, "learning_rate": 1.9956153002351254e-05, "loss": 0.9779, "step": 659 }, { "epoch": 0.05909675975152499, "grad_norm": 1.265095230889255, "learning_rate": 1.995588128375876e-05, "loss": 0.9414, "step": 660 }, { "epoch": 0.05918630029660306, "grad_norm": 1.0392274440222413, "learning_rate": 1.9955608727709977e-05, "loss": 0.9498, "step": 661 }, { "epoch": 0.059275840841681124, "grad_norm": 1.424774737510009, "learning_rate": 1.995533533422783e-05, "loss": 0.9636, "step": 662 }, { "epoch": 0.05936538138675919, "grad_norm": 1.1180189283395208, "learning_rate": 1.9955061103335317e-05, "loss": 0.909, "step": 663 }, { "epoch": 0.05945492193183726, "grad_norm": 0.9975520497198124, "learning_rate": 1.9954786035055505e-05, "loss": 0.9036, "step": 664 }, { "epoch": 0.059544462476915326, "grad_norm": 1.0954873575181803, "learning_rate": 1.9954510129411534e-05, "loss": 0.9227, "step": 665 }, { "epoch": 0.0596340030219934, "grad_norm": 0.9895627093679141, "learning_rate": 1.9954233386426605e-05, "loss": 0.9027, "step": 666 }, { "epoch": 0.05972354356707146, "grad_norm": 1.1027784033415864, "learning_rate": 1.9953955806124003e-05, "loss": 0.9398, "step": 667 }, { "epoch": 0.059813084112149535, "grad_norm": 1.084472059437037, "learning_rate": 1.9953677388527076e-05, "loss": 0.9178, "step": 668 }, { "epoch": 0.0599026246572276, "grad_norm": 0.9488185418148994, "learning_rate": 1.9953398133659243e-05, "loss": 0.9242, "step": 669 }, { "epoch": 0.05999216520230567, "grad_norm": 1.1836720414780657, "learning_rate": 1.9953118041543994e-05, "loss": 0.9657, "step": 670 }, { "epoch": 0.06008170574738374, "grad_norm": 1.022971600888171, "learning_rate": 1.995283711220489e-05, "loss": 0.9431, "step": 671 }, { "epoch": 0.0601712462924618, "grad_norm": 0.9986351487156575, "learning_rate": 1.9952555345665563e-05, "loss": 0.8774, "step": 672 }, { "epoch": 0.060260786837539874, "grad_norm": 1.018768964697811, "learning_rate": 1.995227274194971e-05, "loss": 0.9333, "step": 673 }, { "epoch": 0.06035032738261794, "grad_norm": 1.2250536889430095, "learning_rate": 1.9951989301081105e-05, "loss": 0.875, "step": 674 }, { "epoch": 0.06043986792769601, "grad_norm": 1.0301347028192598, "learning_rate": 1.9951705023083594e-05, "loss": 0.9078, "step": 675 }, { "epoch": 0.06052940847277408, "grad_norm": 1.172855360599762, "learning_rate": 1.9951419907981083e-05, "loss": 0.9228, "step": 676 }, { "epoch": 0.06061894901785215, "grad_norm": 1.07567298403186, "learning_rate": 1.995113395579756e-05, "loss": 0.9847, "step": 677 }, { "epoch": 0.060708489562930214, "grad_norm": 1.0908027401090645, "learning_rate": 1.9950847166557076e-05, "loss": 0.9516, "step": 678 }, { "epoch": 0.060798030108008286, "grad_norm": 1.0281355250535715, "learning_rate": 1.9950559540283753e-05, "loss": 0.935, "step": 679 }, { "epoch": 0.06088757065308635, "grad_norm": 1.040634376362086, "learning_rate": 1.9950271077001792e-05, "loss": 0.8898, "step": 680 }, { "epoch": 0.060977111198164416, "grad_norm": 1.0301798312707877, "learning_rate": 1.994998177673545e-05, "loss": 0.8949, "step": 681 }, { "epoch": 0.06106665174324249, "grad_norm": 1.036320069153192, "learning_rate": 1.9949691639509067e-05, "loss": 0.9649, "step": 682 }, { "epoch": 0.06115619228832055, "grad_norm": 1.2418971335715927, "learning_rate": 1.9949400665347042e-05, "loss": 0.8822, "step": 683 }, { "epoch": 0.061245732833398625, "grad_norm": 1.0331561026970484, "learning_rate": 1.9949108854273856e-05, "loss": 0.9129, "step": 684 }, { "epoch": 0.06133527337847669, "grad_norm": 1.0468745440096618, "learning_rate": 1.994881620631406e-05, "loss": 0.9157, "step": 685 }, { "epoch": 0.06142481392355476, "grad_norm": 1.157857271770634, "learning_rate": 1.9948522721492257e-05, "loss": 0.915, "step": 686 }, { "epoch": 0.06151435446863283, "grad_norm": 1.033996036594039, "learning_rate": 1.9948228399833148e-05, "loss": 0.9868, "step": 687 }, { "epoch": 0.0616038950137109, "grad_norm": 1.0570236723148827, "learning_rate": 1.9947933241361482e-05, "loss": 0.8782, "step": 688 }, { "epoch": 0.061693435558788964, "grad_norm": 1.039545198862132, "learning_rate": 1.9947637246102092e-05, "loss": 0.9528, "step": 689 }, { "epoch": 0.06178297610386703, "grad_norm": 1.3440834269331907, "learning_rate": 1.994734041407987e-05, "loss": 1.0033, "step": 690 }, { "epoch": 0.0618725166489451, "grad_norm": 1.1108547797258492, "learning_rate": 1.9947042745319786e-05, "loss": 0.9167, "step": 691 }, { "epoch": 0.061962057194023167, "grad_norm": 0.9262542314026714, "learning_rate": 1.9946744239846886e-05, "loss": 0.86, "step": 692 }, { "epoch": 0.06205159773910124, "grad_norm": 1.0721863516696286, "learning_rate": 1.9946444897686273e-05, "loss": 0.9295, "step": 693 }, { "epoch": 0.062141138284179304, "grad_norm": 1.0757509069978242, "learning_rate": 1.9946144718863122e-05, "loss": 0.9592, "step": 694 }, { "epoch": 0.062230678829257376, "grad_norm": 1.1073251674974691, "learning_rate": 1.9945843703402694e-05, "loss": 0.9614, "step": 695 }, { "epoch": 0.06232021937433544, "grad_norm": 1.2278039551367435, "learning_rate": 1.9945541851330304e-05, "loss": 0.9418, "step": 696 }, { "epoch": 0.06240975991941351, "grad_norm": 1.153469521421713, "learning_rate": 1.9945239162671343e-05, "loss": 0.8972, "step": 697 }, { "epoch": 0.06249930046449158, "grad_norm": 1.0771149673276852, "learning_rate": 1.9944935637451272e-05, "loss": 0.8749, "step": 698 }, { "epoch": 0.06258884100956964, "grad_norm": 1.0828949811249642, "learning_rate": 1.994463127569562e-05, "loss": 0.9127, "step": 699 }, { "epoch": 0.06267838155464771, "grad_norm": 1.0228026949840077, "learning_rate": 1.9944326077429995e-05, "loss": 0.9695, "step": 700 }, { "epoch": 0.06276792209972579, "grad_norm": 1.0360618043078018, "learning_rate": 1.9944020042680065e-05, "loss": 0.9147, "step": 701 }, { "epoch": 0.06285746264480385, "grad_norm": 1.1527617984007996, "learning_rate": 1.9943713171471573e-05, "loss": 0.9435, "step": 702 }, { "epoch": 0.06294700318988192, "grad_norm": 1.0334789718750361, "learning_rate": 1.9943405463830336e-05, "loss": 0.9699, "step": 703 }, { "epoch": 0.06303654373495998, "grad_norm": 1.0816970656093612, "learning_rate": 1.9943096919782227e-05, "loss": 0.8716, "step": 704 }, { "epoch": 0.06312608428003806, "grad_norm": 1.1970472273480843, "learning_rate": 1.9942787539353216e-05, "loss": 0.9684, "step": 705 }, { "epoch": 0.06321562482511613, "grad_norm": 1.1481551794171696, "learning_rate": 1.9942477322569308e-05, "loss": 0.9098, "step": 706 }, { "epoch": 0.06330516537019419, "grad_norm": 1.133303881985732, "learning_rate": 1.9942166269456614e-05, "loss": 0.9411, "step": 707 }, { "epoch": 0.06339470591527226, "grad_norm": 1.2683539892124873, "learning_rate": 1.9941854380041292e-05, "loss": 0.9292, "step": 708 }, { "epoch": 0.06348424646035032, "grad_norm": 1.176531679026709, "learning_rate": 1.9941541654349575e-05, "loss": 0.9297, "step": 709 }, { "epoch": 0.0635737870054284, "grad_norm": 1.032583099485249, "learning_rate": 1.994122809240777e-05, "loss": 0.9516, "step": 710 }, { "epoch": 0.06366332755050647, "grad_norm": 1.2374308346536853, "learning_rate": 1.994091369424225e-05, "loss": 0.9087, "step": 711 }, { "epoch": 0.06375286809558453, "grad_norm": 1.1127319938875315, "learning_rate": 1.994059845987947e-05, "loss": 0.9108, "step": 712 }, { "epoch": 0.0638424086406626, "grad_norm": 1.1136323085651891, "learning_rate": 1.994028238934594e-05, "loss": 0.9148, "step": 713 }, { "epoch": 0.06393194918574067, "grad_norm": 0.9652342289170723, "learning_rate": 1.9939965482668247e-05, "loss": 0.8795, "step": 714 }, { "epoch": 0.06402148973081874, "grad_norm": 1.0758253722995965, "learning_rate": 1.993964773987305e-05, "loss": 0.9394, "step": 715 }, { "epoch": 0.0641110302758968, "grad_norm": 1.1272120727854955, "learning_rate": 1.9939329160987075e-05, "loss": 0.8927, "step": 716 }, { "epoch": 0.06420057082097487, "grad_norm": 1.0937618755412848, "learning_rate": 1.993900974603712e-05, "loss": 0.9187, "step": 717 }, { "epoch": 0.06429011136605293, "grad_norm": 1.0082067105606212, "learning_rate": 1.9938689495050055e-05, "loss": 0.9166, "step": 718 }, { "epoch": 0.06437965191113101, "grad_norm": 1.1444950401152703, "learning_rate": 1.9938368408052814e-05, "loss": 0.8718, "step": 719 }, { "epoch": 0.06446919245620908, "grad_norm": 1.0131926028406713, "learning_rate": 1.993804648507241e-05, "loss": 0.9133, "step": 720 }, { "epoch": 0.06455873300128714, "grad_norm": 1.1528048760202945, "learning_rate": 1.993772372613592e-05, "loss": 0.939, "step": 721 }, { "epoch": 0.06464827354636521, "grad_norm": 1.289270887826232, "learning_rate": 1.9937400131270496e-05, "loss": 0.9872, "step": 722 }, { "epoch": 0.06473781409144329, "grad_norm": 1.080640969956045, "learning_rate": 1.9937075700503357e-05, "loss": 0.9279, "step": 723 }, { "epoch": 0.06482735463652135, "grad_norm": 1.0877680650662236, "learning_rate": 1.993675043386179e-05, "loss": 0.9524, "step": 724 }, { "epoch": 0.06491689518159942, "grad_norm": 1.0071707596319672, "learning_rate": 1.993642433137316e-05, "loss": 0.905, "step": 725 }, { "epoch": 0.06500643572667748, "grad_norm": 1.0147041270842982, "learning_rate": 1.993609739306489e-05, "loss": 0.9448, "step": 726 }, { "epoch": 0.06509597627175555, "grad_norm": 1.1591289258361375, "learning_rate": 1.993576961896449e-05, "loss": 0.915, "step": 727 }, { "epoch": 0.06518551681683363, "grad_norm": 1.0680404207397045, "learning_rate": 1.9935441009099527e-05, "loss": 0.949, "step": 728 }, { "epoch": 0.06527505736191169, "grad_norm": 1.0514825561451777, "learning_rate": 1.993511156349764e-05, "loss": 0.9384, "step": 729 }, { "epoch": 0.06536459790698976, "grad_norm": 1.0066444729579083, "learning_rate": 1.9934781282186545e-05, "loss": 0.9459, "step": 730 }, { "epoch": 0.06545413845206782, "grad_norm": 1.120524370645983, "learning_rate": 1.9934450165194027e-05, "loss": 0.9639, "step": 731 }, { "epoch": 0.0655436789971459, "grad_norm": 1.1574029963684436, "learning_rate": 1.993411821254793e-05, "loss": 0.9241, "step": 732 }, { "epoch": 0.06563321954222397, "grad_norm": 1.0686164605858943, "learning_rate": 1.9933785424276185e-05, "loss": 0.9314, "step": 733 }, { "epoch": 0.06572276008730203, "grad_norm": 1.028393781221605, "learning_rate": 1.993345180040678e-05, "loss": 0.8969, "step": 734 }, { "epoch": 0.0658123006323801, "grad_norm": 1.1457741722924495, "learning_rate": 1.993311734096778e-05, "loss": 0.9203, "step": 735 }, { "epoch": 0.06590184117745816, "grad_norm": 1.1183776235429703, "learning_rate": 1.9932782045987317e-05, "loss": 0.9621, "step": 736 }, { "epoch": 0.06599138172253624, "grad_norm": 1.1047821001895943, "learning_rate": 1.9932445915493598e-05, "loss": 0.9215, "step": 737 }, { "epoch": 0.0660809222676143, "grad_norm": 1.029184225030201, "learning_rate": 1.993210894951489e-05, "loss": 0.9349, "step": 738 }, { "epoch": 0.06617046281269237, "grad_norm": 1.2640231568202325, "learning_rate": 1.9931771148079552e-05, "loss": 0.9084, "step": 739 }, { "epoch": 0.06626000335777044, "grad_norm": 1.1438205090033562, "learning_rate": 1.9931432511215983e-05, "loss": 0.9238, "step": 740 }, { "epoch": 0.06634954390284852, "grad_norm": 1.1469170456221658, "learning_rate": 1.9931093038952677e-05, "loss": 0.8769, "step": 741 }, { "epoch": 0.06643908444792658, "grad_norm": 1.0191067074856213, "learning_rate": 1.993075273131819e-05, "loss": 0.9429, "step": 742 }, { "epoch": 0.06652862499300465, "grad_norm": 1.0266690271911931, "learning_rate": 1.9930411588341138e-05, "loss": 0.9899, "step": 743 }, { "epoch": 0.06661816553808271, "grad_norm": 0.9929885173697578, "learning_rate": 1.9930069610050228e-05, "loss": 0.9072, "step": 744 }, { "epoch": 0.06670770608316078, "grad_norm": 1.0841949886666007, "learning_rate": 1.992972679647422e-05, "loss": 0.9465, "step": 745 }, { "epoch": 0.06679724662823885, "grad_norm": 1.0581337489643985, "learning_rate": 1.9929383147641952e-05, "loss": 0.9418, "step": 746 }, { "epoch": 0.06688678717331692, "grad_norm": 1.1024536604600896, "learning_rate": 1.992903866358233e-05, "loss": 0.9142, "step": 747 }, { "epoch": 0.06697632771839498, "grad_norm": 1.2154089387451648, "learning_rate": 1.9928693344324333e-05, "loss": 0.9489, "step": 748 }, { "epoch": 0.06706586826347305, "grad_norm": 1.5743215424180574, "learning_rate": 1.9928347189897006e-05, "loss": 0.9951, "step": 749 }, { "epoch": 0.06715540880855113, "grad_norm": 1.185152359928579, "learning_rate": 1.9928000200329468e-05, "loss": 0.9525, "step": 750 }, { "epoch": 0.0672449493536292, "grad_norm": 1.022310677169066, "learning_rate": 1.9927652375650904e-05, "loss": 0.9605, "step": 751 }, { "epoch": 0.06733448989870726, "grad_norm": 1.11537810169956, "learning_rate": 1.9927303715890573e-05, "loss": 0.9051, "step": 752 }, { "epoch": 0.06742403044378532, "grad_norm": 1.096699551478262, "learning_rate": 1.9926954221077807e-05, "loss": 0.9183, "step": 753 }, { "epoch": 0.06751357098886339, "grad_norm": 0.9775570344486216, "learning_rate": 1.9926603891241997e-05, "loss": 0.8832, "step": 754 }, { "epoch": 0.06760311153394147, "grad_norm": 1.0633362765624412, "learning_rate": 1.9926252726412618e-05, "loss": 0.9248, "step": 755 }, { "epoch": 0.06769265207901953, "grad_norm": 1.0628771492992868, "learning_rate": 1.9925900726619206e-05, "loss": 1.0107, "step": 756 }, { "epoch": 0.0677821926240976, "grad_norm": 1.036884912617968, "learning_rate": 1.9925547891891368e-05, "loss": 0.8572, "step": 757 }, { "epoch": 0.06787173316917566, "grad_norm": 1.0716293114445457, "learning_rate": 1.9925194222258786e-05, "loss": 0.9175, "step": 758 }, { "epoch": 0.06796127371425374, "grad_norm": 1.238577582204527, "learning_rate": 1.9924839717751213e-05, "loss": 0.9242, "step": 759 }, { "epoch": 0.06805081425933181, "grad_norm": 0.9948418188725063, "learning_rate": 1.9924484378398462e-05, "loss": 0.9117, "step": 760 }, { "epoch": 0.06814035480440987, "grad_norm": 1.126841904173083, "learning_rate": 1.992412820423043e-05, "loss": 0.9804, "step": 761 }, { "epoch": 0.06822989534948794, "grad_norm": 1.057157979297986, "learning_rate": 1.9923771195277067e-05, "loss": 0.8706, "step": 762 }, { "epoch": 0.068319435894566, "grad_norm": 1.0278117915693268, "learning_rate": 1.9923413351568413e-05, "loss": 0.9279, "step": 763 }, { "epoch": 0.06840897643964408, "grad_norm": 0.9886181854200796, "learning_rate": 1.9923054673134566e-05, "loss": 0.9177, "step": 764 }, { "epoch": 0.06849851698472215, "grad_norm": 1.0578080927047577, "learning_rate": 1.9922695160005694e-05, "loss": 0.885, "step": 765 }, { "epoch": 0.06858805752980021, "grad_norm": 0.979068442637175, "learning_rate": 1.992233481221204e-05, "loss": 0.8821, "step": 766 }, { "epoch": 0.06867759807487828, "grad_norm": 1.1492093605802958, "learning_rate": 1.992197362978392e-05, "loss": 0.9065, "step": 767 }, { "epoch": 0.06876713861995636, "grad_norm": 1.019436623612196, "learning_rate": 1.9921611612751707e-05, "loss": 0.9073, "step": 768 }, { "epoch": 0.06885667916503442, "grad_norm": 1.19956497370407, "learning_rate": 1.992124876114586e-05, "loss": 0.9438, "step": 769 }, { "epoch": 0.06894621971011249, "grad_norm": 1.049777686435467, "learning_rate": 1.9920885074996893e-05, "loss": 0.9115, "step": 770 }, { "epoch": 0.06903576025519055, "grad_norm": 1.0312476632289158, "learning_rate": 1.9920520554335408e-05, "loss": 0.9416, "step": 771 }, { "epoch": 0.06912530080026862, "grad_norm": 0.98640571376336, "learning_rate": 1.992015519919206e-05, "loss": 0.9169, "step": 772 }, { "epoch": 0.0692148413453467, "grad_norm": 1.0546292616976334, "learning_rate": 1.991978900959758e-05, "loss": 0.9038, "step": 773 }, { "epoch": 0.06930438189042476, "grad_norm": 0.9632518494489212, "learning_rate": 1.991942198558278e-05, "loss": 0.914, "step": 774 }, { "epoch": 0.06939392243550283, "grad_norm": 0.9932321274334242, "learning_rate": 1.9919054127178522e-05, "loss": 0.8046, "step": 775 }, { "epoch": 0.06948346298058089, "grad_norm": 1.0315970954329294, "learning_rate": 1.9918685434415757e-05, "loss": 0.8409, "step": 776 }, { "epoch": 0.06957300352565897, "grad_norm": 1.1104393529970686, "learning_rate": 1.991831590732549e-05, "loss": 0.8697, "step": 777 }, { "epoch": 0.06966254407073703, "grad_norm": 1.0774883418576322, "learning_rate": 1.9917945545938817e-05, "loss": 0.8738, "step": 778 }, { "epoch": 0.0697520846158151, "grad_norm": 1.2136997380403043, "learning_rate": 1.991757435028688e-05, "loss": 1.0073, "step": 779 }, { "epoch": 0.06984162516089316, "grad_norm": 1.1341939791921614, "learning_rate": 1.991720232040091e-05, "loss": 0.9125, "step": 780 }, { "epoch": 0.06993116570597123, "grad_norm": 1.1292080385781416, "learning_rate": 1.9916829456312198e-05, "loss": 0.916, "step": 781 }, { "epoch": 0.07002070625104931, "grad_norm": 1.1282089583036534, "learning_rate": 1.9916455758052104e-05, "loss": 0.9932, "step": 782 }, { "epoch": 0.07011024679612737, "grad_norm": 1.1525777881606178, "learning_rate": 1.991608122565207e-05, "loss": 0.9463, "step": 783 }, { "epoch": 0.07019978734120544, "grad_norm": 1.2735045207630338, "learning_rate": 1.9915705859143597e-05, "loss": 0.9512, "step": 784 }, { "epoch": 0.0702893278862835, "grad_norm": 1.1241584606605324, "learning_rate": 1.991532965855826e-05, "loss": 0.9903, "step": 785 }, { "epoch": 0.07037886843136158, "grad_norm": 0.9606026571169505, "learning_rate": 1.9914952623927698e-05, "loss": 0.9578, "step": 786 }, { "epoch": 0.07046840897643965, "grad_norm": 1.205581042483934, "learning_rate": 1.9914574755283636e-05, "loss": 0.9117, "step": 787 }, { "epoch": 0.07055794952151771, "grad_norm": 1.0778708362648222, "learning_rate": 1.9914196052657852e-05, "loss": 0.8901, "step": 788 }, { "epoch": 0.07064749006659578, "grad_norm": 1.2084250909336678, "learning_rate": 1.9913816516082205e-05, "loss": 0.9264, "step": 789 }, { "epoch": 0.07073703061167384, "grad_norm": 1.1627785089895557, "learning_rate": 1.991343614558862e-05, "loss": 0.9399, "step": 790 }, { "epoch": 0.07082657115675192, "grad_norm": 1.0955128950897222, "learning_rate": 1.9913054941209087e-05, "loss": 0.9741, "step": 791 }, { "epoch": 0.07091611170182999, "grad_norm": 1.1112343380824248, "learning_rate": 1.9912672902975682e-05, "loss": 0.9113, "step": 792 }, { "epoch": 0.07100565224690805, "grad_norm": 0.9962753370106672, "learning_rate": 1.9912290030920533e-05, "loss": 0.9057, "step": 793 }, { "epoch": 0.07109519279198612, "grad_norm": 1.1351398681104958, "learning_rate": 1.9911906325075844e-05, "loss": 0.9131, "step": 794 }, { "epoch": 0.0711847333370642, "grad_norm": 1.0941497698684952, "learning_rate": 1.99115217854739e-05, "loss": 0.9038, "step": 795 }, { "epoch": 0.07127427388214226, "grad_norm": 1.1346991099505284, "learning_rate": 1.9911136412147037e-05, "loss": 0.9282, "step": 796 }, { "epoch": 0.07136381442722033, "grad_norm": 1.1032383197771924, "learning_rate": 1.991075020512768e-05, "loss": 0.9722, "step": 797 }, { "epoch": 0.07145335497229839, "grad_norm": 0.9687945397059757, "learning_rate": 1.9910363164448313e-05, "loss": 0.8917, "step": 798 }, { "epoch": 0.07154289551737646, "grad_norm": 1.0943644743553063, "learning_rate": 1.990997529014149e-05, "loss": 0.9229, "step": 799 }, { "epoch": 0.07163243606245454, "grad_norm": 1.0725468683762778, "learning_rate": 1.9909586582239835e-05, "loss": 0.8433, "step": 800 }, { "epoch": 0.0717219766075326, "grad_norm": 0.9890577527704585, "learning_rate": 1.9909197040776055e-05, "loss": 0.8861, "step": 801 }, { "epoch": 0.07181151715261067, "grad_norm": 1.195411864157985, "learning_rate": 1.9908806665782907e-05, "loss": 0.9638, "step": 802 }, { "epoch": 0.07190105769768873, "grad_norm": 0.9967918875619503, "learning_rate": 1.9908415457293236e-05, "loss": 0.98, "step": 803 }, { "epoch": 0.07199059824276681, "grad_norm": 1.0812973662883574, "learning_rate": 1.9908023415339942e-05, "loss": 0.9884, "step": 804 }, { "epoch": 0.07208013878784487, "grad_norm": 1.1885252655924663, "learning_rate": 1.990763053995601e-05, "loss": 0.9026, "step": 805 }, { "epoch": 0.07216967933292294, "grad_norm": 1.0891739197120993, "learning_rate": 1.9907236831174478e-05, "loss": 0.9449, "step": 806 }, { "epoch": 0.072259219878001, "grad_norm": 1.147050963805907, "learning_rate": 1.990684228902847e-05, "loss": 0.9787, "step": 807 }, { "epoch": 0.07234876042307907, "grad_norm": 1.1691899328969124, "learning_rate": 1.9906446913551175e-05, "loss": 0.8988, "step": 808 }, { "epoch": 0.07243830096815715, "grad_norm": 1.3178311352243741, "learning_rate": 1.9906050704775843e-05, "loss": 0.9695, "step": 809 }, { "epoch": 0.07252784151323521, "grad_norm": 1.0663062406941355, "learning_rate": 1.990565366273581e-05, "loss": 0.9881, "step": 810 }, { "epoch": 0.07261738205831328, "grad_norm": 1.0793152584661805, "learning_rate": 1.9905255787464472e-05, "loss": 0.8543, "step": 811 }, { "epoch": 0.07270692260339134, "grad_norm": 1.0461102867171508, "learning_rate": 1.990485707899529e-05, "loss": 0.9698, "step": 812 }, { "epoch": 0.07279646314846942, "grad_norm": 1.2109224126389504, "learning_rate": 1.9904457537361813e-05, "loss": 0.9961, "step": 813 }, { "epoch": 0.07288600369354749, "grad_norm": 0.98085687766607, "learning_rate": 1.990405716259764e-05, "loss": 0.9005, "step": 814 }, { "epoch": 0.07297554423862555, "grad_norm": 1.069474099020944, "learning_rate": 1.9903655954736453e-05, "loss": 0.9127, "step": 815 }, { "epoch": 0.07306508478370362, "grad_norm": 1.1862009982987851, "learning_rate": 1.9903253913812003e-05, "loss": 0.8805, "step": 816 }, { "epoch": 0.07315462532878168, "grad_norm": 1.1064552442642315, "learning_rate": 1.9902851039858106e-05, "loss": 1.0002, "step": 817 }, { "epoch": 0.07324416587385976, "grad_norm": 0.9780107926806036, "learning_rate": 1.9902447332908644e-05, "loss": 0.8877, "step": 818 }, { "epoch": 0.07333370641893783, "grad_norm": 1.2882364895710248, "learning_rate": 1.9902042792997587e-05, "loss": 0.9507, "step": 819 }, { "epoch": 0.07342324696401589, "grad_norm": 1.195830915019672, "learning_rate": 1.9901637420158954e-05, "loss": 0.8662, "step": 820 }, { "epoch": 0.07351278750909396, "grad_norm": 1.0877331953843028, "learning_rate": 1.990123121442685e-05, "loss": 0.9253, "step": 821 }, { "epoch": 0.07360232805417204, "grad_norm": 1.1889239438416268, "learning_rate": 1.9900824175835444e-05, "loss": 0.9345, "step": 822 }, { "epoch": 0.0736918685992501, "grad_norm": 1.2332819647234583, "learning_rate": 1.990041630441897e-05, "loss": 0.9167, "step": 823 }, { "epoch": 0.07378140914432817, "grad_norm": 1.0893100152241175, "learning_rate": 1.990000760021174e-05, "loss": 0.8805, "step": 824 }, { "epoch": 0.07387094968940623, "grad_norm": 1.3023400058167218, "learning_rate": 1.989959806324813e-05, "loss": 0.9154, "step": 825 }, { "epoch": 0.0739604902344843, "grad_norm": 1.1044464403255647, "learning_rate": 1.989918769356259e-05, "loss": 0.9626, "step": 826 }, { "epoch": 0.07405003077956238, "grad_norm": 1.0409212186922727, "learning_rate": 1.989877649118964e-05, "loss": 0.9323, "step": 827 }, { "epoch": 0.07413957132464044, "grad_norm": 1.0148919188027719, "learning_rate": 1.989836445616387e-05, "loss": 0.9261, "step": 828 }, { "epoch": 0.0742291118697185, "grad_norm": 0.9834129070493376, "learning_rate": 1.989795158851994e-05, "loss": 0.9019, "step": 829 }, { "epoch": 0.07431865241479657, "grad_norm": 1.1172460684844427, "learning_rate": 1.9897537888292574e-05, "loss": 0.9054, "step": 830 }, { "epoch": 0.07440819295987465, "grad_norm": 1.044811474679815, "learning_rate": 1.9897123355516573e-05, "loss": 0.9059, "step": 831 }, { "epoch": 0.07449773350495272, "grad_norm": 1.1855525786704262, "learning_rate": 1.989670799022681e-05, "loss": 0.9503, "step": 832 }, { "epoch": 0.07458727405003078, "grad_norm": 1.3257963130221033, "learning_rate": 1.9896291792458218e-05, "loss": 0.851, "step": 833 }, { "epoch": 0.07467681459510885, "grad_norm": 1.063757540663316, "learning_rate": 1.9895874762245812e-05, "loss": 0.908, "step": 834 }, { "epoch": 0.07476635514018691, "grad_norm": 1.0275307848780335, "learning_rate": 1.989545689962467e-05, "loss": 0.9003, "step": 835 }, { "epoch": 0.07485589568526499, "grad_norm": 1.1221584966859024, "learning_rate": 1.989503820462994e-05, "loss": 0.9226, "step": 836 }, { "epoch": 0.07494543623034305, "grad_norm": 1.049560766570043, "learning_rate": 1.989461867729684e-05, "loss": 0.8622, "step": 837 }, { "epoch": 0.07503497677542112, "grad_norm": 0.9460285130519487, "learning_rate": 1.9894198317660657e-05, "loss": 0.979, "step": 838 }, { "epoch": 0.07512451732049918, "grad_norm": 1.0053222426446038, "learning_rate": 1.9893777125756755e-05, "loss": 0.9135, "step": 839 }, { "epoch": 0.07521405786557726, "grad_norm": 1.172358860642403, "learning_rate": 1.9893355101620564e-05, "loss": 0.9244, "step": 840 }, { "epoch": 0.07530359841065533, "grad_norm": 0.986120722221041, "learning_rate": 1.989293224528758e-05, "loss": 0.883, "step": 841 }, { "epoch": 0.0753931389557334, "grad_norm": 0.979125132554207, "learning_rate": 1.9892508556793376e-05, "loss": 0.9187, "step": 842 }, { "epoch": 0.07548267950081146, "grad_norm": 1.0469806246332762, "learning_rate": 1.9892084036173587e-05, "loss": 0.9008, "step": 843 }, { "epoch": 0.07557222004588952, "grad_norm": 1.015302680443969, "learning_rate": 1.9891658683463926e-05, "loss": 0.9167, "step": 844 }, { "epoch": 0.0756617605909676, "grad_norm": 1.1782112472479447, "learning_rate": 1.989123249870017e-05, "loss": 0.9699, "step": 845 }, { "epoch": 0.07575130113604567, "grad_norm": 1.16077616347798, "learning_rate": 1.989080548191817e-05, "loss": 0.9116, "step": 846 }, { "epoch": 0.07584084168112373, "grad_norm": 0.9522964757493606, "learning_rate": 1.989037763315384e-05, "loss": 0.9472, "step": 847 }, { "epoch": 0.0759303822262018, "grad_norm": 1.398726611683997, "learning_rate": 1.9889948952443174e-05, "loss": 0.9338, "step": 848 }, { "epoch": 0.07601992277127988, "grad_norm": 1.1083501916564726, "learning_rate": 1.9889519439822232e-05, "loss": 0.8741, "step": 849 }, { "epoch": 0.07610946331635794, "grad_norm": 1.0855537638641066, "learning_rate": 1.9889089095327143e-05, "loss": 0.9401, "step": 850 }, { "epoch": 0.07619900386143601, "grad_norm": 1.0898070345910735, "learning_rate": 1.9888657918994102e-05, "loss": 0.9068, "step": 851 }, { "epoch": 0.07628854440651407, "grad_norm": 1.0136753208067075, "learning_rate": 1.9888225910859386e-05, "loss": 0.8943, "step": 852 }, { "epoch": 0.07637808495159214, "grad_norm": 1.072756220801226, "learning_rate": 1.9887793070959325e-05, "loss": 0.9233, "step": 853 }, { "epoch": 0.07646762549667022, "grad_norm": 0.9978351664243232, "learning_rate": 1.9887359399330335e-05, "loss": 0.8617, "step": 854 }, { "epoch": 0.07655716604174828, "grad_norm": 1.0893068667969263, "learning_rate": 1.9886924896008887e-05, "loss": 0.9063, "step": 855 }, { "epoch": 0.07664670658682635, "grad_norm": 1.0421004835075962, "learning_rate": 1.988648956103154e-05, "loss": 0.9575, "step": 856 }, { "epoch": 0.07673624713190441, "grad_norm": 0.9726347044403362, "learning_rate": 1.988605339443491e-05, "loss": 0.8479, "step": 857 }, { "epoch": 0.07682578767698249, "grad_norm": 0.9524692222340961, "learning_rate": 1.988561639625568e-05, "loss": 0.93, "step": 858 }, { "epoch": 0.07691532822206056, "grad_norm": 1.1265438692635157, "learning_rate": 1.9885178566530615e-05, "loss": 0.9638, "step": 859 }, { "epoch": 0.07700486876713862, "grad_norm": 0.9978472712549302, "learning_rate": 1.988473990529654e-05, "loss": 0.9611, "step": 860 }, { "epoch": 0.07709440931221669, "grad_norm": 1.2762091930965278, "learning_rate": 1.9884300412590357e-05, "loss": 0.9169, "step": 861 }, { "epoch": 0.07718394985729475, "grad_norm": 1.0698741410776418, "learning_rate": 1.9883860088449035e-05, "loss": 0.9678, "step": 862 }, { "epoch": 0.07727349040237283, "grad_norm": 1.0051623823314215, "learning_rate": 1.988341893290961e-05, "loss": 0.8831, "step": 863 }, { "epoch": 0.0773630309474509, "grad_norm": 1.058636369730302, "learning_rate": 1.9882976946009188e-05, "loss": 0.9564, "step": 864 }, { "epoch": 0.07745257149252896, "grad_norm": 1.2072745181127453, "learning_rate": 1.9882534127784954e-05, "loss": 0.9106, "step": 865 }, { "epoch": 0.07754211203760702, "grad_norm": 1.154359450731333, "learning_rate": 1.9882090478274155e-05, "loss": 0.9099, "step": 866 }, { "epoch": 0.0776316525826851, "grad_norm": 1.1964280560949228, "learning_rate": 1.9881645997514103e-05, "loss": 1.0146, "step": 867 }, { "epoch": 0.07772119312776317, "grad_norm": 1.0275174830566007, "learning_rate": 1.9881200685542194e-05, "loss": 0.944, "step": 868 }, { "epoch": 0.07781073367284123, "grad_norm": 1.107584853628805, "learning_rate": 1.9880754542395883e-05, "loss": 0.9567, "step": 869 }, { "epoch": 0.0779002742179193, "grad_norm": 0.9936334902527764, "learning_rate": 1.98803075681127e-05, "loss": 0.9024, "step": 870 }, { "epoch": 0.07798981476299736, "grad_norm": 1.1440551976412965, "learning_rate": 1.9879859762730242e-05, "loss": 0.9362, "step": 871 }, { "epoch": 0.07807935530807544, "grad_norm": 1.0233180174699994, "learning_rate": 1.987941112628617e-05, "loss": 0.9495, "step": 872 }, { "epoch": 0.07816889585315351, "grad_norm": 0.9609663633090297, "learning_rate": 1.9878961658818232e-05, "loss": 0.8511, "step": 873 }, { "epoch": 0.07825843639823157, "grad_norm": 1.0342393719877254, "learning_rate": 1.9878511360364234e-05, "loss": 0.8937, "step": 874 }, { "epoch": 0.07834797694330964, "grad_norm": 0.954815428892118, "learning_rate": 1.9878060230962052e-05, "loss": 0.9195, "step": 875 }, { "epoch": 0.07843751748838772, "grad_norm": 1.2511537339138603, "learning_rate": 1.987760827064963e-05, "loss": 0.9031, "step": 876 }, { "epoch": 0.07852705803346578, "grad_norm": 1.149015218587212, "learning_rate": 1.9877155479464986e-05, "loss": 0.934, "step": 877 }, { "epoch": 0.07861659857854385, "grad_norm": 0.9973306217903806, "learning_rate": 1.9876701857446216e-05, "loss": 0.8586, "step": 878 }, { "epoch": 0.07870613912362191, "grad_norm": 1.091318391136101, "learning_rate": 1.9876247404631467e-05, "loss": 0.9178, "step": 879 }, { "epoch": 0.07879567966869998, "grad_norm": 1.0190097633580029, "learning_rate": 1.987579212105897e-05, "loss": 0.9068, "step": 880 }, { "epoch": 0.07888522021377806, "grad_norm": 0.9919129161513033, "learning_rate": 1.9875336006767022e-05, "loss": 0.8158, "step": 881 }, { "epoch": 0.07897476075885612, "grad_norm": 1.1200371529016633, "learning_rate": 1.9874879061793992e-05, "loss": 0.9459, "step": 882 }, { "epoch": 0.07906430130393419, "grad_norm": 0.9482865080430126, "learning_rate": 1.9874421286178312e-05, "loss": 0.9465, "step": 883 }, { "epoch": 0.07915384184901225, "grad_norm": 1.0414767471873674, "learning_rate": 1.9873962679958496e-05, "loss": 0.8941, "step": 884 }, { "epoch": 0.07924338239409033, "grad_norm": 1.1940635940311715, "learning_rate": 1.987350324317311e-05, "loss": 0.8841, "step": 885 }, { "epoch": 0.0793329229391684, "grad_norm": 1.1039658120559788, "learning_rate": 1.987304297586081e-05, "loss": 0.9095, "step": 886 }, { "epoch": 0.07942246348424646, "grad_norm": 1.1060534646017763, "learning_rate": 1.9872581878060308e-05, "loss": 0.937, "step": 887 }, { "epoch": 0.07951200402932453, "grad_norm": 1.095542849670436, "learning_rate": 1.9872119949810388e-05, "loss": 0.9469, "step": 888 }, { "epoch": 0.07960154457440259, "grad_norm": 1.3756595992905571, "learning_rate": 1.987165719114991e-05, "loss": 0.8979, "step": 889 }, { "epoch": 0.07969108511948067, "grad_norm": 1.0765276310332224, "learning_rate": 1.9871193602117797e-05, "loss": 0.9172, "step": 890 }, { "epoch": 0.07978062566455874, "grad_norm": 1.0442086599616847, "learning_rate": 1.9870729182753042e-05, "loss": 0.8942, "step": 891 }, { "epoch": 0.0798701662096368, "grad_norm": 1.0289097323592844, "learning_rate": 1.987026393309472e-05, "loss": 0.966, "step": 892 }, { "epoch": 0.07995970675471487, "grad_norm": 1.1004293616793352, "learning_rate": 1.986979785318196e-05, "loss": 0.9621, "step": 893 }, { "epoch": 0.08004924729979294, "grad_norm": 0.97487133086226, "learning_rate": 1.9869330943053963e-05, "loss": 0.9094, "step": 894 }, { "epoch": 0.08013878784487101, "grad_norm": 0.9922116079755481, "learning_rate": 1.9868863202750012e-05, "loss": 0.9102, "step": 895 }, { "epoch": 0.08022832838994907, "grad_norm": 0.9995009918783716, "learning_rate": 1.9868394632309443e-05, "loss": 0.9394, "step": 896 }, { "epoch": 0.08031786893502714, "grad_norm": 1.3502338924070718, "learning_rate": 1.986792523177168e-05, "loss": 0.9702, "step": 897 }, { "epoch": 0.0804074094801052, "grad_norm": 1.0314188606483061, "learning_rate": 1.9867455001176203e-05, "loss": 0.9697, "step": 898 }, { "epoch": 0.08049695002518328, "grad_norm": 1.01381117466966, "learning_rate": 1.9866983940562564e-05, "loss": 0.9302, "step": 899 }, { "epoch": 0.08058649057026135, "grad_norm": 1.0049766874986243, "learning_rate": 1.9866512049970393e-05, "loss": 0.8975, "step": 900 }, { "epoch": 0.08067603111533941, "grad_norm": 1.0668122923505434, "learning_rate": 1.9866039329439376e-05, "loss": 0.9539, "step": 901 }, { "epoch": 0.08076557166041748, "grad_norm": 1.0762283633960257, "learning_rate": 1.9865565779009282e-05, "loss": 0.891, "step": 902 }, { "epoch": 0.08085511220549556, "grad_norm": 1.157596894027934, "learning_rate": 1.986509139871995e-05, "loss": 0.9734, "step": 903 }, { "epoch": 0.08094465275057362, "grad_norm": 0.9713434745409791, "learning_rate": 1.9864616188611273e-05, "loss": 0.8772, "step": 904 }, { "epoch": 0.08103419329565169, "grad_norm": 1.0674147217625458, "learning_rate": 1.9864140148723225e-05, "loss": 0.9998, "step": 905 }, { "epoch": 0.08112373384072975, "grad_norm": 1.110490493275662, "learning_rate": 1.986366327909585e-05, "loss": 0.9432, "step": 906 }, { "epoch": 0.08121327438580782, "grad_norm": 1.0269226247158223, "learning_rate": 1.986318557976927e-05, "loss": 0.8966, "step": 907 }, { "epoch": 0.0813028149308859, "grad_norm": 0.9354483742041779, "learning_rate": 1.986270705078366e-05, "loss": 0.8899, "step": 908 }, { "epoch": 0.08139235547596396, "grad_norm": 1.2840740644893676, "learning_rate": 1.9862227692179266e-05, "loss": 0.9791, "step": 909 }, { "epoch": 0.08148189602104203, "grad_norm": 1.1052827689776052, "learning_rate": 1.9861747503996423e-05, "loss": 0.8913, "step": 910 }, { "epoch": 0.08157143656612009, "grad_norm": 1.0484833184344298, "learning_rate": 1.9861266486275516e-05, "loss": 0.9623, "step": 911 }, { "epoch": 0.08166097711119816, "grad_norm": 0.9624154642879219, "learning_rate": 1.9860784639057e-05, "loss": 0.9145, "step": 912 }, { "epoch": 0.08175051765627624, "grad_norm": 1.1638598847203674, "learning_rate": 1.986030196238142e-05, "loss": 0.8488, "step": 913 }, { "epoch": 0.0818400582013543, "grad_norm": 1.1421538146287942, "learning_rate": 1.985981845628937e-05, "loss": 0.8823, "step": 914 }, { "epoch": 0.08192959874643237, "grad_norm": 0.9717725518722011, "learning_rate": 1.985933412082152e-05, "loss": 0.9264, "step": 915 }, { "epoch": 0.08201913929151043, "grad_norm": 1.0125066079262945, "learning_rate": 1.9858848956018615e-05, "loss": 0.9675, "step": 916 }, { "epoch": 0.08210867983658851, "grad_norm": 0.9626661384813827, "learning_rate": 1.985836296192146e-05, "loss": 0.8714, "step": 917 }, { "epoch": 0.08219822038166658, "grad_norm": 1.1893799942898362, "learning_rate": 1.985787613857094e-05, "loss": 0.9077, "step": 918 }, { "epoch": 0.08228776092674464, "grad_norm": 0.995297789501123, "learning_rate": 1.9857388486008e-05, "loss": 0.9568, "step": 919 }, { "epoch": 0.0823773014718227, "grad_norm": 1.2055443657267433, "learning_rate": 1.9856900004273667e-05, "loss": 0.9707, "step": 920 }, { "epoch": 0.08246684201690077, "grad_norm": 1.1798788577781218, "learning_rate": 1.9856410693409027e-05, "loss": 0.8836, "step": 921 }, { "epoch": 0.08255638256197885, "grad_norm": 0.9974944211547258, "learning_rate": 1.9855920553455233e-05, "loss": 0.97, "step": 922 }, { "epoch": 0.08264592310705692, "grad_norm": 1.0293306086586178, "learning_rate": 1.9855429584453525e-05, "loss": 0.78, "step": 923 }, { "epoch": 0.08273546365213498, "grad_norm": 1.0079138553253082, "learning_rate": 1.9854937786445195e-05, "loss": 0.9556, "step": 924 }, { "epoch": 0.08282500419721305, "grad_norm": 1.0546394442349447, "learning_rate": 1.9854445159471612e-05, "loss": 0.9234, "step": 925 }, { "epoch": 0.08291454474229112, "grad_norm": 0.9751918705661583, "learning_rate": 1.9853951703574212e-05, "loss": 0.8623, "step": 926 }, { "epoch": 0.08300408528736919, "grad_norm": 1.027224139591515, "learning_rate": 1.985345741879451e-05, "loss": 0.8928, "step": 927 }, { "epoch": 0.08309362583244725, "grad_norm": 1.2112814301711503, "learning_rate": 1.985296230517408e-05, "loss": 0.9415, "step": 928 }, { "epoch": 0.08318316637752532, "grad_norm": 1.0106080884570308, "learning_rate": 1.9852466362754566e-05, "loss": 0.8633, "step": 929 }, { "epoch": 0.08327270692260338, "grad_norm": 0.9541153973238627, "learning_rate": 1.9851969591577688e-05, "loss": 0.9079, "step": 930 }, { "epoch": 0.08336224746768146, "grad_norm": 1.0439510120663027, "learning_rate": 1.985147199168523e-05, "loss": 0.8783, "step": 931 }, { "epoch": 0.08345178801275953, "grad_norm": 0.9664476396941193, "learning_rate": 1.9850973563119057e-05, "loss": 0.8979, "step": 932 }, { "epoch": 0.0835413285578376, "grad_norm": 0.9895526614269158, "learning_rate": 1.9850474305921085e-05, "loss": 0.9285, "step": 933 }, { "epoch": 0.08363086910291566, "grad_norm": 0.9934600739152486, "learning_rate": 1.984997422013332e-05, "loss": 0.9336, "step": 934 }, { "epoch": 0.08372040964799374, "grad_norm": 0.94432367653176, "learning_rate": 1.9849473305797816e-05, "loss": 0.9344, "step": 935 }, { "epoch": 0.0838099501930718, "grad_norm": 0.9718989759799674, "learning_rate": 1.9848971562956714e-05, "loss": 0.9661, "step": 936 }, { "epoch": 0.08389949073814987, "grad_norm": 1.103217733103473, "learning_rate": 1.9848468991652223e-05, "loss": 0.9605, "step": 937 }, { "epoch": 0.08398903128322793, "grad_norm": 1.0776765612303294, "learning_rate": 1.984796559192661e-05, "loss": 0.9576, "step": 938 }, { "epoch": 0.084078571828306, "grad_norm": 1.0640643135955883, "learning_rate": 1.9847461363822226e-05, "loss": 0.9284, "step": 939 }, { "epoch": 0.08416811237338408, "grad_norm": 1.0498269360731822, "learning_rate": 1.9846956307381478e-05, "loss": 0.9225, "step": 940 }, { "epoch": 0.08425765291846214, "grad_norm": 1.0038722167931113, "learning_rate": 1.9846450422646856e-05, "loss": 0.8449, "step": 941 }, { "epoch": 0.08434719346354021, "grad_norm": 1.1707854737741155, "learning_rate": 1.984594370966091e-05, "loss": 0.8808, "step": 942 }, { "epoch": 0.08443673400861827, "grad_norm": 1.0075888082366793, "learning_rate": 1.9845436168466268e-05, "loss": 0.8595, "step": 943 }, { "epoch": 0.08452627455369635, "grad_norm": 1.0611600414896925, "learning_rate": 1.9844927799105615e-05, "loss": 0.966, "step": 944 }, { "epoch": 0.08461581509877442, "grad_norm": 1.2655662234817153, "learning_rate": 1.9844418601621717e-05, "loss": 0.8728, "step": 945 }, { "epoch": 0.08470535564385248, "grad_norm": 0.9937763950261357, "learning_rate": 1.984390857605741e-05, "loss": 0.8802, "step": 946 }, { "epoch": 0.08479489618893055, "grad_norm": 1.1086121999821106, "learning_rate": 1.984339772245559e-05, "loss": 0.9178, "step": 947 }, { "epoch": 0.08488443673400861, "grad_norm": 1.152711637981438, "learning_rate": 1.9842886040859227e-05, "loss": 0.8792, "step": 948 }, { "epoch": 0.08497397727908669, "grad_norm": 0.9684161167811586, "learning_rate": 1.9842373531311368e-05, "loss": 0.8956, "step": 949 }, { "epoch": 0.08506351782416476, "grad_norm": 0.9779956833136649, "learning_rate": 1.9841860193855123e-05, "loss": 0.8916, "step": 950 }, { "epoch": 0.08515305836924282, "grad_norm": 0.9926653513776225, "learning_rate": 1.984134602853367e-05, "loss": 0.9212, "step": 951 }, { "epoch": 0.08524259891432089, "grad_norm": 1.0049715602180254, "learning_rate": 1.9840831035390256e-05, "loss": 0.9167, "step": 952 }, { "epoch": 0.08533213945939896, "grad_norm": 0.9951534227797122, "learning_rate": 1.9840315214468205e-05, "loss": 0.9622, "step": 953 }, { "epoch": 0.08542168000447703, "grad_norm": 0.9840824273732606, "learning_rate": 1.9839798565810904e-05, "loss": 0.9117, "step": 954 }, { "epoch": 0.0855112205495551, "grad_norm": 1.1096416555984059, "learning_rate": 1.9839281089461814e-05, "loss": 0.9509, "step": 955 }, { "epoch": 0.08560076109463316, "grad_norm": 0.9371054583857013, "learning_rate": 1.9838762785464463e-05, "loss": 0.8573, "step": 956 }, { "epoch": 0.08569030163971122, "grad_norm": 1.0397168721562617, "learning_rate": 1.9838243653862445e-05, "loss": 0.8528, "step": 957 }, { "epoch": 0.0857798421847893, "grad_norm": 0.9974252072131928, "learning_rate": 1.9837723694699433e-05, "loss": 0.9024, "step": 958 }, { "epoch": 0.08586938272986737, "grad_norm": 0.9269167540838222, "learning_rate": 1.9837202908019163e-05, "loss": 0.8868, "step": 959 }, { "epoch": 0.08595892327494543, "grad_norm": 1.026676707058281, "learning_rate": 1.9836681293865437e-05, "loss": 0.9318, "step": 960 }, { "epoch": 0.0860484638200235, "grad_norm": 1.0231294889452878, "learning_rate": 1.983615885228214e-05, "loss": 0.8848, "step": 961 }, { "epoch": 0.08613800436510158, "grad_norm": 1.0344469809116108, "learning_rate": 1.983563558331321e-05, "loss": 0.8844, "step": 962 }, { "epoch": 0.08622754491017964, "grad_norm": 0.9965134612996915, "learning_rate": 1.983511148700267e-05, "loss": 0.8785, "step": 963 }, { "epoch": 0.08631708545525771, "grad_norm": 1.0534718798171145, "learning_rate": 1.9834586563394597e-05, "loss": 1.0106, "step": 964 }, { "epoch": 0.08640662600033577, "grad_norm": 1.0481195463158086, "learning_rate": 1.9834060812533154e-05, "loss": 0.9701, "step": 965 }, { "epoch": 0.08649616654541384, "grad_norm": 0.9899595228445589, "learning_rate": 1.9833534234462557e-05, "loss": 0.8952, "step": 966 }, { "epoch": 0.08658570709049192, "grad_norm": 1.0868492373640468, "learning_rate": 1.983300682922711e-05, "loss": 0.9329, "step": 967 }, { "epoch": 0.08667524763556998, "grad_norm": 1.2262011641275323, "learning_rate": 1.983247859687117e-05, "loss": 0.9234, "step": 968 }, { "epoch": 0.08676478818064805, "grad_norm": 1.0772032862620653, "learning_rate": 1.983194953743917e-05, "loss": 0.8953, "step": 969 }, { "epoch": 0.08685432872572611, "grad_norm": 1.0675983895005767, "learning_rate": 1.9831419650975615e-05, "loss": 0.9733, "step": 970 }, { "epoch": 0.08694386927080419, "grad_norm": 1.2501123395676448, "learning_rate": 1.9830888937525076e-05, "loss": 0.9397, "step": 971 }, { "epoch": 0.08703340981588226, "grad_norm": 1.095557769603638, "learning_rate": 1.9830357397132195e-05, "loss": 0.9316, "step": 972 }, { "epoch": 0.08712295036096032, "grad_norm": 1.0971694381241326, "learning_rate": 1.9829825029841685e-05, "loss": 0.9679, "step": 973 }, { "epoch": 0.08721249090603839, "grad_norm": 1.0667984812123603, "learning_rate": 1.9829291835698327e-05, "loss": 0.8779, "step": 974 }, { "epoch": 0.08730203145111645, "grad_norm": 1.521079951676263, "learning_rate": 1.982875781474697e-05, "loss": 0.9467, "step": 975 }, { "epoch": 0.08739157199619453, "grad_norm": 1.3260451496365067, "learning_rate": 1.9828222967032533e-05, "loss": 0.887, "step": 976 }, { "epoch": 0.0874811125412726, "grad_norm": 1.0134691260707183, "learning_rate": 1.982768729260001e-05, "loss": 0.9104, "step": 977 }, { "epoch": 0.08757065308635066, "grad_norm": 0.9913957574891373, "learning_rate": 1.9827150791494456e-05, "loss": 0.8669, "step": 978 }, { "epoch": 0.08766019363142873, "grad_norm": 1.1784309301231677, "learning_rate": 1.9826613463761e-05, "loss": 0.9756, "step": 979 }, { "epoch": 0.0877497341765068, "grad_norm": 1.1074836383849385, "learning_rate": 1.9826075309444844e-05, "loss": 0.8943, "step": 980 }, { "epoch": 0.08783927472158487, "grad_norm": 1.1385921601241735, "learning_rate": 1.982553632859125e-05, "loss": 0.9255, "step": 981 }, { "epoch": 0.08792881526666294, "grad_norm": 1.0959945777606708, "learning_rate": 1.982499652124556e-05, "loss": 0.9173, "step": 982 }, { "epoch": 0.088018355811741, "grad_norm": 0.9969992545068587, "learning_rate": 1.9824455887453183e-05, "loss": 0.9662, "step": 983 }, { "epoch": 0.08810789635681907, "grad_norm": 1.0181188967985257, "learning_rate": 1.9823914427259587e-05, "loss": 0.9067, "step": 984 }, { "epoch": 0.08819743690189714, "grad_norm": 1.0875032959411282, "learning_rate": 1.9823372140710323e-05, "loss": 0.9194, "step": 985 }, { "epoch": 0.08828697744697521, "grad_norm": 1.0299510514153334, "learning_rate": 1.9822829027851008e-05, "loss": 0.8837, "step": 986 }, { "epoch": 0.08837651799205327, "grad_norm": 1.0798399483776007, "learning_rate": 1.9822285088727325e-05, "loss": 0.85, "step": 987 }, { "epoch": 0.08846605853713134, "grad_norm": 1.1956424938804886, "learning_rate": 1.982174032338503e-05, "loss": 0.9412, "step": 988 }, { "epoch": 0.08855559908220942, "grad_norm": 1.0271872795928232, "learning_rate": 1.982119473186994e-05, "loss": 0.9272, "step": 989 }, { "epoch": 0.08864513962728748, "grad_norm": 0.9585939455906575, "learning_rate": 1.9820648314227955e-05, "loss": 0.8847, "step": 990 }, { "epoch": 0.08873468017236555, "grad_norm": 1.0072877375774447, "learning_rate": 1.9820101070505037e-05, "loss": 0.9752, "step": 991 }, { "epoch": 0.08882422071744361, "grad_norm": 0.9466452666562966, "learning_rate": 1.981955300074722e-05, "loss": 0.9197, "step": 992 }, { "epoch": 0.08891376126252168, "grad_norm": 1.0779060526672624, "learning_rate": 1.98190041050006e-05, "loss": 0.9326, "step": 993 }, { "epoch": 0.08900330180759976, "grad_norm": 1.110192928947384, "learning_rate": 1.9818454383311354e-05, "loss": 0.9118, "step": 994 }, { "epoch": 0.08909284235267782, "grad_norm": 1.079618550688469, "learning_rate": 1.9817903835725722e-05, "loss": 0.9472, "step": 995 }, { "epoch": 0.08918238289775589, "grad_norm": 1.1231999133280581, "learning_rate": 1.981735246229001e-05, "loss": 0.8823, "step": 996 }, { "epoch": 0.08927192344283395, "grad_norm": 0.9555806211868249, "learning_rate": 1.98168002630506e-05, "loss": 0.9189, "step": 997 }, { "epoch": 0.08936146398791203, "grad_norm": 1.0599882822921154, "learning_rate": 1.9816247238053945e-05, "loss": 0.8937, "step": 998 }, { "epoch": 0.0894510045329901, "grad_norm": 1.0330507579481685, "learning_rate": 1.981569338734656e-05, "loss": 0.8559, "step": 999 }, { "epoch": 0.08954054507806816, "grad_norm": 1.0274329533470439, "learning_rate": 1.9815138710975034e-05, "loss": 0.9322, "step": 1000 }, { "epoch": 0.08963008562314623, "grad_norm": 1.0234300158613456, "learning_rate": 1.9814583208986025e-05, "loss": 0.8729, "step": 1001 }, { "epoch": 0.08971962616822429, "grad_norm": 0.9421514095390102, "learning_rate": 1.9814026881426257e-05, "loss": 0.9092, "step": 1002 }, { "epoch": 0.08980916671330237, "grad_norm": 1.2209412227878513, "learning_rate": 1.9813469728342528e-05, "loss": 0.959, "step": 1003 }, { "epoch": 0.08989870725838044, "grad_norm": 1.1655560856800042, "learning_rate": 1.9812911749781708e-05, "loss": 0.9118, "step": 1004 }, { "epoch": 0.0899882478034585, "grad_norm": 1.0597807712499405, "learning_rate": 1.9812352945790727e-05, "loss": 0.9842, "step": 1005 }, { "epoch": 0.09007778834853657, "grad_norm": 1.0958791052542398, "learning_rate": 1.981179331641659e-05, "loss": 0.9567, "step": 1006 }, { "epoch": 0.09016732889361465, "grad_norm": 1.046416417326152, "learning_rate": 1.9811232861706375e-05, "loss": 0.8955, "step": 1007 }, { "epoch": 0.09025686943869271, "grad_norm": 1.0485843963938237, "learning_rate": 1.9810671581707223e-05, "loss": 0.9137, "step": 1008 }, { "epoch": 0.09034640998377078, "grad_norm": 0.9669841723056106, "learning_rate": 1.981010947646635e-05, "loss": 0.9143, "step": 1009 }, { "epoch": 0.09043595052884884, "grad_norm": 1.4164449333950715, "learning_rate": 1.9809546546031034e-05, "loss": 0.9608, "step": 1010 }, { "epoch": 0.0905254910739269, "grad_norm": 1.2457272570540057, "learning_rate": 1.9808982790448626e-05, "loss": 0.8898, "step": 1011 }, { "epoch": 0.09061503161900498, "grad_norm": 1.1020328166852127, "learning_rate": 1.9808418209766555e-05, "loss": 0.9283, "step": 1012 }, { "epoch": 0.09070457216408305, "grad_norm": 1.415791903060292, "learning_rate": 1.9807852804032306e-05, "loss": 0.9224, "step": 1013 }, { "epoch": 0.09079411270916111, "grad_norm": 0.9646842119953011, "learning_rate": 1.980728657329344e-05, "loss": 0.8747, "step": 1014 }, { "epoch": 0.09088365325423918, "grad_norm": 1.0564722822310322, "learning_rate": 1.9806719517597585e-05, "loss": 0.981, "step": 1015 }, { "epoch": 0.09097319379931726, "grad_norm": 0.9845211264969549, "learning_rate": 1.9806151636992442e-05, "loss": 0.9722, "step": 1016 }, { "epoch": 0.09106273434439532, "grad_norm": 1.0403992744328583, "learning_rate": 1.9805582931525778e-05, "loss": 0.9104, "step": 1017 }, { "epoch": 0.09115227488947339, "grad_norm": 1.0133472160579726, "learning_rate": 1.9805013401245433e-05, "loss": 0.8911, "step": 1018 }, { "epoch": 0.09124181543455145, "grad_norm": 1.2142596298266948, "learning_rate": 1.9804443046199312e-05, "loss": 0.9061, "step": 1019 }, { "epoch": 0.09133135597962952, "grad_norm": 0.9486957964706776, "learning_rate": 1.9803871866435388e-05, "loss": 0.9635, "step": 1020 }, { "epoch": 0.0914208965247076, "grad_norm": 0.9151812311535943, "learning_rate": 1.9803299862001718e-05, "loss": 0.8816, "step": 1021 }, { "epoch": 0.09151043706978566, "grad_norm": 1.1240229704895737, "learning_rate": 1.9802727032946403e-05, "loss": 0.9645, "step": 1022 }, { "epoch": 0.09159997761486373, "grad_norm": 1.058999213458839, "learning_rate": 1.9802153379317637e-05, "loss": 0.9191, "step": 1023 }, { "epoch": 0.0916895181599418, "grad_norm": 1.1658814233499575, "learning_rate": 1.9801578901163672e-05, "loss": 0.944, "step": 1024 }, { "epoch": 0.09177905870501987, "grad_norm": 1.0605232071772988, "learning_rate": 1.9801003598532827e-05, "loss": 0.7986, "step": 1025 }, { "epoch": 0.09186859925009794, "grad_norm": 0.955664807178467, "learning_rate": 1.98004274714735e-05, "loss": 0.8903, "step": 1026 }, { "epoch": 0.091958139795176, "grad_norm": 1.0847315551485432, "learning_rate": 1.9799850520034153e-05, "loss": 0.902, "step": 1027 }, { "epoch": 0.09204768034025407, "grad_norm": 0.9939200285737662, "learning_rate": 1.9799272744263313e-05, "loss": 0.9115, "step": 1028 }, { "epoch": 0.09213722088533213, "grad_norm": 1.0532498032742155, "learning_rate": 1.979869414420958e-05, "loss": 0.9297, "step": 1029 }, { "epoch": 0.09222676143041021, "grad_norm": 0.9622891522514779, "learning_rate": 1.9798114719921628e-05, "loss": 0.8777, "step": 1030 }, { "epoch": 0.09231630197548828, "grad_norm": 1.0012282236948824, "learning_rate": 1.9797534471448196e-05, "loss": 0.8606, "step": 1031 }, { "epoch": 0.09240584252056634, "grad_norm": 1.0312431295016427, "learning_rate": 1.9796953398838093e-05, "loss": 0.9968, "step": 1032 }, { "epoch": 0.09249538306564441, "grad_norm": 0.9830740030757321, "learning_rate": 1.979637150214019e-05, "loss": 0.901, "step": 1033 }, { "epoch": 0.09258492361072249, "grad_norm": 1.146771052458193, "learning_rate": 1.9795788781403447e-05, "loss": 0.8475, "step": 1034 }, { "epoch": 0.09267446415580055, "grad_norm": 0.9883362276670867, "learning_rate": 1.9795205236676865e-05, "loss": 0.9003, "step": 1035 }, { "epoch": 0.09276400470087862, "grad_norm": 1.019018445826406, "learning_rate": 1.9794620868009545e-05, "loss": 0.9499, "step": 1036 }, { "epoch": 0.09285354524595668, "grad_norm": 1.003740687539369, "learning_rate": 1.9794035675450635e-05, "loss": 0.8682, "step": 1037 }, { "epoch": 0.09294308579103475, "grad_norm": 1.1111575509548541, "learning_rate": 1.9793449659049357e-05, "loss": 0.8974, "step": 1038 }, { "epoch": 0.09303262633611283, "grad_norm": 1.086207399449774, "learning_rate": 1.979286281885501e-05, "loss": 0.8601, "step": 1039 }, { "epoch": 0.09312216688119089, "grad_norm": 0.9864269597188605, "learning_rate": 1.979227515491695e-05, "loss": 0.9164, "step": 1040 }, { "epoch": 0.09321170742626896, "grad_norm": 1.09340168363352, "learning_rate": 1.9791686667284618e-05, "loss": 0.8485, "step": 1041 }, { "epoch": 0.09330124797134702, "grad_norm": 1.0355627750293275, "learning_rate": 1.9791097356007513e-05, "loss": 0.907, "step": 1042 }, { "epoch": 0.0933907885164251, "grad_norm": 1.090475822875002, "learning_rate": 1.9790507221135202e-05, "loss": 0.9047, "step": 1043 }, { "epoch": 0.09348032906150316, "grad_norm": 1.2004433326352437, "learning_rate": 1.978991626271733e-05, "loss": 0.947, "step": 1044 }, { "epoch": 0.09356986960658123, "grad_norm": 1.0894461996314844, "learning_rate": 1.9789324480803605e-05, "loss": 0.9804, "step": 1045 }, { "epoch": 0.0936594101516593, "grad_norm": 1.0098904594767402, "learning_rate": 1.9788731875443802e-05, "loss": 0.8698, "step": 1046 }, { "epoch": 0.09374895069673736, "grad_norm": 1.0577728695864634, "learning_rate": 1.9788138446687773e-05, "loss": 0.8765, "step": 1047 }, { "epoch": 0.09383849124181544, "grad_norm": 1.2769421847128455, "learning_rate": 1.9787544194585437e-05, "loss": 0.9203, "step": 1048 }, { "epoch": 0.0939280317868935, "grad_norm": 0.9849360932620246, "learning_rate": 1.9786949119186774e-05, "loss": 0.9374, "step": 1049 }, { "epoch": 0.09401757233197157, "grad_norm": 1.0576888252311865, "learning_rate": 1.978635322054185e-05, "loss": 0.9129, "step": 1050 }, { "epoch": 0.09410711287704963, "grad_norm": 1.0051680354901051, "learning_rate": 1.9785756498700777e-05, "loss": 0.9505, "step": 1051 }, { "epoch": 0.09419665342212771, "grad_norm": 0.952109883653877, "learning_rate": 1.978515895371376e-05, "loss": 0.8648, "step": 1052 }, { "epoch": 0.09428619396720578, "grad_norm": 1.0794268734044075, "learning_rate": 1.9784560585631056e-05, "loss": 0.9103, "step": 1053 }, { "epoch": 0.09437573451228384, "grad_norm": 1.0076834506740375, "learning_rate": 1.9783961394503006e-05, "loss": 0.9404, "step": 1054 }, { "epoch": 0.09446527505736191, "grad_norm": 1.1272984789338714, "learning_rate": 1.978336138038e-05, "loss": 0.9581, "step": 1055 }, { "epoch": 0.09455481560243997, "grad_norm": 1.0641271107973076, "learning_rate": 1.9782760543312516e-05, "loss": 0.8743, "step": 1056 }, { "epoch": 0.09464435614751805, "grad_norm": 1.075565797992441, "learning_rate": 1.97821588833511e-05, "loss": 0.9017, "step": 1057 }, { "epoch": 0.09473389669259612, "grad_norm": 1.0516850040224077, "learning_rate": 1.9781556400546353e-05, "loss": 0.9039, "step": 1058 }, { "epoch": 0.09482343723767418, "grad_norm": 1.0503580297337258, "learning_rate": 1.978095309494896e-05, "loss": 0.9712, "step": 1059 }, { "epoch": 0.09491297778275225, "grad_norm": 0.9516708996642765, "learning_rate": 1.978034896660966e-05, "loss": 0.9009, "step": 1060 }, { "epoch": 0.09500251832783033, "grad_norm": 1.910174287334749, "learning_rate": 1.9779744015579277e-05, "loss": 0.9339, "step": 1061 }, { "epoch": 0.09509205887290839, "grad_norm": 1.034851018690442, "learning_rate": 1.97791382419087e-05, "loss": 0.8583, "step": 1062 }, { "epoch": 0.09518159941798646, "grad_norm": 0.9734652060442432, "learning_rate": 1.977853164564888e-05, "loss": 0.8798, "step": 1063 }, { "epoch": 0.09527113996306452, "grad_norm": 0.996597694037772, "learning_rate": 1.9777924226850844e-05, "loss": 1.0061, "step": 1064 }, { "epoch": 0.09536068050814259, "grad_norm": 0.9819965332412642, "learning_rate": 1.9777315985565683e-05, "loss": 0.9103, "step": 1065 }, { "epoch": 0.09545022105322067, "grad_norm": 1.0127562631858829, "learning_rate": 1.9776706921844564e-05, "loss": 0.888, "step": 1066 }, { "epoch": 0.09553976159829873, "grad_norm": 0.9380302714651403, "learning_rate": 1.977609703573872e-05, "loss": 0.8817, "step": 1067 }, { "epoch": 0.0956293021433768, "grad_norm": 0.9862625751950775, "learning_rate": 1.977548632729945e-05, "loss": 0.9401, "step": 1068 }, { "epoch": 0.09571884268845486, "grad_norm": 1.1309850156196304, "learning_rate": 1.9774874796578124e-05, "loss": 0.9306, "step": 1069 }, { "epoch": 0.09580838323353294, "grad_norm": 0.9895531720071091, "learning_rate": 1.9774262443626185e-05, "loss": 0.9768, "step": 1070 }, { "epoch": 0.095897923778611, "grad_norm": 1.1417478118624353, "learning_rate": 1.977364926849514e-05, "loss": 0.9471, "step": 1071 }, { "epoch": 0.09598746432368907, "grad_norm": 1.0980272794419574, "learning_rate": 1.9773035271236566e-05, "loss": 0.9188, "step": 1072 }, { "epoch": 0.09607700486876714, "grad_norm": 0.9888380867734908, "learning_rate": 1.9772420451902115e-05, "loss": 0.8634, "step": 1073 }, { "epoch": 0.0961665454138452, "grad_norm": 1.037767551312556, "learning_rate": 1.97718048105435e-05, "loss": 0.8821, "step": 1074 }, { "epoch": 0.09625608595892328, "grad_norm": 1.066034715830074, "learning_rate": 1.977118834721251e-05, "loss": 0.9147, "step": 1075 }, { "epoch": 0.09634562650400134, "grad_norm": 1.0316301053496342, "learning_rate": 1.9770571061960996e-05, "loss": 0.9247, "step": 1076 }, { "epoch": 0.09643516704907941, "grad_norm": 1.0474911181696074, "learning_rate": 1.9769952954840882e-05, "loss": 0.9821, "step": 1077 }, { "epoch": 0.09652470759415747, "grad_norm": 1.0092610666110011, "learning_rate": 1.9769334025904162e-05, "loss": 0.932, "step": 1078 }, { "epoch": 0.09661424813923555, "grad_norm": 1.0203325383301238, "learning_rate": 1.9768714275202903e-05, "loss": 0.8728, "step": 1079 }, { "epoch": 0.09670378868431362, "grad_norm": 1.259346019100911, "learning_rate": 1.9768093702789232e-05, "loss": 0.9368, "step": 1080 }, { "epoch": 0.09679332922939168, "grad_norm": 1.0060297315961224, "learning_rate": 1.9767472308715347e-05, "loss": 0.9569, "step": 1081 }, { "epoch": 0.09688286977446975, "grad_norm": 1.0122703628121168, "learning_rate": 1.9766850093033524e-05, "loss": 0.8733, "step": 1082 }, { "epoch": 0.09697241031954781, "grad_norm": 1.0590893057428121, "learning_rate": 1.9766227055796098e-05, "loss": 0.9278, "step": 1083 }, { "epoch": 0.09706195086462589, "grad_norm": 1.159262333022713, "learning_rate": 1.9765603197055474e-05, "loss": 0.9562, "step": 1084 }, { "epoch": 0.09715149140970396, "grad_norm": 1.0036398522477317, "learning_rate": 1.9764978516864134e-05, "loss": 0.9499, "step": 1085 }, { "epoch": 0.09724103195478202, "grad_norm": 0.9611113089961691, "learning_rate": 1.9764353015274625e-05, "loss": 0.9706, "step": 1086 }, { "epoch": 0.09733057249986009, "grad_norm": 1.0257411372994734, "learning_rate": 1.9763726692339557e-05, "loss": 0.9178, "step": 1087 }, { "epoch": 0.09742011304493817, "grad_norm": 1.0295769059231825, "learning_rate": 1.9763099548111616e-05, "loss": 0.9514, "step": 1088 }, { "epoch": 0.09750965359001623, "grad_norm": 1.1071447559979999, "learning_rate": 1.976247158264356e-05, "loss": 0.9599, "step": 1089 }, { "epoch": 0.0975991941350943, "grad_norm": 1.0219363715398784, "learning_rate": 1.9761842795988203e-05, "loss": 0.9188, "step": 1090 }, { "epoch": 0.09768873468017236, "grad_norm": 1.007994452495011, "learning_rate": 1.976121318819844e-05, "loss": 0.8456, "step": 1091 }, { "epoch": 0.09777827522525043, "grad_norm": 1.0461179140238144, "learning_rate": 1.9760582759327235e-05, "loss": 0.9205, "step": 1092 }, { "epoch": 0.0978678157703285, "grad_norm": 0.9338443115476841, "learning_rate": 1.9759951509427617e-05, "loss": 0.893, "step": 1093 }, { "epoch": 0.09795735631540657, "grad_norm": 1.0303488204276987, "learning_rate": 1.975931943855268e-05, "loss": 0.9195, "step": 1094 }, { "epoch": 0.09804689686048464, "grad_norm": 1.1332943161797837, "learning_rate": 1.97586865467556e-05, "loss": 0.9269, "step": 1095 }, { "epoch": 0.0981364374055627, "grad_norm": 0.9467693460557545, "learning_rate": 1.97580528340896e-05, "loss": 0.9311, "step": 1096 }, { "epoch": 0.09822597795064078, "grad_norm": 0.9737908555982472, "learning_rate": 1.9757418300608e-05, "loss": 0.881, "step": 1097 }, { "epoch": 0.09831551849571885, "grad_norm": 1.0222661133425628, "learning_rate": 1.9756782946364167e-05, "loss": 0.9064, "step": 1098 }, { "epoch": 0.09840505904079691, "grad_norm": 1.070488127045209, "learning_rate": 1.9756146771411548e-05, "loss": 0.8713, "step": 1099 }, { "epoch": 0.09849459958587498, "grad_norm": 1.006387573826333, "learning_rate": 1.9755509775803655e-05, "loss": 0.8702, "step": 1100 }, { "epoch": 0.09858414013095304, "grad_norm": 1.0776658103075902, "learning_rate": 1.9754871959594068e-05, "loss": 0.9183, "step": 1101 }, { "epoch": 0.09867368067603112, "grad_norm": 1.1021292886742495, "learning_rate": 1.9754233322836442e-05, "loss": 0.9987, "step": 1102 }, { "epoch": 0.09876322122110918, "grad_norm": 1.0777557693088786, "learning_rate": 1.9753593865584494e-05, "loss": 0.9454, "step": 1103 }, { "epoch": 0.09885276176618725, "grad_norm": 1.051129296962062, "learning_rate": 1.9752953587892013e-05, "loss": 0.9751, "step": 1104 }, { "epoch": 0.09894230231126531, "grad_norm": 1.064424084371974, "learning_rate": 1.9752312489812858e-05, "loss": 0.9069, "step": 1105 }, { "epoch": 0.0990318428563434, "grad_norm": 1.0717808176527297, "learning_rate": 1.975167057140096e-05, "loss": 0.935, "step": 1106 }, { "epoch": 0.09912138340142146, "grad_norm": 0.9886460816081393, "learning_rate": 1.975102783271031e-05, "loss": 0.9105, "step": 1107 }, { "epoch": 0.09921092394649952, "grad_norm": 0.9541678205642731, "learning_rate": 1.9750384273794973e-05, "loss": 0.8864, "step": 1108 }, { "epoch": 0.09930046449157759, "grad_norm": 1.036816717341618, "learning_rate": 1.9749739894709086e-05, "loss": 0.865, "step": 1109 }, { "epoch": 0.09939000503665565, "grad_norm": 1.1418725226503275, "learning_rate": 1.974909469550685e-05, "loss": 0.9053, "step": 1110 }, { "epoch": 0.09947954558173373, "grad_norm": 1.1069408548954163, "learning_rate": 1.9748448676242537e-05, "loss": 0.9581, "step": 1111 }, { "epoch": 0.0995690861268118, "grad_norm": 1.054975568639669, "learning_rate": 1.9747801836970488e-05, "loss": 0.8699, "step": 1112 }, { "epoch": 0.09965862667188986, "grad_norm": 1.0890106441783503, "learning_rate": 1.9747154177745114e-05, "loss": 0.8498, "step": 1113 }, { "epoch": 0.09974816721696793, "grad_norm": 1.1323011383718826, "learning_rate": 1.9746505698620895e-05, "loss": 0.9866, "step": 1114 }, { "epoch": 0.09983770776204601, "grad_norm": 1.0241738730740941, "learning_rate": 1.9745856399652377e-05, "loss": 0.8983, "step": 1115 }, { "epoch": 0.09992724830712407, "grad_norm": 1.1240433898033322, "learning_rate": 1.974520628089418e-05, "loss": 0.9776, "step": 1116 }, { "epoch": 0.10001678885220214, "grad_norm": 1.0650535772906575, "learning_rate": 1.9744555342400984e-05, "loss": 0.847, "step": 1117 }, { "epoch": 0.1001063293972802, "grad_norm": 0.9777548495138415, "learning_rate": 1.9743903584227546e-05, "loss": 0.9401, "step": 1118 }, { "epoch": 0.10019586994235827, "grad_norm": 0.9959217337503538, "learning_rate": 1.9743251006428693e-05, "loss": 0.9555, "step": 1119 }, { "epoch": 0.10028541048743635, "grad_norm": 0.9817017279641013, "learning_rate": 1.9742597609059317e-05, "loss": 0.8934, "step": 1120 }, { "epoch": 0.10037495103251441, "grad_norm": 0.9835651836478607, "learning_rate": 1.9741943392174377e-05, "loss": 0.88, "step": 1121 }, { "epoch": 0.10046449157759248, "grad_norm": 0.977627600993251, "learning_rate": 1.9741288355828906e-05, "loss": 0.9089, "step": 1122 }, { "epoch": 0.10055403212267054, "grad_norm": 1.131363326208526, "learning_rate": 1.9740632500078e-05, "loss": 0.9504, "step": 1123 }, { "epoch": 0.10064357266774862, "grad_norm": 1.0834079387161248, "learning_rate": 1.9739975824976832e-05, "loss": 0.9591, "step": 1124 }, { "epoch": 0.10073311321282669, "grad_norm": 0.9589963755139915, "learning_rate": 1.9739318330580637e-05, "loss": 0.9139, "step": 1125 }, { "epoch": 0.10082265375790475, "grad_norm": 0.9409126984197141, "learning_rate": 1.973866001694472e-05, "loss": 0.8619, "step": 1126 }, { "epoch": 0.10091219430298282, "grad_norm": 1.0591025019406468, "learning_rate": 1.973800088412446e-05, "loss": 0.8933, "step": 1127 }, { "epoch": 0.10100173484806088, "grad_norm": 1.0481879420224454, "learning_rate": 1.9737340932175297e-05, "loss": 0.9378, "step": 1128 }, { "epoch": 0.10109127539313896, "grad_norm": 0.9861865493268669, "learning_rate": 1.9736680161152747e-05, "loss": 0.8456, "step": 1129 }, { "epoch": 0.10118081593821703, "grad_norm": 1.034652246288241, "learning_rate": 1.9736018571112393e-05, "loss": 0.9027, "step": 1130 }, { "epoch": 0.10127035648329509, "grad_norm": 1.0819514775286645, "learning_rate": 1.973535616210988e-05, "loss": 0.9308, "step": 1131 }, { "epoch": 0.10135989702837316, "grad_norm": 1.0264013999120254, "learning_rate": 1.9734692934200936e-05, "loss": 0.9381, "step": 1132 }, { "epoch": 0.10144943757345123, "grad_norm": 0.9786435371653626, "learning_rate": 1.9734028887441336e-05, "loss": 0.9589, "step": 1133 }, { "epoch": 0.1015389781185293, "grad_norm": 1.064483474996282, "learning_rate": 1.9733364021886953e-05, "loss": 0.8995, "step": 1134 }, { "epoch": 0.10162851866360736, "grad_norm": 1.0417357242274723, "learning_rate": 1.973269833759371e-05, "loss": 0.954, "step": 1135 }, { "epoch": 0.10171805920868543, "grad_norm": 1.0192819587711033, "learning_rate": 1.973203183461759e-05, "loss": 0.907, "step": 1136 }, { "epoch": 0.1018075997537635, "grad_norm": 1.0036681947326171, "learning_rate": 1.9731364513014673e-05, "loss": 0.8687, "step": 1137 }, { "epoch": 0.10189714029884157, "grad_norm": 0.9484731881705861, "learning_rate": 1.973069637284108e-05, "loss": 0.9013, "step": 1138 }, { "epoch": 0.10198668084391964, "grad_norm": 1.0322437328572343, "learning_rate": 1.973002741415302e-05, "loss": 0.9365, "step": 1139 }, { "epoch": 0.1020762213889977, "grad_norm": 1.1618378607753725, "learning_rate": 1.9729357637006758e-05, "loss": 0.9173, "step": 1140 }, { "epoch": 0.10216576193407577, "grad_norm": 1.0109044867943355, "learning_rate": 1.972868704145864e-05, "loss": 0.9307, "step": 1141 }, { "epoch": 0.10225530247915385, "grad_norm": 1.2838042114930606, "learning_rate": 1.972801562756507e-05, "loss": 0.8625, "step": 1142 }, { "epoch": 0.10234484302423191, "grad_norm": 0.9872604215141734, "learning_rate": 1.972734339538253e-05, "loss": 0.9257, "step": 1143 }, { "epoch": 0.10243438356930998, "grad_norm": 1.0917688054282646, "learning_rate": 1.9726670344967558e-05, "loss": 0.9373, "step": 1144 }, { "epoch": 0.10252392411438804, "grad_norm": 1.0259275639408205, "learning_rate": 1.972599647637677e-05, "loss": 0.8999, "step": 1145 }, { "epoch": 0.10261346465946611, "grad_norm": 0.98610979625717, "learning_rate": 1.9725321789666858e-05, "loss": 0.9178, "step": 1146 }, { "epoch": 0.10270300520454419, "grad_norm": 1.0290554851743896, "learning_rate": 1.9724646284894565e-05, "loss": 0.8958, "step": 1147 }, { "epoch": 0.10279254574962225, "grad_norm": 0.949912473371854, "learning_rate": 1.9723969962116723e-05, "loss": 0.9462, "step": 1148 }, { "epoch": 0.10288208629470032, "grad_norm": 1.0109177267552307, "learning_rate": 1.972329282139021e-05, "loss": 0.8549, "step": 1149 }, { "epoch": 0.10297162683977838, "grad_norm": 1.068537105194316, "learning_rate": 1.972261486277199e-05, "loss": 0.8918, "step": 1150 }, { "epoch": 0.10306116738485646, "grad_norm": 1.0629665724590318, "learning_rate": 1.972193608631909e-05, "loss": 0.8754, "step": 1151 }, { "epoch": 0.10315070792993453, "grad_norm": 0.9839002607992869, "learning_rate": 1.9721256492088612e-05, "loss": 0.8914, "step": 1152 }, { "epoch": 0.10324024847501259, "grad_norm": 0.9379596512612858, "learning_rate": 1.9720576080137712e-05, "loss": 0.953, "step": 1153 }, { "epoch": 0.10332978902009066, "grad_norm": 1.0249004898873135, "learning_rate": 1.9719894850523632e-05, "loss": 0.8928, "step": 1154 }, { "epoch": 0.10341932956516872, "grad_norm": 1.0652426568227717, "learning_rate": 1.9719212803303667e-05, "loss": 0.8851, "step": 1155 }, { "epoch": 0.1035088701102468, "grad_norm": 0.9785425929421279, "learning_rate": 1.97185299385352e-05, "loss": 0.9825, "step": 1156 }, { "epoch": 0.10359841065532487, "grad_norm": 1.119670890099895, "learning_rate": 1.9717846256275658e-05, "loss": 0.8895, "step": 1157 }, { "epoch": 0.10368795120040293, "grad_norm": 0.9543151647224134, "learning_rate": 1.9717161756582556e-05, "loss": 0.9026, "step": 1158 }, { "epoch": 0.103777491745481, "grad_norm": 1.043367843043276, "learning_rate": 1.9716476439513475e-05, "loss": 0.9232, "step": 1159 }, { "epoch": 0.10386703229055907, "grad_norm": 0.9736616098318307, "learning_rate": 1.971579030512606e-05, "loss": 0.9333, "step": 1160 }, { "epoch": 0.10395657283563714, "grad_norm": 0.9769493635721329, "learning_rate": 1.9715103353478025e-05, "loss": 0.9446, "step": 1161 }, { "epoch": 0.1040461133807152, "grad_norm": 1.0398216882607856, "learning_rate": 1.9714415584627154e-05, "loss": 0.896, "step": 1162 }, { "epoch": 0.10413565392579327, "grad_norm": 1.0670522282771877, "learning_rate": 1.97137269986313e-05, "loss": 0.8979, "step": 1163 }, { "epoch": 0.10422519447087134, "grad_norm": 1.0388046666302806, "learning_rate": 1.9713037595548384e-05, "loss": 0.9799, "step": 1164 }, { "epoch": 0.10431473501594941, "grad_norm": 1.0583555233844038, "learning_rate": 1.97123473754364e-05, "loss": 0.8833, "step": 1165 }, { "epoch": 0.10440427556102748, "grad_norm": 1.1093430023141209, "learning_rate": 1.9711656338353405e-05, "loss": 0.9151, "step": 1166 }, { "epoch": 0.10449381610610554, "grad_norm": 1.1563792817894711, "learning_rate": 1.9710964484357525e-05, "loss": 0.9366, "step": 1167 }, { "epoch": 0.10458335665118361, "grad_norm": 0.9504001200600805, "learning_rate": 1.9710271813506954e-05, "loss": 0.8996, "step": 1168 }, { "epoch": 0.10467289719626169, "grad_norm": 1.0381207129895476, "learning_rate": 1.9709578325859966e-05, "loss": 0.8413, "step": 1169 }, { "epoch": 0.10476243774133975, "grad_norm": 1.0776500750960065, "learning_rate": 1.9708884021474887e-05, "loss": 0.941, "step": 1170 }, { "epoch": 0.10485197828641782, "grad_norm": 0.8768290045595767, "learning_rate": 1.9708188900410124e-05, "loss": 0.9412, "step": 1171 }, { "epoch": 0.10494151883149588, "grad_norm": 1.3607932869922454, "learning_rate": 1.9707492962724145e-05, "loss": 0.9074, "step": 1172 }, { "epoch": 0.10503105937657395, "grad_norm": 1.0274254118100763, "learning_rate": 1.9706796208475494e-05, "loss": 0.9784, "step": 1173 }, { "epoch": 0.10512059992165203, "grad_norm": 0.9784190327116986, "learning_rate": 1.9706098637722777e-05, "loss": 0.8839, "step": 1174 }, { "epoch": 0.10521014046673009, "grad_norm": 1.2148588628092742, "learning_rate": 1.970540025052467e-05, "loss": 0.9178, "step": 1175 }, { "epoch": 0.10529968101180816, "grad_norm": 1.0851605430794924, "learning_rate": 1.9704701046939924e-05, "loss": 0.9373, "step": 1176 }, { "epoch": 0.10538922155688622, "grad_norm": 1.0293826814610691, "learning_rate": 1.970400102702735e-05, "loss": 0.9305, "step": 1177 }, { "epoch": 0.1054787621019643, "grad_norm": 1.057856097232794, "learning_rate": 1.9703300190845832e-05, "loss": 0.9592, "step": 1178 }, { "epoch": 0.10556830264704237, "grad_norm": 0.9613266217718636, "learning_rate": 1.970259853845432e-05, "loss": 0.9335, "step": 1179 }, { "epoch": 0.10565784319212043, "grad_norm": 1.0505166946126554, "learning_rate": 1.9701896069911837e-05, "loss": 0.8769, "step": 1180 }, { "epoch": 0.1057473837371985, "grad_norm": 1.0470387979497533, "learning_rate": 1.9701192785277474e-05, "loss": 0.8985, "step": 1181 }, { "epoch": 0.10583692428227656, "grad_norm": 1.075553118941208, "learning_rate": 1.9700488684610384e-05, "loss": 0.9204, "step": 1182 }, { "epoch": 0.10592646482735464, "grad_norm": 1.1046751641836459, "learning_rate": 1.9699783767969802e-05, "loss": 0.896, "step": 1183 }, { "epoch": 0.1060160053724327, "grad_norm": 0.8720513561211994, "learning_rate": 1.9699078035415016e-05, "loss": 0.8709, "step": 1184 }, { "epoch": 0.10610554591751077, "grad_norm": 1.0955988736273328, "learning_rate": 1.9698371487005393e-05, "loss": 0.8522, "step": 1185 }, { "epoch": 0.10619508646258884, "grad_norm": 0.9512189929963133, "learning_rate": 1.9697664122800364e-05, "loss": 0.9689, "step": 1186 }, { "epoch": 0.10628462700766692, "grad_norm": 0.9939746933502719, "learning_rate": 1.969695594285943e-05, "loss": 0.8877, "step": 1187 }, { "epoch": 0.10637416755274498, "grad_norm": 1.0442084088085233, "learning_rate": 1.969624694724216e-05, "loss": 0.927, "step": 1188 }, { "epoch": 0.10646370809782305, "grad_norm": 1.100768922037487, "learning_rate": 1.9695537136008198e-05, "loss": 0.9042, "step": 1189 }, { "epoch": 0.10655324864290111, "grad_norm": 0.9857780037114541, "learning_rate": 1.9694826509217246e-05, "loss": 0.9408, "step": 1190 }, { "epoch": 0.10664278918797918, "grad_norm": 1.0793850672267609, "learning_rate": 1.9694115066929077e-05, "loss": 0.859, "step": 1191 }, { "epoch": 0.10673232973305725, "grad_norm": 1.049658191831442, "learning_rate": 1.969340280920354e-05, "loss": 0.888, "step": 1192 }, { "epoch": 0.10682187027813532, "grad_norm": 1.236077198601478, "learning_rate": 1.969268973610055e-05, "loss": 0.9171, "step": 1193 }, { "epoch": 0.10691141082321338, "grad_norm": 1.0645498495100685, "learning_rate": 1.9691975847680083e-05, "loss": 0.9539, "step": 1194 }, { "epoch": 0.10700095136829145, "grad_norm": 1.121622986724315, "learning_rate": 1.969126114400219e-05, "loss": 0.9562, "step": 1195 }, { "epoch": 0.10709049191336953, "grad_norm": 1.0356234207089117, "learning_rate": 1.9690545625126992e-05, "loss": 0.8937, "step": 1196 }, { "epoch": 0.1071800324584476, "grad_norm": 0.9323487948990646, "learning_rate": 1.9689829291114672e-05, "loss": 0.9112, "step": 1197 }, { "epoch": 0.10726957300352566, "grad_norm": 1.0351496136265128, "learning_rate": 1.968911214202549e-05, "loss": 0.8923, "step": 1198 }, { "epoch": 0.10735911354860372, "grad_norm": 1.0053938729950094, "learning_rate": 1.968839417791977e-05, "loss": 0.8822, "step": 1199 }, { "epoch": 0.10744865409368179, "grad_norm": 0.9521780714901025, "learning_rate": 1.96876753988579e-05, "loss": 0.8733, "step": 1200 }, { "epoch": 0.10753819463875987, "grad_norm": 1.0557986448112182, "learning_rate": 1.968695580490035e-05, "loss": 0.9224, "step": 1201 }, { "epoch": 0.10762773518383793, "grad_norm": 1.0233107602760776, "learning_rate": 1.9686235396107643e-05, "loss": 0.9881, "step": 1202 }, { "epoch": 0.107717275728916, "grad_norm": 1.1013271469703025, "learning_rate": 1.9685514172540376e-05, "loss": 0.893, "step": 1203 }, { "epoch": 0.10780681627399406, "grad_norm": 1.025278197486952, "learning_rate": 1.968479213425922e-05, "loss": 0.9139, "step": 1204 }, { "epoch": 0.10789635681907214, "grad_norm": 1.0934839232056313, "learning_rate": 1.968406928132491e-05, "loss": 0.9483, "step": 1205 }, { "epoch": 0.10798589736415021, "grad_norm": 1.111228569236409, "learning_rate": 1.968334561379825e-05, "loss": 0.9196, "step": 1206 }, { "epoch": 0.10807543790922827, "grad_norm": 1.0331827908027558, "learning_rate": 1.9682621131740113e-05, "loss": 0.966, "step": 1207 }, { "epoch": 0.10816497845430634, "grad_norm": 0.9575784559686565, "learning_rate": 1.968189583521144e-05, "loss": 0.9089, "step": 1208 }, { "epoch": 0.1082545189993844, "grad_norm": 1.0059004166608105, "learning_rate": 1.9681169724273234e-05, "loss": 0.8681, "step": 1209 }, { "epoch": 0.10834405954446248, "grad_norm": 1.0402474580728576, "learning_rate": 1.9680442798986583e-05, "loss": 0.8892, "step": 1210 }, { "epoch": 0.10843360008954055, "grad_norm": 1.119499429680588, "learning_rate": 1.967971505941263e-05, "loss": 0.9499, "step": 1211 }, { "epoch": 0.10852314063461861, "grad_norm": 1.0433341773204021, "learning_rate": 1.9678986505612588e-05, "loss": 0.9099, "step": 1212 }, { "epoch": 0.10861268117969668, "grad_norm": 1.0373608720394272, "learning_rate": 1.9678257137647743e-05, "loss": 0.9815, "step": 1213 }, { "epoch": 0.10870222172477476, "grad_norm": 1.1026693588643004, "learning_rate": 1.9677526955579447e-05, "loss": 0.9411, "step": 1214 }, { "epoch": 0.10879176226985282, "grad_norm": 1.1355303994980288, "learning_rate": 1.9676795959469118e-05, "loss": 0.9029, "step": 1215 }, { "epoch": 0.10888130281493089, "grad_norm": 1.151370074061366, "learning_rate": 1.9676064149378246e-05, "loss": 0.9007, "step": 1216 }, { "epoch": 0.10897084336000895, "grad_norm": 0.9464784551112853, "learning_rate": 1.9675331525368393e-05, "loss": 0.8788, "step": 1217 }, { "epoch": 0.10906038390508702, "grad_norm": 0.9992943285997412, "learning_rate": 1.9674598087501178e-05, "loss": 0.95, "step": 1218 }, { "epoch": 0.1091499244501651, "grad_norm": 0.9700550999863214, "learning_rate": 1.96738638358383e-05, "loss": 0.8586, "step": 1219 }, { "epoch": 0.10923946499524316, "grad_norm": 1.0514072744605174, "learning_rate": 1.9673128770441517e-05, "loss": 0.9006, "step": 1220 }, { "epoch": 0.10932900554032123, "grad_norm": 1.065278865721725, "learning_rate": 1.9672392891372665e-05, "loss": 0.9489, "step": 1221 }, { "epoch": 0.10941854608539929, "grad_norm": 1.0641722812386099, "learning_rate": 1.9671656198693644e-05, "loss": 0.9231, "step": 1222 }, { "epoch": 0.10950808663047737, "grad_norm": 0.9797263632262364, "learning_rate": 1.9670918692466423e-05, "loss": 0.923, "step": 1223 }, { "epoch": 0.10959762717555543, "grad_norm": 1.0830307286486993, "learning_rate": 1.9670180372753032e-05, "loss": 0.9225, "step": 1224 }, { "epoch": 0.1096871677206335, "grad_norm": 1.0118130084972117, "learning_rate": 1.9669441239615582e-05, "loss": 0.9758, "step": 1225 }, { "epoch": 0.10977670826571156, "grad_norm": 1.0992328513356622, "learning_rate": 1.9668701293116242e-05, "loss": 0.9025, "step": 1226 }, { "epoch": 0.10986624881078963, "grad_norm": 1.0747676542516822, "learning_rate": 1.9667960533317262e-05, "loss": 0.9033, "step": 1227 }, { "epoch": 0.10995578935586771, "grad_norm": 1.0246617639121685, "learning_rate": 1.9667218960280944e-05, "loss": 0.8871, "step": 1228 }, { "epoch": 0.11004532990094577, "grad_norm": 1.1254608814943894, "learning_rate": 1.966647657406967e-05, "loss": 0.9509, "step": 1229 }, { "epoch": 0.11013487044602384, "grad_norm": 0.9452480528440533, "learning_rate": 1.966573337474589e-05, "loss": 0.9192, "step": 1230 }, { "epoch": 0.1102244109911019, "grad_norm": 1.1344532791201427, "learning_rate": 1.966498936237211e-05, "loss": 0.8675, "step": 1231 }, { "epoch": 0.11031395153617998, "grad_norm": 1.092476633311408, "learning_rate": 1.9664244537010924e-05, "loss": 0.9328, "step": 1232 }, { "epoch": 0.11040349208125805, "grad_norm": 0.9830555593313395, "learning_rate": 1.9663498898724976e-05, "loss": 0.9252, "step": 1233 }, { "epoch": 0.11049303262633611, "grad_norm": 0.9786953216543333, "learning_rate": 1.9662752447576996e-05, "loss": 0.9117, "step": 1234 }, { "epoch": 0.11058257317141418, "grad_norm": 0.9475314214421442, "learning_rate": 1.966200518362977e-05, "loss": 0.8858, "step": 1235 }, { "epoch": 0.11067211371649224, "grad_norm": 0.9040971276887072, "learning_rate": 1.9661257106946146e-05, "loss": 0.9135, "step": 1236 }, { "epoch": 0.11076165426157032, "grad_norm": 0.9044584768865382, "learning_rate": 1.9660508217589064e-05, "loss": 0.9115, "step": 1237 }, { "epoch": 0.11085119480664839, "grad_norm": 1.0430566794140101, "learning_rate": 1.965975851562151e-05, "loss": 0.9192, "step": 1238 }, { "epoch": 0.11094073535172645, "grad_norm": 0.9559080000154662, "learning_rate": 1.9659008001106548e-05, "loss": 0.934, "step": 1239 }, { "epoch": 0.11103027589680452, "grad_norm": 1.0633370194896092, "learning_rate": 1.9658256674107306e-05, "loss": 0.9443, "step": 1240 }, { "epoch": 0.1111198164418826, "grad_norm": 0.9743264823192463, "learning_rate": 1.965750453468699e-05, "loss": 0.9485, "step": 1241 }, { "epoch": 0.11120935698696066, "grad_norm": 1.0840768942277546, "learning_rate": 1.965675158290886e-05, "loss": 0.866, "step": 1242 }, { "epoch": 0.11129889753203873, "grad_norm": 0.9560330327922378, "learning_rate": 1.9655997818836255e-05, "loss": 0.854, "step": 1243 }, { "epoch": 0.11138843807711679, "grad_norm": 1.0010794767547186, "learning_rate": 1.9655243242532586e-05, "loss": 0.8588, "step": 1244 }, { "epoch": 0.11147797862219486, "grad_norm": 1.074962805523423, "learning_rate": 1.9654487854061312e-05, "loss": 0.9025, "step": 1245 }, { "epoch": 0.11156751916727294, "grad_norm": 1.0563225838759038, "learning_rate": 1.9653731653485983e-05, "loss": 0.9225, "step": 1246 }, { "epoch": 0.111657059712351, "grad_norm": 1.0501217229968427, "learning_rate": 1.9652974640870205e-05, "loss": 0.9209, "step": 1247 }, { "epoch": 0.11174660025742907, "grad_norm": 0.9483180350747387, "learning_rate": 1.9652216816277657e-05, "loss": 0.8596, "step": 1248 }, { "epoch": 0.11183614080250713, "grad_norm": 0.9817309708807991, "learning_rate": 1.9651458179772086e-05, "loss": 0.9526, "step": 1249 }, { "epoch": 0.11192568134758521, "grad_norm": 1.2195318914924007, "learning_rate": 1.9650698731417303e-05, "loss": 0.9498, "step": 1250 }, { "epoch": 0.11201522189266327, "grad_norm": 1.130307496339976, "learning_rate": 1.964993847127719e-05, "loss": 0.9311, "step": 1251 }, { "epoch": 0.11210476243774134, "grad_norm": 1.008420338131407, "learning_rate": 1.96491773994157e-05, "loss": 0.9409, "step": 1252 }, { "epoch": 0.1121943029828194, "grad_norm": 1.0715620898182732, "learning_rate": 1.9648415515896856e-05, "loss": 0.8776, "step": 1253 }, { "epoch": 0.11228384352789747, "grad_norm": 0.9533874022368671, "learning_rate": 1.964765282078473e-05, "loss": 0.9018, "step": 1254 }, { "epoch": 0.11237338407297555, "grad_norm": 1.0056522970358104, "learning_rate": 1.9646889314143497e-05, "loss": 0.9187, "step": 1255 }, { "epoch": 0.11246292461805361, "grad_norm": 1.0602916349108247, "learning_rate": 1.9646124996037366e-05, "loss": 0.809, "step": 1256 }, { "epoch": 0.11255246516313168, "grad_norm": 1.0903747948514444, "learning_rate": 1.9645359866530637e-05, "loss": 0.9285, "step": 1257 }, { "epoch": 0.11264200570820974, "grad_norm": 1.139057600192617, "learning_rate": 1.9644593925687664e-05, "loss": 0.8864, "step": 1258 }, { "epoch": 0.11273154625328782, "grad_norm": 1.0728366758513683, "learning_rate": 1.964382717357288e-05, "loss": 0.9195, "step": 1259 }, { "epoch": 0.11282108679836589, "grad_norm": 0.982301522296523, "learning_rate": 1.9643059610250782e-05, "loss": 0.9798, "step": 1260 }, { "epoch": 0.11291062734344395, "grad_norm": 1.0579189148034154, "learning_rate": 1.9642291235785934e-05, "loss": 0.8847, "step": 1261 }, { "epoch": 0.11300016788852202, "grad_norm": 1.0315195463976252, "learning_rate": 1.9641522050242967e-05, "loss": 0.9224, "step": 1262 }, { "epoch": 0.11308970843360008, "grad_norm": 0.9928912917214777, "learning_rate": 1.9640752053686583e-05, "loss": 0.8708, "step": 1263 }, { "epoch": 0.11317924897867816, "grad_norm": 1.038513303491386, "learning_rate": 1.9639981246181557e-05, "loss": 0.9055, "step": 1264 }, { "epoch": 0.11326878952375623, "grad_norm": 0.9414946277870256, "learning_rate": 1.9639209627792717e-05, "loss": 0.9221, "step": 1265 }, { "epoch": 0.11335833006883429, "grad_norm": 0.9990236547935908, "learning_rate": 1.9638437198584977e-05, "loss": 0.9065, "step": 1266 }, { "epoch": 0.11344787061391236, "grad_norm": 1.2040427413386114, "learning_rate": 1.9637663958623306e-05, "loss": 0.8889, "step": 1267 }, { "epoch": 0.11353741115899044, "grad_norm": 1.0203027446154496, "learning_rate": 1.9636889907972755e-05, "loss": 0.9152, "step": 1268 }, { "epoch": 0.1136269517040685, "grad_norm": 0.9526450534387981, "learning_rate": 1.9636115046698425e-05, "loss": 0.9374, "step": 1269 }, { "epoch": 0.11371649224914657, "grad_norm": 0.9658103267865827, "learning_rate": 1.9635339374865498e-05, "loss": 0.9298, "step": 1270 }, { "epoch": 0.11380603279422463, "grad_norm": 1.000474720218233, "learning_rate": 1.9634562892539223e-05, "loss": 0.925, "step": 1271 }, { "epoch": 0.1138955733393027, "grad_norm": 1.0189020642376136, "learning_rate": 1.9633785599784915e-05, "loss": 0.9059, "step": 1272 }, { "epoch": 0.11398511388438078, "grad_norm": 1.0329732228244708, "learning_rate": 1.9633007496667952e-05, "loss": 0.8741, "step": 1273 }, { "epoch": 0.11407465442945884, "grad_norm": 0.960745478530226, "learning_rate": 1.963222858325379e-05, "loss": 0.9007, "step": 1274 }, { "epoch": 0.1141641949745369, "grad_norm": 0.9432458237189475, "learning_rate": 1.9631448859607952e-05, "loss": 0.8629, "step": 1275 }, { "epoch": 0.11425373551961497, "grad_norm": 1.0785546845689313, "learning_rate": 1.9630668325796018e-05, "loss": 0.9023, "step": 1276 }, { "epoch": 0.11434327606469305, "grad_norm": 0.986932891460453, "learning_rate": 1.962988698188365e-05, "loss": 0.964, "step": 1277 }, { "epoch": 0.11443281660977112, "grad_norm": 0.9930103543877402, "learning_rate": 1.9629104827936568e-05, "loss": 0.9789, "step": 1278 }, { "epoch": 0.11452235715484918, "grad_norm": 1.3197895204871661, "learning_rate": 1.962832186402057e-05, "loss": 0.9482, "step": 1279 }, { "epoch": 0.11461189769992725, "grad_norm": 1.0023922177161164, "learning_rate": 1.962753809020151e-05, "loss": 0.9381, "step": 1280 }, { "epoch": 0.11470143824500531, "grad_norm": 1.0796816078717577, "learning_rate": 1.962675350654532e-05, "loss": 0.984, "step": 1281 }, { "epoch": 0.11479097879008339, "grad_norm": 1.0019777684476319, "learning_rate": 1.9625968113117995e-05, "loss": 0.9128, "step": 1282 }, { "epoch": 0.11488051933516145, "grad_norm": 0.9195471247574172, "learning_rate": 1.96251819099856e-05, "loss": 0.9241, "step": 1283 }, { "epoch": 0.11497005988023952, "grad_norm": 1.0017246281437422, "learning_rate": 1.962439489721427e-05, "loss": 0.9037, "step": 1284 }, { "epoch": 0.11505960042531758, "grad_norm": 1.0563661871155432, "learning_rate": 1.9623607074870203e-05, "loss": 0.9244, "step": 1285 }, { "epoch": 0.11514914097039566, "grad_norm": 0.9967000872367829, "learning_rate": 1.9622818443019672e-05, "loss": 0.867, "step": 1286 }, { "epoch": 0.11523868151547373, "grad_norm": 1.0444741091111438, "learning_rate": 1.962202900172901e-05, "loss": 0.9055, "step": 1287 }, { "epoch": 0.1153282220605518, "grad_norm": 1.0007276453154084, "learning_rate": 1.962123875106462e-05, "loss": 0.9484, "step": 1288 }, { "epoch": 0.11541776260562986, "grad_norm": 0.9484499221437401, "learning_rate": 1.9620447691092984e-05, "loss": 0.8488, "step": 1289 }, { "epoch": 0.11550730315070792, "grad_norm": 1.0568259400726043, "learning_rate": 1.9619655821880634e-05, "loss": 0.918, "step": 1290 }, { "epoch": 0.115596843695786, "grad_norm": 0.9607336072178856, "learning_rate": 1.961886314349419e-05, "loss": 0.945, "step": 1291 }, { "epoch": 0.11568638424086407, "grad_norm": 0.9911020404746291, "learning_rate": 1.961806965600032e-05, "loss": 0.8589, "step": 1292 }, { "epoch": 0.11577592478594213, "grad_norm": 1.0640787527727469, "learning_rate": 1.961727535946577e-05, "loss": 0.8809, "step": 1293 }, { "epoch": 0.1158654653310202, "grad_norm": 1.049525796813032, "learning_rate": 1.961648025395736e-05, "loss": 0.9131, "step": 1294 }, { "epoch": 0.11595500587609828, "grad_norm": 0.8800093257356222, "learning_rate": 1.9615684339541968e-05, "loss": 0.8469, "step": 1295 }, { "epoch": 0.11604454642117634, "grad_norm": 1.0011524451971945, "learning_rate": 1.9614887616286544e-05, "loss": 0.917, "step": 1296 }, { "epoch": 0.11613408696625441, "grad_norm": 0.9938437228070213, "learning_rate": 1.9614090084258106e-05, "loss": 0.9394, "step": 1297 }, { "epoch": 0.11622362751133247, "grad_norm": 1.1706007213667857, "learning_rate": 1.961329174352374e-05, "loss": 0.9289, "step": 1298 }, { "epoch": 0.11631316805641054, "grad_norm": 0.9135298725831902, "learning_rate": 1.9612492594150597e-05, "loss": 0.8945, "step": 1299 }, { "epoch": 0.11640270860148862, "grad_norm": 1.1327584467114007, "learning_rate": 1.9611692636205903e-05, "loss": 0.9087, "step": 1300 }, { "epoch": 0.11649224914656668, "grad_norm": 0.9293213619614189, "learning_rate": 1.9610891869756945e-05, "loss": 0.8751, "step": 1301 }, { "epoch": 0.11658178969164475, "grad_norm": 0.9517342838468502, "learning_rate": 1.961009029487108e-05, "loss": 0.8847, "step": 1302 }, { "epoch": 0.11667133023672281, "grad_norm": 1.268999077738142, "learning_rate": 1.9609287911615743e-05, "loss": 0.8826, "step": 1303 }, { "epoch": 0.11676087078180089, "grad_norm": 1.0774037052649448, "learning_rate": 1.9608484720058416e-05, "loss": 0.9087, "step": 1304 }, { "epoch": 0.11685041132687896, "grad_norm": 0.917636695883539, "learning_rate": 1.9607680720266664e-05, "loss": 0.9503, "step": 1305 }, { "epoch": 0.11693995187195702, "grad_norm": 1.0041451981443772, "learning_rate": 1.960687591230812e-05, "loss": 0.9535, "step": 1306 }, { "epoch": 0.11702949241703509, "grad_norm": 1.178052741132566, "learning_rate": 1.9606070296250485e-05, "loss": 0.9291, "step": 1307 }, { "epoch": 0.11711903296211315, "grad_norm": 1.1387933577252016, "learning_rate": 1.9605263872161513e-05, "loss": 0.8966, "step": 1308 }, { "epoch": 0.11720857350719123, "grad_norm": 0.9580981245675096, "learning_rate": 1.9604456640109047e-05, "loss": 0.8317, "step": 1309 }, { "epoch": 0.1172981140522693, "grad_norm": 1.0492772508419754, "learning_rate": 1.9603648600160988e-05, "loss": 0.9076, "step": 1310 }, { "epoch": 0.11738765459734736, "grad_norm": 0.9685112528271055, "learning_rate": 1.96028397523853e-05, "loss": 0.8614, "step": 1311 }, { "epoch": 0.11747719514242543, "grad_norm": 0.9979387068071756, "learning_rate": 1.960203009685003e-05, "loss": 0.929, "step": 1312 }, { "epoch": 0.1175667356875035, "grad_norm": 1.0871400666474973, "learning_rate": 1.9601219633623277e-05, "loss": 0.9635, "step": 1313 }, { "epoch": 0.11765627623258157, "grad_norm": 0.9707767008056238, "learning_rate": 1.9600408362773215e-05, "loss": 0.8943, "step": 1314 }, { "epoch": 0.11774581677765963, "grad_norm": 1.029009711331632, "learning_rate": 1.9599596284368087e-05, "loss": 0.9128, "step": 1315 }, { "epoch": 0.1178353573227377, "grad_norm": 1.0253709226875778, "learning_rate": 1.95987833984762e-05, "loss": 0.8756, "step": 1316 }, { "epoch": 0.11792489786781576, "grad_norm": 1.0468990731319678, "learning_rate": 1.9597969705165936e-05, "loss": 0.9011, "step": 1317 }, { "epoch": 0.11801443841289384, "grad_norm": 0.9523541487052847, "learning_rate": 1.9597155204505737e-05, "loss": 0.8866, "step": 1318 }, { "epoch": 0.11810397895797191, "grad_norm": 1.0977667673280584, "learning_rate": 1.9596339896564114e-05, "loss": 0.8663, "step": 1319 }, { "epoch": 0.11819351950304997, "grad_norm": 0.9868904828494608, "learning_rate": 1.9595523781409654e-05, "loss": 0.8761, "step": 1320 }, { "epoch": 0.11828306004812804, "grad_norm": 1.1474701193617396, "learning_rate": 1.9594706859110997e-05, "loss": 0.9371, "step": 1321 }, { "epoch": 0.11837260059320612, "grad_norm": 0.997464603543136, "learning_rate": 1.959388912973687e-05, "loss": 0.9388, "step": 1322 }, { "epoch": 0.11846214113828418, "grad_norm": 0.9898154160231575, "learning_rate": 1.959307059335605e-05, "loss": 0.9189, "step": 1323 }, { "epoch": 0.11855168168336225, "grad_norm": 1.1182368383827372, "learning_rate": 1.9592251250037395e-05, "loss": 0.9144, "step": 1324 }, { "epoch": 0.11864122222844031, "grad_norm": 1.0422720169200772, "learning_rate": 1.959143109984982e-05, "loss": 0.9105, "step": 1325 }, { "epoch": 0.11873076277351838, "grad_norm": 1.0456592228955999, "learning_rate": 1.9590610142862324e-05, "loss": 0.9929, "step": 1326 }, { "epoch": 0.11882030331859646, "grad_norm": 1.0598323873598947, "learning_rate": 1.9589788379143952e-05, "loss": 0.8422, "step": 1327 }, { "epoch": 0.11890984386367452, "grad_norm": 0.980231010678184, "learning_rate": 1.958896580876383e-05, "loss": 0.9093, "step": 1328 }, { "epoch": 0.11899938440875259, "grad_norm": 1.164696223390671, "learning_rate": 1.958814243179115e-05, "loss": 0.9115, "step": 1329 }, { "epoch": 0.11908892495383065, "grad_norm": 1.0256176481410755, "learning_rate": 1.9587318248295176e-05, "loss": 0.9427, "step": 1330 }, { "epoch": 0.11917846549890873, "grad_norm": 1.1037358444667853, "learning_rate": 1.9586493258345232e-05, "loss": 0.8661, "step": 1331 }, { "epoch": 0.1192680060439868, "grad_norm": 1.0967941877583305, "learning_rate": 1.9585667462010717e-05, "loss": 0.9429, "step": 1332 }, { "epoch": 0.11935754658906486, "grad_norm": 1.0338266025151217, "learning_rate": 1.9584840859361094e-05, "loss": 0.902, "step": 1333 }, { "epoch": 0.11944708713414293, "grad_norm": 0.9354979339617655, "learning_rate": 1.9584013450465887e-05, "loss": 0.916, "step": 1334 }, { "epoch": 0.11953662767922099, "grad_norm": 0.9315103323360198, "learning_rate": 1.9583185235394703e-05, "loss": 0.9199, "step": 1335 }, { "epoch": 0.11962616822429907, "grad_norm": 1.2879183953016171, "learning_rate": 1.9582356214217204e-05, "loss": 0.8869, "step": 1336 }, { "epoch": 0.11971570876937714, "grad_norm": 0.9695556840439387, "learning_rate": 1.9581526387003126e-05, "loss": 0.8468, "step": 1337 }, { "epoch": 0.1198052493144552, "grad_norm": 1.0231523228703185, "learning_rate": 1.9580695753822274e-05, "loss": 0.9181, "step": 1338 }, { "epoch": 0.11989478985953327, "grad_norm": 1.1355268343581768, "learning_rate": 1.9579864314744514e-05, "loss": 0.8865, "step": 1339 }, { "epoch": 0.11998433040461134, "grad_norm": 0.912833529452868, "learning_rate": 1.9579032069839785e-05, "loss": 0.9134, "step": 1340 }, { "epoch": 0.12007387094968941, "grad_norm": 1.4615830579029223, "learning_rate": 1.9578199019178095e-05, "loss": 0.9369, "step": 1341 }, { "epoch": 0.12016341149476747, "grad_norm": 1.040844642944266, "learning_rate": 1.957736516282952e-05, "loss": 0.8588, "step": 1342 }, { "epoch": 0.12025295203984554, "grad_norm": 1.105762657959043, "learning_rate": 1.9576530500864192e-05, "loss": 0.8451, "step": 1343 }, { "epoch": 0.1203424925849236, "grad_norm": 1.0083957740387428, "learning_rate": 1.9575695033352325e-05, "loss": 0.8598, "step": 1344 }, { "epoch": 0.12043203313000168, "grad_norm": 0.9735755367126849, "learning_rate": 1.9574858760364197e-05, "loss": 0.8555, "step": 1345 }, { "epoch": 0.12052157367507975, "grad_norm": 0.9619789573848628, "learning_rate": 1.9574021681970153e-05, "loss": 0.8936, "step": 1346 }, { "epoch": 0.12061111422015781, "grad_norm": 1.0684423416476119, "learning_rate": 1.95731837982406e-05, "loss": 0.8959, "step": 1347 }, { "epoch": 0.12070065476523588, "grad_norm": 1.2905960845220332, "learning_rate": 1.957234510924603e-05, "loss": 0.9335, "step": 1348 }, { "epoch": 0.12079019531031396, "grad_norm": 1.070410605663533, "learning_rate": 1.9571505615056977e-05, "loss": 0.912, "step": 1349 }, { "epoch": 0.12087973585539202, "grad_norm": 1.3565165539778345, "learning_rate": 1.957066531574406e-05, "loss": 0.9237, "step": 1350 }, { "epoch": 0.12096927640047009, "grad_norm": 1.081582842643077, "learning_rate": 1.9569824211377972e-05, "loss": 0.9423, "step": 1351 }, { "epoch": 0.12105881694554815, "grad_norm": 0.942909315005756, "learning_rate": 1.956898230202945e-05, "loss": 0.961, "step": 1352 }, { "epoch": 0.12114835749062622, "grad_norm": 0.9762582587043057, "learning_rate": 1.9568139587769325e-05, "loss": 0.9822, "step": 1353 }, { "epoch": 0.1212378980357043, "grad_norm": 1.1937028301107047, "learning_rate": 1.9567296068668474e-05, "loss": 0.8534, "step": 1354 }, { "epoch": 0.12132743858078236, "grad_norm": 0.9728571119574848, "learning_rate": 1.9566451744797855e-05, "loss": 0.9113, "step": 1355 }, { "epoch": 0.12141697912586043, "grad_norm": 1.0412336952219956, "learning_rate": 1.956560661622849e-05, "loss": 0.8391, "step": 1356 }, { "epoch": 0.12150651967093849, "grad_norm": 1.0081667447099796, "learning_rate": 1.9564760683031468e-05, "loss": 0.8843, "step": 1357 }, { "epoch": 0.12159606021601657, "grad_norm": 1.0776103925833755, "learning_rate": 1.9563913945277947e-05, "loss": 0.9138, "step": 1358 }, { "epoch": 0.12168560076109464, "grad_norm": 0.9802067978216537, "learning_rate": 1.9563066403039147e-05, "loss": 0.9422, "step": 1359 }, { "epoch": 0.1217751413061727, "grad_norm": 1.0721933551598724, "learning_rate": 1.9562218056386366e-05, "loss": 0.915, "step": 1360 }, { "epoch": 0.12186468185125077, "grad_norm": 3.6529779386806465, "learning_rate": 1.9561368905390964e-05, "loss": 0.9146, "step": 1361 }, { "epoch": 0.12195422239632883, "grad_norm": 1.0660970137140113, "learning_rate": 1.9560518950124368e-05, "loss": 0.9248, "step": 1362 }, { "epoch": 0.12204376294140691, "grad_norm": 1.014861806715312, "learning_rate": 1.955966819065807e-05, "loss": 0.9326, "step": 1363 }, { "epoch": 0.12213330348648498, "grad_norm": 1.0998755985273443, "learning_rate": 1.9558816627063638e-05, "loss": 0.9175, "step": 1364 }, { "epoch": 0.12222284403156304, "grad_norm": 1.058809204649994, "learning_rate": 1.9557964259412703e-05, "loss": 0.8788, "step": 1365 }, { "epoch": 0.1223123845766411, "grad_norm": 0.9609168163702838, "learning_rate": 1.955711108777696e-05, "loss": 0.8697, "step": 1366 }, { "epoch": 0.12240192512171919, "grad_norm": 1.1223672412545525, "learning_rate": 1.9556257112228173e-05, "loss": 0.8563, "step": 1367 }, { "epoch": 0.12249146566679725, "grad_norm": 0.9801982052958138, "learning_rate": 1.955540233283818e-05, "loss": 0.9284, "step": 1368 }, { "epoch": 0.12258100621187532, "grad_norm": 0.8696421486641737, "learning_rate": 1.955454674967888e-05, "loss": 0.8915, "step": 1369 }, { "epoch": 0.12267054675695338, "grad_norm": 1.1238639911121264, "learning_rate": 1.9553690362822245e-05, "loss": 0.8764, "step": 1370 }, { "epoch": 0.12276008730203145, "grad_norm": 1.0568310077230139, "learning_rate": 1.955283317234031e-05, "loss": 0.8866, "step": 1371 }, { "epoch": 0.12284962784710952, "grad_norm": 1.0167342829540726, "learning_rate": 1.9551975178305172e-05, "loss": 0.9173, "step": 1372 }, { "epoch": 0.12293916839218759, "grad_norm": 1.2826334745425008, "learning_rate": 1.9551116380789015e-05, "loss": 0.89, "step": 1373 }, { "epoch": 0.12302870893726565, "grad_norm": 1.2400165128867031, "learning_rate": 1.9550256779864073e-05, "loss": 0.8928, "step": 1374 }, { "epoch": 0.12311824948234372, "grad_norm": 1.0205867079465514, "learning_rate": 1.954939637560265e-05, "loss": 0.9495, "step": 1375 }, { "epoch": 0.1232077900274218, "grad_norm": 1.0033611091567889, "learning_rate": 1.9548535168077124e-05, "loss": 0.9175, "step": 1376 }, { "epoch": 0.12329733057249986, "grad_norm": 1.001156894696269, "learning_rate": 1.9547673157359933e-05, "loss": 0.9021, "step": 1377 }, { "epoch": 0.12338687111757793, "grad_norm": 0.9831636288095823, "learning_rate": 1.954681034352359e-05, "loss": 0.8808, "step": 1378 }, { "epoch": 0.123476411662656, "grad_norm": 1.1729743070737861, "learning_rate": 1.9545946726640673e-05, "loss": 0.9301, "step": 1379 }, { "epoch": 0.12356595220773406, "grad_norm": 1.0761998970688234, "learning_rate": 1.954508230678382e-05, "loss": 0.872, "step": 1380 }, { "epoch": 0.12365549275281214, "grad_norm": 1.0859356517130991, "learning_rate": 1.9544217084025755e-05, "loss": 0.932, "step": 1381 }, { "epoch": 0.1237450332978902, "grad_norm": 0.9370717100137363, "learning_rate": 1.9543351058439245e-05, "loss": 0.9105, "step": 1382 }, { "epoch": 0.12383457384296827, "grad_norm": 0.9773938441864269, "learning_rate": 1.9542484230097145e-05, "loss": 0.9565, "step": 1383 }, { "epoch": 0.12392411438804633, "grad_norm": 0.9875338546039725, "learning_rate": 1.954161659907237e-05, "loss": 0.9357, "step": 1384 }, { "epoch": 0.12401365493312441, "grad_norm": 1.1173362097876427, "learning_rate": 1.9540748165437897e-05, "loss": 0.8907, "step": 1385 }, { "epoch": 0.12410319547820248, "grad_norm": 1.1458961068103404, "learning_rate": 1.9539878929266777e-05, "loss": 0.9128, "step": 1386 }, { "epoch": 0.12419273602328054, "grad_norm": 1.0168128724233172, "learning_rate": 1.953900889063213e-05, "loss": 0.8657, "step": 1387 }, { "epoch": 0.12428227656835861, "grad_norm": 1.1026914557965029, "learning_rate": 1.9538138049607144e-05, "loss": 0.9586, "step": 1388 }, { "epoch": 0.12437181711343667, "grad_norm": 1.0329069955871129, "learning_rate": 1.9537266406265062e-05, "loss": 0.9554, "step": 1389 }, { "epoch": 0.12446135765851475, "grad_norm": 0.9926753658126571, "learning_rate": 1.953639396067921e-05, "loss": 0.9469, "step": 1390 }, { "epoch": 0.12455089820359282, "grad_norm": 1.0645791773515578, "learning_rate": 1.953552071292298e-05, "loss": 0.8637, "step": 1391 }, { "epoch": 0.12464043874867088, "grad_norm": 0.9658738677847174, "learning_rate": 1.9534646663069816e-05, "loss": 0.8595, "step": 1392 }, { "epoch": 0.12472997929374895, "grad_norm": 1.0925703672691132, "learning_rate": 1.953377181119325e-05, "loss": 0.9256, "step": 1393 }, { "epoch": 0.12481951983882703, "grad_norm": 1.1074894473625458, "learning_rate": 1.953289615736686e-05, "loss": 0.989, "step": 1394 }, { "epoch": 0.12490906038390509, "grad_norm": 1.1368889039931307, "learning_rate": 1.9532019701664313e-05, "loss": 0.8866, "step": 1395 }, { "epoch": 0.12499860092898316, "grad_norm": 1.0068562492262372, "learning_rate": 1.9531142444159332e-05, "loss": 0.8895, "step": 1396 }, { "epoch": 0.12508814147406122, "grad_norm": 0.940322014456406, "learning_rate": 1.9530264384925707e-05, "loss": 0.8959, "step": 1397 }, { "epoch": 0.12517768201913929, "grad_norm": 1.1503138338272139, "learning_rate": 1.9529385524037298e-05, "loss": 0.9052, "step": 1398 }, { "epoch": 0.12526722256421735, "grad_norm": 1.0431312140629057, "learning_rate": 1.952850586156803e-05, "loss": 0.9529, "step": 1399 }, { "epoch": 0.12535676310929542, "grad_norm": 1.1144455328212037, "learning_rate": 1.9527625397591903e-05, "loss": 0.9728, "step": 1400 }, { "epoch": 0.1254463036543735, "grad_norm": 1.0079998690513767, "learning_rate": 1.9526744132182975e-05, "loss": 0.8823, "step": 1401 }, { "epoch": 0.12553584419945157, "grad_norm": 0.9052617579464834, "learning_rate": 1.9525862065415374e-05, "loss": 0.9248, "step": 1402 }, { "epoch": 0.12562538474452964, "grad_norm": 1.0468757009788687, "learning_rate": 1.9524979197363298e-05, "loss": 0.9499, "step": 1403 }, { "epoch": 0.1257149252896077, "grad_norm": 1.2404775812989024, "learning_rate": 1.9524095528101012e-05, "loss": 0.907, "step": 1404 }, { "epoch": 0.12580446583468577, "grad_norm": 1.1383673603748692, "learning_rate": 1.9523211057702845e-05, "loss": 0.9525, "step": 1405 }, { "epoch": 0.12589400637976383, "grad_norm": 1.0176565772048503, "learning_rate": 1.9522325786243198e-05, "loss": 0.9264, "step": 1406 }, { "epoch": 0.1259835469248419, "grad_norm": 0.9901489944462893, "learning_rate": 1.9521439713796537e-05, "loss": 0.9125, "step": 1407 }, { "epoch": 0.12607308746991996, "grad_norm": 0.9662831001645319, "learning_rate": 1.9520552840437396e-05, "loss": 0.8835, "step": 1408 }, { "epoch": 0.12616262801499803, "grad_norm": 1.0184823752753038, "learning_rate": 1.951966516624037e-05, "loss": 0.8911, "step": 1409 }, { "epoch": 0.12625216856007612, "grad_norm": 0.9928569316203307, "learning_rate": 1.9518776691280137e-05, "loss": 0.9155, "step": 1410 }, { "epoch": 0.1263417091051542, "grad_norm": 1.0746244904606723, "learning_rate": 1.9517887415631426e-05, "loss": 0.983, "step": 1411 }, { "epoch": 0.12643124965023225, "grad_norm": 1.0825778456784905, "learning_rate": 1.951699733936904e-05, "loss": 0.8769, "step": 1412 }, { "epoch": 0.12652079019531032, "grad_norm": 1.1072741663857915, "learning_rate": 1.951610646256785e-05, "loss": 0.9278, "step": 1413 }, { "epoch": 0.12661033074038838, "grad_norm": 1.0622384816018422, "learning_rate": 1.95152147853028e-05, "loss": 0.898, "step": 1414 }, { "epoch": 0.12669987128546645, "grad_norm": 1.0448712321114952, "learning_rate": 1.9514322307648886e-05, "loss": 0.9751, "step": 1415 }, { "epoch": 0.1267894118305445, "grad_norm": 1.0428131779013292, "learning_rate": 1.9513429029681184e-05, "loss": 0.8597, "step": 1416 }, { "epoch": 0.12687895237562258, "grad_norm": 0.9950711560265061, "learning_rate": 1.951253495147483e-05, "loss": 0.8856, "step": 1417 }, { "epoch": 0.12696849292070064, "grad_norm": 0.982205498762089, "learning_rate": 1.951164007310504e-05, "loss": 0.9317, "step": 1418 }, { "epoch": 0.12705803346577874, "grad_norm": 0.9758720834731823, "learning_rate": 1.951074439464708e-05, "loss": 0.88, "step": 1419 }, { "epoch": 0.1271475740108568, "grad_norm": 0.9216274708561601, "learning_rate": 1.9509847916176294e-05, "loss": 0.9068, "step": 1420 }, { "epoch": 0.12723711455593487, "grad_norm": 1.0548263490445868, "learning_rate": 1.9508950637768093e-05, "loss": 0.9412, "step": 1421 }, { "epoch": 0.12732665510101293, "grad_norm": 0.9347688074901511, "learning_rate": 1.950805255949795e-05, "loss": 0.8875, "step": 1422 }, { "epoch": 0.127416195646091, "grad_norm": 1.0193098894139327, "learning_rate": 1.9507153681441408e-05, "loss": 0.8538, "step": 1423 }, { "epoch": 0.12750573619116906, "grad_norm": 1.09266989261539, "learning_rate": 1.9506254003674084e-05, "loss": 0.9565, "step": 1424 }, { "epoch": 0.12759527673624713, "grad_norm": 0.953919130552437, "learning_rate": 1.9505353526271646e-05, "loss": 0.9209, "step": 1425 }, { "epoch": 0.1276848172813252, "grad_norm": 1.0471619911738836, "learning_rate": 1.9504452249309848e-05, "loss": 0.8724, "step": 1426 }, { "epoch": 0.12777435782640326, "grad_norm": 0.9677614920010378, "learning_rate": 1.9503550172864497e-05, "loss": 0.9344, "step": 1427 }, { "epoch": 0.12786389837148135, "grad_norm": 1.0019501723097841, "learning_rate": 1.9502647297011473e-05, "loss": 0.9137, "step": 1428 }, { "epoch": 0.12795343891655941, "grad_norm": 0.9721084506596254, "learning_rate": 1.950174362182673e-05, "loss": 0.8924, "step": 1429 }, { "epoch": 0.12804297946163748, "grad_norm": 0.9689075518443421, "learning_rate": 1.9500839147386275e-05, "loss": 0.9036, "step": 1430 }, { "epoch": 0.12813252000671554, "grad_norm": 0.9895744158611558, "learning_rate": 1.9499933873766188e-05, "loss": 0.8925, "step": 1431 }, { "epoch": 0.1282220605517936, "grad_norm": 0.920255863937801, "learning_rate": 1.9499027801042624e-05, "loss": 0.8846, "step": 1432 }, { "epoch": 0.12831160109687167, "grad_norm": 1.0509384374278654, "learning_rate": 1.9498120929291797e-05, "loss": 0.929, "step": 1433 }, { "epoch": 0.12840114164194974, "grad_norm": 1.0193063834041702, "learning_rate": 1.9497213258589983e-05, "loss": 0.902, "step": 1434 }, { "epoch": 0.1284906821870278, "grad_norm": 0.999295865258048, "learning_rate": 1.9496304789013544e-05, "loss": 0.9176, "step": 1435 }, { "epoch": 0.12858022273210587, "grad_norm": 0.9971365977577392, "learning_rate": 1.949539552063889e-05, "loss": 0.8513, "step": 1436 }, { "epoch": 0.12866976327718396, "grad_norm": 0.9684297453249247, "learning_rate": 1.9494485453542508e-05, "loss": 0.8508, "step": 1437 }, { "epoch": 0.12875930382226203, "grad_norm": 0.9316713142048046, "learning_rate": 1.949357458780095e-05, "loss": 0.8609, "step": 1438 }, { "epoch": 0.1288488443673401, "grad_norm": 0.9471396520544554, "learning_rate": 1.9492662923490834e-05, "loss": 0.8167, "step": 1439 }, { "epoch": 0.12893838491241816, "grad_norm": 1.0408102054668609, "learning_rate": 1.9491750460688845e-05, "loss": 0.8962, "step": 1440 }, { "epoch": 0.12902792545749622, "grad_norm": 0.9637514342113481, "learning_rate": 1.949083719947174e-05, "loss": 0.86, "step": 1441 }, { "epoch": 0.1291174660025743, "grad_norm": 1.0275215280399723, "learning_rate": 1.948992313991634e-05, "loss": 0.9447, "step": 1442 }, { "epoch": 0.12920700654765235, "grad_norm": 1.0616604776106664, "learning_rate": 1.9489008282099523e-05, "loss": 0.8897, "step": 1443 }, { "epoch": 0.12929654709273042, "grad_norm": 1.0239220021055173, "learning_rate": 1.9488092626098256e-05, "loss": 0.8867, "step": 1444 }, { "epoch": 0.12938608763780848, "grad_norm": 1.211101786405548, "learning_rate": 1.9487176171989555e-05, "loss": 0.912, "step": 1445 }, { "epoch": 0.12947562818288658, "grad_norm": 1.0345748790552884, "learning_rate": 1.948625891985051e-05, "loss": 0.9375, "step": 1446 }, { "epoch": 0.12956516872796464, "grad_norm": 1.0814639896029998, "learning_rate": 1.948534086975828e-05, "loss": 0.8756, "step": 1447 }, { "epoch": 0.1296547092730427, "grad_norm": 1.0923445547440713, "learning_rate": 1.9484422021790085e-05, "loss": 0.9106, "step": 1448 }, { "epoch": 0.12974424981812077, "grad_norm": 1.0503854170455027, "learning_rate": 1.9483502376023217e-05, "loss": 0.9183, "step": 1449 }, { "epoch": 0.12983379036319884, "grad_norm": 0.9757969244328735, "learning_rate": 1.9482581932535028e-05, "loss": 0.8987, "step": 1450 }, { "epoch": 0.1299233309082769, "grad_norm": 0.8523331483993133, "learning_rate": 1.9481660691402956e-05, "loss": 0.8727, "step": 1451 }, { "epoch": 0.13001287145335497, "grad_norm": 0.9723063006219292, "learning_rate": 1.948073865270448e-05, "loss": 0.902, "step": 1452 }, { "epoch": 0.13010241199843303, "grad_norm": 0.9851324298269891, "learning_rate": 1.9479815816517163e-05, "loss": 0.9167, "step": 1453 }, { "epoch": 0.1301919525435111, "grad_norm": 0.9429330901331978, "learning_rate": 1.947889218291863e-05, "loss": 0.9174, "step": 1454 }, { "epoch": 0.1302814930885892, "grad_norm": 1.042165108015757, "learning_rate": 1.9477967751986576e-05, "loss": 0.9068, "step": 1455 }, { "epoch": 0.13037103363366725, "grad_norm": 1.0664973925487689, "learning_rate": 1.9477042523798762e-05, "loss": 0.95, "step": 1456 }, { "epoch": 0.13046057417874532, "grad_norm": 1.0387430566533364, "learning_rate": 1.9476116498433016e-05, "loss": 0.9417, "step": 1457 }, { "epoch": 0.13055011472382338, "grad_norm": 1.1152744512934578, "learning_rate": 1.9475189675967226e-05, "loss": 0.8765, "step": 1458 }, { "epoch": 0.13063965526890145, "grad_norm": 1.0485207324014405, "learning_rate": 1.9474262056479364e-05, "loss": 0.9255, "step": 1459 }, { "epoch": 0.13072919581397952, "grad_norm": 0.918454917245906, "learning_rate": 1.9473333640047442e-05, "loss": 0.9252, "step": 1460 }, { "epoch": 0.13081873635905758, "grad_norm": 1.0407250514771502, "learning_rate": 1.9472404426749572e-05, "loss": 0.8931, "step": 1461 }, { "epoch": 0.13090827690413565, "grad_norm": 0.9369344038027542, "learning_rate": 1.9471474416663906e-05, "loss": 0.8986, "step": 1462 }, { "epoch": 0.1309978174492137, "grad_norm": 1.2269047348485984, "learning_rate": 1.947054360986868e-05, "loss": 0.9321, "step": 1463 }, { "epoch": 0.1310873579942918, "grad_norm": 1.0219488757101256, "learning_rate": 1.9469612006442184e-05, "loss": 0.91, "step": 1464 }, { "epoch": 0.13117689853936987, "grad_norm": 0.982429351361952, "learning_rate": 1.9468679606462784e-05, "loss": 0.9306, "step": 1465 }, { "epoch": 0.13126643908444793, "grad_norm": 1.1181929588730766, "learning_rate": 1.9467746410008916e-05, "loss": 0.8895, "step": 1466 }, { "epoch": 0.131355979629526, "grad_norm": 0.9858865418958391, "learning_rate": 1.946681241715907e-05, "loss": 0.898, "step": 1467 }, { "epoch": 0.13144552017460406, "grad_norm": 1.0271586454792119, "learning_rate": 1.9465877627991813e-05, "loss": 0.9706, "step": 1468 }, { "epoch": 0.13153506071968213, "grad_norm": 0.9952111706689362, "learning_rate": 1.9464942042585776e-05, "loss": 0.923, "step": 1469 }, { "epoch": 0.1316246012647602, "grad_norm": 1.1117014468500008, "learning_rate": 1.9464005661019656e-05, "loss": 0.8851, "step": 1470 }, { "epoch": 0.13171414180983826, "grad_norm": 1.0546826662691957, "learning_rate": 1.9463068483372222e-05, "loss": 0.9396, "step": 1471 }, { "epoch": 0.13180368235491632, "grad_norm": 1.0473801423027123, "learning_rate": 1.9462130509722307e-05, "loss": 0.8828, "step": 1472 }, { "epoch": 0.13189322289999442, "grad_norm": 1.0884043717294627, "learning_rate": 1.9461191740148805e-05, "loss": 0.8727, "step": 1473 }, { "epoch": 0.13198276344507248, "grad_norm": 1.084270235299537, "learning_rate": 1.9460252174730682e-05, "loss": 0.926, "step": 1474 }, { "epoch": 0.13207230399015055, "grad_norm": 0.9634262768410355, "learning_rate": 1.945931181354698e-05, "loss": 0.9041, "step": 1475 }, { "epoch": 0.1321618445352286, "grad_norm": 1.9828797268357217, "learning_rate": 1.945837065667679e-05, "loss": 0.9037, "step": 1476 }, { "epoch": 0.13225138508030668, "grad_norm": 1.0758670212932726, "learning_rate": 1.9457428704199283e-05, "loss": 0.931, "step": 1477 }, { "epoch": 0.13234092562538474, "grad_norm": 1.0160275806626584, "learning_rate": 1.9456485956193693e-05, "loss": 0.9077, "step": 1478 }, { "epoch": 0.1324304661704628, "grad_norm": 0.9700076012393773, "learning_rate": 1.945554241273932e-05, "loss": 0.9021, "step": 1479 }, { "epoch": 0.13252000671554087, "grad_norm": 0.9927651797145578, "learning_rate": 1.9454598073915534e-05, "loss": 0.8443, "step": 1480 }, { "epoch": 0.13260954726061894, "grad_norm": 1.0915633602469332, "learning_rate": 1.9453652939801766e-05, "loss": 0.8678, "step": 1481 }, { "epoch": 0.13269908780569703, "grad_norm": 1.0347345850308496, "learning_rate": 1.945270701047752e-05, "loss": 0.964, "step": 1482 }, { "epoch": 0.1327886283507751, "grad_norm": 1.0275382379088425, "learning_rate": 1.945176028602236e-05, "loss": 0.902, "step": 1483 }, { "epoch": 0.13287816889585316, "grad_norm": 0.9694285280511057, "learning_rate": 1.945081276651593e-05, "loss": 0.9423, "step": 1484 }, { "epoch": 0.13296770944093123, "grad_norm": 0.9397546016874349, "learning_rate": 1.9449864452037926e-05, "loss": 0.9141, "step": 1485 }, { "epoch": 0.1330572499860093, "grad_norm": 0.9760908531669296, "learning_rate": 1.9448915342668118e-05, "loss": 0.8978, "step": 1486 }, { "epoch": 0.13314679053108736, "grad_norm": 1.2270016597732376, "learning_rate": 1.9447965438486343e-05, "loss": 0.8957, "step": 1487 }, { "epoch": 0.13323633107616542, "grad_norm": 1.0279238363884968, "learning_rate": 1.9447014739572503e-05, "loss": 0.953, "step": 1488 }, { "epoch": 0.13332587162124349, "grad_norm": 1.011348740484823, "learning_rate": 1.944606324600657e-05, "loss": 0.8799, "step": 1489 }, { "epoch": 0.13341541216632155, "grad_norm": 1.0785657113335414, "learning_rate": 1.9445110957868576e-05, "loss": 0.8869, "step": 1490 }, { "epoch": 0.13350495271139964, "grad_norm": 1.092623790544124, "learning_rate": 1.9444157875238628e-05, "loss": 0.9208, "step": 1491 }, { "epoch": 0.1335944932564777, "grad_norm": 1.0083250738153173, "learning_rate": 1.9443203998196895e-05, "loss": 0.9502, "step": 1492 }, { "epoch": 0.13368403380155577, "grad_norm": 1.0534469587509614, "learning_rate": 1.9442249326823613e-05, "loss": 0.9315, "step": 1493 }, { "epoch": 0.13377357434663384, "grad_norm": 0.8623868913924277, "learning_rate": 1.944129386119909e-05, "loss": 0.8555, "step": 1494 }, { "epoch": 0.1338631148917119, "grad_norm": 1.0743877080394968, "learning_rate": 1.9440337601403695e-05, "loss": 0.8852, "step": 1495 }, { "epoch": 0.13395265543678997, "grad_norm": 1.086542869961554, "learning_rate": 1.943938054751786e-05, "loss": 0.9165, "step": 1496 }, { "epoch": 0.13404219598186803, "grad_norm": 0.9654041398149188, "learning_rate": 1.9438422699622096e-05, "loss": 0.9048, "step": 1497 }, { "epoch": 0.1341317365269461, "grad_norm": 1.0769297040831751, "learning_rate": 1.943746405779697e-05, "loss": 0.9125, "step": 1498 }, { "epoch": 0.13422127707202416, "grad_norm": 0.8987234635955466, "learning_rate": 1.943650462212312e-05, "loss": 0.9021, "step": 1499 }, { "epoch": 0.13431081761710226, "grad_norm": 0.9997745296951533, "learning_rate": 1.9435544392681257e-05, "loss": 0.9669, "step": 1500 }, { "epoch": 0.13440035816218032, "grad_norm": 0.9589663355029894, "learning_rate": 1.9434583369552146e-05, "loss": 0.9197, "step": 1501 }, { "epoch": 0.1344898987072584, "grad_norm": 0.9612325544024343, "learning_rate": 1.9433621552816623e-05, "loss": 0.8853, "step": 1502 }, { "epoch": 0.13457943925233645, "grad_norm": 0.979263392994081, "learning_rate": 1.9432658942555597e-05, "loss": 0.9618, "step": 1503 }, { "epoch": 0.13466897979741452, "grad_norm": 1.363251020659175, "learning_rate": 1.943169553885004e-05, "loss": 0.9072, "step": 1504 }, { "epoch": 0.13475852034249258, "grad_norm": 0.9979329837967914, "learning_rate": 1.943073134178099e-05, "loss": 0.9248, "step": 1505 }, { "epoch": 0.13484806088757065, "grad_norm": 0.9559618222497475, "learning_rate": 1.9429766351429554e-05, "loss": 0.8848, "step": 1506 }, { "epoch": 0.1349376014326487, "grad_norm": 1.1167959431368581, "learning_rate": 1.9428800567876898e-05, "loss": 0.8352, "step": 1507 }, { "epoch": 0.13502714197772678, "grad_norm": 0.978643743219982, "learning_rate": 1.9427833991204264e-05, "loss": 0.9145, "step": 1508 }, { "epoch": 0.13511668252280487, "grad_norm": 0.9612792399454821, "learning_rate": 1.9426866621492958e-05, "loss": 0.9054, "step": 1509 }, { "epoch": 0.13520622306788294, "grad_norm": 0.9326269527761527, "learning_rate": 1.9425898458824352e-05, "loss": 0.8592, "step": 1510 }, { "epoch": 0.135295763612961, "grad_norm": 1.1163662805630303, "learning_rate": 1.9424929503279883e-05, "loss": 0.9027, "step": 1511 }, { "epoch": 0.13538530415803907, "grad_norm": 0.961668852243405, "learning_rate": 1.9423959754941055e-05, "loss": 0.8605, "step": 1512 }, { "epoch": 0.13547484470311713, "grad_norm": 1.0053554854834659, "learning_rate": 1.9422989213889446e-05, "loss": 0.8904, "step": 1513 }, { "epoch": 0.1355643852481952, "grad_norm": 1.298556905990335, "learning_rate": 1.9422017880206686e-05, "loss": 0.8567, "step": 1514 }, { "epoch": 0.13565392579327326, "grad_norm": 1.0926006421905428, "learning_rate": 1.942104575397449e-05, "loss": 0.9233, "step": 1515 }, { "epoch": 0.13574346633835133, "grad_norm": 1.0868330635087227, "learning_rate": 1.9420072835274623e-05, "loss": 0.8515, "step": 1516 }, { "epoch": 0.1358330068834294, "grad_norm": 1.0867937686581675, "learning_rate": 1.941909912418893e-05, "loss": 0.8212, "step": 1517 }, { "epoch": 0.13592254742850748, "grad_norm": 0.9451992243311593, "learning_rate": 1.941812462079931e-05, "loss": 0.8571, "step": 1518 }, { "epoch": 0.13601208797358555, "grad_norm": 0.9987910849183542, "learning_rate": 1.9417149325187737e-05, "loss": 0.9381, "step": 1519 }, { "epoch": 0.13610162851866361, "grad_norm": 1.0320960529311347, "learning_rate": 1.9416173237436252e-05, "loss": 0.937, "step": 1520 }, { "epoch": 0.13619116906374168, "grad_norm": 0.9321398916032981, "learning_rate": 1.941519635762696e-05, "loss": 0.9431, "step": 1521 }, { "epoch": 0.13628070960881974, "grad_norm": 0.9727937328557554, "learning_rate": 1.941421868584203e-05, "loss": 0.9432, "step": 1522 }, { "epoch": 0.1363702501538978, "grad_norm": 1.021323779000358, "learning_rate": 1.94132402221637e-05, "loss": 0.9389, "step": 1523 }, { "epoch": 0.13645979069897587, "grad_norm": 1.101850070063544, "learning_rate": 1.9412260966674282e-05, "loss": 0.8924, "step": 1524 }, { "epoch": 0.13654933124405394, "grad_norm": 1.0080311600151963, "learning_rate": 1.9411280919456138e-05, "loss": 0.9171, "step": 1525 }, { "epoch": 0.136638871789132, "grad_norm": 0.9274133464499409, "learning_rate": 1.941030008059172e-05, "loss": 0.9201, "step": 1526 }, { "epoch": 0.1367284123342101, "grad_norm": 0.948262034092094, "learning_rate": 1.9409318450163517e-05, "loss": 0.9076, "step": 1527 }, { "epoch": 0.13681795287928816, "grad_norm": 0.9645324280251374, "learning_rate": 1.9408336028254112e-05, "loss": 0.928, "step": 1528 }, { "epoch": 0.13690749342436623, "grad_norm": 0.8925693454397264, "learning_rate": 1.9407352814946135e-05, "loss": 0.8593, "step": 1529 }, { "epoch": 0.1369970339694443, "grad_norm": 1.029891293607414, "learning_rate": 1.94063688103223e-05, "loss": 0.861, "step": 1530 }, { "epoch": 0.13708657451452236, "grad_norm": 0.9824803984408358, "learning_rate": 1.9405384014465373e-05, "loss": 0.8912, "step": 1531 }, { "epoch": 0.13717611505960042, "grad_norm": 1.0356839904104131, "learning_rate": 1.9404398427458187e-05, "loss": 0.816, "step": 1532 }, { "epoch": 0.1372656556046785, "grad_norm": 0.9566100266281806, "learning_rate": 1.9403412049383658e-05, "loss": 0.8613, "step": 1533 }, { "epoch": 0.13735519614975655, "grad_norm": 1.0029574257539415, "learning_rate": 1.9402424880324745e-05, "loss": 0.8475, "step": 1534 }, { "epoch": 0.13744473669483462, "grad_norm": 1.2308159477896594, "learning_rate": 1.940143692036449e-05, "loss": 0.9555, "step": 1535 }, { "epoch": 0.1375342772399127, "grad_norm": 1.0559521304390151, "learning_rate": 1.9400448169586004e-05, "loss": 0.9003, "step": 1536 }, { "epoch": 0.13762381778499078, "grad_norm": 1.036587298614136, "learning_rate": 1.9399458628072448e-05, "loss": 0.9092, "step": 1537 }, { "epoch": 0.13771335833006884, "grad_norm": 0.955024888380107, "learning_rate": 1.939846829590706e-05, "loss": 0.8447, "step": 1538 }, { "epoch": 0.1378028988751469, "grad_norm": 1.1413416664229863, "learning_rate": 1.9397477173173147e-05, "loss": 0.828, "step": 1539 }, { "epoch": 0.13789243942022497, "grad_norm": 0.9241568895247805, "learning_rate": 1.9396485259954078e-05, "loss": 0.8859, "step": 1540 }, { "epoch": 0.13798197996530304, "grad_norm": 0.944342542022803, "learning_rate": 1.9395492556333292e-05, "loss": 0.818, "step": 1541 }, { "epoch": 0.1380715205103811, "grad_norm": 0.9973154382109269, "learning_rate": 1.9394499062394286e-05, "loss": 0.9294, "step": 1542 }, { "epoch": 0.13816106105545917, "grad_norm": 1.2433425100122892, "learning_rate": 1.9393504778220635e-05, "loss": 0.9763, "step": 1543 }, { "epoch": 0.13825060160053723, "grad_norm": 0.9701117005029266, "learning_rate": 1.9392509703895972e-05, "loss": 0.9085, "step": 1544 }, { "epoch": 0.13834014214561532, "grad_norm": 0.9997288267167611, "learning_rate": 1.9391513839503998e-05, "loss": 0.8332, "step": 1545 }, { "epoch": 0.1384296826906934, "grad_norm": 1.0245934642596468, "learning_rate": 1.9390517185128487e-05, "loss": 0.8924, "step": 1546 }, { "epoch": 0.13851922323577145, "grad_norm": 0.9556787713400058, "learning_rate": 1.9389519740853268e-05, "loss": 0.8947, "step": 1547 }, { "epoch": 0.13860876378084952, "grad_norm": 0.9222217238737335, "learning_rate": 1.9388521506762248e-05, "loss": 0.9183, "step": 1548 }, { "epoch": 0.13869830432592758, "grad_norm": 0.8465551370127095, "learning_rate": 1.9387522482939393e-05, "loss": 0.8105, "step": 1549 }, { "epoch": 0.13878784487100565, "grad_norm": 0.9720122169627724, "learning_rate": 1.9386522669468738e-05, "loss": 0.8891, "step": 1550 }, { "epoch": 0.13887738541608372, "grad_norm": 1.016018093699156, "learning_rate": 1.9385522066434386e-05, "loss": 0.9012, "step": 1551 }, { "epoch": 0.13896692596116178, "grad_norm": 0.9945374041768229, "learning_rate": 1.9384520673920502e-05, "loss": 0.8822, "step": 1552 }, { "epoch": 0.13905646650623985, "grad_norm": 1.0344474643228478, "learning_rate": 1.9383518492011316e-05, "loss": 0.9141, "step": 1553 }, { "epoch": 0.13914600705131794, "grad_norm": 1.0593081677283829, "learning_rate": 1.9382515520791137e-05, "loss": 0.8734, "step": 1554 }, { "epoch": 0.139235547596396, "grad_norm": 0.8936251075463839, "learning_rate": 1.9381511760344323e-05, "loss": 0.9138, "step": 1555 }, { "epoch": 0.13932508814147407, "grad_norm": 1.0302397511557242, "learning_rate": 1.9380507210755314e-05, "loss": 0.8864, "step": 1556 }, { "epoch": 0.13941462868655213, "grad_norm": 0.9713819991914739, "learning_rate": 1.9379501872108608e-05, "loss": 0.8549, "step": 1557 }, { "epoch": 0.1395041692316302, "grad_norm": 1.2490431897457857, "learning_rate": 1.937849574448877e-05, "loss": 0.8783, "step": 1558 }, { "epoch": 0.13959370977670826, "grad_norm": 0.9854464394177337, "learning_rate": 1.9377488827980428e-05, "loss": 0.942, "step": 1559 }, { "epoch": 0.13968325032178633, "grad_norm": 1.0171704098643424, "learning_rate": 1.937648112266829e-05, "loss": 0.8679, "step": 1560 }, { "epoch": 0.1397727908668644, "grad_norm": 1.0275669360245716, "learning_rate": 1.9375472628637107e-05, "loss": 0.9118, "step": 1561 }, { "epoch": 0.13986233141194246, "grad_norm": 1.0690846120342186, "learning_rate": 1.9374463345971723e-05, "loss": 0.9674, "step": 1562 }, { "epoch": 0.13995187195702055, "grad_norm": 0.9874260358139031, "learning_rate": 1.9373453274757032e-05, "loss": 0.8597, "step": 1563 }, { "epoch": 0.14004141250209862, "grad_norm": 1.260386220711374, "learning_rate": 1.9372442415077995e-05, "loss": 0.908, "step": 1564 }, { "epoch": 0.14013095304717668, "grad_norm": 1.0933525690089914, "learning_rate": 1.9371430767019644e-05, "loss": 0.892, "step": 1565 }, { "epoch": 0.14022049359225475, "grad_norm": 0.9588774858687872, "learning_rate": 1.9370418330667076e-05, "loss": 0.9241, "step": 1566 }, { "epoch": 0.1403100341373328, "grad_norm": 0.9626222986840648, "learning_rate": 1.9369405106105454e-05, "loss": 0.9299, "step": 1567 }, { "epoch": 0.14039957468241088, "grad_norm": 1.0362420842252216, "learning_rate": 1.9368391093420004e-05, "loss": 0.9011, "step": 1568 }, { "epoch": 0.14048911522748894, "grad_norm": 1.0089279233651063, "learning_rate": 1.9367376292696028e-05, "loss": 0.9741, "step": 1569 }, { "epoch": 0.140578655772567, "grad_norm": 1.0704996227198464, "learning_rate": 1.936636070401888e-05, "loss": 0.905, "step": 1570 }, { "epoch": 0.14066819631764507, "grad_norm": 0.9967860295408286, "learning_rate": 1.9365344327473996e-05, "loss": 0.9043, "step": 1571 }, { "epoch": 0.14075773686272317, "grad_norm": 0.9960395702834391, "learning_rate": 1.9364327163146864e-05, "loss": 0.9469, "step": 1572 }, { "epoch": 0.14084727740780123, "grad_norm": 1.0661750366501428, "learning_rate": 1.9363309211123046e-05, "loss": 0.9432, "step": 1573 }, { "epoch": 0.1409368179528793, "grad_norm": 0.9358561870698854, "learning_rate": 1.936229047148817e-05, "loss": 0.9408, "step": 1574 }, { "epoch": 0.14102635849795736, "grad_norm": 0.9528100492527942, "learning_rate": 1.9361270944327927e-05, "loss": 0.8017, "step": 1575 }, { "epoch": 0.14111589904303543, "grad_norm": 0.973697849413657, "learning_rate": 1.936025062972808e-05, "loss": 0.9283, "step": 1576 }, { "epoch": 0.1412054395881135, "grad_norm": 0.9737801540220953, "learning_rate": 1.935922952777445e-05, "loss": 0.903, "step": 1577 }, { "epoch": 0.14129498013319156, "grad_norm": 1.0446359434220713, "learning_rate": 1.9358207638552934e-05, "loss": 0.9159, "step": 1578 }, { "epoch": 0.14138452067826962, "grad_norm": 1.461482645941444, "learning_rate": 1.9357184962149483e-05, "loss": 0.9669, "step": 1579 }, { "epoch": 0.14147406122334769, "grad_norm": 0.9171066580256352, "learning_rate": 1.935616149865013e-05, "loss": 0.8946, "step": 1580 }, { "epoch": 0.14156360176842578, "grad_norm": 1.0517842281944485, "learning_rate": 1.935513724814096e-05, "loss": 0.8995, "step": 1581 }, { "epoch": 0.14165314231350384, "grad_norm": 1.0243279650484274, "learning_rate": 1.9354112210708127e-05, "loss": 0.9061, "step": 1582 }, { "epoch": 0.1417426828585819, "grad_norm": 1.0013327753506995, "learning_rate": 1.9353086386437858e-05, "loss": 0.8611, "step": 1583 }, { "epoch": 0.14183222340365997, "grad_norm": 0.9311046332129168, "learning_rate": 1.9352059775416442e-05, "loss": 0.9309, "step": 1584 }, { "epoch": 0.14192176394873804, "grad_norm": 1.009360119571293, "learning_rate": 1.9351032377730235e-05, "loss": 0.8946, "step": 1585 }, { "epoch": 0.1420113044938161, "grad_norm": 0.9596941736964932, "learning_rate": 1.9350004193465653e-05, "loss": 0.8689, "step": 1586 }, { "epoch": 0.14210084503889417, "grad_norm": 0.9497722216304163, "learning_rate": 1.934897522270919e-05, "loss": 0.8827, "step": 1587 }, { "epoch": 0.14219038558397223, "grad_norm": 1.0664829199808332, "learning_rate": 1.9347945465547395e-05, "loss": 0.9353, "step": 1588 }, { "epoch": 0.1422799261290503, "grad_norm": 0.9409816815283192, "learning_rate": 1.934691492206689e-05, "loss": 0.9044, "step": 1589 }, { "epoch": 0.1423694666741284, "grad_norm": 0.9974303083329258, "learning_rate": 1.9345883592354362e-05, "loss": 0.8679, "step": 1590 }, { "epoch": 0.14245900721920646, "grad_norm": 1.0105461364280715, "learning_rate": 1.9344851476496563e-05, "loss": 0.9275, "step": 1591 }, { "epoch": 0.14254854776428452, "grad_norm": 1.007867732092796, "learning_rate": 1.9343818574580306e-05, "loss": 0.9605, "step": 1592 }, { "epoch": 0.1426380883093626, "grad_norm": 1.006724469234831, "learning_rate": 1.934278488669248e-05, "loss": 0.9408, "step": 1593 }, { "epoch": 0.14272762885444065, "grad_norm": 0.9193185720500515, "learning_rate": 1.9341750412920035e-05, "loss": 0.8704, "step": 1594 }, { "epoch": 0.14281716939951872, "grad_norm": 1.0620570933177556, "learning_rate": 1.934071515334999e-05, "loss": 0.8807, "step": 1595 }, { "epoch": 0.14290670994459678, "grad_norm": 1.0364289883575133, "learning_rate": 1.933967910806942e-05, "loss": 0.9204, "step": 1596 }, { "epoch": 0.14299625048967485, "grad_norm": 1.1257155746584762, "learning_rate": 1.9338642277165484e-05, "loss": 0.8554, "step": 1597 }, { "epoch": 0.1430857910347529, "grad_norm": 1.004293766866379, "learning_rate": 1.9337604660725388e-05, "loss": 0.9113, "step": 1598 }, { "epoch": 0.143175331579831, "grad_norm": 1.3246645356078093, "learning_rate": 1.9336566258836417e-05, "loss": 0.936, "step": 1599 }, { "epoch": 0.14326487212490907, "grad_norm": 0.9392777809248289, "learning_rate": 1.9335527071585918e-05, "loss": 0.8597, "step": 1600 }, { "epoch": 0.14335441266998714, "grad_norm": 0.8796597263190294, "learning_rate": 1.9334487099061303e-05, "loss": 0.8656, "step": 1601 }, { "epoch": 0.1434439532150652, "grad_norm": 1.0527805696378545, "learning_rate": 1.9333446341350052e-05, "loss": 0.9208, "step": 1602 }, { "epoch": 0.14353349376014327, "grad_norm": 0.9592535446678568, "learning_rate": 1.933240479853971e-05, "loss": 0.8565, "step": 1603 }, { "epoch": 0.14362303430522133, "grad_norm": 1.0842584074530972, "learning_rate": 1.9331362470717888e-05, "loss": 0.9211, "step": 1604 }, { "epoch": 0.1437125748502994, "grad_norm": 1.165562016191776, "learning_rate": 1.9330319357972263e-05, "loss": 0.9289, "step": 1605 }, { "epoch": 0.14380211539537746, "grad_norm": 1.0079348278471336, "learning_rate": 1.9329275460390575e-05, "loss": 0.9174, "step": 1606 }, { "epoch": 0.14389165594045553, "grad_norm": 1.2910558565204018, "learning_rate": 1.9328230778060638e-05, "loss": 0.9156, "step": 1607 }, { "epoch": 0.14398119648553362, "grad_norm": 0.9808700500735781, "learning_rate": 1.932718531107033e-05, "loss": 0.867, "step": 1608 }, { "epoch": 0.14407073703061168, "grad_norm": 0.9664932419539713, "learning_rate": 1.9326139059507586e-05, "loss": 0.9202, "step": 1609 }, { "epoch": 0.14416027757568975, "grad_norm": 0.9050791304164366, "learning_rate": 1.9325092023460414e-05, "loss": 0.8673, "step": 1610 }, { "epoch": 0.14424981812076781, "grad_norm": 1.110942641231669, "learning_rate": 1.932404420301689e-05, "loss": 0.9282, "step": 1611 }, { "epoch": 0.14433935866584588, "grad_norm": 1.0176081708631357, "learning_rate": 1.932299559826515e-05, "loss": 0.9428, "step": 1612 }, { "epoch": 0.14442889921092394, "grad_norm": 1.0077685018374516, "learning_rate": 1.9321946209293406e-05, "loss": 0.8898, "step": 1613 }, { "epoch": 0.144518439756002, "grad_norm": 1.2406058275683514, "learning_rate": 1.932089603618992e-05, "loss": 0.9397, "step": 1614 }, { "epoch": 0.14460798030108007, "grad_norm": 1.0652400703307328, "learning_rate": 1.9319845079043035e-05, "loss": 0.9077, "step": 1615 }, { "epoch": 0.14469752084615814, "grad_norm": 1.0742662346584966, "learning_rate": 1.931879333794115e-05, "loss": 0.8476, "step": 1616 }, { "epoch": 0.14478706139123623, "grad_norm": 1.035706885899711, "learning_rate": 1.9317740812972742e-05, "loss": 0.8648, "step": 1617 }, { "epoch": 0.1448766019363143, "grad_norm": 1.0067839058283325, "learning_rate": 1.9316687504226335e-05, "loss": 0.9214, "step": 1618 }, { "epoch": 0.14496614248139236, "grad_norm": 0.9531947853533249, "learning_rate": 1.9315633411790538e-05, "loss": 0.8785, "step": 1619 }, { "epoch": 0.14505568302647043, "grad_norm": 1.091467169579033, "learning_rate": 1.9314578535754017e-05, "loss": 0.8625, "step": 1620 }, { "epoch": 0.1451452235715485, "grad_norm": 0.9099326325941713, "learning_rate": 1.93135228762055e-05, "loss": 0.8704, "step": 1621 }, { "epoch": 0.14523476411662656, "grad_norm": 1.0170926000704643, "learning_rate": 1.931246643323379e-05, "loss": 0.9667, "step": 1622 }, { "epoch": 0.14532430466170462, "grad_norm": 1.0040015915710157, "learning_rate": 1.9311409206927748e-05, "loss": 0.9239, "step": 1623 }, { "epoch": 0.1454138452067827, "grad_norm": 0.9409678691498216, "learning_rate": 1.9310351197376312e-05, "loss": 0.9248, "step": 1624 }, { "epoch": 0.14550338575186075, "grad_norm": 1.012496135506889, "learning_rate": 1.930929240466847e-05, "loss": 0.8981, "step": 1625 }, { "epoch": 0.14559292629693885, "grad_norm": 0.9347466760101097, "learning_rate": 1.9308232828893283e-05, "loss": 0.9085, "step": 1626 }, { "epoch": 0.1456824668420169, "grad_norm": 1.0049308782716457, "learning_rate": 1.930717247013989e-05, "loss": 0.86, "step": 1627 }, { "epoch": 0.14577200738709498, "grad_norm": 1.1140220633251008, "learning_rate": 1.930611132849747e-05, "loss": 0.9966, "step": 1628 }, { "epoch": 0.14586154793217304, "grad_norm": 0.9831739867859024, "learning_rate": 1.9305049404055302e-05, "loss": 0.8787, "step": 1629 }, { "epoch": 0.1459510884772511, "grad_norm": 1.01481799373667, "learning_rate": 1.930398669690269e-05, "loss": 0.9152, "step": 1630 }, { "epoch": 0.14604062902232917, "grad_norm": 1.0070583090543324, "learning_rate": 1.9302923207129043e-05, "loss": 0.8129, "step": 1631 }, { "epoch": 0.14613016956740724, "grad_norm": 0.8688964721845012, "learning_rate": 1.930185893482381e-05, "loss": 0.8434, "step": 1632 }, { "epoch": 0.1462197101124853, "grad_norm": 0.9552916009119464, "learning_rate": 1.9300793880076513e-05, "loss": 0.9256, "step": 1633 }, { "epoch": 0.14630925065756337, "grad_norm": 1.1242482708284693, "learning_rate": 1.9299728042976745e-05, "loss": 0.9126, "step": 1634 }, { "epoch": 0.14639879120264146, "grad_norm": 1.0639759991311433, "learning_rate": 1.929866142361416e-05, "loss": 0.8704, "step": 1635 }, { "epoch": 0.14648833174771952, "grad_norm": 1.085683289026062, "learning_rate": 1.9297594022078473e-05, "loss": 0.9189, "step": 1636 }, { "epoch": 0.1465778722927976, "grad_norm": 0.9204699197360561, "learning_rate": 1.929652583845948e-05, "loss": 0.8562, "step": 1637 }, { "epoch": 0.14666741283787565, "grad_norm": 1.2474992675095748, "learning_rate": 1.9295456872847027e-05, "loss": 0.928, "step": 1638 }, { "epoch": 0.14675695338295372, "grad_norm": 1.3276942361495088, "learning_rate": 1.929438712533103e-05, "loss": 0.9217, "step": 1639 }, { "epoch": 0.14684649392803178, "grad_norm": 0.9826261683964727, "learning_rate": 1.929331659600148e-05, "loss": 0.8575, "step": 1640 }, { "epoch": 0.14693603447310985, "grad_norm": 1.3021351143971271, "learning_rate": 1.929224528494842e-05, "loss": 0.8863, "step": 1641 }, { "epoch": 0.14702557501818792, "grad_norm": 0.9634260094680092, "learning_rate": 1.9291173192261966e-05, "loss": 0.8803, "step": 1642 }, { "epoch": 0.14711511556326598, "grad_norm": 0.9643633518283496, "learning_rate": 1.9290100318032303e-05, "loss": 0.9194, "step": 1643 }, { "epoch": 0.14720465610834407, "grad_norm": 0.9393342938255379, "learning_rate": 1.9289026662349674e-05, "loss": 0.87, "step": 1644 }, { "epoch": 0.14729419665342214, "grad_norm": 1.064576242808001, "learning_rate": 1.9287952225304392e-05, "loss": 0.9743, "step": 1645 }, { "epoch": 0.1473837371985002, "grad_norm": 1.1252698317003016, "learning_rate": 1.9286877006986833e-05, "loss": 0.8377, "step": 1646 }, { "epoch": 0.14747327774357827, "grad_norm": 1.0449420888633874, "learning_rate": 1.9285801007487446e-05, "loss": 0.878, "step": 1647 }, { "epoch": 0.14756281828865633, "grad_norm": 1.0862417024625208, "learning_rate": 1.928472422689674e-05, "loss": 0.9638, "step": 1648 }, { "epoch": 0.1476523588337344, "grad_norm": 1.0384229027745993, "learning_rate": 1.9283646665305283e-05, "loss": 0.8814, "step": 1649 }, { "epoch": 0.14774189937881246, "grad_norm": 1.2214679936034576, "learning_rate": 1.9282568322803724e-05, "loss": 0.9657, "step": 1650 }, { "epoch": 0.14783143992389053, "grad_norm": 1.0548244429113125, "learning_rate": 1.928148919948277e-05, "loss": 0.8795, "step": 1651 }, { "epoch": 0.1479209804689686, "grad_norm": 0.9866989311189773, "learning_rate": 1.9280409295433187e-05, "loss": 0.8788, "step": 1652 }, { "epoch": 0.1480105210140467, "grad_norm": 0.9706257870362636, "learning_rate": 1.9279328610745812e-05, "loss": 0.8655, "step": 1653 }, { "epoch": 0.14810006155912475, "grad_norm": 1.1083910929669207, "learning_rate": 1.927824714551156e-05, "loss": 0.8737, "step": 1654 }, { "epoch": 0.14818960210420282, "grad_norm": 1.2928505373657686, "learning_rate": 1.927716489982139e-05, "loss": 0.9181, "step": 1655 }, { "epoch": 0.14827914264928088, "grad_norm": 0.9399123850283069, "learning_rate": 1.9276081873766342e-05, "loss": 0.8435, "step": 1656 }, { "epoch": 0.14836868319435895, "grad_norm": 1.0866699905959638, "learning_rate": 1.9274998067437513e-05, "loss": 0.943, "step": 1657 }, { "epoch": 0.148458223739437, "grad_norm": 1.0342047835482897, "learning_rate": 1.927391348092607e-05, "loss": 0.938, "step": 1658 }, { "epoch": 0.14854776428451508, "grad_norm": 0.9354046794640771, "learning_rate": 1.9272828114323247e-05, "loss": 0.8734, "step": 1659 }, { "epoch": 0.14863730482959314, "grad_norm": 1.1138804707617767, "learning_rate": 1.9271741967720342e-05, "loss": 0.9539, "step": 1660 }, { "epoch": 0.1487268453746712, "grad_norm": 1.0314683170449572, "learning_rate": 1.9270655041208714e-05, "loss": 0.8866, "step": 1661 }, { "epoch": 0.1488163859197493, "grad_norm": 1.3574802110382618, "learning_rate": 1.9269567334879794e-05, "loss": 0.9521, "step": 1662 }, { "epoch": 0.14890592646482737, "grad_norm": 0.9467197932898966, "learning_rate": 1.926847884882508e-05, "loss": 0.8892, "step": 1663 }, { "epoch": 0.14899546700990543, "grad_norm": 1.0826583505192637, "learning_rate": 1.9267389583136124e-05, "loss": 0.8893, "step": 1664 }, { "epoch": 0.1490850075549835, "grad_norm": 0.9967726748430142, "learning_rate": 1.926629953790456e-05, "loss": 0.9929, "step": 1665 }, { "epoch": 0.14917454810006156, "grad_norm": 1.454136198969318, "learning_rate": 1.9265208713222075e-05, "loss": 0.9938, "step": 1666 }, { "epoch": 0.14926408864513963, "grad_norm": 1.3076142784518345, "learning_rate": 1.9264117109180423e-05, "loss": 0.921, "step": 1667 }, { "epoch": 0.1493536291902177, "grad_norm": 0.9624831595207629, "learning_rate": 1.9263024725871427e-05, "loss": 0.8635, "step": 1668 }, { "epoch": 0.14944316973529576, "grad_norm": 1.0900153773481214, "learning_rate": 1.926193156338698e-05, "loss": 0.9409, "step": 1669 }, { "epoch": 0.14953271028037382, "grad_norm": 1.1687275268025108, "learning_rate": 1.9260837621819035e-05, "loss": 0.8564, "step": 1670 }, { "epoch": 0.1496222508254519, "grad_norm": 0.9683242942917771, "learning_rate": 1.92597429012596e-05, "loss": 0.8841, "step": 1671 }, { "epoch": 0.14971179137052998, "grad_norm": 0.9708117550467407, "learning_rate": 1.9258647401800772e-05, "loss": 0.8573, "step": 1672 }, { "epoch": 0.14980133191560804, "grad_norm": 1.049451884123498, "learning_rate": 1.9257551123534696e-05, "loss": 0.8921, "step": 1673 }, { "epoch": 0.1498908724606861, "grad_norm": 1.0402536247413583, "learning_rate": 1.9256454066553583e-05, "loss": 0.9018, "step": 1674 }, { "epoch": 0.14998041300576417, "grad_norm": 0.909739770322449, "learning_rate": 1.925535623094972e-05, "loss": 0.8054, "step": 1675 }, { "epoch": 0.15006995355084224, "grad_norm": 1.0172632156567232, "learning_rate": 1.9254257616815452e-05, "loss": 0.9592, "step": 1676 }, { "epoch": 0.1501594940959203, "grad_norm": 1.072175189305115, "learning_rate": 1.925315822424319e-05, "loss": 0.9451, "step": 1677 }, { "epoch": 0.15024903464099837, "grad_norm": 1.0373673257059377, "learning_rate": 1.925205805332541e-05, "loss": 0.9224, "step": 1678 }, { "epoch": 0.15033857518607643, "grad_norm": 1.1765660537447373, "learning_rate": 1.925095710415466e-05, "loss": 0.8962, "step": 1679 }, { "epoch": 0.15042811573115453, "grad_norm": 1.1048910546103297, "learning_rate": 1.9249855376823542e-05, "loss": 0.9237, "step": 1680 }, { "epoch": 0.1505176562762326, "grad_norm": 0.9904288374472184, "learning_rate": 1.924875287142473e-05, "loss": 0.9093, "step": 1681 }, { "epoch": 0.15060719682131066, "grad_norm": 0.9862771249616881, "learning_rate": 1.924764958805097e-05, "loss": 0.8445, "step": 1682 }, { "epoch": 0.15069673736638872, "grad_norm": 1.0272249353101568, "learning_rate": 1.924654552679506e-05, "loss": 0.9106, "step": 1683 }, { "epoch": 0.1507862779114668, "grad_norm": 1.2453594745514973, "learning_rate": 1.9245440687749872e-05, "loss": 0.9209, "step": 1684 }, { "epoch": 0.15087581845654485, "grad_norm": 1.0942652481282462, "learning_rate": 1.924433507100834e-05, "loss": 0.8245, "step": 1685 }, { "epoch": 0.15096535900162292, "grad_norm": 1.1398269743143796, "learning_rate": 1.9243228676663467e-05, "loss": 0.9486, "step": 1686 }, { "epoch": 0.15105489954670098, "grad_norm": 1.0078284916476308, "learning_rate": 1.924212150480832e-05, "loss": 0.9377, "step": 1687 }, { "epoch": 0.15114444009177905, "grad_norm": 0.9781319299042506, "learning_rate": 1.924101355553603e-05, "loss": 0.9015, "step": 1688 }, { "epoch": 0.15123398063685714, "grad_norm": 1.3356990811091403, "learning_rate": 1.923990482893979e-05, "loss": 0.9382, "step": 1689 }, { "epoch": 0.1513235211819352, "grad_norm": 0.9432762623182328, "learning_rate": 1.9238795325112867e-05, "loss": 0.8906, "step": 1690 }, { "epoch": 0.15141306172701327, "grad_norm": 0.9372007524023076, "learning_rate": 1.923768504414859e-05, "loss": 0.8947, "step": 1691 }, { "epoch": 0.15150260227209134, "grad_norm": 0.9416501743965463, "learning_rate": 1.923657398614035e-05, "loss": 0.8744, "step": 1692 }, { "epoch": 0.1515921428171694, "grad_norm": 1.1262780696943868, "learning_rate": 1.9235462151181603e-05, "loss": 0.91, "step": 1693 }, { "epoch": 0.15168168336224747, "grad_norm": 0.9304155595549114, "learning_rate": 1.923434953936588e-05, "loss": 0.8882, "step": 1694 }, { "epoch": 0.15177122390732553, "grad_norm": 1.2335686477408923, "learning_rate": 1.923323615078676e-05, "loss": 0.8617, "step": 1695 }, { "epoch": 0.1518607644524036, "grad_norm": 1.0874521287515853, "learning_rate": 1.9232121985537907e-05, "loss": 0.9733, "step": 1696 }, { "epoch": 0.15195030499748166, "grad_norm": 1.10016197907857, "learning_rate": 1.9231007043713034e-05, "loss": 0.8738, "step": 1697 }, { "epoch": 0.15203984554255975, "grad_norm": 0.9389593837329341, "learning_rate": 1.9229891325405934e-05, "loss": 0.9086, "step": 1698 }, { "epoch": 0.15212938608763782, "grad_norm": 1.0037799400450909, "learning_rate": 1.922877483071045e-05, "loss": 0.9211, "step": 1699 }, { "epoch": 0.15221892663271588, "grad_norm": 1.0682228841498402, "learning_rate": 1.9227657559720504e-05, "loss": 0.9358, "step": 1700 }, { "epoch": 0.15230846717779395, "grad_norm": 0.928431560949346, "learning_rate": 1.922653951253007e-05, "loss": 0.9146, "step": 1701 }, { "epoch": 0.15239800772287201, "grad_norm": 1.004864152360824, "learning_rate": 1.92254206892332e-05, "loss": 0.867, "step": 1702 }, { "epoch": 0.15248754826795008, "grad_norm": 1.0107481486628573, "learning_rate": 1.922430108992401e-05, "loss": 0.8737, "step": 1703 }, { "epoch": 0.15257708881302814, "grad_norm": 0.8596512905076935, "learning_rate": 1.9223180714696664e-05, "loss": 0.8334, "step": 1704 }, { "epoch": 0.1526666293581062, "grad_norm": 0.9201486337002881, "learning_rate": 1.9222059563645418e-05, "loss": 0.8519, "step": 1705 }, { "epoch": 0.15275616990318427, "grad_norm": 0.9600246831311577, "learning_rate": 1.922093763686457e-05, "loss": 0.9317, "step": 1706 }, { "epoch": 0.15284571044826237, "grad_norm": 1.454375072559627, "learning_rate": 1.9219814934448496e-05, "loss": 0.9027, "step": 1707 }, { "epoch": 0.15293525099334043, "grad_norm": 1.0454228167947963, "learning_rate": 1.9218691456491637e-05, "loss": 0.9104, "step": 1708 }, { "epoch": 0.1530247915384185, "grad_norm": 0.9883197470373759, "learning_rate": 1.921756720308849e-05, "loss": 0.8084, "step": 1709 }, { "epoch": 0.15311433208349656, "grad_norm": 1.0893429584820449, "learning_rate": 1.921644217433363e-05, "loss": 0.9321, "step": 1710 }, { "epoch": 0.15320387262857463, "grad_norm": 0.9689447935480849, "learning_rate": 1.9215316370321686e-05, "loss": 0.9323, "step": 1711 }, { "epoch": 0.1532934131736527, "grad_norm": 1.1843863349688082, "learning_rate": 1.9214189791147363e-05, "loss": 0.865, "step": 1712 }, { "epoch": 0.15338295371873076, "grad_norm": 0.9267532787454216, "learning_rate": 1.9213062436905415e-05, "loss": 0.909, "step": 1713 }, { "epoch": 0.15347249426380882, "grad_norm": 0.9782882450590263, "learning_rate": 1.9211934307690682e-05, "loss": 0.9246, "step": 1714 }, { "epoch": 0.1535620348088869, "grad_norm": 0.9095113387660075, "learning_rate": 1.9210805403598053e-05, "loss": 0.8637, "step": 1715 }, { "epoch": 0.15365157535396498, "grad_norm": 1.1724356192394525, "learning_rate": 1.9209675724722486e-05, "loss": 0.8924, "step": 1716 }, { "epoch": 0.15374111589904305, "grad_norm": 0.9738665955886789, "learning_rate": 1.920854527115901e-05, "loss": 0.9243, "step": 1717 }, { "epoch": 0.1538306564441211, "grad_norm": 1.0246825992957984, "learning_rate": 1.9207414043002718e-05, "loss": 0.8655, "step": 1718 }, { "epoch": 0.15392019698919918, "grad_norm": 1.0482124032108133, "learning_rate": 1.9206282040348757e-05, "loss": 0.9375, "step": 1719 }, { "epoch": 0.15400973753427724, "grad_norm": 1.0704145063796735, "learning_rate": 1.9205149263292352e-05, "loss": 0.9305, "step": 1720 }, { "epoch": 0.1540992780793553, "grad_norm": 1.0371058897114473, "learning_rate": 1.920401571192879e-05, "loss": 0.8676, "step": 1721 }, { "epoch": 0.15418881862443337, "grad_norm": 1.0286335240610645, "learning_rate": 1.9202881386353415e-05, "loss": 0.9735, "step": 1722 }, { "epoch": 0.15427835916951144, "grad_norm": 1.0513961457189702, "learning_rate": 1.920174628666165e-05, "loss": 0.9022, "step": 1723 }, { "epoch": 0.1543678997145895, "grad_norm": 0.9770094298286088, "learning_rate": 1.920061041294897e-05, "loss": 0.8881, "step": 1724 }, { "epoch": 0.1544574402596676, "grad_norm": 0.9990047190345827, "learning_rate": 1.9199473765310928e-05, "loss": 0.9214, "step": 1725 }, { "epoch": 0.15454698080474566, "grad_norm": 0.9384725549469383, "learning_rate": 1.919833634384313e-05, "loss": 0.9048, "step": 1726 }, { "epoch": 0.15463652134982372, "grad_norm": 1.1731562245797693, "learning_rate": 1.9197198148641252e-05, "loss": 0.9057, "step": 1727 }, { "epoch": 0.1547260618949018, "grad_norm": 1.0448253936372807, "learning_rate": 1.9196059179801038e-05, "loss": 0.9146, "step": 1728 }, { "epoch": 0.15481560243997985, "grad_norm": 1.138080988090882, "learning_rate": 1.9194919437418297e-05, "loss": 0.8877, "step": 1729 }, { "epoch": 0.15490514298505792, "grad_norm": 1.0972501186985062, "learning_rate": 1.919377892158889e-05, "loss": 0.9143, "step": 1730 }, { "epoch": 0.15499468353013598, "grad_norm": 1.0017073773753558, "learning_rate": 1.9192637632408765e-05, "loss": 0.895, "step": 1731 }, { "epoch": 0.15508422407521405, "grad_norm": 0.959223867180494, "learning_rate": 1.9191495569973915e-05, "loss": 0.8925, "step": 1732 }, { "epoch": 0.15517376462029212, "grad_norm": 1.0759538408104188, "learning_rate": 1.919035273438041e-05, "loss": 0.8484, "step": 1733 }, { "epoch": 0.1552633051653702, "grad_norm": 0.9958439562387594, "learning_rate": 1.9189209125724383e-05, "loss": 0.9161, "step": 1734 }, { "epoch": 0.15535284571044827, "grad_norm": 1.1272260402792358, "learning_rate": 1.9188064744102027e-05, "loss": 0.9292, "step": 1735 }, { "epoch": 0.15544238625552634, "grad_norm": 0.9798695817442161, "learning_rate": 1.918691958960961e-05, "loss": 0.9102, "step": 1736 }, { "epoch": 0.1555319268006044, "grad_norm": 0.9581948105339737, "learning_rate": 1.918577366234345e-05, "loss": 0.9464, "step": 1737 }, { "epoch": 0.15562146734568247, "grad_norm": 1.014761371939899, "learning_rate": 1.9184626962399946e-05, "loss": 0.8301, "step": 1738 }, { "epoch": 0.15571100789076053, "grad_norm": 1.0417808289977908, "learning_rate": 1.918347948987555e-05, "loss": 0.9079, "step": 1739 }, { "epoch": 0.1558005484358386, "grad_norm": 1.0583141139207035, "learning_rate": 1.918233124486679e-05, "loss": 0.8481, "step": 1740 }, { "epoch": 0.15589008898091666, "grad_norm": 1.0765333402178028, "learning_rate": 1.9181182227470243e-05, "loss": 0.9526, "step": 1741 }, { "epoch": 0.15597962952599473, "grad_norm": 1.0105388316886106, "learning_rate": 1.918003243778257e-05, "loss": 0.878, "step": 1742 }, { "epoch": 0.15606917007107282, "grad_norm": 0.9561199474333818, "learning_rate": 1.917888187590048e-05, "loss": 0.8757, "step": 1743 }, { "epoch": 0.1561587106161509, "grad_norm": 1.0567663791856519, "learning_rate": 1.917773054192076e-05, "loss": 0.9316, "step": 1744 }, { "epoch": 0.15624825116122895, "grad_norm": 1.1955711681783978, "learning_rate": 1.9176578435940253e-05, "loss": 0.9656, "step": 1745 }, { "epoch": 0.15633779170630702, "grad_norm": 1.0858473664831918, "learning_rate": 1.917542555805587e-05, "loss": 0.9149, "step": 1746 }, { "epoch": 0.15642733225138508, "grad_norm": 1.1120073447556666, "learning_rate": 1.917427190836459e-05, "loss": 0.8436, "step": 1747 }, { "epoch": 0.15651687279646315, "grad_norm": 0.9249862821127601, "learning_rate": 1.9173117486963457e-05, "loss": 0.8907, "step": 1748 }, { "epoch": 0.1566064133415412, "grad_norm": 1.03137665728002, "learning_rate": 1.9171962293949572e-05, "loss": 0.8836, "step": 1749 }, { "epoch": 0.15669595388661928, "grad_norm": 0.9754914537185936, "learning_rate": 1.9170806329420105e-05, "loss": 0.9083, "step": 1750 }, { "epoch": 0.15678549443169734, "grad_norm": 1.0829965051577897, "learning_rate": 1.9169649593472297e-05, "loss": 0.8615, "step": 1751 }, { "epoch": 0.15687503497677543, "grad_norm": 1.0353806850141412, "learning_rate": 1.9168492086203444e-05, "loss": 0.8456, "step": 1752 }, { "epoch": 0.1569645755218535, "grad_norm": 0.9325991040514573, "learning_rate": 1.9167333807710915e-05, "loss": 0.9073, "step": 1753 }, { "epoch": 0.15705411606693157, "grad_norm": 0.9753712552157462, "learning_rate": 1.916617475809214e-05, "loss": 0.928, "step": 1754 }, { "epoch": 0.15714365661200963, "grad_norm": 0.9348701748376285, "learning_rate": 1.9165014937444616e-05, "loss": 0.9043, "step": 1755 }, { "epoch": 0.1572331971570877, "grad_norm": 1.0317541602875142, "learning_rate": 1.91638543458659e-05, "loss": 0.9441, "step": 1756 }, { "epoch": 0.15732273770216576, "grad_norm": 0.9895142936683405, "learning_rate": 1.9162692983453617e-05, "loss": 0.9033, "step": 1757 }, { "epoch": 0.15741227824724383, "grad_norm": 0.9642096118727083, "learning_rate": 1.9161530850305464e-05, "loss": 0.9002, "step": 1758 }, { "epoch": 0.1575018187923219, "grad_norm": 1.0232302914839753, "learning_rate": 1.9160367946519186e-05, "loss": 0.9242, "step": 1759 }, { "epoch": 0.15759135933739996, "grad_norm": 1.1157461658286578, "learning_rate": 1.915920427219261e-05, "loss": 0.9536, "step": 1760 }, { "epoch": 0.15768089988247805, "grad_norm": 1.0899057415992095, "learning_rate": 1.9158039827423615e-05, "loss": 0.902, "step": 1761 }, { "epoch": 0.1577704404275561, "grad_norm": 1.0019860379641987, "learning_rate": 1.915687461231015e-05, "loss": 0.8649, "step": 1762 }, { "epoch": 0.15785998097263418, "grad_norm": 1.0592047089991492, "learning_rate": 1.915570862695024e-05, "loss": 0.9045, "step": 1763 }, { "epoch": 0.15794952151771224, "grad_norm": 1.118022235655546, "learning_rate": 1.915454187144195e-05, "loss": 0.8882, "step": 1764 }, { "epoch": 0.1580390620627903, "grad_norm": 1.1065302640531012, "learning_rate": 1.915337434588343e-05, "loss": 0.8864, "step": 1765 }, { "epoch": 0.15812860260786837, "grad_norm": 0.9711734405211765, "learning_rate": 1.9152206050372896e-05, "loss": 0.884, "step": 1766 }, { "epoch": 0.15821814315294644, "grad_norm": 1.0951625441336368, "learning_rate": 1.9151036985008606e-05, "loss": 0.8569, "step": 1767 }, { "epoch": 0.1583076836980245, "grad_norm": 0.9608988518669533, "learning_rate": 1.9149867149888905e-05, "loss": 0.8928, "step": 1768 }, { "epoch": 0.15839722424310257, "grad_norm": 0.9135475004102169, "learning_rate": 1.91486965451122e-05, "loss": 0.8761, "step": 1769 }, { "epoch": 0.15848676478818066, "grad_norm": 0.9720708661546296, "learning_rate": 1.914752517077695e-05, "loss": 0.9435, "step": 1770 }, { "epoch": 0.15857630533325873, "grad_norm": 1.0096825964317873, "learning_rate": 1.9146353026981694e-05, "loss": 0.8708, "step": 1771 }, { "epoch": 0.1586658458783368, "grad_norm": 1.1587247379357897, "learning_rate": 1.914518011382503e-05, "loss": 0.8976, "step": 1772 }, { "epoch": 0.15875538642341486, "grad_norm": 0.9706324214839399, "learning_rate": 1.914400643140561e-05, "loss": 0.8366, "step": 1773 }, { "epoch": 0.15884492696849292, "grad_norm": 0.9899641933223498, "learning_rate": 1.914283197982217e-05, "loss": 0.8812, "step": 1774 }, { "epoch": 0.158934467513571, "grad_norm": 1.0513731266360786, "learning_rate": 1.9141656759173496e-05, "loss": 0.8886, "step": 1775 }, { "epoch": 0.15902400805864905, "grad_norm": 1.2126002470755233, "learning_rate": 1.9140480769558448e-05, "loss": 0.9247, "step": 1776 }, { "epoch": 0.15911354860372712, "grad_norm": 1.2239739020225335, "learning_rate": 1.9139304011075944e-05, "loss": 0.9514, "step": 1777 }, { "epoch": 0.15920308914880518, "grad_norm": 0.9826377993678783, "learning_rate": 1.9138126483824965e-05, "loss": 0.9733, "step": 1778 }, { "epoch": 0.15929262969388328, "grad_norm": 0.9262648217731771, "learning_rate": 1.913694818790457e-05, "loss": 0.8388, "step": 1779 }, { "epoch": 0.15938217023896134, "grad_norm": 1.182564110672693, "learning_rate": 1.9135769123413862e-05, "loss": 0.9113, "step": 1780 }, { "epoch": 0.1594717107840394, "grad_norm": 1.0588149240165425, "learning_rate": 1.913458929045203e-05, "loss": 0.8948, "step": 1781 }, { "epoch": 0.15956125132911747, "grad_norm": 1.0502516062560021, "learning_rate": 1.9133408689118312e-05, "loss": 0.8756, "step": 1782 }, { "epoch": 0.15965079187419554, "grad_norm": 1.0688992063889948, "learning_rate": 1.913222731951202e-05, "loss": 0.918, "step": 1783 }, { "epoch": 0.1597403324192736, "grad_norm": 1.0948674189639584, "learning_rate": 1.9131045181732525e-05, "loss": 0.9795, "step": 1784 }, { "epoch": 0.15982987296435167, "grad_norm": 1.1447905760481059, "learning_rate": 1.9129862275879262e-05, "loss": 0.9086, "step": 1785 }, { "epoch": 0.15991941350942973, "grad_norm": 0.9379086436814146, "learning_rate": 1.912867860205174e-05, "loss": 0.8887, "step": 1786 }, { "epoch": 0.1600089540545078, "grad_norm": 1.0287472385780494, "learning_rate": 1.9127494160349517e-05, "loss": 0.9246, "step": 1787 }, { "epoch": 0.1600984945995859, "grad_norm": 0.8672738727128856, "learning_rate": 1.9126308950872233e-05, "loss": 0.9389, "step": 1788 }, { "epoch": 0.16018803514466395, "grad_norm": 1.166187739527242, "learning_rate": 1.912512297371958e-05, "loss": 0.8844, "step": 1789 }, { "epoch": 0.16027757568974202, "grad_norm": 1.3506284664552248, "learning_rate": 1.9123936228991312e-05, "loss": 0.9404, "step": 1790 }, { "epoch": 0.16036711623482008, "grad_norm": 1.0262532356156928, "learning_rate": 1.9122748716787266e-05, "loss": 0.9149, "step": 1791 }, { "epoch": 0.16045665677989815, "grad_norm": 0.9453563141447522, "learning_rate": 1.912156043720733e-05, "loss": 0.8695, "step": 1792 }, { "epoch": 0.16054619732497621, "grad_norm": 0.9029191295061255, "learning_rate": 1.9120371390351446e-05, "loss": 0.9587, "step": 1793 }, { "epoch": 0.16063573787005428, "grad_norm": 1.0136423533455607, "learning_rate": 1.9119181576319648e-05, "loss": 0.9501, "step": 1794 }, { "epoch": 0.16072527841513234, "grad_norm": 1.0274118793379396, "learning_rate": 1.9117990995212012e-05, "loss": 0.8758, "step": 1795 }, { "epoch": 0.1608148189602104, "grad_norm": 1.024427619480751, "learning_rate": 1.9116799647128683e-05, "loss": 0.9066, "step": 1796 }, { "epoch": 0.1609043595052885, "grad_norm": 0.9686678313654561, "learning_rate": 1.911560753216988e-05, "loss": 0.9292, "step": 1797 }, { "epoch": 0.16099390005036657, "grad_norm": 1.0041898095009703, "learning_rate": 1.9114414650435875e-05, "loss": 0.9043, "step": 1798 }, { "epoch": 0.16108344059544463, "grad_norm": 0.9024253253045971, "learning_rate": 1.9113221002027007e-05, "loss": 0.915, "step": 1799 }, { "epoch": 0.1611729811405227, "grad_norm": 1.1326755850562251, "learning_rate": 1.911202658704369e-05, "loss": 0.915, "step": 1800 }, { "epoch": 0.16126252168560076, "grad_norm": 0.9072375486139627, "learning_rate": 1.9110831405586387e-05, "loss": 0.8906, "step": 1801 }, { "epoch": 0.16135206223067883, "grad_norm": 0.958710214489495, "learning_rate": 1.910963545775564e-05, "loss": 0.8819, "step": 1802 }, { "epoch": 0.1614416027757569, "grad_norm": 0.9624361526271846, "learning_rate": 1.910843874365204e-05, "loss": 0.897, "step": 1803 }, { "epoch": 0.16153114332083496, "grad_norm": 0.9528834239750904, "learning_rate": 1.9107241263376256e-05, "loss": 0.8714, "step": 1804 }, { "epoch": 0.16162068386591302, "grad_norm": 0.9384984165218986, "learning_rate": 1.9106043017029012e-05, "loss": 0.8853, "step": 1805 }, { "epoch": 0.16171022441099112, "grad_norm": 0.9883283282398443, "learning_rate": 1.9104844004711107e-05, "loss": 0.9357, "step": 1806 }, { "epoch": 0.16179976495606918, "grad_norm": 0.9645956168311687, "learning_rate": 1.9103644226523395e-05, "loss": 0.9237, "step": 1807 }, { "epoch": 0.16188930550114725, "grad_norm": 1.0387433363731493, "learning_rate": 1.9102443682566792e-05, "loss": 0.8613, "step": 1808 }, { "epoch": 0.1619788460462253, "grad_norm": 0.9014205844391221, "learning_rate": 1.9101242372942292e-05, "loss": 0.8921, "step": 1809 }, { "epoch": 0.16206838659130338, "grad_norm": 1.029366396147913, "learning_rate": 1.9100040297750942e-05, "loss": 0.9768, "step": 1810 }, { "epoch": 0.16215792713638144, "grad_norm": 0.9672532183859828, "learning_rate": 1.9098837457093858e-05, "loss": 0.8835, "step": 1811 }, { "epoch": 0.1622474676814595, "grad_norm": 0.8894161130914849, "learning_rate": 1.9097633851072212e-05, "loss": 0.8687, "step": 1812 }, { "epoch": 0.16233700822653757, "grad_norm": 1.0813919692184535, "learning_rate": 1.9096429479787256e-05, "loss": 0.8396, "step": 1813 }, { "epoch": 0.16242654877161564, "grad_norm": 1.0204990139340124, "learning_rate": 1.9095224343340298e-05, "loss": 0.9005, "step": 1814 }, { "epoch": 0.1625160893166937, "grad_norm": 1.0300138051134917, "learning_rate": 1.9094018441832704e-05, "loss": 0.9384, "step": 1815 }, { "epoch": 0.1626056298617718, "grad_norm": 0.8883632712759655, "learning_rate": 1.9092811775365914e-05, "loss": 0.8535, "step": 1816 }, { "epoch": 0.16269517040684986, "grad_norm": 0.914692354156227, "learning_rate": 1.9091604344041425e-05, "loss": 0.8711, "step": 1817 }, { "epoch": 0.16278471095192792, "grad_norm": 1.0493945611899447, "learning_rate": 1.9090396147960808e-05, "loss": 0.9119, "step": 1818 }, { "epoch": 0.162874251497006, "grad_norm": 1.000402712180523, "learning_rate": 1.908918718722569e-05, "loss": 0.9041, "step": 1819 }, { "epoch": 0.16296379204208405, "grad_norm": 1.1090771392608443, "learning_rate": 1.9087977461937764e-05, "loss": 0.8712, "step": 1820 }, { "epoch": 0.16305333258716212, "grad_norm": 0.9203066789039771, "learning_rate": 1.908676697219879e-05, "loss": 0.8495, "step": 1821 }, { "epoch": 0.16314287313224018, "grad_norm": 1.0515644618841369, "learning_rate": 1.908555571811059e-05, "loss": 0.8867, "step": 1822 }, { "epoch": 0.16323241367731825, "grad_norm": 1.0059108139988986, "learning_rate": 1.908434369977505e-05, "loss": 0.9524, "step": 1823 }, { "epoch": 0.16332195422239631, "grad_norm": 0.854045888912817, "learning_rate": 1.908313091729412e-05, "loss": 0.8607, "step": 1824 }, { "epoch": 0.1634114947674744, "grad_norm": 0.9739556033902297, "learning_rate": 1.908191737076982e-05, "loss": 0.865, "step": 1825 }, { "epoch": 0.16350103531255247, "grad_norm": 1.0662952962059198, "learning_rate": 1.908070306030422e-05, "loss": 0.9309, "step": 1826 }, { "epoch": 0.16359057585763054, "grad_norm": 1.0493589792741702, "learning_rate": 1.9079487985999473e-05, "loss": 0.9374, "step": 1827 }, { "epoch": 0.1636801164027086, "grad_norm": 0.9216282710286423, "learning_rate": 1.9078272147957784e-05, "loss": 0.8839, "step": 1828 }, { "epoch": 0.16376965694778667, "grad_norm": 1.0301010288935466, "learning_rate": 1.9077055546281425e-05, "loss": 0.9774, "step": 1829 }, { "epoch": 0.16385919749286473, "grad_norm": 1.00434117354553, "learning_rate": 1.9075838181072732e-05, "loss": 0.9358, "step": 1830 }, { "epoch": 0.1639487380379428, "grad_norm": 0.9171998718000218, "learning_rate": 1.9074620052434108e-05, "loss": 0.8609, "step": 1831 }, { "epoch": 0.16403827858302086, "grad_norm": 1.1183297616882255, "learning_rate": 1.9073401160468016e-05, "loss": 0.8675, "step": 1832 }, { "epoch": 0.16412781912809893, "grad_norm": 0.9090755397112024, "learning_rate": 1.9072181505276988e-05, "loss": 0.9263, "step": 1833 }, { "epoch": 0.16421735967317702, "grad_norm": 1.0546326090785478, "learning_rate": 1.907096108696361e-05, "loss": 0.8868, "step": 1834 }, { "epoch": 0.1643069002182551, "grad_norm": 1.0339442292007854, "learning_rate": 1.9069739905630552e-05, "loss": 0.9539, "step": 1835 }, { "epoch": 0.16439644076333315, "grad_norm": 1.0192500250164835, "learning_rate": 1.9068517961380523e-05, "loss": 0.8636, "step": 1836 }, { "epoch": 0.16448598130841122, "grad_norm": 0.9635828736683073, "learning_rate": 1.9067295254316315e-05, "loss": 0.9346, "step": 1837 }, { "epoch": 0.16457552185348928, "grad_norm": 1.0662192135547275, "learning_rate": 1.9066071784540782e-05, "loss": 0.8873, "step": 1838 }, { "epoch": 0.16466506239856735, "grad_norm": 0.9329442247300944, "learning_rate": 1.9064847552156834e-05, "loss": 0.9159, "step": 1839 }, { "epoch": 0.1647546029436454, "grad_norm": 0.9122349557623348, "learning_rate": 1.9063622557267443e-05, "loss": 0.8913, "step": 1840 }, { "epoch": 0.16484414348872348, "grad_norm": 1.0200133268687577, "learning_rate": 1.9062396799975667e-05, "loss": 0.8579, "step": 1841 }, { "epoch": 0.16493368403380154, "grad_norm": 1.0098398098752235, "learning_rate": 1.9061170280384596e-05, "loss": 0.9314, "step": 1842 }, { "epoch": 0.16502322457887963, "grad_norm": 0.9749561796647002, "learning_rate": 1.9059942998597413e-05, "loss": 0.8652, "step": 1843 }, { "epoch": 0.1651127651239577, "grad_norm": 0.9331007820092048, "learning_rate": 1.9058714954717346e-05, "loss": 0.8722, "step": 1844 }, { "epoch": 0.16520230566903576, "grad_norm": 1.113777846520895, "learning_rate": 1.90574861488477e-05, "loss": 0.8882, "step": 1845 }, { "epoch": 0.16529184621411383, "grad_norm": 1.184097028983729, "learning_rate": 1.9056256581091834e-05, "loss": 0.8463, "step": 1846 }, { "epoch": 0.1653813867591919, "grad_norm": 1.0008001927521994, "learning_rate": 1.9055026251553174e-05, "loss": 0.9151, "step": 1847 }, { "epoch": 0.16547092730426996, "grad_norm": 0.927853458445148, "learning_rate": 1.9053795160335216e-05, "loss": 0.8496, "step": 1848 }, { "epoch": 0.16556046784934803, "grad_norm": 1.0154484889116662, "learning_rate": 1.9052563307541512e-05, "loss": 0.8845, "step": 1849 }, { "epoch": 0.1656500083944261, "grad_norm": 1.058028333104348, "learning_rate": 1.905133069327568e-05, "loss": 0.9207, "step": 1850 }, { "epoch": 0.16573954893950416, "grad_norm": 0.976645577135433, "learning_rate": 1.905009731764141e-05, "loss": 0.8764, "step": 1851 }, { "epoch": 0.16582908948458225, "grad_norm": 1.0565341202946095, "learning_rate": 1.904886318074244e-05, "loss": 0.8988, "step": 1852 }, { "epoch": 0.1659186300296603, "grad_norm": 1.3198924133718994, "learning_rate": 1.904762828268259e-05, "loss": 0.9048, "step": 1853 }, { "epoch": 0.16600817057473838, "grad_norm": 0.9766860267319637, "learning_rate": 1.904639262356573e-05, "loss": 0.854, "step": 1854 }, { "epoch": 0.16609771111981644, "grad_norm": 0.928804940651181, "learning_rate": 1.9045156203495808e-05, "loss": 0.8896, "step": 1855 }, { "epoch": 0.1661872516648945, "grad_norm": 1.0057200060113751, "learning_rate": 1.9043919022576817e-05, "loss": 0.9238, "step": 1856 }, { "epoch": 0.16627679220997257, "grad_norm": 0.9129435286379892, "learning_rate": 1.9042681080912827e-05, "loss": 0.8703, "step": 1857 }, { "epoch": 0.16636633275505064, "grad_norm": 0.9658177919701865, "learning_rate": 1.9041442378607975e-05, "loss": 0.9285, "step": 1858 }, { "epoch": 0.1664558733001287, "grad_norm": 1.1002497839677383, "learning_rate": 1.9040202915766452e-05, "loss": 0.8653, "step": 1859 }, { "epoch": 0.16654541384520677, "grad_norm": 0.9560338476802797, "learning_rate": 1.9038962692492522e-05, "loss": 0.9056, "step": 1860 }, { "epoch": 0.16663495439028486, "grad_norm": 0.911403953562522, "learning_rate": 1.9037721708890503e-05, "loss": 0.8939, "step": 1861 }, { "epoch": 0.16672449493536293, "grad_norm": 1.0665333525466567, "learning_rate": 1.903647996506479e-05, "loss": 0.891, "step": 1862 }, { "epoch": 0.166814035480441, "grad_norm": 0.9321967811423212, "learning_rate": 1.9035237461119822e-05, "loss": 0.8011, "step": 1863 }, { "epoch": 0.16690357602551906, "grad_norm": 0.9997822119708683, "learning_rate": 1.9033994197160127e-05, "loss": 0.9351, "step": 1864 }, { "epoch": 0.16699311657059712, "grad_norm": 1.1031609487340701, "learning_rate": 1.9032750173290274e-05, "loss": 0.9047, "step": 1865 }, { "epoch": 0.1670826571156752, "grad_norm": 1.0442359335015263, "learning_rate": 1.9031505389614918e-05, "loss": 0.9414, "step": 1866 }, { "epoch": 0.16717219766075325, "grad_norm": 1.0181507344969802, "learning_rate": 1.9030259846238753e-05, "loss": 0.902, "step": 1867 }, { "epoch": 0.16726173820583132, "grad_norm": 0.9440914233838309, "learning_rate": 1.9029013543266562e-05, "loss": 0.9386, "step": 1868 }, { "epoch": 0.16735127875090938, "grad_norm": 1.046613697917735, "learning_rate": 1.9027766480803173e-05, "loss": 0.953, "step": 1869 }, { "epoch": 0.16744081929598748, "grad_norm": 1.092403235632166, "learning_rate": 1.9026518658953487e-05, "loss": 0.8567, "step": 1870 }, { "epoch": 0.16753035984106554, "grad_norm": 0.9469343959822131, "learning_rate": 1.9025270077822467e-05, "loss": 0.8834, "step": 1871 }, { "epoch": 0.1676199003861436, "grad_norm": 0.9201625504887306, "learning_rate": 1.9024020737515135e-05, "loss": 0.9375, "step": 1872 }, { "epoch": 0.16770944093122167, "grad_norm": 0.9710181610723885, "learning_rate": 1.902277063813659e-05, "loss": 0.9332, "step": 1873 }, { "epoch": 0.16779898147629974, "grad_norm": 1.0348670443098078, "learning_rate": 1.9021519779791978e-05, "loss": 0.8799, "step": 1874 }, { "epoch": 0.1678885220213778, "grad_norm": 1.0588430587991342, "learning_rate": 1.902026816258652e-05, "loss": 0.873, "step": 1875 }, { "epoch": 0.16797806256645587, "grad_norm": 0.9417594834625934, "learning_rate": 1.90190157866255e-05, "loss": 0.8742, "step": 1876 }, { "epoch": 0.16806760311153393, "grad_norm": 0.8651762928670946, "learning_rate": 1.9017762652014262e-05, "loss": 0.8416, "step": 1877 }, { "epoch": 0.168157143656612, "grad_norm": 1.0421629194921471, "learning_rate": 1.901650875885822e-05, "loss": 0.8654, "step": 1878 }, { "epoch": 0.1682466842016901, "grad_norm": 0.8973432707429861, "learning_rate": 1.9015254107262836e-05, "loss": 0.882, "step": 1879 }, { "epoch": 0.16833622474676815, "grad_norm": 1.0250087707742168, "learning_rate": 1.901399869733366e-05, "loss": 0.9123, "step": 1880 }, { "epoch": 0.16842576529184622, "grad_norm": 0.9608320290386494, "learning_rate": 1.901274252917629e-05, "loss": 0.9088, "step": 1881 }, { "epoch": 0.16851530583692428, "grad_norm": 5.517191370791696, "learning_rate": 1.901148560289638e-05, "loss": 0.8773, "step": 1882 }, { "epoch": 0.16860484638200235, "grad_norm": 0.9464700512641965, "learning_rate": 1.901022791859967e-05, "loss": 0.8622, "step": 1883 }, { "epoch": 0.16869438692708041, "grad_norm": 1.0715224470045512, "learning_rate": 1.9008969476391952e-05, "loss": 0.8893, "step": 1884 }, { "epoch": 0.16878392747215848, "grad_norm": 0.8770921005961276, "learning_rate": 1.9007710276379077e-05, "loss": 0.879, "step": 1885 }, { "epoch": 0.16887346801723654, "grad_norm": 0.9446411445367439, "learning_rate": 1.9006450318666966e-05, "loss": 0.8671, "step": 1886 }, { "epoch": 0.1689630085623146, "grad_norm": 0.877136694507198, "learning_rate": 1.9005189603361605e-05, "loss": 0.9676, "step": 1887 }, { "epoch": 0.1690525491073927, "grad_norm": 0.9341515732258432, "learning_rate": 1.900392813056904e-05, "loss": 0.8887, "step": 1888 }, { "epoch": 0.16914208965247077, "grad_norm": 0.9627749431945741, "learning_rate": 1.900266590039538e-05, "loss": 0.872, "step": 1889 }, { "epoch": 0.16923163019754883, "grad_norm": 1.0706570342788464, "learning_rate": 1.9001402912946804e-05, "loss": 0.9331, "step": 1890 }, { "epoch": 0.1693211707426269, "grad_norm": 1.076337435271884, "learning_rate": 1.9000139168329548e-05, "loss": 0.8734, "step": 1891 }, { "epoch": 0.16941071128770496, "grad_norm": 0.9481358249743946, "learning_rate": 1.8998874666649913e-05, "loss": 0.9072, "step": 1892 }, { "epoch": 0.16950025183278303, "grad_norm": 1.236713736962692, "learning_rate": 1.8997609408014263e-05, "loss": 0.9773, "step": 1893 }, { "epoch": 0.1695897923778611, "grad_norm": 1.0123854532286096, "learning_rate": 1.8996343392529034e-05, "loss": 0.9141, "step": 1894 }, { "epoch": 0.16967933292293916, "grad_norm": 1.0006360089868338, "learning_rate": 1.8995076620300714e-05, "loss": 0.8859, "step": 1895 }, { "epoch": 0.16976887346801722, "grad_norm": 0.9121666268264655, "learning_rate": 1.899380909143586e-05, "loss": 0.8843, "step": 1896 }, { "epoch": 0.16985841401309532, "grad_norm": 0.9341349126299177, "learning_rate": 1.8992540806041097e-05, "loss": 0.932, "step": 1897 }, { "epoch": 0.16994795455817338, "grad_norm": 0.9769853749130047, "learning_rate": 1.89912717642231e-05, "loss": 0.9209, "step": 1898 }, { "epoch": 0.17003749510325145, "grad_norm": 1.0421573682821756, "learning_rate": 1.8990001966088628e-05, "loss": 0.928, "step": 1899 }, { "epoch": 0.1701270356483295, "grad_norm": 0.9105995475918409, "learning_rate": 1.8988731411744482e-05, "loss": 0.9085, "step": 1900 }, { "epoch": 0.17021657619340758, "grad_norm": 1.0099341283769252, "learning_rate": 1.8987460101297542e-05, "loss": 0.8693, "step": 1901 }, { "epoch": 0.17030611673848564, "grad_norm": 0.9516414015811238, "learning_rate": 1.8986188034854744e-05, "loss": 0.8568, "step": 1902 }, { "epoch": 0.1703956572835637, "grad_norm": 0.9041235781042996, "learning_rate": 1.8984915212523093e-05, "loss": 0.8967, "step": 1903 }, { "epoch": 0.17048519782864177, "grad_norm": 0.9248095843562465, "learning_rate": 1.8983641634409657e-05, "loss": 0.8873, "step": 1904 }, { "epoch": 0.17057473837371984, "grad_norm": 1.010821377772331, "learning_rate": 1.898236730062156e-05, "loss": 0.8491, "step": 1905 }, { "epoch": 0.17066427891879793, "grad_norm": 0.9792816403181784, "learning_rate": 1.8981092211265994e-05, "loss": 0.8625, "step": 1906 }, { "epoch": 0.170753819463876, "grad_norm": 0.8933261309732216, "learning_rate": 1.897981636645022e-05, "loss": 0.9004, "step": 1907 }, { "epoch": 0.17084336000895406, "grad_norm": 1.0312355734756795, "learning_rate": 1.8978539766281557e-05, "loss": 0.8761, "step": 1908 }, { "epoch": 0.17093290055403212, "grad_norm": 0.8857776422305738, "learning_rate": 1.8977262410867383e-05, "loss": 0.9252, "step": 1909 }, { "epoch": 0.1710224410991102, "grad_norm": 0.9220160744778109, "learning_rate": 1.8975984300315154e-05, "loss": 0.8592, "step": 1910 }, { "epoch": 0.17111198164418825, "grad_norm": 1.0660569245809242, "learning_rate": 1.8974705434732376e-05, "loss": 0.8756, "step": 1911 }, { "epoch": 0.17120152218926632, "grad_norm": 1.2439466997590156, "learning_rate": 1.8973425814226618e-05, "loss": 0.8799, "step": 1912 }, { "epoch": 0.17129106273434438, "grad_norm": 0.9680548658929549, "learning_rate": 1.897214543890552e-05, "loss": 0.8713, "step": 1913 }, { "epoch": 0.17138060327942245, "grad_norm": 0.9218203896462943, "learning_rate": 1.897086430887679e-05, "loss": 0.8642, "step": 1914 }, { "epoch": 0.17147014382450054, "grad_norm": 0.8664864799190866, "learning_rate": 1.896958242424819e-05, "loss": 0.8662, "step": 1915 }, { "epoch": 0.1715596843695786, "grad_norm": 1.0732704995522417, "learning_rate": 1.8968299785127544e-05, "loss": 0.9084, "step": 1916 }, { "epoch": 0.17164922491465667, "grad_norm": 1.0888010343701422, "learning_rate": 1.8967016391622746e-05, "loss": 0.8602, "step": 1917 }, { "epoch": 0.17173876545973474, "grad_norm": 0.9788651414988516, "learning_rate": 1.896573224384175e-05, "loss": 0.9244, "step": 1918 }, { "epoch": 0.1718283060048128, "grad_norm": 0.8998033529415681, "learning_rate": 1.896444734189257e-05, "loss": 0.9174, "step": 1919 }, { "epoch": 0.17191784654989087, "grad_norm": 0.98450095096618, "learning_rate": 1.8963161685883294e-05, "loss": 0.8778, "step": 1920 }, { "epoch": 0.17200738709496893, "grad_norm": 1.0300079714532986, "learning_rate": 1.8961875275922067e-05, "loss": 0.8764, "step": 1921 }, { "epoch": 0.172096927640047, "grad_norm": 1.025865030401448, "learning_rate": 1.8960588112117096e-05, "loss": 0.8795, "step": 1922 }, { "epoch": 0.17218646818512506, "grad_norm": 0.9176159898764925, "learning_rate": 1.8959300194576654e-05, "loss": 0.8309, "step": 1923 }, { "epoch": 0.17227600873020316, "grad_norm": 0.9357280883796574, "learning_rate": 1.895801152340907e-05, "loss": 0.8989, "step": 1924 }, { "epoch": 0.17236554927528122, "grad_norm": 0.9164941714889159, "learning_rate": 1.895672209872275e-05, "loss": 0.8966, "step": 1925 }, { "epoch": 0.1724550898203593, "grad_norm": 1.0536675004756033, "learning_rate": 1.8955431920626158e-05, "loss": 0.8675, "step": 1926 }, { "epoch": 0.17254463036543735, "grad_norm": 0.9925928295973078, "learning_rate": 1.8954140989227815e-05, "loss": 0.8649, "step": 1927 }, { "epoch": 0.17263417091051542, "grad_norm": 1.0723251544668253, "learning_rate": 1.895284930463631e-05, "loss": 0.9037, "step": 1928 }, { "epoch": 0.17272371145559348, "grad_norm": 0.9886715009610579, "learning_rate": 1.8951556866960295e-05, "loss": 0.935, "step": 1929 }, { "epoch": 0.17281325200067155, "grad_norm": 1.1528987428472375, "learning_rate": 1.8950263676308486e-05, "loss": 0.9257, "step": 1930 }, { "epoch": 0.1729027925457496, "grad_norm": 1.1746387736076054, "learning_rate": 1.8948969732789666e-05, "loss": 0.8688, "step": 1931 }, { "epoch": 0.17299233309082768, "grad_norm": 0.959339659138055, "learning_rate": 1.8947675036512673e-05, "loss": 0.9206, "step": 1932 }, { "epoch": 0.17308187363590577, "grad_norm": 0.9474846603977088, "learning_rate": 1.894637958758641e-05, "loss": 0.9444, "step": 1933 }, { "epoch": 0.17317141418098383, "grad_norm": 0.9375627615846007, "learning_rate": 1.8945083386119853e-05, "loss": 0.9152, "step": 1934 }, { "epoch": 0.1732609547260619, "grad_norm": 0.9296436663776422, "learning_rate": 1.8943786432222032e-05, "loss": 0.8914, "step": 1935 }, { "epoch": 0.17335049527113996, "grad_norm": 1.2362422162629454, "learning_rate": 1.894248872600204e-05, "loss": 0.8218, "step": 1936 }, { "epoch": 0.17344003581621803, "grad_norm": 1.0040378164547326, "learning_rate": 1.8941190267569038e-05, "loss": 0.8677, "step": 1937 }, { "epoch": 0.1735295763612961, "grad_norm": 1.1729720800843382, "learning_rate": 1.893989105703225e-05, "loss": 0.8739, "step": 1938 }, { "epoch": 0.17361911690637416, "grad_norm": 0.9567295614841844, "learning_rate": 1.8938591094500953e-05, "loss": 0.8262, "step": 1939 }, { "epoch": 0.17370865745145223, "grad_norm": 0.8783872769783507, "learning_rate": 1.8937290380084502e-05, "loss": 0.9122, "step": 1940 }, { "epoch": 0.1737981979965303, "grad_norm": 0.9887254332103576, "learning_rate": 1.8935988913892314e-05, "loss": 0.8431, "step": 1941 }, { "epoch": 0.17388773854160838, "grad_norm": 0.946956199177709, "learning_rate": 1.8934686696033853e-05, "loss": 0.93, "step": 1942 }, { "epoch": 0.17397727908668645, "grad_norm": 0.9526948957954328, "learning_rate": 1.8933383726618663e-05, "loss": 0.9021, "step": 1943 }, { "epoch": 0.1740668196317645, "grad_norm": 0.9566142561413987, "learning_rate": 1.8932080005756348e-05, "loss": 0.8864, "step": 1944 }, { "epoch": 0.17415636017684258, "grad_norm": 1.052273589932486, "learning_rate": 1.893077553355657e-05, "loss": 0.8982, "step": 1945 }, { "epoch": 0.17424590072192064, "grad_norm": 1.1067352925842118, "learning_rate": 1.8929470310129052e-05, "loss": 0.8844, "step": 1946 }, { "epoch": 0.1743354412669987, "grad_norm": 1.2879206912230712, "learning_rate": 1.8928164335583596e-05, "loss": 0.9149, "step": 1947 }, { "epoch": 0.17442498181207677, "grad_norm": 0.9687815595795757, "learning_rate": 1.8926857610030044e-05, "loss": 0.8803, "step": 1948 }, { "epoch": 0.17451452235715484, "grad_norm": 1.0532763762915853, "learning_rate": 1.8925550133578326e-05, "loss": 0.93, "step": 1949 }, { "epoch": 0.1746040629022329, "grad_norm": 0.9989442239771669, "learning_rate": 1.8924241906338413e-05, "loss": 0.8685, "step": 1950 }, { "epoch": 0.174693603447311, "grad_norm": 1.0186924745351928, "learning_rate": 1.8922932928420354e-05, "loss": 0.8627, "step": 1951 }, { "epoch": 0.17478314399238906, "grad_norm": 0.9746583948679277, "learning_rate": 1.8921623199934255e-05, "loss": 0.8504, "step": 1952 }, { "epoch": 0.17487268453746713, "grad_norm": 1.0184352792648206, "learning_rate": 1.8920312720990283e-05, "loss": 0.889, "step": 1953 }, { "epoch": 0.1749622250825452, "grad_norm": 1.1372382777340262, "learning_rate": 1.8919001491698674e-05, "loss": 0.8855, "step": 1954 }, { "epoch": 0.17505176562762326, "grad_norm": 1.0043620641668143, "learning_rate": 1.8917689512169724e-05, "loss": 0.875, "step": 1955 }, { "epoch": 0.17514130617270132, "grad_norm": 0.8774619443162723, "learning_rate": 1.8916376782513792e-05, "loss": 0.8643, "step": 1956 }, { "epoch": 0.1752308467177794, "grad_norm": 0.9234019514091268, "learning_rate": 1.8915063302841302e-05, "loss": 0.9038, "step": 1957 }, { "epoch": 0.17532038726285745, "grad_norm": 1.02240273572741, "learning_rate": 1.8913749073262738e-05, "loss": 0.9436, "step": 1958 }, { "epoch": 0.17540992780793552, "grad_norm": 0.9910022333390955, "learning_rate": 1.891243409388865e-05, "loss": 0.933, "step": 1959 }, { "epoch": 0.1754994683530136, "grad_norm": 1.2008713326800773, "learning_rate": 1.891111836482965e-05, "loss": 0.9051, "step": 1960 }, { "epoch": 0.17558900889809168, "grad_norm": 1.0097053014828616, "learning_rate": 1.890980188619641e-05, "loss": 0.838, "step": 1961 }, { "epoch": 0.17567854944316974, "grad_norm": 0.8941981883511267, "learning_rate": 1.890848465809967e-05, "loss": 0.8881, "step": 1962 }, { "epoch": 0.1757680899882478, "grad_norm": 0.9372976042199797, "learning_rate": 1.890716668065023e-05, "loss": 0.8245, "step": 1963 }, { "epoch": 0.17585763053332587, "grad_norm": 1.1984549365445827, "learning_rate": 1.8905847953958954e-05, "loss": 0.9745, "step": 1964 }, { "epoch": 0.17594717107840394, "grad_norm": 0.9615371902591314, "learning_rate": 1.890452847813677e-05, "loss": 0.9076, "step": 1965 }, { "epoch": 0.176036711623482, "grad_norm": 1.0179971658018399, "learning_rate": 1.8903208253294667e-05, "loss": 0.9141, "step": 1966 }, { "epoch": 0.17612625216856007, "grad_norm": 1.1467486860283234, "learning_rate": 1.89018872795437e-05, "loss": 0.8949, "step": 1967 }, { "epoch": 0.17621579271363813, "grad_norm": 0.8495222638424595, "learning_rate": 1.8900565556994986e-05, "loss": 0.8654, "step": 1968 }, { "epoch": 0.17630533325871622, "grad_norm": 1.0674984878548497, "learning_rate": 1.88992430857597e-05, "loss": 0.8856, "step": 1969 }, { "epoch": 0.1763948738037943, "grad_norm": 1.0298993416395037, "learning_rate": 1.8897919865949083e-05, "loss": 0.8239, "step": 1970 }, { "epoch": 0.17648441434887235, "grad_norm": 0.8821067762766143, "learning_rate": 1.8896595897674446e-05, "loss": 0.9071, "step": 1971 }, { "epoch": 0.17657395489395042, "grad_norm": 0.9679190447891308, "learning_rate": 1.8895271181047152e-05, "loss": 0.9084, "step": 1972 }, { "epoch": 0.17666349543902848, "grad_norm": 0.9875707276233355, "learning_rate": 1.889394571617863e-05, "loss": 0.8196, "step": 1973 }, { "epoch": 0.17675303598410655, "grad_norm": 0.9531678573190864, "learning_rate": 1.889261950318038e-05, "loss": 0.8799, "step": 1974 }, { "epoch": 0.17684257652918461, "grad_norm": 0.9211076513396957, "learning_rate": 1.8891292542163958e-05, "loss": 0.8719, "step": 1975 }, { "epoch": 0.17693211707426268, "grad_norm": 1.0427539320226746, "learning_rate": 1.8889964833240983e-05, "loss": 0.8605, "step": 1976 }, { "epoch": 0.17702165761934074, "grad_norm": 0.975239870947061, "learning_rate": 1.8888636376523132e-05, "loss": 0.8577, "step": 1977 }, { "epoch": 0.17711119816441884, "grad_norm": 1.1353075959896342, "learning_rate": 1.8887307172122154e-05, "loss": 0.9429, "step": 1978 }, { "epoch": 0.1772007387094969, "grad_norm": 0.9586216357570077, "learning_rate": 1.888597722014986e-05, "loss": 0.934, "step": 1979 }, { "epoch": 0.17729027925457497, "grad_norm": 0.9955488744328173, "learning_rate": 1.8884646520718117e-05, "loss": 0.9025, "step": 1980 }, { "epoch": 0.17737981979965303, "grad_norm": 0.9295597559787909, "learning_rate": 1.888331507393886e-05, "loss": 0.9435, "step": 1981 }, { "epoch": 0.1774693603447311, "grad_norm": 1.080657280996886, "learning_rate": 1.888198287992409e-05, "loss": 0.877, "step": 1982 }, { "epoch": 0.17755890088980916, "grad_norm": 0.9610678715193095, "learning_rate": 1.888064993878586e-05, "loss": 0.8843, "step": 1983 }, { "epoch": 0.17764844143488723, "grad_norm": 0.9993026679773763, "learning_rate": 1.8879316250636305e-05, "loss": 0.899, "step": 1984 }, { "epoch": 0.1777379819799653, "grad_norm": 1.044155063318996, "learning_rate": 1.8877981815587594e-05, "loss": 0.8823, "step": 1985 }, { "epoch": 0.17782752252504336, "grad_norm": 0.9108182858953897, "learning_rate": 1.8876646633751986e-05, "loss": 0.8294, "step": 1986 }, { "epoch": 0.17791706307012145, "grad_norm": 0.9715817475699078, "learning_rate": 1.8875310705241793e-05, "loss": 0.8981, "step": 1987 }, { "epoch": 0.17800660361519952, "grad_norm": 0.9917234832365002, "learning_rate": 1.887397403016938e-05, "loss": 0.8882, "step": 1988 }, { "epoch": 0.17809614416027758, "grad_norm": 1.1369931466621894, "learning_rate": 1.887263660864719e-05, "loss": 0.9686, "step": 1989 }, { "epoch": 0.17818568470535565, "grad_norm": 0.9635371142542071, "learning_rate": 1.8871298440787724e-05, "loss": 0.9622, "step": 1990 }, { "epoch": 0.1782752252504337, "grad_norm": 0.9791321740034132, "learning_rate": 1.886995952670354e-05, "loss": 0.9325, "step": 1991 }, { "epoch": 0.17836476579551178, "grad_norm": 0.9597079835037503, "learning_rate": 1.8868619866507268e-05, "loss": 0.8883, "step": 1992 }, { "epoch": 0.17845430634058984, "grad_norm": 1.0297552557669538, "learning_rate": 1.886727946031159e-05, "loss": 0.8132, "step": 1993 }, { "epoch": 0.1785438468856679, "grad_norm": 0.9762845924503495, "learning_rate": 1.886593830822926e-05, "loss": 0.8892, "step": 1994 }, { "epoch": 0.17863338743074597, "grad_norm": 1.131954070432886, "learning_rate": 1.8864596410373092e-05, "loss": 0.8654, "step": 1995 }, { "epoch": 0.17872292797582406, "grad_norm": 1.0182147426726, "learning_rate": 1.8863253766855964e-05, "loss": 0.888, "step": 1996 }, { "epoch": 0.17881246852090213, "grad_norm": 1.004129999434742, "learning_rate": 1.8861910377790807e-05, "loss": 0.8764, "step": 1997 }, { "epoch": 0.1789020090659802, "grad_norm": 0.9796451968213635, "learning_rate": 1.886056624329063e-05, "loss": 0.8406, "step": 1998 }, { "epoch": 0.17899154961105826, "grad_norm": 1.0282317316256078, "learning_rate": 1.8859221363468493e-05, "loss": 0.8934, "step": 1999 }, { "epoch": 0.17908109015613632, "grad_norm": 0.942954338174738, "learning_rate": 1.8857875738437526e-05, "loss": 0.899, "step": 2000 }, { "epoch": 0.1791706307012144, "grad_norm": 1.0203033012252298, "learning_rate": 1.8856529368310916e-05, "loss": 0.8698, "step": 2001 }, { "epoch": 0.17926017124629245, "grad_norm": 0.9989036434597025, "learning_rate": 1.885518225320192e-05, "loss": 0.8436, "step": 2002 }, { "epoch": 0.17934971179137052, "grad_norm": 1.0599084585580012, "learning_rate": 1.8853834393223843e-05, "loss": 0.8663, "step": 2003 }, { "epoch": 0.17943925233644858, "grad_norm": 1.037842671893135, "learning_rate": 1.885248578849007e-05, "loss": 0.912, "step": 2004 }, { "epoch": 0.17952879288152668, "grad_norm": 1.0863653806701572, "learning_rate": 1.8851136439114045e-05, "loss": 0.9749, "step": 2005 }, { "epoch": 0.17961833342660474, "grad_norm": 0.9655098037867771, "learning_rate": 1.884978634520926e-05, "loss": 0.9255, "step": 2006 }, { "epoch": 0.1797078739716828, "grad_norm": 1.2446128238837677, "learning_rate": 1.884843550688929e-05, "loss": 0.9274, "step": 2007 }, { "epoch": 0.17979741451676087, "grad_norm": 6.753738439110338, "learning_rate": 1.884708392426776e-05, "loss": 0.9271, "step": 2008 }, { "epoch": 0.17988695506183894, "grad_norm": 1.0231375711950297, "learning_rate": 1.884573159745836e-05, "loss": 0.8739, "step": 2009 }, { "epoch": 0.179976495606917, "grad_norm": 0.9793432448990642, "learning_rate": 1.884437852657484e-05, "loss": 0.8788, "step": 2010 }, { "epoch": 0.18006603615199507, "grad_norm": 1.200031794811684, "learning_rate": 1.8843024711731023e-05, "loss": 0.8713, "step": 2011 }, { "epoch": 0.18015557669707313, "grad_norm": 1.1199835683931725, "learning_rate": 1.8841670153040785e-05, "loss": 0.9151, "step": 2012 }, { "epoch": 0.1802451172421512, "grad_norm": 1.053122099573849, "learning_rate": 1.8840314850618063e-05, "loss": 0.8443, "step": 2013 }, { "epoch": 0.1803346577872293, "grad_norm": 0.9759706156010873, "learning_rate": 1.8838958804576866e-05, "loss": 0.8576, "step": 2014 }, { "epoch": 0.18042419833230736, "grad_norm": 0.9065741181637583, "learning_rate": 1.8837602015031256e-05, "loss": 0.8665, "step": 2015 }, { "epoch": 0.18051373887738542, "grad_norm": 0.9680444180198995, "learning_rate": 1.8836244482095366e-05, "loss": 0.9128, "step": 2016 }, { "epoch": 0.1806032794224635, "grad_norm": 0.9433064676561327, "learning_rate": 1.8834886205883386e-05, "loss": 0.8972, "step": 2017 }, { "epoch": 0.18069281996754155, "grad_norm": 0.950258197124031, "learning_rate": 1.8833527186509566e-05, "loss": 0.9339, "step": 2018 }, { "epoch": 0.18078236051261962, "grad_norm": 0.935048411436469, "learning_rate": 1.8832167424088226e-05, "loss": 0.8173, "step": 2019 }, { "epoch": 0.18087190105769768, "grad_norm": 1.0262057360883636, "learning_rate": 1.8830806918733743e-05, "loss": 0.8968, "step": 2020 }, { "epoch": 0.18096144160277575, "grad_norm": 0.9350930834619838, "learning_rate": 1.8829445670560557e-05, "loss": 0.8876, "step": 2021 }, { "epoch": 0.1810509821478538, "grad_norm": 1.0589787654195535, "learning_rate": 1.8828083679683174e-05, "loss": 0.8945, "step": 2022 }, { "epoch": 0.1811405226929319, "grad_norm": 0.8714957365065938, "learning_rate": 1.8826720946216164e-05, "loss": 0.8986, "step": 2023 }, { "epoch": 0.18123006323800997, "grad_norm": 0.966588582425351, "learning_rate": 1.882535747027415e-05, "loss": 0.8878, "step": 2024 }, { "epoch": 0.18131960378308803, "grad_norm": 1.0022062829519047, "learning_rate": 1.8823993251971823e-05, "loss": 0.8703, "step": 2025 }, { "epoch": 0.1814091443281661, "grad_norm": 0.8364996371098609, "learning_rate": 1.882262829142394e-05, "loss": 0.8267, "step": 2026 }, { "epoch": 0.18149868487324416, "grad_norm": 0.9858773258946832, "learning_rate": 1.882126258874532e-05, "loss": 0.9155, "step": 2027 }, { "epoch": 0.18158822541832223, "grad_norm": 1.1291844532575677, "learning_rate": 1.881989614405083e-05, "loss": 0.9264, "step": 2028 }, { "epoch": 0.1816777659634003, "grad_norm": 0.9480222985302227, "learning_rate": 1.8818528957455418e-05, "loss": 0.8741, "step": 2029 }, { "epoch": 0.18176730650847836, "grad_norm": 1.0501255791222808, "learning_rate": 1.881716102907409e-05, "loss": 0.8373, "step": 2030 }, { "epoch": 0.18185684705355643, "grad_norm": 1.0869242750927197, "learning_rate": 1.8815792359021906e-05, "loss": 0.9266, "step": 2031 }, { "epoch": 0.18194638759863452, "grad_norm": 0.970630873189667, "learning_rate": 1.8814422947414e-05, "loss": 0.8465, "step": 2032 }, { "epoch": 0.18203592814371258, "grad_norm": 0.9999269025769625, "learning_rate": 1.8813052794365557e-05, "loss": 0.9291, "step": 2033 }, { "epoch": 0.18212546868879065, "grad_norm": 1.0064082224227495, "learning_rate": 1.8811681899991835e-05, "loss": 0.8799, "step": 2034 }, { "epoch": 0.1822150092338687, "grad_norm": 0.94399842577035, "learning_rate": 1.8810310264408144e-05, "loss": 0.8844, "step": 2035 }, { "epoch": 0.18230454977894678, "grad_norm": 1.1004251782488281, "learning_rate": 1.880893788772986e-05, "loss": 0.9008, "step": 2036 }, { "epoch": 0.18239409032402484, "grad_norm": 1.069211403046441, "learning_rate": 1.880756477007243e-05, "loss": 0.9234, "step": 2037 }, { "epoch": 0.1824836308691029, "grad_norm": 0.9273609457683307, "learning_rate": 1.8806190911551354e-05, "loss": 0.7874, "step": 2038 }, { "epoch": 0.18257317141418097, "grad_norm": 0.9926060265694593, "learning_rate": 1.8804816312282196e-05, "loss": 0.8345, "step": 2039 }, { "epoch": 0.18266271195925904, "grad_norm": 0.9889033596799334, "learning_rate": 1.880344097238058e-05, "loss": 0.9098, "step": 2040 }, { "epoch": 0.18275225250433713, "grad_norm": 0.8992970800221503, "learning_rate": 1.8802064891962196e-05, "loss": 0.8625, "step": 2041 }, { "epoch": 0.1828417930494152, "grad_norm": 1.1380749344208796, "learning_rate": 1.88006880711428e-05, "loss": 0.8794, "step": 2042 }, { "epoch": 0.18293133359449326, "grad_norm": 1.1643531894166756, "learning_rate": 1.87993105100382e-05, "loss": 0.8687, "step": 2043 }, { "epoch": 0.18302087413957133, "grad_norm": 1.0880932481635293, "learning_rate": 1.8797932208764276e-05, "loss": 0.9529, "step": 2044 }, { "epoch": 0.1831104146846494, "grad_norm": 1.1193247354078457, "learning_rate": 1.8796553167436964e-05, "loss": 0.9103, "step": 2045 }, { "epoch": 0.18319995522972746, "grad_norm": 1.0207537626468697, "learning_rate": 1.8795173386172263e-05, "loss": 0.894, "step": 2046 }, { "epoch": 0.18328949577480552, "grad_norm": 1.084818714721135, "learning_rate": 1.879379286508624e-05, "loss": 0.8572, "step": 2047 }, { "epoch": 0.1833790363198836, "grad_norm": 1.0572124454107197, "learning_rate": 1.8792411604295016e-05, "loss": 0.8582, "step": 2048 }, { "epoch": 0.18346857686496165, "grad_norm": 0.9576980217347596, "learning_rate": 1.8791029603914782e-05, "loss": 0.8668, "step": 2049 }, { "epoch": 0.18355811741003975, "grad_norm": 1.1093392048169, "learning_rate": 1.8789646864061782e-05, "loss": 0.8285, "step": 2050 }, { "epoch": 0.1836476579551178, "grad_norm": 1.1694283818505133, "learning_rate": 1.878826338485233e-05, "loss": 0.901, "step": 2051 }, { "epoch": 0.18373719850019588, "grad_norm": 1.0121608726717621, "learning_rate": 1.8786879166402804e-05, "loss": 0.9278, "step": 2052 }, { "epoch": 0.18382673904527394, "grad_norm": 1.0120642871411525, "learning_rate": 1.8785494208829632e-05, "loss": 0.8472, "step": 2053 }, { "epoch": 0.183916279590352, "grad_norm": 0.9770045902225385, "learning_rate": 1.878410851224932e-05, "loss": 0.8985, "step": 2054 }, { "epoch": 0.18400582013543007, "grad_norm": 0.9866142002676872, "learning_rate": 1.8782722076778426e-05, "loss": 0.8796, "step": 2055 }, { "epoch": 0.18409536068050814, "grad_norm": 0.9823511032845382, "learning_rate": 1.8781334902533567e-05, "loss": 0.8954, "step": 2056 }, { "epoch": 0.1841849012255862, "grad_norm": 0.9719154255052916, "learning_rate": 1.8779946989631437e-05, "loss": 0.8636, "step": 2057 }, { "epoch": 0.18427444177066427, "grad_norm": 0.935355186439493, "learning_rate": 1.8778558338188775e-05, "loss": 0.8678, "step": 2058 }, { "epoch": 0.18436398231574236, "grad_norm": 0.9217521270310518, "learning_rate": 1.877716894832239e-05, "loss": 0.9128, "step": 2059 }, { "epoch": 0.18445352286082042, "grad_norm": 1.0759196106492392, "learning_rate": 1.8775778820149155e-05, "loss": 0.9239, "step": 2060 }, { "epoch": 0.1845430634058985, "grad_norm": 1.0217565670205238, "learning_rate": 1.8774387953786006e-05, "loss": 0.8621, "step": 2061 }, { "epoch": 0.18463260395097655, "grad_norm": 0.9681408005270951, "learning_rate": 1.8772996349349934e-05, "loss": 0.8904, "step": 2062 }, { "epoch": 0.18472214449605462, "grad_norm": 0.9839167295888791, "learning_rate": 1.8771604006958e-05, "loss": 0.8939, "step": 2063 }, { "epoch": 0.18481168504113268, "grad_norm": 1.125242616450166, "learning_rate": 1.877021092672732e-05, "loss": 0.9073, "step": 2064 }, { "epoch": 0.18490122558621075, "grad_norm": 1.0733566456704193, "learning_rate": 1.8768817108775075e-05, "loss": 0.9191, "step": 2065 }, { "epoch": 0.18499076613128881, "grad_norm": 0.9051700593408406, "learning_rate": 1.876742255321851e-05, "loss": 0.8421, "step": 2066 }, { "epoch": 0.18508030667636688, "grad_norm": 0.9897681071811837, "learning_rate": 1.876602726017493e-05, "loss": 0.9171, "step": 2067 }, { "epoch": 0.18516984722144497, "grad_norm": 0.874721996556142, "learning_rate": 1.8764631229761707e-05, "loss": 0.8891, "step": 2068 }, { "epoch": 0.18525938776652304, "grad_norm": 0.9174687560671284, "learning_rate": 1.876323446209626e-05, "loss": 0.8829, "step": 2069 }, { "epoch": 0.1853489283116011, "grad_norm": 1.0669450954653164, "learning_rate": 1.876183695729609e-05, "loss": 0.9208, "step": 2070 }, { "epoch": 0.18543846885667917, "grad_norm": 1.0018635800480915, "learning_rate": 1.8760438715478747e-05, "loss": 0.8795, "step": 2071 }, { "epoch": 0.18552800940175723, "grad_norm": 0.9619539045919628, "learning_rate": 1.875903973676185e-05, "loss": 0.8464, "step": 2072 }, { "epoch": 0.1856175499468353, "grad_norm": 0.9941366303740476, "learning_rate": 1.875764002126307e-05, "loss": 0.8883, "step": 2073 }, { "epoch": 0.18570709049191336, "grad_norm": 0.9539166664959589, "learning_rate": 1.875623956910015e-05, "loss": 0.8937, "step": 2074 }, { "epoch": 0.18579663103699143, "grad_norm": 0.9326793686742503, "learning_rate": 1.8754838380390888e-05, "loss": 0.8899, "step": 2075 }, { "epoch": 0.1858861715820695, "grad_norm": 1.009732787098808, "learning_rate": 1.875343645525316e-05, "loss": 0.8378, "step": 2076 }, { "epoch": 0.18597571212714759, "grad_norm": 1.0426230750652328, "learning_rate": 1.8752033793804875e-05, "loss": 0.9094, "step": 2077 }, { "epoch": 0.18606525267222565, "grad_norm": 0.9304114399133168, "learning_rate": 1.875063039616403e-05, "loss": 0.8805, "step": 2078 }, { "epoch": 0.18615479321730372, "grad_norm": 0.8776070187754171, "learning_rate": 1.874922626244867e-05, "loss": 0.8949, "step": 2079 }, { "epoch": 0.18624433376238178, "grad_norm": 0.9009644197043234, "learning_rate": 1.874782139277691e-05, "loss": 0.8051, "step": 2080 }, { "epoch": 0.18633387430745985, "grad_norm": 1.0413676214409295, "learning_rate": 1.874641578726692e-05, "loss": 0.9021, "step": 2081 }, { "epoch": 0.1864234148525379, "grad_norm": 1.008199315049748, "learning_rate": 1.8745009446036934e-05, "loss": 0.8988, "step": 2082 }, { "epoch": 0.18651295539761598, "grad_norm": 0.9359498387700267, "learning_rate": 1.8743602369205253e-05, "loss": 0.863, "step": 2083 }, { "epoch": 0.18660249594269404, "grad_norm": 0.9488001418409905, "learning_rate": 1.8742194556890236e-05, "loss": 0.8817, "step": 2084 }, { "epoch": 0.1866920364877721, "grad_norm": 1.037717733786703, "learning_rate": 1.8740786009210298e-05, "loss": 0.8619, "step": 2085 }, { "epoch": 0.1867815770328502, "grad_norm": 1.0236975567061672, "learning_rate": 1.8739376726283925e-05, "loss": 0.9174, "step": 2086 }, { "epoch": 0.18687111757792826, "grad_norm": 1.0253607069403057, "learning_rate": 1.873796670822966e-05, "loss": 0.9356, "step": 2087 }, { "epoch": 0.18696065812300633, "grad_norm": 0.8855156958116601, "learning_rate": 1.873655595516611e-05, "loss": 0.8543, "step": 2088 }, { "epoch": 0.1870501986680844, "grad_norm": 0.9174525994301415, "learning_rate": 1.8735144467211945e-05, "loss": 0.8716, "step": 2089 }, { "epoch": 0.18713973921316246, "grad_norm": 0.9400546618503367, "learning_rate": 1.8733732244485893e-05, "loss": 0.8918, "step": 2090 }, { "epoch": 0.18722927975824052, "grad_norm": 0.9045414611022441, "learning_rate": 1.8732319287106743e-05, "loss": 0.9017, "step": 2091 }, { "epoch": 0.1873188203033186, "grad_norm": 1.0457875986672203, "learning_rate": 1.8730905595193353e-05, "loss": 0.9085, "step": 2092 }, { "epoch": 0.18740836084839665, "grad_norm": 1.1389311523716208, "learning_rate": 1.8729491168864634e-05, "loss": 0.8342, "step": 2093 }, { "epoch": 0.18749790139347472, "grad_norm": 0.9692127138714356, "learning_rate": 1.8728076008239563e-05, "loss": 0.8691, "step": 2094 }, { "epoch": 0.1875874419385528, "grad_norm": 1.0015255977192068, "learning_rate": 1.8726660113437182e-05, "loss": 0.8136, "step": 2095 }, { "epoch": 0.18767698248363088, "grad_norm": 0.8873289216654391, "learning_rate": 1.872524348457659e-05, "loss": 0.8381, "step": 2096 }, { "epoch": 0.18776652302870894, "grad_norm": 1.0218470372251216, "learning_rate": 1.872382612177695e-05, "loss": 0.9126, "step": 2097 }, { "epoch": 0.187856063573787, "grad_norm": 0.9329838498766835, "learning_rate": 1.8722408025157482e-05, "loss": 0.9278, "step": 2098 }, { "epoch": 0.18794560411886507, "grad_norm": 0.9727209010822274, "learning_rate": 1.872098919483748e-05, "loss": 0.8233, "step": 2099 }, { "epoch": 0.18803514466394314, "grad_norm": 0.9666381568045974, "learning_rate": 1.8719569630936284e-05, "loss": 0.882, "step": 2100 }, { "epoch": 0.1881246852090212, "grad_norm": 0.9611971981078845, "learning_rate": 1.8718149333573305e-05, "loss": 0.9304, "step": 2101 }, { "epoch": 0.18821422575409927, "grad_norm": 0.9399983640296502, "learning_rate": 1.871672830286801e-05, "loss": 0.836, "step": 2102 }, { "epoch": 0.18830376629917733, "grad_norm": 1.0298884568875413, "learning_rate": 1.871530653893994e-05, "loss": 0.8355, "step": 2103 }, { "epoch": 0.18839330684425543, "grad_norm": 1.0087656979303088, "learning_rate": 1.871388404190868e-05, "loss": 0.8535, "step": 2104 }, { "epoch": 0.1884828473893335, "grad_norm": 1.002981890065292, "learning_rate": 1.8712460811893892e-05, "loss": 0.9125, "step": 2105 }, { "epoch": 0.18857238793441156, "grad_norm": 0.8773538375393836, "learning_rate": 1.8711036849015295e-05, "loss": 0.8623, "step": 2106 }, { "epoch": 0.18866192847948962, "grad_norm": 0.9869255903207411, "learning_rate": 1.8709612153392663e-05, "loss": 0.9029, "step": 2107 }, { "epoch": 0.1887514690245677, "grad_norm": 0.9940119861790361, "learning_rate": 1.870818672514584e-05, "loss": 0.838, "step": 2108 }, { "epoch": 0.18884100956964575, "grad_norm": 1.0067281970204898, "learning_rate": 1.8706760564394725e-05, "loss": 0.8872, "step": 2109 }, { "epoch": 0.18893055011472382, "grad_norm": 0.947768659451969, "learning_rate": 1.8705333671259285e-05, "loss": 0.8742, "step": 2110 }, { "epoch": 0.18902009065980188, "grad_norm": 0.9404184235473504, "learning_rate": 1.8703906045859545e-05, "loss": 0.89, "step": 2111 }, { "epoch": 0.18910963120487995, "grad_norm": 0.9249328276985129, "learning_rate": 1.870247768831559e-05, "loss": 0.8998, "step": 2112 }, { "epoch": 0.18919917174995804, "grad_norm": 0.9871538664690727, "learning_rate": 1.8701048598747574e-05, "loss": 0.9299, "step": 2113 }, { "epoch": 0.1892887122950361, "grad_norm": 1.04691117593866, "learning_rate": 1.8699618777275704e-05, "loss": 0.9367, "step": 2114 }, { "epoch": 0.18937825284011417, "grad_norm": 0.8601600353953, "learning_rate": 1.869818822402025e-05, "loss": 0.8674, "step": 2115 }, { "epoch": 0.18946779338519223, "grad_norm": 1.044550566806482, "learning_rate": 1.8696756939101546e-05, "loss": 0.9302, "step": 2116 }, { "epoch": 0.1895573339302703, "grad_norm": 0.9283696375885887, "learning_rate": 1.8695324922639992e-05, "loss": 0.8722, "step": 2117 }, { "epoch": 0.18964687447534836, "grad_norm": 0.9217063591320144, "learning_rate": 1.8693892174756035e-05, "loss": 0.8277, "step": 2118 }, { "epoch": 0.18973641502042643, "grad_norm": 0.9992054859811078, "learning_rate": 1.8692458695570205e-05, "loss": 0.878, "step": 2119 }, { "epoch": 0.1898259555655045, "grad_norm": 0.9891229392484504, "learning_rate": 1.8691024485203075e-05, "loss": 0.9327, "step": 2120 }, { "epoch": 0.18991549611058256, "grad_norm": 1.0443381360713542, "learning_rate": 1.8689589543775285e-05, "loss": 0.8624, "step": 2121 }, { "epoch": 0.19000503665566065, "grad_norm": 0.9556460833741145, "learning_rate": 1.868815387140754e-05, "loss": 0.9356, "step": 2122 }, { "epoch": 0.19009457720073872, "grad_norm": 1.0982853055984374, "learning_rate": 1.86867174682206e-05, "loss": 0.9365, "step": 2123 }, { "epoch": 0.19018411774581678, "grad_norm": 1.0067646731769602, "learning_rate": 1.86852803343353e-05, "loss": 0.8566, "step": 2124 }, { "epoch": 0.19027365829089485, "grad_norm": 1.2497559647129648, "learning_rate": 1.8683842469872517e-05, "loss": 0.8695, "step": 2125 }, { "epoch": 0.1903631988359729, "grad_norm": 0.9356778576536655, "learning_rate": 1.8682403874953207e-05, "loss": 0.8578, "step": 2126 }, { "epoch": 0.19045273938105098, "grad_norm": 1.0060481871224578, "learning_rate": 1.8680964549698373e-05, "loss": 0.9394, "step": 2127 }, { "epoch": 0.19054227992612904, "grad_norm": 0.9329172447615051, "learning_rate": 1.867952449422909e-05, "loss": 0.8868, "step": 2128 }, { "epoch": 0.1906318204712071, "grad_norm": 0.9828821791916115, "learning_rate": 1.8678083708666494e-05, "loss": 0.9244, "step": 2129 }, { "epoch": 0.19072136101628517, "grad_norm": 0.9526600644982036, "learning_rate": 1.867664219313177e-05, "loss": 0.9117, "step": 2130 }, { "epoch": 0.19081090156136327, "grad_norm": 0.9110446366269302, "learning_rate": 1.8675199947746185e-05, "loss": 0.8708, "step": 2131 }, { "epoch": 0.19090044210644133, "grad_norm": 1.1928738394230822, "learning_rate": 1.8673756972631047e-05, "loss": 0.8943, "step": 2132 }, { "epoch": 0.1909899826515194, "grad_norm": 1.0251645327029184, "learning_rate": 1.867231326790774e-05, "loss": 0.8731, "step": 2133 }, { "epoch": 0.19107952319659746, "grad_norm": 0.9601199048308197, "learning_rate": 1.86708688336977e-05, "loss": 0.9122, "step": 2134 }, { "epoch": 0.19116906374167553, "grad_norm": 0.9276325767140973, "learning_rate": 1.866942367012243e-05, "loss": 0.8649, "step": 2135 }, { "epoch": 0.1912586042867536, "grad_norm": 1.0070167769698446, "learning_rate": 1.866797777730349e-05, "loss": 0.8273, "step": 2136 }, { "epoch": 0.19134814483183166, "grad_norm": 0.9087537750063099, "learning_rate": 1.8666531155362505e-05, "loss": 0.8516, "step": 2137 }, { "epoch": 0.19143768537690972, "grad_norm": 1.0700216002501721, "learning_rate": 1.8665083804421165e-05, "loss": 0.857, "step": 2138 }, { "epoch": 0.1915272259219878, "grad_norm": 1.0675483274181667, "learning_rate": 1.866363572460121e-05, "loss": 0.9227, "step": 2139 }, { "epoch": 0.19161676646706588, "grad_norm": 0.8782001475852552, "learning_rate": 1.8662186916024452e-05, "loss": 0.8594, "step": 2140 }, { "epoch": 0.19170630701214394, "grad_norm": 0.9290959052055816, "learning_rate": 1.8660737378812755e-05, "loss": 0.9098, "step": 2141 }, { "epoch": 0.191795847557222, "grad_norm": 1.2945708244779994, "learning_rate": 1.8659287113088057e-05, "loss": 0.8659, "step": 2142 }, { "epoch": 0.19188538810230008, "grad_norm": 0.980169013224945, "learning_rate": 1.8657836118972338e-05, "loss": 0.8804, "step": 2143 }, { "epoch": 0.19197492864737814, "grad_norm": 1.06328409685627, "learning_rate": 1.8656384396587663e-05, "loss": 0.8793, "step": 2144 }, { "epoch": 0.1920644691924562, "grad_norm": 0.8789533764503326, "learning_rate": 1.8654931946056142e-05, "loss": 0.8152, "step": 2145 }, { "epoch": 0.19215400973753427, "grad_norm": 0.9577200567831039, "learning_rate": 1.865347876749995e-05, "loss": 0.8348, "step": 2146 }, { "epoch": 0.19224355028261234, "grad_norm": 0.8899274902843718, "learning_rate": 1.8652024861041316e-05, "loss": 0.862, "step": 2147 }, { "epoch": 0.1923330908276904, "grad_norm": 1.0833933186932274, "learning_rate": 1.8650570226802554e-05, "loss": 0.8754, "step": 2148 }, { "epoch": 0.1924226313727685, "grad_norm": 0.9122585189935996, "learning_rate": 1.864911486490601e-05, "loss": 0.8555, "step": 2149 }, { "epoch": 0.19251217191784656, "grad_norm": 0.9636591277447347, "learning_rate": 1.8647658775474106e-05, "loss": 0.9523, "step": 2150 }, { "epoch": 0.19260171246292462, "grad_norm": 1.022987097486522, "learning_rate": 1.8646201958629332e-05, "loss": 0.8915, "step": 2151 }, { "epoch": 0.1926912530080027, "grad_norm": 0.9439380158619503, "learning_rate": 1.864474441449422e-05, "loss": 0.8983, "step": 2152 }, { "epoch": 0.19278079355308075, "grad_norm": 1.0702329981538026, "learning_rate": 1.864328614319138e-05, "loss": 0.9003, "step": 2153 }, { "epoch": 0.19287033409815882, "grad_norm": 0.970824733604063, "learning_rate": 1.8641827144843473e-05, "loss": 0.9012, "step": 2154 }, { "epoch": 0.19295987464323688, "grad_norm": 0.9688317808543242, "learning_rate": 1.8640367419573232e-05, "loss": 0.8913, "step": 2155 }, { "epoch": 0.19304941518831495, "grad_norm": 0.9730570976517954, "learning_rate": 1.863890696750344e-05, "loss": 0.9096, "step": 2156 }, { "epoch": 0.19313895573339301, "grad_norm": 0.9568918068773145, "learning_rate": 1.8637445788756944e-05, "loss": 0.8465, "step": 2157 }, { "epoch": 0.1932284962784711, "grad_norm": 0.9471598106525235, "learning_rate": 1.863598388345665e-05, "loss": 0.8659, "step": 2158 }, { "epoch": 0.19331803682354917, "grad_norm": 0.9383234387970055, "learning_rate": 1.863452125172554e-05, "loss": 0.9164, "step": 2159 }, { "epoch": 0.19340757736862724, "grad_norm": 1.0197662205805804, "learning_rate": 1.863305789368664e-05, "loss": 0.869, "step": 2160 }, { "epoch": 0.1934971179137053, "grad_norm": 1.0193639294759718, "learning_rate": 1.863159380946304e-05, "loss": 0.8402, "step": 2161 }, { "epoch": 0.19358665845878337, "grad_norm": 1.2553117291425584, "learning_rate": 1.86301289991779e-05, "loss": 0.8981, "step": 2162 }, { "epoch": 0.19367619900386143, "grad_norm": 0.9613084303490637, "learning_rate": 1.8628663462954428e-05, "loss": 0.9202, "step": 2163 }, { "epoch": 0.1937657395489395, "grad_norm": 0.9111142868178581, "learning_rate": 1.8627197200915905e-05, "loss": 0.8397, "step": 2164 }, { "epoch": 0.19385528009401756, "grad_norm": 1.2208208677288694, "learning_rate": 1.8625730213185666e-05, "loss": 0.9081, "step": 2165 }, { "epoch": 0.19394482063909563, "grad_norm": 0.9256580871261344, "learning_rate": 1.8624262499887112e-05, "loss": 0.9033, "step": 2166 }, { "epoch": 0.19403436118417372, "grad_norm": 0.9540346114346476, "learning_rate": 1.8622794061143698e-05, "loss": 0.9022, "step": 2167 }, { "epoch": 0.19412390172925179, "grad_norm": 0.9667623423930666, "learning_rate": 1.862132489707895e-05, "loss": 0.8871, "step": 2168 }, { "epoch": 0.19421344227432985, "grad_norm": 0.9760936914727403, "learning_rate": 1.8619855007816445e-05, "loss": 0.8759, "step": 2169 }, { "epoch": 0.19430298281940792, "grad_norm": 0.9434273649482908, "learning_rate": 1.8618384393479822e-05, "loss": 0.9374, "step": 2170 }, { "epoch": 0.19439252336448598, "grad_norm": 0.9280745561017153, "learning_rate": 1.8616913054192792e-05, "loss": 0.8917, "step": 2171 }, { "epoch": 0.19448206390956405, "grad_norm": 1.1196748115668116, "learning_rate": 1.8615440990079117e-05, "loss": 0.8364, "step": 2172 }, { "epoch": 0.1945716044546421, "grad_norm": 0.986044807084855, "learning_rate": 1.8613968201262622e-05, "loss": 0.8703, "step": 2173 }, { "epoch": 0.19466114499972018, "grad_norm": 1.0156667117110476, "learning_rate": 1.8612494687867188e-05, "loss": 0.8962, "step": 2174 }, { "epoch": 0.19475068554479824, "grad_norm": 0.8859828076031111, "learning_rate": 1.861102045001677e-05, "loss": 0.8239, "step": 2175 }, { "epoch": 0.19484022608987633, "grad_norm": 0.9348579479108126, "learning_rate": 1.860954548783537e-05, "loss": 0.768, "step": 2176 }, { "epoch": 0.1949297666349544, "grad_norm": 0.8997383724537695, "learning_rate": 1.860806980144706e-05, "loss": 0.8946, "step": 2177 }, { "epoch": 0.19501930718003246, "grad_norm": 0.8456604877544933, "learning_rate": 1.8606593390975975e-05, "loss": 0.8389, "step": 2178 }, { "epoch": 0.19510884772511053, "grad_norm": 1.1110522330294677, "learning_rate": 1.8605116256546293e-05, "loss": 0.8427, "step": 2179 }, { "epoch": 0.1951983882701886, "grad_norm": 1.0160263087814307, "learning_rate": 1.8603638398282274e-05, "loss": 0.8956, "step": 2180 }, { "epoch": 0.19528792881526666, "grad_norm": 0.9683835489256681, "learning_rate": 1.860215981630823e-05, "loss": 0.922, "step": 2181 }, { "epoch": 0.19537746936034472, "grad_norm": 0.967127428619307, "learning_rate": 1.8600680510748536e-05, "loss": 0.9135, "step": 2182 }, { "epoch": 0.1954670099054228, "grad_norm": 0.9996503908880348, "learning_rate": 1.8599200481727627e-05, "loss": 0.828, "step": 2183 }, { "epoch": 0.19555655045050085, "grad_norm": 1.100614693335705, "learning_rate": 1.859771972936999e-05, "loss": 0.8806, "step": 2184 }, { "epoch": 0.19564609099557895, "grad_norm": 0.883532422650634, "learning_rate": 1.859623825380019e-05, "loss": 0.8702, "step": 2185 }, { "epoch": 0.195735631540657, "grad_norm": 0.9691539210909547, "learning_rate": 1.859475605514284e-05, "loss": 0.8805, "step": 2186 }, { "epoch": 0.19582517208573508, "grad_norm": 0.8727255611527925, "learning_rate": 1.8593273133522618e-05, "loss": 0.8671, "step": 2187 }, { "epoch": 0.19591471263081314, "grad_norm": 0.9917768382521909, "learning_rate": 1.8591789489064264e-05, "loss": 0.8663, "step": 2188 }, { "epoch": 0.1960042531758912, "grad_norm": 0.9535266940455634, "learning_rate": 1.8590305121892575e-05, "loss": 0.8771, "step": 2189 }, { "epoch": 0.19609379372096927, "grad_norm": 1.298764404885573, "learning_rate": 1.8588820032132414e-05, "loss": 0.8326, "step": 2190 }, { "epoch": 0.19618333426604734, "grad_norm": 0.9259134298605606, "learning_rate": 1.85873342199087e-05, "loss": 0.8543, "step": 2191 }, { "epoch": 0.1962728748111254, "grad_norm": 1.17034996355197, "learning_rate": 1.8585847685346415e-05, "loss": 0.8671, "step": 2192 }, { "epoch": 0.19636241535620347, "grad_norm": 0.9667252677373399, "learning_rate": 1.85843604285706e-05, "loss": 0.8538, "step": 2193 }, { "epoch": 0.19645195590128156, "grad_norm": 1.0271637889961365, "learning_rate": 1.8582872449706358e-05, "loss": 0.8969, "step": 2194 }, { "epoch": 0.19654149644635963, "grad_norm": 1.0031018376159087, "learning_rate": 1.8581383748878856e-05, "loss": 0.9053, "step": 2195 }, { "epoch": 0.1966310369914377, "grad_norm": 1.201736235266263, "learning_rate": 1.857989432621332e-05, "loss": 0.8759, "step": 2196 }, { "epoch": 0.19672057753651576, "grad_norm": 1.2075952692267113, "learning_rate": 1.8578404181835032e-05, "loss": 0.8789, "step": 2197 }, { "epoch": 0.19681011808159382, "grad_norm": 0.9279715908865723, "learning_rate": 1.8576913315869337e-05, "loss": 0.9152, "step": 2198 }, { "epoch": 0.1968996586266719, "grad_norm": 0.8970766735911032, "learning_rate": 1.8575421728441646e-05, "loss": 0.887, "step": 2199 }, { "epoch": 0.19698919917174995, "grad_norm": 0.9757095356538632, "learning_rate": 1.857392941967742e-05, "loss": 0.8501, "step": 2200 }, { "epoch": 0.19707873971682802, "grad_norm": 0.9982619304779129, "learning_rate": 1.857243638970219e-05, "loss": 0.8337, "step": 2201 }, { "epoch": 0.19716828026190608, "grad_norm": 0.9433956420204105, "learning_rate": 1.857094263864155e-05, "loss": 0.8544, "step": 2202 }, { "epoch": 0.19725782080698417, "grad_norm": 0.934352084405641, "learning_rate": 1.8569448166621145e-05, "loss": 0.9233, "step": 2203 }, { "epoch": 0.19734736135206224, "grad_norm": 1.031436327987299, "learning_rate": 1.856795297376669e-05, "loss": 0.8744, "step": 2204 }, { "epoch": 0.1974369018971403, "grad_norm": 1.0240623105697386, "learning_rate": 1.8566457060203942e-05, "loss": 0.8735, "step": 2205 }, { "epoch": 0.19752644244221837, "grad_norm": 1.0906919291585435, "learning_rate": 1.8564960426058745e-05, "loss": 0.8653, "step": 2206 }, { "epoch": 0.19761598298729643, "grad_norm": 0.9910406985137401, "learning_rate": 1.8563463071456992e-05, "loss": 0.8733, "step": 2207 }, { "epoch": 0.1977055235323745, "grad_norm": 0.9453247271459769, "learning_rate": 1.8561964996524628e-05, "loss": 0.8732, "step": 2208 }, { "epoch": 0.19779506407745256, "grad_norm": 1.1468704408192922, "learning_rate": 1.8560466201387667e-05, "loss": 0.8389, "step": 2209 }, { "epoch": 0.19788460462253063, "grad_norm": 1.0361611345917605, "learning_rate": 1.8558966686172185e-05, "loss": 0.931, "step": 2210 }, { "epoch": 0.1979741451676087, "grad_norm": 0.8996302960858265, "learning_rate": 1.8557466451004323e-05, "loss": 0.8797, "step": 2211 }, { "epoch": 0.1980636857126868, "grad_norm": 0.9052330818463086, "learning_rate": 1.8555965496010264e-05, "loss": 0.8755, "step": 2212 }, { "epoch": 0.19815322625776485, "grad_norm": 1.0078760944568, "learning_rate": 1.855446382131627e-05, "loss": 0.9506, "step": 2213 }, { "epoch": 0.19824276680284292, "grad_norm": 1.1178863670983918, "learning_rate": 1.8552961427048655e-05, "loss": 0.9527, "step": 2214 }, { "epoch": 0.19833230734792098, "grad_norm": 1.0001230530025589, "learning_rate": 1.8551458313333796e-05, "loss": 0.8492, "step": 2215 }, { "epoch": 0.19842184789299905, "grad_norm": 0.9956672912856674, "learning_rate": 1.854995448029813e-05, "loss": 0.9267, "step": 2216 }, { "epoch": 0.1985113884380771, "grad_norm": 1.0595658528483614, "learning_rate": 1.854844992806816e-05, "loss": 0.902, "step": 2217 }, { "epoch": 0.19860092898315518, "grad_norm": 1.1899805839581479, "learning_rate": 1.8546944656770432e-05, "loss": 0.8977, "step": 2218 }, { "epoch": 0.19869046952823324, "grad_norm": 0.9447784563291665, "learning_rate": 1.8545438666531576e-05, "loss": 0.8224, "step": 2219 }, { "epoch": 0.1987800100733113, "grad_norm": 1.0220566961267354, "learning_rate": 1.8543931957478266e-05, "loss": 0.8891, "step": 2220 }, { "epoch": 0.1988695506183894, "grad_norm": 1.1364571594323145, "learning_rate": 1.854242452973724e-05, "loss": 0.8934, "step": 2221 }, { "epoch": 0.19895909116346747, "grad_norm": 1.0942620924441164, "learning_rate": 1.8540916383435303e-05, "loss": 0.8898, "step": 2222 }, { "epoch": 0.19904863170854553, "grad_norm": 1.043209974011806, "learning_rate": 1.8539407518699307e-05, "loss": 0.9096, "step": 2223 }, { "epoch": 0.1991381722536236, "grad_norm": 1.2707230369313236, "learning_rate": 1.853789793565618e-05, "loss": 0.8767, "step": 2224 }, { "epoch": 0.19922771279870166, "grad_norm": 0.9546219047171328, "learning_rate": 1.8536387634432904e-05, "loss": 0.8807, "step": 2225 }, { "epoch": 0.19931725334377973, "grad_norm": 1.0673545874512418, "learning_rate": 1.8534876615156518e-05, "loss": 0.8653, "step": 2226 }, { "epoch": 0.1994067938888578, "grad_norm": 1.2109923883941378, "learning_rate": 1.8533364877954118e-05, "loss": 0.9511, "step": 2227 }, { "epoch": 0.19949633443393586, "grad_norm": 0.993671669735077, "learning_rate": 1.8531852422952878e-05, "loss": 0.8557, "step": 2228 }, { "epoch": 0.19958587497901392, "grad_norm": 1.1845301101479957, "learning_rate": 1.8530339250280012e-05, "loss": 0.8443, "step": 2229 }, { "epoch": 0.19967541552409201, "grad_norm": 0.9201463712025468, "learning_rate": 1.852882536006281e-05, "loss": 0.8686, "step": 2230 }, { "epoch": 0.19976495606917008, "grad_norm": 0.9738653800117629, "learning_rate": 1.8527310752428605e-05, "loss": 0.8987, "step": 2231 }, { "epoch": 0.19985449661424814, "grad_norm": 0.951423719642921, "learning_rate": 1.852579542750481e-05, "loss": 0.8699, "step": 2232 }, { "epoch": 0.1999440371593262, "grad_norm": 1.016604686236065, "learning_rate": 1.8524279385418887e-05, "loss": 0.852, "step": 2233 }, { "epoch": 0.20003357770440428, "grad_norm": 0.8505285054144482, "learning_rate": 1.8522762626298364e-05, "loss": 0.8345, "step": 2234 }, { "epoch": 0.20012311824948234, "grad_norm": 1.0960041259666753, "learning_rate": 1.852124515027082e-05, "loss": 0.9099, "step": 2235 }, { "epoch": 0.2002126587945604, "grad_norm": 0.9785559619870202, "learning_rate": 1.8519726957463898e-05, "loss": 0.9079, "step": 2236 }, { "epoch": 0.20030219933963847, "grad_norm": 1.0060972409437032, "learning_rate": 1.851820804800531e-05, "loss": 1.0094, "step": 2237 }, { "epoch": 0.20039173988471654, "grad_norm": 0.8874165284918272, "learning_rate": 1.851668842202282e-05, "loss": 0.9034, "step": 2238 }, { "epoch": 0.20048128042979463, "grad_norm": 0.9824025367951479, "learning_rate": 1.8515168079644256e-05, "loss": 0.9035, "step": 2239 }, { "epoch": 0.2005708209748727, "grad_norm": 0.9215040562482527, "learning_rate": 1.85136470209975e-05, "loss": 0.8632, "step": 2240 }, { "epoch": 0.20066036151995076, "grad_norm": 1.0157440400619355, "learning_rate": 1.8512125246210496e-05, "loss": 0.8759, "step": 2241 }, { "epoch": 0.20074990206502882, "grad_norm": 0.8780438891401056, "learning_rate": 1.8510602755411258e-05, "loss": 0.8428, "step": 2242 }, { "epoch": 0.2008394426101069, "grad_norm": 0.8641754553926261, "learning_rate": 1.850907954872785e-05, "loss": 0.844, "step": 2243 }, { "epoch": 0.20092898315518495, "grad_norm": 1.0074301323388741, "learning_rate": 1.8507555626288398e-05, "loss": 0.8817, "step": 2244 }, { "epoch": 0.20101852370026302, "grad_norm": 1.062915978659435, "learning_rate": 1.850603098822109e-05, "loss": 0.9504, "step": 2245 }, { "epoch": 0.20110806424534108, "grad_norm": 0.9086226602272129, "learning_rate": 1.8504505634654177e-05, "loss": 0.9166, "step": 2246 }, { "epoch": 0.20119760479041915, "grad_norm": 1.194891926642487, "learning_rate": 1.850297956571596e-05, "loss": 0.8712, "step": 2247 }, { "epoch": 0.20128714533549724, "grad_norm": 1.0784646103371456, "learning_rate": 1.850145278153481e-05, "loss": 0.8859, "step": 2248 }, { "epoch": 0.2013766858805753, "grad_norm": 0.990326061063382, "learning_rate": 1.8499925282239158e-05, "loss": 0.8448, "step": 2249 }, { "epoch": 0.20146622642565337, "grad_norm": 1.002340504473485, "learning_rate": 1.849839706795749e-05, "loss": 0.9413, "step": 2250 }, { "epoch": 0.20155576697073144, "grad_norm": 1.0297860229452893, "learning_rate": 1.8496868138818354e-05, "loss": 0.8943, "step": 2251 }, { "epoch": 0.2016453075158095, "grad_norm": 1.1690679545261213, "learning_rate": 1.849533849495036e-05, "loss": 0.8732, "step": 2252 }, { "epoch": 0.20173484806088757, "grad_norm": 0.9404967904005352, "learning_rate": 1.8493808136482176e-05, "loss": 0.8638, "step": 2253 }, { "epoch": 0.20182438860596563, "grad_norm": 0.9162247683723409, "learning_rate": 1.8492277063542524e-05, "loss": 0.8645, "step": 2254 }, { "epoch": 0.2019139291510437, "grad_norm": 0.9996871440155509, "learning_rate": 1.8490745276260208e-05, "loss": 0.9186, "step": 2255 }, { "epoch": 0.20200346969612176, "grad_norm": 0.9952018287608507, "learning_rate": 1.8489212774764064e-05, "loss": 0.8692, "step": 2256 }, { "epoch": 0.20209301024119986, "grad_norm": 0.9717356685883268, "learning_rate": 1.8487679559183e-05, "loss": 0.8843, "step": 2257 }, { "epoch": 0.20218255078627792, "grad_norm": 0.9332154506932737, "learning_rate": 1.8486145629646e-05, "loss": 0.892, "step": 2258 }, { "epoch": 0.20227209133135599, "grad_norm": 1.1144490522914088, "learning_rate": 1.848461098628208e-05, "loss": 0.9278, "step": 2259 }, { "epoch": 0.20236163187643405, "grad_norm": 1.7003814728035742, "learning_rate": 1.8483075629220333e-05, "loss": 0.9214, "step": 2260 }, { "epoch": 0.20245117242151212, "grad_norm": 0.9801119058428106, "learning_rate": 1.8481539558589906e-05, "loss": 0.8527, "step": 2261 }, { "epoch": 0.20254071296659018, "grad_norm": 1.008014406838476, "learning_rate": 1.848000277452001e-05, "loss": 0.8528, "step": 2262 }, { "epoch": 0.20263025351166825, "grad_norm": 0.9126280839115792, "learning_rate": 1.8478465277139914e-05, "loss": 0.877, "step": 2263 }, { "epoch": 0.2027197940567463, "grad_norm": 0.967325954184627, "learning_rate": 1.8476927066578948e-05, "loss": 0.9016, "step": 2264 }, { "epoch": 0.20280933460182438, "grad_norm": 0.929545593222842, "learning_rate": 1.8475388142966503e-05, "loss": 0.8877, "step": 2265 }, { "epoch": 0.20289887514690247, "grad_norm": 1.1397034070437517, "learning_rate": 1.8473848506432025e-05, "loss": 0.8955, "step": 2266 }, { "epoch": 0.20298841569198053, "grad_norm": 1.0627198764167012, "learning_rate": 1.8472308157105026e-05, "loss": 0.8469, "step": 2267 }, { "epoch": 0.2030779562370586, "grad_norm": 0.9521235323210383, "learning_rate": 1.8470767095115074e-05, "loss": 0.8906, "step": 2268 }, { "epoch": 0.20316749678213666, "grad_norm": 1.169231267601412, "learning_rate": 1.8469225320591794e-05, "loss": 0.8857, "step": 2269 }, { "epoch": 0.20325703732721473, "grad_norm": 0.9071337225375996, "learning_rate": 1.846768283366488e-05, "loss": 0.8882, "step": 2270 }, { "epoch": 0.2033465778722928, "grad_norm": 0.9876077221974757, "learning_rate": 1.8466139634464082e-05, "loss": 0.9231, "step": 2271 }, { "epoch": 0.20343611841737086, "grad_norm": 1.154352599620688, "learning_rate": 1.8464595723119206e-05, "loss": 0.8787, "step": 2272 }, { "epoch": 0.20352565896244892, "grad_norm": 0.9010408888430375, "learning_rate": 1.846305109976012e-05, "loss": 0.8744, "step": 2273 }, { "epoch": 0.203615199507527, "grad_norm": 1.1119423771290902, "learning_rate": 1.8461505764516752e-05, "loss": 0.9113, "step": 2274 }, { "epoch": 0.20370474005260508, "grad_norm": 1.1117116621873602, "learning_rate": 1.8459959717519096e-05, "loss": 0.7942, "step": 2275 }, { "epoch": 0.20379428059768315, "grad_norm": 0.9782284100915064, "learning_rate": 1.8458412958897196e-05, "loss": 0.846, "step": 2276 }, { "epoch": 0.2038838211427612, "grad_norm": 0.9534606652957766, "learning_rate": 1.8456865488781162e-05, "loss": 0.9166, "step": 2277 }, { "epoch": 0.20397336168783928, "grad_norm": 0.9209304511029442, "learning_rate": 1.845531730730116e-05, "loss": 0.9099, "step": 2278 }, { "epoch": 0.20406290223291734, "grad_norm": 1.0346689149350008, "learning_rate": 1.8453768414587424e-05, "loss": 0.9293, "step": 2279 }, { "epoch": 0.2041524427779954, "grad_norm": 0.9657162413976449, "learning_rate": 1.8452218810770232e-05, "loss": 0.927, "step": 2280 }, { "epoch": 0.20424198332307347, "grad_norm": 0.9533802716136274, "learning_rate": 1.8450668495979937e-05, "loss": 0.8645, "step": 2281 }, { "epoch": 0.20433152386815154, "grad_norm": 1.0006368922078988, "learning_rate": 1.844911747034695e-05, "loss": 0.8591, "step": 2282 }, { "epoch": 0.2044210644132296, "grad_norm": 0.9706237429879425, "learning_rate": 1.844756573400173e-05, "loss": 0.8721, "step": 2283 }, { "epoch": 0.2045106049583077, "grad_norm": 0.9752254327001012, "learning_rate": 1.8446013287074812e-05, "loss": 0.8422, "step": 2284 }, { "epoch": 0.20460014550338576, "grad_norm": 0.9044730858750591, "learning_rate": 1.844446012969678e-05, "loss": 0.8817, "step": 2285 }, { "epoch": 0.20468968604846383, "grad_norm": 0.9321495665441216, "learning_rate": 1.8442906261998277e-05, "loss": 0.8777, "step": 2286 }, { "epoch": 0.2047792265935419, "grad_norm": 1.0967912583911346, "learning_rate": 1.8441351684110014e-05, "loss": 0.9745, "step": 2287 }, { "epoch": 0.20486876713861996, "grad_norm": 0.892517396538192, "learning_rate": 1.8439796396162756e-05, "loss": 0.8608, "step": 2288 }, { "epoch": 0.20495830768369802, "grad_norm": 1.102513827990313, "learning_rate": 1.8438240398287325e-05, "loss": 0.8773, "step": 2289 }, { "epoch": 0.2050478482287761, "grad_norm": 1.0334482534892258, "learning_rate": 1.843668369061461e-05, "loss": 0.8989, "step": 2290 }, { "epoch": 0.20513738877385415, "grad_norm": 0.9556946031206666, "learning_rate": 1.843512627327556e-05, "loss": 0.9172, "step": 2291 }, { "epoch": 0.20522692931893222, "grad_norm": 1.020493288569179, "learning_rate": 1.843356814640117e-05, "loss": 0.9067, "step": 2292 }, { "epoch": 0.2053164698640103, "grad_norm": 0.9529903207270544, "learning_rate": 1.8432009310122513e-05, "loss": 0.9397, "step": 2293 }, { "epoch": 0.20540601040908837, "grad_norm": 0.9661606044885973, "learning_rate": 1.8430449764570708e-05, "loss": 0.8634, "step": 2294 }, { "epoch": 0.20549555095416644, "grad_norm": 0.9645320086110252, "learning_rate": 1.8428889509876943e-05, "loss": 0.9056, "step": 2295 }, { "epoch": 0.2055850914992445, "grad_norm": 0.898271645970045, "learning_rate": 1.842732854617246e-05, "loss": 0.8977, "step": 2296 }, { "epoch": 0.20567463204432257, "grad_norm": 0.9715530618711378, "learning_rate": 1.842576687358856e-05, "loss": 0.9202, "step": 2297 }, { "epoch": 0.20576417258940063, "grad_norm": 1.0776860530106205, "learning_rate": 1.842420449225661e-05, "loss": 0.8519, "step": 2298 }, { "epoch": 0.2058537131344787, "grad_norm": 0.999185911264621, "learning_rate": 1.8422641402308027e-05, "loss": 0.8701, "step": 2299 }, { "epoch": 0.20594325367955676, "grad_norm": 1.0671519059548256, "learning_rate": 1.84210776038743e-05, "loss": 0.918, "step": 2300 }, { "epoch": 0.20603279422463483, "grad_norm": 0.9568544913626, "learning_rate": 1.8419513097086965e-05, "loss": 0.8333, "step": 2301 }, { "epoch": 0.20612233476971292, "grad_norm": 0.9879354458395455, "learning_rate": 1.8417947882077625e-05, "loss": 0.8645, "step": 2302 }, { "epoch": 0.206211875314791, "grad_norm": 0.8836017727055884, "learning_rate": 1.841638195897794e-05, "loss": 0.8397, "step": 2303 }, { "epoch": 0.20630141585986905, "grad_norm": 0.9626370393798671, "learning_rate": 1.8414815327919633e-05, "loss": 0.9311, "step": 2304 }, { "epoch": 0.20639095640494712, "grad_norm": 0.9674128224778638, "learning_rate": 1.841324798903448e-05, "loss": 0.8801, "step": 2305 }, { "epoch": 0.20648049695002518, "grad_norm": 1.0844014321482562, "learning_rate": 1.8411679942454324e-05, "loss": 0.871, "step": 2306 }, { "epoch": 0.20657003749510325, "grad_norm": 0.9681295636253464, "learning_rate": 1.841011118831106e-05, "loss": 0.8777, "step": 2307 }, { "epoch": 0.2066595780401813, "grad_norm": 1.013305173332575, "learning_rate": 1.840854172673665e-05, "loss": 0.9095, "step": 2308 }, { "epoch": 0.20674911858525938, "grad_norm": 0.9009463850456422, "learning_rate": 1.840697155786311e-05, "loss": 0.8796, "step": 2309 }, { "epoch": 0.20683865913033744, "grad_norm": 0.9758458671539242, "learning_rate": 1.8405400681822523e-05, "loss": 0.9225, "step": 2310 }, { "epoch": 0.20692819967541554, "grad_norm": 1.0661411544736863, "learning_rate": 1.8403829098747017e-05, "loss": 0.8499, "step": 2311 }, { "epoch": 0.2070177402204936, "grad_norm": 1.1632544058505203, "learning_rate": 1.840225680876879e-05, "loss": 0.9372, "step": 2312 }, { "epoch": 0.20710728076557167, "grad_norm": 0.8744858126977464, "learning_rate": 1.8400683812020107e-05, "loss": 0.8443, "step": 2313 }, { "epoch": 0.20719682131064973, "grad_norm": 0.9239166169008859, "learning_rate": 1.8399110108633275e-05, "loss": 0.8266, "step": 2314 }, { "epoch": 0.2072863618557278, "grad_norm": 1.3286468410130272, "learning_rate": 1.839753569874067e-05, "loss": 0.8958, "step": 2315 }, { "epoch": 0.20737590240080586, "grad_norm": 0.9340196406553349, "learning_rate": 1.839596058247473e-05, "loss": 0.9065, "step": 2316 }, { "epoch": 0.20746544294588393, "grad_norm": 1.0134212296588005, "learning_rate": 1.8394384759967943e-05, "loss": 0.8473, "step": 2317 }, { "epoch": 0.207554983490962, "grad_norm": 0.9350759401341093, "learning_rate": 1.8392808231352867e-05, "loss": 0.9388, "step": 2318 }, { "epoch": 0.20764452403604006, "grad_norm": 1.0223254248819673, "learning_rate": 1.8391230996762107e-05, "loss": 0.8575, "step": 2319 }, { "epoch": 0.20773406458111815, "grad_norm": 0.9908714217445399, "learning_rate": 1.8389653056328344e-05, "loss": 0.8585, "step": 2320 }, { "epoch": 0.20782360512619621, "grad_norm": 0.9657244175770199, "learning_rate": 1.8388074410184304e-05, "loss": 0.838, "step": 2321 }, { "epoch": 0.20791314567127428, "grad_norm": 0.8700578346757099, "learning_rate": 1.838649505846278e-05, "loss": 0.815, "step": 2322 }, { "epoch": 0.20800268621635234, "grad_norm": 0.8691703628662262, "learning_rate": 1.8384915001296623e-05, "loss": 0.8868, "step": 2323 }, { "epoch": 0.2080922267614304, "grad_norm": 0.9799360287605824, "learning_rate": 1.8383334238818738e-05, "loss": 0.882, "step": 2324 }, { "epoch": 0.20818176730650848, "grad_norm": 1.1916782650823785, "learning_rate": 1.8381752771162095e-05, "loss": 0.8824, "step": 2325 }, { "epoch": 0.20827130785158654, "grad_norm": 0.9590769507726428, "learning_rate": 1.838017059845972e-05, "loss": 0.8517, "step": 2326 }, { "epoch": 0.2083608483966646, "grad_norm": 0.903858830451462, "learning_rate": 1.8378587720844708e-05, "loss": 0.9012, "step": 2327 }, { "epoch": 0.20845038894174267, "grad_norm": 0.9416101144004887, "learning_rate": 1.8377004138450196e-05, "loss": 0.8305, "step": 2328 }, { "epoch": 0.20853992948682076, "grad_norm": 0.9675763383619337, "learning_rate": 1.8375419851409396e-05, "loss": 0.8669, "step": 2329 }, { "epoch": 0.20862947003189883, "grad_norm": 1.1956420278260163, "learning_rate": 1.8373834859855572e-05, "loss": 0.8705, "step": 2330 }, { "epoch": 0.2087190105769769, "grad_norm": 0.9254323946556332, "learning_rate": 1.8372249163922045e-05, "loss": 0.8704, "step": 2331 }, { "epoch": 0.20880855112205496, "grad_norm": 1.0849889563041322, "learning_rate": 1.83706627637422e-05, "loss": 0.8618, "step": 2332 }, { "epoch": 0.20889809166713302, "grad_norm": 0.9479872523530047, "learning_rate": 1.8369075659449486e-05, "loss": 0.8492, "step": 2333 }, { "epoch": 0.2089876322122111, "grad_norm": 1.2130042495505715, "learning_rate": 1.83674878511774e-05, "loss": 0.9536, "step": 2334 }, { "epoch": 0.20907717275728915, "grad_norm": 1.0511298363043613, "learning_rate": 1.8365899339059496e-05, "loss": 0.8939, "step": 2335 }, { "epoch": 0.20916671330236722, "grad_norm": 0.9150730778417326, "learning_rate": 1.8364310123229406e-05, "loss": 0.9726, "step": 2336 }, { "epoch": 0.20925625384744528, "grad_norm": 0.9914732870978461, "learning_rate": 1.8362720203820807e-05, "loss": 0.8335, "step": 2337 }, { "epoch": 0.20934579439252338, "grad_norm": 0.9492069698840945, "learning_rate": 1.8361129580967432e-05, "loss": 0.863, "step": 2338 }, { "epoch": 0.20943533493760144, "grad_norm": 0.9500460835425355, "learning_rate": 1.8359538254803086e-05, "loss": 0.915, "step": 2339 }, { "epoch": 0.2095248754826795, "grad_norm": 1.0338876708733444, "learning_rate": 1.8357946225461622e-05, "loss": 0.9589, "step": 2340 }, { "epoch": 0.20961441602775757, "grad_norm": 0.932578747668577, "learning_rate": 1.8356353493076958e-05, "loss": 0.9089, "step": 2341 }, { "epoch": 0.20970395657283564, "grad_norm": 0.9495835200375395, "learning_rate": 1.835476005778307e-05, "loss": 0.8113, "step": 2342 }, { "epoch": 0.2097934971179137, "grad_norm": 0.929083138985875, "learning_rate": 1.8353165919713988e-05, "loss": 0.8614, "step": 2343 }, { "epoch": 0.20988303766299177, "grad_norm": 0.9816988023833834, "learning_rate": 1.8351571079003813e-05, "loss": 0.8872, "step": 2344 }, { "epoch": 0.20997257820806983, "grad_norm": 0.9031129362987765, "learning_rate": 1.8349975535786693e-05, "loss": 0.8456, "step": 2345 }, { "epoch": 0.2100621187531479, "grad_norm": 0.9932823622276261, "learning_rate": 1.8348379290196843e-05, "loss": 0.8864, "step": 2346 }, { "epoch": 0.210151659298226, "grad_norm": 0.8910276168675275, "learning_rate": 1.834678234236853e-05, "loss": 0.8729, "step": 2347 }, { "epoch": 0.21024119984330406, "grad_norm": 0.9666084257721075, "learning_rate": 1.8345184692436087e-05, "loss": 0.9274, "step": 2348 }, { "epoch": 0.21033074038838212, "grad_norm": 0.8908680022270415, "learning_rate": 1.83435863405339e-05, "loss": 0.8732, "step": 2349 }, { "epoch": 0.21042028093346019, "grad_norm": 0.8803649263566433, "learning_rate": 1.834198728679642e-05, "loss": 0.8832, "step": 2350 }, { "epoch": 0.21050982147853825, "grad_norm": 0.9377616078307532, "learning_rate": 1.8340387531358156e-05, "loss": 0.8635, "step": 2351 }, { "epoch": 0.21059936202361632, "grad_norm": 0.8635539452490355, "learning_rate": 1.833878707435367e-05, "loss": 0.8356, "step": 2352 }, { "epoch": 0.21068890256869438, "grad_norm": 1.001493615160847, "learning_rate": 1.8337185915917587e-05, "loss": 0.8926, "step": 2353 }, { "epoch": 0.21077844311377245, "grad_norm": 0.9801629498571586, "learning_rate": 1.8335584056184597e-05, "loss": 0.9211, "step": 2354 }, { "epoch": 0.2108679836588505, "grad_norm": 0.959325240539797, "learning_rate": 1.8333981495289437e-05, "loss": 0.8779, "step": 2355 }, { "epoch": 0.2109575242039286, "grad_norm": 0.9093656483913796, "learning_rate": 1.8332378233366912e-05, "loss": 0.8038, "step": 2356 }, { "epoch": 0.21104706474900667, "grad_norm": 1.010983225526202, "learning_rate": 1.8330774270551884e-05, "loss": 0.9086, "step": 2357 }, { "epoch": 0.21113660529408473, "grad_norm": 1.1201327384173354, "learning_rate": 1.832916960697927e-05, "loss": 0.8897, "step": 2358 }, { "epoch": 0.2112261458391628, "grad_norm": 0.8354700243392361, "learning_rate": 1.832756424278405e-05, "loss": 0.8484, "step": 2359 }, { "epoch": 0.21131568638424086, "grad_norm": 0.9371706155650414, "learning_rate": 1.8325958178101266e-05, "loss": 0.8264, "step": 2360 }, { "epoch": 0.21140522692931893, "grad_norm": 1.1543065998751798, "learning_rate": 1.832435141306601e-05, "loss": 0.8042, "step": 2361 }, { "epoch": 0.211494767474397, "grad_norm": 1.1672765163503769, "learning_rate": 1.832274394781344e-05, "loss": 0.9222, "step": 2362 }, { "epoch": 0.21158430801947506, "grad_norm": 0.8967948503815383, "learning_rate": 1.8321135782478773e-05, "loss": 0.81, "step": 2363 }, { "epoch": 0.21167384856455312, "grad_norm": 0.9276379971238726, "learning_rate": 1.8319526917197278e-05, "loss": 0.8702, "step": 2364 }, { "epoch": 0.21176338910963122, "grad_norm": 1.0034197014559743, "learning_rate": 1.8317917352104287e-05, "loss": 0.8772, "step": 2365 }, { "epoch": 0.21185292965470928, "grad_norm": 0.9963851383438483, "learning_rate": 1.8316307087335197e-05, "loss": 0.9241, "step": 2366 }, { "epoch": 0.21194247019978735, "grad_norm": 1.034402821663142, "learning_rate": 1.8314696123025456e-05, "loss": 0.8914, "step": 2367 }, { "epoch": 0.2120320107448654, "grad_norm": 0.9219135549089859, "learning_rate": 1.8313084459310567e-05, "loss": 0.8905, "step": 2368 }, { "epoch": 0.21212155128994348, "grad_norm": 0.972516040517733, "learning_rate": 1.8311472096326107e-05, "loss": 0.854, "step": 2369 }, { "epoch": 0.21221109183502154, "grad_norm": 0.9782277892644399, "learning_rate": 1.83098590342077e-05, "loss": 0.9286, "step": 2370 }, { "epoch": 0.2123006323800996, "grad_norm": 0.9659890813862299, "learning_rate": 1.830824527309103e-05, "loss": 0.8964, "step": 2371 }, { "epoch": 0.21239017292517767, "grad_norm": 1.0012652565914089, "learning_rate": 1.830663081311184e-05, "loss": 0.8075, "step": 2372 }, { "epoch": 0.21247971347025574, "grad_norm": 1.119722591134955, "learning_rate": 1.8305015654405935e-05, "loss": 0.8757, "step": 2373 }, { "epoch": 0.21256925401533383, "grad_norm": 1.0607785324143888, "learning_rate": 1.8303399797109177e-05, "loss": 0.8634, "step": 2374 }, { "epoch": 0.2126587945604119, "grad_norm": 1.060619050660391, "learning_rate": 1.830178324135749e-05, "loss": 0.865, "step": 2375 }, { "epoch": 0.21274833510548996, "grad_norm": 1.0206851872511882, "learning_rate": 1.8300165987286847e-05, "loss": 0.8723, "step": 2376 }, { "epoch": 0.21283787565056803, "grad_norm": 1.089646829848144, "learning_rate": 1.829854803503329e-05, "loss": 0.8418, "step": 2377 }, { "epoch": 0.2129274161956461, "grad_norm": 0.9354182514621092, "learning_rate": 1.8296929384732912e-05, "loss": 0.827, "step": 2378 }, { "epoch": 0.21301695674072416, "grad_norm": 1.0359599840938436, "learning_rate": 1.8295310036521873e-05, "loss": 0.8735, "step": 2379 }, { "epoch": 0.21310649728580222, "grad_norm": 0.9694553494925326, "learning_rate": 1.8293689990536385e-05, "loss": 0.8843, "step": 2380 }, { "epoch": 0.2131960378308803, "grad_norm": 0.8926400234972416, "learning_rate": 1.8292069246912722e-05, "loss": 0.887, "step": 2381 }, { "epoch": 0.21328557837595835, "grad_norm": 0.9938703325044396, "learning_rate": 1.8290447805787215e-05, "loss": 0.919, "step": 2382 }, { "epoch": 0.21337511892103644, "grad_norm": 1.0778689495775973, "learning_rate": 1.8288825667296258e-05, "loss": 0.9056, "step": 2383 }, { "epoch": 0.2134646594661145, "grad_norm": 0.991945507752043, "learning_rate": 1.8287202831576292e-05, "loss": 0.8863, "step": 2384 }, { "epoch": 0.21355420001119257, "grad_norm": 1.1842987904262394, "learning_rate": 1.828557929876383e-05, "loss": 0.9336, "step": 2385 }, { "epoch": 0.21364374055627064, "grad_norm": 0.9559436610716089, "learning_rate": 1.828395506899544e-05, "loss": 0.8603, "step": 2386 }, { "epoch": 0.2137332811013487, "grad_norm": 1.02937528151375, "learning_rate": 1.8282330142407744e-05, "loss": 0.8659, "step": 2387 }, { "epoch": 0.21382282164642677, "grad_norm": 1.0355878641550564, "learning_rate": 1.8280704519137424e-05, "loss": 0.9109, "step": 2388 }, { "epoch": 0.21391236219150483, "grad_norm": 0.9117788276341109, "learning_rate": 1.8279078199321227e-05, "loss": 0.8406, "step": 2389 }, { "epoch": 0.2140019027365829, "grad_norm": 1.1824253091763357, "learning_rate": 1.8277451183095948e-05, "loss": 0.9205, "step": 2390 }, { "epoch": 0.21409144328166096, "grad_norm": 0.9267728934433077, "learning_rate": 1.8275823470598453e-05, "loss": 0.8937, "step": 2391 }, { "epoch": 0.21418098382673906, "grad_norm": 0.8973344687815957, "learning_rate": 1.8274195061965652e-05, "loss": 0.8954, "step": 2392 }, { "epoch": 0.21427052437181712, "grad_norm": 1.0108183443673422, "learning_rate": 1.8272565957334533e-05, "loss": 0.8828, "step": 2393 }, { "epoch": 0.2143600649168952, "grad_norm": 1.0290833780973254, "learning_rate": 1.8270936156842113e-05, "loss": 0.9551, "step": 2394 }, { "epoch": 0.21444960546197325, "grad_norm": 0.9823910298875254, "learning_rate": 1.8269305660625504e-05, "loss": 0.8813, "step": 2395 }, { "epoch": 0.21453914600705132, "grad_norm": 0.9138915602531238, "learning_rate": 1.8267674468821847e-05, "loss": 0.8636, "step": 2396 }, { "epoch": 0.21462868655212938, "grad_norm": 1.013238122058016, "learning_rate": 1.8266042581568355e-05, "loss": 0.8841, "step": 2397 }, { "epoch": 0.21471822709720745, "grad_norm": 0.9314746653929642, "learning_rate": 1.82644099990023e-05, "loss": 0.8543, "step": 2398 }, { "epoch": 0.2148077676422855, "grad_norm": 0.9615886037125216, "learning_rate": 1.8262776721261004e-05, "loss": 0.8606, "step": 2399 }, { "epoch": 0.21489730818736358, "grad_norm": 0.9795353347989464, "learning_rate": 1.826114274848186e-05, "loss": 0.8525, "step": 2400 }, { "epoch": 0.21498684873244167, "grad_norm": 1.0731244977395105, "learning_rate": 1.8259508080802304e-05, "loss": 0.902, "step": 2401 }, { "epoch": 0.21507638927751974, "grad_norm": 0.9160601494799316, "learning_rate": 1.8257872718359847e-05, "loss": 0.8584, "step": 2402 }, { "epoch": 0.2151659298225978, "grad_norm": 0.9227231094400433, "learning_rate": 1.8256236661292047e-05, "loss": 0.8822, "step": 2403 }, { "epoch": 0.21525547036767587, "grad_norm": 0.9434025511349725, "learning_rate": 1.825459990973652e-05, "loss": 0.8652, "step": 2404 }, { "epoch": 0.21534501091275393, "grad_norm": 0.91101944361123, "learning_rate": 1.825296246383095e-05, "loss": 0.8749, "step": 2405 }, { "epoch": 0.215434551457832, "grad_norm": 0.9038147704623257, "learning_rate": 1.8251324323713073e-05, "loss": 0.8905, "step": 2406 }, { "epoch": 0.21552409200291006, "grad_norm": 1.0522085217364545, "learning_rate": 1.824968548952068e-05, "loss": 0.9143, "step": 2407 }, { "epoch": 0.21561363254798813, "grad_norm": 0.940877365545532, "learning_rate": 1.824804596139163e-05, "loss": 0.8757, "step": 2408 }, { "epoch": 0.2157031730930662, "grad_norm": 0.9306804031553232, "learning_rate": 1.824640573946383e-05, "loss": 0.8752, "step": 2409 }, { "epoch": 0.21579271363814428, "grad_norm": 0.9429717658862485, "learning_rate": 1.824476482387525e-05, "loss": 0.9239, "step": 2410 }, { "epoch": 0.21588225418322235, "grad_norm": 0.9433277125967082, "learning_rate": 1.8243123214763924e-05, "loss": 0.9011, "step": 2411 }, { "epoch": 0.21597179472830041, "grad_norm": 0.9062348798505817, "learning_rate": 1.8241480912267932e-05, "loss": 0.9271, "step": 2412 }, { "epoch": 0.21606133527337848, "grad_norm": 1.2752312630537277, "learning_rate": 1.8239837916525423e-05, "loss": 0.8966, "step": 2413 }, { "epoch": 0.21615087581845654, "grad_norm": 0.8969866084944477, "learning_rate": 1.82381942276746e-05, "loss": 0.9156, "step": 2414 }, { "epoch": 0.2162404163635346, "grad_norm": 1.004994647556404, "learning_rate": 1.8236549845853723e-05, "loss": 0.8822, "step": 2415 }, { "epoch": 0.21632995690861268, "grad_norm": 1.107616954536359, "learning_rate": 1.8234904771201115e-05, "loss": 0.9268, "step": 2416 }, { "epoch": 0.21641949745369074, "grad_norm": 0.8615234920114818, "learning_rate": 1.8233259003855153e-05, "loss": 0.8407, "step": 2417 }, { "epoch": 0.2165090379987688, "grad_norm": 1.0041899789411581, "learning_rate": 1.8231612543954272e-05, "loss": 0.8439, "step": 2418 }, { "epoch": 0.2165985785438469, "grad_norm": 0.9616912177434204, "learning_rate": 1.822996539163697e-05, "loss": 0.9266, "step": 2419 }, { "epoch": 0.21668811908892496, "grad_norm": 0.974390662288401, "learning_rate": 1.82283175470418e-05, "loss": 0.8753, "step": 2420 }, { "epoch": 0.21677765963400303, "grad_norm": 0.931638048833105, "learning_rate": 1.8226669010307366e-05, "loss": 0.9048, "step": 2421 }, { "epoch": 0.2168672001790811, "grad_norm": 0.922031028650458, "learning_rate": 1.8225019781572348e-05, "loss": 0.8848, "step": 2422 }, { "epoch": 0.21695674072415916, "grad_norm": 1.0329411783563305, "learning_rate": 1.8223369860975466e-05, "loss": 0.8732, "step": 2423 }, { "epoch": 0.21704628126923722, "grad_norm": 0.9676490035827793, "learning_rate": 1.822171924865551e-05, "loss": 0.9131, "step": 2424 }, { "epoch": 0.2171358218143153, "grad_norm": 0.8814572021818587, "learning_rate": 1.822006794475132e-05, "loss": 0.8895, "step": 2425 }, { "epoch": 0.21722536235939335, "grad_norm": 0.9400735607711662, "learning_rate": 1.8218415949401808e-05, "loss": 0.9035, "step": 2426 }, { "epoch": 0.21731490290447142, "grad_norm": 0.8950728216513003, "learning_rate": 1.821676326274592e-05, "loss": 0.8957, "step": 2427 }, { "epoch": 0.2174044434495495, "grad_norm": 0.935220097707214, "learning_rate": 1.821510988492269e-05, "loss": 0.8545, "step": 2428 }, { "epoch": 0.21749398399462758, "grad_norm": 1.1479663454499143, "learning_rate": 1.821345581607118e-05, "loss": 0.9415, "step": 2429 }, { "epoch": 0.21758352453970564, "grad_norm": 1.0230933831638547, "learning_rate": 1.8211801056330537e-05, "loss": 0.8363, "step": 2430 }, { "epoch": 0.2176730650847837, "grad_norm": 1.0025394868693154, "learning_rate": 1.8210145605839946e-05, "loss": 0.8652, "step": 2431 }, { "epoch": 0.21776260562986177, "grad_norm": 1.2016130975252066, "learning_rate": 1.8208489464738664e-05, "loss": 0.8907, "step": 2432 }, { "epoch": 0.21785214617493984, "grad_norm": 1.000661700842076, "learning_rate": 1.8206832633165996e-05, "loss": 0.8826, "step": 2433 }, { "epoch": 0.2179416867200179, "grad_norm": 0.8823379144012643, "learning_rate": 1.820517511126131e-05, "loss": 0.8819, "step": 2434 }, { "epoch": 0.21803122726509597, "grad_norm": 1.0012576349800977, "learning_rate": 1.820351689916403e-05, "loss": 0.8218, "step": 2435 }, { "epoch": 0.21812076781017403, "grad_norm": 0.9264567782586764, "learning_rate": 1.8201857997013644e-05, "loss": 0.8521, "step": 2436 }, { "epoch": 0.21821030835525212, "grad_norm": 1.005346150299651, "learning_rate": 1.8200198404949688e-05, "loss": 0.859, "step": 2437 }, { "epoch": 0.2182998489003302, "grad_norm": 0.9123350117823129, "learning_rate": 1.819853812311177e-05, "loss": 0.8495, "step": 2438 }, { "epoch": 0.21838938944540826, "grad_norm": 1.027033039433404, "learning_rate": 1.8196877151639537e-05, "loss": 0.9043, "step": 2439 }, { "epoch": 0.21847892999048632, "grad_norm": 0.972354150722636, "learning_rate": 1.8195215490672708e-05, "loss": 0.9621, "step": 2440 }, { "epoch": 0.21856847053556439, "grad_norm": 0.927727867401141, "learning_rate": 1.819355314035106e-05, "loss": 0.8606, "step": 2441 }, { "epoch": 0.21865801108064245, "grad_norm": 1.0856583066609942, "learning_rate": 1.819189010081442e-05, "loss": 0.8233, "step": 2442 }, { "epoch": 0.21874755162572052, "grad_norm": 1.0679478390735815, "learning_rate": 1.819022637220268e-05, "loss": 0.8729, "step": 2443 }, { "epoch": 0.21883709217079858, "grad_norm": 0.9618063368678332, "learning_rate": 1.8188561954655792e-05, "loss": 0.9243, "step": 2444 }, { "epoch": 0.21892663271587665, "grad_norm": 0.9398162238441737, "learning_rate": 1.8186896848313752e-05, "loss": 0.8522, "step": 2445 }, { "epoch": 0.21901617326095474, "grad_norm": 1.0485128452469143, "learning_rate": 1.818523105331663e-05, "loss": 0.8806, "step": 2446 }, { "epoch": 0.2191057138060328, "grad_norm": 0.876430435557789, "learning_rate": 1.818356456980454e-05, "loss": 0.8728, "step": 2447 }, { "epoch": 0.21919525435111087, "grad_norm": 1.0042534113970187, "learning_rate": 1.8181897397917672e-05, "loss": 0.8558, "step": 2448 }, { "epoch": 0.21928479489618893, "grad_norm": 1.0157550710124708, "learning_rate": 1.8180229537796257e-05, "loss": 0.9105, "step": 2449 }, { "epoch": 0.219374335441267, "grad_norm": 0.9741772651910044, "learning_rate": 1.8178560989580586e-05, "loss": 0.8689, "step": 2450 }, { "epoch": 0.21946387598634506, "grad_norm": 0.9312969441118285, "learning_rate": 1.817689175341102e-05, "loss": 0.9185, "step": 2451 }, { "epoch": 0.21955341653142313, "grad_norm": 1.1572547846458558, "learning_rate": 1.8175221829427966e-05, "loss": 0.9016, "step": 2452 }, { "epoch": 0.2196429570765012, "grad_norm": 1.0217207796068877, "learning_rate": 1.817355121777189e-05, "loss": 0.9161, "step": 2453 }, { "epoch": 0.21973249762157926, "grad_norm": 0.9287415250858831, "learning_rate": 1.8171879918583322e-05, "loss": 0.9167, "step": 2454 }, { "epoch": 0.21982203816665735, "grad_norm": 0.9328585528642417, "learning_rate": 1.8170207932002844e-05, "loss": 0.8416, "step": 2455 }, { "epoch": 0.21991157871173542, "grad_norm": 0.9184961769454396, "learning_rate": 1.8168535258171102e-05, "loss": 0.8781, "step": 2456 }, { "epoch": 0.22000111925681348, "grad_norm": 0.9715845873701263, "learning_rate": 1.8166861897228788e-05, "loss": 0.8624, "step": 2457 }, { "epoch": 0.22009065980189155, "grad_norm": 0.9862747398022517, "learning_rate": 1.8165187849316668e-05, "loss": 0.8596, "step": 2458 }, { "epoch": 0.2201802003469696, "grad_norm": 0.8993864243402101, "learning_rate": 1.816351311457555e-05, "loss": 0.891, "step": 2459 }, { "epoch": 0.22026974089204768, "grad_norm": 1.062569435992692, "learning_rate": 1.8161837693146316e-05, "loss": 0.8735, "step": 2460 }, { "epoch": 0.22035928143712574, "grad_norm": 0.9205036651208217, "learning_rate": 1.816016158516989e-05, "loss": 0.8045, "step": 2461 }, { "epoch": 0.2204488219822038, "grad_norm": 1.1680790181822271, "learning_rate": 1.8158484790787265e-05, "loss": 0.8345, "step": 2462 }, { "epoch": 0.22053836252728187, "grad_norm": 0.9703330349505758, "learning_rate": 1.8156807310139482e-05, "loss": 0.8716, "step": 2463 }, { "epoch": 0.22062790307235997, "grad_norm": 1.010707673203592, "learning_rate": 1.8155129143367653e-05, "loss": 0.864, "step": 2464 }, { "epoch": 0.22071744361743803, "grad_norm": 0.8897449856767962, "learning_rate": 1.8153450290612933e-05, "loss": 0.8504, "step": 2465 }, { "epoch": 0.2208069841625161, "grad_norm": 0.8877515188492499, "learning_rate": 1.8151770752016544e-05, "loss": 0.8579, "step": 2466 }, { "epoch": 0.22089652470759416, "grad_norm": 0.9435996370183599, "learning_rate": 1.8150090527719765e-05, "loss": 0.8604, "step": 2467 }, { "epoch": 0.22098606525267223, "grad_norm": 0.8918097750761799, "learning_rate": 1.8148409617863926e-05, "loss": 0.8707, "step": 2468 }, { "epoch": 0.2210756057977503, "grad_norm": 1.1948767801438525, "learning_rate": 1.8146728022590426e-05, "loss": 0.8861, "step": 2469 }, { "epoch": 0.22116514634282836, "grad_norm": 1.285702211040259, "learning_rate": 1.8145045742040716e-05, "loss": 0.8666, "step": 2470 }, { "epoch": 0.22125468688790642, "grad_norm": 0.9614767806921428, "learning_rate": 1.8143362776356294e-05, "loss": 0.8984, "step": 2471 }, { "epoch": 0.22134422743298449, "grad_norm": 1.1699012842176681, "learning_rate": 1.8141679125678736e-05, "loss": 0.9143, "step": 2472 }, { "epoch": 0.22143376797806258, "grad_norm": 0.9472735651683517, "learning_rate": 1.813999479014966e-05, "loss": 0.8561, "step": 2473 }, { "epoch": 0.22152330852314064, "grad_norm": 0.9579782491725819, "learning_rate": 1.8138309769910747e-05, "loss": 0.8876, "step": 2474 }, { "epoch": 0.2216128490682187, "grad_norm": 1.1913490035421463, "learning_rate": 1.813662406510374e-05, "loss": 0.8718, "step": 2475 }, { "epoch": 0.22170238961329677, "grad_norm": 0.8864942915515203, "learning_rate": 1.8134937675870427e-05, "loss": 0.7986, "step": 2476 }, { "epoch": 0.22179193015837484, "grad_norm": 0.9465771328442915, "learning_rate": 1.813325060235267e-05, "loss": 0.9006, "step": 2477 }, { "epoch": 0.2218814707034529, "grad_norm": 0.9343366745027019, "learning_rate": 1.8131562844692375e-05, "loss": 0.8496, "step": 2478 }, { "epoch": 0.22197101124853097, "grad_norm": 0.8754879707723164, "learning_rate": 1.812987440303151e-05, "loss": 0.8571, "step": 2479 }, { "epoch": 0.22206055179360903, "grad_norm": 0.9822651029684982, "learning_rate": 1.8128185277512106e-05, "loss": 0.8728, "step": 2480 }, { "epoch": 0.2221500923386871, "grad_norm": 0.9125410538711639, "learning_rate": 1.8126495468276242e-05, "loss": 0.8659, "step": 2481 }, { "epoch": 0.2222396328837652, "grad_norm": 0.9934659712844545, "learning_rate": 1.812480497546606e-05, "loss": 0.923, "step": 2482 }, { "epoch": 0.22232917342884326, "grad_norm": 0.9818721535560684, "learning_rate": 1.8123113799223763e-05, "loss": 0.9276, "step": 2483 }, { "epoch": 0.22241871397392132, "grad_norm": 0.9329367212816099, "learning_rate": 1.8121421939691602e-05, "loss": 0.8995, "step": 2484 }, { "epoch": 0.2225082545189994, "grad_norm": 0.9631879446163104, "learning_rate": 1.8119729397011892e-05, "loss": 0.8651, "step": 2485 }, { "epoch": 0.22259779506407745, "grad_norm": 1.0095514931491958, "learning_rate": 1.8118036171327006e-05, "loss": 0.8314, "step": 2486 }, { "epoch": 0.22268733560915552, "grad_norm": 0.9431351183531096, "learning_rate": 1.811634226277937e-05, "loss": 0.904, "step": 2487 }, { "epoch": 0.22277687615423358, "grad_norm": 1.0513438756757167, "learning_rate": 1.8114647671511474e-05, "loss": 0.8897, "step": 2488 }, { "epoch": 0.22286641669931165, "grad_norm": 0.9002681160903409, "learning_rate": 1.8112952397665858e-05, "loss": 0.8495, "step": 2489 }, { "epoch": 0.2229559572443897, "grad_norm": 0.8906035776740626, "learning_rate": 1.8111256441385125e-05, "loss": 0.9004, "step": 2490 }, { "epoch": 0.2230454977894678, "grad_norm": 0.9453921528207156, "learning_rate": 1.810955980281193e-05, "loss": 0.9041, "step": 2491 }, { "epoch": 0.22313503833454587, "grad_norm": 0.9175716939079239, "learning_rate": 1.8107862482088994e-05, "loss": 0.8167, "step": 2492 }, { "epoch": 0.22322457887962394, "grad_norm": 0.9147878762136227, "learning_rate": 1.8106164479359083e-05, "loss": 0.8602, "step": 2493 }, { "epoch": 0.223314119424702, "grad_norm": 0.9892504733612675, "learning_rate": 1.8104465794765034e-05, "loss": 0.8904, "step": 2494 }, { "epoch": 0.22340365996978007, "grad_norm": 0.9451410722728956, "learning_rate": 1.8102766428449735e-05, "loss": 0.9189, "step": 2495 }, { "epoch": 0.22349320051485813, "grad_norm": 0.9489445999870919, "learning_rate": 1.8101066380556127e-05, "loss": 0.8843, "step": 2496 }, { "epoch": 0.2235827410599362, "grad_norm": 0.8835652795143871, "learning_rate": 1.8099365651227213e-05, "loss": 0.8384, "step": 2497 }, { "epoch": 0.22367228160501426, "grad_norm": 1.0201550048896366, "learning_rate": 1.809766424060605e-05, "loss": 0.9434, "step": 2498 }, { "epoch": 0.22376182215009233, "grad_norm": 1.0618140225731325, "learning_rate": 1.8095962148835768e-05, "loss": 0.909, "step": 2499 }, { "epoch": 0.22385136269517042, "grad_norm": 0.9767501354402025, "learning_rate": 1.8094259376059527e-05, "loss": 0.8983, "step": 2500 }, { "epoch": 0.22394090324024848, "grad_norm": 0.898521627981632, "learning_rate": 1.8092555922420564e-05, "loss": 0.8936, "step": 2501 }, { "epoch": 0.22403044378532655, "grad_norm": 1.0039108021537608, "learning_rate": 1.8090851788062167e-05, "loss": 0.8683, "step": 2502 }, { "epoch": 0.22411998433040461, "grad_norm": 0.9528426183103165, "learning_rate": 1.8089146973127688e-05, "loss": 0.8829, "step": 2503 }, { "epoch": 0.22420952487548268, "grad_norm": 0.9658704081880096, "learning_rate": 1.808744147776052e-05, "loss": 0.8321, "step": 2504 }, { "epoch": 0.22429906542056074, "grad_norm": 0.9479741464234925, "learning_rate": 1.808573530210413e-05, "loss": 0.8399, "step": 2505 }, { "epoch": 0.2243886059656388, "grad_norm": 1.000469194174804, "learning_rate": 1.808402844630204e-05, "loss": 0.8924, "step": 2506 }, { "epoch": 0.22447814651071687, "grad_norm": 0.9263997781239804, "learning_rate": 1.808232091049782e-05, "loss": 0.8764, "step": 2507 }, { "epoch": 0.22456768705579494, "grad_norm": 0.8966692868755989, "learning_rate": 1.8080612694835096e-05, "loss": 0.8927, "step": 2508 }, { "epoch": 0.22465722760087303, "grad_norm": 1.0767165332844624, "learning_rate": 1.8078903799457572e-05, "loss": 0.9091, "step": 2509 }, { "epoch": 0.2247467681459511, "grad_norm": 1.0488003354563713, "learning_rate": 1.8077194224508983e-05, "loss": 0.8488, "step": 2510 }, { "epoch": 0.22483630869102916, "grad_norm": 1.0244703484057969, "learning_rate": 1.807548397013314e-05, "loss": 0.848, "step": 2511 }, { "epoch": 0.22492584923610723, "grad_norm": 0.9580782040359516, "learning_rate": 1.80737730364739e-05, "loss": 0.8626, "step": 2512 }, { "epoch": 0.2250153897811853, "grad_norm": 0.9119457192277778, "learning_rate": 1.8072061423675183e-05, "loss": 0.8546, "step": 2513 }, { "epoch": 0.22510493032626336, "grad_norm": 1.207608081541003, "learning_rate": 1.807034913188096e-05, "loss": 0.9042, "step": 2514 }, { "epoch": 0.22519447087134142, "grad_norm": 0.9662483204068355, "learning_rate": 1.806863616123527e-05, "loss": 0.9431, "step": 2515 }, { "epoch": 0.2252840114164195, "grad_norm": 0.8922635174027994, "learning_rate": 1.8066922511882198e-05, "loss": 0.8136, "step": 2516 }, { "epoch": 0.22537355196149755, "grad_norm": 0.9582239887819564, "learning_rate": 1.8065208183965893e-05, "loss": 0.8781, "step": 2517 }, { "epoch": 0.22546309250657565, "grad_norm": 1.0434420539541909, "learning_rate": 1.8063493177630556e-05, "loss": 0.947, "step": 2518 }, { "epoch": 0.2255526330516537, "grad_norm": 0.9276314397686056, "learning_rate": 1.806177749302045e-05, "loss": 0.8937, "step": 2519 }, { "epoch": 0.22564217359673178, "grad_norm": 0.9631735141876852, "learning_rate": 1.8060061130279895e-05, "loss": 0.8675, "step": 2520 }, { "epoch": 0.22573171414180984, "grad_norm": 0.9808146663870502, "learning_rate": 1.8058344089553263e-05, "loss": 0.854, "step": 2521 }, { "epoch": 0.2258212546868879, "grad_norm": 1.1222796833599575, "learning_rate": 1.805662637098498e-05, "loss": 0.9385, "step": 2522 }, { "epoch": 0.22591079523196597, "grad_norm": 0.9275537332540574, "learning_rate": 1.8054907974719547e-05, "loss": 0.8658, "step": 2523 }, { "epoch": 0.22600033577704404, "grad_norm": 1.0645121579922552, "learning_rate": 1.80531889009015e-05, "loss": 0.8641, "step": 2524 }, { "epoch": 0.2260898763221221, "grad_norm": 0.9355536910913024, "learning_rate": 1.8051469149675448e-05, "loss": 0.8622, "step": 2525 }, { "epoch": 0.22617941686720017, "grad_norm": 0.9252707911114164, "learning_rate": 1.8049748721186046e-05, "loss": 0.8895, "step": 2526 }, { "epoch": 0.22626895741227826, "grad_norm": 1.0058931450989164, "learning_rate": 1.8048027615578018e-05, "loss": 0.8288, "step": 2527 }, { "epoch": 0.22635849795735632, "grad_norm": 1.0202690460620532, "learning_rate": 1.8046305832996128e-05, "loss": 0.8803, "step": 2528 }, { "epoch": 0.2264480385024344, "grad_norm": 1.0709501224161226, "learning_rate": 1.8044583373585213e-05, "loss": 0.8407, "step": 2529 }, { "epoch": 0.22653757904751246, "grad_norm": 0.8958129508180114, "learning_rate": 1.804286023749016e-05, "loss": 0.8152, "step": 2530 }, { "epoch": 0.22662711959259052, "grad_norm": 0.9467621134233632, "learning_rate": 1.8041136424855915e-05, "loss": 0.8865, "step": 2531 }, { "epoch": 0.22671666013766859, "grad_norm": 1.0505031675590963, "learning_rate": 1.8039411935827474e-05, "loss": 0.915, "step": 2532 }, { "epoch": 0.22680620068274665, "grad_norm": 0.933142619778036, "learning_rate": 1.8037686770549904e-05, "loss": 0.9037, "step": 2533 }, { "epoch": 0.22689574122782472, "grad_norm": 0.8432244177180609, "learning_rate": 1.803596092916831e-05, "loss": 0.8659, "step": 2534 }, { "epoch": 0.22698528177290278, "grad_norm": 1.0677966355782909, "learning_rate": 1.8034234411827874e-05, "loss": 0.8628, "step": 2535 }, { "epoch": 0.22707482231798087, "grad_norm": 1.0191266204932303, "learning_rate": 1.8032507218673817e-05, "loss": 0.9117, "step": 2536 }, { "epoch": 0.22716436286305894, "grad_norm": 1.0446812903638734, "learning_rate": 1.803077934985143e-05, "loss": 0.9206, "step": 2537 }, { "epoch": 0.227253903408137, "grad_norm": 1.0669702997515123, "learning_rate": 1.8029050805506056e-05, "loss": 0.8787, "step": 2538 }, { "epoch": 0.22734344395321507, "grad_norm": 0.9555762543954538, "learning_rate": 1.8027321585783087e-05, "loss": 0.866, "step": 2539 }, { "epoch": 0.22743298449829313, "grad_norm": 1.0120429950640326, "learning_rate": 1.802559169082799e-05, "loss": 0.8081, "step": 2540 }, { "epoch": 0.2275225250433712, "grad_norm": 0.9086390378975789, "learning_rate": 1.802386112078627e-05, "loss": 0.8899, "step": 2541 }, { "epoch": 0.22761206558844926, "grad_norm": 0.956931408650241, "learning_rate": 1.8022129875803503e-05, "loss": 0.8861, "step": 2542 }, { "epoch": 0.22770160613352733, "grad_norm": 1.0290655917010858, "learning_rate": 1.8020397956025308e-05, "loss": 0.874, "step": 2543 }, { "epoch": 0.2277911466786054, "grad_norm": 0.9992495510535807, "learning_rate": 1.8018665361597378e-05, "loss": 0.9552, "step": 2544 }, { "epoch": 0.2278806872236835, "grad_norm": 0.9033937158354753, "learning_rate": 1.8016932092665443e-05, "loss": 0.9056, "step": 2545 }, { "epoch": 0.22797022776876155, "grad_norm": 1.0437158567645224, "learning_rate": 1.801519814937531e-05, "loss": 0.8871, "step": 2546 }, { "epoch": 0.22805976831383962, "grad_norm": 0.8873756681422895, "learning_rate": 1.8013463531872826e-05, "loss": 0.9109, "step": 2547 }, { "epoch": 0.22814930885891768, "grad_norm": 1.0581781924497864, "learning_rate": 1.8011728240303907e-05, "loss": 0.9178, "step": 2548 }, { "epoch": 0.22823884940399575, "grad_norm": 1.079651431634101, "learning_rate": 1.8009992274814507e-05, "loss": 0.8827, "step": 2549 }, { "epoch": 0.2283283899490738, "grad_norm": 1.032263821563887, "learning_rate": 1.8008255635550666e-05, "loss": 0.8941, "step": 2550 }, { "epoch": 0.22841793049415188, "grad_norm": 0.9220953600277201, "learning_rate": 1.800651832265846e-05, "loss": 0.8434, "step": 2551 }, { "epoch": 0.22850747103922994, "grad_norm": 1.054601456870059, "learning_rate": 1.8004780336284016e-05, "loss": 0.9479, "step": 2552 }, { "epoch": 0.228597011584308, "grad_norm": 1.0085492112600614, "learning_rate": 1.800304167657354e-05, "loss": 0.8401, "step": 2553 }, { "epoch": 0.2286865521293861, "grad_norm": 0.9676772516323091, "learning_rate": 1.8001302343673276e-05, "loss": 0.9005, "step": 2554 }, { "epoch": 0.22877609267446417, "grad_norm": 0.8678914177267638, "learning_rate": 1.799956233772953e-05, "loss": 0.8824, "step": 2555 }, { "epoch": 0.22886563321954223, "grad_norm": 1.0702126246287937, "learning_rate": 1.799782165888867e-05, "loss": 0.8676, "step": 2556 }, { "epoch": 0.2289551737646203, "grad_norm": 1.3121248411028528, "learning_rate": 1.799608030729712e-05, "loss": 0.8896, "step": 2557 }, { "epoch": 0.22904471430969836, "grad_norm": 1.0131661199832729, "learning_rate": 1.799433828310135e-05, "loss": 0.893, "step": 2558 }, { "epoch": 0.22913425485477643, "grad_norm": 0.9043152855485418, "learning_rate": 1.799259558644789e-05, "loss": 0.8607, "step": 2559 }, { "epoch": 0.2292237953998545, "grad_norm": 1.1801178679964865, "learning_rate": 1.799085221748334e-05, "loss": 0.9057, "step": 2560 }, { "epoch": 0.22931333594493256, "grad_norm": 0.9315033449661035, "learning_rate": 1.7989108176354335e-05, "loss": 0.8864, "step": 2561 }, { "epoch": 0.22940287649001062, "grad_norm": 1.1104694345334944, "learning_rate": 1.798736346320759e-05, "loss": 0.9229, "step": 2562 }, { "epoch": 0.2294924170350887, "grad_norm": 0.9902123081478843, "learning_rate": 1.7985618078189854e-05, "loss": 0.8994, "step": 2563 }, { "epoch": 0.22958195758016678, "grad_norm": 0.8337760711564342, "learning_rate": 1.798387202144795e-05, "loss": 0.9, "step": 2564 }, { "epoch": 0.22967149812524484, "grad_norm": 0.9593759546794962, "learning_rate": 1.7982125293128752e-05, "loss": 0.8324, "step": 2565 }, { "epoch": 0.2297610386703229, "grad_norm": 1.2005911662238826, "learning_rate": 1.798037789337918e-05, "loss": 0.9079, "step": 2566 }, { "epoch": 0.22985057921540097, "grad_norm": 0.9265124694783007, "learning_rate": 1.7978629822346233e-05, "loss": 0.8878, "step": 2567 }, { "epoch": 0.22994011976047904, "grad_norm": 0.8541826945204339, "learning_rate": 1.797688108017694e-05, "loss": 0.8667, "step": 2568 }, { "epoch": 0.2300296603055571, "grad_norm": 0.9058874652277398, "learning_rate": 1.7975131667018403e-05, "loss": 0.8833, "step": 2569 }, { "epoch": 0.23011920085063517, "grad_norm": 0.9039226307305662, "learning_rate": 1.7973381583017783e-05, "loss": 0.9017, "step": 2570 }, { "epoch": 0.23020874139571323, "grad_norm": 0.9137581852621146, "learning_rate": 1.7971630828322285e-05, "loss": 0.8998, "step": 2571 }, { "epoch": 0.23029828194079133, "grad_norm": 0.9713535632572307, "learning_rate": 1.796987940307918e-05, "loss": 0.9105, "step": 2572 }, { "epoch": 0.2303878224858694, "grad_norm": 0.9935165515419422, "learning_rate": 1.796812730743579e-05, "loss": 0.8464, "step": 2573 }, { "epoch": 0.23047736303094746, "grad_norm": 0.9919506810302618, "learning_rate": 1.7966374541539497e-05, "loss": 0.9083, "step": 2574 }, { "epoch": 0.23056690357602552, "grad_norm": 1.005897362272084, "learning_rate": 1.796462110553774e-05, "loss": 0.9058, "step": 2575 }, { "epoch": 0.2306564441211036, "grad_norm": 0.9616937545104918, "learning_rate": 1.7962866999578005e-05, "loss": 0.8934, "step": 2576 }, { "epoch": 0.23074598466618165, "grad_norm": 0.9765589070992285, "learning_rate": 1.796111222380785e-05, "loss": 0.8947, "step": 2577 }, { "epoch": 0.23083552521125972, "grad_norm": 1.0531262840183424, "learning_rate": 1.7959356778374878e-05, "loss": 0.8806, "step": 2578 }, { "epoch": 0.23092506575633778, "grad_norm": 0.9276498025173632, "learning_rate": 1.7957600663426747e-05, "loss": 0.8567, "step": 2579 }, { "epoch": 0.23101460630141585, "grad_norm": 1.1162326068192843, "learning_rate": 1.7955843879111182e-05, "loss": 0.8748, "step": 2580 }, { "epoch": 0.23110414684649394, "grad_norm": 0.9172631756008399, "learning_rate": 1.795408642557596e-05, "loss": 0.8775, "step": 2581 }, { "epoch": 0.231193687391572, "grad_norm": 1.0451464888650999, "learning_rate": 1.7952328302968904e-05, "loss": 0.8192, "step": 2582 }, { "epoch": 0.23128322793665007, "grad_norm": 0.9125392466065658, "learning_rate": 1.7950569511437903e-05, "loss": 0.8945, "step": 2583 }, { "epoch": 0.23137276848172814, "grad_norm": 0.969917390519741, "learning_rate": 1.7948810051130907e-05, "loss": 0.9172, "step": 2584 }, { "epoch": 0.2314623090268062, "grad_norm": 0.9859667102899877, "learning_rate": 1.794704992219591e-05, "loss": 0.9144, "step": 2585 }, { "epoch": 0.23155184957188427, "grad_norm": 0.8949310569260401, "learning_rate": 1.7945289124780973e-05, "loss": 0.8174, "step": 2586 }, { "epoch": 0.23164139011696233, "grad_norm": 0.935230584064052, "learning_rate": 1.7943527659034204e-05, "loss": 0.8717, "step": 2587 }, { "epoch": 0.2317309306620404, "grad_norm": 0.9503475459266788, "learning_rate": 1.7941765525103777e-05, "loss": 0.8336, "step": 2588 }, { "epoch": 0.23182047120711846, "grad_norm": 0.9609321883625781, "learning_rate": 1.794000272313791e-05, "loss": 0.8623, "step": 2589 }, { "epoch": 0.23191001175219655, "grad_norm": 0.9345102490435835, "learning_rate": 1.793823925328489e-05, "loss": 0.89, "step": 2590 }, { "epoch": 0.23199955229727462, "grad_norm": 0.9438304667160562, "learning_rate": 1.7936475115693054e-05, "loss": 0.8851, "step": 2591 }, { "epoch": 0.23208909284235268, "grad_norm": 0.9828322677280299, "learning_rate": 1.793471031051079e-05, "loss": 0.85, "step": 2592 }, { "epoch": 0.23217863338743075, "grad_norm": 1.0343131063016096, "learning_rate": 1.7932944837886556e-05, "loss": 0.921, "step": 2593 }, { "epoch": 0.23226817393250881, "grad_norm": 1.0114673934588416, "learning_rate": 1.793117869796885e-05, "loss": 0.8872, "step": 2594 }, { "epoch": 0.23235771447758688, "grad_norm": 0.9868974783342381, "learning_rate": 1.7929411890906237e-05, "loss": 0.9114, "step": 2595 }, { "epoch": 0.23244725502266494, "grad_norm": 0.9016025942968532, "learning_rate": 1.7927644416847337e-05, "loss": 0.8787, "step": 2596 }, { "epoch": 0.232536795567743, "grad_norm": 0.9567110155335192, "learning_rate": 1.7925876275940822e-05, "loss": 0.8878, "step": 2597 }, { "epoch": 0.23262633611282107, "grad_norm": 0.952703649408506, "learning_rate": 1.7924107468335422e-05, "loss": 0.8732, "step": 2598 }, { "epoch": 0.23271587665789917, "grad_norm": 1.035802017035399, "learning_rate": 1.7922337994179925e-05, "loss": 0.8149, "step": 2599 }, { "epoch": 0.23280541720297723, "grad_norm": 0.95004596292956, "learning_rate": 1.792056785362317e-05, "loss": 0.8232, "step": 2600 }, { "epoch": 0.2328949577480553, "grad_norm": 0.9581760059483662, "learning_rate": 1.7918797046814065e-05, "loss": 0.9145, "step": 2601 }, { "epoch": 0.23298449829313336, "grad_norm": 1.0471642009871134, "learning_rate": 1.7917025573901552e-05, "loss": 0.8696, "step": 2602 }, { "epoch": 0.23307403883821143, "grad_norm": 0.9364149465864768, "learning_rate": 1.7915253435034647e-05, "loss": 0.8139, "step": 2603 }, { "epoch": 0.2331635793832895, "grad_norm": 0.9521297694837739, "learning_rate": 1.791348063036242e-05, "loss": 0.8289, "step": 2604 }, { "epoch": 0.23325311992836756, "grad_norm": 0.9412998392569706, "learning_rate": 1.7911707160033986e-05, "loss": 0.9158, "step": 2605 }, { "epoch": 0.23334266047344562, "grad_norm": 0.9716043680940335, "learning_rate": 1.7909933024198528e-05, "loss": 0.8187, "step": 2606 }, { "epoch": 0.2334322010185237, "grad_norm": 0.9245215907332207, "learning_rate": 1.790815822300528e-05, "loss": 0.8452, "step": 2607 }, { "epoch": 0.23352174156360178, "grad_norm": 1.0169525429323707, "learning_rate": 1.7906382756603536e-05, "loss": 0.8704, "step": 2608 }, { "epoch": 0.23361128210867985, "grad_norm": 0.9486720265347628, "learning_rate": 1.7904606625142636e-05, "loss": 0.8845, "step": 2609 }, { "epoch": 0.2337008226537579, "grad_norm": 0.9985718616356085, "learning_rate": 1.7902829828771984e-05, "loss": 0.8662, "step": 2610 }, { "epoch": 0.23379036319883598, "grad_norm": 1.0079681307614403, "learning_rate": 1.7901052367641047e-05, "loss": 0.877, "step": 2611 }, { "epoch": 0.23387990374391404, "grad_norm": 1.2877814644958372, "learning_rate": 1.7899274241899324e-05, "loss": 0.8652, "step": 2612 }, { "epoch": 0.2339694442889921, "grad_norm": 1.1685013204505492, "learning_rate": 1.7897495451696395e-05, "loss": 0.8939, "step": 2613 }, { "epoch": 0.23405898483407017, "grad_norm": 0.9271524495405291, "learning_rate": 1.7895715997181887e-05, "loss": 0.9014, "step": 2614 }, { "epoch": 0.23414852537914824, "grad_norm": 0.9605270410006715, "learning_rate": 1.7893935878505477e-05, "loss": 0.9317, "step": 2615 }, { "epoch": 0.2342380659242263, "grad_norm": 1.0457142015273801, "learning_rate": 1.7892155095816904e-05, "loss": 0.8972, "step": 2616 }, { "epoch": 0.2343276064693044, "grad_norm": 0.8897417700216302, "learning_rate": 1.7890373649265967e-05, "loss": 0.8684, "step": 2617 }, { "epoch": 0.23441714701438246, "grad_norm": 0.9863290835446341, "learning_rate": 1.7888591539002506e-05, "loss": 0.8635, "step": 2618 }, { "epoch": 0.23450668755946052, "grad_norm": 0.8763130093836499, "learning_rate": 1.7886808765176433e-05, "loss": 0.8602, "step": 2619 }, { "epoch": 0.2345962281045386, "grad_norm": 1.0029768902041363, "learning_rate": 1.7885025327937707e-05, "loss": 0.8474, "step": 2620 }, { "epoch": 0.23468576864961666, "grad_norm": 0.9938746099417956, "learning_rate": 1.7883241227436346e-05, "loss": 0.8751, "step": 2621 }, { "epoch": 0.23477530919469472, "grad_norm": 1.0819811831139574, "learning_rate": 1.7881456463822426e-05, "loss": 0.8039, "step": 2622 }, { "epoch": 0.23486484973977279, "grad_norm": 1.075059917708264, "learning_rate": 1.7879671037246063e-05, "loss": 0.904, "step": 2623 }, { "epoch": 0.23495439028485085, "grad_norm": 0.952609111176791, "learning_rate": 1.7877884947857457e-05, "loss": 0.8575, "step": 2624 }, { "epoch": 0.23504393082992892, "grad_norm": 0.981070468885405, "learning_rate": 1.787609819580684e-05, "loss": 0.8534, "step": 2625 }, { "epoch": 0.235133471375007, "grad_norm": 0.8934496249456562, "learning_rate": 1.7874310781244505e-05, "loss": 0.8485, "step": 2626 }, { "epoch": 0.23522301192008507, "grad_norm": 0.8662848566055952, "learning_rate": 1.787252270432081e-05, "loss": 0.8984, "step": 2627 }, { "epoch": 0.23531255246516314, "grad_norm": 0.9178788506593065, "learning_rate": 1.7870733965186158e-05, "loss": 0.8762, "step": 2628 }, { "epoch": 0.2354020930102412, "grad_norm": 0.9626225811255891, "learning_rate": 1.7868944563991014e-05, "loss": 0.8785, "step": 2629 }, { "epoch": 0.23549163355531927, "grad_norm": 0.8852751535574673, "learning_rate": 1.7867154500885898e-05, "loss": 0.8586, "step": 2630 }, { "epoch": 0.23558117410039733, "grad_norm": 0.9183662546585113, "learning_rate": 1.786536377602138e-05, "loss": 0.8655, "step": 2631 }, { "epoch": 0.2356707146454754, "grad_norm": 1.007747485845422, "learning_rate": 1.786357238954809e-05, "loss": 0.858, "step": 2632 }, { "epoch": 0.23576025519055346, "grad_norm": 1.0393029191828451, "learning_rate": 1.786178034161672e-05, "loss": 0.8772, "step": 2633 }, { "epoch": 0.23584979573563153, "grad_norm": 0.9257576439738907, "learning_rate": 1.7859987632378003e-05, "loss": 0.881, "step": 2634 }, { "epoch": 0.23593933628070962, "grad_norm": 0.9438131390493132, "learning_rate": 1.7858194261982742e-05, "loss": 0.9049, "step": 2635 }, { "epoch": 0.2360288768257877, "grad_norm": 0.9254056064527866, "learning_rate": 1.7856400230581786e-05, "loss": 0.8646, "step": 2636 }, { "epoch": 0.23611841737086575, "grad_norm": 1.023092547018399, "learning_rate": 1.7854605538326044e-05, "loss": 0.8676, "step": 2637 }, { "epoch": 0.23620795791594382, "grad_norm": 0.944292291619518, "learning_rate": 1.7852810185366483e-05, "loss": 0.8562, "step": 2638 }, { "epoch": 0.23629749846102188, "grad_norm": 1.0546311228616252, "learning_rate": 1.7851014171854112e-05, "loss": 0.9121, "step": 2639 }, { "epoch": 0.23638703900609995, "grad_norm": 0.9230496647553422, "learning_rate": 1.784921749794002e-05, "loss": 0.8929, "step": 2640 }, { "epoch": 0.236476579551178, "grad_norm": 0.8436094813949143, "learning_rate": 1.7847420163775327e-05, "loss": 0.8146, "step": 2641 }, { "epoch": 0.23656612009625608, "grad_norm": 0.9683885604145878, "learning_rate": 1.7845622169511223e-05, "loss": 0.8781, "step": 2642 }, { "epoch": 0.23665566064133414, "grad_norm": 1.0521803476722145, "learning_rate": 1.784382351529895e-05, "loss": 0.8454, "step": 2643 }, { "epoch": 0.23674520118641224, "grad_norm": 0.9170253910131152, "learning_rate": 1.7842024201289803e-05, "loss": 0.8689, "step": 2644 }, { "epoch": 0.2368347417314903, "grad_norm": 0.9729363223624565, "learning_rate": 1.7840224227635136e-05, "loss": 0.9019, "step": 2645 }, { "epoch": 0.23692428227656837, "grad_norm": 0.9304258924835851, "learning_rate": 1.7838423594486355e-05, "loss": 0.9293, "step": 2646 }, { "epoch": 0.23701382282164643, "grad_norm": 1.0137938753598892, "learning_rate": 1.783662230199492e-05, "loss": 0.9052, "step": 2647 }, { "epoch": 0.2371033633667245, "grad_norm": 0.9234728892236547, "learning_rate": 1.783482035031236e-05, "loss": 0.8904, "step": 2648 }, { "epoch": 0.23719290391180256, "grad_norm": 1.0609154343320197, "learning_rate": 1.7833017739590243e-05, "loss": 0.8809, "step": 2649 }, { "epoch": 0.23728244445688063, "grad_norm": 0.9869192441106394, "learning_rate": 1.7831214469980196e-05, "loss": 0.9141, "step": 2650 }, { "epoch": 0.2373719850019587, "grad_norm": 0.9238068631519115, "learning_rate": 1.782941054163391e-05, "loss": 0.8793, "step": 2651 }, { "epoch": 0.23746152554703676, "grad_norm": 1.0022334996559363, "learning_rate": 1.7827605954703126e-05, "loss": 0.9115, "step": 2652 }, { "epoch": 0.23755106609211485, "grad_norm": 0.8891432097363757, "learning_rate": 1.7825800709339632e-05, "loss": 0.8781, "step": 2653 }, { "epoch": 0.2376406066371929, "grad_norm": 0.8989902803091024, "learning_rate": 1.782399480569528e-05, "loss": 0.8425, "step": 2654 }, { "epoch": 0.23773014718227098, "grad_norm": 0.9912972011753028, "learning_rate": 1.782218824392199e-05, "loss": 0.9158, "step": 2655 }, { "epoch": 0.23781968772734904, "grad_norm": 0.983719020031523, "learning_rate": 1.7820381024171713e-05, "loss": 0.8112, "step": 2656 }, { "epoch": 0.2379092282724271, "grad_norm": 0.8918817781164265, "learning_rate": 1.7818573146596465e-05, "loss": 0.8408, "step": 2657 }, { "epoch": 0.23799876881750517, "grad_norm": 1.1084179488449928, "learning_rate": 1.7816764611348324e-05, "loss": 0.9429, "step": 2658 }, { "epoch": 0.23808830936258324, "grad_norm": 0.9568337603666967, "learning_rate": 1.781495541857942e-05, "loss": 0.9176, "step": 2659 }, { "epoch": 0.2381778499076613, "grad_norm": 0.9094113930117377, "learning_rate": 1.7813145568441927e-05, "loss": 0.8074, "step": 2660 }, { "epoch": 0.23826739045273937, "grad_norm": 0.9734006635669809, "learning_rate": 1.7811335061088093e-05, "loss": 0.7893, "step": 2661 }, { "epoch": 0.23835693099781746, "grad_norm": 0.8898647116120292, "learning_rate": 1.7809523896670205e-05, "loss": 0.8337, "step": 2662 }, { "epoch": 0.23844647154289553, "grad_norm": 0.9668526906350915, "learning_rate": 1.780771207534062e-05, "loss": 0.8143, "step": 2663 }, { "epoch": 0.2385360120879736, "grad_norm": 1.155557046212242, "learning_rate": 1.7805899597251736e-05, "loss": 0.8202, "step": 2664 }, { "epoch": 0.23862555263305166, "grad_norm": 1.0978460663297456, "learning_rate": 1.7804086462556015e-05, "loss": 0.872, "step": 2665 }, { "epoch": 0.23871509317812972, "grad_norm": 0.9788033916710924, "learning_rate": 1.7802272671405972e-05, "loss": 0.8301, "step": 2666 }, { "epoch": 0.2388046337232078, "grad_norm": 0.9976434859425, "learning_rate": 1.780045822395418e-05, "loss": 0.8621, "step": 2667 }, { "epoch": 0.23889417426828585, "grad_norm": 0.9656441610717202, "learning_rate": 1.7798643120353262e-05, "loss": 0.8737, "step": 2668 }, { "epoch": 0.23898371481336392, "grad_norm": 0.982047421264553, "learning_rate": 1.7796827360755892e-05, "loss": 0.8233, "step": 2669 }, { "epoch": 0.23907325535844198, "grad_norm": 1.0838198515924433, "learning_rate": 1.7795010945314816e-05, "loss": 0.9123, "step": 2670 }, { "epoch": 0.23916279590352008, "grad_norm": 1.0451740752492333, "learning_rate": 1.779319387418282e-05, "loss": 0.8826, "step": 2671 }, { "epoch": 0.23925233644859814, "grad_norm": 1.003395524090343, "learning_rate": 1.7791376147512754e-05, "loss": 0.8879, "step": 2672 }, { "epoch": 0.2393418769936762, "grad_norm": 0.9953550986807853, "learning_rate": 1.7789557765457514e-05, "loss": 0.8861, "step": 2673 }, { "epoch": 0.23943141753875427, "grad_norm": 1.1030472874146318, "learning_rate": 1.7787738728170057e-05, "loss": 0.8329, "step": 2674 }, { "epoch": 0.23952095808383234, "grad_norm": 0.9052115998066025, "learning_rate": 1.77859190358034e-05, "loss": 0.8419, "step": 2675 }, { "epoch": 0.2396104986289104, "grad_norm": 1.2851847215676462, "learning_rate": 1.77840986885106e-05, "loss": 0.9421, "step": 2676 }, { "epoch": 0.23970003917398847, "grad_norm": 0.8976328324702899, "learning_rate": 1.778227768644479e-05, "loss": 0.9233, "step": 2677 }, { "epoch": 0.23978957971906653, "grad_norm": 0.9928390911819834, "learning_rate": 1.778045602975914e-05, "loss": 0.8793, "step": 2678 }, { "epoch": 0.2398791202641446, "grad_norm": 1.1219352403064662, "learning_rate": 1.7778633718606882e-05, "loss": 0.8689, "step": 2679 }, { "epoch": 0.2399686608092227, "grad_norm": 1.0006621662579105, "learning_rate": 1.77768107531413e-05, "loss": 0.8128, "step": 2680 }, { "epoch": 0.24005820135430075, "grad_norm": 0.9885008535400198, "learning_rate": 1.7774987133515743e-05, "loss": 0.881, "step": 2681 }, { "epoch": 0.24014774189937882, "grad_norm": 0.9434443960493979, "learning_rate": 1.7773162859883607e-05, "loss": 0.9008, "step": 2682 }, { "epoch": 0.24023728244445688, "grad_norm": 0.9530156413917824, "learning_rate": 1.777133793239834e-05, "loss": 0.9299, "step": 2683 }, { "epoch": 0.24032682298953495, "grad_norm": 1.0222522045827696, "learning_rate": 1.776951235121345e-05, "loss": 0.8827, "step": 2684 }, { "epoch": 0.24041636353461301, "grad_norm": 0.9755682089948413, "learning_rate": 1.77676861164825e-05, "loss": 0.8843, "step": 2685 }, { "epoch": 0.24050590407969108, "grad_norm": 1.0368372394799665, "learning_rate": 1.7765859228359107e-05, "loss": 0.8657, "step": 2686 }, { "epoch": 0.24059544462476914, "grad_norm": 0.935599898442727, "learning_rate": 1.776403168699694e-05, "loss": 0.8892, "step": 2687 }, { "epoch": 0.2406849851698472, "grad_norm": 1.0852113247658912, "learning_rate": 1.776220349254973e-05, "loss": 0.9044, "step": 2688 }, { "epoch": 0.2407745257149253, "grad_norm": 0.8611597478426928, "learning_rate": 1.776037464517126e-05, "loss": 0.8814, "step": 2689 }, { "epoch": 0.24086406626000337, "grad_norm": 0.8960616729913411, "learning_rate": 1.7758545145015357e-05, "loss": 0.8411, "step": 2690 }, { "epoch": 0.24095360680508143, "grad_norm": 0.982020686778685, "learning_rate": 1.7756714992235923e-05, "loss": 0.8811, "step": 2691 }, { "epoch": 0.2410431473501595, "grad_norm": 1.0211076463363102, "learning_rate": 1.7754884186986902e-05, "loss": 0.8903, "step": 2692 }, { "epoch": 0.24113268789523756, "grad_norm": 0.9107275047766401, "learning_rate": 1.775305272942229e-05, "loss": 0.8918, "step": 2693 }, { "epoch": 0.24122222844031563, "grad_norm": 0.9017106495487175, "learning_rate": 1.775122061969615e-05, "loss": 0.8736, "step": 2694 }, { "epoch": 0.2413117689853937, "grad_norm": 1.05237763746774, "learning_rate": 1.774938785796259e-05, "loss": 0.895, "step": 2695 }, { "epoch": 0.24140130953047176, "grad_norm": 0.9921704153549351, "learning_rate": 1.7747554444375778e-05, "loss": 0.9303, "step": 2696 }, { "epoch": 0.24149085007554982, "grad_norm": 1.0619069229384261, "learning_rate": 1.774572037908993e-05, "loss": 0.8861, "step": 2697 }, { "epoch": 0.24158039062062792, "grad_norm": 0.9163750106196041, "learning_rate": 1.7743885662259327e-05, "loss": 0.8824, "step": 2698 }, { "epoch": 0.24166993116570598, "grad_norm": 1.0451213070568686, "learning_rate": 1.7742050294038296e-05, "loss": 0.8694, "step": 2699 }, { "epoch": 0.24175947171078405, "grad_norm": 1.1228707866301095, "learning_rate": 1.7740214274581225e-05, "loss": 0.8818, "step": 2700 }, { "epoch": 0.2418490122558621, "grad_norm": 0.8219860263683608, "learning_rate": 1.7738377604042552e-05, "loss": 0.8437, "step": 2701 }, { "epoch": 0.24193855280094018, "grad_norm": 0.9130427729516896, "learning_rate": 1.773654028257677e-05, "loss": 0.8817, "step": 2702 }, { "epoch": 0.24202809334601824, "grad_norm": 0.9578896782605372, "learning_rate": 1.7734702310338432e-05, "loss": 0.9219, "step": 2703 }, { "epoch": 0.2421176338910963, "grad_norm": 0.8716933894015255, "learning_rate": 1.773286368748214e-05, "loss": 0.9295, "step": 2704 }, { "epoch": 0.24220717443617437, "grad_norm": 1.0252457086825695, "learning_rate": 1.7731024414162556e-05, "loss": 0.8979, "step": 2705 }, { "epoch": 0.24229671498125244, "grad_norm": 0.9247889599034405, "learning_rate": 1.7729184490534387e-05, "loss": 0.8642, "step": 2706 }, { "epoch": 0.24238625552633053, "grad_norm": 0.9275643236004367, "learning_rate": 1.772734391675241e-05, "loss": 0.8276, "step": 2707 }, { "epoch": 0.2424757960714086, "grad_norm": 0.8467931196907335, "learning_rate": 1.772550269297144e-05, "loss": 0.8379, "step": 2708 }, { "epoch": 0.24256533661648666, "grad_norm": 1.0013710701177705, "learning_rate": 1.7723660819346362e-05, "loss": 0.9381, "step": 2709 }, { "epoch": 0.24265487716156472, "grad_norm": 0.9187642130023972, "learning_rate": 1.7721818296032102e-05, "loss": 0.8503, "step": 2710 }, { "epoch": 0.2427444177066428, "grad_norm": 1.2330183344577654, "learning_rate": 1.7719975123183654e-05, "loss": 0.8964, "step": 2711 }, { "epoch": 0.24283395825172086, "grad_norm": 0.9827652701156832, "learning_rate": 1.7718131300956055e-05, "loss": 0.8825, "step": 2712 }, { "epoch": 0.24292349879679892, "grad_norm": 1.0645073916860257, "learning_rate": 1.7716286829504397e-05, "loss": 0.8761, "step": 2713 }, { "epoch": 0.24301303934187699, "grad_norm": 0.9559215040123434, "learning_rate": 1.771444170898384e-05, "loss": 0.8963, "step": 2714 }, { "epoch": 0.24310257988695505, "grad_norm": 0.8934086109799099, "learning_rate": 1.7712595939549582e-05, "loss": 0.894, "step": 2715 }, { "epoch": 0.24319212043203314, "grad_norm": 0.9557143202988652, "learning_rate": 1.7710749521356894e-05, "loss": 0.8777, "step": 2716 }, { "epoch": 0.2432816609771112, "grad_norm": 0.9951329789554673, "learning_rate": 1.7708902454561076e-05, "loss": 0.9169, "step": 2717 }, { "epoch": 0.24337120152218927, "grad_norm": 0.9131655089319715, "learning_rate": 1.7707054739317508e-05, "loss": 0.8481, "step": 2718 }, { "epoch": 0.24346074206726734, "grad_norm": 1.0259273191981082, "learning_rate": 1.770520637578161e-05, "loss": 0.8317, "step": 2719 }, { "epoch": 0.2435502826123454, "grad_norm": 0.8934711674176932, "learning_rate": 1.7703357364108862e-05, "loss": 0.9181, "step": 2720 }, { "epoch": 0.24363982315742347, "grad_norm": 0.9518098452068476, "learning_rate": 1.7701507704454794e-05, "loss": 0.9235, "step": 2721 }, { "epoch": 0.24372936370250153, "grad_norm": 0.9615663055458201, "learning_rate": 1.7699657396974993e-05, "loss": 0.8781, "step": 2722 }, { "epoch": 0.2438189042475796, "grad_norm": 0.9672350339557478, "learning_rate": 1.7697806441825106e-05, "loss": 0.9083, "step": 2723 }, { "epoch": 0.24390844479265766, "grad_norm": 1.2235643908573797, "learning_rate": 1.769595483916083e-05, "loss": 0.8948, "step": 2724 }, { "epoch": 0.24399798533773576, "grad_norm": 0.9724308246464767, "learning_rate": 1.7694102589137903e-05, "loss": 0.8558, "step": 2725 }, { "epoch": 0.24408752588281382, "grad_norm": 0.9839020135226488, "learning_rate": 1.769224969191214e-05, "loss": 0.8888, "step": 2726 }, { "epoch": 0.2441770664278919, "grad_norm": 1.0542576330945779, "learning_rate": 1.7690396147639403e-05, "loss": 0.9123, "step": 2727 }, { "epoch": 0.24426660697296995, "grad_norm": 0.9647643797132811, "learning_rate": 1.76885419564756e-05, "loss": 0.8185, "step": 2728 }, { "epoch": 0.24435614751804802, "grad_norm": 1.0668523302626827, "learning_rate": 1.7686687118576707e-05, "loss": 0.9345, "step": 2729 }, { "epoch": 0.24444568806312608, "grad_norm": 0.9731682213180951, "learning_rate": 1.768483163409874e-05, "loss": 0.9307, "step": 2730 }, { "epoch": 0.24453522860820415, "grad_norm": 0.9926343776486147, "learning_rate": 1.7682975503197776e-05, "loss": 0.9298, "step": 2731 }, { "epoch": 0.2446247691532822, "grad_norm": 1.0835089625505936, "learning_rate": 1.7681118726029952e-05, "loss": 0.8636, "step": 2732 }, { "epoch": 0.24471430969836028, "grad_norm": 0.9527805436959033, "learning_rate": 1.7679261302751448e-05, "loss": 0.8977, "step": 2733 }, { "epoch": 0.24480385024343837, "grad_norm": 0.9390805341550253, "learning_rate": 1.767740323351851e-05, "loss": 0.8305, "step": 2734 }, { "epoch": 0.24489339078851644, "grad_norm": 0.9872127560617658, "learning_rate": 1.767554451848743e-05, "loss": 0.8969, "step": 2735 }, { "epoch": 0.2449829313335945, "grad_norm": 0.9465664465372386, "learning_rate": 1.7673685157814556e-05, "loss": 0.844, "step": 2736 }, { "epoch": 0.24507247187867257, "grad_norm": 1.0302506448451823, "learning_rate": 1.76718251516563e-05, "loss": 0.8496, "step": 2737 }, { "epoch": 0.24516201242375063, "grad_norm": 1.0238860822398237, "learning_rate": 1.7669964500169103e-05, "loss": 0.8523, "step": 2738 }, { "epoch": 0.2452515529688287, "grad_norm": 1.0750848462952611, "learning_rate": 1.7668103203509494e-05, "loss": 0.8817, "step": 2739 }, { "epoch": 0.24534109351390676, "grad_norm": 0.8955590253859774, "learning_rate": 1.7666241261834028e-05, "loss": 0.9062, "step": 2740 }, { "epoch": 0.24543063405898483, "grad_norm": 0.8707931441652753, "learning_rate": 1.7664378675299328e-05, "loss": 0.8611, "step": 2741 }, { "epoch": 0.2455201746040629, "grad_norm": 1.0799676357731698, "learning_rate": 1.766251544406207e-05, "loss": 0.9025, "step": 2742 }, { "epoch": 0.24560971514914098, "grad_norm": 0.9687187951683363, "learning_rate": 1.7660651568278983e-05, "loss": 0.8924, "step": 2743 }, { "epoch": 0.24569925569421905, "grad_norm": 0.9243104535331628, "learning_rate": 1.765878704810685e-05, "loss": 0.8382, "step": 2744 }, { "epoch": 0.2457887962392971, "grad_norm": 1.0523232525281316, "learning_rate": 1.7656921883702512e-05, "loss": 0.8809, "step": 2745 }, { "epoch": 0.24587833678437518, "grad_norm": 0.8443677969596001, "learning_rate": 1.765505607522285e-05, "loss": 0.8486, "step": 2746 }, { "epoch": 0.24596787732945324, "grad_norm": 0.926803234417444, "learning_rate": 1.765318962282482e-05, "loss": 0.8838, "step": 2747 }, { "epoch": 0.2460574178745313, "grad_norm": 0.9300929304339939, "learning_rate": 1.765132252666542e-05, "loss": 0.8935, "step": 2748 }, { "epoch": 0.24614695841960937, "grad_norm": 1.0523953372204147, "learning_rate": 1.7649454786901697e-05, "loss": 0.8673, "step": 2749 }, { "epoch": 0.24623649896468744, "grad_norm": 0.9683403739448853, "learning_rate": 1.764758640369077e-05, "loss": 0.8622, "step": 2750 }, { "epoch": 0.2463260395097655, "grad_norm": 0.9175951408387996, "learning_rate": 1.7645717377189795e-05, "loss": 0.9167, "step": 2751 }, { "epoch": 0.2464155800548436, "grad_norm": 0.9670774194552706, "learning_rate": 1.764384770755599e-05, "loss": 0.8792, "step": 2752 }, { "epoch": 0.24650512059992166, "grad_norm": 1.0342858962744663, "learning_rate": 1.7641977394946623e-05, "loss": 0.9173, "step": 2753 }, { "epoch": 0.24659466114499973, "grad_norm": 0.8829273388062568, "learning_rate": 1.7640106439519024e-05, "loss": 0.9118, "step": 2754 }, { "epoch": 0.2466842016900778, "grad_norm": 0.9602603377760228, "learning_rate": 1.7638234841430563e-05, "loss": 0.8875, "step": 2755 }, { "epoch": 0.24677374223515586, "grad_norm": 1.0239336237311405, "learning_rate": 1.763636260083868e-05, "loss": 0.8827, "step": 2756 }, { "epoch": 0.24686328278023392, "grad_norm": 0.9562132243310533, "learning_rate": 1.763448971790086e-05, "loss": 0.8791, "step": 2757 }, { "epoch": 0.246952823325312, "grad_norm": 0.9606704192471304, "learning_rate": 1.763261619277464e-05, "loss": 0.9082, "step": 2758 }, { "epoch": 0.24704236387039005, "grad_norm": 1.0006112315852194, "learning_rate": 1.7630742025617626e-05, "loss": 0.8467, "step": 2759 }, { "epoch": 0.24713190441546812, "grad_norm": 0.9373879963582208, "learning_rate": 1.7628867216587452e-05, "loss": 0.8991, "step": 2760 }, { "epoch": 0.2472214449605462, "grad_norm": 0.8571657260390055, "learning_rate": 1.7626991765841832e-05, "loss": 0.9024, "step": 2761 }, { "epoch": 0.24731098550562428, "grad_norm": 0.8855173483650962, "learning_rate": 1.762511567353852e-05, "loss": 0.8526, "step": 2762 }, { "epoch": 0.24740052605070234, "grad_norm": 0.9525417658088339, "learning_rate": 1.7623238939835322e-05, "loss": 0.8183, "step": 2763 }, { "epoch": 0.2474900665957804, "grad_norm": 0.8264905716337613, "learning_rate": 1.7621361564890108e-05, "loss": 0.8002, "step": 2764 }, { "epoch": 0.24757960714085847, "grad_norm": 0.9611381182640658, "learning_rate": 1.7619483548860792e-05, "loss": 0.8724, "step": 2765 }, { "epoch": 0.24766914768593654, "grad_norm": 1.0909350991237563, "learning_rate": 1.761760489190535e-05, "loss": 0.8594, "step": 2766 }, { "epoch": 0.2477586882310146, "grad_norm": 1.0508531917172441, "learning_rate": 1.7615725594181808e-05, "loss": 0.8874, "step": 2767 }, { "epoch": 0.24784822877609267, "grad_norm": 0.9134953625375521, "learning_rate": 1.761384565584825e-05, "loss": 0.8931, "step": 2768 }, { "epoch": 0.24793776932117073, "grad_norm": 0.9114919325780945, "learning_rate": 1.7611965077062808e-05, "loss": 0.8779, "step": 2769 }, { "epoch": 0.24802730986624882, "grad_norm": 0.9681423120707378, "learning_rate": 1.7610083857983663e-05, "loss": 0.8259, "step": 2770 }, { "epoch": 0.2481168504113269, "grad_norm": 1.0607695766166856, "learning_rate": 1.7608201998769065e-05, "loss": 0.8886, "step": 2771 }, { "epoch": 0.24820639095640495, "grad_norm": 0.9274279877402068, "learning_rate": 1.7606319499577308e-05, "loss": 0.8285, "step": 2772 }, { "epoch": 0.24829593150148302, "grad_norm": 0.9901015756754248, "learning_rate": 1.7604436360566742e-05, "loss": 0.8709, "step": 2773 }, { "epoch": 0.24838547204656108, "grad_norm": 1.0168318332719135, "learning_rate": 1.760255258189577e-05, "loss": 0.8394, "step": 2774 }, { "epoch": 0.24847501259163915, "grad_norm": 0.9561944657924711, "learning_rate": 1.760066816372285e-05, "loss": 0.8442, "step": 2775 }, { "epoch": 0.24856455313671721, "grad_norm": 0.8889753117868241, "learning_rate": 1.7598783106206488e-05, "loss": 0.8368, "step": 2776 }, { "epoch": 0.24865409368179528, "grad_norm": 0.9128333726713597, "learning_rate": 1.7596897409505257e-05, "loss": 0.8396, "step": 2777 }, { "epoch": 0.24874363422687334, "grad_norm": 0.905698393292759, "learning_rate": 1.7595011073777773e-05, "loss": 0.897, "step": 2778 }, { "epoch": 0.24883317477195144, "grad_norm": 0.9205434875374836, "learning_rate": 1.7593124099182705e-05, "loss": 0.835, "step": 2779 }, { "epoch": 0.2489227153170295, "grad_norm": 0.9069947674139621, "learning_rate": 1.7591236485878783e-05, "loss": 0.9214, "step": 2780 }, { "epoch": 0.24901225586210757, "grad_norm": 0.9607755672372462, "learning_rate": 1.7589348234024787e-05, "loss": 0.9125, "step": 2781 }, { "epoch": 0.24910179640718563, "grad_norm": 1.0495989272277562, "learning_rate": 1.7587459343779545e-05, "loss": 0.8987, "step": 2782 }, { "epoch": 0.2491913369522637, "grad_norm": 0.9867383154208691, "learning_rate": 1.758556981530195e-05, "loss": 0.8427, "step": 2783 }, { "epoch": 0.24928087749734176, "grad_norm": 0.9097319160504008, "learning_rate": 1.7583679648750945e-05, "loss": 0.8443, "step": 2784 }, { "epoch": 0.24937041804241983, "grad_norm": 0.9317617865863824, "learning_rate": 1.7581788844285513e-05, "loss": 0.8513, "step": 2785 }, { "epoch": 0.2494599585874979, "grad_norm": 1.0166778739233655, "learning_rate": 1.7579897402064716e-05, "loss": 0.8926, "step": 2786 }, { "epoch": 0.24954949913257596, "grad_norm": 0.9516827087404582, "learning_rate": 1.7578005322247648e-05, "loss": 0.9082, "step": 2787 }, { "epoch": 0.24963903967765405, "grad_norm": 0.9289996303495486, "learning_rate": 1.7576112604993468e-05, "loss": 0.8394, "step": 2788 }, { "epoch": 0.24972858022273212, "grad_norm": 0.9027645565735546, "learning_rate": 1.7574219250461385e-05, "loss": 0.8481, "step": 2789 }, { "epoch": 0.24981812076781018, "grad_norm": 0.9544300252910155, "learning_rate": 1.757232525881066e-05, "loss": 0.8086, "step": 2790 }, { "epoch": 0.24990766131288825, "grad_norm": 0.9722804704566914, "learning_rate": 1.757043063020061e-05, "loss": 0.8849, "step": 2791 }, { "epoch": 0.2499972018579663, "grad_norm": 0.9760416399715646, "learning_rate": 1.75685353647906e-05, "loss": 0.8818, "step": 2792 }, { "epoch": 0.2500867424030444, "grad_norm": 0.9172019223324674, "learning_rate": 1.7566639462740064e-05, "loss": 0.8777, "step": 2793 }, { "epoch": 0.25017628294812244, "grad_norm": 1.1990262362097253, "learning_rate": 1.7564742924208477e-05, "loss": 0.8797, "step": 2794 }, { "epoch": 0.25026582349320053, "grad_norm": 1.0423280661491978, "learning_rate": 1.756284574935536e-05, "loss": 0.9221, "step": 2795 }, { "epoch": 0.25035536403827857, "grad_norm": 0.9970916842384036, "learning_rate": 1.7560947938340306e-05, "loss": 0.8825, "step": 2796 }, { "epoch": 0.25044490458335666, "grad_norm": 1.0147323709176705, "learning_rate": 1.755904949132295e-05, "loss": 0.9022, "step": 2797 }, { "epoch": 0.2505344451284347, "grad_norm": 1.056225729992979, "learning_rate": 1.7557150408462986e-05, "loss": 0.8786, "step": 2798 }, { "epoch": 0.2506239856735128, "grad_norm": 0.9591230350067063, "learning_rate": 1.7555250689920154e-05, "loss": 0.8994, "step": 2799 }, { "epoch": 0.25071352621859083, "grad_norm": 1.0658895295115187, "learning_rate": 1.7553350335854253e-05, "loss": 0.8461, "step": 2800 }, { "epoch": 0.2508030667636689, "grad_norm": 0.9305997578989742, "learning_rate": 1.755144934642514e-05, "loss": 0.878, "step": 2801 }, { "epoch": 0.250892607308747, "grad_norm": 0.9473578769190715, "learning_rate": 1.7549547721792713e-05, "loss": 0.9143, "step": 2802 }, { "epoch": 0.25098214785382505, "grad_norm": 0.9713128629177371, "learning_rate": 1.754764546211693e-05, "loss": 0.8201, "step": 2803 }, { "epoch": 0.25107168839890315, "grad_norm": 0.9508628325420606, "learning_rate": 1.7545742567557813e-05, "loss": 0.9219, "step": 2804 }, { "epoch": 0.2511612289439812, "grad_norm": 0.9014703463077044, "learning_rate": 1.7543839038275416e-05, "loss": 0.8355, "step": 2805 }, { "epoch": 0.2512507694890593, "grad_norm": 0.8254199968080466, "learning_rate": 1.7541934874429864e-05, "loss": 0.8581, "step": 2806 }, { "epoch": 0.2513403100341373, "grad_norm": 1.050064023527621, "learning_rate": 1.754003007618133e-05, "loss": 0.8637, "step": 2807 }, { "epoch": 0.2514298505792154, "grad_norm": 1.0225103566909177, "learning_rate": 1.7538124643690033e-05, "loss": 0.832, "step": 2808 }, { "epoch": 0.25151939112429345, "grad_norm": 1.200794491385842, "learning_rate": 1.7536218577116255e-05, "loss": 0.8707, "step": 2809 }, { "epoch": 0.25160893166937154, "grad_norm": 0.9338264801135122, "learning_rate": 1.7534311876620332e-05, "loss": 0.8918, "step": 2810 }, { "epoch": 0.25169847221444963, "grad_norm": 0.9477966491362365, "learning_rate": 1.7532404542362643e-05, "loss": 0.8287, "step": 2811 }, { "epoch": 0.25178801275952767, "grad_norm": 0.9372558632118163, "learning_rate": 1.753049657450363e-05, "loss": 0.8872, "step": 2812 }, { "epoch": 0.25187755330460576, "grad_norm": 0.9314781001030886, "learning_rate": 1.7528587973203785e-05, "loss": 0.9026, "step": 2813 }, { "epoch": 0.2519670938496838, "grad_norm": 0.9408565072625145, "learning_rate": 1.7526678738623656e-05, "loss": 0.8771, "step": 2814 }, { "epoch": 0.2520566343947619, "grad_norm": 0.9750786292172663, "learning_rate": 1.7524768870923835e-05, "loss": 0.8439, "step": 2815 }, { "epoch": 0.25214617493983993, "grad_norm": 1.05985362590735, "learning_rate": 1.7522858370264976e-05, "loss": 0.8099, "step": 2816 }, { "epoch": 0.252235715484918, "grad_norm": 0.8846640969877093, "learning_rate": 1.752094723680779e-05, "loss": 0.8839, "step": 2817 }, { "epoch": 0.25232525602999606, "grad_norm": 0.9664230725840194, "learning_rate": 1.751903547071303e-05, "loss": 0.8209, "step": 2818 }, { "epoch": 0.25241479657507415, "grad_norm": 0.922721426488302, "learning_rate": 1.751712307214151e-05, "loss": 0.8952, "step": 2819 }, { "epoch": 0.25250433712015224, "grad_norm": 1.1563436484504441, "learning_rate": 1.7515210041254088e-05, "loss": 0.9158, "step": 2820 }, { "epoch": 0.2525938776652303, "grad_norm": 0.9492504085298364, "learning_rate": 1.751329637821169e-05, "loss": 0.9369, "step": 2821 }, { "epoch": 0.2526834182103084, "grad_norm": 0.8701165687089394, "learning_rate": 1.751138208317529e-05, "loss": 0.8711, "step": 2822 }, { "epoch": 0.2527729587553864, "grad_norm": 0.8358804813242242, "learning_rate": 1.75094671563059e-05, "loss": 0.9106, "step": 2823 }, { "epoch": 0.2528624993004645, "grad_norm": 1.0338531132979683, "learning_rate": 1.7507551597764603e-05, "loss": 0.8406, "step": 2824 }, { "epoch": 0.25295203984554254, "grad_norm": 0.8965129315233688, "learning_rate": 1.7505635407712533e-05, "loss": 0.8768, "step": 2825 }, { "epoch": 0.25304158039062064, "grad_norm": 0.9175218369263208, "learning_rate": 1.7503718586310872e-05, "loss": 0.8085, "step": 2826 }, { "epoch": 0.2531311209356987, "grad_norm": 0.9682162629606315, "learning_rate": 1.7501801133720856e-05, "loss": 0.8715, "step": 2827 }, { "epoch": 0.25322066148077677, "grad_norm": 0.9453568389751502, "learning_rate": 1.7499883050103773e-05, "loss": 0.8435, "step": 2828 }, { "epoch": 0.25331020202585486, "grad_norm": 0.8904804787042726, "learning_rate": 1.7497964335620965e-05, "loss": 0.8932, "step": 2829 }, { "epoch": 0.2533997425709329, "grad_norm": 0.9339773370954416, "learning_rate": 1.7496044990433833e-05, "loss": 0.8836, "step": 2830 }, { "epoch": 0.253489283116011, "grad_norm": 0.8950829887510492, "learning_rate": 1.7494125014703825e-05, "loss": 0.8429, "step": 2831 }, { "epoch": 0.253578823661089, "grad_norm": 0.9642177884708231, "learning_rate": 1.7492204408592447e-05, "loss": 0.8316, "step": 2832 }, { "epoch": 0.2536683642061671, "grad_norm": 1.1670573716889592, "learning_rate": 1.749028317226124e-05, "loss": 0.8622, "step": 2833 }, { "epoch": 0.25375790475124516, "grad_norm": 0.974798871965889, "learning_rate": 1.7488361305871827e-05, "loss": 0.9001, "step": 2834 }, { "epoch": 0.25384744529632325, "grad_norm": 0.9133881135448743, "learning_rate": 1.748643880958586e-05, "loss": 0.8976, "step": 2835 }, { "epoch": 0.2539369858414013, "grad_norm": 0.9726097684469386, "learning_rate": 1.7484515683565058e-05, "loss": 0.8545, "step": 2836 }, { "epoch": 0.2540265263864794, "grad_norm": 0.9786331045239071, "learning_rate": 1.7482591927971184e-05, "loss": 0.8382, "step": 2837 }, { "epoch": 0.25411606693155747, "grad_norm": 0.9159840509517129, "learning_rate": 1.7480667542966063e-05, "loss": 0.81, "step": 2838 }, { "epoch": 0.2542056074766355, "grad_norm": 1.0820499580325034, "learning_rate": 1.7478742528711566e-05, "loss": 0.931, "step": 2839 }, { "epoch": 0.2542951480217136, "grad_norm": 0.9686829698345041, "learning_rate": 1.747681688536962e-05, "loss": 0.88, "step": 2840 }, { "epoch": 0.25438468856679164, "grad_norm": 0.8706647913154442, "learning_rate": 1.74748906131022e-05, "loss": 0.8974, "step": 2841 }, { "epoch": 0.25447422911186973, "grad_norm": 0.9435140380959134, "learning_rate": 1.747296371207134e-05, "loss": 0.8526, "step": 2842 }, { "epoch": 0.25456376965694777, "grad_norm": 0.9422761827617258, "learning_rate": 1.7471036182439124e-05, "loss": 0.875, "step": 2843 }, { "epoch": 0.25465331020202586, "grad_norm": 0.9301740124329331, "learning_rate": 1.7469108024367688e-05, "loss": 0.8221, "step": 2844 }, { "epoch": 0.2547428507471039, "grad_norm": 0.9920298900665119, "learning_rate": 1.746717923801923e-05, "loss": 0.8856, "step": 2845 }, { "epoch": 0.254832391292182, "grad_norm": 0.9596495363831525, "learning_rate": 1.7465249823555987e-05, "loss": 0.8556, "step": 2846 }, { "epoch": 0.2549219318372601, "grad_norm": 1.0747696499442136, "learning_rate": 1.7463319781140257e-05, "loss": 0.8332, "step": 2847 }, { "epoch": 0.2550114723823381, "grad_norm": 0.9088360802474033, "learning_rate": 1.7461389110934382e-05, "loss": 0.8548, "step": 2848 }, { "epoch": 0.2551010129274162, "grad_norm": 0.983857389929394, "learning_rate": 1.7459457813100774e-05, "loss": 0.8216, "step": 2849 }, { "epoch": 0.25519055347249425, "grad_norm": 0.9563966487761592, "learning_rate": 1.745752588780188e-05, "loss": 0.8428, "step": 2850 }, { "epoch": 0.25528009401757235, "grad_norm": 1.1235664826006715, "learning_rate": 1.745559333520021e-05, "loss": 0.885, "step": 2851 }, { "epoch": 0.2553696345626504, "grad_norm": 1.0144690281417854, "learning_rate": 1.745366015545833e-05, "loss": 0.8542, "step": 2852 }, { "epoch": 0.2554591751077285, "grad_norm": 1.0757820002498766, "learning_rate": 1.745172634873884e-05, "loss": 0.9457, "step": 2853 }, { "epoch": 0.2555487156528065, "grad_norm": 0.9744657736851683, "learning_rate": 1.7449791915204418e-05, "loss": 0.8676, "step": 2854 }, { "epoch": 0.2556382561978846, "grad_norm": 0.9248408011755717, "learning_rate": 1.7447856855017773e-05, "loss": 0.8879, "step": 2855 }, { "epoch": 0.2557277967429627, "grad_norm": 0.9674868138940755, "learning_rate": 1.7445921168341682e-05, "loss": 0.867, "step": 2856 }, { "epoch": 0.25581733728804074, "grad_norm": 0.9961006464967589, "learning_rate": 1.7443984855338963e-05, "loss": 0.8686, "step": 2857 }, { "epoch": 0.25590687783311883, "grad_norm": 1.0111043281686516, "learning_rate": 1.74420479161725e-05, "loss": 0.8622, "step": 2858 }, { "epoch": 0.25599641837819687, "grad_norm": 0.9793036149431199, "learning_rate": 1.7440110351005212e-05, "loss": 0.924, "step": 2859 }, { "epoch": 0.25608595892327496, "grad_norm": 1.0487794828841637, "learning_rate": 1.7438172160000094e-05, "loss": 0.8566, "step": 2860 }, { "epoch": 0.256175499468353, "grad_norm": 0.8989867017127804, "learning_rate": 1.7436233343320168e-05, "loss": 0.8799, "step": 2861 }, { "epoch": 0.2562650400134311, "grad_norm": 0.9160656342354396, "learning_rate": 1.7434293901128528e-05, "loss": 0.8809, "step": 2862 }, { "epoch": 0.2563545805585091, "grad_norm": 0.9626963100676227, "learning_rate": 1.7432353833588306e-05, "loss": 0.9128, "step": 2863 }, { "epoch": 0.2564441211035872, "grad_norm": 0.8962621689009838, "learning_rate": 1.7430413140862705e-05, "loss": 0.9069, "step": 2864 }, { "epoch": 0.2565336616486653, "grad_norm": 0.9602712959631493, "learning_rate": 1.742847182311496e-05, "loss": 0.8805, "step": 2865 }, { "epoch": 0.25662320219374335, "grad_norm": 0.9292313928933265, "learning_rate": 1.742652988050838e-05, "loss": 0.8817, "step": 2866 }, { "epoch": 0.25671274273882144, "grad_norm": 0.9170967189453116, "learning_rate": 1.74245873132063e-05, "loss": 0.9131, "step": 2867 }, { "epoch": 0.2568022832838995, "grad_norm": 0.9782936213636454, "learning_rate": 1.742264412137213e-05, "loss": 0.9038, "step": 2868 }, { "epoch": 0.2568918238289776, "grad_norm": 0.8919900699314767, "learning_rate": 1.7420700305169326e-05, "loss": 0.8331, "step": 2869 }, { "epoch": 0.2569813643740556, "grad_norm": 1.0129344774806455, "learning_rate": 1.7418755864761398e-05, "loss": 0.8936, "step": 2870 }, { "epoch": 0.2570709049191337, "grad_norm": 0.8827368705842703, "learning_rate": 1.7416810800311897e-05, "loss": 0.8884, "step": 2871 }, { "epoch": 0.25716044546421174, "grad_norm": 1.0060270529812727, "learning_rate": 1.7414865111984443e-05, "loss": 0.8374, "step": 2872 }, { "epoch": 0.25724998600928983, "grad_norm": 0.8958033252826781, "learning_rate": 1.7412918799942696e-05, "loss": 0.8695, "step": 2873 }, { "epoch": 0.2573395265543679, "grad_norm": 0.9400351778361435, "learning_rate": 1.7410971864350377e-05, "loss": 0.9108, "step": 2874 }, { "epoch": 0.25742906709944596, "grad_norm": 0.9277355706856402, "learning_rate": 1.740902430537126e-05, "loss": 0.8914, "step": 2875 }, { "epoch": 0.25751860764452406, "grad_norm": 0.9185780304022725, "learning_rate": 1.7407076123169154e-05, "loss": 0.8697, "step": 2876 }, { "epoch": 0.2576081481896021, "grad_norm": 0.913874028195181, "learning_rate": 1.740512731790795e-05, "loss": 0.8582, "step": 2877 }, { "epoch": 0.2576976887346802, "grad_norm": 1.2109358779364472, "learning_rate": 1.740317788975156e-05, "loss": 0.9348, "step": 2878 }, { "epoch": 0.2577872292797582, "grad_norm": 0.9191780789798356, "learning_rate": 1.7401227838863978e-05, "loss": 0.8337, "step": 2879 }, { "epoch": 0.2578767698248363, "grad_norm": 1.2025305586191626, "learning_rate": 1.7399277165409222e-05, "loss": 0.923, "step": 2880 }, { "epoch": 0.25796631036991435, "grad_norm": 0.906162890261928, "learning_rate": 1.7397325869551385e-05, "loss": 0.8327, "step": 2881 }, { "epoch": 0.25805585091499245, "grad_norm": 1.14846747653532, "learning_rate": 1.7395373951454602e-05, "loss": 0.8846, "step": 2882 }, { "epoch": 0.25814539146007054, "grad_norm": 0.9653495825261464, "learning_rate": 1.7393421411283064e-05, "loss": 0.8703, "step": 2883 }, { "epoch": 0.2582349320051486, "grad_norm": 0.925176230105764, "learning_rate": 1.7391468249201007e-05, "loss": 0.8923, "step": 2884 }, { "epoch": 0.25832447255022667, "grad_norm": 0.9087061226581351, "learning_rate": 1.7389514465372727e-05, "loss": 0.8636, "step": 2885 }, { "epoch": 0.2584140130953047, "grad_norm": 0.9229533669653477, "learning_rate": 1.738756005996257e-05, "loss": 0.8246, "step": 2886 }, { "epoch": 0.2585035536403828, "grad_norm": 0.8878727127881054, "learning_rate": 1.7385605033134938e-05, "loss": 0.8828, "step": 2887 }, { "epoch": 0.25859309418546084, "grad_norm": 1.0366599559480718, "learning_rate": 1.7383649385054274e-05, "loss": 0.9319, "step": 2888 }, { "epoch": 0.25868263473053893, "grad_norm": 0.944010389695665, "learning_rate": 1.738169311588509e-05, "loss": 0.8463, "step": 2889 }, { "epoch": 0.25877217527561697, "grad_norm": 0.9789818657079927, "learning_rate": 1.737973622579193e-05, "loss": 0.9352, "step": 2890 }, { "epoch": 0.25886171582069506, "grad_norm": 0.9835968545824317, "learning_rate": 1.737777871493941e-05, "loss": 0.8759, "step": 2891 }, { "epoch": 0.25895125636577315, "grad_norm": 0.9417550040399009, "learning_rate": 1.7375820583492187e-05, "loss": 0.8874, "step": 2892 }, { "epoch": 0.2590407969108512, "grad_norm": 0.8131937864062204, "learning_rate": 1.7373861831614972e-05, "loss": 0.863, "step": 2893 }, { "epoch": 0.2591303374559293, "grad_norm": 1.0629441250182083, "learning_rate": 1.737190245947253e-05, "loss": 0.8639, "step": 2894 }, { "epoch": 0.2592198780010073, "grad_norm": 0.9206779175558812, "learning_rate": 1.736994246722967e-05, "loss": 0.8712, "step": 2895 }, { "epoch": 0.2593094185460854, "grad_norm": 1.0418122980071844, "learning_rate": 1.7367981855051275e-05, "loss": 0.8831, "step": 2896 }, { "epoch": 0.25939895909116345, "grad_norm": 0.8692206962093396, "learning_rate": 1.736602062310225e-05, "loss": 0.8781, "step": 2897 }, { "epoch": 0.25948849963624154, "grad_norm": 1.0075718214722609, "learning_rate": 1.7364058771547577e-05, "loss": 0.8534, "step": 2898 }, { "epoch": 0.2595780401813196, "grad_norm": 1.044507555111445, "learning_rate": 1.7362096300552277e-05, "loss": 0.9219, "step": 2899 }, { "epoch": 0.2596675807263977, "grad_norm": 0.9037210582476438, "learning_rate": 1.736013321028143e-05, "loss": 0.8477, "step": 2900 }, { "epoch": 0.25975712127147577, "grad_norm": 0.8474037971809412, "learning_rate": 1.7358169500900155e-05, "loss": 0.783, "step": 2901 }, { "epoch": 0.2598466618165538, "grad_norm": 0.9361117081489556, "learning_rate": 1.735620517257364e-05, "loss": 0.8512, "step": 2902 }, { "epoch": 0.2599362023616319, "grad_norm": 1.028882015887797, "learning_rate": 1.7354240225467123e-05, "loss": 0.8696, "step": 2903 }, { "epoch": 0.26002574290670993, "grad_norm": 1.0246798562346628, "learning_rate": 1.735227465974588e-05, "loss": 0.8775, "step": 2904 }, { "epoch": 0.260115283451788, "grad_norm": 0.8859030511238355, "learning_rate": 1.735030847557525e-05, "loss": 0.8695, "step": 2905 }, { "epoch": 0.26020482399686606, "grad_norm": 1.097505255342631, "learning_rate": 1.7348341673120627e-05, "loss": 0.8478, "step": 2906 }, { "epoch": 0.26029436454194416, "grad_norm": 0.939091255631859, "learning_rate": 1.734637425254745e-05, "loss": 0.8598, "step": 2907 }, { "epoch": 0.2603839050870222, "grad_norm": 0.9300800186440121, "learning_rate": 1.7344406214021207e-05, "loss": 0.8246, "step": 2908 }, { "epoch": 0.2604734456321003, "grad_norm": 0.9840642263662197, "learning_rate": 1.7342437557707448e-05, "loss": 0.8536, "step": 2909 }, { "epoch": 0.2605629861771784, "grad_norm": 1.0161836042116503, "learning_rate": 1.734046828377177e-05, "loss": 0.8091, "step": 2910 }, { "epoch": 0.2606525267222564, "grad_norm": 0.9348420316522782, "learning_rate": 1.7338498392379818e-05, "loss": 0.8278, "step": 2911 }, { "epoch": 0.2607420672673345, "grad_norm": 1.1168685771937086, "learning_rate": 1.7336527883697293e-05, "loss": 0.8914, "step": 2912 }, { "epoch": 0.26083160781241255, "grad_norm": 1.016188451451451, "learning_rate": 1.7334556757889954e-05, "loss": 0.9145, "step": 2913 }, { "epoch": 0.26092114835749064, "grad_norm": 1.1298426075186665, "learning_rate": 1.73325850151236e-05, "loss": 0.8844, "step": 2914 }, { "epoch": 0.2610106889025687, "grad_norm": 0.929745140690549, "learning_rate": 1.733061265556409e-05, "loss": 0.9054, "step": 2915 }, { "epoch": 0.26110022944764677, "grad_norm": 0.9049104122104006, "learning_rate": 1.7328639679377333e-05, "loss": 0.8732, "step": 2916 }, { "epoch": 0.2611897699927248, "grad_norm": 0.9371921983312508, "learning_rate": 1.7326666086729284e-05, "loss": 0.8548, "step": 2917 }, { "epoch": 0.2612793105378029, "grad_norm": 1.0086890987763673, "learning_rate": 1.7324691877785965e-05, "loss": 0.8979, "step": 2918 }, { "epoch": 0.261368851082881, "grad_norm": 1.0338213928207955, "learning_rate": 1.7322717052713428e-05, "loss": 0.8301, "step": 2919 }, { "epoch": 0.26145839162795903, "grad_norm": 0.9869678771055899, "learning_rate": 1.73207416116778e-05, "loss": 0.9157, "step": 2920 }, { "epoch": 0.2615479321730371, "grad_norm": 1.103589413726506, "learning_rate": 1.731876555484524e-05, "loss": 0.8806, "step": 2921 }, { "epoch": 0.26163747271811516, "grad_norm": 0.9889377914021891, "learning_rate": 1.7316788882381974e-05, "loss": 0.852, "step": 2922 }, { "epoch": 0.26172701326319325, "grad_norm": 0.8906520294524228, "learning_rate": 1.7314811594454267e-05, "loss": 0.8604, "step": 2923 }, { "epoch": 0.2618165538082713, "grad_norm": 0.9442631263458445, "learning_rate": 1.7312833691228448e-05, "loss": 0.9099, "step": 2924 }, { "epoch": 0.2619060943533494, "grad_norm": 0.9261948108668057, "learning_rate": 1.731085517287089e-05, "loss": 0.8859, "step": 2925 }, { "epoch": 0.2619956348984274, "grad_norm": 1.0949949574509237, "learning_rate": 1.730887603954802e-05, "loss": 0.8389, "step": 2926 }, { "epoch": 0.2620851754435055, "grad_norm": 0.9173996403361719, "learning_rate": 1.7306896291426308e-05, "loss": 0.8901, "step": 2927 }, { "epoch": 0.2621747159885836, "grad_norm": 1.0233456452814471, "learning_rate": 1.73049159286723e-05, "loss": 0.8699, "step": 2928 }, { "epoch": 0.26226425653366164, "grad_norm": 1.0335888197971284, "learning_rate": 1.730293495145256e-05, "loss": 0.885, "step": 2929 }, { "epoch": 0.26235379707873974, "grad_norm": 0.9667654559815505, "learning_rate": 1.7300953359933736e-05, "loss": 0.8574, "step": 2930 }, { "epoch": 0.2624433376238178, "grad_norm": 0.8917525848915481, "learning_rate": 1.7298971154282507e-05, "loss": 0.9044, "step": 2931 }, { "epoch": 0.26253287816889587, "grad_norm": 1.090827913702074, "learning_rate": 1.729698833466561e-05, "loss": 0.9278, "step": 2932 }, { "epoch": 0.2626224187139739, "grad_norm": 0.9370704714168601, "learning_rate": 1.729500490124983e-05, "loss": 0.909, "step": 2933 }, { "epoch": 0.262711959259052, "grad_norm": 1.0108821564454133, "learning_rate": 1.729302085420201e-05, "loss": 0.8496, "step": 2934 }, { "epoch": 0.26280149980413003, "grad_norm": 0.9355897500066436, "learning_rate": 1.729103619368904e-05, "loss": 0.8129, "step": 2935 }, { "epoch": 0.2628910403492081, "grad_norm": 1.1100949325352596, "learning_rate": 1.728905091987787e-05, "loss": 0.9127, "step": 2936 }, { "epoch": 0.2629805808942862, "grad_norm": 0.885538827438999, "learning_rate": 1.7287065032935487e-05, "loss": 0.8537, "step": 2937 }, { "epoch": 0.26307012143936426, "grad_norm": 0.9729968753098472, "learning_rate": 1.728507853302894e-05, "loss": 0.9047, "step": 2938 }, { "epoch": 0.26315966198444235, "grad_norm": 1.136584430193662, "learning_rate": 1.7283091420325327e-05, "loss": 0.9015, "step": 2939 }, { "epoch": 0.2632492025295204, "grad_norm": 0.9440257719518842, "learning_rate": 1.7281103694991798e-05, "loss": 0.8948, "step": 2940 }, { "epoch": 0.2633387430745985, "grad_norm": 1.014813861107895, "learning_rate": 1.727911535719555e-05, "loss": 0.8809, "step": 2941 }, { "epoch": 0.2634282836196765, "grad_norm": 1.0609261940426649, "learning_rate": 1.727712640710384e-05, "loss": 0.8766, "step": 2942 }, { "epoch": 0.2635178241647546, "grad_norm": 1.001943359828992, "learning_rate": 1.7275136844883972e-05, "loss": 0.8951, "step": 2943 }, { "epoch": 0.26360736470983265, "grad_norm": 0.994999284025049, "learning_rate": 1.7273146670703298e-05, "loss": 0.8846, "step": 2944 }, { "epoch": 0.26369690525491074, "grad_norm": 0.9999653023667543, "learning_rate": 1.727115588472923e-05, "loss": 0.8911, "step": 2945 }, { "epoch": 0.26378644579998883, "grad_norm": 0.9406300861729852, "learning_rate": 1.726916448712922e-05, "loss": 0.9293, "step": 2946 }, { "epoch": 0.26387598634506687, "grad_norm": 1.036111310231419, "learning_rate": 1.7267172478070782e-05, "loss": 0.8315, "step": 2947 }, { "epoch": 0.26396552689014496, "grad_norm": 0.9694063528492727, "learning_rate": 1.7265179857721478e-05, "loss": 0.9349, "step": 2948 }, { "epoch": 0.264055067435223, "grad_norm": 0.9251737703209791, "learning_rate": 1.726318662624892e-05, "loss": 0.8738, "step": 2949 }, { "epoch": 0.2641446079803011, "grad_norm": 0.9493334709814716, "learning_rate": 1.7261192783820774e-05, "loss": 0.8338, "step": 2950 }, { "epoch": 0.26423414852537913, "grad_norm": 0.9720537233085843, "learning_rate": 1.725919833060475e-05, "loss": 0.8873, "step": 2951 }, { "epoch": 0.2643236890704572, "grad_norm": 1.0088161400724156, "learning_rate": 1.7257203266768615e-05, "loss": 0.9017, "step": 2952 }, { "epoch": 0.26441322961553526, "grad_norm": 0.9344432955797067, "learning_rate": 1.7255207592480193e-05, "loss": 0.864, "step": 2953 }, { "epoch": 0.26450277016061335, "grad_norm": 1.1747997269649035, "learning_rate": 1.7253211307907352e-05, "loss": 0.9022, "step": 2954 }, { "epoch": 0.26459231070569145, "grad_norm": 0.9042505774813997, "learning_rate": 1.725121441321801e-05, "loss": 0.9085, "step": 2955 }, { "epoch": 0.2646818512507695, "grad_norm": 0.9039367791043005, "learning_rate": 1.724921690858014e-05, "loss": 0.8678, "step": 2956 }, { "epoch": 0.2647713917958476, "grad_norm": 0.8870713581243852, "learning_rate": 1.724721879416177e-05, "loss": 0.8792, "step": 2957 }, { "epoch": 0.2648609323409256, "grad_norm": 0.9750039275370983, "learning_rate": 1.7245220070130974e-05, "loss": 0.9134, "step": 2958 }, { "epoch": 0.2649504728860037, "grad_norm": 0.9161547172453762, "learning_rate": 1.724322073665587e-05, "loss": 0.9053, "step": 2959 }, { "epoch": 0.26504001343108174, "grad_norm": 0.9762803900951708, "learning_rate": 1.7241220793904644e-05, "loss": 0.8437, "step": 2960 }, { "epoch": 0.26512955397615984, "grad_norm": 0.9191671117177782, "learning_rate": 1.7239220242045517e-05, "loss": 0.8748, "step": 2961 }, { "epoch": 0.2652190945212379, "grad_norm": 0.9625698543385949, "learning_rate": 1.723721908124678e-05, "loss": 0.8255, "step": 2962 }, { "epoch": 0.26530863506631597, "grad_norm": 1.114722387902531, "learning_rate": 1.7235217311676758e-05, "loss": 0.8794, "step": 2963 }, { "epoch": 0.26539817561139406, "grad_norm": 0.9210928552573481, "learning_rate": 1.7233214933503828e-05, "loss": 0.829, "step": 2964 }, { "epoch": 0.2654877161564721, "grad_norm": 0.8975709810977502, "learning_rate": 1.723121194689643e-05, "loss": 0.8605, "step": 2965 }, { "epoch": 0.2655772567015502, "grad_norm": 0.9926316424499927, "learning_rate": 1.722920835202305e-05, "loss": 0.9064, "step": 2966 }, { "epoch": 0.26566679724662823, "grad_norm": 1.1510854780751034, "learning_rate": 1.7227204149052223e-05, "loss": 0.8551, "step": 2967 }, { "epoch": 0.2657563377917063, "grad_norm": 0.9576172248559482, "learning_rate": 1.722519933815253e-05, "loss": 0.8482, "step": 2968 }, { "epoch": 0.26584587833678436, "grad_norm": 0.8874076324267962, "learning_rate": 1.7223193919492613e-05, "loss": 0.9231, "step": 2969 }, { "epoch": 0.26593541888186245, "grad_norm": 0.9458157752271144, "learning_rate": 1.7221187893241164e-05, "loss": 0.844, "step": 2970 }, { "epoch": 0.2660249594269405, "grad_norm": 0.9262854908094071, "learning_rate": 1.7219181259566922e-05, "loss": 0.8618, "step": 2971 }, { "epoch": 0.2661144999720186, "grad_norm": 0.9285890536595901, "learning_rate": 1.7217174018638676e-05, "loss": 0.855, "step": 2972 }, { "epoch": 0.2662040405170967, "grad_norm": 0.972517520779482, "learning_rate": 1.721516617062527e-05, "loss": 0.8427, "step": 2973 }, { "epoch": 0.2662935810621747, "grad_norm": 0.8849042617439682, "learning_rate": 1.7213157715695602e-05, "loss": 0.8972, "step": 2974 }, { "epoch": 0.2663831216072528, "grad_norm": 1.0188266432626452, "learning_rate": 1.7211148654018604e-05, "loss": 0.9385, "step": 2975 }, { "epoch": 0.26647266215233084, "grad_norm": 1.0754291125110633, "learning_rate": 1.7209138985763288e-05, "loss": 0.8738, "step": 2976 }, { "epoch": 0.26656220269740893, "grad_norm": 1.003867980266452, "learning_rate": 1.720712871109869e-05, "loss": 0.9009, "step": 2977 }, { "epoch": 0.26665174324248697, "grad_norm": 0.8281005533493064, "learning_rate": 1.720511783019391e-05, "loss": 0.844, "step": 2978 }, { "epoch": 0.26674128378756506, "grad_norm": 1.1464497684137798, "learning_rate": 1.72031063432181e-05, "loss": 0.8933, "step": 2979 }, { "epoch": 0.2668308243326431, "grad_norm": 0.9887753071625773, "learning_rate": 1.7201094250340453e-05, "loss": 0.877, "step": 2980 }, { "epoch": 0.2669203648777212, "grad_norm": 0.8835637537803408, "learning_rate": 1.719908155173023e-05, "loss": 0.8563, "step": 2981 }, { "epoch": 0.2670099054227993, "grad_norm": 0.9479598028553524, "learning_rate": 1.719706824755672e-05, "loss": 0.8877, "step": 2982 }, { "epoch": 0.2670994459678773, "grad_norm": 0.9175059226140866, "learning_rate": 1.7195054337989285e-05, "loss": 0.9403, "step": 2983 }, { "epoch": 0.2671889865129554, "grad_norm": 1.0770342021760735, "learning_rate": 1.7193039823197324e-05, "loss": 0.8717, "step": 2984 }, { "epoch": 0.26727852705803345, "grad_norm": 0.8590796192203384, "learning_rate": 1.7191024703350295e-05, "loss": 0.8595, "step": 2985 }, { "epoch": 0.26736806760311155, "grad_norm": 0.9572147643639068, "learning_rate": 1.7189008978617702e-05, "loss": 0.8855, "step": 2986 }, { "epoch": 0.2674576081481896, "grad_norm": 0.8686870289377042, "learning_rate": 1.71869926491691e-05, "loss": 0.8581, "step": 2987 }, { "epoch": 0.2675471486932677, "grad_norm": 1.007483744761067, "learning_rate": 1.7184975715174096e-05, "loss": 0.8752, "step": 2988 }, { "epoch": 0.2676366892383457, "grad_norm": 0.9344890082206477, "learning_rate": 1.7182958176802352e-05, "loss": 0.8583, "step": 2989 }, { "epoch": 0.2677262297834238, "grad_norm": 0.9828676832282021, "learning_rate": 1.7180940034223566e-05, "loss": 0.8496, "step": 2990 }, { "epoch": 0.2678157703285019, "grad_norm": 1.06045556196744, "learning_rate": 1.717892128760751e-05, "loss": 0.8655, "step": 2991 }, { "epoch": 0.26790531087357994, "grad_norm": 0.9627821456796015, "learning_rate": 1.717690193712399e-05, "loss": 0.8977, "step": 2992 }, { "epoch": 0.26799485141865803, "grad_norm": 0.944053559039206, "learning_rate": 1.7174881982942865e-05, "loss": 0.8988, "step": 2993 }, { "epoch": 0.26808439196373607, "grad_norm": 0.9984368175835054, "learning_rate": 1.717286142523405e-05, "loss": 0.8453, "step": 2994 }, { "epoch": 0.26817393250881416, "grad_norm": 0.922179263049267, "learning_rate": 1.71708402641675e-05, "loss": 0.901, "step": 2995 }, { "epoch": 0.2682634730538922, "grad_norm": 1.0223060532248, "learning_rate": 1.716881849991324e-05, "loss": 0.8952, "step": 2996 }, { "epoch": 0.2683530135989703, "grad_norm": 0.9169053924386658, "learning_rate": 1.716679613264133e-05, "loss": 0.8601, "step": 2997 }, { "epoch": 0.26844255414404833, "grad_norm": 0.9488880944826082, "learning_rate": 1.716477316252188e-05, "loss": 0.8166, "step": 2998 }, { "epoch": 0.2685320946891264, "grad_norm": 1.0417405292844477, "learning_rate": 1.716274958972506e-05, "loss": 0.8374, "step": 2999 }, { "epoch": 0.2686216352342045, "grad_norm": 1.0182194694986495, "learning_rate": 1.7160725414421086e-05, "loss": 0.8809, "step": 3000 }, { "epoch": 0.26871117577928255, "grad_norm": 0.9467863834263519, "learning_rate": 1.715870063678023e-05, "loss": 0.8612, "step": 3001 }, { "epoch": 0.26880071632436064, "grad_norm": 0.8714370418249684, "learning_rate": 1.7156675256972797e-05, "loss": 0.8756, "step": 3002 }, { "epoch": 0.2688902568694387, "grad_norm": 0.8648193527507475, "learning_rate": 1.7154649275169167e-05, "loss": 0.8433, "step": 3003 }, { "epoch": 0.2689797974145168, "grad_norm": 0.963704783617534, "learning_rate": 1.7152622691539754e-05, "loss": 0.9282, "step": 3004 }, { "epoch": 0.2690693379595948, "grad_norm": 0.9691179898919868, "learning_rate": 1.715059550625503e-05, "loss": 0.8674, "step": 3005 }, { "epoch": 0.2691588785046729, "grad_norm": 0.9245910145770874, "learning_rate": 1.714856771948551e-05, "loss": 0.8638, "step": 3006 }, { "epoch": 0.26924841904975094, "grad_norm": 1.0444269751758923, "learning_rate": 1.714653933140177e-05, "loss": 0.8999, "step": 3007 }, { "epoch": 0.26933795959482904, "grad_norm": 0.9639871616594202, "learning_rate": 1.714451034217443e-05, "loss": 0.8979, "step": 3008 }, { "epoch": 0.26942750013990713, "grad_norm": 1.0743706570345868, "learning_rate": 1.7142480751974166e-05, "loss": 0.8879, "step": 3009 }, { "epoch": 0.26951704068498517, "grad_norm": 0.9904629185277329, "learning_rate": 1.714045056097169e-05, "loss": 0.8812, "step": 3010 }, { "epoch": 0.26960658123006326, "grad_norm": 1.005673568474522, "learning_rate": 1.7138419769337783e-05, "loss": 0.9492, "step": 3011 }, { "epoch": 0.2696961217751413, "grad_norm": 0.9719798847545754, "learning_rate": 1.713638837724327e-05, "loss": 0.8753, "step": 3012 }, { "epoch": 0.2697856623202194, "grad_norm": 0.8843175251777998, "learning_rate": 1.713435638485902e-05, "loss": 0.8578, "step": 3013 }, { "epoch": 0.2698752028652974, "grad_norm": 0.9612583039263236, "learning_rate": 1.713232379235596e-05, "loss": 0.8927, "step": 3014 }, { "epoch": 0.2699647434103755, "grad_norm": 0.990009204752591, "learning_rate": 1.7130290599905064e-05, "loss": 0.8254, "step": 3015 }, { "epoch": 0.27005428395545356, "grad_norm": 0.8656763040294388, "learning_rate": 1.7128256807677357e-05, "loss": 0.8322, "step": 3016 }, { "epoch": 0.27014382450053165, "grad_norm": 0.8575523454398983, "learning_rate": 1.712622241584392e-05, "loss": 0.8449, "step": 3017 }, { "epoch": 0.27023336504560974, "grad_norm": 0.9570602507319353, "learning_rate": 1.7124187424575874e-05, "loss": 0.8874, "step": 3018 }, { "epoch": 0.2703229055906878, "grad_norm": 1.0718856139401851, "learning_rate": 1.7122151834044398e-05, "loss": 0.7851, "step": 3019 }, { "epoch": 0.27041244613576587, "grad_norm": 0.9891111012225225, "learning_rate": 1.7120115644420715e-05, "loss": 0.8794, "step": 3020 }, { "epoch": 0.2705019866808439, "grad_norm": 1.0637752056060539, "learning_rate": 1.711807885587611e-05, "loss": 0.8702, "step": 3021 }, { "epoch": 0.270591527225922, "grad_norm": 0.8678973860819694, "learning_rate": 1.711604146858191e-05, "loss": 0.9016, "step": 3022 }, { "epoch": 0.27068106777100004, "grad_norm": 0.9133847924152949, "learning_rate": 1.7114003482709485e-05, "loss": 0.8923, "step": 3023 }, { "epoch": 0.27077060831607813, "grad_norm": 1.1361075412547286, "learning_rate": 1.711196489843027e-05, "loss": 0.8545, "step": 3024 }, { "epoch": 0.27086014886115617, "grad_norm": 0.957147657766586, "learning_rate": 1.7109925715915746e-05, "loss": 0.8984, "step": 3025 }, { "epoch": 0.27094968940623426, "grad_norm": 0.9842822921013271, "learning_rate": 1.710788593533744e-05, "loss": 0.8922, "step": 3026 }, { "epoch": 0.27103922995131235, "grad_norm": 1.0467396411551855, "learning_rate": 1.7105845556866932e-05, "loss": 0.8785, "step": 3027 }, { "epoch": 0.2711287704963904, "grad_norm": 0.9339037570821632, "learning_rate": 1.7103804580675846e-05, "loss": 0.8783, "step": 3028 }, { "epoch": 0.2712183110414685, "grad_norm": 0.9532689140019491, "learning_rate": 1.7101763006935873e-05, "loss": 0.9014, "step": 3029 }, { "epoch": 0.2713078515865465, "grad_norm": 1.0961636011208542, "learning_rate": 1.709972083581874e-05, "loss": 0.932, "step": 3030 }, { "epoch": 0.2713973921316246, "grad_norm": 1.0034877688365238, "learning_rate": 1.709767806749622e-05, "loss": 0.8375, "step": 3031 }, { "epoch": 0.27148693267670265, "grad_norm": 0.9680004256002208, "learning_rate": 1.7095634702140154e-05, "loss": 0.9148, "step": 3032 }, { "epoch": 0.27157647322178075, "grad_norm": 1.0393882284921006, "learning_rate": 1.7093590739922424e-05, "loss": 0.8703, "step": 3033 }, { "epoch": 0.2716660137668588, "grad_norm": 0.9787746862538466, "learning_rate": 1.7091546181014952e-05, "loss": 0.8287, "step": 3034 }, { "epoch": 0.2717555543119369, "grad_norm": 0.9318422087289375, "learning_rate": 1.708950102558972e-05, "loss": 0.878, "step": 3035 }, { "epoch": 0.27184509485701497, "grad_norm": 0.8817689498534514, "learning_rate": 1.7087455273818774e-05, "loss": 0.8793, "step": 3036 }, { "epoch": 0.271934635402093, "grad_norm": 0.9805970947585829, "learning_rate": 1.708540892587418e-05, "loss": 0.8464, "step": 3037 }, { "epoch": 0.2720241759471711, "grad_norm": 1.1290119685498867, "learning_rate": 1.7083361981928077e-05, "loss": 0.8707, "step": 3038 }, { "epoch": 0.27211371649224914, "grad_norm": 0.9376039190267978, "learning_rate": 1.708131444215265e-05, "loss": 0.8696, "step": 3039 }, { "epoch": 0.27220325703732723, "grad_norm": 0.9383608008638069, "learning_rate": 1.7079266306720125e-05, "loss": 0.8874, "step": 3040 }, { "epoch": 0.27229279758240527, "grad_norm": 0.8972142357320703, "learning_rate": 1.7077217575802785e-05, "loss": 0.8176, "step": 3041 }, { "epoch": 0.27238233812748336, "grad_norm": 0.9516260153267242, "learning_rate": 1.707516824957297e-05, "loss": 0.8596, "step": 3042 }, { "epoch": 0.2724718786725614, "grad_norm": 1.1498236350718762, "learning_rate": 1.7073118328203055e-05, "loss": 0.8921, "step": 3043 }, { "epoch": 0.2725614192176395, "grad_norm": 0.8980761727006406, "learning_rate": 1.7071067811865477e-05, "loss": 0.7968, "step": 3044 }, { "epoch": 0.2726509597627176, "grad_norm": 1.2016333409470468, "learning_rate": 1.7069016700732714e-05, "loss": 0.91, "step": 3045 }, { "epoch": 0.2727405003077956, "grad_norm": 0.9310197362402982, "learning_rate": 1.706696499497731e-05, "loss": 0.8727, "step": 3046 }, { "epoch": 0.2728300408528737, "grad_norm": 1.0248901546481457, "learning_rate": 1.7064912694771832e-05, "loss": 0.9117, "step": 3047 }, { "epoch": 0.27291958139795175, "grad_norm": 0.9568207149098361, "learning_rate": 1.7062859800288923e-05, "loss": 0.866, "step": 3048 }, { "epoch": 0.27300912194302984, "grad_norm": 1.0116632966060441, "learning_rate": 1.706080631170126e-05, "loss": 0.8721, "step": 3049 }, { "epoch": 0.2730986624881079, "grad_norm": 0.8971182858081596, "learning_rate": 1.7058752229181585e-05, "loss": 0.8404, "step": 3050 }, { "epoch": 0.273188203033186, "grad_norm": 0.96007596800799, "learning_rate": 1.705669755290267e-05, "loss": 0.8559, "step": 3051 }, { "epoch": 0.273277743578264, "grad_norm": 0.8911357190975704, "learning_rate": 1.7054642283037356e-05, "loss": 0.8749, "step": 3052 }, { "epoch": 0.2733672841233421, "grad_norm": 0.9376006334398593, "learning_rate": 1.705258641975852e-05, "loss": 0.8759, "step": 3053 }, { "epoch": 0.2734568246684202, "grad_norm": 1.0219150425146417, "learning_rate": 1.70505299632391e-05, "loss": 0.8268, "step": 3054 }, { "epoch": 0.27354636521349823, "grad_norm": 0.8999727313506765, "learning_rate": 1.7048472913652073e-05, "loss": 0.854, "step": 3055 }, { "epoch": 0.2736359057585763, "grad_norm": 0.8654714995750459, "learning_rate": 1.704641527117047e-05, "loss": 0.891, "step": 3056 }, { "epoch": 0.27372544630365436, "grad_norm": 0.8667614459768594, "learning_rate": 1.7044357035967382e-05, "loss": 0.8516, "step": 3057 }, { "epoch": 0.27381498684873246, "grad_norm": 0.9826709674736025, "learning_rate": 1.704229820821593e-05, "loss": 0.8523, "step": 3058 }, { "epoch": 0.2739045273938105, "grad_norm": 0.867346385767419, "learning_rate": 1.7040238788089304e-05, "loss": 0.8586, "step": 3059 }, { "epoch": 0.2739940679388886, "grad_norm": 0.964219742865869, "learning_rate": 1.7038178775760738e-05, "loss": 0.832, "step": 3060 }, { "epoch": 0.2740836084839666, "grad_norm": 0.8857639573684032, "learning_rate": 1.70361181714035e-05, "loss": 0.8714, "step": 3061 }, { "epoch": 0.2741731490290447, "grad_norm": 0.9058464580598033, "learning_rate": 1.703405697519094e-05, "loss": 0.8259, "step": 3062 }, { "epoch": 0.2742626895741228, "grad_norm": 1.0272622265971423, "learning_rate": 1.703199518729642e-05, "loss": 0.8738, "step": 3063 }, { "epoch": 0.27435223011920085, "grad_norm": 0.9512762416372345, "learning_rate": 1.7029932807893383e-05, "loss": 0.8917, "step": 3064 }, { "epoch": 0.27444177066427894, "grad_norm": 0.9845303147881898, "learning_rate": 1.702786983715531e-05, "loss": 0.9263, "step": 3065 }, { "epoch": 0.274531311209357, "grad_norm": 1.1071745281630738, "learning_rate": 1.702580627525572e-05, "loss": 0.9408, "step": 3066 }, { "epoch": 0.27462085175443507, "grad_norm": 1.1012996254153433, "learning_rate": 1.702374212236821e-05, "loss": 0.8262, "step": 3067 }, { "epoch": 0.2747103922995131, "grad_norm": 0.8572665031294724, "learning_rate": 1.7021677378666398e-05, "loss": 0.8748, "step": 3068 }, { "epoch": 0.2747999328445912, "grad_norm": 0.9098357465077431, "learning_rate": 1.7019612044323965e-05, "loss": 0.8504, "step": 3069 }, { "epoch": 0.27488947338966924, "grad_norm": 0.892031050161679, "learning_rate": 1.7017546119514645e-05, "loss": 0.8496, "step": 3070 }, { "epoch": 0.27497901393474733, "grad_norm": 0.9957505953895591, "learning_rate": 1.701547960441221e-05, "loss": 0.8411, "step": 3071 }, { "epoch": 0.2750685544798254, "grad_norm": 1.0652254095574978, "learning_rate": 1.7013412499190494e-05, "loss": 0.8878, "step": 3072 }, { "epoch": 0.27515809502490346, "grad_norm": 1.0215215181599415, "learning_rate": 1.7011344804023373e-05, "loss": 0.9094, "step": 3073 }, { "epoch": 0.27524763556998155, "grad_norm": 1.1046706587742765, "learning_rate": 1.7009276519084773e-05, "loss": 0.8987, "step": 3074 }, { "epoch": 0.2753371761150596, "grad_norm": 0.8928684793987689, "learning_rate": 1.7007207644548676e-05, "loss": 0.8692, "step": 3075 }, { "epoch": 0.2754267166601377, "grad_norm": 0.9619248627125876, "learning_rate": 1.7005138180589106e-05, "loss": 0.8816, "step": 3076 }, { "epoch": 0.2755162572052157, "grad_norm": 0.8712901864519633, "learning_rate": 1.7003068127380137e-05, "loss": 0.803, "step": 3077 }, { "epoch": 0.2756057977502938, "grad_norm": 0.8991245777475443, "learning_rate": 1.70009974850959e-05, "loss": 0.8522, "step": 3078 }, { "epoch": 0.27569533829537185, "grad_norm": 1.066699976232185, "learning_rate": 1.699892625391057e-05, "loss": 0.8327, "step": 3079 }, { "epoch": 0.27578487884044994, "grad_norm": 0.9405816531286452, "learning_rate": 1.6996854433998368e-05, "loss": 0.8219, "step": 3080 }, { "epoch": 0.27587441938552804, "grad_norm": 0.9143267340472762, "learning_rate": 1.6994782025533574e-05, "loss": 0.8814, "step": 3081 }, { "epoch": 0.2759639599306061, "grad_norm": 0.9114202102861547, "learning_rate": 1.699270902869051e-05, "loss": 0.8652, "step": 3082 }, { "epoch": 0.27605350047568417, "grad_norm": 0.8354509720765657, "learning_rate": 1.6990635443643547e-05, "loss": 0.818, "step": 3083 }, { "epoch": 0.2761430410207622, "grad_norm": 1.0405536079119269, "learning_rate": 1.6988561270567116e-05, "loss": 0.8837, "step": 3084 }, { "epoch": 0.2762325815658403, "grad_norm": 1.047074511681417, "learning_rate": 1.698648650963568e-05, "loss": 0.9219, "step": 3085 }, { "epoch": 0.27632212211091833, "grad_norm": 1.085045915828381, "learning_rate": 1.6984411161023765e-05, "loss": 0.8885, "step": 3086 }, { "epoch": 0.2764116626559964, "grad_norm": 0.9039733612137021, "learning_rate": 1.698233522490595e-05, "loss": 0.8394, "step": 3087 }, { "epoch": 0.27650120320107446, "grad_norm": 0.9875549274478735, "learning_rate": 1.6980258701456843e-05, "loss": 0.826, "step": 3088 }, { "epoch": 0.27659074374615256, "grad_norm": 0.9722767019244013, "learning_rate": 1.6978181590851123e-05, "loss": 0.9195, "step": 3089 }, { "epoch": 0.27668028429123065, "grad_norm": 0.8648326537411496, "learning_rate": 1.6976103893263507e-05, "loss": 0.8188, "step": 3090 }, { "epoch": 0.2767698248363087, "grad_norm": 0.959990445898637, "learning_rate": 1.6974025608868765e-05, "loss": 0.8886, "step": 3091 }, { "epoch": 0.2768593653813868, "grad_norm": 0.9331415629386074, "learning_rate": 1.6971946737841715e-05, "loss": 0.8691, "step": 3092 }, { "epoch": 0.2769489059264648, "grad_norm": 1.1251824690994092, "learning_rate": 1.6969867280357224e-05, "loss": 0.9197, "step": 3093 }, { "epoch": 0.2770384464715429, "grad_norm": 0.9581851646411722, "learning_rate": 1.696778723659021e-05, "loss": 0.8276, "step": 3094 }, { "epoch": 0.27712798701662095, "grad_norm": 0.9725773952360081, "learning_rate": 1.6965706606715642e-05, "loss": 0.8737, "step": 3095 }, { "epoch": 0.27721752756169904, "grad_norm": 1.0160289896436507, "learning_rate": 1.696362539090853e-05, "loss": 0.9064, "step": 3096 }, { "epoch": 0.2773070681067771, "grad_norm": 1.0309838177317907, "learning_rate": 1.6961543589343943e-05, "loss": 0.892, "step": 3097 }, { "epoch": 0.27739660865185517, "grad_norm": 1.063620923469998, "learning_rate": 1.6959461202196996e-05, "loss": 0.8997, "step": 3098 }, { "epoch": 0.27748614919693326, "grad_norm": 0.9414588229692542, "learning_rate": 1.695737822964285e-05, "loss": 0.8975, "step": 3099 }, { "epoch": 0.2775756897420113, "grad_norm": 0.9080463366377379, "learning_rate": 1.6955294671856722e-05, "loss": 0.8409, "step": 3100 }, { "epoch": 0.2776652302870894, "grad_norm": 0.9738420195506777, "learning_rate": 1.695321052901387e-05, "loss": 0.864, "step": 3101 }, { "epoch": 0.27775477083216743, "grad_norm": 1.127570733232605, "learning_rate": 1.6951125801289607e-05, "loss": 0.8843, "step": 3102 }, { "epoch": 0.2778443113772455, "grad_norm": 0.982627905859027, "learning_rate": 1.6949040488859295e-05, "loss": 0.8922, "step": 3103 }, { "epoch": 0.27793385192232356, "grad_norm": 0.9799994241542295, "learning_rate": 1.694695459189834e-05, "loss": 0.8381, "step": 3104 }, { "epoch": 0.27802339246740165, "grad_norm": 1.0105176473099975, "learning_rate": 1.69448681105822e-05, "loss": 0.8151, "step": 3105 }, { "epoch": 0.2781129330124797, "grad_norm": 0.9097175995774183, "learning_rate": 1.6942781045086393e-05, "loss": 0.8855, "step": 3106 }, { "epoch": 0.2782024735575578, "grad_norm": 1.0698651249211961, "learning_rate": 1.6940693395586463e-05, "loss": 0.849, "step": 3107 }, { "epoch": 0.2782920141026359, "grad_norm": 0.8569301023322689, "learning_rate": 1.6938605162258026e-05, "loss": 0.9053, "step": 3108 }, { "epoch": 0.2783815546477139, "grad_norm": 0.9400385040489173, "learning_rate": 1.693651634527673e-05, "loss": 0.8632, "step": 3109 }, { "epoch": 0.278471095192792, "grad_norm": 1.0149695002740913, "learning_rate": 1.6934426944818282e-05, "loss": 0.8935, "step": 3110 }, { "epoch": 0.27856063573787004, "grad_norm": 1.0552194694178423, "learning_rate": 1.693233696105844e-05, "loss": 0.8733, "step": 3111 }, { "epoch": 0.27865017628294814, "grad_norm": 1.1355533437834913, "learning_rate": 1.6930246394173004e-05, "loss": 0.8698, "step": 3112 }, { "epoch": 0.2787397168280262, "grad_norm": 1.0666585348230584, "learning_rate": 1.6928155244337823e-05, "loss": 0.9067, "step": 3113 }, { "epoch": 0.27882925737310427, "grad_norm": 0.9202935513746124, "learning_rate": 1.69260635117288e-05, "loss": 0.8713, "step": 3114 }, { "epoch": 0.2789187979181823, "grad_norm": 0.8445833839258885, "learning_rate": 1.6923971196521886e-05, "loss": 0.8701, "step": 3115 }, { "epoch": 0.2790083384632604, "grad_norm": 0.9486270967806315, "learning_rate": 1.6921878298893076e-05, "loss": 0.8768, "step": 3116 }, { "epoch": 0.2790978790083385, "grad_norm": 0.9661193519913366, "learning_rate": 1.691978481901842e-05, "loss": 0.9441, "step": 3117 }, { "epoch": 0.2791874195534165, "grad_norm": 0.9714688665814267, "learning_rate": 1.6917690757074018e-05, "loss": 0.9137, "step": 3118 }, { "epoch": 0.2792769600984946, "grad_norm": 0.9617206889423764, "learning_rate": 1.6915596113236006e-05, "loss": 0.8634, "step": 3119 }, { "epoch": 0.27936650064357266, "grad_norm": 1.0010342672971355, "learning_rate": 1.6913500887680588e-05, "loss": 0.8693, "step": 3120 }, { "epoch": 0.27945604118865075, "grad_norm": 1.7189605556600702, "learning_rate": 1.6911405080584003e-05, "loss": 0.862, "step": 3121 }, { "epoch": 0.2795455817337288, "grad_norm": 0.9527797015057667, "learning_rate": 1.690930869212255e-05, "loss": 0.8664, "step": 3122 }, { "epoch": 0.2796351222788069, "grad_norm": 0.9070381882554512, "learning_rate": 1.690721172247256e-05, "loss": 0.8324, "step": 3123 }, { "epoch": 0.2797246628238849, "grad_norm": 0.9154452725309471, "learning_rate": 1.6905114171810432e-05, "loss": 0.8702, "step": 3124 }, { "epoch": 0.279814203368963, "grad_norm": 0.9517578403734541, "learning_rate": 1.69030160403126e-05, "loss": 0.8624, "step": 3125 }, { "epoch": 0.2799037439140411, "grad_norm": 1.0221252173679627, "learning_rate": 1.6900917328155552e-05, "loss": 0.887, "step": 3126 }, { "epoch": 0.27999328445911914, "grad_norm": 0.9692741553919281, "learning_rate": 1.6898818035515825e-05, "loss": 0.8901, "step": 3127 }, { "epoch": 0.28008282500419723, "grad_norm": 0.877399179411387, "learning_rate": 1.689671816257001e-05, "loss": 0.8423, "step": 3128 }, { "epoch": 0.28017236554927527, "grad_norm": 1.0635639511469, "learning_rate": 1.6894617709494738e-05, "loss": 0.8995, "step": 3129 }, { "epoch": 0.28026190609435336, "grad_norm": 0.9578175391218497, "learning_rate": 1.6892516676466687e-05, "loss": 0.9025, "step": 3130 }, { "epoch": 0.2803514466394314, "grad_norm": 0.8925553036324171, "learning_rate": 1.6890415063662598e-05, "loss": 0.9257, "step": 3131 }, { "epoch": 0.2804409871845095, "grad_norm": 0.9436066066607104, "learning_rate": 1.6888312871259247e-05, "loss": 0.7947, "step": 3132 }, { "epoch": 0.28053052772958753, "grad_norm": 0.934782451206911, "learning_rate": 1.6886210099433464e-05, "loss": 0.8318, "step": 3133 }, { "epoch": 0.2806200682746656, "grad_norm": 0.9893400437244729, "learning_rate": 1.6884106748362126e-05, "loss": 0.8912, "step": 3134 }, { "epoch": 0.2807096088197437, "grad_norm": 0.9954061664473843, "learning_rate": 1.6882002818222166e-05, "loss": 0.9053, "step": 3135 }, { "epoch": 0.28079914936482175, "grad_norm": 1.2088354187050214, "learning_rate": 1.687989830919055e-05, "loss": 0.8329, "step": 3136 }, { "epoch": 0.28088868990989985, "grad_norm": 1.1303027412862616, "learning_rate": 1.687779322144431e-05, "loss": 0.8632, "step": 3137 }, { "epoch": 0.2809782304549779, "grad_norm": 0.9371182588200657, "learning_rate": 1.6875687555160518e-05, "loss": 0.8885, "step": 3138 }, { "epoch": 0.281067771000056, "grad_norm": 0.9134839991096796, "learning_rate": 1.6873581310516294e-05, "loss": 0.8213, "step": 3139 }, { "epoch": 0.281157311545134, "grad_norm": 0.9527635748717829, "learning_rate": 1.687147448768881e-05, "loss": 0.8964, "step": 3140 }, { "epoch": 0.2812468520902121, "grad_norm": 0.961195586870507, "learning_rate": 1.6869367086855278e-05, "loss": 0.8575, "step": 3141 }, { "epoch": 0.28133639263529014, "grad_norm": 0.9936313187569966, "learning_rate": 1.686725910819298e-05, "loss": 0.8735, "step": 3142 }, { "epoch": 0.28142593318036824, "grad_norm": 0.9229324559597707, "learning_rate": 1.686515055187922e-05, "loss": 0.8565, "step": 3143 }, { "epoch": 0.28151547372544633, "grad_norm": 1.0360846237257146, "learning_rate": 1.686304141809137e-05, "loss": 0.8534, "step": 3144 }, { "epoch": 0.28160501427052437, "grad_norm": 1.063979130548798, "learning_rate": 1.6860931707006835e-05, "loss": 0.8838, "step": 3145 }, { "epoch": 0.28169455481560246, "grad_norm": 1.018000566188955, "learning_rate": 1.6858821418803087e-05, "loss": 0.8486, "step": 3146 }, { "epoch": 0.2817840953606805, "grad_norm": 0.978518777762257, "learning_rate": 1.6856710553657633e-05, "loss": 0.8395, "step": 3147 }, { "epoch": 0.2818736359057586, "grad_norm": 0.9632930919649931, "learning_rate": 1.6854599111748027e-05, "loss": 0.8487, "step": 3148 }, { "epoch": 0.28196317645083663, "grad_norm": 0.9913011775405436, "learning_rate": 1.6852487093251883e-05, "loss": 0.9697, "step": 3149 }, { "epoch": 0.2820527169959147, "grad_norm": 0.9200299120201978, "learning_rate": 1.6850374498346857e-05, "loss": 0.8338, "step": 3150 }, { "epoch": 0.28214225754099276, "grad_norm": 1.0368439154423343, "learning_rate": 1.684826132721065e-05, "loss": 0.8935, "step": 3151 }, { "epoch": 0.28223179808607085, "grad_norm": 0.941217170401303, "learning_rate": 1.6846147580021016e-05, "loss": 0.8904, "step": 3152 }, { "epoch": 0.28232133863114894, "grad_norm": 1.0103885923103557, "learning_rate": 1.684403325695576e-05, "loss": 0.8674, "step": 3153 }, { "epoch": 0.282410879176227, "grad_norm": 2.2109009147192697, "learning_rate": 1.6841918358192725e-05, "loss": 0.8851, "step": 3154 }, { "epoch": 0.2825004197213051, "grad_norm": 1.218619033777584, "learning_rate": 1.6839802883909814e-05, "loss": 0.8428, "step": 3155 }, { "epoch": 0.2825899602663831, "grad_norm": 0.912222411542443, "learning_rate": 1.6837686834284978e-05, "loss": 0.8708, "step": 3156 }, { "epoch": 0.2826795008114612, "grad_norm": 0.9421602152747818, "learning_rate": 1.6835570209496198e-05, "loss": 0.8325, "step": 3157 }, { "epoch": 0.28276904135653924, "grad_norm": 0.9882926023918414, "learning_rate": 1.6833453009721538e-05, "loss": 0.8711, "step": 3158 }, { "epoch": 0.28285858190161733, "grad_norm": 0.9383150192373714, "learning_rate": 1.6831335235139072e-05, "loss": 0.8846, "step": 3159 }, { "epoch": 0.28294812244669537, "grad_norm": 0.9719857845357589, "learning_rate": 1.682921688592695e-05, "loss": 0.8984, "step": 3160 }, { "epoch": 0.28303766299177346, "grad_norm": 1.05405485655929, "learning_rate": 1.6827097962263355e-05, "loss": 0.8654, "step": 3161 }, { "epoch": 0.28312720353685156, "grad_norm": 0.8836666319954309, "learning_rate": 1.682497846432653e-05, "loss": 0.8313, "step": 3162 }, { "epoch": 0.2832167440819296, "grad_norm": 0.9290851947248366, "learning_rate": 1.6822858392294757e-05, "loss": 0.8965, "step": 3163 }, { "epoch": 0.2833062846270077, "grad_norm": 1.0476084424923817, "learning_rate": 1.682073774634637e-05, "loss": 0.8854, "step": 3164 }, { "epoch": 0.2833958251720857, "grad_norm": 1.0092516907196987, "learning_rate": 1.681861652665975e-05, "loss": 0.8806, "step": 3165 }, { "epoch": 0.2834853657171638, "grad_norm": 0.9410850844630412, "learning_rate": 1.6816494733413328e-05, "loss": 0.8895, "step": 3166 }, { "epoch": 0.28357490626224185, "grad_norm": 0.9700542279555697, "learning_rate": 1.6814372366785578e-05, "loss": 0.8454, "step": 3167 }, { "epoch": 0.28366444680731995, "grad_norm": 0.9189909588025574, "learning_rate": 1.6812249426955033e-05, "loss": 0.8744, "step": 3168 }, { "epoch": 0.283753987352398, "grad_norm": 0.9532373661627619, "learning_rate": 1.681012591410027e-05, "loss": 0.8888, "step": 3169 }, { "epoch": 0.2838435278974761, "grad_norm": 0.9007629322296989, "learning_rate": 1.68080018283999e-05, "loss": 0.8653, "step": 3170 }, { "epoch": 0.28393306844255417, "grad_norm": 0.9726798464553322, "learning_rate": 1.6805877170032606e-05, "loss": 0.856, "step": 3171 }, { "epoch": 0.2840226089876322, "grad_norm": 1.082492427837062, "learning_rate": 1.68037519391771e-05, "loss": 0.868, "step": 3172 }, { "epoch": 0.2841121495327103, "grad_norm": 0.9337689528416439, "learning_rate": 1.6801626136012156e-05, "loss": 0.8919, "step": 3173 }, { "epoch": 0.28420169007778834, "grad_norm": 0.9288505031886436, "learning_rate": 1.6799499760716585e-05, "loss": 0.8202, "step": 3174 }, { "epoch": 0.28429123062286643, "grad_norm": 0.9851516248059862, "learning_rate": 1.679737281346925e-05, "loss": 0.7852, "step": 3175 }, { "epoch": 0.28438077116794447, "grad_norm": 1.1260358212500687, "learning_rate": 1.6795245294449064e-05, "loss": 0.8506, "step": 3176 }, { "epoch": 0.28447031171302256, "grad_norm": 1.0816490171705435, "learning_rate": 1.679311720383499e-05, "loss": 0.9042, "step": 3177 }, { "epoch": 0.2845598522581006, "grad_norm": 0.9284832811636621, "learning_rate": 1.679098854180603e-05, "loss": 0.8699, "step": 3178 }, { "epoch": 0.2846493928031787, "grad_norm": 0.9240882171752225, "learning_rate": 1.6788859308541247e-05, "loss": 0.8503, "step": 3179 }, { "epoch": 0.2847389333482568, "grad_norm": 0.9492275023529425, "learning_rate": 1.6786729504219742e-05, "loss": 0.9595, "step": 3180 }, { "epoch": 0.2848284738933348, "grad_norm": 0.923039712222499, "learning_rate": 1.678459912902066e-05, "loss": 0.8494, "step": 3181 }, { "epoch": 0.2849180144384129, "grad_norm": 0.9552713670981043, "learning_rate": 1.6782468183123217e-05, "loss": 0.9128, "step": 3182 }, { "epoch": 0.28500755498349095, "grad_norm": 0.8506558342602634, "learning_rate": 1.678033666670665e-05, "loss": 0.8859, "step": 3183 }, { "epoch": 0.28509709552856904, "grad_norm": 0.9629396093085503, "learning_rate": 1.6778204579950258e-05, "loss": 0.8589, "step": 3184 }, { "epoch": 0.2851866360736471, "grad_norm": 1.0037891788764925, "learning_rate": 1.6776071923033383e-05, "loss": 0.9333, "step": 3185 }, { "epoch": 0.2852761766187252, "grad_norm": 0.9684746932044083, "learning_rate": 1.677393869613542e-05, "loss": 0.8025, "step": 3186 }, { "epoch": 0.2853657171638032, "grad_norm": 0.8881307168188709, "learning_rate": 1.6771804899435808e-05, "loss": 0.9054, "step": 3187 }, { "epoch": 0.2854552577088813, "grad_norm": 1.045375343901422, "learning_rate": 1.6769670533114037e-05, "loss": 0.8514, "step": 3188 }, { "epoch": 0.2855447982539594, "grad_norm": 0.9132102557526443, "learning_rate": 1.676753559734964e-05, "loss": 0.8672, "step": 3189 }, { "epoch": 0.28563433879903743, "grad_norm": 0.9442172320512818, "learning_rate": 1.67654000923222e-05, "loss": 0.892, "step": 3190 }, { "epoch": 0.28572387934411553, "grad_norm": 1.0397387459452976, "learning_rate": 1.676326401821135e-05, "loss": 0.8724, "step": 3191 }, { "epoch": 0.28581341988919357, "grad_norm": 0.9588287503179966, "learning_rate": 1.6761127375196776e-05, "loss": 0.879, "step": 3192 }, { "epoch": 0.28590296043427166, "grad_norm": 0.9817413888869693, "learning_rate": 1.6758990163458195e-05, "loss": 0.9113, "step": 3193 }, { "epoch": 0.2859925009793497, "grad_norm": 0.9699630903173093, "learning_rate": 1.675685238317539e-05, "loss": 0.8933, "step": 3194 }, { "epoch": 0.2860820415244278, "grad_norm": 0.9185487395604305, "learning_rate": 1.6754714034528176e-05, "loss": 0.8585, "step": 3195 }, { "epoch": 0.2861715820695058, "grad_norm": 0.9762093779643273, "learning_rate": 1.6752575117696435e-05, "loss": 0.8256, "step": 3196 }, { "epoch": 0.2862611226145839, "grad_norm": 1.083173533962238, "learning_rate": 1.6750435632860074e-05, "loss": 0.8921, "step": 3197 }, { "epoch": 0.286350663159662, "grad_norm": 1.0208908262826755, "learning_rate": 1.674829558019907e-05, "loss": 0.8992, "step": 3198 }, { "epoch": 0.28644020370474005, "grad_norm": 0.8953915176464201, "learning_rate": 1.6746154959893433e-05, "loss": 0.8369, "step": 3199 }, { "epoch": 0.28652974424981814, "grad_norm": 1.1272926372798986, "learning_rate": 1.674401377212322e-05, "loss": 0.8733, "step": 3200 }, { "epoch": 0.2866192847948962, "grad_norm": 1.0065015684329834, "learning_rate": 1.6741872017068547e-05, "loss": 0.9719, "step": 3201 }, { "epoch": 0.28670882533997427, "grad_norm": 1.006968106517315, "learning_rate": 1.673972969490957e-05, "loss": 0.8977, "step": 3202 }, { "epoch": 0.2867983658850523, "grad_norm": 0.885961933597078, "learning_rate": 1.6737586805826494e-05, "loss": 0.8764, "step": 3203 }, { "epoch": 0.2868879064301304, "grad_norm": 0.9880874783445704, "learning_rate": 1.673544334999957e-05, "loss": 0.9062, "step": 3204 }, { "epoch": 0.28697744697520844, "grad_norm": 1.036984376291364, "learning_rate": 1.6733299327609103e-05, "loss": 0.8396, "step": 3205 }, { "epoch": 0.28706698752028653, "grad_norm": 0.978218171581359, "learning_rate": 1.6731154738835436e-05, "loss": 0.8775, "step": 3206 }, { "epoch": 0.2871565280653646, "grad_norm": 0.9806278416433408, "learning_rate": 1.6729009583858974e-05, "loss": 0.8469, "step": 3207 }, { "epoch": 0.28724606861044266, "grad_norm": 0.864084149388989, "learning_rate": 1.6726863862860147e-05, "loss": 0.8343, "step": 3208 }, { "epoch": 0.28733560915552075, "grad_norm": 1.0425967923450619, "learning_rate": 1.6724717576019452e-05, "loss": 0.8363, "step": 3209 }, { "epoch": 0.2874251497005988, "grad_norm": 0.9728595411255289, "learning_rate": 1.672257072351743e-05, "loss": 0.9134, "step": 3210 }, { "epoch": 0.2875146902456769, "grad_norm": 0.9194735682346911, "learning_rate": 1.6720423305534667e-05, "loss": 0.8803, "step": 3211 }, { "epoch": 0.2876042307907549, "grad_norm": 0.9422105704178322, "learning_rate": 1.6718275322251795e-05, "loss": 0.8616, "step": 3212 }, { "epoch": 0.287693771335833, "grad_norm": 0.8843388795011171, "learning_rate": 1.6716126773849492e-05, "loss": 0.8538, "step": 3213 }, { "epoch": 0.28778331188091105, "grad_norm": 1.0091168634732492, "learning_rate": 1.6713977660508493e-05, "loss": 0.9557, "step": 3214 }, { "epoch": 0.28787285242598915, "grad_norm": 0.9747250743109609, "learning_rate": 1.6711827982409573e-05, "loss": 0.9586, "step": 3215 }, { "epoch": 0.28796239297106724, "grad_norm": 1.02488948442337, "learning_rate": 1.6709677739733555e-05, "loss": 0.9058, "step": 3216 }, { "epoch": 0.2880519335161453, "grad_norm": 0.9971299706746245, "learning_rate": 1.670752693266131e-05, "loss": 0.7917, "step": 3217 }, { "epoch": 0.28814147406122337, "grad_norm": 0.9436902565552915, "learning_rate": 1.670537556137376e-05, "loss": 0.9243, "step": 3218 }, { "epoch": 0.2882310146063014, "grad_norm": 1.2315407220820207, "learning_rate": 1.6703223626051866e-05, "loss": 0.8191, "step": 3219 }, { "epoch": 0.2883205551513795, "grad_norm": 0.8295536177777809, "learning_rate": 1.670107112687664e-05, "loss": 0.8437, "step": 3220 }, { "epoch": 0.28841009569645754, "grad_norm": 0.9707409648873437, "learning_rate": 1.6698918064029155e-05, "loss": 0.9168, "step": 3221 }, { "epoch": 0.28849963624153563, "grad_norm": 0.9772613410653564, "learning_rate": 1.669676443769051e-05, "loss": 0.8333, "step": 3222 }, { "epoch": 0.28858917678661367, "grad_norm": 0.959326622044151, "learning_rate": 1.6694610248041864e-05, "loss": 0.8894, "step": 3223 }, { "epoch": 0.28867871733169176, "grad_norm": 0.9642767461677191, "learning_rate": 1.6692455495264413e-05, "loss": 0.9108, "step": 3224 }, { "epoch": 0.28876825787676985, "grad_norm": 1.0548427345861626, "learning_rate": 1.6690300179539423e-05, "loss": 0.8976, "step": 3225 }, { "epoch": 0.2888577984218479, "grad_norm": 0.9521922112892662, "learning_rate": 1.668814430104818e-05, "loss": 0.8426, "step": 3226 }, { "epoch": 0.288947338966926, "grad_norm": 0.9259928196967661, "learning_rate": 1.6685987859972033e-05, "loss": 0.8619, "step": 3227 }, { "epoch": 0.289036879512004, "grad_norm": 0.9314846439250073, "learning_rate": 1.6683830856492377e-05, "loss": 0.8708, "step": 3228 }, { "epoch": 0.2891264200570821, "grad_norm": 1.009583004411416, "learning_rate": 1.6681673290790645e-05, "loss": 0.8837, "step": 3229 }, { "epoch": 0.28921596060216015, "grad_norm": 0.9304154887110127, "learning_rate": 1.6679515163048333e-05, "loss": 0.9089, "step": 3230 }, { "epoch": 0.28930550114723824, "grad_norm": 0.9192434388647978, "learning_rate": 1.6677356473446972e-05, "loss": 0.852, "step": 3231 }, { "epoch": 0.2893950416923163, "grad_norm": 0.9679358179116513, "learning_rate": 1.6675197222168144e-05, "loss": 0.8551, "step": 3232 }, { "epoch": 0.28948458223739437, "grad_norm": 0.9498729899220998, "learning_rate": 1.667303740939348e-05, "loss": 0.8624, "step": 3233 }, { "epoch": 0.28957412278247247, "grad_norm": 0.8688824298396755, "learning_rate": 1.6670877035304652e-05, "loss": 0.8428, "step": 3234 }, { "epoch": 0.2896636633275505, "grad_norm": 1.0242323609621278, "learning_rate": 1.6668716100083386e-05, "loss": 0.8795, "step": 3235 }, { "epoch": 0.2897532038726286, "grad_norm": 0.9165733908228226, "learning_rate": 1.6666554603911454e-05, "loss": 0.8246, "step": 3236 }, { "epoch": 0.28984274441770663, "grad_norm": 0.8373146859118741, "learning_rate": 1.6664392546970673e-05, "loss": 0.859, "step": 3237 }, { "epoch": 0.2899322849627847, "grad_norm": 0.8976506763010602, "learning_rate": 1.6662229929442904e-05, "loss": 0.8759, "step": 3238 }, { "epoch": 0.29002182550786276, "grad_norm": 1.038814382218406, "learning_rate": 1.6660066751510067e-05, "loss": 0.8582, "step": 3239 }, { "epoch": 0.29011136605294086, "grad_norm": 1.003097881638538, "learning_rate": 1.665790301335412e-05, "loss": 0.8108, "step": 3240 }, { "epoch": 0.2902009065980189, "grad_norm": 0.8888945467173105, "learning_rate": 1.6655738715157067e-05, "loss": 0.892, "step": 3241 }, { "epoch": 0.290290447143097, "grad_norm": 0.8655574541263847, "learning_rate": 1.6653573857100963e-05, "loss": 0.8591, "step": 3242 }, { "epoch": 0.2903799876881751, "grad_norm": 1.1122810281258813, "learning_rate": 1.6651408439367906e-05, "loss": 0.8925, "step": 3243 }, { "epoch": 0.2904695282332531, "grad_norm": 0.8375975383815422, "learning_rate": 1.6649242462140046e-05, "loss": 0.856, "step": 3244 }, { "epoch": 0.2905590687783312, "grad_norm": 0.9343012023661711, "learning_rate": 1.664707592559958e-05, "loss": 0.8604, "step": 3245 }, { "epoch": 0.29064860932340925, "grad_norm": 0.941289958297517, "learning_rate": 1.6644908829928746e-05, "loss": 0.9262, "step": 3246 }, { "epoch": 0.29073814986848734, "grad_norm": 0.908991697178001, "learning_rate": 1.6642741175309834e-05, "loss": 0.8033, "step": 3247 }, { "epoch": 0.2908276904135654, "grad_norm": 0.9890487765557584, "learning_rate": 1.6640572961925182e-05, "loss": 0.8674, "step": 3248 }, { "epoch": 0.29091723095864347, "grad_norm": 0.8785740981735879, "learning_rate": 1.6638404189957175e-05, "loss": 0.8695, "step": 3249 }, { "epoch": 0.2910067715037215, "grad_norm": 0.9046915537014031, "learning_rate": 1.6636234859588237e-05, "loss": 0.8818, "step": 3250 }, { "epoch": 0.2910963120487996, "grad_norm": 0.9598828458643593, "learning_rate": 1.663406497100085e-05, "loss": 0.9068, "step": 3251 }, { "epoch": 0.2911858525938777, "grad_norm": 1.0138962109622558, "learning_rate": 1.6631894524377534e-05, "loss": 0.8625, "step": 3252 }, { "epoch": 0.29127539313895573, "grad_norm": 0.8407423168014051, "learning_rate": 1.6629723519900865e-05, "loss": 0.8587, "step": 3253 }, { "epoch": 0.2913649336840338, "grad_norm": 0.9651146221094635, "learning_rate": 1.6627551957753458e-05, "loss": 0.8701, "step": 3254 }, { "epoch": 0.29145447422911186, "grad_norm": 1.012547741516527, "learning_rate": 1.662537983811797e-05, "loss": 0.871, "step": 3255 }, { "epoch": 0.29154401477418995, "grad_norm": 1.1275837011289496, "learning_rate": 1.662320716117713e-05, "loss": 0.9275, "step": 3256 }, { "epoch": 0.291633555319268, "grad_norm": 0.8533431619885825, "learning_rate": 1.662103392711368e-05, "loss": 0.8819, "step": 3257 }, { "epoch": 0.2917230958643461, "grad_norm": 0.9037538105831064, "learning_rate": 1.6618860136110434e-05, "loss": 0.9018, "step": 3258 }, { "epoch": 0.2918126364094241, "grad_norm": 0.9027846157991418, "learning_rate": 1.6616685788350246e-05, "loss": 0.9206, "step": 3259 }, { "epoch": 0.2919021769545022, "grad_norm": 0.8878364206273809, "learning_rate": 1.6614510884016004e-05, "loss": 0.8626, "step": 3260 }, { "epoch": 0.2919917174995803, "grad_norm": 0.9398855288484764, "learning_rate": 1.6612335423290667e-05, "loss": 0.8473, "step": 3261 }, { "epoch": 0.29208125804465834, "grad_norm": 0.8573502823502863, "learning_rate": 1.661015940635722e-05, "loss": 0.8502, "step": 3262 }, { "epoch": 0.29217079858973644, "grad_norm": 0.9793373723115257, "learning_rate": 1.6607982833398703e-05, "loss": 0.8486, "step": 3263 }, { "epoch": 0.2922603391348145, "grad_norm": 1.2221691881739998, "learning_rate": 1.6605805704598208e-05, "loss": 0.8956, "step": 3264 }, { "epoch": 0.29234987967989257, "grad_norm": 0.9541275359348135, "learning_rate": 1.6603628020138857e-05, "loss": 0.8687, "step": 3265 }, { "epoch": 0.2924394202249706, "grad_norm": 0.9582163398397108, "learning_rate": 1.660144978020384e-05, "loss": 0.8129, "step": 3266 }, { "epoch": 0.2925289607700487, "grad_norm": 1.4375057766377144, "learning_rate": 1.659927098497638e-05, "loss": 0.7965, "step": 3267 }, { "epoch": 0.29261850131512673, "grad_norm": 0.91491602344137, "learning_rate": 1.6597091634639747e-05, "loss": 0.8639, "step": 3268 }, { "epoch": 0.2927080418602048, "grad_norm": 0.8799933963508528, "learning_rate": 1.6594911729377268e-05, "loss": 0.851, "step": 3269 }, { "epoch": 0.2927975824052829, "grad_norm": 0.998306802187008, "learning_rate": 1.6592731269372303e-05, "loss": 0.8611, "step": 3270 }, { "epoch": 0.29288712295036096, "grad_norm": 0.972013274754095, "learning_rate": 1.6590550254808266e-05, "loss": 0.9234, "step": 3271 }, { "epoch": 0.29297666349543905, "grad_norm": 0.8279498780570005, "learning_rate": 1.658836868586862e-05, "loss": 0.868, "step": 3272 }, { "epoch": 0.2930662040405171, "grad_norm": 0.9122430283181979, "learning_rate": 1.6586186562736868e-05, "loss": 0.8357, "step": 3273 }, { "epoch": 0.2931557445855952, "grad_norm": 0.9127344921300548, "learning_rate": 1.6584003885596566e-05, "loss": 0.859, "step": 3274 }, { "epoch": 0.2932452851306732, "grad_norm": 1.0029979275262317, "learning_rate": 1.6581820654631313e-05, "loss": 0.8577, "step": 3275 }, { "epoch": 0.2933348256757513, "grad_norm": 0.9391636952584056, "learning_rate": 1.6579636870024757e-05, "loss": 0.8919, "step": 3276 }, { "epoch": 0.29342436622082935, "grad_norm": 0.9154703401389772, "learning_rate": 1.6577452531960584e-05, "loss": 0.8863, "step": 3277 }, { "epoch": 0.29351390676590744, "grad_norm": 0.9661786340970516, "learning_rate": 1.6575267640622538e-05, "loss": 0.873, "step": 3278 }, { "epoch": 0.29360344731098553, "grad_norm": 0.9282358930917813, "learning_rate": 1.657308219619441e-05, "loss": 0.8583, "step": 3279 }, { "epoch": 0.29369298785606357, "grad_norm": 0.9965039641540155, "learning_rate": 1.657089619886002e-05, "loss": 0.8366, "step": 3280 }, { "epoch": 0.29378252840114166, "grad_norm": 0.9749795638790674, "learning_rate": 1.656870964880326e-05, "loss": 0.9008, "step": 3281 }, { "epoch": 0.2938720689462197, "grad_norm": 0.9306223595961471, "learning_rate": 1.656652254620805e-05, "loss": 0.8546, "step": 3282 }, { "epoch": 0.2939616094912978, "grad_norm": 0.8798222759587829, "learning_rate": 1.6564334891258356e-05, "loss": 0.8712, "step": 3283 }, { "epoch": 0.29405115003637583, "grad_norm": 1.000760310311452, "learning_rate": 1.6562146684138206e-05, "loss": 0.8989, "step": 3284 }, { "epoch": 0.2941406905814539, "grad_norm": 1.0024506878602193, "learning_rate": 1.655995792503166e-05, "loss": 0.865, "step": 3285 }, { "epoch": 0.29423023112653196, "grad_norm": 0.9693756465244854, "learning_rate": 1.6557768614122832e-05, "loss": 0.8784, "step": 3286 }, { "epoch": 0.29431977167161005, "grad_norm": 0.9572361990884715, "learning_rate": 1.655557875159588e-05, "loss": 0.9233, "step": 3287 }, { "epoch": 0.29440931221668815, "grad_norm": 0.9019902760106082, "learning_rate": 1.6553388337635e-05, "loss": 0.8539, "step": 3288 }, { "epoch": 0.2944988527617662, "grad_norm": 0.9742717789802614, "learning_rate": 1.6551197372424456e-05, "loss": 0.864, "step": 3289 }, { "epoch": 0.2945883933068443, "grad_norm": 0.9392096403968139, "learning_rate": 1.6549005856148534e-05, "loss": 0.926, "step": 3290 }, { "epoch": 0.2946779338519223, "grad_norm": 0.8643058765257536, "learning_rate": 1.6546813788991578e-05, "loss": 0.8611, "step": 3291 }, { "epoch": 0.2947674743970004, "grad_norm": 1.0038257415528673, "learning_rate": 1.6544621171137984e-05, "loss": 0.8302, "step": 3292 }, { "epoch": 0.29485701494207844, "grad_norm": 0.9232743051631848, "learning_rate": 1.6542428002772182e-05, "loss": 0.8365, "step": 3293 }, { "epoch": 0.29494655548715654, "grad_norm": 0.8736492805665838, "learning_rate": 1.6540234284078656e-05, "loss": 0.9205, "step": 3294 }, { "epoch": 0.2950360960322346, "grad_norm": 0.9583720483706979, "learning_rate": 1.6538040015241937e-05, "loss": 0.879, "step": 3295 }, { "epoch": 0.29512563657731267, "grad_norm": 0.9899434274970295, "learning_rate": 1.6535845196446593e-05, "loss": 0.8447, "step": 3296 }, { "epoch": 0.29521517712239076, "grad_norm": 0.9914752856463189, "learning_rate": 1.6533649827877254e-05, "loss": 0.8967, "step": 3297 }, { "epoch": 0.2953047176674688, "grad_norm": 0.9031522174524893, "learning_rate": 1.653145390971858e-05, "loss": 0.883, "step": 3298 }, { "epoch": 0.2953942582125469, "grad_norm": 0.9549113483601311, "learning_rate": 1.652925744215529e-05, "loss": 0.8947, "step": 3299 }, { "epoch": 0.2954837987576249, "grad_norm": 0.9020602435368948, "learning_rate": 1.652706042537214e-05, "loss": 0.8113, "step": 3300 }, { "epoch": 0.295573339302703, "grad_norm": 1.0360156074190718, "learning_rate": 1.6524862859553935e-05, "loss": 0.8604, "step": 3301 }, { "epoch": 0.29566287984778106, "grad_norm": 0.9433688564932925, "learning_rate": 1.652266474488553e-05, "loss": 0.8813, "step": 3302 }, { "epoch": 0.29575242039285915, "grad_norm": 0.9183760370081216, "learning_rate": 1.6520466081551823e-05, "loss": 0.8737, "step": 3303 }, { "epoch": 0.2958419609379372, "grad_norm": 1.0427240380391123, "learning_rate": 1.6518266869737755e-05, "loss": 0.8609, "step": 3304 }, { "epoch": 0.2959315014830153, "grad_norm": 0.9075751377735736, "learning_rate": 1.651606710962832e-05, "loss": 0.8833, "step": 3305 }, { "epoch": 0.2960210420280934, "grad_norm": 0.9964647071277793, "learning_rate": 1.6513866801408553e-05, "loss": 0.8316, "step": 3306 }, { "epoch": 0.2961105825731714, "grad_norm": 1.0561973256810544, "learning_rate": 1.651166594526354e-05, "loss": 0.8161, "step": 3307 }, { "epoch": 0.2962001231182495, "grad_norm": 1.0149517433178916, "learning_rate": 1.6509464541378404e-05, "loss": 0.8647, "step": 3308 }, { "epoch": 0.29628966366332754, "grad_norm": 1.057077634484621, "learning_rate": 1.6507262589938325e-05, "loss": 0.8666, "step": 3309 }, { "epoch": 0.29637920420840563, "grad_norm": 1.0895180933609274, "learning_rate": 1.650506009112852e-05, "loss": 0.8858, "step": 3310 }, { "epoch": 0.29646874475348367, "grad_norm": 0.9246340361392058, "learning_rate": 1.6502857045134262e-05, "loss": 0.905, "step": 3311 }, { "epoch": 0.29655828529856176, "grad_norm": 0.9258748623867799, "learning_rate": 1.650065345214086e-05, "loss": 0.8547, "step": 3312 }, { "epoch": 0.2966478258436398, "grad_norm": 0.9856245009323533, "learning_rate": 1.6498449312333674e-05, "loss": 0.8591, "step": 3313 }, { "epoch": 0.2967373663887179, "grad_norm": 0.8589078392289878, "learning_rate": 1.6496244625898103e-05, "loss": 0.7809, "step": 3314 }, { "epoch": 0.296826906933796, "grad_norm": 0.9852228008078571, "learning_rate": 1.6494039393019606e-05, "loss": 0.9095, "step": 3315 }, { "epoch": 0.296916447478874, "grad_norm": 1.6707114816872342, "learning_rate": 1.649183361388368e-05, "loss": 0.8947, "step": 3316 }, { "epoch": 0.2970059880239521, "grad_norm": 0.8606948222912678, "learning_rate": 1.6489627288675865e-05, "loss": 0.8595, "step": 3317 }, { "epoch": 0.29709552856903015, "grad_norm": 0.980090550966696, "learning_rate": 1.6487420417581746e-05, "loss": 0.8723, "step": 3318 }, { "epoch": 0.29718506911410825, "grad_norm": 1.0250113715154596, "learning_rate": 1.6485213000786966e-05, "loss": 0.8781, "step": 3319 }, { "epoch": 0.2972746096591863, "grad_norm": 0.9688053476778848, "learning_rate": 1.64830050384772e-05, "loss": 0.8721, "step": 3320 }, { "epoch": 0.2973641502042644, "grad_norm": 0.8851681997423748, "learning_rate": 1.6480796530838176e-05, "loss": 0.8278, "step": 3321 }, { "epoch": 0.2974536907493424, "grad_norm": 0.8742978652683395, "learning_rate": 1.6478587478055668e-05, "loss": 0.8834, "step": 3322 }, { "epoch": 0.2975432312944205, "grad_norm": 1.080073501563653, "learning_rate": 1.6476377880315495e-05, "loss": 0.8272, "step": 3323 }, { "epoch": 0.2976327718394986, "grad_norm": 1.0062505383933291, "learning_rate": 1.6474167737803517e-05, "loss": 0.8796, "step": 3324 }, { "epoch": 0.29772231238457664, "grad_norm": 0.9728013528840082, "learning_rate": 1.6471957050705645e-05, "loss": 0.8837, "step": 3325 }, { "epoch": 0.29781185292965473, "grad_norm": 0.9867169709172918, "learning_rate": 1.646974581920784e-05, "loss": 0.84, "step": 3326 }, { "epoch": 0.29790139347473277, "grad_norm": 0.9518110546243204, "learning_rate": 1.6467534043496095e-05, "loss": 0.8319, "step": 3327 }, { "epoch": 0.29799093401981086, "grad_norm": 1.0781699523117905, "learning_rate": 1.6465321723756464e-05, "loss": 0.9126, "step": 3328 }, { "epoch": 0.2980804745648889, "grad_norm": 0.991606119281339, "learning_rate": 1.6463108860175036e-05, "loss": 0.9109, "step": 3329 }, { "epoch": 0.298170015109967, "grad_norm": 0.8877881828552697, "learning_rate": 1.6460895452937956e-05, "loss": 0.8551, "step": 3330 }, { "epoch": 0.298259555655045, "grad_norm": 0.9819466374320167, "learning_rate": 1.6458681502231405e-05, "loss": 0.8223, "step": 3331 }, { "epoch": 0.2983490962001231, "grad_norm": 0.9108872159909693, "learning_rate": 1.645646700824161e-05, "loss": 0.8406, "step": 3332 }, { "epoch": 0.2984386367452012, "grad_norm": 0.8629868230494729, "learning_rate": 1.645425197115485e-05, "loss": 0.8455, "step": 3333 }, { "epoch": 0.29852817729027925, "grad_norm": 0.9489229233062977, "learning_rate": 1.645203639115745e-05, "loss": 0.8282, "step": 3334 }, { "epoch": 0.29861771783535734, "grad_norm": 0.9034629564276201, "learning_rate": 1.644982026843577e-05, "loss": 0.8844, "step": 3335 }, { "epoch": 0.2987072583804354, "grad_norm": 0.965359558375752, "learning_rate": 1.6447603603176227e-05, "loss": 0.8714, "step": 3336 }, { "epoch": 0.2987967989255135, "grad_norm": 0.9872869519565761, "learning_rate": 1.6445386395565283e-05, "loss": 0.8225, "step": 3337 }, { "epoch": 0.2988863394705915, "grad_norm": 1.3107464558046085, "learning_rate": 1.6443168645789436e-05, "loss": 0.8903, "step": 3338 }, { "epoch": 0.2989758800156696, "grad_norm": 1.0235286103970958, "learning_rate": 1.6440950354035242e-05, "loss": 0.8603, "step": 3339 }, { "epoch": 0.29906542056074764, "grad_norm": 0.9734319568708801, "learning_rate": 1.6438731520489292e-05, "loss": 0.862, "step": 3340 }, { "epoch": 0.29915496110582573, "grad_norm": 0.8913377143496777, "learning_rate": 1.6436512145338227e-05, "loss": 0.8367, "step": 3341 }, { "epoch": 0.2992445016509038, "grad_norm": 0.8905430230549951, "learning_rate": 1.6434292228768736e-05, "loss": 0.841, "step": 3342 }, { "epoch": 0.29933404219598186, "grad_norm": 0.8855827955004383, "learning_rate": 1.643207177096755e-05, "loss": 0.8673, "step": 3343 }, { "epoch": 0.29942358274105996, "grad_norm": 1.0022150252933022, "learning_rate": 1.6429850772121448e-05, "loss": 0.9596, "step": 3344 }, { "epoch": 0.299513123286138, "grad_norm": 1.058986381359884, "learning_rate": 1.6427629232417253e-05, "loss": 0.9042, "step": 3345 }, { "epoch": 0.2996026638312161, "grad_norm": 1.139066325193794, "learning_rate": 1.642540715204183e-05, "loss": 0.8694, "step": 3346 }, { "epoch": 0.2996922043762941, "grad_norm": 0.9809387249031534, "learning_rate": 1.6423184531182098e-05, "loss": 0.897, "step": 3347 }, { "epoch": 0.2997817449213722, "grad_norm": 0.883297085362577, "learning_rate": 1.642096137002501e-05, "loss": 0.7982, "step": 3348 }, { "epoch": 0.29987128546645025, "grad_norm": 0.8974921040768915, "learning_rate": 1.641873766875758e-05, "loss": 0.8372, "step": 3349 }, { "epoch": 0.29996082601152835, "grad_norm": 0.973746135301797, "learning_rate": 1.6416513427566853e-05, "loss": 0.8942, "step": 3350 }, { "epoch": 0.30005036655660644, "grad_norm": 0.9111712424072251, "learning_rate": 1.6414288646639928e-05, "loss": 0.8156, "step": 3351 }, { "epoch": 0.3001399071016845, "grad_norm": 0.9164911353043519, "learning_rate": 1.641206332616394e-05, "loss": 0.8362, "step": 3352 }, { "epoch": 0.30022944764676257, "grad_norm": 0.9425401626791092, "learning_rate": 1.6409837466326082e-05, "loss": 0.904, "step": 3353 }, { "epoch": 0.3003189881918406, "grad_norm": 1.0007983932076, "learning_rate": 1.640761106731359e-05, "loss": 0.8913, "step": 3354 }, { "epoch": 0.3004085287369187, "grad_norm": 1.1366846750040638, "learning_rate": 1.6405384129313725e-05, "loss": 0.8506, "step": 3355 }, { "epoch": 0.30049806928199674, "grad_norm": 1.1316637554447213, "learning_rate": 1.640315665251383e-05, "loss": 0.8984, "step": 3356 }, { "epoch": 0.30058760982707483, "grad_norm": 1.0725245615813197, "learning_rate": 1.6400928637101253e-05, "loss": 0.8415, "step": 3357 }, { "epoch": 0.30067715037215287, "grad_norm": 0.8667239395328905, "learning_rate": 1.6398700083263426e-05, "loss": 0.8634, "step": 3358 }, { "epoch": 0.30076669091723096, "grad_norm": 1.0562908035709986, "learning_rate": 1.6396470991187796e-05, "loss": 0.8688, "step": 3359 }, { "epoch": 0.30085623146230905, "grad_norm": 1.0092443613129098, "learning_rate": 1.6394241361061873e-05, "loss": 0.8906, "step": 3360 }, { "epoch": 0.3009457720073871, "grad_norm": 0.8535208591883673, "learning_rate": 1.63920111930732e-05, "loss": 0.8361, "step": 3361 }, { "epoch": 0.3010353125524652, "grad_norm": 0.9145565018944185, "learning_rate": 1.6389780487409377e-05, "loss": 0.8865, "step": 3362 }, { "epoch": 0.3011248530975432, "grad_norm": 0.9404076413003393, "learning_rate": 1.6387549244258043e-05, "loss": 0.8708, "step": 3363 }, { "epoch": 0.3012143936426213, "grad_norm": 0.9249544702683623, "learning_rate": 1.638531746380688e-05, "loss": 0.885, "step": 3364 }, { "epoch": 0.30130393418769935, "grad_norm": 1.0181360257300796, "learning_rate": 1.638308514624362e-05, "loss": 0.8357, "step": 3365 }, { "epoch": 0.30139347473277744, "grad_norm": 0.929546369423035, "learning_rate": 1.6380852291756036e-05, "loss": 0.862, "step": 3366 }, { "epoch": 0.3014830152778555, "grad_norm": 0.8930116290147487, "learning_rate": 1.6378618900531957e-05, "loss": 0.9068, "step": 3367 }, { "epoch": 0.3015725558229336, "grad_norm": 0.9382718644988309, "learning_rate": 1.6376384972759238e-05, "loss": 0.8326, "step": 3368 }, { "epoch": 0.30166209636801167, "grad_norm": 0.9441542819356852, "learning_rate": 1.637415050862579e-05, "loss": 0.8738, "step": 3369 }, { "epoch": 0.3017516369130897, "grad_norm": 0.9873020019489532, "learning_rate": 1.637191550831958e-05, "loss": 0.8451, "step": 3370 }, { "epoch": 0.3018411774581678, "grad_norm": 0.9349078225057035, "learning_rate": 1.63696799720286e-05, "loss": 0.8554, "step": 3371 }, { "epoch": 0.30193071800324583, "grad_norm": 1.136421596115118, "learning_rate": 1.6367443899940895e-05, "loss": 0.8879, "step": 3372 }, { "epoch": 0.3020202585483239, "grad_norm": 1.034531909315919, "learning_rate": 1.6365207292244557e-05, "loss": 0.7959, "step": 3373 }, { "epoch": 0.30210979909340197, "grad_norm": 0.9056855865082264, "learning_rate": 1.6362970149127727e-05, "loss": 0.8496, "step": 3374 }, { "epoch": 0.30219933963848006, "grad_norm": 0.9308162316532969, "learning_rate": 1.6360732470778583e-05, "loss": 0.8618, "step": 3375 }, { "epoch": 0.3022888801835581, "grad_norm": 0.9412597327105108, "learning_rate": 1.635849425738535e-05, "loss": 0.9087, "step": 3376 }, { "epoch": 0.3023784207286362, "grad_norm": 1.2975027069142038, "learning_rate": 1.6356255509136304e-05, "loss": 0.8298, "step": 3377 }, { "epoch": 0.3024679612737143, "grad_norm": 0.9485635632596228, "learning_rate": 1.6354016226219752e-05, "loss": 0.8793, "step": 3378 }, { "epoch": 0.3025575018187923, "grad_norm": 0.8960492780088233, "learning_rate": 1.6351776408824066e-05, "loss": 0.8163, "step": 3379 }, { "epoch": 0.3026470423638704, "grad_norm": 0.9179515893934462, "learning_rate": 1.6349536057137646e-05, "loss": 0.8741, "step": 3380 }, { "epoch": 0.30273658290894845, "grad_norm": 1.0386365296587892, "learning_rate": 1.6347295171348943e-05, "loss": 0.8547, "step": 3381 }, { "epoch": 0.30282612345402654, "grad_norm": 0.9336272672608067, "learning_rate": 1.6345053751646455e-05, "loss": 0.8786, "step": 3382 }, { "epoch": 0.3029156639991046, "grad_norm": 0.922362835587453, "learning_rate": 1.634281179821872e-05, "loss": 0.8521, "step": 3383 }, { "epoch": 0.30300520454418267, "grad_norm": 0.9621682680547788, "learning_rate": 1.6340569311254328e-05, "loss": 0.8236, "step": 3384 }, { "epoch": 0.3030947450892607, "grad_norm": 0.975488648605726, "learning_rate": 1.6338326290941906e-05, "loss": 0.8445, "step": 3385 }, { "epoch": 0.3031842856343388, "grad_norm": 1.0176065107094208, "learning_rate": 1.6336082737470132e-05, "loss": 0.8391, "step": 3386 }, { "epoch": 0.3032738261794169, "grad_norm": 0.9761483915339645, "learning_rate": 1.6333838651027724e-05, "loss": 0.9061, "step": 3387 }, { "epoch": 0.30336336672449493, "grad_norm": 0.9113043096186673, "learning_rate": 1.6331594031803453e-05, "loss": 0.8336, "step": 3388 }, { "epoch": 0.303452907269573, "grad_norm": 0.8564439519628021, "learning_rate": 1.632934887998612e-05, "loss": 0.8567, "step": 3389 }, { "epoch": 0.30354244781465106, "grad_norm": 0.963881667457333, "learning_rate": 1.6327103195764588e-05, "loss": 0.9046, "step": 3390 }, { "epoch": 0.30363198835972915, "grad_norm": 0.9343078740194618, "learning_rate": 1.6324856979327754e-05, "loss": 0.8846, "step": 3391 }, { "epoch": 0.3037215289048072, "grad_norm": 1.040381157262312, "learning_rate": 1.632261023086456e-05, "loss": 0.8473, "step": 3392 }, { "epoch": 0.3038110694498853, "grad_norm": 0.9724309471609979, "learning_rate": 1.6320362950563995e-05, "loss": 0.8782, "step": 3393 }, { "epoch": 0.3039006099949633, "grad_norm": 0.9539440708460019, "learning_rate": 1.6318115138615095e-05, "loss": 0.8309, "step": 3394 }, { "epoch": 0.3039901505400414, "grad_norm": 1.0809948350659595, "learning_rate": 1.6315866795206943e-05, "loss": 0.9079, "step": 3395 }, { "epoch": 0.3040796910851195, "grad_norm": 0.8846212167410237, "learning_rate": 1.6313617920528653e-05, "loss": 0.8866, "step": 3396 }, { "epoch": 0.30416923163019755, "grad_norm": 1.0262091195902667, "learning_rate": 1.63113685147694e-05, "loss": 0.8717, "step": 3397 }, { "epoch": 0.30425877217527564, "grad_norm": 0.8675087304735325, "learning_rate": 1.6309118578118396e-05, "loss": 0.8354, "step": 3398 }, { "epoch": 0.3043483127203537, "grad_norm": 1.0415908936065483, "learning_rate": 1.6306868110764893e-05, "loss": 0.8328, "step": 3399 }, { "epoch": 0.30443785326543177, "grad_norm": 0.8679334027093386, "learning_rate": 1.63046171128982e-05, "loss": 0.8683, "step": 3400 }, { "epoch": 0.3045273938105098, "grad_norm": 0.9333733051329592, "learning_rate": 1.630236558470766e-05, "loss": 0.8942, "step": 3401 }, { "epoch": 0.3046169343555879, "grad_norm": 0.9597638885384935, "learning_rate": 1.630011352638266e-05, "loss": 0.9216, "step": 3402 }, { "epoch": 0.30470647490066594, "grad_norm": 0.9050292168063366, "learning_rate": 1.6297860938112644e-05, "loss": 0.8598, "step": 3403 }, { "epoch": 0.30479601544574403, "grad_norm": 1.0005228029060493, "learning_rate": 1.6295607820087087e-05, "loss": 0.7883, "step": 3404 }, { "epoch": 0.3048855559908221, "grad_norm": 0.9225407879910846, "learning_rate": 1.629335417249552e-05, "loss": 0.8566, "step": 3405 }, { "epoch": 0.30497509653590016, "grad_norm": 0.9969329685815507, "learning_rate": 1.6291099995527504e-05, "loss": 0.8716, "step": 3406 }, { "epoch": 0.30506463708097825, "grad_norm": 0.9428467799387135, "learning_rate": 1.6288845289372657e-05, "loss": 0.9121, "step": 3407 }, { "epoch": 0.3051541776260563, "grad_norm": 0.9100311190918421, "learning_rate": 1.6286590054220643e-05, "loss": 0.8463, "step": 3408 }, { "epoch": 0.3052437181711344, "grad_norm": 0.9801864910993777, "learning_rate": 1.6284334290261154e-05, "loss": 0.8714, "step": 3409 }, { "epoch": 0.3053332587162124, "grad_norm": 1.0576716361059155, "learning_rate": 1.6282077997683945e-05, "loss": 0.8308, "step": 3410 }, { "epoch": 0.3054227992612905, "grad_norm": 1.0337224143873354, "learning_rate": 1.6279821176678805e-05, "loss": 0.8074, "step": 3411 }, { "epoch": 0.30551233980636855, "grad_norm": 0.9620021039486134, "learning_rate": 1.6277563827435573e-05, "loss": 0.8811, "step": 3412 }, { "epoch": 0.30560188035144664, "grad_norm": 1.089106944049632, "learning_rate": 1.627530595014413e-05, "loss": 0.859, "step": 3413 }, { "epoch": 0.30569142089652473, "grad_norm": 1.000249419507427, "learning_rate": 1.6273047544994402e-05, "loss": 0.8304, "step": 3414 }, { "epoch": 0.30578096144160277, "grad_norm": 0.953848023189794, "learning_rate": 1.6270788612176353e-05, "loss": 0.819, "step": 3415 }, { "epoch": 0.30587050198668086, "grad_norm": 0.9742188500118159, "learning_rate": 1.626852915188e-05, "loss": 0.9366, "step": 3416 }, { "epoch": 0.3059600425317589, "grad_norm": 0.9415175831265549, "learning_rate": 1.6266269164295402e-05, "loss": 0.8248, "step": 3417 }, { "epoch": 0.306049583076837, "grad_norm": 0.9130590932669462, "learning_rate": 1.626400864961266e-05, "loss": 0.8412, "step": 3418 }, { "epoch": 0.30613912362191503, "grad_norm": 0.8876589175345738, "learning_rate": 1.6261747608021926e-05, "loss": 0.8483, "step": 3419 }, { "epoch": 0.3062286641669931, "grad_norm": 0.8940818677402772, "learning_rate": 1.625948603971339e-05, "loss": 0.8661, "step": 3420 }, { "epoch": 0.30631820471207116, "grad_norm": 0.9626924951537016, "learning_rate": 1.6257223944877284e-05, "loss": 0.8949, "step": 3421 }, { "epoch": 0.30640774525714926, "grad_norm": 1.0070905883991625, "learning_rate": 1.625496132370389e-05, "loss": 0.8648, "step": 3422 }, { "epoch": 0.30649728580222735, "grad_norm": 0.9962808815191857, "learning_rate": 1.625269817638353e-05, "loss": 0.9433, "step": 3423 }, { "epoch": 0.3065868263473054, "grad_norm": 0.9259661882997582, "learning_rate": 1.625043450310658e-05, "loss": 0.9183, "step": 3424 }, { "epoch": 0.3066763668923835, "grad_norm": 1.0635674383092117, "learning_rate": 1.6248170304063448e-05, "loss": 0.8534, "step": 3425 }, { "epoch": 0.3067659074374615, "grad_norm": 0.9960795819882224, "learning_rate": 1.6245905579444587e-05, "loss": 0.8659, "step": 3426 }, { "epoch": 0.3068554479825396, "grad_norm": 0.9468635227592602, "learning_rate": 1.6243640329440503e-05, "loss": 0.9017, "step": 3427 }, { "epoch": 0.30694498852761765, "grad_norm": 1.0935174475228768, "learning_rate": 1.6241374554241744e-05, "loss": 0.8267, "step": 3428 }, { "epoch": 0.30703452907269574, "grad_norm": 0.9405525033799972, "learning_rate": 1.6239108254038893e-05, "loss": 0.8524, "step": 3429 }, { "epoch": 0.3071240696177738, "grad_norm": 0.8922950735577043, "learning_rate": 1.6236841429022587e-05, "loss": 0.8545, "step": 3430 }, { "epoch": 0.30721361016285187, "grad_norm": 1.0052641106292792, "learning_rate": 1.6234574079383505e-05, "loss": 0.8491, "step": 3431 }, { "epoch": 0.30730315070792996, "grad_norm": 1.0724617478302498, "learning_rate": 1.6232306205312367e-05, "loss": 0.8644, "step": 3432 }, { "epoch": 0.307392691253008, "grad_norm": 0.8658514109629328, "learning_rate": 1.6230037806999944e-05, "loss": 0.8624, "step": 3433 }, { "epoch": 0.3074822317980861, "grad_norm": 1.0331838310455363, "learning_rate": 1.622776888463704e-05, "loss": 0.8331, "step": 3434 }, { "epoch": 0.30757177234316413, "grad_norm": 0.9005507906752604, "learning_rate": 1.6225499438414512e-05, "loss": 0.8388, "step": 3435 }, { "epoch": 0.3076613128882422, "grad_norm": 0.9694621781869465, "learning_rate": 1.6223229468523258e-05, "loss": 0.831, "step": 3436 }, { "epoch": 0.30775085343332026, "grad_norm": 1.1658138470007087, "learning_rate": 1.622095897515422e-05, "loss": 0.9043, "step": 3437 }, { "epoch": 0.30784039397839835, "grad_norm": 0.957523192208975, "learning_rate": 1.6218687958498386e-05, "loss": 0.8287, "step": 3438 }, { "epoch": 0.3079299345234764, "grad_norm": 1.0504540110286282, "learning_rate": 1.6216416418746787e-05, "loss": 0.9206, "step": 3439 }, { "epoch": 0.3080194750685545, "grad_norm": 1.0384576635004699, "learning_rate": 1.6214144356090494e-05, "loss": 0.8235, "step": 3440 }, { "epoch": 0.3081090156136326, "grad_norm": 0.9049288741768955, "learning_rate": 1.621187177072063e-05, "loss": 0.8796, "step": 3441 }, { "epoch": 0.3081985561587106, "grad_norm": 0.902046055993748, "learning_rate": 1.6209598662828353e-05, "loss": 0.9254, "step": 3442 }, { "epoch": 0.3082880967037887, "grad_norm": 0.8702330630496671, "learning_rate": 1.6207325032604875e-05, "loss": 0.812, "step": 3443 }, { "epoch": 0.30837763724886674, "grad_norm": 0.9512577595636892, "learning_rate": 1.620505088024144e-05, "loss": 0.8584, "step": 3444 }, { "epoch": 0.30846717779394484, "grad_norm": 0.8736108024310634, "learning_rate": 1.6202776205929347e-05, "loss": 0.8247, "step": 3445 }, { "epoch": 0.3085567183390229, "grad_norm": 0.977546182678823, "learning_rate": 1.620050100985993e-05, "loss": 0.8109, "step": 3446 }, { "epoch": 0.30864625888410097, "grad_norm": 0.9441407883901345, "learning_rate": 1.6198225292224576e-05, "loss": 0.8607, "step": 3447 }, { "epoch": 0.308735799429179, "grad_norm": 0.9292094808562754, "learning_rate": 1.619594905321471e-05, "loss": 0.858, "step": 3448 }, { "epoch": 0.3088253399742571, "grad_norm": 0.907658372940407, "learning_rate": 1.61936722930218e-05, "loss": 0.8944, "step": 3449 }, { "epoch": 0.3089148805193352, "grad_norm": 0.9452040891228245, "learning_rate": 1.6191395011837355e-05, "loss": 0.895, "step": 3450 }, { "epoch": 0.3090044210644132, "grad_norm": 1.0367338213477804, "learning_rate": 1.6189117209852945e-05, "loss": 0.8935, "step": 3451 }, { "epoch": 0.3090939616094913, "grad_norm": 0.9653878917870894, "learning_rate": 1.6186838887260158e-05, "loss": 0.9038, "step": 3452 }, { "epoch": 0.30918350215456936, "grad_norm": 0.9617260902061584, "learning_rate": 1.618456004425065e-05, "loss": 0.9008, "step": 3453 }, { "epoch": 0.30927304269964745, "grad_norm": 0.9103736868286196, "learning_rate": 1.61822806810161e-05, "loss": 0.8599, "step": 3454 }, { "epoch": 0.3093625832447255, "grad_norm": 1.0030654231814287, "learning_rate": 1.6180000797748248e-05, "loss": 0.8414, "step": 3455 }, { "epoch": 0.3094521237898036, "grad_norm": 0.9091806819292904, "learning_rate": 1.6177720394638865e-05, "loss": 0.9077, "step": 3456 }, { "epoch": 0.3095416643348816, "grad_norm": 0.876744536570522, "learning_rate": 1.6175439471879776e-05, "loss": 0.8758, "step": 3457 }, { "epoch": 0.3096312048799597, "grad_norm": 0.9355249348209174, "learning_rate": 1.6173158029662844e-05, "loss": 0.8592, "step": 3458 }, { "epoch": 0.3097207454250378, "grad_norm": 0.9359629254519449, "learning_rate": 1.617087606817997e-05, "loss": 0.8678, "step": 3459 }, { "epoch": 0.30981028597011584, "grad_norm": 1.0143250348608364, "learning_rate": 1.6168593587623114e-05, "loss": 0.8593, "step": 3460 }, { "epoch": 0.30989982651519393, "grad_norm": 1.2644287911915526, "learning_rate": 1.6166310588184265e-05, "loss": 0.8444, "step": 3461 }, { "epoch": 0.30998936706027197, "grad_norm": 1.0247649502788982, "learning_rate": 1.6164027070055466e-05, "loss": 0.858, "step": 3462 }, { "epoch": 0.31007890760535006, "grad_norm": 1.0590423639682442, "learning_rate": 1.6161743033428795e-05, "loss": 0.8993, "step": 3463 }, { "epoch": 0.3101684481504281, "grad_norm": 0.8764865389267936, "learning_rate": 1.615945847849638e-05, "loss": 0.8397, "step": 3464 }, { "epoch": 0.3102579886955062, "grad_norm": 0.9697408210957381, "learning_rate": 1.615717340545039e-05, "loss": 0.8329, "step": 3465 }, { "epoch": 0.31034752924058423, "grad_norm": 0.9081920502782245, "learning_rate": 1.6154887814483038e-05, "loss": 0.8508, "step": 3466 }, { "epoch": 0.3104370697856623, "grad_norm": 0.9263684568346056, "learning_rate": 1.6152601705786576e-05, "loss": 0.8821, "step": 3467 }, { "epoch": 0.3105266103307404, "grad_norm": 1.0993247898987768, "learning_rate": 1.6150315079553315e-05, "loss": 0.8289, "step": 3468 }, { "epoch": 0.31061615087581845, "grad_norm": 0.9834980267291011, "learning_rate": 1.6148027935975587e-05, "loss": 0.7801, "step": 3469 }, { "epoch": 0.31070569142089655, "grad_norm": 0.9460613267159045, "learning_rate": 1.6145740275245782e-05, "loss": 0.8762, "step": 3470 }, { "epoch": 0.3107952319659746, "grad_norm": 0.9314008052556515, "learning_rate": 1.6143452097556336e-05, "loss": 0.8427, "step": 3471 }, { "epoch": 0.3108847725110527, "grad_norm": 0.9363928298225046, "learning_rate": 1.6141163403099716e-05, "loss": 0.8586, "step": 3472 }, { "epoch": 0.3109743130561307, "grad_norm": 0.8842561063925621, "learning_rate": 1.6138874192068446e-05, "loss": 0.8451, "step": 3473 }, { "epoch": 0.3110638536012088, "grad_norm": 1.1796776311004327, "learning_rate": 1.6136584464655082e-05, "loss": 0.8522, "step": 3474 }, { "epoch": 0.31115339414628684, "grad_norm": 1.0449545602634394, "learning_rate": 1.613429422105223e-05, "loss": 0.8484, "step": 3475 }, { "epoch": 0.31124293469136494, "grad_norm": 0.9302964571125277, "learning_rate": 1.613200346145254e-05, "loss": 0.8783, "step": 3476 }, { "epoch": 0.31133247523644303, "grad_norm": 0.9621306508521787, "learning_rate": 1.61297121860487e-05, "loss": 0.8763, "step": 3477 }, { "epoch": 0.31142201578152107, "grad_norm": 0.9284883083555696, "learning_rate": 1.612742039503344e-05, "loss": 0.867, "step": 3478 }, { "epoch": 0.31151155632659916, "grad_norm": 1.001961444604598, "learning_rate": 1.612512808859955e-05, "loss": 0.852, "step": 3479 }, { "epoch": 0.3116010968716772, "grad_norm": 1.002516734014538, "learning_rate": 1.6122835266939848e-05, "loss": 0.8608, "step": 3480 }, { "epoch": 0.3116906374167553, "grad_norm": 0.9701202265927532, "learning_rate": 1.612054193024719e-05, "loss": 0.8427, "step": 3481 }, { "epoch": 0.3117801779618333, "grad_norm": 1.0342947808887826, "learning_rate": 1.6118248078714493e-05, "loss": 0.8392, "step": 3482 }, { "epoch": 0.3118697185069114, "grad_norm": 1.0150538341799913, "learning_rate": 1.61159537125347e-05, "loss": 0.8977, "step": 3483 }, { "epoch": 0.31195925905198946, "grad_norm": 0.9298670610892577, "learning_rate": 1.611365883190082e-05, "loss": 0.8398, "step": 3484 }, { "epoch": 0.31204879959706755, "grad_norm": 0.9169537953841247, "learning_rate": 1.6111363437005875e-05, "loss": 0.9463, "step": 3485 }, { "epoch": 0.31213834014214564, "grad_norm": 0.9678314459038888, "learning_rate": 1.6109067528042953e-05, "loss": 0.8831, "step": 3486 }, { "epoch": 0.3122278806872237, "grad_norm": 0.9009375979382416, "learning_rate": 1.6106771105205182e-05, "loss": 0.8603, "step": 3487 }, { "epoch": 0.3123174212323018, "grad_norm": 1.009435064071259, "learning_rate": 1.6104474168685724e-05, "loss": 0.8893, "step": 3488 }, { "epoch": 0.3124069617773798, "grad_norm": 0.9157814162932284, "learning_rate": 1.610217671867779e-05, "loss": 0.8289, "step": 3489 }, { "epoch": 0.3124965023224579, "grad_norm": 0.9531630421755719, "learning_rate": 1.6099878755374636e-05, "loss": 0.8788, "step": 3490 }, { "epoch": 0.31258604286753594, "grad_norm": 0.960214984482796, "learning_rate": 1.609758027896956e-05, "loss": 0.925, "step": 3491 }, { "epoch": 0.31267558341261403, "grad_norm": 1.0668041262797365, "learning_rate": 1.60952812896559e-05, "loss": 0.8405, "step": 3492 }, { "epoch": 0.31276512395769207, "grad_norm": 0.880133357254461, "learning_rate": 1.609298178762704e-05, "loss": 0.828, "step": 3493 }, { "epoch": 0.31285466450277016, "grad_norm": 0.9723038640326397, "learning_rate": 1.609068177307641e-05, "loss": 0.8792, "step": 3494 }, { "epoch": 0.31294420504784826, "grad_norm": 0.9588627376170388, "learning_rate": 1.6088381246197476e-05, "loss": 0.8085, "step": 3495 }, { "epoch": 0.3130337455929263, "grad_norm": 0.8730550143396204, "learning_rate": 1.608608020718375e-05, "loss": 0.8531, "step": 3496 }, { "epoch": 0.3131232861380044, "grad_norm": 1.0147999348922498, "learning_rate": 1.608377865622879e-05, "loss": 0.9172, "step": 3497 }, { "epoch": 0.3132128266830824, "grad_norm": 1.0644918752840586, "learning_rate": 1.6081476593526194e-05, "loss": 0.8946, "step": 3498 }, { "epoch": 0.3133023672281605, "grad_norm": 0.8179810735964052, "learning_rate": 1.607917401926961e-05, "loss": 0.8287, "step": 3499 }, { "epoch": 0.31339190777323855, "grad_norm": 0.935999923340399, "learning_rate": 1.607687093365271e-05, "loss": 0.8918, "step": 3500 }, { "epoch": 0.31348144831831665, "grad_norm": 0.9826076664014272, "learning_rate": 1.6074567336869235e-05, "loss": 0.8523, "step": 3501 }, { "epoch": 0.3135709888633947, "grad_norm": 0.9841186147693697, "learning_rate": 1.607226322911295e-05, "loss": 0.8491, "step": 3502 }, { "epoch": 0.3136605294084728, "grad_norm": 0.9062230088776329, "learning_rate": 1.6069958610577668e-05, "loss": 0.885, "step": 3503 }, { "epoch": 0.31375006995355087, "grad_norm": 0.9089910204165106, "learning_rate": 1.6067653481457254e-05, "loss": 0.8495, "step": 3504 }, { "epoch": 0.3138396104986289, "grad_norm": 0.8961527027498256, "learning_rate": 1.6065347841945595e-05, "loss": 0.8356, "step": 3505 }, { "epoch": 0.313929151043707, "grad_norm": 0.9041681669124759, "learning_rate": 1.6063041692236643e-05, "loss": 0.7983, "step": 3506 }, { "epoch": 0.31401869158878504, "grad_norm": 0.9278962804605422, "learning_rate": 1.606073503252438e-05, "loss": 0.8643, "step": 3507 }, { "epoch": 0.31410823213386313, "grad_norm": 1.0549207191776326, "learning_rate": 1.6058427863002838e-05, "loss": 0.8414, "step": 3508 }, { "epoch": 0.31419777267894117, "grad_norm": 1.0081789037238174, "learning_rate": 1.6056120183866087e-05, "loss": 0.8728, "step": 3509 }, { "epoch": 0.31428731322401926, "grad_norm": 0.9098956436641146, "learning_rate": 1.6053811995308242e-05, "loss": 0.8707, "step": 3510 }, { "epoch": 0.3143768537690973, "grad_norm": 1.0453733347437302, "learning_rate": 1.6051503297523455e-05, "loss": 0.8722, "step": 3511 }, { "epoch": 0.3144663943141754, "grad_norm": 0.9068577241833307, "learning_rate": 1.6049194090705935e-05, "loss": 0.8805, "step": 3512 }, { "epoch": 0.3145559348592535, "grad_norm": 0.9982183841071748, "learning_rate": 1.604688437504992e-05, "loss": 0.894, "step": 3513 }, { "epoch": 0.3146454754043315, "grad_norm": 0.9295402601331746, "learning_rate": 1.6044574150749697e-05, "loss": 0.8579, "step": 3514 }, { "epoch": 0.3147350159494096, "grad_norm": 0.8974535444863458, "learning_rate": 1.604226341799959e-05, "loss": 0.7873, "step": 3515 }, { "epoch": 0.31482455649448765, "grad_norm": 0.9748587500694583, "learning_rate": 1.603995217699398e-05, "loss": 0.8428, "step": 3516 }, { "epoch": 0.31491409703956574, "grad_norm": 0.9484601390168033, "learning_rate": 1.6037640427927272e-05, "loss": 0.8933, "step": 3517 }, { "epoch": 0.3150036375846438, "grad_norm": 0.9326034565771101, "learning_rate": 1.6035328170993928e-05, "loss": 0.8811, "step": 3518 }, { "epoch": 0.3150931781297219, "grad_norm": 1.122842311282288, "learning_rate": 1.6033015406388442e-05, "loss": 0.9004, "step": 3519 }, { "epoch": 0.3151827186747999, "grad_norm": 0.914674073279445, "learning_rate": 1.603070213430536e-05, "loss": 0.8891, "step": 3520 }, { "epoch": 0.315272259219878, "grad_norm": 1.01870486409183, "learning_rate": 1.602838835493927e-05, "loss": 0.8493, "step": 3521 }, { "epoch": 0.3153617997649561, "grad_norm": 0.9358650050328511, "learning_rate": 1.6026074068484794e-05, "loss": 0.8028, "step": 3522 }, { "epoch": 0.31545134031003413, "grad_norm": 0.9768410024986681, "learning_rate": 1.6023759275136605e-05, "loss": 0.822, "step": 3523 }, { "epoch": 0.3155408808551122, "grad_norm": 0.8508494016505629, "learning_rate": 1.6021443975089417e-05, "loss": 0.8848, "step": 3524 }, { "epoch": 0.31563042140019026, "grad_norm": 0.9943791218580922, "learning_rate": 1.601912816853798e-05, "loss": 0.9129, "step": 3525 }, { "epoch": 0.31571996194526836, "grad_norm": 0.8702139822027839, "learning_rate": 1.60168118556771e-05, "loss": 0.8538, "step": 3526 }, { "epoch": 0.3158095024903464, "grad_norm": 0.9749660282212825, "learning_rate": 1.6014495036701613e-05, "loss": 0.8836, "step": 3527 }, { "epoch": 0.3158990430354245, "grad_norm": 0.9245308102039224, "learning_rate": 1.6012177711806403e-05, "loss": 0.8642, "step": 3528 }, { "epoch": 0.3159885835805025, "grad_norm": 1.027412622282684, "learning_rate": 1.6009859881186395e-05, "loss": 0.933, "step": 3529 }, { "epoch": 0.3160781241255806, "grad_norm": 0.8696898481773648, "learning_rate": 1.6007541545036558e-05, "loss": 0.814, "step": 3530 }, { "epoch": 0.3161676646706587, "grad_norm": 0.9797723255521389, "learning_rate": 1.6005222703551902e-05, "loss": 0.9227, "step": 3531 }, { "epoch": 0.31625720521573675, "grad_norm": 0.8750169194670149, "learning_rate": 1.6002903356927487e-05, "loss": 0.8595, "step": 3532 }, { "epoch": 0.31634674576081484, "grad_norm": 0.9161886209008, "learning_rate": 1.6000583505358397e-05, "loss": 0.8625, "step": 3533 }, { "epoch": 0.3164362863058929, "grad_norm": 0.9616566569002498, "learning_rate": 1.5998263149039778e-05, "loss": 0.92, "step": 3534 }, { "epoch": 0.31652582685097097, "grad_norm": 0.935846953925903, "learning_rate": 1.599594228816681e-05, "loss": 0.9136, "step": 3535 }, { "epoch": 0.316615367396049, "grad_norm": 0.9713035539621584, "learning_rate": 1.5993620922934716e-05, "loss": 0.857, "step": 3536 }, { "epoch": 0.3167049079411271, "grad_norm": 0.8505154489010355, "learning_rate": 1.599129905353876e-05, "loss": 0.8814, "step": 3537 }, { "epoch": 0.31679444848620514, "grad_norm": 0.8925383585190358, "learning_rate": 1.5988976680174257e-05, "loss": 0.8343, "step": 3538 }, { "epoch": 0.31688398903128323, "grad_norm": 0.9489930095399801, "learning_rate": 1.5986653803036544e-05, "loss": 0.8838, "step": 3539 }, { "epoch": 0.3169735295763613, "grad_norm": 1.020732977407151, "learning_rate": 1.598433042232103e-05, "loss": 0.8464, "step": 3540 }, { "epoch": 0.31706307012143936, "grad_norm": 0.9124141936440842, "learning_rate": 1.5982006538223136e-05, "loss": 0.8875, "step": 3541 }, { "epoch": 0.31715261066651745, "grad_norm": 0.9635543852592855, "learning_rate": 1.5979682150938343e-05, "loss": 0.8652, "step": 3542 }, { "epoch": 0.3172421512115955, "grad_norm": 0.9290033642681926, "learning_rate": 1.597735726066218e-05, "loss": 0.7978, "step": 3543 }, { "epoch": 0.3173316917566736, "grad_norm": 0.9349539303385449, "learning_rate": 1.59750318675902e-05, "loss": 0.8477, "step": 3544 }, { "epoch": 0.3174212323017516, "grad_norm": 0.9855831692658552, "learning_rate": 1.5972705971918012e-05, "loss": 0.8757, "step": 3545 }, { "epoch": 0.3175107728468297, "grad_norm": 0.8786012984894488, "learning_rate": 1.597037957384126e-05, "loss": 0.8732, "step": 3546 }, { "epoch": 0.31760031339190775, "grad_norm": 0.9698153538446342, "learning_rate": 1.5968052673555632e-05, "loss": 0.816, "step": 3547 }, { "epoch": 0.31768985393698584, "grad_norm": 0.9173539829192748, "learning_rate": 1.5965725271256864e-05, "loss": 0.8858, "step": 3548 }, { "epoch": 0.31777939448206394, "grad_norm": 0.97370645288233, "learning_rate": 1.5963397367140724e-05, "loss": 0.8764, "step": 3549 }, { "epoch": 0.317868935027142, "grad_norm": 0.864031651030753, "learning_rate": 1.5961068961403033e-05, "loss": 0.8322, "step": 3550 }, { "epoch": 0.31795847557222007, "grad_norm": 0.9107709531188253, "learning_rate": 1.5958740054239643e-05, "loss": 0.9138, "step": 3551 }, { "epoch": 0.3180480161172981, "grad_norm": 1.1097718267394971, "learning_rate": 1.595641064584646e-05, "loss": 0.8755, "step": 3552 }, { "epoch": 0.3181375566623762, "grad_norm": 1.0110341934662181, "learning_rate": 1.5954080736419425e-05, "loss": 0.8707, "step": 3553 }, { "epoch": 0.31822709720745423, "grad_norm": 1.0214249900325971, "learning_rate": 1.5951750326154517e-05, "loss": 0.8617, "step": 3554 }, { "epoch": 0.3183166377525323, "grad_norm": 0.8283023031458524, "learning_rate": 1.5949419415247767e-05, "loss": 0.8465, "step": 3555 }, { "epoch": 0.31840617829761036, "grad_norm": 1.0258574500585769, "learning_rate": 1.594708800389525e-05, "loss": 0.7986, "step": 3556 }, { "epoch": 0.31849571884268846, "grad_norm": 0.8966298464584821, "learning_rate": 1.5944756092293062e-05, "loss": 0.8257, "step": 3557 }, { "epoch": 0.31858525938776655, "grad_norm": 0.9306855140791443, "learning_rate": 1.5942423680637368e-05, "loss": 0.8881, "step": 3558 }, { "epoch": 0.3186747999328446, "grad_norm": 1.0708481517779538, "learning_rate": 1.5940090769124357e-05, "loss": 0.8391, "step": 3559 }, { "epoch": 0.3187643404779227, "grad_norm": 1.2900298195717073, "learning_rate": 1.5937757357950266e-05, "loss": 0.9022, "step": 3560 }, { "epoch": 0.3188538810230007, "grad_norm": 0.9029206559905374, "learning_rate": 1.5935423447311377e-05, "loss": 0.8541, "step": 3561 }, { "epoch": 0.3189434215680788, "grad_norm": 0.9774341117370221, "learning_rate": 1.593308903740401e-05, "loss": 0.8507, "step": 3562 }, { "epoch": 0.31903296211315685, "grad_norm": 1.0067474092359003, "learning_rate": 1.5930754128424527e-05, "loss": 0.8563, "step": 3563 }, { "epoch": 0.31912250265823494, "grad_norm": 0.9383787999477323, "learning_rate": 1.5928418720569333e-05, "loss": 0.8932, "step": 3564 }, { "epoch": 0.319212043203313, "grad_norm": 1.03813093061488, "learning_rate": 1.5926082814034875e-05, "loss": 0.9109, "step": 3565 }, { "epoch": 0.31930158374839107, "grad_norm": 1.1467969563531681, "learning_rate": 1.5923746409017642e-05, "loss": 0.846, "step": 3566 }, { "epoch": 0.31939112429346916, "grad_norm": 0.9972992927564741, "learning_rate": 1.5921409505714165e-05, "loss": 0.7978, "step": 3567 }, { "epoch": 0.3194806648385472, "grad_norm": 1.1015570006544695, "learning_rate": 1.591907210432102e-05, "loss": 0.8624, "step": 3568 }, { "epoch": 0.3195702053836253, "grad_norm": 0.9711106023435543, "learning_rate": 1.591673420503481e-05, "loss": 0.8706, "step": 3569 }, { "epoch": 0.31965974592870333, "grad_norm": 1.0271826440338592, "learning_rate": 1.5914395808052207e-05, "loss": 0.9213, "step": 3570 }, { "epoch": 0.3197492864737814, "grad_norm": 1.035025036565157, "learning_rate": 1.59120569135699e-05, "loss": 0.9111, "step": 3571 }, { "epoch": 0.31983882701885946, "grad_norm": 0.9130146026195757, "learning_rate": 1.590971752178463e-05, "loss": 0.8692, "step": 3572 }, { "epoch": 0.31992836756393755, "grad_norm": 0.8753357899151957, "learning_rate": 1.590737763289318e-05, "loss": 0.8893, "step": 3573 }, { "epoch": 0.3200179081090156, "grad_norm": 0.9186210597382566, "learning_rate": 1.5905037247092374e-05, "loss": 0.8879, "step": 3574 }, { "epoch": 0.3201074486540937, "grad_norm": 0.9216688202659082, "learning_rate": 1.590269636457908e-05, "loss": 0.8324, "step": 3575 }, { "epoch": 0.3201969891991718, "grad_norm": 1.0384590831013387, "learning_rate": 1.59003549855502e-05, "loss": 0.8251, "step": 3576 }, { "epoch": 0.3202865297442498, "grad_norm": 0.9179907825399464, "learning_rate": 1.5898013110202684e-05, "loss": 0.8322, "step": 3577 }, { "epoch": 0.3203760702893279, "grad_norm": 0.9727147976061713, "learning_rate": 1.589567073873353e-05, "loss": 0.9224, "step": 3578 }, { "epoch": 0.32046561083440595, "grad_norm": 1.0736353696288954, "learning_rate": 1.589332787133976e-05, "loss": 0.8207, "step": 3579 }, { "epoch": 0.32055515137948404, "grad_norm": 0.9303985515634942, "learning_rate": 1.589098450821846e-05, "loss": 0.9148, "step": 3580 }, { "epoch": 0.3206446919245621, "grad_norm": 0.8916387034799939, "learning_rate": 1.588864064956674e-05, "loss": 0.8275, "step": 3581 }, { "epoch": 0.32073423246964017, "grad_norm": 0.9500591067831453, "learning_rate": 1.5886296295581752e-05, "loss": 0.8486, "step": 3582 }, { "epoch": 0.3208237730147182, "grad_norm": 1.0141093979458402, "learning_rate": 1.5883951446460707e-05, "loss": 0.8916, "step": 3583 }, { "epoch": 0.3209133135597963, "grad_norm": 0.956192812681069, "learning_rate": 1.588160610240084e-05, "loss": 0.8498, "step": 3584 }, { "epoch": 0.3210028541048744, "grad_norm": 0.9102389731818618, "learning_rate": 1.587926026359943e-05, "loss": 0.8565, "step": 3585 }, { "epoch": 0.32109239464995243, "grad_norm": 1.0205222005341894, "learning_rate": 1.587691393025381e-05, "loss": 0.8315, "step": 3586 }, { "epoch": 0.3211819351950305, "grad_norm": 0.9907235447065051, "learning_rate": 1.5874567102561336e-05, "loss": 0.848, "step": 3587 }, { "epoch": 0.32127147574010856, "grad_norm": 0.8879169629226085, "learning_rate": 1.5872219780719428e-05, "loss": 0.8263, "step": 3588 }, { "epoch": 0.32136101628518665, "grad_norm": 0.8801380073048135, "learning_rate": 1.5869871964925523e-05, "loss": 0.8449, "step": 3589 }, { "epoch": 0.3214505568302647, "grad_norm": 0.9816475732305109, "learning_rate": 1.5867523655377116e-05, "loss": 0.8584, "step": 3590 }, { "epoch": 0.3215400973753428, "grad_norm": 1.0090436799412859, "learning_rate": 1.5865174852271742e-05, "loss": 0.8674, "step": 3591 }, { "epoch": 0.3216296379204208, "grad_norm": 1.0368188817338386, "learning_rate": 1.5862825555806972e-05, "loss": 0.8615, "step": 3592 }, { "epoch": 0.3217191784654989, "grad_norm": 1.0561364274093863, "learning_rate": 1.586047576618042e-05, "loss": 0.8596, "step": 3593 }, { "epoch": 0.321808719010577, "grad_norm": 0.9232289144563027, "learning_rate": 1.5858125483589743e-05, "loss": 0.8779, "step": 3594 }, { "epoch": 0.32189825955565504, "grad_norm": 0.8947961456163048, "learning_rate": 1.5855774708232644e-05, "loss": 0.8982, "step": 3595 }, { "epoch": 0.32198780010073313, "grad_norm": 0.9500325962339439, "learning_rate": 1.5853423440306858e-05, "loss": 0.8244, "step": 3596 }, { "epoch": 0.32207734064581117, "grad_norm": 0.9470003193027838, "learning_rate": 1.5851071680010165e-05, "loss": 0.8721, "step": 3597 }, { "epoch": 0.32216688119088926, "grad_norm": 1.1659575903612587, "learning_rate": 1.584871942754039e-05, "loss": 0.8513, "step": 3598 }, { "epoch": 0.3222564217359673, "grad_norm": 1.0506990655518231, "learning_rate": 1.5846366683095394e-05, "loss": 0.873, "step": 3599 }, { "epoch": 0.3223459622810454, "grad_norm": 0.8366654532631679, "learning_rate": 1.5844013446873087e-05, "loss": 0.8369, "step": 3600 }, { "epoch": 0.32243550282612343, "grad_norm": 0.8702368526793697, "learning_rate": 1.584165971907141e-05, "loss": 0.8761, "step": 3601 }, { "epoch": 0.3225250433712015, "grad_norm": 0.9589636439059769, "learning_rate": 1.5839305499888355e-05, "loss": 0.8524, "step": 3602 }, { "epoch": 0.3226145839162796, "grad_norm": 0.8808853090698026, "learning_rate": 1.5836950789521952e-05, "loss": 0.8669, "step": 3603 }, { "epoch": 0.32270412446135766, "grad_norm": 0.9127365606072017, "learning_rate": 1.583459558817027e-05, "loss": 0.8412, "step": 3604 }, { "epoch": 0.32279366500643575, "grad_norm": 1.0161410372567297, "learning_rate": 1.5832239896031415e-05, "loss": 0.8593, "step": 3605 }, { "epoch": 0.3228832055515138, "grad_norm": 0.9521009589524897, "learning_rate": 1.5829883713303547e-05, "loss": 0.862, "step": 3606 }, { "epoch": 0.3229727460965919, "grad_norm": 0.9233511338982374, "learning_rate": 1.5827527040184864e-05, "loss": 0.8509, "step": 3607 }, { "epoch": 0.3230622866416699, "grad_norm": 0.9223131453171549, "learning_rate": 1.582516987687359e-05, "loss": 0.876, "step": 3608 }, { "epoch": 0.323151827186748, "grad_norm": 1.0361683951732445, "learning_rate": 1.5822812223568014e-05, "loss": 0.8554, "step": 3609 }, { "epoch": 0.32324136773182605, "grad_norm": 0.8869414599092794, "learning_rate": 1.5820454080466446e-05, "loss": 0.8378, "step": 3610 }, { "epoch": 0.32333090827690414, "grad_norm": 0.9589668484237807, "learning_rate": 1.581809544776725e-05, "loss": 0.833, "step": 3611 }, { "epoch": 0.32342044882198223, "grad_norm": 1.0014261328693184, "learning_rate": 1.581573632566882e-05, "loss": 0.8555, "step": 3612 }, { "epoch": 0.32350998936706027, "grad_norm": 0.927480464984033, "learning_rate": 1.5813376714369605e-05, "loss": 0.8083, "step": 3613 }, { "epoch": 0.32359952991213836, "grad_norm": 0.9068144629186506, "learning_rate": 1.5811016614068084e-05, "loss": 0.8586, "step": 3614 }, { "epoch": 0.3236890704572164, "grad_norm": 0.9305728096425411, "learning_rate": 1.5808656024962782e-05, "loss": 0.8883, "step": 3615 }, { "epoch": 0.3237786110022945, "grad_norm": 0.8993854827916744, "learning_rate": 1.5806294947252264e-05, "loss": 0.8359, "step": 3616 }, { "epoch": 0.32386815154737253, "grad_norm": 0.9022886733965982, "learning_rate": 1.5803933381135136e-05, "loss": 0.8868, "step": 3617 }, { "epoch": 0.3239576920924506, "grad_norm": 0.9662876173454276, "learning_rate": 1.5801571326810046e-05, "loss": 0.8468, "step": 3618 }, { "epoch": 0.32404723263752866, "grad_norm": 0.8634980823650235, "learning_rate": 1.5799208784475683e-05, "loss": 0.8909, "step": 3619 }, { "epoch": 0.32413677318260675, "grad_norm": 0.8760839141388197, "learning_rate": 1.5796845754330772e-05, "loss": 0.8605, "step": 3620 }, { "epoch": 0.32422631372768485, "grad_norm": 0.9690571081398535, "learning_rate": 1.5794482236574083e-05, "loss": 0.9117, "step": 3621 }, { "epoch": 0.3243158542727629, "grad_norm": 0.8794280041663629, "learning_rate": 1.5792118231404438e-05, "loss": 0.8784, "step": 3622 }, { "epoch": 0.324405394817841, "grad_norm": 0.9284514992633226, "learning_rate": 1.578975373902068e-05, "loss": 0.8812, "step": 3623 }, { "epoch": 0.324494935362919, "grad_norm": 1.0304635493841716, "learning_rate": 1.5787388759621703e-05, "loss": 0.8704, "step": 3624 }, { "epoch": 0.3245844759079971, "grad_norm": 0.9614529455593678, "learning_rate": 1.5785023293406445e-05, "loss": 0.8854, "step": 3625 }, { "epoch": 0.32467401645307514, "grad_norm": 0.9689922971077647, "learning_rate": 1.5782657340573875e-05, "loss": 0.9054, "step": 3626 }, { "epoch": 0.32476355699815324, "grad_norm": 0.9543309059230699, "learning_rate": 1.5780290901323017e-05, "loss": 0.8522, "step": 3627 }, { "epoch": 0.3248530975432313, "grad_norm": 1.3114182392384317, "learning_rate": 1.5777923975852926e-05, "loss": 0.8984, "step": 3628 }, { "epoch": 0.32494263808830937, "grad_norm": 0.8892923116981535, "learning_rate": 1.5775556564362696e-05, "loss": 0.8151, "step": 3629 }, { "epoch": 0.3250321786333874, "grad_norm": 0.9914580392729705, "learning_rate": 1.577318866705147e-05, "loss": 0.8563, "step": 3630 }, { "epoch": 0.3251217191784655, "grad_norm": 0.8528123291944163, "learning_rate": 1.5770820284118425e-05, "loss": 0.7794, "step": 3631 }, { "epoch": 0.3252112597235436, "grad_norm": 0.8788062646237809, "learning_rate": 1.5768451415762784e-05, "loss": 0.827, "step": 3632 }, { "epoch": 0.3253008002686216, "grad_norm": 0.9459152611388139, "learning_rate": 1.5766082062183808e-05, "loss": 0.8192, "step": 3633 }, { "epoch": 0.3253903408136997, "grad_norm": 1.1430681007791503, "learning_rate": 1.5763712223580796e-05, "loss": 0.8922, "step": 3634 }, { "epoch": 0.32547988135877776, "grad_norm": 1.1687986959245797, "learning_rate": 1.5761341900153094e-05, "loss": 0.906, "step": 3635 }, { "epoch": 0.32556942190385585, "grad_norm": 0.8768524924153864, "learning_rate": 1.575897109210009e-05, "loss": 0.8753, "step": 3636 }, { "epoch": 0.3256589624489339, "grad_norm": 0.9597904507300347, "learning_rate": 1.5756599799621204e-05, "loss": 0.8822, "step": 3637 }, { "epoch": 0.325748502994012, "grad_norm": 0.8746253956284374, "learning_rate": 1.5754228022915903e-05, "loss": 0.8364, "step": 3638 }, { "epoch": 0.32583804353909, "grad_norm": 0.9219672188959224, "learning_rate": 1.5751855762183686e-05, "loss": 0.8416, "step": 3639 }, { "epoch": 0.3259275840841681, "grad_norm": 1.0526968315843568, "learning_rate": 1.5749483017624112e-05, "loss": 0.8332, "step": 3640 }, { "epoch": 0.3260171246292462, "grad_norm": 0.9146187868479909, "learning_rate": 1.5747109789436762e-05, "loss": 0.8471, "step": 3641 }, { "epoch": 0.32610666517432424, "grad_norm": 0.9427175798447344, "learning_rate": 1.5744736077821257e-05, "loss": 0.855, "step": 3642 }, { "epoch": 0.32619620571940233, "grad_norm": 0.935223314761867, "learning_rate": 1.5742361882977282e-05, "loss": 0.8169, "step": 3643 }, { "epoch": 0.32628574626448037, "grad_norm": 0.9900847981136247, "learning_rate": 1.5739987205104535e-05, "loss": 0.8949, "step": 3644 }, { "epoch": 0.32637528680955846, "grad_norm": 0.9391341896833509, "learning_rate": 1.573761204440277e-05, "loss": 0.8229, "step": 3645 }, { "epoch": 0.3264648273546365, "grad_norm": 1.1258803425471366, "learning_rate": 1.5735236401071778e-05, "loss": 0.8404, "step": 3646 }, { "epoch": 0.3265543678997146, "grad_norm": 0.9773098058488157, "learning_rate": 1.5732860275311387e-05, "loss": 0.87, "step": 3647 }, { "epoch": 0.32664390844479263, "grad_norm": 1.0253147978242285, "learning_rate": 1.573048366732147e-05, "loss": 0.8187, "step": 3648 }, { "epoch": 0.3267334489898707, "grad_norm": 0.9680557857369011, "learning_rate": 1.5728106577301945e-05, "loss": 0.8373, "step": 3649 }, { "epoch": 0.3268229895349488, "grad_norm": 0.9595440163983757, "learning_rate": 1.5725729005452758e-05, "loss": 0.9162, "step": 3650 }, { "epoch": 0.32691253008002685, "grad_norm": 0.9608819872708337, "learning_rate": 1.5723350951973905e-05, "loss": 0.8752, "step": 3651 }, { "epoch": 0.32700207062510495, "grad_norm": 0.9745222880738017, "learning_rate": 1.5720972417065424e-05, "loss": 0.8789, "step": 3652 }, { "epoch": 0.327091611170183, "grad_norm": 1.0002507804909697, "learning_rate": 1.5718593400927385e-05, "loss": 0.8433, "step": 3653 }, { "epoch": 0.3271811517152611, "grad_norm": 0.9360896822621942, "learning_rate": 1.5716213903759902e-05, "loss": 0.9099, "step": 3654 }, { "epoch": 0.3272706922603391, "grad_norm": 1.0009490756621653, "learning_rate": 1.5713833925763137e-05, "loss": 0.8993, "step": 3655 }, { "epoch": 0.3273602328054172, "grad_norm": 0.9973717391791578, "learning_rate": 1.5711453467137276e-05, "loss": 0.8655, "step": 3656 }, { "epoch": 0.32744977335049524, "grad_norm": 0.8976154387949733, "learning_rate": 1.5709072528082567e-05, "loss": 0.8458, "step": 3657 }, { "epoch": 0.32753931389557334, "grad_norm": 0.9019576717589594, "learning_rate": 1.5706691108799277e-05, "loss": 0.8573, "step": 3658 }, { "epoch": 0.32762885444065143, "grad_norm": 0.945092574554595, "learning_rate": 1.570430920948773e-05, "loss": 0.8825, "step": 3659 }, { "epoch": 0.32771839498572947, "grad_norm": 0.9191838558943229, "learning_rate": 1.570192683034828e-05, "loss": 0.8675, "step": 3660 }, { "epoch": 0.32780793553080756, "grad_norm": 0.8866495700893718, "learning_rate": 1.5699543971581324e-05, "loss": 0.889, "step": 3661 }, { "epoch": 0.3278974760758856, "grad_norm": 1.0356205395855966, "learning_rate": 1.5697160633387304e-05, "loss": 0.8718, "step": 3662 }, { "epoch": 0.3279870166209637, "grad_norm": 0.9485569645193066, "learning_rate": 1.5694776815966698e-05, "loss": 0.799, "step": 3663 }, { "epoch": 0.3280765571660417, "grad_norm": 1.1097511370801718, "learning_rate": 1.5692392519520022e-05, "loss": 0.8737, "step": 3664 }, { "epoch": 0.3281660977111198, "grad_norm": 1.0609603172803252, "learning_rate": 1.569000774424784e-05, "loss": 0.8222, "step": 3665 }, { "epoch": 0.32825563825619786, "grad_norm": 1.0242180777562753, "learning_rate": 1.5687622490350743e-05, "loss": 0.8781, "step": 3666 }, { "epoch": 0.32834517880127595, "grad_norm": 0.9346930443834084, "learning_rate": 1.5685236758029383e-05, "loss": 0.8944, "step": 3667 }, { "epoch": 0.32843471934635404, "grad_norm": 1.0364674046752969, "learning_rate": 1.5682850547484433e-05, "loss": 0.8975, "step": 3668 }, { "epoch": 0.3285242598914321, "grad_norm": 0.9236252124603934, "learning_rate": 1.5680463858916608e-05, "loss": 0.818, "step": 3669 }, { "epoch": 0.3286138004365102, "grad_norm": 1.0180194484332112, "learning_rate": 1.567807669252668e-05, "loss": 0.8812, "step": 3670 }, { "epoch": 0.3287033409815882, "grad_norm": 0.8536268704647232, "learning_rate": 1.5675689048515438e-05, "loss": 0.8739, "step": 3671 }, { "epoch": 0.3287928815266663, "grad_norm": 0.9625526289732876, "learning_rate": 1.5673300927083732e-05, "loss": 0.8319, "step": 3672 }, { "epoch": 0.32888242207174434, "grad_norm": 0.9420534141712468, "learning_rate": 1.567091232843244e-05, "loss": 0.8314, "step": 3673 }, { "epoch": 0.32897196261682243, "grad_norm": 0.9807560107732121, "learning_rate": 1.5668523252762482e-05, "loss": 0.9363, "step": 3674 }, { "epoch": 0.32906150316190047, "grad_norm": 0.9417233166381623, "learning_rate": 1.566613370027482e-05, "loss": 0.8818, "step": 3675 }, { "epoch": 0.32915104370697856, "grad_norm": 0.9456448250219259, "learning_rate": 1.5663743671170454e-05, "loss": 0.7935, "step": 3676 }, { "epoch": 0.32924058425205666, "grad_norm": 0.9817787621694393, "learning_rate": 1.566135316565043e-05, "loss": 0.8664, "step": 3677 }, { "epoch": 0.3293301247971347, "grad_norm": 0.9158494247948236, "learning_rate": 1.5658962183915823e-05, "loss": 0.8618, "step": 3678 }, { "epoch": 0.3294196653422128, "grad_norm": 0.9608444008780501, "learning_rate": 1.5656570726167763e-05, "loss": 0.9373, "step": 3679 }, { "epoch": 0.3295092058872908, "grad_norm": 0.8816596462490449, "learning_rate": 1.56541787926074e-05, "loss": 0.8445, "step": 3680 }, { "epoch": 0.3295987464323689, "grad_norm": 0.9505472058799227, "learning_rate": 1.5651786383435945e-05, "loss": 0.8939, "step": 3681 }, { "epoch": 0.32968828697744695, "grad_norm": 0.9017922955167217, "learning_rate": 1.5649393498854637e-05, "loss": 0.8409, "step": 3682 }, { "epoch": 0.32977782752252505, "grad_norm": 1.2141596734855504, "learning_rate": 1.564700013906476e-05, "loss": 0.8523, "step": 3683 }, { "epoch": 0.3298673680676031, "grad_norm": 0.8983192060791162, "learning_rate": 1.564460630426763e-05, "loss": 0.801, "step": 3684 }, { "epoch": 0.3299569086126812, "grad_norm": 0.958042280532552, "learning_rate": 1.5642211994664614e-05, "loss": 0.826, "step": 3685 }, { "epoch": 0.33004644915775927, "grad_norm": 0.9046980479715719, "learning_rate": 1.5639817210457108e-05, "loss": 0.8361, "step": 3686 }, { "epoch": 0.3301359897028373, "grad_norm": 1.1548514754331594, "learning_rate": 1.563742195184656e-05, "loss": 0.8516, "step": 3687 }, { "epoch": 0.3302255302479154, "grad_norm": 0.9284305644578263, "learning_rate": 1.5635026219034446e-05, "loss": 0.8435, "step": 3688 }, { "epoch": 0.33031507079299344, "grad_norm": 0.9451406310583375, "learning_rate": 1.563263001222229e-05, "loss": 0.8211, "step": 3689 }, { "epoch": 0.33040461133807153, "grad_norm": 0.9703726326225346, "learning_rate": 1.5630233331611656e-05, "loss": 0.8509, "step": 3690 }, { "epoch": 0.33049415188314957, "grad_norm": 0.9066613904628877, "learning_rate": 1.5627836177404137e-05, "loss": 0.9098, "step": 3691 }, { "epoch": 0.33058369242822766, "grad_norm": 0.9046250797759379, "learning_rate": 1.5625438549801377e-05, "loss": 0.8264, "step": 3692 }, { "epoch": 0.3306732329733057, "grad_norm": 0.953880747872513, "learning_rate": 1.5623040449005063e-05, "loss": 0.8717, "step": 3693 }, { "epoch": 0.3307627735183838, "grad_norm": 0.9589971412249996, "learning_rate": 1.5620641875216908e-05, "loss": 0.8818, "step": 3694 }, { "epoch": 0.3308523140634619, "grad_norm": 0.8695905110945504, "learning_rate": 1.5618242828638672e-05, "loss": 0.8275, "step": 3695 }, { "epoch": 0.3309418546085399, "grad_norm": 1.0182129307099206, "learning_rate": 1.5615843309472162e-05, "loss": 0.8756, "step": 3696 }, { "epoch": 0.331031395153618, "grad_norm": 0.9418077565038715, "learning_rate": 1.5613443317919207e-05, "loss": 0.8403, "step": 3697 }, { "epoch": 0.33112093569869605, "grad_norm": 0.9514319438492587, "learning_rate": 1.56110428541817e-05, "loss": 0.8958, "step": 3698 }, { "epoch": 0.33121047624377414, "grad_norm": 1.2269311557315061, "learning_rate": 1.5608641918461545e-05, "loss": 0.8293, "step": 3699 }, { "epoch": 0.3313000167888522, "grad_norm": 1.0245858604786116, "learning_rate": 1.5606240510960715e-05, "loss": 0.8063, "step": 3700 }, { "epoch": 0.3313895573339303, "grad_norm": 1.1365608616917942, "learning_rate": 1.56038386318812e-05, "loss": 0.8037, "step": 3701 }, { "epoch": 0.3314790978790083, "grad_norm": 1.0118970539372367, "learning_rate": 1.560143628142504e-05, "loss": 0.8572, "step": 3702 }, { "epoch": 0.3315686384240864, "grad_norm": 1.0706775076143242, "learning_rate": 1.5599033459794317e-05, "loss": 0.85, "step": 3703 }, { "epoch": 0.3316581789691645, "grad_norm": 1.080742392291737, "learning_rate": 1.559663016719114e-05, "loss": 0.8303, "step": 3704 }, { "epoch": 0.33174771951424253, "grad_norm": 0.9085441099897575, "learning_rate": 1.5594226403817674e-05, "loss": 0.8134, "step": 3705 }, { "epoch": 0.3318372600593206, "grad_norm": 0.9314097035205126, "learning_rate": 1.5591822169876116e-05, "loss": 0.854, "step": 3706 }, { "epoch": 0.33192680060439866, "grad_norm": 0.8923386938942766, "learning_rate": 1.558941746556869e-05, "loss": 0.8318, "step": 3707 }, { "epoch": 0.33201634114947676, "grad_norm": 0.9018864916936062, "learning_rate": 1.5587012291097686e-05, "loss": 0.8701, "step": 3708 }, { "epoch": 0.3321058816945548, "grad_norm": 0.9327631315906499, "learning_rate": 1.5584606646665416e-05, "loss": 0.9287, "step": 3709 }, { "epoch": 0.3321954222396329, "grad_norm": 1.0559700180665932, "learning_rate": 1.5582200532474233e-05, "loss": 0.8343, "step": 3710 }, { "epoch": 0.3322849627847109, "grad_norm": 0.9091869066086047, "learning_rate": 1.557979394872653e-05, "loss": 0.8434, "step": 3711 }, { "epoch": 0.332374503329789, "grad_norm": 0.9686305962665044, "learning_rate": 1.5577386895624743e-05, "loss": 0.8193, "step": 3712 }, { "epoch": 0.3324640438748671, "grad_norm": 0.9950069532982954, "learning_rate": 1.557497937337135e-05, "loss": 0.8779, "step": 3713 }, { "epoch": 0.33255358441994515, "grad_norm": 1.0106282485530667, "learning_rate": 1.5572571382168853e-05, "loss": 0.9211, "step": 3714 }, { "epoch": 0.33264312496502324, "grad_norm": 1.1747121137378411, "learning_rate": 1.5570162922219815e-05, "loss": 0.8116, "step": 3715 }, { "epoch": 0.3327326655101013, "grad_norm": 0.9011452497357115, "learning_rate": 1.556775399372682e-05, "loss": 0.7967, "step": 3716 }, { "epoch": 0.33282220605517937, "grad_norm": 0.8806567559699375, "learning_rate": 1.5565344596892505e-05, "loss": 0.8503, "step": 3717 }, { "epoch": 0.3329117466002574, "grad_norm": 0.8551702542947183, "learning_rate": 1.556293473191954e-05, "loss": 0.8816, "step": 3718 }, { "epoch": 0.3330012871453355, "grad_norm": 0.82463751371181, "learning_rate": 1.556052439901063e-05, "loss": 0.8636, "step": 3719 }, { "epoch": 0.33309082769041354, "grad_norm": 1.0238240603108388, "learning_rate": 1.555811359836853e-05, "loss": 0.8705, "step": 3720 }, { "epoch": 0.33318036823549163, "grad_norm": 0.9269602907071586, "learning_rate": 1.5555702330196024e-05, "loss": 0.8832, "step": 3721 }, { "epoch": 0.3332699087805697, "grad_norm": 0.9038445810980802, "learning_rate": 1.5553290594695942e-05, "loss": 0.8478, "step": 3722 }, { "epoch": 0.33335944932564776, "grad_norm": 1.1059008595351187, "learning_rate": 1.5550878392071155e-05, "loss": 0.8891, "step": 3723 }, { "epoch": 0.33344898987072585, "grad_norm": 0.9516241392686325, "learning_rate": 1.5548465722524565e-05, "loss": 0.8403, "step": 3724 }, { "epoch": 0.3335385304158039, "grad_norm": 1.0058112975618558, "learning_rate": 1.5546052586259118e-05, "loss": 0.8249, "step": 3725 }, { "epoch": 0.333628070960882, "grad_norm": 1.1832204229415506, "learning_rate": 1.5543638983477804e-05, "loss": 0.8737, "step": 3726 }, { "epoch": 0.33371761150596, "grad_norm": 0.8906055364691648, "learning_rate": 1.554122491438364e-05, "loss": 0.8185, "step": 3727 }, { "epoch": 0.3338071520510381, "grad_norm": 0.9814521584592724, "learning_rate": 1.5538810379179694e-05, "loss": 0.8419, "step": 3728 }, { "epoch": 0.33389669259611615, "grad_norm": 1.0282048597644162, "learning_rate": 1.553639537806907e-05, "loss": 0.8568, "step": 3729 }, { "epoch": 0.33398623314119424, "grad_norm": 0.9057934455692483, "learning_rate": 1.5533979911254907e-05, "loss": 0.8205, "step": 3730 }, { "epoch": 0.33407577368627234, "grad_norm": 1.0727746560167528, "learning_rate": 1.5531563978940386e-05, "loss": 0.7874, "step": 3731 }, { "epoch": 0.3341653142313504, "grad_norm": 0.9769472122986363, "learning_rate": 1.552914758132873e-05, "loss": 0.865, "step": 3732 }, { "epoch": 0.33425485477642847, "grad_norm": 0.9522246829164233, "learning_rate": 1.5526730718623197e-05, "loss": 0.8955, "step": 3733 }, { "epoch": 0.3343443953215065, "grad_norm": 0.9774585857492755, "learning_rate": 1.5524313391027087e-05, "loss": 0.8164, "step": 3734 }, { "epoch": 0.3344339358665846, "grad_norm": 0.9372843261795946, "learning_rate": 1.5521895598743735e-05, "loss": 0.8858, "step": 3735 }, { "epoch": 0.33452347641166263, "grad_norm": 1.0109896203935234, "learning_rate": 1.551947734197652e-05, "loss": 0.8566, "step": 3736 }, { "epoch": 0.3346130169567407, "grad_norm": 1.15493879535932, "learning_rate": 1.551705862092886e-05, "loss": 0.848, "step": 3737 }, { "epoch": 0.33470255750181876, "grad_norm": 0.9203061676032983, "learning_rate": 1.5514639435804207e-05, "loss": 0.8229, "step": 3738 }, { "epoch": 0.33479209804689686, "grad_norm": 0.9807189993737404, "learning_rate": 1.551221978680605e-05, "loss": 0.9021, "step": 3739 }, { "epoch": 0.33488163859197495, "grad_norm": 1.1392174938310522, "learning_rate": 1.550979967413793e-05, "loss": 0.8701, "step": 3740 }, { "epoch": 0.334971179137053, "grad_norm": 0.8590881594439866, "learning_rate": 1.550737909800342e-05, "loss": 0.8687, "step": 3741 }, { "epoch": 0.3350607196821311, "grad_norm": 0.9090037832050619, "learning_rate": 1.5504958058606125e-05, "loss": 0.87, "step": 3742 }, { "epoch": 0.3351502602272091, "grad_norm": 1.0665061137261056, "learning_rate": 1.5502536556149695e-05, "loss": 0.7945, "step": 3743 }, { "epoch": 0.3352398007722872, "grad_norm": 1.222201922956993, "learning_rate": 1.5500114590837823e-05, "loss": 0.9085, "step": 3744 }, { "epoch": 0.33532934131736525, "grad_norm": 0.9590013934279815, "learning_rate": 1.5497692162874235e-05, "loss": 0.8418, "step": 3745 }, { "epoch": 0.33541888186244334, "grad_norm": 0.8867051267190553, "learning_rate": 1.5495269272462695e-05, "loss": 0.8045, "step": 3746 }, { "epoch": 0.3355084224075214, "grad_norm": 0.9160259233989024, "learning_rate": 1.5492845919807014e-05, "loss": 0.8447, "step": 3747 }, { "epoch": 0.33559796295259947, "grad_norm": 1.0292276087299104, "learning_rate": 1.5490422105111032e-05, "loss": 0.8388, "step": 3748 }, { "epoch": 0.33568750349767756, "grad_norm": 1.0766755207053724, "learning_rate": 1.5487997828578634e-05, "loss": 0.8838, "step": 3749 }, { "epoch": 0.3357770440427556, "grad_norm": 0.930822666624386, "learning_rate": 1.5485573090413746e-05, "loss": 0.8756, "step": 3750 }, { "epoch": 0.3358665845878337, "grad_norm": 1.013399877217967, "learning_rate": 1.548314789082032e-05, "loss": 0.8337, "step": 3751 }, { "epoch": 0.33595612513291173, "grad_norm": 1.0630448041184852, "learning_rate": 1.5480722230002367e-05, "loss": 0.9031, "step": 3752 }, { "epoch": 0.3360456656779898, "grad_norm": 1.050760655326706, "learning_rate": 1.5478296108163918e-05, "loss": 0.8863, "step": 3753 }, { "epoch": 0.33613520622306786, "grad_norm": 1.0290828846463238, "learning_rate": 1.547586952550905e-05, "loss": 0.8972, "step": 3754 }, { "epoch": 0.33622474676814595, "grad_norm": 0.8714750795387245, "learning_rate": 1.5473442482241883e-05, "loss": 0.8391, "step": 3755 }, { "epoch": 0.336314287313224, "grad_norm": 0.9152757476415232, "learning_rate": 1.5471014978566572e-05, "loss": 0.7885, "step": 3756 }, { "epoch": 0.3364038278583021, "grad_norm": 0.8537405892465879, "learning_rate": 1.5468587014687308e-05, "loss": 0.8505, "step": 3757 }, { "epoch": 0.3364933684033802, "grad_norm": 0.9662354341030726, "learning_rate": 1.5466158590808326e-05, "loss": 0.8411, "step": 3758 }, { "epoch": 0.3365829089484582, "grad_norm": 0.9908717423771062, "learning_rate": 1.5463729707133897e-05, "loss": 0.8364, "step": 3759 }, { "epoch": 0.3366724494935363, "grad_norm": 0.965234107078259, "learning_rate": 1.5461300363868326e-05, "loss": 0.8877, "step": 3760 }, { "epoch": 0.33676199003861434, "grad_norm": 0.892688975598758, "learning_rate": 1.5458870561215967e-05, "loss": 0.8814, "step": 3761 }, { "epoch": 0.33685153058369244, "grad_norm": 1.1265633413803315, "learning_rate": 1.545644029938121e-05, "loss": 0.8416, "step": 3762 }, { "epoch": 0.3369410711287705, "grad_norm": 1.1982058437476673, "learning_rate": 1.5454009578568466e-05, "loss": 0.8748, "step": 3763 }, { "epoch": 0.33703061167384857, "grad_norm": 0.8623490260410338, "learning_rate": 1.5451578398982218e-05, "loss": 0.8051, "step": 3764 }, { "epoch": 0.3371201522189266, "grad_norm": 0.9060696774249142, "learning_rate": 1.5449146760826955e-05, "loss": 0.8604, "step": 3765 }, { "epoch": 0.3372096927640047, "grad_norm": 0.9436553003179557, "learning_rate": 1.5446714664307226e-05, "loss": 0.8476, "step": 3766 }, { "epoch": 0.3372992333090828, "grad_norm": 0.9636474211173599, "learning_rate": 1.544428210962761e-05, "loss": 0.8115, "step": 3767 }, { "epoch": 0.33738877385416083, "grad_norm": 0.9141887151876072, "learning_rate": 1.544184909699272e-05, "loss": 0.8653, "step": 3768 }, { "epoch": 0.3374783143992389, "grad_norm": 0.9422262461309175, "learning_rate": 1.5439415626607223e-05, "loss": 0.8415, "step": 3769 }, { "epoch": 0.33756785494431696, "grad_norm": 0.9048275364385754, "learning_rate": 1.5436981698675807e-05, "loss": 0.8878, "step": 3770 }, { "epoch": 0.33765739548939505, "grad_norm": 0.995027655945389, "learning_rate": 1.5434547313403205e-05, "loss": 0.8496, "step": 3771 }, { "epoch": 0.3377469360344731, "grad_norm": 0.9069485808886016, "learning_rate": 1.5432112470994195e-05, "loss": 0.832, "step": 3772 }, { "epoch": 0.3378364765795512, "grad_norm": 1.0880520286335649, "learning_rate": 1.5429677171653584e-05, "loss": 0.879, "step": 3773 }, { "epoch": 0.3379260171246292, "grad_norm": 0.9330914187533275, "learning_rate": 1.542724141558622e-05, "loss": 0.8242, "step": 3774 }, { "epoch": 0.3380155576697073, "grad_norm": 0.9638798298354745, "learning_rate": 1.5424805202997e-05, "loss": 0.8447, "step": 3775 }, { "epoch": 0.3381050982147854, "grad_norm": 0.9114027554088435, "learning_rate": 1.5422368534090844e-05, "loss": 0.8415, "step": 3776 }, { "epoch": 0.33819463875986344, "grad_norm": 1.118946716761861, "learning_rate": 1.5419931409072715e-05, "loss": 0.8856, "step": 3777 }, { "epoch": 0.33828417930494153, "grad_norm": 0.888712856809204, "learning_rate": 1.5417493828147616e-05, "loss": 0.8402, "step": 3778 }, { "epoch": 0.33837371985001957, "grad_norm": 0.9558994540633312, "learning_rate": 1.541505579152059e-05, "loss": 0.8546, "step": 3779 }, { "epoch": 0.33846326039509766, "grad_norm": 0.9184775548791609, "learning_rate": 1.5412617299396723e-05, "loss": 0.8746, "step": 3780 }, { "epoch": 0.3385528009401757, "grad_norm": 0.8970495025990053, "learning_rate": 1.5410178351981123e-05, "loss": 0.8884, "step": 3781 }, { "epoch": 0.3386423414852538, "grad_norm": 0.9371371627641085, "learning_rate": 1.540773894947895e-05, "loss": 0.8829, "step": 3782 }, { "epoch": 0.33873188203033183, "grad_norm": 0.9476143064996031, "learning_rate": 1.54052990920954e-05, "loss": 0.86, "step": 3783 }, { "epoch": 0.3388214225754099, "grad_norm": 0.9498284392829476, "learning_rate": 1.54028587800357e-05, "loss": 0.8682, "step": 3784 }, { "epoch": 0.338910963120488, "grad_norm": 0.9498579907834941, "learning_rate": 1.540041801350513e-05, "loss": 0.8515, "step": 3785 }, { "epoch": 0.33900050366556606, "grad_norm": 0.8708035168199026, "learning_rate": 1.5397976792708992e-05, "loss": 0.8754, "step": 3786 }, { "epoch": 0.33909004421064415, "grad_norm": 1.0139393955611922, "learning_rate": 1.539553511785264e-05, "loss": 0.8798, "step": 3787 }, { "epoch": 0.3391795847557222, "grad_norm": 0.9473010375295183, "learning_rate": 1.539309298914146e-05, "loss": 0.9326, "step": 3788 }, { "epoch": 0.3392691253008003, "grad_norm": 1.0037784770821752, "learning_rate": 1.5390650406780862e-05, "loss": 0.8691, "step": 3789 }, { "epoch": 0.3393586658458783, "grad_norm": 0.90485127451467, "learning_rate": 1.5388207370976323e-05, "loss": 0.8444, "step": 3790 }, { "epoch": 0.3394482063909564, "grad_norm": 1.0129675195287833, "learning_rate": 1.538576388193334e-05, "loss": 0.8939, "step": 3791 }, { "epoch": 0.33953774693603445, "grad_norm": 0.8633996745150577, "learning_rate": 1.538331993985745e-05, "loss": 0.8255, "step": 3792 }, { "epoch": 0.33962728748111254, "grad_norm": 0.9041308623142068, "learning_rate": 1.5380875544954227e-05, "loss": 0.8578, "step": 3793 }, { "epoch": 0.33971682802619063, "grad_norm": 0.9554547345468418, "learning_rate": 1.537843069742929e-05, "loss": 0.8592, "step": 3794 }, { "epoch": 0.33980636857126867, "grad_norm": 0.9999572802320026, "learning_rate": 1.537598539748829e-05, "loss": 0.8905, "step": 3795 }, { "epoch": 0.33989590911634676, "grad_norm": 1.055529033762274, "learning_rate": 1.537353964533691e-05, "loss": 0.8969, "step": 3796 }, { "epoch": 0.3399854496614248, "grad_norm": 0.964714699315358, "learning_rate": 1.53710934411809e-05, "loss": 0.8462, "step": 3797 }, { "epoch": 0.3400749902065029, "grad_norm": 0.8540414406235691, "learning_rate": 1.5368646785225996e-05, "loss": 0.8565, "step": 3798 }, { "epoch": 0.34016453075158093, "grad_norm": 1.0773546335442135, "learning_rate": 1.536619967767803e-05, "loss": 0.8743, "step": 3799 }, { "epoch": 0.340254071296659, "grad_norm": 0.9334614161590481, "learning_rate": 1.536375211874283e-05, "loss": 0.8322, "step": 3800 }, { "epoch": 0.34034361184173706, "grad_norm": 0.9057358499027224, "learning_rate": 1.5361304108626282e-05, "loss": 0.8555, "step": 3801 }, { "epoch": 0.34043315238681515, "grad_norm": 0.8819048027022858, "learning_rate": 1.5358855647534306e-05, "loss": 0.808, "step": 3802 }, { "epoch": 0.34052269293189324, "grad_norm": 0.8488072954913346, "learning_rate": 1.5356406735672852e-05, "loss": 0.8341, "step": 3803 }, { "epoch": 0.3406122334769713, "grad_norm": 0.9898413074898648, "learning_rate": 1.535395737324792e-05, "loss": 0.8765, "step": 3804 }, { "epoch": 0.3407017740220494, "grad_norm": 1.0330843180762026, "learning_rate": 1.535150756046554e-05, "loss": 0.8396, "step": 3805 }, { "epoch": 0.3407913145671274, "grad_norm": 0.9476915491993404, "learning_rate": 1.5349057297531782e-05, "loss": 0.8583, "step": 3806 }, { "epoch": 0.3408808551122055, "grad_norm": 0.9192136044542062, "learning_rate": 1.5346606584652758e-05, "loss": 0.8985, "step": 3807 }, { "epoch": 0.34097039565728354, "grad_norm": 1.049577571971932, "learning_rate": 1.5344155422034608e-05, "loss": 0.8628, "step": 3808 }, { "epoch": 0.34105993620236164, "grad_norm": 0.862338895901733, "learning_rate": 1.5341703809883523e-05, "loss": 0.8336, "step": 3809 }, { "epoch": 0.3411494767474397, "grad_norm": 0.9639019218230677, "learning_rate": 1.5339251748405715e-05, "loss": 0.844, "step": 3810 }, { "epoch": 0.34123901729251777, "grad_norm": 0.8264070158407661, "learning_rate": 1.533679923780745e-05, "loss": 0.8313, "step": 3811 }, { "epoch": 0.34132855783759586, "grad_norm": 0.9203581606062318, "learning_rate": 1.533434627829503e-05, "loss": 0.7886, "step": 3812 }, { "epoch": 0.3414180983826739, "grad_norm": 0.8789911178748009, "learning_rate": 1.5331892870074777e-05, "loss": 0.7943, "step": 3813 }, { "epoch": 0.341507638927752, "grad_norm": 0.9920213006181874, "learning_rate": 1.5329439013353074e-05, "loss": 0.8844, "step": 3814 }, { "epoch": 0.34159717947283, "grad_norm": 0.947287664247967, "learning_rate": 1.532698470833633e-05, "loss": 0.8461, "step": 3815 }, { "epoch": 0.3416867200179081, "grad_norm": 1.0064172516296754, "learning_rate": 1.532452995523099e-05, "loss": 0.8721, "step": 3816 }, { "epoch": 0.34177626056298616, "grad_norm": 0.9103733284369846, "learning_rate": 1.532207475424354e-05, "loss": 0.859, "step": 3817 }, { "epoch": 0.34186580110806425, "grad_norm": 0.9855977863965022, "learning_rate": 1.5319619105580506e-05, "loss": 0.8248, "step": 3818 }, { "epoch": 0.3419553416531423, "grad_norm": 0.9164174798325228, "learning_rate": 1.5317163009448444e-05, "loss": 0.847, "step": 3819 }, { "epoch": 0.3420448821982204, "grad_norm": 0.8699045960859396, "learning_rate": 1.531470646605396e-05, "loss": 0.8671, "step": 3820 }, { "epoch": 0.34213442274329847, "grad_norm": 1.2001096650296608, "learning_rate": 1.531224947560369e-05, "loss": 0.8681, "step": 3821 }, { "epoch": 0.3422239632883765, "grad_norm": 0.9058794180545245, "learning_rate": 1.5309792038304302e-05, "loss": 0.8867, "step": 3822 }, { "epoch": 0.3423135038334546, "grad_norm": 0.822295197490177, "learning_rate": 1.530733415436251e-05, "loss": 0.8305, "step": 3823 }, { "epoch": 0.34240304437853264, "grad_norm": 0.9739994656764144, "learning_rate": 1.5304875823985067e-05, "loss": 0.8414, "step": 3824 }, { "epoch": 0.34249258492361073, "grad_norm": 0.9829670787987514, "learning_rate": 1.530241704737875e-05, "loss": 0.8661, "step": 3825 }, { "epoch": 0.34258212546868877, "grad_norm": 0.8655978068765721, "learning_rate": 1.52999578247504e-05, "loss": 0.8408, "step": 3826 }, { "epoch": 0.34267166601376686, "grad_norm": 0.9748409300151503, "learning_rate": 1.5297498156306864e-05, "loss": 0.8618, "step": 3827 }, { "epoch": 0.3427612065588449, "grad_norm": 0.9191711409415992, "learning_rate": 1.529503804225504e-05, "loss": 0.7627, "step": 3828 }, { "epoch": 0.342850747103923, "grad_norm": 1.0525824746564418, "learning_rate": 1.529257748280188e-05, "loss": 0.8216, "step": 3829 }, { "epoch": 0.3429402876490011, "grad_norm": 1.2537178737960688, "learning_rate": 1.5290116478154346e-05, "loss": 0.8782, "step": 3830 }, { "epoch": 0.3430298281940791, "grad_norm": 0.9549147507849366, "learning_rate": 1.5287655028519453e-05, "loss": 0.9269, "step": 3831 }, { "epoch": 0.3431193687391572, "grad_norm": 0.9596048063390664, "learning_rate": 1.528519313410425e-05, "loss": 0.891, "step": 3832 }, { "epoch": 0.34320890928423525, "grad_norm": 1.2417933070577973, "learning_rate": 1.5282730795115822e-05, "loss": 0.9077, "step": 3833 }, { "epoch": 0.34329844982931335, "grad_norm": 1.0097239403296414, "learning_rate": 1.5280268011761296e-05, "loss": 0.9237, "step": 3834 }, { "epoch": 0.3433879903743914, "grad_norm": 0.9730894959132692, "learning_rate": 1.527780478424783e-05, "loss": 0.8688, "step": 3835 }, { "epoch": 0.3434775309194695, "grad_norm": 0.926194904513807, "learning_rate": 1.5275341112782625e-05, "loss": 0.8317, "step": 3836 }, { "epoch": 0.3435670714645475, "grad_norm": 0.9703703941546321, "learning_rate": 1.5272876997572916e-05, "loss": 0.8615, "step": 3837 }, { "epoch": 0.3436566120096256, "grad_norm": 0.8971043374811823, "learning_rate": 1.527041243882598e-05, "loss": 0.8181, "step": 3838 }, { "epoch": 0.3437461525547037, "grad_norm": 0.8574412943492389, "learning_rate": 1.526794743674912e-05, "loss": 0.8449, "step": 3839 }, { "epoch": 0.34383569309978174, "grad_norm": 0.9519693342302483, "learning_rate": 1.526548199154969e-05, "loss": 0.8546, "step": 3840 }, { "epoch": 0.34392523364485983, "grad_norm": 1.0447711226794831, "learning_rate": 1.5263016103435072e-05, "loss": 0.8735, "step": 3841 }, { "epoch": 0.34401477418993787, "grad_norm": 0.9045378033159213, "learning_rate": 1.526054977261269e-05, "loss": 0.8123, "step": 3842 }, { "epoch": 0.34410431473501596, "grad_norm": 0.8618110882958003, "learning_rate": 1.5258082999290004e-05, "loss": 0.889, "step": 3843 }, { "epoch": 0.344193855280094, "grad_norm": 0.8853325841756551, "learning_rate": 1.5255615783674513e-05, "loss": 0.8732, "step": 3844 }, { "epoch": 0.3442833958251721, "grad_norm": 0.9281580088014069, "learning_rate": 1.5253148125973746e-05, "loss": 0.872, "step": 3845 }, { "epoch": 0.3443729363702501, "grad_norm": 0.9732980784194987, "learning_rate": 1.525068002639528e-05, "loss": 0.8501, "step": 3846 }, { "epoch": 0.3444624769153282, "grad_norm": 0.9339493691422398, "learning_rate": 1.524821148514672e-05, "loss": 0.8417, "step": 3847 }, { "epoch": 0.3445520174604063, "grad_norm": 1.02786635635532, "learning_rate": 1.5245742502435713e-05, "loss": 0.8594, "step": 3848 }, { "epoch": 0.34464155800548435, "grad_norm": 1.0865579406765522, "learning_rate": 1.5243273078469938e-05, "loss": 0.9129, "step": 3849 }, { "epoch": 0.34473109855056244, "grad_norm": 0.9527993610785306, "learning_rate": 1.524080321345712e-05, "loss": 0.9007, "step": 3850 }, { "epoch": 0.3448206390956405, "grad_norm": 0.8772472196066003, "learning_rate": 1.5238332907605014e-05, "loss": 0.8038, "step": 3851 }, { "epoch": 0.3449101796407186, "grad_norm": 0.9477423440766604, "learning_rate": 1.5235862161121412e-05, "loss": 0.864, "step": 3852 }, { "epoch": 0.3449997201857966, "grad_norm": 0.8618935685308358, "learning_rate": 1.5233390974214146e-05, "loss": 0.7888, "step": 3853 }, { "epoch": 0.3450892607308747, "grad_norm": 0.93746703898179, "learning_rate": 1.5230919347091092e-05, "loss": 0.8315, "step": 3854 }, { "epoch": 0.34517880127595274, "grad_norm": 0.9461773417400678, "learning_rate": 1.522844727996014e-05, "loss": 0.8752, "step": 3855 }, { "epoch": 0.34526834182103083, "grad_norm": 0.916368679058873, "learning_rate": 1.5225974773029246e-05, "loss": 0.7965, "step": 3856 }, { "epoch": 0.3453578823661089, "grad_norm": 1.001557934153696, "learning_rate": 1.5223501826506386e-05, "loss": 0.7992, "step": 3857 }, { "epoch": 0.34544742291118696, "grad_norm": 0.9410352148934408, "learning_rate": 1.522102844059957e-05, "loss": 0.8657, "step": 3858 }, { "epoch": 0.34553696345626506, "grad_norm": 1.117751099714368, "learning_rate": 1.5218554615516857e-05, "loss": 0.8805, "step": 3859 }, { "epoch": 0.3456265040013431, "grad_norm": 1.0477389525397895, "learning_rate": 1.5216080351466333e-05, "loss": 0.8797, "step": 3860 }, { "epoch": 0.3457160445464212, "grad_norm": 0.9500500564910868, "learning_rate": 1.5213605648656132e-05, "loss": 0.8338, "step": 3861 }, { "epoch": 0.3458055850914992, "grad_norm": 0.9656886624057628, "learning_rate": 1.521113050729441e-05, "loss": 0.8448, "step": 3862 }, { "epoch": 0.3458951256365773, "grad_norm": 1.2063491629719156, "learning_rate": 1.5208654927589372e-05, "loss": 0.8676, "step": 3863 }, { "epoch": 0.34598466618165535, "grad_norm": 0.9604753016354344, "learning_rate": 1.5206178909749256e-05, "loss": 0.8235, "step": 3864 }, { "epoch": 0.34607420672673345, "grad_norm": 0.9838223077573088, "learning_rate": 1.5203702453982335e-05, "loss": 0.8455, "step": 3865 }, { "epoch": 0.34616374727181154, "grad_norm": 1.0384524178269148, "learning_rate": 1.520122556049692e-05, "loss": 0.9041, "step": 3866 }, { "epoch": 0.3462532878168896, "grad_norm": 1.0865067671525488, "learning_rate": 1.5198748229501358e-05, "loss": 0.8422, "step": 3867 }, { "epoch": 0.34634282836196767, "grad_norm": 0.9441735043841237, "learning_rate": 1.5196270461204041e-05, "loss": 0.8328, "step": 3868 }, { "epoch": 0.3464323689070457, "grad_norm": 0.9610505945450045, "learning_rate": 1.5193792255813384e-05, "loss": 0.7907, "step": 3869 }, { "epoch": 0.3465219094521238, "grad_norm": 0.9565827905743597, "learning_rate": 1.5191313613537846e-05, "loss": 0.8491, "step": 3870 }, { "epoch": 0.34661144999720184, "grad_norm": 0.9168807353150401, "learning_rate": 1.5188834534585926e-05, "loss": 0.9017, "step": 3871 }, { "epoch": 0.34670099054227993, "grad_norm": 1.1993016137904005, "learning_rate": 1.5186355019166153e-05, "loss": 0.8842, "step": 3872 }, { "epoch": 0.34679053108735797, "grad_norm": 0.9912875115147707, "learning_rate": 1.5183875067487095e-05, "loss": 0.8817, "step": 3873 }, { "epoch": 0.34688007163243606, "grad_norm": 0.898752358158883, "learning_rate": 1.5181394679757363e-05, "loss": 0.8598, "step": 3874 }, { "epoch": 0.34696961217751415, "grad_norm": 0.8794468945455675, "learning_rate": 1.517891385618559e-05, "loss": 0.8283, "step": 3875 }, { "epoch": 0.3470591527225922, "grad_norm": 0.8635876594728162, "learning_rate": 1.5176432596980465e-05, "loss": 0.885, "step": 3876 }, { "epoch": 0.3471486932676703, "grad_norm": 0.9503130061250759, "learning_rate": 1.5173950902350694e-05, "loss": 0.806, "step": 3877 }, { "epoch": 0.3472382338127483, "grad_norm": 0.9726805220063456, "learning_rate": 1.5171468772505038e-05, "loss": 0.9332, "step": 3878 }, { "epoch": 0.3473277743578264, "grad_norm": 0.9567696880020461, "learning_rate": 1.5168986207652277e-05, "loss": 0.874, "step": 3879 }, { "epoch": 0.34741731490290445, "grad_norm": 1.2150698791731538, "learning_rate": 1.516650320800124e-05, "loss": 0.8729, "step": 3880 }, { "epoch": 0.34750685544798254, "grad_norm": 0.9839961594413172, "learning_rate": 1.5164019773760794e-05, "loss": 0.8327, "step": 3881 }, { "epoch": 0.3475963959930606, "grad_norm": 0.9308726600539514, "learning_rate": 1.5161535905139829e-05, "loss": 0.8733, "step": 3882 }, { "epoch": 0.3476859365381387, "grad_norm": 0.9164058038747945, "learning_rate": 1.5159051602347283e-05, "loss": 0.8855, "step": 3883 }, { "epoch": 0.34777547708321677, "grad_norm": 0.9472958820406885, "learning_rate": 1.515656686559213e-05, "loss": 0.8925, "step": 3884 }, { "epoch": 0.3478650176282948, "grad_norm": 1.0523456510468796, "learning_rate": 1.5154081695083376e-05, "loss": 0.8157, "step": 3885 }, { "epoch": 0.3479545581733729, "grad_norm": 0.8936604672590607, "learning_rate": 1.5151596091030063e-05, "loss": 0.8231, "step": 3886 }, { "epoch": 0.34804409871845093, "grad_norm": 0.8679223467581099, "learning_rate": 1.5149110053641278e-05, "loss": 0.8986, "step": 3887 }, { "epoch": 0.348133639263529, "grad_norm": 0.909766781543616, "learning_rate": 1.5146623583126134e-05, "loss": 0.8742, "step": 3888 }, { "epoch": 0.34822317980860706, "grad_norm": 0.9073702293037776, "learning_rate": 1.5144136679693784e-05, "loss": 0.8727, "step": 3889 }, { "epoch": 0.34831272035368516, "grad_norm": 0.9613482522838903, "learning_rate": 1.5141649343553419e-05, "loss": 0.866, "step": 3890 }, { "epoch": 0.3484022608987632, "grad_norm": 1.0186264856750307, "learning_rate": 1.5139161574914267e-05, "loss": 0.843, "step": 3891 }, { "epoch": 0.3484918014438413, "grad_norm": 1.2572013534105926, "learning_rate": 1.5136673373985593e-05, "loss": 0.8322, "step": 3892 }, { "epoch": 0.3485813419889194, "grad_norm": 1.0558487531450107, "learning_rate": 1.513418474097669e-05, "loss": 0.9316, "step": 3893 }, { "epoch": 0.3486708825339974, "grad_norm": 0.8280851841782433, "learning_rate": 1.51316956760969e-05, "loss": 0.8877, "step": 3894 }, { "epoch": 0.3487604230790755, "grad_norm": 0.8910115021814877, "learning_rate": 1.5129206179555591e-05, "loss": 0.8599, "step": 3895 }, { "epoch": 0.34884996362415355, "grad_norm": 0.9698876265160217, "learning_rate": 1.5126716251562173e-05, "loss": 0.8422, "step": 3896 }, { "epoch": 0.34893950416923164, "grad_norm": 0.9484585757566176, "learning_rate": 1.512422589232609e-05, "loss": 0.8488, "step": 3897 }, { "epoch": 0.3490290447143097, "grad_norm": 0.9626678539537833, "learning_rate": 1.5121735102056825e-05, "loss": 0.8354, "step": 3898 }, { "epoch": 0.34911858525938777, "grad_norm": 0.9896372535790691, "learning_rate": 1.511924388096389e-05, "loss": 0.8741, "step": 3899 }, { "epoch": 0.3492081258044658, "grad_norm": 0.997184360705059, "learning_rate": 1.5116752229256844e-05, "loss": 0.8694, "step": 3900 }, { "epoch": 0.3492976663495439, "grad_norm": 0.9399181414900208, "learning_rate": 1.5114260147145274e-05, "loss": 0.871, "step": 3901 }, { "epoch": 0.349387206894622, "grad_norm": 0.9596841835715545, "learning_rate": 1.5111767634838805e-05, "loss": 0.8888, "step": 3902 }, { "epoch": 0.34947674743970003, "grad_norm": 1.0089352578317965, "learning_rate": 1.5109274692547104e-05, "loss": 0.8701, "step": 3903 }, { "epoch": 0.3495662879847781, "grad_norm": 1.0440021871417409, "learning_rate": 1.5106781320479864e-05, "loss": 0.8935, "step": 3904 }, { "epoch": 0.34965582852985616, "grad_norm": 0.8934495846600866, "learning_rate": 1.5104287518846818e-05, "loss": 0.8965, "step": 3905 }, { "epoch": 0.34974536907493425, "grad_norm": 0.8895411672924293, "learning_rate": 1.5101793287857743e-05, "loss": 0.8145, "step": 3906 }, { "epoch": 0.3498349096200123, "grad_norm": 0.9595862698373742, "learning_rate": 1.509929862772244e-05, "loss": 0.8477, "step": 3907 }, { "epoch": 0.3499244501650904, "grad_norm": 0.9554606480378743, "learning_rate": 1.5096803538650754e-05, "loss": 0.8532, "step": 3908 }, { "epoch": 0.3500139907101684, "grad_norm": 0.8762636141502056, "learning_rate": 1.5094308020852564e-05, "loss": 0.8377, "step": 3909 }, { "epoch": 0.3501035312552465, "grad_norm": 0.8531600565090199, "learning_rate": 1.509181207453778e-05, "loss": 0.8648, "step": 3910 }, { "epoch": 0.3501930718003246, "grad_norm": 0.8715589321284145, "learning_rate": 1.5089315699916364e-05, "loss": 0.8494, "step": 3911 }, { "epoch": 0.35028261234540264, "grad_norm": 0.9391168854697806, "learning_rate": 1.5086818897198292e-05, "loss": 0.865, "step": 3912 }, { "epoch": 0.35037215289048074, "grad_norm": 1.0732189912939272, "learning_rate": 1.5084321666593589e-05, "loss": 0.8606, "step": 3913 }, { "epoch": 0.3504616934355588, "grad_norm": 1.2491541359483513, "learning_rate": 1.508182400831232e-05, "loss": 0.849, "step": 3914 }, { "epoch": 0.35055123398063687, "grad_norm": 0.9413897656289163, "learning_rate": 1.5079325922564574e-05, "loss": 0.8821, "step": 3915 }, { "epoch": 0.3506407745257149, "grad_norm": 0.9399070851264855, "learning_rate": 1.5076827409560481e-05, "loss": 0.8648, "step": 3916 }, { "epoch": 0.350730315070793, "grad_norm": 0.955577486784344, "learning_rate": 1.5074328469510212e-05, "loss": 0.782, "step": 3917 }, { "epoch": 0.35081985561587103, "grad_norm": 0.9483996096090402, "learning_rate": 1.5071829102623969e-05, "loss": 0.8988, "step": 3918 }, { "epoch": 0.3509093961609491, "grad_norm": 1.0435256049509705, "learning_rate": 1.5069329309111988e-05, "loss": 0.9141, "step": 3919 }, { "epoch": 0.3509989367060272, "grad_norm": 0.9369752645346466, "learning_rate": 1.5066829089184545e-05, "loss": 0.8251, "step": 3920 }, { "epoch": 0.35108847725110526, "grad_norm": 0.8928919093481668, "learning_rate": 1.506432844305195e-05, "loss": 0.8765, "step": 3921 }, { "epoch": 0.35117801779618335, "grad_norm": 0.9916226807939872, "learning_rate": 1.5061827370924551e-05, "loss": 0.9395, "step": 3922 }, { "epoch": 0.3512675583412614, "grad_norm": 0.9814741589499576, "learning_rate": 1.5059325873012727e-05, "loss": 0.8443, "step": 3923 }, { "epoch": 0.3513570988863395, "grad_norm": 0.9138014496094762, "learning_rate": 1.50568239495269e-05, "loss": 0.8734, "step": 3924 }, { "epoch": 0.3514466394314175, "grad_norm": 0.9705963395916672, "learning_rate": 1.505432160067752e-05, "loss": 0.8611, "step": 3925 }, { "epoch": 0.3515361799764956, "grad_norm": 1.1895696939115223, "learning_rate": 1.5051818826675076e-05, "loss": 0.8517, "step": 3926 }, { "epoch": 0.35162572052157365, "grad_norm": 1.1797515896118147, "learning_rate": 1.5049315627730093e-05, "loss": 0.8414, "step": 3927 }, { "epoch": 0.35171526106665174, "grad_norm": 0.9503095175887297, "learning_rate": 1.5046812004053135e-05, "loss": 0.8036, "step": 3928 }, { "epoch": 0.35180480161172983, "grad_norm": 0.9701961351933339, "learning_rate": 1.50443079558548e-05, "loss": 0.8828, "step": 3929 }, { "epoch": 0.35189434215680787, "grad_norm": 0.9125553107076922, "learning_rate": 1.5041803483345715e-05, "loss": 0.8422, "step": 3930 }, { "epoch": 0.35198388270188596, "grad_norm": 0.9911840870158602, "learning_rate": 1.5039298586736553e-05, "loss": 0.8456, "step": 3931 }, { "epoch": 0.352073423246964, "grad_norm": 0.9895231162495137, "learning_rate": 1.5036793266238013e-05, "loss": 0.8789, "step": 3932 }, { "epoch": 0.3521629637920421, "grad_norm": 0.9681345323486902, "learning_rate": 1.5034287522060838e-05, "loss": 0.8627, "step": 3933 }, { "epoch": 0.35225250433712013, "grad_norm": 0.8867903443526222, "learning_rate": 1.50317813544158e-05, "loss": 0.8858, "step": 3934 }, { "epoch": 0.3523420448821982, "grad_norm": 0.9259796971721358, "learning_rate": 1.5029274763513716e-05, "loss": 0.8493, "step": 3935 }, { "epoch": 0.35243158542727626, "grad_norm": 0.9767020867195255, "learning_rate": 1.5026767749565423e-05, "loss": 0.8526, "step": 3936 }, { "epoch": 0.35252112597235435, "grad_norm": 0.9342679943746612, "learning_rate": 1.5024260312781812e-05, "loss": 0.815, "step": 3937 }, { "epoch": 0.35261066651743245, "grad_norm": 0.9172558108437532, "learning_rate": 1.5021752453373793e-05, "loss": 0.7808, "step": 3938 }, { "epoch": 0.3527002070625105, "grad_norm": 1.1711110308975234, "learning_rate": 1.5019244171552326e-05, "loss": 0.8673, "step": 3939 }, { "epoch": 0.3527897476075886, "grad_norm": 0.9770321217887352, "learning_rate": 1.5016735467528389e-05, "loss": 0.8673, "step": 3940 }, { "epoch": 0.3528792881526666, "grad_norm": 1.0105731489932666, "learning_rate": 1.5014226341513016e-05, "loss": 0.8565, "step": 3941 }, { "epoch": 0.3529688286977447, "grad_norm": 0.9112453746323449, "learning_rate": 1.5011716793717264e-05, "loss": 0.877, "step": 3942 }, { "epoch": 0.35305836924282274, "grad_norm": 1.0846777652944848, "learning_rate": 1.5009206824352226e-05, "loss": 0.779, "step": 3943 }, { "epoch": 0.35314790978790084, "grad_norm": 1.1087094114060112, "learning_rate": 1.5006696433629033e-05, "loss": 0.8369, "step": 3944 }, { "epoch": 0.3532374503329789, "grad_norm": 1.0708153279954185, "learning_rate": 1.5004185621758853e-05, "loss": 0.879, "step": 3945 }, { "epoch": 0.35332699087805697, "grad_norm": 0.9203650064787073, "learning_rate": 1.500167438895288e-05, "loss": 0.8846, "step": 3946 }, { "epoch": 0.35341653142313506, "grad_norm": 1.002084220792609, "learning_rate": 1.4999162735422363e-05, "loss": 0.9191, "step": 3947 }, { "epoch": 0.3535060719682131, "grad_norm": 1.0025798121287472, "learning_rate": 1.4996650661378567e-05, "loss": 0.8654, "step": 3948 }, { "epoch": 0.3535956125132912, "grad_norm": 1.000348914388443, "learning_rate": 1.4994138167032798e-05, "loss": 0.8703, "step": 3949 }, { "epoch": 0.35368515305836923, "grad_norm": 0.9438194945155277, "learning_rate": 1.49916252525964e-05, "loss": 0.8284, "step": 3950 }, { "epoch": 0.3537746936034473, "grad_norm": 0.900275860149152, "learning_rate": 1.4989111918280755e-05, "loss": 0.8637, "step": 3951 }, { "epoch": 0.35386423414852536, "grad_norm": 0.9952525473553556, "learning_rate": 1.498659816429727e-05, "loss": 0.8409, "step": 3952 }, { "epoch": 0.35395377469360345, "grad_norm": 1.0103762891662074, "learning_rate": 1.4984083990857398e-05, "loss": 0.8049, "step": 3953 }, { "epoch": 0.3540433152386815, "grad_norm": 0.881035963127236, "learning_rate": 1.4981569398172624e-05, "loss": 0.8558, "step": 3954 }, { "epoch": 0.3541328557837596, "grad_norm": 0.9063096293754465, "learning_rate": 1.4979054386454465e-05, "loss": 0.8538, "step": 3955 }, { "epoch": 0.3542223963288377, "grad_norm": 0.9926465691497112, "learning_rate": 1.4976538955914474e-05, "loss": 0.851, "step": 3956 }, { "epoch": 0.3543119368739157, "grad_norm": 1.0003253182625675, "learning_rate": 1.4974023106764248e-05, "loss": 0.8885, "step": 3957 }, { "epoch": 0.3544014774189938, "grad_norm": 0.9066325681820581, "learning_rate": 1.4971506839215404e-05, "loss": 0.8138, "step": 3958 }, { "epoch": 0.35449101796407184, "grad_norm": 1.0152014413483348, "learning_rate": 1.4968990153479605e-05, "loss": 0.8768, "step": 3959 }, { "epoch": 0.35458055850914993, "grad_norm": 1.252522606505696, "learning_rate": 1.4966473049768548e-05, "loss": 0.8601, "step": 3960 }, { "epoch": 0.35467009905422797, "grad_norm": 0.9535075526718158, "learning_rate": 1.4963955528293961e-05, "loss": 0.845, "step": 3961 }, { "epoch": 0.35475963959930606, "grad_norm": 1.0927258466243515, "learning_rate": 1.4961437589267611e-05, "loss": 0.8533, "step": 3962 }, { "epoch": 0.3548491801443841, "grad_norm": 1.070004456412874, "learning_rate": 1.4958919232901301e-05, "loss": 0.8363, "step": 3963 }, { "epoch": 0.3549387206894622, "grad_norm": 0.9558127770766579, "learning_rate": 1.4956400459406864e-05, "loss": 0.8851, "step": 3964 }, { "epoch": 0.3550282612345403, "grad_norm": 0.9479647573640971, "learning_rate": 1.4953881268996169e-05, "loss": 0.9248, "step": 3965 }, { "epoch": 0.3551178017796183, "grad_norm": 0.9421883241057183, "learning_rate": 1.495136166188113e-05, "loss": 0.8765, "step": 3966 }, { "epoch": 0.3552073423246964, "grad_norm": 0.8811124302527993, "learning_rate": 1.4948841638273677e-05, "loss": 0.83, "step": 3967 }, { "epoch": 0.35529688286977446, "grad_norm": 0.8921003573685975, "learning_rate": 1.4946321198385796e-05, "loss": 0.8501, "step": 3968 }, { "epoch": 0.35538642341485255, "grad_norm": 1.2053438652054258, "learning_rate": 1.4943800342429495e-05, "loss": 0.8611, "step": 3969 }, { "epoch": 0.3554759639599306, "grad_norm": 1.0018680082260272, "learning_rate": 1.4941279070616816e-05, "loss": 0.8282, "step": 3970 }, { "epoch": 0.3555655045050087, "grad_norm": 1.06883740713149, "learning_rate": 1.493875738315985e-05, "loss": 0.7919, "step": 3971 }, { "epoch": 0.3556550450500867, "grad_norm": 0.8919671471160332, "learning_rate": 1.4936235280270702e-05, "loss": 0.8296, "step": 3972 }, { "epoch": 0.3557445855951648, "grad_norm": 0.9520179686323305, "learning_rate": 1.4933712762161525e-05, "loss": 0.838, "step": 3973 }, { "epoch": 0.3558341261402429, "grad_norm": 1.0363774531151329, "learning_rate": 1.4931189829044514e-05, "loss": 0.8727, "step": 3974 }, { "epoch": 0.35592366668532094, "grad_norm": 1.1249006352208268, "learning_rate": 1.4928666481131884e-05, "loss": 0.861, "step": 3975 }, { "epoch": 0.35601320723039903, "grad_norm": 0.9735390645226156, "learning_rate": 1.4926142718635886e-05, "loss": 0.8921, "step": 3976 }, { "epoch": 0.35610274777547707, "grad_norm": 0.8965790523836814, "learning_rate": 1.4923618541768819e-05, "loss": 0.8681, "step": 3977 }, { "epoch": 0.35619228832055516, "grad_norm": 0.9810827849055104, "learning_rate": 1.4921093950743002e-05, "loss": 0.8995, "step": 3978 }, { "epoch": 0.3562818288656332, "grad_norm": 1.0141182664712898, "learning_rate": 1.4918568945770801e-05, "loss": 0.906, "step": 3979 }, { "epoch": 0.3563713694107113, "grad_norm": 1.0145444768600687, "learning_rate": 1.4916043527064608e-05, "loss": 0.8941, "step": 3980 }, { "epoch": 0.35646090995578933, "grad_norm": 1.0139159043896746, "learning_rate": 1.4913517694836851e-05, "loss": 0.8614, "step": 3981 }, { "epoch": 0.3565504505008674, "grad_norm": 0.8543208214084534, "learning_rate": 1.4910991449299998e-05, "loss": 0.8901, "step": 3982 }, { "epoch": 0.3566399910459455, "grad_norm": 0.9086471593959841, "learning_rate": 1.490846479066655e-05, "loss": 0.8213, "step": 3983 }, { "epoch": 0.35672953159102355, "grad_norm": 1.0055538116106704, "learning_rate": 1.4905937719149038e-05, "loss": 0.9009, "step": 3984 }, { "epoch": 0.35681907213610164, "grad_norm": 0.9945893144854023, "learning_rate": 1.4903410234960032e-05, "loss": 0.8706, "step": 3985 }, { "epoch": 0.3569086126811797, "grad_norm": 0.8762873363231536, "learning_rate": 1.4900882338312134e-05, "loss": 0.8599, "step": 3986 }, { "epoch": 0.3569981532262578, "grad_norm": 0.9382703022649406, "learning_rate": 1.4898354029417987e-05, "loss": 0.8114, "step": 3987 }, { "epoch": 0.3570876937713358, "grad_norm": 0.8947749929853646, "learning_rate": 1.4895825308490259e-05, "loss": 0.889, "step": 3988 }, { "epoch": 0.3571772343164139, "grad_norm": 0.9020415971229506, "learning_rate": 1.4893296175741663e-05, "loss": 0.8255, "step": 3989 }, { "epoch": 0.35726677486149194, "grad_norm": 0.8827857204131178, "learning_rate": 1.4890766631384934e-05, "loss": 0.8771, "step": 3990 }, { "epoch": 0.35735631540657004, "grad_norm": 1.0047935358583926, "learning_rate": 1.4888236675632857e-05, "loss": 0.8368, "step": 3991 }, { "epoch": 0.35744585595164813, "grad_norm": 0.9719191902868516, "learning_rate": 1.488570630869824e-05, "loss": 0.8789, "step": 3992 }, { "epoch": 0.35753539649672617, "grad_norm": 0.9044832073080918, "learning_rate": 1.488317553079393e-05, "loss": 0.8511, "step": 3993 }, { "epoch": 0.35762493704180426, "grad_norm": 0.9119614247703369, "learning_rate": 1.4880644342132804e-05, "loss": 0.8425, "step": 3994 }, { "epoch": 0.3577144775868823, "grad_norm": 0.8731226108467127, "learning_rate": 1.4878112742927784e-05, "loss": 0.8657, "step": 3995 }, { "epoch": 0.3578040181319604, "grad_norm": 0.9368926264624609, "learning_rate": 1.4875580733391814e-05, "loss": 0.8549, "step": 3996 }, { "epoch": 0.3578935586770384, "grad_norm": 0.9281394912926532, "learning_rate": 1.4873048313737881e-05, "loss": 0.8474, "step": 3997 }, { "epoch": 0.3579830992221165, "grad_norm": 0.9095304979047474, "learning_rate": 1.4870515484179005e-05, "loss": 0.9027, "step": 3998 }, { "epoch": 0.35807263976719456, "grad_norm": 0.8558355670174908, "learning_rate": 1.4867982244928238e-05, "loss": 0.8397, "step": 3999 }, { "epoch": 0.35816218031227265, "grad_norm": 0.9499847223776043, "learning_rate": 1.4865448596198666e-05, "loss": 0.8067, "step": 4000 }, { "epoch": 0.35825172085735074, "grad_norm": 0.9465277207326475, "learning_rate": 1.4862914538203416e-05, "loss": 0.801, "step": 4001 }, { "epoch": 0.3583412614024288, "grad_norm": 0.9309723660707486, "learning_rate": 1.4860380071155641e-05, "loss": 0.8441, "step": 4002 }, { "epoch": 0.35843080194750687, "grad_norm": 1.0109531801666056, "learning_rate": 1.485784519526853e-05, "loss": 0.8919, "step": 4003 }, { "epoch": 0.3585203424925849, "grad_norm": 0.9360921943844651, "learning_rate": 1.4855309910755315e-05, "loss": 0.8808, "step": 4004 }, { "epoch": 0.358609883037663, "grad_norm": 0.9745386655068174, "learning_rate": 1.485277421782925e-05, "loss": 0.8509, "step": 4005 }, { "epoch": 0.35869942358274104, "grad_norm": 0.9036447989275697, "learning_rate": 1.485023811670363e-05, "loss": 0.8549, "step": 4006 }, { "epoch": 0.35878896412781913, "grad_norm": 1.0781452688504636, "learning_rate": 1.4847701607591791e-05, "loss": 0.8284, "step": 4007 }, { "epoch": 0.35887850467289717, "grad_norm": 0.9981096948196267, "learning_rate": 1.4845164690707087e-05, "loss": 0.8312, "step": 4008 }, { "epoch": 0.35896804521797526, "grad_norm": 0.9138844107706523, "learning_rate": 1.4842627366262918e-05, "loss": 0.8504, "step": 4009 }, { "epoch": 0.35905758576305336, "grad_norm": 0.970251279851938, "learning_rate": 1.4840089634472715e-05, "loss": 0.8141, "step": 4010 }, { "epoch": 0.3591471263081314, "grad_norm": 0.8766481046911421, "learning_rate": 1.4837551495549944e-05, "loss": 0.8595, "step": 4011 }, { "epoch": 0.3592366668532095, "grad_norm": 0.9393628554931694, "learning_rate": 1.4835012949708105e-05, "loss": 0.8623, "step": 4012 }, { "epoch": 0.3593262073982875, "grad_norm": 1.1619033550296851, "learning_rate": 1.4832473997160735e-05, "loss": 0.8889, "step": 4013 }, { "epoch": 0.3594157479433656, "grad_norm": 0.9114933278405927, "learning_rate": 1.48299346381214e-05, "loss": 0.835, "step": 4014 }, { "epoch": 0.35950528848844365, "grad_norm": 1.0021328170446315, "learning_rate": 1.48273948728037e-05, "loss": 0.8099, "step": 4015 }, { "epoch": 0.35959482903352175, "grad_norm": 0.84380845670878, "learning_rate": 1.4824854701421277e-05, "loss": 0.8568, "step": 4016 }, { "epoch": 0.3596843695785998, "grad_norm": 0.8336619936350709, "learning_rate": 1.4822314124187795e-05, "loss": 0.8245, "step": 4017 }, { "epoch": 0.3597739101236779, "grad_norm": 0.9653655240524449, "learning_rate": 1.4819773141316967e-05, "loss": 0.8724, "step": 4018 }, { "epoch": 0.35986345066875597, "grad_norm": 0.9926892922993221, "learning_rate": 1.4817231753022528e-05, "loss": 0.8477, "step": 4019 }, { "epoch": 0.359952991213834, "grad_norm": 0.896176376730794, "learning_rate": 1.4814689959518254e-05, "loss": 0.8274, "step": 4020 }, { "epoch": 0.3600425317589121, "grad_norm": 0.900490633457732, "learning_rate": 1.4812147761017946e-05, "loss": 0.8348, "step": 4021 }, { "epoch": 0.36013207230399014, "grad_norm": 0.9283388486767167, "learning_rate": 1.480960515773545e-05, "loss": 0.8528, "step": 4022 }, { "epoch": 0.36022161284906823, "grad_norm": 0.961354230676862, "learning_rate": 1.4807062149884645e-05, "loss": 0.8956, "step": 4023 }, { "epoch": 0.36031115339414627, "grad_norm": 0.8707301391479813, "learning_rate": 1.4804518737679435e-05, "loss": 0.8515, "step": 4024 }, { "epoch": 0.36040069393922436, "grad_norm": 0.8910716818537476, "learning_rate": 1.4801974921333763e-05, "loss": 0.811, "step": 4025 }, { "epoch": 0.3604902344843024, "grad_norm": 1.0146513302106128, "learning_rate": 1.4799430701061613e-05, "loss": 0.8897, "step": 4026 }, { "epoch": 0.3605797750293805, "grad_norm": 1.0512585860498647, "learning_rate": 1.4796886077076988e-05, "loss": 0.8856, "step": 4027 }, { "epoch": 0.3606693155744586, "grad_norm": 1.0093057152977183, "learning_rate": 1.4794341049593939e-05, "loss": 0.8472, "step": 4028 }, { "epoch": 0.3607588561195366, "grad_norm": 1.052685762546745, "learning_rate": 1.4791795618826548e-05, "loss": 0.8864, "step": 4029 }, { "epoch": 0.3608483966646147, "grad_norm": 0.9893494121114546, "learning_rate": 1.4789249784988919e-05, "loss": 0.8434, "step": 4030 }, { "epoch": 0.36093793720969275, "grad_norm": 0.9449777376451948, "learning_rate": 1.478670354829521e-05, "loss": 0.8074, "step": 4031 }, { "epoch": 0.36102747775477084, "grad_norm": 0.8373353355657409, "learning_rate": 1.4784156908959593e-05, "loss": 0.8377, "step": 4032 }, { "epoch": 0.3611170182998489, "grad_norm": 0.9255538441038482, "learning_rate": 1.4781609867196288e-05, "loss": 0.8994, "step": 4033 }, { "epoch": 0.361206558844927, "grad_norm": 0.8821222426449012, "learning_rate": 1.4779062423219543e-05, "loss": 0.8686, "step": 4034 }, { "epoch": 0.361296099390005, "grad_norm": 1.0393899978040426, "learning_rate": 1.4776514577243641e-05, "loss": 0.9001, "step": 4035 }, { "epoch": 0.3613856399350831, "grad_norm": 0.9929826756024304, "learning_rate": 1.4773966329482896e-05, "loss": 0.8957, "step": 4036 }, { "epoch": 0.3614751804801612, "grad_norm": 0.9391545345948109, "learning_rate": 1.4771417680151665e-05, "loss": 0.8668, "step": 4037 }, { "epoch": 0.36156472102523923, "grad_norm": 1.0267193633512912, "learning_rate": 1.4768868629464323e-05, "loss": 0.8427, "step": 4038 }, { "epoch": 0.3616542615703173, "grad_norm": 0.9031494240910067, "learning_rate": 1.4766319177635292e-05, "loss": 0.7593, "step": 4039 }, { "epoch": 0.36174380211539536, "grad_norm": 1.0102326389506426, "learning_rate": 1.4763769324879027e-05, "loss": 0.8962, "step": 4040 }, { "epoch": 0.36183334266047346, "grad_norm": 1.7522725429409525, "learning_rate": 1.476121907141001e-05, "loss": 0.8607, "step": 4041 }, { "epoch": 0.3619228832055515, "grad_norm": 1.1429766071364311, "learning_rate": 1.4758668417442758e-05, "loss": 0.9142, "step": 4042 }, { "epoch": 0.3620124237506296, "grad_norm": 0.9184820020074588, "learning_rate": 1.4756117363191826e-05, "loss": 0.869, "step": 4043 }, { "epoch": 0.3621019642957076, "grad_norm": 0.9298283306943418, "learning_rate": 1.47535659088718e-05, "loss": 0.9037, "step": 4044 }, { "epoch": 0.3621915048407857, "grad_norm": 0.9260994231600933, "learning_rate": 1.4751014054697303e-05, "loss": 0.8354, "step": 4045 }, { "epoch": 0.3622810453858638, "grad_norm": 1.0571478987673144, "learning_rate": 1.4748461800882983e-05, "loss": 0.8365, "step": 4046 }, { "epoch": 0.36237058593094185, "grad_norm": 0.8745635579504719, "learning_rate": 1.4745909147643532e-05, "loss": 0.816, "step": 4047 }, { "epoch": 0.36246012647601994, "grad_norm": 1.0076007197914887, "learning_rate": 1.4743356095193665e-05, "loss": 0.8339, "step": 4048 }, { "epoch": 0.362549667021098, "grad_norm": 0.8623204778969524, "learning_rate": 1.4740802643748145e-05, "loss": 0.8735, "step": 4049 }, { "epoch": 0.36263920756617607, "grad_norm": 0.9306466292765032, "learning_rate": 1.4738248793521756e-05, "loss": 0.8923, "step": 4050 }, { "epoch": 0.3627287481112541, "grad_norm": 0.973520307813799, "learning_rate": 1.4735694544729315e-05, "loss": 0.8853, "step": 4051 }, { "epoch": 0.3628182886563322, "grad_norm": 0.9408571165343537, "learning_rate": 1.4733139897585682e-05, "loss": 0.8519, "step": 4052 }, { "epoch": 0.36290782920141024, "grad_norm": 1.0694960168730903, "learning_rate": 1.473058485230575e-05, "loss": 0.8447, "step": 4053 }, { "epoch": 0.36299736974648833, "grad_norm": 0.8625521884335589, "learning_rate": 1.4728029409104428e-05, "loss": 0.88, "step": 4054 }, { "epoch": 0.3630869102915664, "grad_norm": 0.9511602530717933, "learning_rate": 1.4725473568196682e-05, "loss": 0.8597, "step": 4055 }, { "epoch": 0.36317645083664446, "grad_norm": 0.9235567102628609, "learning_rate": 1.4722917329797502e-05, "loss": 0.8488, "step": 4056 }, { "epoch": 0.36326599138172255, "grad_norm": 0.9710407996319418, "learning_rate": 1.4720360694121902e-05, "loss": 0.8449, "step": 4057 }, { "epoch": 0.3633555319268006, "grad_norm": 1.0127019393547076, "learning_rate": 1.4717803661384946e-05, "loss": 0.8287, "step": 4058 }, { "epoch": 0.3634450724718787, "grad_norm": 1.0016754295269792, "learning_rate": 1.4715246231801722e-05, "loss": 0.9266, "step": 4059 }, { "epoch": 0.3635346130169567, "grad_norm": 1.0414652776828093, "learning_rate": 1.4712688405587344e-05, "loss": 0.8345, "step": 4060 }, { "epoch": 0.3636241535620348, "grad_norm": 0.9147906583948842, "learning_rate": 1.4710130182956979e-05, "loss": 0.8703, "step": 4061 }, { "epoch": 0.36371369410711285, "grad_norm": 0.9039883098756201, "learning_rate": 1.4707571564125812e-05, "loss": 0.8111, "step": 4062 }, { "epoch": 0.36380323465219094, "grad_norm": 1.0104084563859532, "learning_rate": 1.470501254930906e-05, "loss": 0.8534, "step": 4063 }, { "epoch": 0.36389277519726904, "grad_norm": 0.9348460603919654, "learning_rate": 1.4702453138721993e-05, "loss": 0.8104, "step": 4064 }, { "epoch": 0.3639823157423471, "grad_norm": 1.1858454653745654, "learning_rate": 1.4699893332579886e-05, "loss": 0.8519, "step": 4065 }, { "epoch": 0.36407185628742517, "grad_norm": 1.0272632778017143, "learning_rate": 1.4697333131098069e-05, "loss": 0.9099, "step": 4066 }, { "epoch": 0.3641613968325032, "grad_norm": 0.9827629962373259, "learning_rate": 1.4694772534491897e-05, "loss": 0.8043, "step": 4067 }, { "epoch": 0.3642509373775813, "grad_norm": 0.8777019465298066, "learning_rate": 1.4692211542976757e-05, "loss": 0.843, "step": 4068 }, { "epoch": 0.36434047792265933, "grad_norm": 0.8673015446226611, "learning_rate": 1.4689650156768071e-05, "loss": 0.8959, "step": 4069 }, { "epoch": 0.3644300184677374, "grad_norm": 0.9435678343627695, "learning_rate": 1.46870883760813e-05, "loss": 0.861, "step": 4070 }, { "epoch": 0.36451955901281546, "grad_norm": 0.8985379523982656, "learning_rate": 1.4684526201131928e-05, "loss": 0.8689, "step": 4071 }, { "epoch": 0.36460909955789356, "grad_norm": 1.0688693823840476, "learning_rate": 1.4681963632135476e-05, "loss": 0.8521, "step": 4072 }, { "epoch": 0.36469864010297165, "grad_norm": 0.9256763920799816, "learning_rate": 1.46794006693075e-05, "loss": 0.8737, "step": 4073 }, { "epoch": 0.3647881806480497, "grad_norm": 0.9408529298359314, "learning_rate": 1.467683731286359e-05, "loss": 0.8832, "step": 4074 }, { "epoch": 0.3648777211931278, "grad_norm": 0.895891875152209, "learning_rate": 1.4674273563019365e-05, "loss": 0.8303, "step": 4075 }, { "epoch": 0.3649672617382058, "grad_norm": 0.8205494219956654, "learning_rate": 1.467170941999048e-05, "loss": 0.8115, "step": 4076 }, { "epoch": 0.3650568022832839, "grad_norm": 0.9518379564802395, "learning_rate": 1.466914488399262e-05, "loss": 0.8454, "step": 4077 }, { "epoch": 0.36514634282836195, "grad_norm": 1.0390404653472753, "learning_rate": 1.4666579955241512e-05, "loss": 0.8613, "step": 4078 }, { "epoch": 0.36523588337344004, "grad_norm": 1.0364938891315036, "learning_rate": 1.4664014633952902e-05, "loss": 0.857, "step": 4079 }, { "epoch": 0.3653254239185181, "grad_norm": 0.8691771682474251, "learning_rate": 1.4661448920342585e-05, "loss": 0.8169, "step": 4080 }, { "epoch": 0.36541496446359617, "grad_norm": 0.9953302089039852, "learning_rate": 1.4658882814626367e-05, "loss": 0.9031, "step": 4081 }, { "epoch": 0.36550450500867426, "grad_norm": 0.9682627622132935, "learning_rate": 1.4656316317020113e-05, "loss": 0.8016, "step": 4082 }, { "epoch": 0.3655940455537523, "grad_norm": 0.959822597413384, "learning_rate": 1.4653749427739704e-05, "loss": 0.8048, "step": 4083 }, { "epoch": 0.3656835860988304, "grad_norm": 0.913700034180063, "learning_rate": 1.4651182147001057e-05, "loss": 0.8153, "step": 4084 }, { "epoch": 0.36577312664390843, "grad_norm": 1.2243079953148666, "learning_rate": 1.4648614475020125e-05, "loss": 0.9083, "step": 4085 }, { "epoch": 0.3658626671889865, "grad_norm": 0.9506877471484193, "learning_rate": 1.4646046412012893e-05, "loss": 0.8195, "step": 4086 }, { "epoch": 0.36595220773406456, "grad_norm": 0.8011070135513193, "learning_rate": 1.4643477958195376e-05, "loss": 0.7589, "step": 4087 }, { "epoch": 0.36604174827914265, "grad_norm": 0.9288589637605866, "learning_rate": 1.4640909113783623e-05, "loss": 0.8725, "step": 4088 }, { "epoch": 0.3661312888242207, "grad_norm": 0.9405992575363072, "learning_rate": 1.4638339878993723e-05, "loss": 0.8677, "step": 4089 }, { "epoch": 0.3662208293692988, "grad_norm": 1.0380543353325462, "learning_rate": 1.4635770254041784e-05, "loss": 0.7548, "step": 4090 }, { "epoch": 0.3663103699143769, "grad_norm": 0.8970078123160518, "learning_rate": 1.4633200239143958e-05, "loss": 0.8583, "step": 4091 }, { "epoch": 0.3663999104594549, "grad_norm": 0.9097403396290786, "learning_rate": 1.4630629834516427e-05, "loss": 0.8919, "step": 4092 }, { "epoch": 0.366489451004533, "grad_norm": 0.9371330768865324, "learning_rate": 1.4628059040375404e-05, "loss": 0.839, "step": 4093 }, { "epoch": 0.36657899154961104, "grad_norm": 0.9171870033562859, "learning_rate": 1.4625487856937138e-05, "loss": 0.8278, "step": 4094 }, { "epoch": 0.36666853209468914, "grad_norm": 0.9689193482290918, "learning_rate": 1.4622916284417903e-05, "loss": 0.849, "step": 4095 }, { "epoch": 0.3667580726397672, "grad_norm": 0.8347713221596132, "learning_rate": 1.4620344323034016e-05, "loss": 0.8802, "step": 4096 }, { "epoch": 0.36684761318484527, "grad_norm": 0.8840334704281035, "learning_rate": 1.4617771973001822e-05, "loss": 0.8319, "step": 4097 }, { "epoch": 0.3669371537299233, "grad_norm": 0.8216425413124275, "learning_rate": 1.4615199234537698e-05, "loss": 0.8732, "step": 4098 }, { "epoch": 0.3670266942750014, "grad_norm": 0.9754660316412291, "learning_rate": 1.461262610785805e-05, "loss": 0.8795, "step": 4099 }, { "epoch": 0.3671162348200795, "grad_norm": 0.9026359820102634, "learning_rate": 1.461005259317933e-05, "loss": 0.9458, "step": 4100 }, { "epoch": 0.3672057753651575, "grad_norm": 1.0746453418878956, "learning_rate": 1.4607478690718006e-05, "loss": 0.8649, "step": 4101 }, { "epoch": 0.3672953159102356, "grad_norm": 0.9559400659691023, "learning_rate": 1.4604904400690591e-05, "loss": 0.795, "step": 4102 }, { "epoch": 0.36738485645531366, "grad_norm": 0.9036957099192082, "learning_rate": 1.4602329723313623e-05, "loss": 0.8097, "step": 4103 }, { "epoch": 0.36747439700039175, "grad_norm": 0.9045081647667311, "learning_rate": 1.4599754658803673e-05, "loss": 0.9519, "step": 4104 }, { "epoch": 0.3675639375454698, "grad_norm": 0.8277504622275791, "learning_rate": 1.4597179207377354e-05, "loss": 0.8073, "step": 4105 }, { "epoch": 0.3676534780905479, "grad_norm": 0.875719161653309, "learning_rate": 1.4594603369251302e-05, "loss": 0.8678, "step": 4106 }, { "epoch": 0.3677430186356259, "grad_norm": 0.9519674453999357, "learning_rate": 1.4592027144642183e-05, "loss": 0.8155, "step": 4107 }, { "epoch": 0.367832559180704, "grad_norm": 1.1021867470081759, "learning_rate": 1.4589450533766707e-05, "loss": 0.8075, "step": 4108 }, { "epoch": 0.3679220997257821, "grad_norm": 0.8844108959018988, "learning_rate": 1.4586873536841607e-05, "loss": 0.8426, "step": 4109 }, { "epoch": 0.36801164027086014, "grad_norm": 0.8576527569185451, "learning_rate": 1.4584296154083653e-05, "loss": 0.7999, "step": 4110 }, { "epoch": 0.36810118081593823, "grad_norm": 0.922963579957153, "learning_rate": 1.4581718385709645e-05, "loss": 0.8432, "step": 4111 }, { "epoch": 0.36819072136101627, "grad_norm": 0.9087362793791527, "learning_rate": 1.4579140231936415e-05, "loss": 0.9006, "step": 4112 }, { "epoch": 0.36828026190609436, "grad_norm": 1.002410940660549, "learning_rate": 1.4576561692980834e-05, "loss": 0.8766, "step": 4113 }, { "epoch": 0.3683698024511724, "grad_norm": 0.9253837969336944, "learning_rate": 1.4573982769059796e-05, "loss": 0.8403, "step": 4114 }, { "epoch": 0.3684593429962505, "grad_norm": 1.0720434226150808, "learning_rate": 1.4571403460390233e-05, "loss": 0.9115, "step": 4115 }, { "epoch": 0.36854888354132853, "grad_norm": 0.8762742283650113, "learning_rate": 1.4568823767189109e-05, "loss": 0.7979, "step": 4116 }, { "epoch": 0.3686384240864066, "grad_norm": 0.9164485334793443, "learning_rate": 1.4566243689673413e-05, "loss": 0.8665, "step": 4117 }, { "epoch": 0.3687279646314847, "grad_norm": 0.9216615852090195, "learning_rate": 1.456366322806018e-05, "loss": 0.8808, "step": 4118 }, { "epoch": 0.36881750517656275, "grad_norm": 1.037505096457901, "learning_rate": 1.4561082382566472e-05, "loss": 0.8749, "step": 4119 }, { "epoch": 0.36890704572164085, "grad_norm": 0.9662641545534558, "learning_rate": 1.4558501153409372e-05, "loss": 0.889, "step": 4120 }, { "epoch": 0.3689965862667189, "grad_norm": 0.9436247487062716, "learning_rate": 1.4555919540806015e-05, "loss": 0.7996, "step": 4121 }, { "epoch": 0.369086126811797, "grad_norm": 0.9530414673661675, "learning_rate": 1.4553337544973549e-05, "loss": 0.8855, "step": 4122 }, { "epoch": 0.369175667356875, "grad_norm": 1.0157256461677562, "learning_rate": 1.4550755166129165e-05, "loss": 0.863, "step": 4123 }, { "epoch": 0.3692652079019531, "grad_norm": 1.0917150189980733, "learning_rate": 1.454817240449009e-05, "loss": 0.818, "step": 4124 }, { "epoch": 0.36935474844703114, "grad_norm": 1.082301371864195, "learning_rate": 1.4545589260273572e-05, "loss": 0.8133, "step": 4125 }, { "epoch": 0.36944428899210924, "grad_norm": 0.8593234056065762, "learning_rate": 1.4543005733696896e-05, "loss": 0.8903, "step": 4126 }, { "epoch": 0.36953382953718733, "grad_norm": 0.8789815034499544, "learning_rate": 1.4540421824977386e-05, "loss": 0.8555, "step": 4127 }, { "epoch": 0.36962337008226537, "grad_norm": 0.9931615463947052, "learning_rate": 1.4537837534332386e-05, "loss": 0.8427, "step": 4128 }, { "epoch": 0.36971291062734346, "grad_norm": 0.9386252408354195, "learning_rate": 1.453525286197928e-05, "loss": 0.8675, "step": 4129 }, { "epoch": 0.3698024511724215, "grad_norm": 1.1073360248027586, "learning_rate": 1.4532667808135484e-05, "loss": 0.8911, "step": 4130 }, { "epoch": 0.3698919917174996, "grad_norm": 1.1958180215896763, "learning_rate": 1.4530082373018439e-05, "loss": 0.8355, "step": 4131 }, { "epoch": 0.36998153226257763, "grad_norm": 1.126802437089102, "learning_rate": 1.4527496556845631e-05, "loss": 0.8394, "step": 4132 }, { "epoch": 0.3700710728076557, "grad_norm": 0.938014900646232, "learning_rate": 1.4524910359834563e-05, "loss": 0.8941, "step": 4133 }, { "epoch": 0.37016061335273376, "grad_norm": 0.9279658248917745, "learning_rate": 1.4522323782202783e-05, "loss": 0.8607, "step": 4134 }, { "epoch": 0.37025015389781185, "grad_norm": 0.9044983000499031, "learning_rate": 1.451973682416786e-05, "loss": 0.8586, "step": 4135 }, { "epoch": 0.37033969444288994, "grad_norm": 0.9258374683090905, "learning_rate": 1.4517149485947409e-05, "loss": 0.8179, "step": 4136 }, { "epoch": 0.370429234987968, "grad_norm": 0.9328437117047843, "learning_rate": 1.4514561767759059e-05, "loss": 0.8712, "step": 4137 }, { "epoch": 0.3705187755330461, "grad_norm": 0.9843834075522936, "learning_rate": 1.4511973669820487e-05, "loss": 0.8098, "step": 4138 }, { "epoch": 0.3706083160781241, "grad_norm": 0.9373994711582793, "learning_rate": 1.4509385192349393e-05, "loss": 0.8908, "step": 4139 }, { "epoch": 0.3706978566232022, "grad_norm": 0.9483480264780919, "learning_rate": 1.450679633556351e-05, "loss": 0.8888, "step": 4140 }, { "epoch": 0.37078739716828024, "grad_norm": 0.9726878435845129, "learning_rate": 1.4504207099680607e-05, "loss": 0.9089, "step": 4141 }, { "epoch": 0.37087693771335833, "grad_norm": 0.9591857220650409, "learning_rate": 1.450161748491848e-05, "loss": 0.8324, "step": 4142 }, { "epoch": 0.37096647825843637, "grad_norm": 0.9260482342200291, "learning_rate": 1.449902749149496e-05, "loss": 0.9035, "step": 4143 }, { "epoch": 0.37105601880351446, "grad_norm": 0.901350748863898, "learning_rate": 1.4496437119627907e-05, "loss": 0.8726, "step": 4144 }, { "epoch": 0.37114555934859256, "grad_norm": 0.8669579378283601, "learning_rate": 1.4493846369535219e-05, "loss": 0.84, "step": 4145 }, { "epoch": 0.3712350998936706, "grad_norm": 0.8750001983119764, "learning_rate": 1.4491255241434817e-05, "loss": 0.8034, "step": 4146 }, { "epoch": 0.3713246404387487, "grad_norm": 0.8975750934615734, "learning_rate": 1.4488663735544658e-05, "loss": 0.8336, "step": 4147 }, { "epoch": 0.3714141809838267, "grad_norm": 0.8685969831656057, "learning_rate": 1.4486071852082733e-05, "loss": 0.8607, "step": 4148 }, { "epoch": 0.3715037215289048, "grad_norm": 0.9830188299775098, "learning_rate": 1.4483479591267067e-05, "loss": 0.8609, "step": 4149 }, { "epoch": 0.37159326207398286, "grad_norm": 0.9743289775855571, "learning_rate": 1.4480886953315703e-05, "loss": 0.8616, "step": 4150 }, { "epoch": 0.37168280261906095, "grad_norm": 0.9040133995543536, "learning_rate": 1.4478293938446734e-05, "loss": 0.8511, "step": 4151 }, { "epoch": 0.371772343164139, "grad_norm": 0.9608664745308472, "learning_rate": 1.447570054687827e-05, "loss": 0.8687, "step": 4152 }, { "epoch": 0.3718618837092171, "grad_norm": 0.8568078994124085, "learning_rate": 1.4473106778828459e-05, "loss": 0.8819, "step": 4153 }, { "epoch": 0.37195142425429517, "grad_norm": 0.9571225764767383, "learning_rate": 1.4470512634515489e-05, "loss": 0.8633, "step": 4154 }, { "epoch": 0.3720409647993732, "grad_norm": 0.9347425653078706, "learning_rate": 1.4467918114157558e-05, "loss": 0.8157, "step": 4155 }, { "epoch": 0.3721305053444513, "grad_norm": 1.03193702636088, "learning_rate": 1.4465323217972917e-05, "loss": 0.8546, "step": 4156 }, { "epoch": 0.37222004588952934, "grad_norm": 0.945834178468124, "learning_rate": 1.446272794617984e-05, "loss": 0.8623, "step": 4157 }, { "epoch": 0.37230958643460743, "grad_norm": 0.9050818227267468, "learning_rate": 1.4460132298996628e-05, "loss": 0.8793, "step": 4158 }, { "epoch": 0.37239912697968547, "grad_norm": 0.8615703755038566, "learning_rate": 1.4457536276641621e-05, "loss": 0.8149, "step": 4159 }, { "epoch": 0.37248866752476356, "grad_norm": 1.0278482566477132, "learning_rate": 1.445493987933319e-05, "loss": 0.8989, "step": 4160 }, { "epoch": 0.3725782080698416, "grad_norm": 0.8801070334629719, "learning_rate": 1.4452343107289733e-05, "loss": 0.8341, "step": 4161 }, { "epoch": 0.3726677486149197, "grad_norm": 1.1238091076363188, "learning_rate": 1.4449745960729681e-05, "loss": 0.8735, "step": 4162 }, { "epoch": 0.3727572891599978, "grad_norm": 1.1129784660018307, "learning_rate": 1.4447148439871503e-05, "loss": 0.8426, "step": 4163 }, { "epoch": 0.3728468297050758, "grad_norm": 0.9606956187602632, "learning_rate": 1.4444550544933687e-05, "loss": 0.8642, "step": 4164 }, { "epoch": 0.3729363702501539, "grad_norm": 1.0006790830059897, "learning_rate": 1.4441952276134766e-05, "loss": 0.8109, "step": 4165 }, { "epoch": 0.37302591079523195, "grad_norm": 0.9421086824793374, "learning_rate": 1.4439353633693292e-05, "loss": 0.8473, "step": 4166 }, { "epoch": 0.37311545134031004, "grad_norm": 0.9224093364861758, "learning_rate": 1.4436754617827857e-05, "loss": 0.8096, "step": 4167 }, { "epoch": 0.3732049918853881, "grad_norm": 0.8791060932886061, "learning_rate": 1.443415522875708e-05, "loss": 0.8441, "step": 4168 }, { "epoch": 0.3732945324304662, "grad_norm": 0.8971121012311769, "learning_rate": 1.4431555466699619e-05, "loss": 0.87, "step": 4169 }, { "epoch": 0.3733840729755442, "grad_norm": 0.9583130954477878, "learning_rate": 1.4428955331874148e-05, "loss": 0.8241, "step": 4170 }, { "epoch": 0.3734736135206223, "grad_norm": 0.919696875065664, "learning_rate": 1.4426354824499391e-05, "loss": 0.8561, "step": 4171 }, { "epoch": 0.3735631540657004, "grad_norm": 1.049418800107167, "learning_rate": 1.442375394479409e-05, "loss": 0.8844, "step": 4172 }, { "epoch": 0.37365269461077844, "grad_norm": 1.0046427750498634, "learning_rate": 1.4421152692977023e-05, "loss": 0.8387, "step": 4173 }, { "epoch": 0.37374223515585653, "grad_norm": 0.9120003435613306, "learning_rate": 1.4418551069266996e-05, "loss": 0.8953, "step": 4174 }, { "epoch": 0.37383177570093457, "grad_norm": 0.8437144270980298, "learning_rate": 1.4415949073882853e-05, "loss": 0.8187, "step": 4175 }, { "epoch": 0.37392131624601266, "grad_norm": 1.091793818287057, "learning_rate": 1.4413346707043467e-05, "loss": 0.8757, "step": 4176 }, { "epoch": 0.3740108567910907, "grad_norm": 0.9206951397598376, "learning_rate": 1.4410743968967733e-05, "loss": 0.8749, "step": 4177 }, { "epoch": 0.3741003973361688, "grad_norm": 0.9546160798847961, "learning_rate": 1.4408140859874593e-05, "loss": 0.8654, "step": 4178 }, { "epoch": 0.3741899378812468, "grad_norm": 1.0961819100434245, "learning_rate": 1.440553737998301e-05, "loss": 0.9058, "step": 4179 }, { "epoch": 0.3742794784263249, "grad_norm": 0.9324401068098676, "learning_rate": 1.4402933529511975e-05, "loss": 0.8703, "step": 4180 }, { "epoch": 0.374369018971403, "grad_norm": 1.0525022025626931, "learning_rate": 1.4400329308680523e-05, "loss": 0.834, "step": 4181 }, { "epoch": 0.37445855951648105, "grad_norm": 0.9212509687634383, "learning_rate": 1.4397724717707708e-05, "loss": 0.7758, "step": 4182 }, { "epoch": 0.37454810006155914, "grad_norm": 1.004164125561591, "learning_rate": 1.439511975681262e-05, "loss": 0.842, "step": 4183 }, { "epoch": 0.3746376406066372, "grad_norm": 0.9400035806256253, "learning_rate": 1.4392514426214378e-05, "loss": 0.8799, "step": 4184 }, { "epoch": 0.37472718115171527, "grad_norm": 0.9769589016947828, "learning_rate": 1.438990872613214e-05, "loss": 0.8279, "step": 4185 }, { "epoch": 0.3748167216967933, "grad_norm": 0.9290654956864182, "learning_rate": 1.4387302656785084e-05, "loss": 0.8184, "step": 4186 }, { "epoch": 0.3749062622418714, "grad_norm": 1.0151282341113241, "learning_rate": 1.4384696218392425e-05, "loss": 0.8779, "step": 4187 }, { "epoch": 0.37499580278694944, "grad_norm": 0.9970904254477098, "learning_rate": 1.438208941117341e-05, "loss": 0.8706, "step": 4188 }, { "epoch": 0.37508534333202753, "grad_norm": 0.9195544511661152, "learning_rate": 1.4379482235347312e-05, "loss": 0.8349, "step": 4189 }, { "epoch": 0.3751748838771056, "grad_norm": 0.977908981386023, "learning_rate": 1.437687469113344e-05, "loss": 0.8228, "step": 4190 }, { "epoch": 0.37526442442218366, "grad_norm": 0.930415413540987, "learning_rate": 1.4374266778751134e-05, "loss": 0.8446, "step": 4191 }, { "epoch": 0.37535396496726176, "grad_norm": 0.9037742540913861, "learning_rate": 1.4371658498419758e-05, "loss": 0.8759, "step": 4192 }, { "epoch": 0.3754435055123398, "grad_norm": 0.9982318479667434, "learning_rate": 1.4369049850358717e-05, "loss": 0.8026, "step": 4193 }, { "epoch": 0.3755330460574179, "grad_norm": 1.0936746032381797, "learning_rate": 1.4366440834787439e-05, "loss": 0.8281, "step": 4194 }, { "epoch": 0.3756225866024959, "grad_norm": 0.9269404334097506, "learning_rate": 1.4363831451925387e-05, "loss": 0.8297, "step": 4195 }, { "epoch": 0.375712127147574, "grad_norm": 0.8942739386051966, "learning_rate": 1.4361221701992055e-05, "loss": 0.8756, "step": 4196 }, { "epoch": 0.37580166769265205, "grad_norm": 0.9713827048368489, "learning_rate": 1.4358611585206962e-05, "loss": 0.856, "step": 4197 }, { "epoch": 0.37589120823773015, "grad_norm": 1.0547917897259143, "learning_rate": 1.435600110178967e-05, "loss": 0.9057, "step": 4198 }, { "epoch": 0.37598074878280824, "grad_norm": 0.9848118630679092, "learning_rate": 1.4353390251959759e-05, "loss": 0.8958, "step": 4199 }, { "epoch": 0.3760702893278863, "grad_norm": 0.9233538786336366, "learning_rate": 1.4350779035936846e-05, "loss": 0.9064, "step": 4200 }, { "epoch": 0.37615982987296437, "grad_norm": 0.9259714612322825, "learning_rate": 1.4348167453940578e-05, "loss": 0.8523, "step": 4201 }, { "epoch": 0.3762493704180424, "grad_norm": 1.182028723436946, "learning_rate": 1.4345555506190634e-05, "loss": 0.8682, "step": 4202 }, { "epoch": 0.3763389109631205, "grad_norm": 1.203874149980032, "learning_rate": 1.4342943192906721e-05, "loss": 0.8529, "step": 4203 }, { "epoch": 0.37642845150819854, "grad_norm": 0.9676937471865007, "learning_rate": 1.4340330514308578e-05, "loss": 0.8512, "step": 4204 }, { "epoch": 0.37651799205327663, "grad_norm": 0.8816738701483038, "learning_rate": 1.4337717470615978e-05, "loss": 0.8653, "step": 4205 }, { "epoch": 0.37660753259835467, "grad_norm": 0.9088213012625905, "learning_rate": 1.4335104062048721e-05, "loss": 0.8325, "step": 4206 }, { "epoch": 0.37669707314343276, "grad_norm": 0.9437175177991065, "learning_rate": 1.4332490288826632e-05, "loss": 0.856, "step": 4207 }, { "epoch": 0.37678661368851085, "grad_norm": 0.986517210753926, "learning_rate": 1.4329876151169581e-05, "loss": 0.8201, "step": 4208 }, { "epoch": 0.3768761542335889, "grad_norm": 1.0603461499648112, "learning_rate": 1.4327261649297462e-05, "loss": 0.9224, "step": 4209 }, { "epoch": 0.376965694778667, "grad_norm": 1.0633569121240445, "learning_rate": 1.432464678343019e-05, "loss": 0.8447, "step": 4210 }, { "epoch": 0.377055235323745, "grad_norm": 0.9316599917294316, "learning_rate": 1.4322031553787721e-05, "loss": 0.8794, "step": 4211 }, { "epoch": 0.3771447758688231, "grad_norm": 1.0053573139515422, "learning_rate": 1.4319415960590046e-05, "loss": 0.7921, "step": 4212 }, { "epoch": 0.37723431641390115, "grad_norm": 1.0106457799594875, "learning_rate": 1.4316800004057174e-05, "loss": 0.8578, "step": 4213 }, { "epoch": 0.37732385695897924, "grad_norm": 0.9180942866481836, "learning_rate": 1.4314183684409155e-05, "loss": 0.8971, "step": 4214 }, { "epoch": 0.3774133975040573, "grad_norm": 0.9998943279127191, "learning_rate": 1.4311567001866063e-05, "loss": 0.8665, "step": 4215 }, { "epoch": 0.3775029380491354, "grad_norm": 0.929882648243161, "learning_rate": 1.4308949956648005e-05, "loss": 0.8952, "step": 4216 }, { "epoch": 0.37759247859421347, "grad_norm": 1.2465203944895376, "learning_rate": 1.4306332548975114e-05, "loss": 0.8817, "step": 4217 }, { "epoch": 0.3776820191392915, "grad_norm": 0.9137013259222495, "learning_rate": 1.4303714779067566e-05, "loss": 0.8356, "step": 4218 }, { "epoch": 0.3777715596843696, "grad_norm": 0.9364230560584578, "learning_rate": 1.4301096647145554e-05, "loss": 0.8809, "step": 4219 }, { "epoch": 0.37786110022944763, "grad_norm": 0.9140623262134444, "learning_rate": 1.4298478153429307e-05, "loss": 0.8144, "step": 4220 }, { "epoch": 0.3779506407745257, "grad_norm": 0.8458572510169693, "learning_rate": 1.4295859298139088e-05, "loss": 0.795, "step": 4221 }, { "epoch": 0.37804018131960376, "grad_norm": 0.9205956467596355, "learning_rate": 1.4293240081495181e-05, "loss": 0.8438, "step": 4222 }, { "epoch": 0.37812972186468186, "grad_norm": 0.9087354375870077, "learning_rate": 1.4290620503717912e-05, "loss": 0.8595, "step": 4223 }, { "epoch": 0.3782192624097599, "grad_norm": 0.9854147362150998, "learning_rate": 1.4288000565027625e-05, "loss": 0.8169, "step": 4224 }, { "epoch": 0.378308802954838, "grad_norm": 0.9424113846040778, "learning_rate": 1.4285380265644703e-05, "loss": 0.8106, "step": 4225 }, { "epoch": 0.3783983434999161, "grad_norm": 0.9277631633750724, "learning_rate": 1.4282759605789562e-05, "loss": 0.8629, "step": 4226 }, { "epoch": 0.3784878840449941, "grad_norm": 0.9452038411119786, "learning_rate": 1.4280138585682637e-05, "loss": 0.8574, "step": 4227 }, { "epoch": 0.3785774245900722, "grad_norm": 1.0633011122398508, "learning_rate": 1.42775172055444e-05, "loss": 0.8251, "step": 4228 }, { "epoch": 0.37866696513515025, "grad_norm": 1.0455673398406378, "learning_rate": 1.4274895465595357e-05, "loss": 0.943, "step": 4229 }, { "epoch": 0.37875650568022834, "grad_norm": 0.8950580181599327, "learning_rate": 1.4272273366056037e-05, "loss": 0.87, "step": 4230 }, { "epoch": 0.3788460462253064, "grad_norm": 0.9961738402803761, "learning_rate": 1.4269650907147006e-05, "loss": 0.842, "step": 4231 }, { "epoch": 0.37893558677038447, "grad_norm": 0.9869635039736778, "learning_rate": 1.4267028089088853e-05, "loss": 0.8965, "step": 4232 }, { "epoch": 0.3790251273154625, "grad_norm": 1.057067285420222, "learning_rate": 1.4264404912102204e-05, "loss": 0.8274, "step": 4233 }, { "epoch": 0.3791146678605406, "grad_norm": 0.9973396331360694, "learning_rate": 1.4261781376407704e-05, "loss": 0.8574, "step": 4234 }, { "epoch": 0.3792042084056187, "grad_norm": 0.9827946251791934, "learning_rate": 1.4259157482226046e-05, "loss": 0.9084, "step": 4235 }, { "epoch": 0.37929374895069673, "grad_norm": 0.9374688859696028, "learning_rate": 1.4256533229777943e-05, "loss": 0.8723, "step": 4236 }, { "epoch": 0.3793832894957748, "grad_norm": 1.0867846742484102, "learning_rate": 1.4253908619284134e-05, "loss": 0.9099, "step": 4237 }, { "epoch": 0.37947283004085286, "grad_norm": 1.0924294421049032, "learning_rate": 1.4251283650965388e-05, "loss": 0.8466, "step": 4238 }, { "epoch": 0.37956237058593095, "grad_norm": 1.2073962546243577, "learning_rate": 1.4248658325042524e-05, "loss": 0.8249, "step": 4239 }, { "epoch": 0.379651911131009, "grad_norm": 0.9163763834459361, "learning_rate": 1.4246032641736362e-05, "loss": 0.8104, "step": 4240 }, { "epoch": 0.3797414516760871, "grad_norm": 1.0138836305492107, "learning_rate": 1.4243406601267769e-05, "loss": 0.8611, "step": 4241 }, { "epoch": 0.3798309922211651, "grad_norm": 0.9723552202197642, "learning_rate": 1.4240780203857645e-05, "loss": 0.8938, "step": 4242 }, { "epoch": 0.3799205327662432, "grad_norm": 0.9205623845181184, "learning_rate": 1.4238153449726909e-05, "loss": 0.8354, "step": 4243 }, { "epoch": 0.3800100733113213, "grad_norm": 0.9622707725132664, "learning_rate": 1.4235526339096515e-05, "loss": 0.7925, "step": 4244 }, { "epoch": 0.38009961385639934, "grad_norm": 1.0785892261987198, "learning_rate": 1.4232898872187446e-05, "loss": 0.8457, "step": 4245 }, { "epoch": 0.38018915440147744, "grad_norm": 0.9164650095794956, "learning_rate": 1.423027104922072e-05, "loss": 0.7852, "step": 4246 }, { "epoch": 0.3802786949465555, "grad_norm": 0.9069031040102574, "learning_rate": 1.4227642870417374e-05, "loss": 0.8368, "step": 4247 }, { "epoch": 0.38036823549163357, "grad_norm": 0.9070925847169027, "learning_rate": 1.4225014335998492e-05, "loss": 0.8552, "step": 4248 }, { "epoch": 0.3804577760367116, "grad_norm": 0.8698162732092911, "learning_rate": 1.422238544618517e-05, "loss": 0.8432, "step": 4249 }, { "epoch": 0.3805473165817897, "grad_norm": 1.2142724803712692, "learning_rate": 1.4219756201198545e-05, "loss": 0.8716, "step": 4250 }, { "epoch": 0.38063685712686773, "grad_norm": 0.8889037302068833, "learning_rate": 1.4217126601259776e-05, "loss": 0.8465, "step": 4251 }, { "epoch": 0.3807263976719458, "grad_norm": 0.9592370205778548, "learning_rate": 1.4214496646590061e-05, "loss": 0.8137, "step": 4252 }, { "epoch": 0.3808159382170239, "grad_norm": 1.305559081491289, "learning_rate": 1.4211866337410625e-05, "loss": 0.8822, "step": 4253 }, { "epoch": 0.38090547876210196, "grad_norm": 0.9720683973384823, "learning_rate": 1.4209235673942713e-05, "loss": 0.8732, "step": 4254 }, { "epoch": 0.38099501930718005, "grad_norm": 0.9324441928406956, "learning_rate": 1.4206604656407616e-05, "loss": 0.831, "step": 4255 }, { "epoch": 0.3810845598522581, "grad_norm": 0.8102839926839986, "learning_rate": 1.4203973285026642e-05, "loss": 0.8386, "step": 4256 }, { "epoch": 0.3811741003973362, "grad_norm": 0.8438876096737119, "learning_rate": 1.4201341560021135e-05, "loss": 0.8513, "step": 4257 }, { "epoch": 0.3812636409424142, "grad_norm": 0.9405831480446842, "learning_rate": 1.419870948161247e-05, "loss": 0.8247, "step": 4258 }, { "epoch": 0.3813531814874923, "grad_norm": 0.9256323930915015, "learning_rate": 1.419607705002204e-05, "loss": 0.8352, "step": 4259 }, { "epoch": 0.38144272203257035, "grad_norm": 0.8993235365135677, "learning_rate": 1.4193444265471285e-05, "loss": 0.8969, "step": 4260 }, { "epoch": 0.38153226257764844, "grad_norm": 1.0203939070077361, "learning_rate": 1.4190811128181665e-05, "loss": 0.8908, "step": 4261 }, { "epoch": 0.38162180312272653, "grad_norm": 0.8867518837972675, "learning_rate": 1.418817763837467e-05, "loss": 0.8217, "step": 4262 }, { "epoch": 0.38171134366780457, "grad_norm": 0.9237608056118288, "learning_rate": 1.4185543796271819e-05, "loss": 0.8439, "step": 4263 }, { "epoch": 0.38180088421288266, "grad_norm": 0.9473744262718337, "learning_rate": 1.4182909602094664e-05, "loss": 0.8839, "step": 4264 }, { "epoch": 0.3818904247579607, "grad_norm": 0.8690428624385834, "learning_rate": 1.418027505606478e-05, "loss": 0.8559, "step": 4265 }, { "epoch": 0.3819799653030388, "grad_norm": 1.012377858570929, "learning_rate": 1.4177640158403785e-05, "loss": 0.8633, "step": 4266 }, { "epoch": 0.38206950584811683, "grad_norm": 0.9725345876329412, "learning_rate": 1.4175004909333311e-05, "loss": 0.8827, "step": 4267 }, { "epoch": 0.3821590463931949, "grad_norm": 1.1259074248593617, "learning_rate": 1.417236930907503e-05, "loss": 0.811, "step": 4268 }, { "epoch": 0.38224858693827296, "grad_norm": 0.9520156065350955, "learning_rate": 1.4169733357850642e-05, "loss": 0.8008, "step": 4269 }, { "epoch": 0.38233812748335105, "grad_norm": 0.9759065697207279, "learning_rate": 1.416709705588187e-05, "loss": 0.8191, "step": 4270 }, { "epoch": 0.38242766802842915, "grad_norm": 0.9342913048013438, "learning_rate": 1.4164460403390468e-05, "loss": 0.8442, "step": 4271 }, { "epoch": 0.3825172085735072, "grad_norm": 0.9261386565299685, "learning_rate": 1.4161823400598234e-05, "loss": 0.8553, "step": 4272 }, { "epoch": 0.3826067491185853, "grad_norm": 1.0639767582168744, "learning_rate": 1.4159186047726976e-05, "loss": 0.9142, "step": 4273 }, { "epoch": 0.3826962896636633, "grad_norm": 1.072377580166384, "learning_rate": 1.4156548344998543e-05, "loss": 0.8776, "step": 4274 }, { "epoch": 0.3827858302087414, "grad_norm": 0.8688137771209221, "learning_rate": 1.4153910292634802e-05, "loss": 0.8131, "step": 4275 }, { "epoch": 0.38287537075381944, "grad_norm": 0.933743373478712, "learning_rate": 1.415127189085767e-05, "loss": 0.9066, "step": 4276 }, { "epoch": 0.38296491129889754, "grad_norm": 1.106902207972974, "learning_rate": 1.4148633139889069e-05, "loss": 0.8045, "step": 4277 }, { "epoch": 0.3830544518439756, "grad_norm": 0.8789845383092317, "learning_rate": 1.4145994039950971e-05, "loss": 0.846, "step": 4278 }, { "epoch": 0.38314399238905367, "grad_norm": 0.9320178752378488, "learning_rate": 1.4143354591265365e-05, "loss": 0.8261, "step": 4279 }, { "epoch": 0.38323353293413176, "grad_norm": 0.8865757357567181, "learning_rate": 1.4140714794054274e-05, "loss": 0.8206, "step": 4280 }, { "epoch": 0.3833230734792098, "grad_norm": 0.8553551883354696, "learning_rate": 1.4138074648539744e-05, "loss": 0.821, "step": 4281 }, { "epoch": 0.3834126140242879, "grad_norm": 0.9559577621983442, "learning_rate": 1.4135434154943861e-05, "loss": 0.8635, "step": 4282 }, { "epoch": 0.3835021545693659, "grad_norm": 1.095410195525319, "learning_rate": 1.4132793313488732e-05, "loss": 0.7741, "step": 4283 }, { "epoch": 0.383591695114444, "grad_norm": 0.8698858960107688, "learning_rate": 1.41301521243965e-05, "loss": 0.7575, "step": 4284 }, { "epoch": 0.38368123565952206, "grad_norm": 0.880629716763362, "learning_rate": 1.4127510587889328e-05, "loss": 0.8253, "step": 4285 }, { "epoch": 0.38377077620460015, "grad_norm": 0.896855197257339, "learning_rate": 1.4124868704189416e-05, "loss": 0.8738, "step": 4286 }, { "epoch": 0.3838603167496782, "grad_norm": 0.8536335908044614, "learning_rate": 1.4122226473518991e-05, "loss": 0.847, "step": 4287 }, { "epoch": 0.3839498572947563, "grad_norm": 1.1513467011902712, "learning_rate": 1.4119583896100309e-05, "loss": 0.818, "step": 4288 }, { "epoch": 0.3840393978398344, "grad_norm": 0.9732392592290227, "learning_rate": 1.4116940972155651e-05, "loss": 0.8287, "step": 4289 }, { "epoch": 0.3841289383849124, "grad_norm": 0.9428344607202426, "learning_rate": 1.4114297701907336e-05, "loss": 0.9013, "step": 4290 }, { "epoch": 0.3842184789299905, "grad_norm": 1.001595853243546, "learning_rate": 1.4111654085577709e-05, "loss": 0.8271, "step": 4291 }, { "epoch": 0.38430801947506854, "grad_norm": 1.0327373690973063, "learning_rate": 1.4109010123389133e-05, "loss": 0.8524, "step": 4292 }, { "epoch": 0.38439756002014663, "grad_norm": 0.872689374391715, "learning_rate": 1.410636581556402e-05, "loss": 0.826, "step": 4293 }, { "epoch": 0.38448710056522467, "grad_norm": 0.9405576608820454, "learning_rate": 1.4103721162324795e-05, "loss": 0.8167, "step": 4294 }, { "epoch": 0.38457664111030276, "grad_norm": 0.958985046725877, "learning_rate": 1.4101076163893915e-05, "loss": 0.8764, "step": 4295 }, { "epoch": 0.3846661816553808, "grad_norm": 1.071263056616227, "learning_rate": 1.4098430820493878e-05, "loss": 0.8892, "step": 4296 }, { "epoch": 0.3847557222004589, "grad_norm": 0.9300828484513695, "learning_rate": 1.409578513234719e-05, "loss": 0.8696, "step": 4297 }, { "epoch": 0.384845262745537, "grad_norm": 0.9437021221337074, "learning_rate": 1.4093139099676407e-05, "loss": 0.8255, "step": 4298 }, { "epoch": 0.384934803290615, "grad_norm": 0.9960229291129784, "learning_rate": 1.4090492722704103e-05, "loss": 0.8686, "step": 4299 }, { "epoch": 0.3850243438356931, "grad_norm": 0.8979512275750243, "learning_rate": 1.4087846001652878e-05, "loss": 0.8228, "step": 4300 }, { "epoch": 0.38511388438077115, "grad_norm": 0.9178256480161378, "learning_rate": 1.4085198936745368e-05, "loss": 0.864, "step": 4301 }, { "epoch": 0.38520342492584925, "grad_norm": 1.0044870989634411, "learning_rate": 1.4082551528204237e-05, "loss": 0.8065, "step": 4302 }, { "epoch": 0.3852929654709273, "grad_norm": 0.9617772172822552, "learning_rate": 1.4079903776252178e-05, "loss": 0.8585, "step": 4303 }, { "epoch": 0.3853825060160054, "grad_norm": 0.9673509706484636, "learning_rate": 1.4077255681111905e-05, "loss": 0.9104, "step": 4304 }, { "epoch": 0.3854720465610834, "grad_norm": 0.9823270335554953, "learning_rate": 1.4074607243006171e-05, "loss": 0.9083, "step": 4305 }, { "epoch": 0.3855615871061615, "grad_norm": 0.9333533835196084, "learning_rate": 1.4071958462157756e-05, "loss": 0.8777, "step": 4306 }, { "epoch": 0.3856511276512396, "grad_norm": 0.9197493221909131, "learning_rate": 1.4069309338789461e-05, "loss": 0.8118, "step": 4307 }, { "epoch": 0.38574066819631764, "grad_norm": 0.9412536228405011, "learning_rate": 1.4066659873124127e-05, "loss": 0.832, "step": 4308 }, { "epoch": 0.38583020874139573, "grad_norm": 0.8900960298494255, "learning_rate": 1.4064010065384616e-05, "loss": 0.8415, "step": 4309 }, { "epoch": 0.38591974928647377, "grad_norm": 0.9574022279263648, "learning_rate": 1.4061359915793822e-05, "loss": 0.9219, "step": 4310 }, { "epoch": 0.38600928983155186, "grad_norm": 0.8803996126541075, "learning_rate": 1.4058709424574668e-05, "loss": 0.9162, "step": 4311 }, { "epoch": 0.3860988303766299, "grad_norm": 0.9820062178464307, "learning_rate": 1.4056058591950101e-05, "loss": 0.8322, "step": 4312 }, { "epoch": 0.386188370921708, "grad_norm": 1.0867289654869534, "learning_rate": 1.4053407418143104e-05, "loss": 0.8483, "step": 4313 }, { "epoch": 0.38627791146678603, "grad_norm": 1.0406821788831242, "learning_rate": 1.4050755903376682e-05, "loss": 0.8885, "step": 4314 }, { "epoch": 0.3863674520118641, "grad_norm": 0.8786489512235386, "learning_rate": 1.4048104047873876e-05, "loss": 0.8532, "step": 4315 }, { "epoch": 0.3864569925569422, "grad_norm": 0.9068648804136524, "learning_rate": 1.4045451851857749e-05, "loss": 0.8612, "step": 4316 }, { "epoch": 0.38654653310202025, "grad_norm": 0.9020771941862551, "learning_rate": 1.4042799315551394e-05, "loss": 0.8507, "step": 4317 }, { "epoch": 0.38663607364709834, "grad_norm": 0.8767583290432911, "learning_rate": 1.4040146439177937e-05, "loss": 0.8406, "step": 4318 }, { "epoch": 0.3867256141921764, "grad_norm": 1.0022282165748737, "learning_rate": 1.4037493222960525e-05, "loss": 0.91, "step": 4319 }, { "epoch": 0.3868151547372545, "grad_norm": 0.9041341302018024, "learning_rate": 1.403483966712234e-05, "loss": 0.8592, "step": 4320 }, { "epoch": 0.3869046952823325, "grad_norm": 0.9593574254787363, "learning_rate": 1.4032185771886593e-05, "loss": 0.8579, "step": 4321 }, { "epoch": 0.3869942358274106, "grad_norm": 0.9157039629303706, "learning_rate": 1.4029531537476515e-05, "loss": 0.8451, "step": 4322 }, { "epoch": 0.38708377637248864, "grad_norm": 0.9400030550174615, "learning_rate": 1.402687696411538e-05, "loss": 0.9032, "step": 4323 }, { "epoch": 0.38717331691756673, "grad_norm": 0.9987093123835726, "learning_rate": 1.4024222052026473e-05, "loss": 0.7824, "step": 4324 }, { "epoch": 0.3872628574626448, "grad_norm": 0.8485699986713621, "learning_rate": 1.402156680143312e-05, "loss": 0.7973, "step": 4325 }, { "epoch": 0.38735239800772286, "grad_norm": 1.02578180214162, "learning_rate": 1.4018911212558677e-05, "loss": 0.846, "step": 4326 }, { "epoch": 0.38744193855280096, "grad_norm": 0.950263862498733, "learning_rate": 1.4016255285626517e-05, "loss": 0.8878, "step": 4327 }, { "epoch": 0.387531479097879, "grad_norm": 0.916345684872174, "learning_rate": 1.4013599020860046e-05, "loss": 0.8389, "step": 4328 }, { "epoch": 0.3876210196429571, "grad_norm": 1.0249819283264308, "learning_rate": 1.401094241848271e-05, "loss": 0.8613, "step": 4329 }, { "epoch": 0.3877105601880351, "grad_norm": 0.8885072102604518, "learning_rate": 1.4008285478717967e-05, "loss": 0.8822, "step": 4330 }, { "epoch": 0.3878001007331132, "grad_norm": 0.9633357056027243, "learning_rate": 1.400562820178931e-05, "loss": 0.8726, "step": 4331 }, { "epoch": 0.38788964127819126, "grad_norm": 1.2694737301866863, "learning_rate": 1.4002970587920264e-05, "loss": 0.824, "step": 4332 }, { "epoch": 0.38797918182326935, "grad_norm": 0.9316470266952813, "learning_rate": 1.4000312637334376e-05, "loss": 0.8456, "step": 4333 }, { "epoch": 0.38806872236834744, "grad_norm": 0.981039211722298, "learning_rate": 1.3997654350255223e-05, "loss": 0.8727, "step": 4334 }, { "epoch": 0.3881582629134255, "grad_norm": 1.106531448032237, "learning_rate": 1.3994995726906415e-05, "loss": 0.8553, "step": 4335 }, { "epoch": 0.38824780345850357, "grad_norm": 0.9499351717802188, "learning_rate": 1.3992336767511585e-05, "loss": 0.8058, "step": 4336 }, { "epoch": 0.3883373440035816, "grad_norm": 0.8974496379060156, "learning_rate": 1.3989677472294397e-05, "loss": 0.8684, "step": 4337 }, { "epoch": 0.3884268845486597, "grad_norm": 0.9797572117318515, "learning_rate": 1.3987017841478539e-05, "loss": 0.8323, "step": 4338 }, { "epoch": 0.38851642509373774, "grad_norm": 1.0102729822760879, "learning_rate": 1.398435787528773e-05, "loss": 0.9084, "step": 4339 }, { "epoch": 0.38860596563881583, "grad_norm": 0.9183288231872694, "learning_rate": 1.3981697573945726e-05, "loss": 0.7844, "step": 4340 }, { "epoch": 0.38869550618389387, "grad_norm": 0.9769498120870611, "learning_rate": 1.3979036937676294e-05, "loss": 0.8098, "step": 4341 }, { "epoch": 0.38878504672897196, "grad_norm": 1.0167669718911116, "learning_rate": 1.3976375966703241e-05, "loss": 0.787, "step": 4342 }, { "epoch": 0.38887458727405005, "grad_norm": 0.9812453696310841, "learning_rate": 1.3973714661250402e-05, "loss": 0.8939, "step": 4343 }, { "epoch": 0.3889641278191281, "grad_norm": 0.916553348073219, "learning_rate": 1.3971053021541634e-05, "loss": 0.8382, "step": 4344 }, { "epoch": 0.3890536683642062, "grad_norm": 1.031731947140006, "learning_rate": 1.3968391047800827e-05, "loss": 0.8513, "step": 4345 }, { "epoch": 0.3891432089092842, "grad_norm": 0.8753378447057569, "learning_rate": 1.3965728740251894e-05, "loss": 0.7696, "step": 4346 }, { "epoch": 0.3892327494543623, "grad_norm": 0.9375260404746948, "learning_rate": 1.3963066099118785e-05, "loss": 0.8099, "step": 4347 }, { "epoch": 0.38932228999944035, "grad_norm": 1.1677852058642593, "learning_rate": 1.396040312462547e-05, "loss": 0.8603, "step": 4348 }, { "epoch": 0.38941183054451844, "grad_norm": 1.0118260464213775, "learning_rate": 1.3957739816995948e-05, "loss": 0.8508, "step": 4349 }, { "epoch": 0.3895013710895965, "grad_norm": 0.9669125945422604, "learning_rate": 1.395507617645425e-05, "loss": 0.8371, "step": 4350 }, { "epoch": 0.3895909116346746, "grad_norm": 1.08540787302438, "learning_rate": 1.3952412203224437e-05, "loss": 0.846, "step": 4351 }, { "epoch": 0.38968045217975267, "grad_norm": 0.8946888088373711, "learning_rate": 1.3949747897530583e-05, "loss": 0.8598, "step": 4352 }, { "epoch": 0.3897699927248307, "grad_norm": 0.9139977317401248, "learning_rate": 1.3947083259596812e-05, "loss": 0.8429, "step": 4353 }, { "epoch": 0.3898595332699088, "grad_norm": 1.11661369917112, "learning_rate": 1.3944418289647257e-05, "loss": 0.8391, "step": 4354 }, { "epoch": 0.38994907381498684, "grad_norm": 1.0448964070492612, "learning_rate": 1.3941752987906088e-05, "loss": 0.8582, "step": 4355 }, { "epoch": 0.39003861436006493, "grad_norm": 0.9607039448516149, "learning_rate": 1.3939087354597507e-05, "loss": 0.8943, "step": 4356 }, { "epoch": 0.39012815490514297, "grad_norm": 2.316024328723829, "learning_rate": 1.3936421389945729e-05, "loss": 0.903, "step": 4357 }, { "epoch": 0.39021769545022106, "grad_norm": 1.0268005530301259, "learning_rate": 1.3933755094175013e-05, "loss": 0.8914, "step": 4358 }, { "epoch": 0.3903072359952991, "grad_norm": 0.866225061105971, "learning_rate": 1.3931088467509639e-05, "loss": 0.7966, "step": 4359 }, { "epoch": 0.3903967765403772, "grad_norm": 0.9234187978297268, "learning_rate": 1.3928421510173913e-05, "loss": 0.8326, "step": 4360 }, { "epoch": 0.3904863170854553, "grad_norm": 0.9186714463958888, "learning_rate": 1.3925754222392167e-05, "loss": 0.822, "step": 4361 }, { "epoch": 0.3905758576305333, "grad_norm": 0.8825929864338079, "learning_rate": 1.3923086604388774e-05, "loss": 0.8995, "step": 4362 }, { "epoch": 0.3906653981756114, "grad_norm": 0.9601314627873958, "learning_rate": 1.3920418656388117e-05, "loss": 0.8289, "step": 4363 }, { "epoch": 0.39075493872068945, "grad_norm": 0.8399128029083268, "learning_rate": 1.391775037861462e-05, "loss": 0.813, "step": 4364 }, { "epoch": 0.39084447926576754, "grad_norm": 0.9038736268327304, "learning_rate": 1.3915081771292726e-05, "loss": 0.8495, "step": 4365 }, { "epoch": 0.3909340198108456, "grad_norm": 0.9654302051689361, "learning_rate": 1.3912412834646914e-05, "loss": 0.8103, "step": 4366 }, { "epoch": 0.39102356035592367, "grad_norm": 0.8875386590279613, "learning_rate": 1.390974356890168e-05, "loss": 0.8575, "step": 4367 }, { "epoch": 0.3911131009010017, "grad_norm": 0.8735904239846528, "learning_rate": 1.3907073974281562e-05, "loss": 0.7866, "step": 4368 }, { "epoch": 0.3912026414460798, "grad_norm": 0.9361460247292266, "learning_rate": 1.390440405101111e-05, "loss": 0.8428, "step": 4369 }, { "epoch": 0.3912921819911579, "grad_norm": 0.9201267894732166, "learning_rate": 1.3901733799314916e-05, "loss": 0.8224, "step": 4370 }, { "epoch": 0.39138172253623593, "grad_norm": 0.9690148023612849, "learning_rate": 1.3899063219417586e-05, "loss": 0.87, "step": 4371 }, { "epoch": 0.391471263081314, "grad_norm": 1.0052688165934114, "learning_rate": 1.3896392311543766e-05, "loss": 0.8471, "step": 4372 }, { "epoch": 0.39156080362639206, "grad_norm": 0.9096339062435472, "learning_rate": 1.3893721075918121e-05, "loss": 0.8962, "step": 4373 }, { "epoch": 0.39165034417147015, "grad_norm": 0.9481615439397594, "learning_rate": 1.3891049512765347e-05, "loss": 0.8133, "step": 4374 }, { "epoch": 0.3917398847165482, "grad_norm": 1.0622202445862254, "learning_rate": 1.3888377622310173e-05, "loss": 0.8691, "step": 4375 }, { "epoch": 0.3918294252616263, "grad_norm": 0.9104192920270602, "learning_rate": 1.3885705404777334e-05, "loss": 0.8119, "step": 4376 }, { "epoch": 0.3919189658067043, "grad_norm": 1.0730988714446843, "learning_rate": 1.3883032860391626e-05, "loss": 0.8347, "step": 4377 }, { "epoch": 0.3920085063517824, "grad_norm": 0.8796530338796501, "learning_rate": 1.3880359989377847e-05, "loss": 0.8223, "step": 4378 }, { "epoch": 0.3920980468968605, "grad_norm": 0.9231030151116769, "learning_rate": 1.387768679196083e-05, "loss": 0.8167, "step": 4379 }, { "epoch": 0.39218758744193855, "grad_norm": 0.9894131240123671, "learning_rate": 1.3875013268365436e-05, "loss": 0.8041, "step": 4380 }, { "epoch": 0.39227712798701664, "grad_norm": 0.9266938072541895, "learning_rate": 1.3872339418816553e-05, "loss": 0.8155, "step": 4381 }, { "epoch": 0.3923666685320947, "grad_norm": 0.9612890433504183, "learning_rate": 1.3869665243539097e-05, "loss": 0.8742, "step": 4382 }, { "epoch": 0.39245620907717277, "grad_norm": 0.8874424793831853, "learning_rate": 1.3866990742758013e-05, "loss": 0.8519, "step": 4383 }, { "epoch": 0.3925457496222508, "grad_norm": 1.0600835428736393, "learning_rate": 1.3864315916698266e-05, "loss": 0.8348, "step": 4384 }, { "epoch": 0.3926352901673289, "grad_norm": 1.1778071593422073, "learning_rate": 1.3861640765584857e-05, "loss": 0.84, "step": 4385 }, { "epoch": 0.39272483071240694, "grad_norm": 0.9280436112107391, "learning_rate": 1.3858965289642811e-05, "loss": 0.829, "step": 4386 }, { "epoch": 0.39281437125748503, "grad_norm": 0.889346521185196, "learning_rate": 1.3856289489097181e-05, "loss": 0.8652, "step": 4387 }, { "epoch": 0.3929039118025631, "grad_norm": 0.9022687799347335, "learning_rate": 1.3853613364173043e-05, "loss": 0.8345, "step": 4388 }, { "epoch": 0.39299345234764116, "grad_norm": 0.9615885614981415, "learning_rate": 1.3850936915095512e-05, "loss": 0.8095, "step": 4389 }, { "epoch": 0.39308299289271925, "grad_norm": 0.9280503788569006, "learning_rate": 1.3848260142089714e-05, "loss": 0.8492, "step": 4390 }, { "epoch": 0.3931725334377973, "grad_norm": 0.9217216270985268, "learning_rate": 1.384558304538081e-05, "loss": 0.8331, "step": 4391 }, { "epoch": 0.3932620739828754, "grad_norm": 0.957404776947905, "learning_rate": 1.3842905625193998e-05, "loss": 0.8187, "step": 4392 }, { "epoch": 0.3933516145279534, "grad_norm": 0.8992985649268581, "learning_rate": 1.3840227881754485e-05, "loss": 0.8328, "step": 4393 }, { "epoch": 0.3934411550730315, "grad_norm": 0.8470041329119131, "learning_rate": 1.3837549815287514e-05, "loss": 0.7872, "step": 4394 }, { "epoch": 0.39353069561810955, "grad_norm": 0.9601284425157596, "learning_rate": 1.3834871426018361e-05, "loss": 0.8098, "step": 4395 }, { "epoch": 0.39362023616318764, "grad_norm": 1.0320052277868255, "learning_rate": 1.3832192714172319e-05, "loss": 0.9062, "step": 4396 }, { "epoch": 0.39370977670826574, "grad_norm": 1.01664736854573, "learning_rate": 1.3829513679974715e-05, "loss": 0.8821, "step": 4397 }, { "epoch": 0.3937993172533438, "grad_norm": 0.9509103921756694, "learning_rate": 1.3826834323650899e-05, "loss": 0.8796, "step": 4398 }, { "epoch": 0.39388885779842187, "grad_norm": 0.8759599557938503, "learning_rate": 1.3824154645426251e-05, "loss": 0.8722, "step": 4399 }, { "epoch": 0.3939783983434999, "grad_norm": 0.9197583012357854, "learning_rate": 1.3821474645526174e-05, "loss": 0.8332, "step": 4400 }, { "epoch": 0.394067938888578, "grad_norm": 1.234147526389074, "learning_rate": 1.3818794324176103e-05, "loss": 0.808, "step": 4401 }, { "epoch": 0.39415747943365603, "grad_norm": 0.8734731885833076, "learning_rate": 1.3816113681601499e-05, "loss": 0.8576, "step": 4402 }, { "epoch": 0.3942470199787341, "grad_norm": 0.9597112028837594, "learning_rate": 1.3813432718027849e-05, "loss": 0.8413, "step": 4403 }, { "epoch": 0.39433656052381216, "grad_norm": 0.925958937771262, "learning_rate": 1.3810751433680665e-05, "loss": 0.8676, "step": 4404 }, { "epoch": 0.39442610106889026, "grad_norm": 0.9322276327961405, "learning_rate": 1.3808069828785489e-05, "loss": 0.8842, "step": 4405 }, { "epoch": 0.39451564161396835, "grad_norm": 0.8383103932605624, "learning_rate": 1.3805387903567885e-05, "loss": 0.8413, "step": 4406 }, { "epoch": 0.3946051821590464, "grad_norm": 0.92133066760104, "learning_rate": 1.3802705658253452e-05, "loss": 0.7347, "step": 4407 }, { "epoch": 0.3946947227041245, "grad_norm": 0.9135407525122693, "learning_rate": 1.3800023093067814e-05, "loss": 0.8004, "step": 4408 }, { "epoch": 0.3947842632492025, "grad_norm": 1.0322266056492955, "learning_rate": 1.3797340208236611e-05, "loss": 0.9178, "step": 4409 }, { "epoch": 0.3948738037942806, "grad_norm": 0.9330819964109333, "learning_rate": 1.379465700398553e-05, "loss": 0.8556, "step": 4410 }, { "epoch": 0.39496334433935865, "grad_norm": 0.9589600499774461, "learning_rate": 1.3791973480540265e-05, "loss": 0.8337, "step": 4411 }, { "epoch": 0.39505288488443674, "grad_norm": 0.8145779145985143, "learning_rate": 1.3789289638126549e-05, "loss": 0.8886, "step": 4412 }, { "epoch": 0.3951424254295148, "grad_norm": 0.9368340917103307, "learning_rate": 1.3786605476970134e-05, "loss": 0.8379, "step": 4413 }, { "epoch": 0.39523196597459287, "grad_norm": 0.9665018786877776, "learning_rate": 1.3783920997296809e-05, "loss": 0.903, "step": 4414 }, { "epoch": 0.39532150651967096, "grad_norm": 0.9744554511080462, "learning_rate": 1.3781236199332377e-05, "loss": 0.8193, "step": 4415 }, { "epoch": 0.395411047064749, "grad_norm": 0.9984648646470298, "learning_rate": 1.3778551083302683e-05, "loss": 0.9238, "step": 4416 }, { "epoch": 0.3955005876098271, "grad_norm": 1.8587189012509444, "learning_rate": 1.377586564943358e-05, "loss": 0.8597, "step": 4417 }, { "epoch": 0.39559012815490513, "grad_norm": 0.9820593197200812, "learning_rate": 1.3773179897950964e-05, "loss": 0.9142, "step": 4418 }, { "epoch": 0.3956796686999832, "grad_norm": 1.0023579986949207, "learning_rate": 1.3770493829080754e-05, "loss": 0.8597, "step": 4419 }, { "epoch": 0.39576920924506126, "grad_norm": 1.001339650346833, "learning_rate": 1.3767807443048885e-05, "loss": 0.8808, "step": 4420 }, { "epoch": 0.39585874979013935, "grad_norm": 0.9960995115179302, "learning_rate": 1.3765120740081332e-05, "loss": 0.8509, "step": 4421 }, { "epoch": 0.3959482903352174, "grad_norm": 0.9416600730312418, "learning_rate": 1.3762433720404097e-05, "loss": 0.7846, "step": 4422 }, { "epoch": 0.3960378308802955, "grad_norm": 0.9859649192647081, "learning_rate": 1.3759746384243195e-05, "loss": 0.8857, "step": 4423 }, { "epoch": 0.3961273714253736, "grad_norm": 0.9308342581251277, "learning_rate": 1.375705873182468e-05, "loss": 0.8947, "step": 4424 }, { "epoch": 0.3962169119704516, "grad_norm": 0.946731604520498, "learning_rate": 1.3754370763374626e-05, "loss": 0.762, "step": 4425 }, { "epoch": 0.3963064525155297, "grad_norm": 0.9719468692547852, "learning_rate": 1.375168247911914e-05, "loss": 0.8205, "step": 4426 }, { "epoch": 0.39639599306060774, "grad_norm": 1.0073513431991896, "learning_rate": 1.374899387928435e-05, "loss": 0.8699, "step": 4427 }, { "epoch": 0.39648553360568584, "grad_norm": 0.9252415956914926, "learning_rate": 1.3746304964096409e-05, "loss": 0.8642, "step": 4428 }, { "epoch": 0.3965750741507639, "grad_norm": 0.97873111542137, "learning_rate": 1.3743615733781504e-05, "loss": 0.8551, "step": 4429 }, { "epoch": 0.39666461469584197, "grad_norm": 0.9806292018994187, "learning_rate": 1.374092618856584e-05, "loss": 0.8092, "step": 4430 }, { "epoch": 0.39675415524092, "grad_norm": 0.9556317038521843, "learning_rate": 1.3738236328675658e-05, "loss": 0.9211, "step": 4431 }, { "epoch": 0.3968436957859981, "grad_norm": 0.9648291637022697, "learning_rate": 1.3735546154337218e-05, "loss": 0.8615, "step": 4432 }, { "epoch": 0.3969332363310762, "grad_norm": 1.0397258488292191, "learning_rate": 1.3732855665776808e-05, "loss": 0.8947, "step": 4433 }, { "epoch": 0.3970227768761542, "grad_norm": 0.9396599887989061, "learning_rate": 1.3730164863220746e-05, "loss": 0.8248, "step": 4434 }, { "epoch": 0.3971123174212323, "grad_norm": 1.0203642860131545, "learning_rate": 1.372747374689537e-05, "loss": 0.8899, "step": 4435 }, { "epoch": 0.39720185796631036, "grad_norm": 1.1881706247539423, "learning_rate": 1.3724782317027046e-05, "loss": 0.9191, "step": 4436 }, { "epoch": 0.39729139851138845, "grad_norm": 1.0388388313209214, "learning_rate": 1.3722090573842173e-05, "loss": 0.824, "step": 4437 }, { "epoch": 0.3973809390564665, "grad_norm": 0.9065276285889443, "learning_rate": 1.3719398517567172e-05, "loss": 0.8877, "step": 4438 }, { "epoch": 0.3974704796015446, "grad_norm": 0.8763718611180635, "learning_rate": 1.3716706148428487e-05, "loss": 0.8254, "step": 4439 }, { "epoch": 0.3975600201466226, "grad_norm": 0.8778965272462631, "learning_rate": 1.3714013466652592e-05, "loss": 0.8322, "step": 4440 }, { "epoch": 0.3976495606917007, "grad_norm": 0.9922961405589652, "learning_rate": 1.371132047246599e-05, "loss": 0.8868, "step": 4441 }, { "epoch": 0.3977391012367788, "grad_norm": 0.8849921930041346, "learning_rate": 1.3708627166095199e-05, "loss": 0.8582, "step": 4442 }, { "epoch": 0.39782864178185684, "grad_norm": 0.93941497167276, "learning_rate": 1.3705933547766778e-05, "loss": 0.8467, "step": 4443 }, { "epoch": 0.39791818232693493, "grad_norm": 0.9096076321095928, "learning_rate": 1.3703239617707308e-05, "loss": 0.8577, "step": 4444 }, { "epoch": 0.39800772287201297, "grad_norm": 0.9775998871123823, "learning_rate": 1.3700545376143381e-05, "loss": 0.8474, "step": 4445 }, { "epoch": 0.39809726341709106, "grad_norm": 0.9346929989107483, "learning_rate": 1.3697850823301642e-05, "loss": 0.8315, "step": 4446 }, { "epoch": 0.3981868039621691, "grad_norm": 0.9177284749382002, "learning_rate": 1.369515595940874e-05, "loss": 0.8688, "step": 4447 }, { "epoch": 0.3982763445072472, "grad_norm": 0.9034819967760698, "learning_rate": 1.3692460784691357e-05, "loss": 0.8497, "step": 4448 }, { "epoch": 0.39836588505232523, "grad_norm": 0.8559289759829627, "learning_rate": 1.3689765299376212e-05, "loss": 0.8378, "step": 4449 }, { "epoch": 0.3984554255974033, "grad_norm": 0.9252137252384982, "learning_rate": 1.368706950369003e-05, "loss": 0.8855, "step": 4450 }, { "epoch": 0.3985449661424814, "grad_norm": 0.8905599303696533, "learning_rate": 1.3684373397859573e-05, "loss": 0.8159, "step": 4451 }, { "epoch": 0.39863450668755945, "grad_norm": 0.9361216357755833, "learning_rate": 1.368167698211164e-05, "loss": 0.8894, "step": 4452 }, { "epoch": 0.39872404723263755, "grad_norm": 1.1497370561760702, "learning_rate": 1.3678980256673034e-05, "loss": 0.8494, "step": 4453 }, { "epoch": 0.3988135877777156, "grad_norm": 1.1331748858027035, "learning_rate": 1.3676283221770595e-05, "loss": 0.8342, "step": 4454 }, { "epoch": 0.3989031283227937, "grad_norm": 0.9742609324623751, "learning_rate": 1.3673585877631192e-05, "loss": 0.8908, "step": 4455 }, { "epoch": 0.3989926688678717, "grad_norm": 1.0649250636549026, "learning_rate": 1.3670888224481717e-05, "loss": 0.7885, "step": 4456 }, { "epoch": 0.3990822094129498, "grad_norm": 0.9202688598092882, "learning_rate": 1.3668190262549085e-05, "loss": 0.8778, "step": 4457 }, { "epoch": 0.39917174995802784, "grad_norm": 0.8926703013115901, "learning_rate": 1.3665491992060243e-05, "loss": 0.7983, "step": 4458 }, { "epoch": 0.39926129050310594, "grad_norm": 0.9393187929328782, "learning_rate": 1.3662793413242162e-05, "loss": 0.8602, "step": 4459 }, { "epoch": 0.39935083104818403, "grad_norm": 0.9965123964980215, "learning_rate": 1.366009452632183e-05, "loss": 0.8583, "step": 4460 }, { "epoch": 0.39944037159326207, "grad_norm": 0.9652940414311767, "learning_rate": 1.3657395331526277e-05, "loss": 0.8192, "step": 4461 }, { "epoch": 0.39952991213834016, "grad_norm": 1.000579874071736, "learning_rate": 1.3654695829082547e-05, "loss": 0.8194, "step": 4462 }, { "epoch": 0.3996194526834182, "grad_norm": 0.9214729686800737, "learning_rate": 1.3651996019217712e-05, "loss": 0.7846, "step": 4463 }, { "epoch": 0.3997089932284963, "grad_norm": 0.964798851799019, "learning_rate": 1.3649295902158874e-05, "loss": 0.8574, "step": 4464 }, { "epoch": 0.3997985337735743, "grad_norm": 0.9072131773093552, "learning_rate": 1.3646595478133158e-05, "loss": 0.8706, "step": 4465 }, { "epoch": 0.3998880743186524, "grad_norm": 0.9343619815846401, "learning_rate": 1.364389474736771e-05, "loss": 0.8946, "step": 4466 }, { "epoch": 0.39997761486373046, "grad_norm": 0.888477664457081, "learning_rate": 1.364119371008971e-05, "loss": 0.8321, "step": 4467 }, { "epoch": 0.40006715540880855, "grad_norm": 1.1271464424441613, "learning_rate": 1.3638492366526364e-05, "loss": 0.8734, "step": 4468 }, { "epoch": 0.40015669595388664, "grad_norm": 0.9485863184681835, "learning_rate": 1.3635790716904894e-05, "loss": 0.8738, "step": 4469 }, { "epoch": 0.4002462364989647, "grad_norm": 1.0452633444698731, "learning_rate": 1.3633088761452557e-05, "loss": 0.9401, "step": 4470 }, { "epoch": 0.4003357770440428, "grad_norm": 0.9661636635651168, "learning_rate": 1.3630386500396637e-05, "loss": 0.862, "step": 4471 }, { "epoch": 0.4004253175891208, "grad_norm": 0.8936469353949354, "learning_rate": 1.362768393396443e-05, "loss": 0.8304, "step": 4472 }, { "epoch": 0.4005148581341989, "grad_norm": 0.8816972531088852, "learning_rate": 1.3624981062383274e-05, "loss": 0.8338, "step": 4473 }, { "epoch": 0.40060439867927694, "grad_norm": 0.8761735847969794, "learning_rate": 1.3622277885880526e-05, "loss": 0.8677, "step": 4474 }, { "epoch": 0.40069393922435503, "grad_norm": 0.8935426172795219, "learning_rate": 1.361957440468356e-05, "loss": 0.8396, "step": 4475 }, { "epoch": 0.40078347976943307, "grad_norm": 0.8787888345569079, "learning_rate": 1.3616870619019795e-05, "loss": 0.867, "step": 4476 }, { "epoch": 0.40087302031451116, "grad_norm": 0.965390833492568, "learning_rate": 1.3614166529116659e-05, "loss": 0.8572, "step": 4477 }, { "epoch": 0.40096256085958926, "grad_norm": 0.9740971215939589, "learning_rate": 1.361146213520161e-05, "loss": 0.8385, "step": 4478 }, { "epoch": 0.4010521014046673, "grad_norm": 0.9025120988745962, "learning_rate": 1.360875743750214e-05, "loss": 0.8655, "step": 4479 }, { "epoch": 0.4011416419497454, "grad_norm": 1.0186064649949762, "learning_rate": 1.360605243624575e-05, "loss": 0.8553, "step": 4480 }, { "epoch": 0.4012311824948234, "grad_norm": 0.9250276269164744, "learning_rate": 1.360334713165998e-05, "loss": 0.8044, "step": 4481 }, { "epoch": 0.4013207230399015, "grad_norm": 0.9322636939402259, "learning_rate": 1.3600641523972393e-05, "loss": 0.8432, "step": 4482 }, { "epoch": 0.40141026358497955, "grad_norm": 0.8996558050061848, "learning_rate": 1.3597935613410576e-05, "loss": 0.8471, "step": 4483 }, { "epoch": 0.40149980413005765, "grad_norm": 2.326977929397955, "learning_rate": 1.3595229400202137e-05, "loss": 0.9359, "step": 4484 }, { "epoch": 0.4015893446751357, "grad_norm": 0.9927075276244369, "learning_rate": 1.3592522884574717e-05, "loss": 0.8261, "step": 4485 }, { "epoch": 0.4016788852202138, "grad_norm": 0.877379387454843, "learning_rate": 1.3589816066755978e-05, "loss": 0.8186, "step": 4486 }, { "epoch": 0.40176842576529187, "grad_norm": 0.9581301362758288, "learning_rate": 1.358710894697361e-05, "loss": 0.8924, "step": 4487 }, { "epoch": 0.4018579663103699, "grad_norm": 0.9075046332936687, "learning_rate": 1.3584401525455331e-05, "loss": 0.8715, "step": 4488 }, { "epoch": 0.401947506855448, "grad_norm": 0.9302898791311506, "learning_rate": 1.3581693802428873e-05, "loss": 0.8113, "step": 4489 }, { "epoch": 0.40203704740052604, "grad_norm": 0.9670470035990554, "learning_rate": 1.3578985778122003e-05, "loss": 0.8533, "step": 4490 }, { "epoch": 0.40212658794560413, "grad_norm": 0.9360660088757257, "learning_rate": 1.3576277452762512e-05, "loss": 0.8995, "step": 4491 }, { "epoch": 0.40221612849068217, "grad_norm": 0.942163854335068, "learning_rate": 1.3573568826578215e-05, "loss": 0.8759, "step": 4492 }, { "epoch": 0.40230566903576026, "grad_norm": 1.0234700830736927, "learning_rate": 1.3570859899796957e-05, "loss": 0.8278, "step": 4493 }, { "epoch": 0.4023952095808383, "grad_norm": 0.9692503125331849, "learning_rate": 1.3568150672646598e-05, "loss": 0.8607, "step": 4494 }, { "epoch": 0.4024847501259164, "grad_norm": 0.9325685504847488, "learning_rate": 1.3565441145355036e-05, "loss": 0.8778, "step": 4495 }, { "epoch": 0.4025742906709945, "grad_norm": 0.8801075544714826, "learning_rate": 1.3562731318150177e-05, "loss": 0.8094, "step": 4496 }, { "epoch": 0.4026638312160725, "grad_norm": 1.0693582860488853, "learning_rate": 1.3560021191259973e-05, "loss": 0.8769, "step": 4497 }, { "epoch": 0.4027533717611506, "grad_norm": 1.1543768107375647, "learning_rate": 1.355731076491239e-05, "loss": 0.8391, "step": 4498 }, { "epoch": 0.40284291230622865, "grad_norm": 1.1614143836238247, "learning_rate": 1.3554600039335413e-05, "loss": 0.8868, "step": 4499 }, { "epoch": 0.40293245285130674, "grad_norm": 0.96024822484628, "learning_rate": 1.3551889014757067e-05, "loss": 0.8048, "step": 4500 }, { "epoch": 0.4030219933963848, "grad_norm": 1.2757267786068034, "learning_rate": 1.3549177691405391e-05, "loss": 0.8522, "step": 4501 }, { "epoch": 0.4031115339414629, "grad_norm": 0.9952925269442845, "learning_rate": 1.354646606950845e-05, "loss": 0.9318, "step": 4502 }, { "epoch": 0.4032010744865409, "grad_norm": 1.134944826076881, "learning_rate": 1.3543754149294344e-05, "loss": 0.8511, "step": 4503 }, { "epoch": 0.403290615031619, "grad_norm": 0.9015969479082923, "learning_rate": 1.3541041930991189e-05, "loss": 0.8202, "step": 4504 }, { "epoch": 0.4033801555766971, "grad_norm": 0.936302782037889, "learning_rate": 1.353832941482712e-05, "loss": 0.8217, "step": 4505 }, { "epoch": 0.40346969612177513, "grad_norm": 0.8761864219275303, "learning_rate": 1.3535616601030317e-05, "loss": 0.8941, "step": 4506 }, { "epoch": 0.4035592366668532, "grad_norm": 0.8444784263369487, "learning_rate": 1.3532903489828964e-05, "loss": 0.8162, "step": 4507 }, { "epoch": 0.40364877721193126, "grad_norm": 0.9510293773266176, "learning_rate": 1.3530190081451282e-05, "loss": 0.8222, "step": 4508 }, { "epoch": 0.40373831775700936, "grad_norm": 1.2659201022040512, "learning_rate": 1.3527476376125515e-05, "loss": 0.8264, "step": 4509 }, { "epoch": 0.4038278583020874, "grad_norm": 1.0595297337418874, "learning_rate": 1.352476237407993e-05, "loss": 0.8595, "step": 4510 }, { "epoch": 0.4039173988471655, "grad_norm": 1.0403890973811447, "learning_rate": 1.3522048075542818e-05, "loss": 0.8329, "step": 4511 }, { "epoch": 0.4040069393922435, "grad_norm": 1.145333383524234, "learning_rate": 1.3519333480742502e-05, "loss": 0.9007, "step": 4512 }, { "epoch": 0.4040964799373216, "grad_norm": 0.9484488113446747, "learning_rate": 1.351661858990732e-05, "loss": 0.7972, "step": 4513 }, { "epoch": 0.4041860204823997, "grad_norm": 0.9580461484275042, "learning_rate": 1.3513903403265643e-05, "loss": 0.8539, "step": 4514 }, { "epoch": 0.40427556102747775, "grad_norm": 0.9767206623843067, "learning_rate": 1.3511187921045863e-05, "loss": 0.8401, "step": 4515 }, { "epoch": 0.40436510157255584, "grad_norm": 0.887666835612042, "learning_rate": 1.3508472143476397e-05, "loss": 0.8147, "step": 4516 }, { "epoch": 0.4044546421176339, "grad_norm": 0.8704146018612282, "learning_rate": 1.3505756070785684e-05, "loss": 0.8208, "step": 4517 }, { "epoch": 0.40454418266271197, "grad_norm": 0.9620386661415793, "learning_rate": 1.35030397032022e-05, "loss": 0.8489, "step": 4518 }, { "epoch": 0.40463372320779, "grad_norm": 1.0808081832604142, "learning_rate": 1.3500323040954429e-05, "loss": 0.8613, "step": 4519 }, { "epoch": 0.4047232637528681, "grad_norm": 1.010232512801896, "learning_rate": 1.3497606084270889e-05, "loss": 0.8954, "step": 4520 }, { "epoch": 0.40481280429794614, "grad_norm": 0.970985652683431, "learning_rate": 1.3494888833380124e-05, "loss": 0.8374, "step": 4521 }, { "epoch": 0.40490234484302423, "grad_norm": 0.9341418442078511, "learning_rate": 1.3492171288510701e-05, "loss": 0.8727, "step": 4522 }, { "epoch": 0.4049918853881023, "grad_norm": 0.9081627155438473, "learning_rate": 1.3489453449891206e-05, "loss": 0.8017, "step": 4523 }, { "epoch": 0.40508142593318036, "grad_norm": 0.8474542247164016, "learning_rate": 1.348673531775026e-05, "loss": 0.8112, "step": 4524 }, { "epoch": 0.40517096647825845, "grad_norm": 0.9412260054466648, "learning_rate": 1.3484016892316503e-05, "loss": 0.8917, "step": 4525 }, { "epoch": 0.4052605070233365, "grad_norm": 0.9961068539087753, "learning_rate": 1.3481298173818593e-05, "loss": 0.8704, "step": 4526 }, { "epoch": 0.4053500475684146, "grad_norm": 1.0562341777492048, "learning_rate": 1.3478579162485228e-05, "loss": 0.9319, "step": 4527 }, { "epoch": 0.4054395881134926, "grad_norm": 0.9851290481217739, "learning_rate": 1.3475859858545121e-05, "loss": 0.8612, "step": 4528 }, { "epoch": 0.4055291286585707, "grad_norm": 0.93516140191837, "learning_rate": 1.3473140262227007e-05, "loss": 0.8564, "step": 4529 }, { "epoch": 0.40561866920364875, "grad_norm": 0.9448154886682678, "learning_rate": 1.3470420373759651e-05, "loss": 0.8307, "step": 4530 }, { "epoch": 0.40570820974872684, "grad_norm": 0.9474729757593683, "learning_rate": 1.3467700193371848e-05, "loss": 0.8336, "step": 4531 }, { "epoch": 0.40579775029380494, "grad_norm": 0.9814331907833825, "learning_rate": 1.3464979721292399e-05, "loss": 0.8943, "step": 4532 }, { "epoch": 0.405887290838883, "grad_norm": 0.9619634081070338, "learning_rate": 1.3462258957750152e-05, "loss": 0.828, "step": 4533 }, { "epoch": 0.40597683138396107, "grad_norm": 0.908827172390751, "learning_rate": 1.3459537902973963e-05, "loss": 0.8403, "step": 4534 }, { "epoch": 0.4060663719290391, "grad_norm": 0.8560585522376893, "learning_rate": 1.3456816557192718e-05, "loss": 0.7777, "step": 4535 }, { "epoch": 0.4061559124741172, "grad_norm": 0.9913749254338156, "learning_rate": 1.3454094920635327e-05, "loss": 0.8891, "step": 4536 }, { "epoch": 0.40624545301919524, "grad_norm": 1.0432003774117335, "learning_rate": 1.3451372993530729e-05, "loss": 0.8542, "step": 4537 }, { "epoch": 0.40633499356427333, "grad_norm": 0.9223267963715208, "learning_rate": 1.344865077610788e-05, "loss": 0.7705, "step": 4538 }, { "epoch": 0.40642453410935137, "grad_norm": 0.9908953176945192, "learning_rate": 1.344592826859577e-05, "loss": 0.8053, "step": 4539 }, { "epoch": 0.40651407465442946, "grad_norm": 0.9678679772787933, "learning_rate": 1.34432054712234e-05, "loss": 0.8579, "step": 4540 }, { "epoch": 0.40660361519950755, "grad_norm": 1.0174010720322002, "learning_rate": 1.3440482384219807e-05, "loss": 0.8598, "step": 4541 }, { "epoch": 0.4066931557445856, "grad_norm": 0.8460226462544751, "learning_rate": 1.3437759007814046e-05, "loss": 0.8565, "step": 4542 }, { "epoch": 0.4067826962896637, "grad_norm": 0.9386144553068516, "learning_rate": 1.3435035342235204e-05, "loss": 0.8663, "step": 4543 }, { "epoch": 0.4068722368347417, "grad_norm": 0.9692082358969527, "learning_rate": 1.3432311387712378e-05, "loss": 0.8616, "step": 4544 }, { "epoch": 0.4069617773798198, "grad_norm": 0.9466390515429067, "learning_rate": 1.3429587144474705e-05, "loss": 0.8627, "step": 4545 }, { "epoch": 0.40705131792489785, "grad_norm": 0.9058342600177697, "learning_rate": 1.3426862612751336e-05, "loss": 0.8567, "step": 4546 }, { "epoch": 0.40714085846997594, "grad_norm": 1.1423014686710726, "learning_rate": 1.3424137792771455e-05, "loss": 0.8851, "step": 4547 }, { "epoch": 0.407230399015054, "grad_norm": 0.9226218607379479, "learning_rate": 1.3421412684764256e-05, "loss": 0.847, "step": 4548 }, { "epoch": 0.40731993956013207, "grad_norm": 0.9774286083690441, "learning_rate": 1.3418687288958976e-05, "loss": 0.8142, "step": 4549 }, { "epoch": 0.40740948010521016, "grad_norm": 0.9650988238030125, "learning_rate": 1.3415961605584857e-05, "loss": 0.8285, "step": 4550 }, { "epoch": 0.4074990206502882, "grad_norm": 0.8598830505967154, "learning_rate": 1.3413235634871185e-05, "loss": 0.8823, "step": 4551 }, { "epoch": 0.4075885611953663, "grad_norm": 0.9101646712023322, "learning_rate": 1.341050937704725e-05, "loss": 0.8846, "step": 4552 }, { "epoch": 0.40767810174044433, "grad_norm": 0.9030153623726633, "learning_rate": 1.3407782832342382e-05, "loss": 0.8575, "step": 4553 }, { "epoch": 0.4077676422855224, "grad_norm": 0.8388911890500786, "learning_rate": 1.3405056000985925e-05, "loss": 0.8238, "step": 4554 }, { "epoch": 0.40785718283060046, "grad_norm": 0.9309588745139172, "learning_rate": 1.3402328883207257e-05, "loss": 0.8606, "step": 4555 }, { "epoch": 0.40794672337567855, "grad_norm": 0.8762462551837407, "learning_rate": 1.3399601479235767e-05, "loss": 0.8824, "step": 4556 }, { "epoch": 0.4080362639207566, "grad_norm": 0.8882470096875025, "learning_rate": 1.3396873789300879e-05, "loss": 0.7821, "step": 4557 }, { "epoch": 0.4081258044658347, "grad_norm": 0.9251347056269427, "learning_rate": 1.339414581363204e-05, "loss": 0.8521, "step": 4558 }, { "epoch": 0.4082153450109128, "grad_norm": 0.8890298695496572, "learning_rate": 1.3391417552458712e-05, "loss": 0.831, "step": 4559 }, { "epoch": 0.4083048855559908, "grad_norm": 1.086556456057103, "learning_rate": 1.3388689006010394e-05, "loss": 0.8612, "step": 4560 }, { "epoch": 0.4083944261010689, "grad_norm": 1.1083473238528658, "learning_rate": 1.33859601745166e-05, "loss": 0.796, "step": 4561 }, { "epoch": 0.40848396664614695, "grad_norm": 1.0760774860103657, "learning_rate": 1.3383231058206866e-05, "loss": 0.8794, "step": 4562 }, { "epoch": 0.40857350719122504, "grad_norm": 1.1125398325660019, "learning_rate": 1.3380501657310763e-05, "loss": 0.8206, "step": 4563 }, { "epoch": 0.4086630477363031, "grad_norm": 0.9563373323066136, "learning_rate": 1.3377771972057878e-05, "loss": 0.8118, "step": 4564 }, { "epoch": 0.40875258828138117, "grad_norm": 0.9090597504823279, "learning_rate": 1.337504200267782e-05, "loss": 0.8911, "step": 4565 }, { "epoch": 0.4088421288264592, "grad_norm": 0.9233541598867686, "learning_rate": 1.3372311749400226e-05, "loss": 0.8393, "step": 4566 }, { "epoch": 0.4089316693715373, "grad_norm": 1.0508909317558797, "learning_rate": 1.3369581212454758e-05, "loss": 0.8542, "step": 4567 }, { "epoch": 0.4090212099166154, "grad_norm": 0.9278082088264658, "learning_rate": 1.33668503920711e-05, "loss": 0.8193, "step": 4568 }, { "epoch": 0.40911075046169343, "grad_norm": 0.8397817438730998, "learning_rate": 1.336411928847896e-05, "loss": 0.8243, "step": 4569 }, { "epoch": 0.4092002910067715, "grad_norm": 0.8466845539832453, "learning_rate": 1.3361387901908063e-05, "loss": 0.8304, "step": 4570 }, { "epoch": 0.40928983155184956, "grad_norm": 0.9380068231004197, "learning_rate": 1.3358656232588174e-05, "loss": 0.8308, "step": 4571 }, { "epoch": 0.40937937209692765, "grad_norm": 0.9449251187204846, "learning_rate": 1.3355924280749065e-05, "loss": 0.902, "step": 4572 }, { "epoch": 0.4094689126420057, "grad_norm": 1.0066540794588783, "learning_rate": 1.3353192046620542e-05, "loss": 0.8834, "step": 4573 }, { "epoch": 0.4095584531870838, "grad_norm": 0.9492260077501271, "learning_rate": 1.3350459530432431e-05, "loss": 0.8426, "step": 4574 }, { "epoch": 0.4096479937321618, "grad_norm": 1.0876057747062915, "learning_rate": 1.3347726732414584e-05, "loss": 0.8201, "step": 4575 }, { "epoch": 0.4097375342772399, "grad_norm": 0.9938192013613967, "learning_rate": 1.3344993652796872e-05, "loss": 0.8697, "step": 4576 }, { "epoch": 0.409827074822318, "grad_norm": 0.9451712074067705, "learning_rate": 1.3342260291809194e-05, "loss": 0.8296, "step": 4577 }, { "epoch": 0.40991661536739604, "grad_norm": 0.9699039908276667, "learning_rate": 1.3339526649681473e-05, "loss": 0.9037, "step": 4578 }, { "epoch": 0.41000615591247414, "grad_norm": 0.9614840372737827, "learning_rate": 1.3336792726643652e-05, "loss": 0.861, "step": 4579 }, { "epoch": 0.4100956964575522, "grad_norm": 0.9299321911136904, "learning_rate": 1.3334058522925702e-05, "loss": 0.8708, "step": 4580 }, { "epoch": 0.41018523700263027, "grad_norm": 0.9654633566271076, "learning_rate": 1.3331324038757612e-05, "loss": 0.8072, "step": 4581 }, { "epoch": 0.4102747775477083, "grad_norm": 0.8878407335900006, "learning_rate": 1.33285892743694e-05, "loss": 0.8628, "step": 4582 }, { "epoch": 0.4103643180927864, "grad_norm": 0.8908724709174111, "learning_rate": 1.3325854229991104e-05, "loss": 0.7846, "step": 4583 }, { "epoch": 0.41045385863786443, "grad_norm": 0.9613035862774133, "learning_rate": 1.332311890585279e-05, "loss": 0.8256, "step": 4584 }, { "epoch": 0.4105433991829425, "grad_norm": 0.9964065110914502, "learning_rate": 1.3320383302184546e-05, "loss": 0.8389, "step": 4585 }, { "epoch": 0.4106329397280206, "grad_norm": 0.8546655750731609, "learning_rate": 1.331764741921647e-05, "loss": 0.8205, "step": 4586 }, { "epoch": 0.41072248027309866, "grad_norm": 1.0774639743019896, "learning_rate": 1.331491125717871e-05, "loss": 0.8171, "step": 4587 }, { "epoch": 0.41081202081817675, "grad_norm": 1.0181628094583761, "learning_rate": 1.3312174816301418e-05, "loss": 0.8284, "step": 4588 }, { "epoch": 0.4109015613632548, "grad_norm": 0.8855528770972546, "learning_rate": 1.3309438096814772e-05, "loss": 0.8531, "step": 4589 }, { "epoch": 0.4109911019083329, "grad_norm": 0.8822107077366784, "learning_rate": 1.3306701098948978e-05, "loss": 0.8439, "step": 4590 }, { "epoch": 0.4110806424534109, "grad_norm": 0.9076855758792197, "learning_rate": 1.3303963822934264e-05, "loss": 0.8398, "step": 4591 }, { "epoch": 0.411170182998489, "grad_norm": 0.9285593788012148, "learning_rate": 1.330122626900088e-05, "loss": 0.833, "step": 4592 }, { "epoch": 0.41125972354356705, "grad_norm": 0.9756747490703039, "learning_rate": 1.3298488437379096e-05, "loss": 0.8188, "step": 4593 }, { "epoch": 0.41134926408864514, "grad_norm": 1.078644547280076, "learning_rate": 1.3295750328299219e-05, "loss": 0.8569, "step": 4594 }, { "epoch": 0.41143880463372323, "grad_norm": 1.2177247263270485, "learning_rate": 1.3293011941991561e-05, "loss": 0.869, "step": 4595 }, { "epoch": 0.41152834517880127, "grad_norm": 1.0152014145562078, "learning_rate": 1.3290273278686469e-05, "loss": 0.8268, "step": 4596 }, { "epoch": 0.41161788572387936, "grad_norm": 1.1074879588961724, "learning_rate": 1.3287534338614313e-05, "loss": 0.8107, "step": 4597 }, { "epoch": 0.4117074262689574, "grad_norm": 0.8896465682708404, "learning_rate": 1.328479512200548e-05, "loss": 0.8534, "step": 4598 }, { "epoch": 0.4117969668140355, "grad_norm": 1.073572431591183, "learning_rate": 1.3282055629090387e-05, "loss": 0.8695, "step": 4599 }, { "epoch": 0.41188650735911353, "grad_norm": 0.881577555602564, "learning_rate": 1.3279315860099468e-05, "loss": 0.8473, "step": 4600 }, { "epoch": 0.4119760479041916, "grad_norm": 0.982180239266472, "learning_rate": 1.3276575815263187e-05, "loss": 0.8528, "step": 4601 }, { "epoch": 0.41206558844926966, "grad_norm": 1.0190998905201165, "learning_rate": 1.3273835494812023e-05, "loss": 0.9019, "step": 4602 }, { "epoch": 0.41215512899434775, "grad_norm": 0.9104134662785992, "learning_rate": 1.3271094898976488e-05, "loss": 0.8848, "step": 4603 }, { "epoch": 0.41224466953942585, "grad_norm": 0.9998663868670606, "learning_rate": 1.326835402798711e-05, "loss": 0.8403, "step": 4604 }, { "epoch": 0.4123342100845039, "grad_norm": 1.2477286444590316, "learning_rate": 1.3265612882074441e-05, "loss": 0.8272, "step": 4605 }, { "epoch": 0.412423750629582, "grad_norm": 0.9250570113061771, "learning_rate": 1.3262871461469057e-05, "loss": 0.8207, "step": 4606 }, { "epoch": 0.41251329117466, "grad_norm": 0.9329401712882781, "learning_rate": 1.326012976640156e-05, "loss": 0.8322, "step": 4607 }, { "epoch": 0.4126028317197381, "grad_norm": 0.9489315333930428, "learning_rate": 1.325738779710257e-05, "loss": 0.8176, "step": 4608 }, { "epoch": 0.41269237226481614, "grad_norm": 0.8273872821029399, "learning_rate": 1.3254645553802731e-05, "loss": 0.8147, "step": 4609 }, { "epoch": 0.41278191280989424, "grad_norm": 0.8970109030900272, "learning_rate": 1.3251903036732718e-05, "loss": 0.8561, "step": 4610 }, { "epoch": 0.4128714533549723, "grad_norm": 0.9692401893013974, "learning_rate": 1.3249160246123216e-05, "loss": 0.839, "step": 4611 }, { "epoch": 0.41296099390005037, "grad_norm": 0.9087791981905572, "learning_rate": 1.3246417182204944e-05, "loss": 0.8806, "step": 4612 }, { "epoch": 0.41305053444512846, "grad_norm": 0.966237193485273, "learning_rate": 1.3243673845208638e-05, "loss": 0.8269, "step": 4613 }, { "epoch": 0.4131400749902065, "grad_norm": 0.9287230016236534, "learning_rate": 1.3240930235365054e-05, "loss": 0.7945, "step": 4614 }, { "epoch": 0.4132296155352846, "grad_norm": 1.0018047332899902, "learning_rate": 1.3238186352904985e-05, "loss": 0.8592, "step": 4615 }, { "epoch": 0.4133191560803626, "grad_norm": 0.999805643274705, "learning_rate": 1.3235442198059233e-05, "loss": 0.7644, "step": 4616 }, { "epoch": 0.4134086966254407, "grad_norm": 1.0806087015699573, "learning_rate": 1.323269777105862e-05, "loss": 0.8874, "step": 4617 }, { "epoch": 0.41349823717051876, "grad_norm": 0.9091031689744161, "learning_rate": 1.3229953072134012e-05, "loss": 0.7725, "step": 4618 }, { "epoch": 0.41358777771559685, "grad_norm": 1.1672372449111448, "learning_rate": 1.3227208101516273e-05, "loss": 0.9022, "step": 4619 }, { "epoch": 0.4136773182606749, "grad_norm": 1.1690505655294394, "learning_rate": 1.3224462859436304e-05, "loss": 0.8404, "step": 4620 }, { "epoch": 0.413766858805753, "grad_norm": 0.925577127704765, "learning_rate": 1.3221717346125029e-05, "loss": 0.8088, "step": 4621 }, { "epoch": 0.4138563993508311, "grad_norm": 0.8968946873108684, "learning_rate": 1.321897156181339e-05, "loss": 0.8215, "step": 4622 }, { "epoch": 0.4139459398959091, "grad_norm": 0.9704610813389212, "learning_rate": 1.321622550673235e-05, "loss": 0.8222, "step": 4623 }, { "epoch": 0.4140354804409872, "grad_norm": 1.020599008603233, "learning_rate": 1.3213479181112906e-05, "loss": 0.8546, "step": 4624 }, { "epoch": 0.41412502098606524, "grad_norm": 0.954281810589137, "learning_rate": 1.321073258518606e-05, "loss": 0.848, "step": 4625 }, { "epoch": 0.41421456153114333, "grad_norm": 0.9592509514278127, "learning_rate": 1.3207985719182857e-05, "loss": 0.8486, "step": 4626 }, { "epoch": 0.41430410207622137, "grad_norm": 0.992927323363761, "learning_rate": 1.3205238583334343e-05, "loss": 0.8775, "step": 4627 }, { "epoch": 0.41439364262129946, "grad_norm": 0.9194625188139148, "learning_rate": 1.3202491177871608e-05, "loss": 0.8847, "step": 4628 }, { "epoch": 0.4144831831663775, "grad_norm": 0.9330567672692596, "learning_rate": 1.3199743503025753e-05, "loss": 0.8232, "step": 4629 }, { "epoch": 0.4145727237114556, "grad_norm": 0.8889707777673176, "learning_rate": 1.3196995559027897e-05, "loss": 0.8215, "step": 4630 }, { "epoch": 0.4146622642565337, "grad_norm": 0.9061196558934956, "learning_rate": 1.3194247346109196e-05, "loss": 0.8732, "step": 4631 }, { "epoch": 0.4147518048016117, "grad_norm": 1.1181165079217956, "learning_rate": 1.3191498864500813e-05, "loss": 0.7961, "step": 4632 }, { "epoch": 0.4148413453466898, "grad_norm": 0.8115127254057263, "learning_rate": 1.3188750114433951e-05, "loss": 0.8748, "step": 4633 }, { "epoch": 0.41493088589176785, "grad_norm": 0.8769947067715624, "learning_rate": 1.3186001096139818e-05, "loss": 0.8615, "step": 4634 }, { "epoch": 0.41502042643684595, "grad_norm": 1.1462198331281213, "learning_rate": 1.3183251809849655e-05, "loss": 0.7953, "step": 4635 }, { "epoch": 0.415109966981924, "grad_norm": 0.8572780827941026, "learning_rate": 1.3180502255794723e-05, "loss": 0.8474, "step": 4636 }, { "epoch": 0.4151995075270021, "grad_norm": 1.2846869032657486, "learning_rate": 1.3177752434206305e-05, "loss": 0.8899, "step": 4637 }, { "epoch": 0.4152890480720801, "grad_norm": 0.9821742845977395, "learning_rate": 1.317500234531571e-05, "loss": 0.8355, "step": 4638 }, { "epoch": 0.4153785886171582, "grad_norm": 0.9373207537928022, "learning_rate": 1.317225198935426e-05, "loss": 0.8755, "step": 4639 }, { "epoch": 0.4154681291622363, "grad_norm": 0.8308853445502898, "learning_rate": 1.3169501366553314e-05, "loss": 0.8288, "step": 4640 }, { "epoch": 0.41555766970731434, "grad_norm": 1.1504088516725506, "learning_rate": 1.3166750477144241e-05, "loss": 0.8422, "step": 4641 }, { "epoch": 0.41564721025239243, "grad_norm": 0.9402836437634677, "learning_rate": 1.3163999321358436e-05, "loss": 0.8845, "step": 4642 }, { "epoch": 0.41573675079747047, "grad_norm": 0.9623303998465765, "learning_rate": 1.3161247899427321e-05, "loss": 0.8155, "step": 4643 }, { "epoch": 0.41582629134254856, "grad_norm": 0.9191905321268291, "learning_rate": 1.315849621158233e-05, "loss": 0.9001, "step": 4644 }, { "epoch": 0.4159158318876266, "grad_norm": 0.9607037118151015, "learning_rate": 1.3155744258054934e-05, "loss": 0.7946, "step": 4645 }, { "epoch": 0.4160053724327047, "grad_norm": 0.9216694176538363, "learning_rate": 1.3152992039076616e-05, "loss": 0.8218, "step": 4646 }, { "epoch": 0.4160949129777827, "grad_norm": 0.8871327779806395, "learning_rate": 1.3150239554878876e-05, "loss": 0.8622, "step": 4647 }, { "epoch": 0.4161844535228608, "grad_norm": 0.9362560430881015, "learning_rate": 1.3147486805693258e-05, "loss": 0.8369, "step": 4648 }, { "epoch": 0.4162739940679389, "grad_norm": 0.9023815269899874, "learning_rate": 1.3144733791751301e-05, "loss": 0.8391, "step": 4649 }, { "epoch": 0.41636353461301695, "grad_norm": 0.9831019406426816, "learning_rate": 1.3141980513284584e-05, "loss": 0.8839, "step": 4650 }, { "epoch": 0.41645307515809504, "grad_norm": 0.8992611897182874, "learning_rate": 1.313922697052471e-05, "loss": 0.8316, "step": 4651 }, { "epoch": 0.4165426157031731, "grad_norm": 0.9406869452982524, "learning_rate": 1.3136473163703291e-05, "loss": 0.8408, "step": 4652 }, { "epoch": 0.4166321562482512, "grad_norm": 0.917723487069821, "learning_rate": 1.3133719093051968e-05, "loss": 0.8745, "step": 4653 }, { "epoch": 0.4167216967933292, "grad_norm": 0.9185868620363086, "learning_rate": 1.313096475880241e-05, "loss": 0.7782, "step": 4654 }, { "epoch": 0.4168112373384073, "grad_norm": 0.9065162127552399, "learning_rate": 1.3128210161186299e-05, "loss": 0.8299, "step": 4655 }, { "epoch": 0.41690077788348534, "grad_norm": 0.9124269286174485, "learning_rate": 1.3125455300435343e-05, "loss": 0.8715, "step": 4656 }, { "epoch": 0.41699031842856343, "grad_norm": 0.9210232868415803, "learning_rate": 1.312270017678127e-05, "loss": 0.8163, "step": 4657 }, { "epoch": 0.4170798589736415, "grad_norm": 0.8443906039482938, "learning_rate": 1.3119944790455837e-05, "loss": 0.8041, "step": 4658 }, { "epoch": 0.41716939951871956, "grad_norm": 0.9599377820340842, "learning_rate": 1.3117189141690815e-05, "loss": 0.8484, "step": 4659 }, { "epoch": 0.41725894006379766, "grad_norm": 0.9297116036913715, "learning_rate": 1.3114433230717998e-05, "loss": 0.9055, "step": 4660 }, { "epoch": 0.4173484806088757, "grad_norm": 0.8874840704671126, "learning_rate": 1.3111677057769208e-05, "loss": 0.8659, "step": 4661 }, { "epoch": 0.4174380211539538, "grad_norm": 1.003778061450708, "learning_rate": 1.3108920623076285e-05, "loss": 0.8191, "step": 4662 }, { "epoch": 0.4175275616990318, "grad_norm": 0.9992249877611448, "learning_rate": 1.3106163926871087e-05, "loss": 0.8047, "step": 4663 }, { "epoch": 0.4176171022441099, "grad_norm": 0.9560570869517108, "learning_rate": 1.3103406969385503e-05, "loss": 0.8683, "step": 4664 }, { "epoch": 0.41770664278918795, "grad_norm": 0.9552214078071677, "learning_rate": 1.310064975085144e-05, "loss": 0.8257, "step": 4665 }, { "epoch": 0.41779618333426605, "grad_norm": 0.9413913920296842, "learning_rate": 1.3097892271500823e-05, "loss": 0.8621, "step": 4666 }, { "epoch": 0.41788572387934414, "grad_norm": 0.9472586405069346, "learning_rate": 1.3095134531565603e-05, "loss": 0.8756, "step": 4667 }, { "epoch": 0.4179752644244222, "grad_norm": 0.9577133262185117, "learning_rate": 1.3092376531277751e-05, "loss": 0.8494, "step": 4668 }, { "epoch": 0.41806480496950027, "grad_norm": 1.014542126188905, "learning_rate": 1.3089618270869263e-05, "loss": 0.8829, "step": 4669 }, { "epoch": 0.4181543455145783, "grad_norm": 0.8704543576605985, "learning_rate": 1.3086859750572155e-05, "loss": 0.8693, "step": 4670 }, { "epoch": 0.4182438860596564, "grad_norm": 0.9127430959270818, "learning_rate": 1.3084100970618463e-05, "loss": 0.8689, "step": 4671 }, { "epoch": 0.41833342660473444, "grad_norm": 0.9068657150194723, "learning_rate": 1.3081341931240248e-05, "loss": 0.8043, "step": 4672 }, { "epoch": 0.41842296714981253, "grad_norm": 0.9699648071629574, "learning_rate": 1.3078582632669594e-05, "loss": 0.881, "step": 4673 }, { "epoch": 0.41851250769489057, "grad_norm": 0.963258145354212, "learning_rate": 1.3075823075138596e-05, "loss": 0.8292, "step": 4674 }, { "epoch": 0.41860204823996866, "grad_norm": 1.0242842037934123, "learning_rate": 1.3073063258879387e-05, "loss": 0.8577, "step": 4675 }, { "epoch": 0.41869158878504675, "grad_norm": 1.092262611762796, "learning_rate": 1.3070303184124111e-05, "loss": 0.8711, "step": 4676 }, { "epoch": 0.4187811293301248, "grad_norm": 0.9838270688618203, "learning_rate": 1.3067542851104933e-05, "loss": 0.8233, "step": 4677 }, { "epoch": 0.4188706698752029, "grad_norm": 0.9203296737606311, "learning_rate": 1.306478226005405e-05, "loss": 0.8172, "step": 4678 }, { "epoch": 0.4189602104202809, "grad_norm": 0.9616787989563077, "learning_rate": 1.3062021411203671e-05, "loss": 0.8599, "step": 4679 }, { "epoch": 0.419049750965359, "grad_norm": 0.9613061690080046, "learning_rate": 1.3059260304786025e-05, "loss": 0.8276, "step": 4680 }, { "epoch": 0.41913929151043705, "grad_norm": 0.8375156033115455, "learning_rate": 1.3056498941033375e-05, "loss": 0.7958, "step": 4681 }, { "epoch": 0.41922883205551514, "grad_norm": 1.1478593726725939, "learning_rate": 1.3053737320177995e-05, "loss": 0.8488, "step": 4682 }, { "epoch": 0.4193183726005932, "grad_norm": 0.9092129667055416, "learning_rate": 1.3050975442452179e-05, "loss": 0.8679, "step": 4683 }, { "epoch": 0.4194079131456713, "grad_norm": 0.982886630540918, "learning_rate": 1.3048213308088253e-05, "loss": 0.8845, "step": 4684 }, { "epoch": 0.41949745369074937, "grad_norm": 0.9913852260384906, "learning_rate": 1.3045450917318558e-05, "loss": 0.8002, "step": 4685 }, { "epoch": 0.4195869942358274, "grad_norm": 1.0569129500700625, "learning_rate": 1.3042688270375456e-05, "loss": 0.8956, "step": 4686 }, { "epoch": 0.4196765347809055, "grad_norm": 0.8801706474732551, "learning_rate": 1.3039925367491331e-05, "loss": 0.8812, "step": 4687 }, { "epoch": 0.41976607532598353, "grad_norm": 1.2044954002253259, "learning_rate": 1.303716220889859e-05, "loss": 0.9146, "step": 4688 }, { "epoch": 0.4198556158710616, "grad_norm": 1.0281304338884343, "learning_rate": 1.3034398794829661e-05, "loss": 0.8599, "step": 4689 }, { "epoch": 0.41994515641613966, "grad_norm": 0.8643529163147792, "learning_rate": 1.3031635125516994e-05, "loss": 0.7747, "step": 4690 }, { "epoch": 0.42003469696121776, "grad_norm": 0.9820525966487977, "learning_rate": 1.3028871201193058e-05, "loss": 0.861, "step": 4691 }, { "epoch": 0.4201242375062958, "grad_norm": 0.9372381839014928, "learning_rate": 1.3026107022090351e-05, "loss": 0.8248, "step": 4692 }, { "epoch": 0.4202137780513739, "grad_norm": 0.8919571994048802, "learning_rate": 1.3023342588441379e-05, "loss": 0.8853, "step": 4693 }, { "epoch": 0.420303318596452, "grad_norm": 0.9153850318594442, "learning_rate": 1.3020577900478682e-05, "loss": 0.8676, "step": 4694 }, { "epoch": 0.42039285914153, "grad_norm": 0.9430614114038058, "learning_rate": 1.3017812958434812e-05, "loss": 0.8321, "step": 4695 }, { "epoch": 0.4204823996866081, "grad_norm": 0.9332423052638498, "learning_rate": 1.3015047762542353e-05, "loss": 0.8643, "step": 4696 }, { "epoch": 0.42057194023168615, "grad_norm": 0.9722117021221077, "learning_rate": 1.3012282313033904e-05, "loss": 0.885, "step": 4697 }, { "epoch": 0.42066148077676424, "grad_norm": 0.9412990551681779, "learning_rate": 1.3009516610142076e-05, "loss": 0.8553, "step": 4698 }, { "epoch": 0.4207510213218423, "grad_norm": 0.9931320570131007, "learning_rate": 1.3006750654099522e-05, "loss": 0.8645, "step": 4699 }, { "epoch": 0.42084056186692037, "grad_norm": 0.8885940695753026, "learning_rate": 1.3003984445138905e-05, "loss": 0.8886, "step": 4700 }, { "epoch": 0.4209301024119984, "grad_norm": 0.9826481640600054, "learning_rate": 1.3001217983492898e-05, "loss": 0.8198, "step": 4701 }, { "epoch": 0.4210196429570765, "grad_norm": 1.1667617963905261, "learning_rate": 1.2998451269394216e-05, "loss": 0.8006, "step": 4702 }, { "epoch": 0.4211091835021546, "grad_norm": 0.9579931360581966, "learning_rate": 1.2995684303075591e-05, "loss": 0.8363, "step": 4703 }, { "epoch": 0.42119872404723263, "grad_norm": 1.1073316128782695, "learning_rate": 1.2992917084769757e-05, "loss": 0.8684, "step": 4704 }, { "epoch": 0.4212882645923107, "grad_norm": 0.9806731566472836, "learning_rate": 1.2990149614709493e-05, "loss": 0.8486, "step": 4705 }, { "epoch": 0.42137780513738876, "grad_norm": 0.9035359947952818, "learning_rate": 1.2987381893127591e-05, "loss": 0.8092, "step": 4706 }, { "epoch": 0.42146734568246685, "grad_norm": 0.9753732270507935, "learning_rate": 1.2984613920256852e-05, "loss": 0.8637, "step": 4707 }, { "epoch": 0.4215568862275449, "grad_norm": 0.9167516545821977, "learning_rate": 1.2981845696330122e-05, "loss": 0.884, "step": 4708 }, { "epoch": 0.421646426772623, "grad_norm": 0.8722929779588924, "learning_rate": 1.2979077221580247e-05, "loss": 0.8222, "step": 4709 }, { "epoch": 0.421735967317701, "grad_norm": 0.9808933749379851, "learning_rate": 1.2976308496240102e-05, "loss": 0.874, "step": 4710 }, { "epoch": 0.4218255078627791, "grad_norm": 0.896217722491882, "learning_rate": 1.2973539520542589e-05, "loss": 0.8845, "step": 4711 }, { "epoch": 0.4219150484078572, "grad_norm": 0.9967614285525078, "learning_rate": 1.297077029472062e-05, "loss": 0.8759, "step": 4712 }, { "epoch": 0.42200458895293524, "grad_norm": 0.9951832322325717, "learning_rate": 1.2968000819007131e-05, "loss": 0.8939, "step": 4713 }, { "epoch": 0.42209412949801334, "grad_norm": 0.9403580223141892, "learning_rate": 1.2965231093635092e-05, "loss": 0.892, "step": 4714 }, { "epoch": 0.4221836700430914, "grad_norm": 0.9695860500395811, "learning_rate": 1.2962461118837472e-05, "loss": 0.8509, "step": 4715 }, { "epoch": 0.42227321058816947, "grad_norm": 0.9387882227536882, "learning_rate": 1.2959690894847274e-05, "loss": 0.8307, "step": 4716 }, { "epoch": 0.4223627511332475, "grad_norm": 0.8952646394499433, "learning_rate": 1.2956920421897527e-05, "loss": 0.8139, "step": 4717 }, { "epoch": 0.4224522916783256, "grad_norm": 0.9156342739196973, "learning_rate": 1.2954149700221271e-05, "loss": 0.8613, "step": 4718 }, { "epoch": 0.42254183222340364, "grad_norm": 0.9192123322657759, "learning_rate": 1.2951378730051566e-05, "loss": 0.8275, "step": 4719 }, { "epoch": 0.42263137276848173, "grad_norm": 0.9319667696245016, "learning_rate": 1.2948607511621498e-05, "loss": 0.8342, "step": 4720 }, { "epoch": 0.4227209133135598, "grad_norm": 0.9367606521278115, "learning_rate": 1.2945836045164177e-05, "loss": 0.8066, "step": 4721 }, { "epoch": 0.42281045385863786, "grad_norm": 0.920752247593063, "learning_rate": 1.294306433091273e-05, "loss": 0.8216, "step": 4722 }, { "epoch": 0.42289999440371595, "grad_norm": 0.9868429618607599, "learning_rate": 1.2940292369100297e-05, "loss": 0.9433, "step": 4723 }, { "epoch": 0.422989534948794, "grad_norm": 1.0015056418035166, "learning_rate": 1.2937520159960055e-05, "loss": 0.9041, "step": 4724 }, { "epoch": 0.4230790754938721, "grad_norm": 0.9663577835711813, "learning_rate": 1.2934747703725188e-05, "loss": 0.7973, "step": 4725 }, { "epoch": 0.4231686160389501, "grad_norm": 1.0069102472883023, "learning_rate": 1.293197500062891e-05, "loss": 0.8155, "step": 4726 }, { "epoch": 0.4232581565840282, "grad_norm": 0.946504808793655, "learning_rate": 1.2929202050904448e-05, "loss": 0.8697, "step": 4727 }, { "epoch": 0.42334769712910625, "grad_norm": 0.9652805094817695, "learning_rate": 1.292642885478505e-05, "loss": 0.9012, "step": 4728 }, { "epoch": 0.42343723767418434, "grad_norm": 0.9300322169650518, "learning_rate": 1.2923655412504e-05, "loss": 0.8533, "step": 4729 }, { "epoch": 0.42352677821926243, "grad_norm": 0.934481759423368, "learning_rate": 1.2920881724294585e-05, "loss": 0.8746, "step": 4730 }, { "epoch": 0.42361631876434047, "grad_norm": 0.9544057259344606, "learning_rate": 1.2918107790390111e-05, "loss": 0.8012, "step": 4731 }, { "epoch": 0.42370585930941856, "grad_norm": 0.9059359668812877, "learning_rate": 1.2915333611023923e-05, "loss": 0.8878, "step": 4732 }, { "epoch": 0.4237953998544966, "grad_norm": 1.0493015692935304, "learning_rate": 1.2912559186429372e-05, "loss": 0.9328, "step": 4733 }, { "epoch": 0.4238849403995747, "grad_norm": 0.8678960884162599, "learning_rate": 1.2909784516839832e-05, "loss": 0.8177, "step": 4734 }, { "epoch": 0.42397448094465273, "grad_norm": 0.8443960331048497, "learning_rate": 1.2907009602488701e-05, "loss": 0.8822, "step": 4735 }, { "epoch": 0.4240640214897308, "grad_norm": 0.9967353630763351, "learning_rate": 1.2904234443609395e-05, "loss": 0.9215, "step": 4736 }, { "epoch": 0.42415356203480886, "grad_norm": 0.9609524307596268, "learning_rate": 1.2901459040435352e-05, "loss": 0.8496, "step": 4737 }, { "epoch": 0.42424310257988695, "grad_norm": 0.9394560590312089, "learning_rate": 1.289868339320003e-05, "loss": 0.8724, "step": 4738 }, { "epoch": 0.42433264312496505, "grad_norm": 0.9842980274260325, "learning_rate": 1.2895907502136908e-05, "loss": 0.8831, "step": 4739 }, { "epoch": 0.4244221836700431, "grad_norm": 0.9547615978521125, "learning_rate": 1.289313136747948e-05, "loss": 0.8291, "step": 4740 }, { "epoch": 0.4245117242151212, "grad_norm": 0.8946177388542212, "learning_rate": 1.2890354989461273e-05, "loss": 0.8072, "step": 4741 }, { "epoch": 0.4246012647601992, "grad_norm": 0.8951148884280952, "learning_rate": 1.288757836831582e-05, "loss": 0.7793, "step": 4742 }, { "epoch": 0.4246908053052773, "grad_norm": 0.9616023408383729, "learning_rate": 1.2884801504276684e-05, "loss": 0.7799, "step": 4743 }, { "epoch": 0.42478034585035535, "grad_norm": 0.870720014309664, "learning_rate": 1.288202439757745e-05, "loss": 0.8209, "step": 4744 }, { "epoch": 0.42486988639543344, "grad_norm": 1.0635366073884072, "learning_rate": 1.2879247048451715e-05, "loss": 0.8556, "step": 4745 }, { "epoch": 0.4249594269405115, "grad_norm": 1.0119358795930222, "learning_rate": 1.2876469457133097e-05, "loss": 0.8674, "step": 4746 }, { "epoch": 0.42504896748558957, "grad_norm": 0.9331042827489495, "learning_rate": 1.2873691623855247e-05, "loss": 0.8266, "step": 4747 }, { "epoch": 0.42513850803066766, "grad_norm": 1.099966397829452, "learning_rate": 1.287091354885182e-05, "loss": 0.8358, "step": 4748 }, { "epoch": 0.4252280485757457, "grad_norm": 0.9196868924358731, "learning_rate": 1.28681352323565e-05, "loss": 0.916, "step": 4749 }, { "epoch": 0.4253175891208238, "grad_norm": 0.9279223906877845, "learning_rate": 1.2865356674602989e-05, "loss": 0.8443, "step": 4750 }, { "epoch": 0.42540712966590183, "grad_norm": 1.0760275318520804, "learning_rate": 1.2862577875825015e-05, "loss": 0.797, "step": 4751 }, { "epoch": 0.4254966702109799, "grad_norm": 0.9610514551418874, "learning_rate": 1.2859798836256316e-05, "loss": 0.799, "step": 4752 }, { "epoch": 0.42558621075605796, "grad_norm": 0.9695438381709975, "learning_rate": 1.2857019556130658e-05, "loss": 0.8397, "step": 4753 }, { "epoch": 0.42567575130113605, "grad_norm": 1.0024527775482661, "learning_rate": 1.2854240035681826e-05, "loss": 0.8188, "step": 4754 }, { "epoch": 0.4257652918462141, "grad_norm": 0.9561709543887179, "learning_rate": 1.2851460275143626e-05, "loss": 0.8084, "step": 4755 }, { "epoch": 0.4258548323912922, "grad_norm": 0.8828706487400473, "learning_rate": 1.2848680274749876e-05, "loss": 0.8599, "step": 4756 }, { "epoch": 0.4259443729363703, "grad_norm": 1.1928364760984327, "learning_rate": 1.284590003473443e-05, "loss": 0.8171, "step": 4757 }, { "epoch": 0.4260339134814483, "grad_norm": 0.9530531000329019, "learning_rate": 1.2843119555331139e-05, "loss": 0.8824, "step": 4758 }, { "epoch": 0.4261234540265264, "grad_norm": 1.0290543939752066, "learning_rate": 1.28403388367739e-05, "loss": 0.8365, "step": 4759 }, { "epoch": 0.42621299457160444, "grad_norm": 0.9407487476072213, "learning_rate": 1.2837557879296613e-05, "loss": 0.9077, "step": 4760 }, { "epoch": 0.42630253511668253, "grad_norm": 0.8534524626006078, "learning_rate": 1.2834776683133202e-05, "loss": 0.827, "step": 4761 }, { "epoch": 0.4263920756617606, "grad_norm": 1.037699087004459, "learning_rate": 1.283199524851762e-05, "loss": 0.8657, "step": 4762 }, { "epoch": 0.42648161620683867, "grad_norm": 0.9486358800844373, "learning_rate": 1.2829213575683823e-05, "loss": 0.832, "step": 4763 }, { "epoch": 0.4265711567519167, "grad_norm": 0.9472139926666191, "learning_rate": 1.2826431664865797e-05, "loss": 0.8794, "step": 4764 }, { "epoch": 0.4266606972969948, "grad_norm": 0.8049144071365216, "learning_rate": 1.2823649516297552e-05, "loss": 0.811, "step": 4765 }, { "epoch": 0.4267502378420729, "grad_norm": 0.9966447106517308, "learning_rate": 1.2820867130213116e-05, "loss": 0.7944, "step": 4766 }, { "epoch": 0.4268397783871509, "grad_norm": 0.9851354625173271, "learning_rate": 1.281808450684652e-05, "loss": 0.8238, "step": 4767 }, { "epoch": 0.426929318932229, "grad_norm": 0.9530873972647405, "learning_rate": 1.2815301646431846e-05, "loss": 0.8373, "step": 4768 }, { "epoch": 0.42701885947730706, "grad_norm": 0.99876390216737, "learning_rate": 1.2812518549203168e-05, "loss": 0.8574, "step": 4769 }, { "epoch": 0.42710840002238515, "grad_norm": 0.8849457849970673, "learning_rate": 1.2809735215394594e-05, "loss": 0.8178, "step": 4770 }, { "epoch": 0.4271979405674632, "grad_norm": 0.9821755595379084, "learning_rate": 1.2806951645240253e-05, "loss": 0.8326, "step": 4771 }, { "epoch": 0.4272874811125413, "grad_norm": 0.8855626146210521, "learning_rate": 1.2804167838974286e-05, "loss": 0.8498, "step": 4772 }, { "epoch": 0.4273770216576193, "grad_norm": 0.9593581366544753, "learning_rate": 1.2801383796830856e-05, "loss": 0.8259, "step": 4773 }, { "epoch": 0.4274665622026974, "grad_norm": 0.9956563200185977, "learning_rate": 1.2798599519044152e-05, "loss": 0.8627, "step": 4774 }, { "epoch": 0.4275561027477755, "grad_norm": 0.8894698609544399, "learning_rate": 1.2795815005848378e-05, "loss": 0.7775, "step": 4775 }, { "epoch": 0.42764564329285354, "grad_norm": 0.8751912418748008, "learning_rate": 1.2793030257477751e-05, "loss": 0.7641, "step": 4776 }, { "epoch": 0.42773518383793163, "grad_norm": 0.8747698294072767, "learning_rate": 1.2790245274166526e-05, "loss": 0.8426, "step": 4777 }, { "epoch": 0.42782472438300967, "grad_norm": 1.170275714882787, "learning_rate": 1.2787460056148957e-05, "loss": 0.8104, "step": 4778 }, { "epoch": 0.42791426492808776, "grad_norm": 0.8466957303278845, "learning_rate": 1.2784674603659335e-05, "loss": 0.8069, "step": 4779 }, { "epoch": 0.4280038054731658, "grad_norm": 0.9710348946666589, "learning_rate": 1.2781888916931957e-05, "loss": 0.9052, "step": 4780 }, { "epoch": 0.4280933460182439, "grad_norm": 0.8892971225807514, "learning_rate": 1.277910299620115e-05, "loss": 0.8303, "step": 4781 }, { "epoch": 0.42818288656332193, "grad_norm": 0.9745266973023328, "learning_rate": 1.2776316841701255e-05, "loss": 0.8228, "step": 4782 }, { "epoch": 0.4282724271084, "grad_norm": 0.9101436527507242, "learning_rate": 1.2773530453666634e-05, "loss": 0.7861, "step": 4783 }, { "epoch": 0.4283619676534781, "grad_norm": 0.8603459675524968, "learning_rate": 1.277074383233167e-05, "loss": 0.8688, "step": 4784 }, { "epoch": 0.42845150819855615, "grad_norm": 0.8910546863910951, "learning_rate": 1.2767956977930764e-05, "loss": 0.8444, "step": 4785 }, { "epoch": 0.42854104874363425, "grad_norm": 1.013042514508105, "learning_rate": 1.2765169890698337e-05, "loss": 0.8599, "step": 4786 }, { "epoch": 0.4286305892887123, "grad_norm": 1.0814816004979944, "learning_rate": 1.2762382570868828e-05, "loss": 0.844, "step": 4787 }, { "epoch": 0.4287201298337904, "grad_norm": 0.8935973255675718, "learning_rate": 1.27595950186767e-05, "loss": 0.8274, "step": 4788 }, { "epoch": 0.4288096703788684, "grad_norm": 0.9223805769304506, "learning_rate": 1.2756807234356432e-05, "loss": 0.8816, "step": 4789 }, { "epoch": 0.4288992109239465, "grad_norm": 0.9805609392151479, "learning_rate": 1.2754019218142526e-05, "loss": 0.7898, "step": 4790 }, { "epoch": 0.42898875146902454, "grad_norm": 0.9036644494990118, "learning_rate": 1.2751230970269493e-05, "loss": 0.8268, "step": 4791 }, { "epoch": 0.42907829201410264, "grad_norm": 0.8883477760436611, "learning_rate": 1.2748442490971877e-05, "loss": 0.7894, "step": 4792 }, { "epoch": 0.42916783255918073, "grad_norm": 1.0228154556689883, "learning_rate": 1.2745653780484239e-05, "loss": 0.8773, "step": 4793 }, { "epoch": 0.42925737310425877, "grad_norm": 0.9972554405599129, "learning_rate": 1.2742864839041147e-05, "loss": 0.8656, "step": 4794 }, { "epoch": 0.42934691364933686, "grad_norm": 0.8898273634896448, "learning_rate": 1.2740075666877205e-05, "loss": 0.8655, "step": 4795 }, { "epoch": 0.4294364541944149, "grad_norm": 0.878411403977995, "learning_rate": 1.273728626422703e-05, "loss": 0.7576, "step": 4796 }, { "epoch": 0.429525994739493, "grad_norm": 0.8595654190197719, "learning_rate": 1.2734496631325249e-05, "loss": 0.895, "step": 4797 }, { "epoch": 0.429615535284571, "grad_norm": 0.9624770323553657, "learning_rate": 1.2731706768406528e-05, "loss": 0.907, "step": 4798 }, { "epoch": 0.4297050758296491, "grad_norm": 1.0601072596173775, "learning_rate": 1.272891667570553e-05, "loss": 0.8776, "step": 4799 }, { "epoch": 0.42979461637472716, "grad_norm": 0.9388296844225305, "learning_rate": 1.2726126353456955e-05, "loss": 0.8274, "step": 4800 }, { "epoch": 0.42988415691980525, "grad_norm": 1.0526180882921847, "learning_rate": 1.272333580189552e-05, "loss": 0.9228, "step": 4801 }, { "epoch": 0.42997369746488334, "grad_norm": 0.9495504636937622, "learning_rate": 1.2720545021255947e-05, "loss": 0.8765, "step": 4802 }, { "epoch": 0.4300632380099614, "grad_norm": 0.9287615676153879, "learning_rate": 1.2717754011772992e-05, "loss": 0.8196, "step": 4803 }, { "epoch": 0.43015277855503947, "grad_norm": 0.9412930538689297, "learning_rate": 1.271496277368143e-05, "loss": 0.8379, "step": 4804 }, { "epoch": 0.4302423191001175, "grad_norm": 0.8732131087061276, "learning_rate": 1.2712171307216044e-05, "loss": 0.8842, "step": 4805 }, { "epoch": 0.4303318596451956, "grad_norm": 1.0794507928906578, "learning_rate": 1.2709379612611646e-05, "loss": 0.8675, "step": 4806 }, { "epoch": 0.43042140019027364, "grad_norm": 0.8814229789906701, "learning_rate": 1.2706587690103062e-05, "loss": 0.8459, "step": 4807 }, { "epoch": 0.43051094073535173, "grad_norm": 0.8801983363210407, "learning_rate": 1.2703795539925143e-05, "loss": 0.8062, "step": 4808 }, { "epoch": 0.43060048128042977, "grad_norm": 0.9977529720533943, "learning_rate": 1.2701003162312757e-05, "loss": 0.8327, "step": 4809 }, { "epoch": 0.43069002182550786, "grad_norm": 1.0032653114416341, "learning_rate": 1.2698210557500783e-05, "loss": 0.8315, "step": 4810 }, { "epoch": 0.43077956237058596, "grad_norm": 1.0009242305469093, "learning_rate": 1.2695417725724131e-05, "loss": 0.8891, "step": 4811 }, { "epoch": 0.430869102915664, "grad_norm": 0.9127557994630798, "learning_rate": 1.2692624667217724e-05, "loss": 0.9202, "step": 4812 }, { "epoch": 0.4309586434607421, "grad_norm": 0.8440858240013827, "learning_rate": 1.2689831382216502e-05, "loss": 0.8061, "step": 4813 }, { "epoch": 0.4310481840058201, "grad_norm": 0.9026101363841126, "learning_rate": 1.2687037870955431e-05, "loss": 0.8699, "step": 4814 }, { "epoch": 0.4311377245508982, "grad_norm": 0.9270997150201402, "learning_rate": 1.2684244133669492e-05, "loss": 0.8513, "step": 4815 }, { "epoch": 0.43122726509597625, "grad_norm": 0.8518418837022311, "learning_rate": 1.2681450170593683e-05, "loss": 0.857, "step": 4816 }, { "epoch": 0.43131680564105435, "grad_norm": 0.9311937806731825, "learning_rate": 1.2678655981963022e-05, "loss": 0.855, "step": 4817 }, { "epoch": 0.4314063461861324, "grad_norm": 0.8894540801131331, "learning_rate": 1.2675861568012551e-05, "loss": 0.8156, "step": 4818 }, { "epoch": 0.4314958867312105, "grad_norm": 0.9950315713130442, "learning_rate": 1.2673066928977323e-05, "loss": 0.9056, "step": 4819 }, { "epoch": 0.43158542727628857, "grad_norm": 1.0382160882555977, "learning_rate": 1.2670272065092417e-05, "loss": 0.8151, "step": 4820 }, { "epoch": 0.4316749678213666, "grad_norm": 0.9207360957385227, "learning_rate": 1.2667476976592925e-05, "loss": 0.7916, "step": 4821 }, { "epoch": 0.4317645083664447, "grad_norm": 0.9725214312370394, "learning_rate": 1.2664681663713963e-05, "loss": 0.8183, "step": 4822 }, { "epoch": 0.43185404891152274, "grad_norm": 0.9327426230225193, "learning_rate": 1.2661886126690664e-05, "loss": 0.8434, "step": 4823 }, { "epoch": 0.43194358945660083, "grad_norm": 0.9235941356474715, "learning_rate": 1.2659090365758176e-05, "loss": 0.8293, "step": 4824 }, { "epoch": 0.43203313000167887, "grad_norm": 0.851534251188331, "learning_rate": 1.2656294381151673e-05, "loss": 0.8013, "step": 4825 }, { "epoch": 0.43212267054675696, "grad_norm": 0.9258449941398924, "learning_rate": 1.2653498173106344e-05, "loss": 0.8544, "step": 4826 }, { "epoch": 0.432212211091835, "grad_norm": 0.9362566732375528, "learning_rate": 1.2650701741857391e-05, "loss": 0.8508, "step": 4827 }, { "epoch": 0.4323017516369131, "grad_norm": 0.9940060583671982, "learning_rate": 1.2647905087640051e-05, "loss": 0.7994, "step": 4828 }, { "epoch": 0.4323912921819912, "grad_norm": 1.1747164512861237, "learning_rate": 1.2645108210689561e-05, "loss": 0.8672, "step": 4829 }, { "epoch": 0.4324808327270692, "grad_norm": 1.0171557734206085, "learning_rate": 1.2642311111241185e-05, "loss": 0.8463, "step": 4830 }, { "epoch": 0.4325703732721473, "grad_norm": 0.9474973124697055, "learning_rate": 1.2639513789530215e-05, "loss": 0.8719, "step": 4831 }, { "epoch": 0.43265991381722535, "grad_norm": 0.9634603113537945, "learning_rate": 1.2636716245791945e-05, "loss": 0.8977, "step": 4832 }, { "epoch": 0.43274945436230344, "grad_norm": 0.9062129184938401, "learning_rate": 1.2633918480261693e-05, "loss": 0.8575, "step": 4833 }, { "epoch": 0.4328389949073815, "grad_norm": 0.9513956853223187, "learning_rate": 1.2631120493174804e-05, "loss": 0.8477, "step": 4834 }, { "epoch": 0.4329285354524596, "grad_norm": 0.9756218803232527, "learning_rate": 1.2628322284766635e-05, "loss": 0.8697, "step": 4835 }, { "epoch": 0.4330180759975376, "grad_norm": 0.9539701351709329, "learning_rate": 1.2625523855272556e-05, "loss": 0.831, "step": 4836 }, { "epoch": 0.4331076165426157, "grad_norm": 0.9269016930659613, "learning_rate": 1.2622725204927968e-05, "loss": 0.8674, "step": 4837 }, { "epoch": 0.4331971570876938, "grad_norm": 0.9579227621477914, "learning_rate": 1.2619926333968285e-05, "loss": 0.8577, "step": 4838 }, { "epoch": 0.43328669763277183, "grad_norm": 0.8837061266557809, "learning_rate": 1.2617127242628932e-05, "loss": 0.8776, "step": 4839 }, { "epoch": 0.4333762381778499, "grad_norm": 0.9463837983354312, "learning_rate": 1.2614327931145366e-05, "loss": 0.8372, "step": 4840 }, { "epoch": 0.43346577872292796, "grad_norm": 0.8972352040612186, "learning_rate": 1.2611528399753055e-05, "loss": 0.7941, "step": 4841 }, { "epoch": 0.43355531926800606, "grad_norm": 1.0377929563538764, "learning_rate": 1.2608728648687482e-05, "loss": 0.8392, "step": 4842 }, { "epoch": 0.4336448598130841, "grad_norm": 1.0271316609340724, "learning_rate": 1.2605928678184158e-05, "loss": 0.854, "step": 4843 }, { "epoch": 0.4337344003581622, "grad_norm": 0.9741405693757601, "learning_rate": 1.2603128488478606e-05, "loss": 0.7994, "step": 4844 }, { "epoch": 0.4338239409032402, "grad_norm": 1.0124372921632732, "learning_rate": 1.260032807980637e-05, "loss": 0.7715, "step": 4845 }, { "epoch": 0.4339134814483183, "grad_norm": 0.849358403799566, "learning_rate": 1.2597527452403007e-05, "loss": 0.8, "step": 4846 }, { "epoch": 0.4340030219933964, "grad_norm": 0.8999586212096153, "learning_rate": 1.2594726606504099e-05, "loss": 0.8974, "step": 4847 }, { "epoch": 0.43409256253847445, "grad_norm": 0.9943050642371936, "learning_rate": 1.2591925542345244e-05, "loss": 0.828, "step": 4848 }, { "epoch": 0.43418210308355254, "grad_norm": 0.9287191183927296, "learning_rate": 1.258912426016206e-05, "loss": 0.8001, "step": 4849 }, { "epoch": 0.4342716436286306, "grad_norm": 1.1642083723544498, "learning_rate": 1.2586322760190183e-05, "loss": 0.8739, "step": 4850 }, { "epoch": 0.43436118417370867, "grad_norm": 0.9312804456714632, "learning_rate": 1.258352104266526e-05, "loss": 0.8281, "step": 4851 }, { "epoch": 0.4344507247187867, "grad_norm": 0.956289400656988, "learning_rate": 1.2580719107822966e-05, "loss": 0.8484, "step": 4852 }, { "epoch": 0.4345402652638648, "grad_norm": 0.8857536963242103, "learning_rate": 1.2577916955898993e-05, "loss": 0.8614, "step": 4853 }, { "epoch": 0.43462980580894284, "grad_norm": 0.9593652202117886, "learning_rate": 1.2575114587129042e-05, "loss": 0.8533, "step": 4854 }, { "epoch": 0.43471934635402093, "grad_norm": 0.9766609288880667, "learning_rate": 1.2572312001748845e-05, "loss": 0.8229, "step": 4855 }, { "epoch": 0.434808886899099, "grad_norm": 0.9907412472258096, "learning_rate": 1.2569509199994147e-05, "loss": 0.8451, "step": 4856 }, { "epoch": 0.43489842744417706, "grad_norm": 0.9600086977177898, "learning_rate": 1.2566706182100706e-05, "loss": 0.8456, "step": 4857 }, { "epoch": 0.43498796798925515, "grad_norm": 0.8453638121218173, "learning_rate": 1.256390294830431e-05, "loss": 0.7841, "step": 4858 }, { "epoch": 0.4350775085343332, "grad_norm": 0.8854204102440567, "learning_rate": 1.2561099498840748e-05, "loss": 0.8831, "step": 4859 }, { "epoch": 0.4351670490794113, "grad_norm": 0.92437048691386, "learning_rate": 1.2558295833945842e-05, "loss": 0.8276, "step": 4860 }, { "epoch": 0.4352565896244893, "grad_norm": 0.9321453878329122, "learning_rate": 1.2555491953855427e-05, "loss": 0.7855, "step": 4861 }, { "epoch": 0.4353461301695674, "grad_norm": 0.9238108323672232, "learning_rate": 1.2552687858805359e-05, "loss": 0.8151, "step": 4862 }, { "epoch": 0.43543567071464545, "grad_norm": 0.9826108417409795, "learning_rate": 1.2549883549031505e-05, "loss": 0.8301, "step": 4863 }, { "epoch": 0.43552521125972354, "grad_norm": 1.0144682853648659, "learning_rate": 1.2547079024769757e-05, "loss": 0.8668, "step": 4864 }, { "epoch": 0.43561475180480164, "grad_norm": 0.9581370800110325, "learning_rate": 1.2544274286256021e-05, "loss": 0.8043, "step": 4865 }, { "epoch": 0.4357042923498797, "grad_norm": 0.9355672582910216, "learning_rate": 1.2541469333726225e-05, "loss": 0.8319, "step": 4866 }, { "epoch": 0.43579383289495777, "grad_norm": 0.8879904719894574, "learning_rate": 1.2538664167416308e-05, "loss": 0.8221, "step": 4867 }, { "epoch": 0.4358833734400358, "grad_norm": 0.8914566367794373, "learning_rate": 1.2535858787562237e-05, "loss": 0.8331, "step": 4868 }, { "epoch": 0.4359729139851139, "grad_norm": 0.9439988126122387, "learning_rate": 1.2533053194399984e-05, "loss": 0.8195, "step": 4869 }, { "epoch": 0.43606245453019193, "grad_norm": 1.032441963435167, "learning_rate": 1.2530247388165553e-05, "loss": 0.7837, "step": 4870 }, { "epoch": 0.43615199507527, "grad_norm": 0.9839367158883598, "learning_rate": 1.2527441369094958e-05, "loss": 0.9404, "step": 4871 }, { "epoch": 0.43624153562034806, "grad_norm": 0.8936881609992845, "learning_rate": 1.252463513742423e-05, "loss": 0.861, "step": 4872 }, { "epoch": 0.43633107616542616, "grad_norm": 0.9919945228462781, "learning_rate": 1.2521828693389422e-05, "loss": 0.8732, "step": 4873 }, { "epoch": 0.43642061671050425, "grad_norm": 0.9998797561955459, "learning_rate": 1.25190220372266e-05, "loss": 0.8568, "step": 4874 }, { "epoch": 0.4365101572555823, "grad_norm": 0.8743666337756882, "learning_rate": 1.2516215169171854e-05, "loss": 0.902, "step": 4875 }, { "epoch": 0.4365996978006604, "grad_norm": 0.9929221273125726, "learning_rate": 1.251340808946129e-05, "loss": 0.8634, "step": 4876 }, { "epoch": 0.4366892383457384, "grad_norm": 0.9164505987848556, "learning_rate": 1.2510600798331028e-05, "loss": 0.8194, "step": 4877 }, { "epoch": 0.4367787788908165, "grad_norm": 0.9094308825181555, "learning_rate": 1.2507793296017203e-05, "loss": 0.7992, "step": 4878 }, { "epoch": 0.43686831943589455, "grad_norm": 0.9837583632315312, "learning_rate": 1.2504985582755981e-05, "loss": 0.7807, "step": 4879 }, { "epoch": 0.43695785998097264, "grad_norm": 0.9395724757938536, "learning_rate": 1.2502177658783538e-05, "loss": 0.8375, "step": 4880 }, { "epoch": 0.4370474005260507, "grad_norm": 0.9354370429545157, "learning_rate": 1.249936952433606e-05, "loss": 0.8013, "step": 4881 }, { "epoch": 0.43713694107112877, "grad_norm": 1.0593030466375308, "learning_rate": 1.2496561179649764e-05, "loss": 0.8032, "step": 4882 }, { "epoch": 0.43722648161620686, "grad_norm": 0.989252234045883, "learning_rate": 1.2493752624960879e-05, "loss": 0.8859, "step": 4883 }, { "epoch": 0.4373160221612849, "grad_norm": 0.9363586019273621, "learning_rate": 1.2490943860505647e-05, "loss": 0.8421, "step": 4884 }, { "epoch": 0.437405562706363, "grad_norm": 0.9116939840105585, "learning_rate": 1.2488134886520334e-05, "loss": 0.8249, "step": 4885 }, { "epoch": 0.43749510325144103, "grad_norm": 0.9944678951709268, "learning_rate": 1.2485325703241226e-05, "loss": 0.7938, "step": 4886 }, { "epoch": 0.4375846437965191, "grad_norm": 1.0142632450539617, "learning_rate": 1.2482516310904616e-05, "loss": 0.8001, "step": 4887 }, { "epoch": 0.43767418434159716, "grad_norm": 1.0490125939385535, "learning_rate": 1.2479706709746821e-05, "loss": 0.8641, "step": 4888 }, { "epoch": 0.43776372488667525, "grad_norm": 1.0543424651295565, "learning_rate": 1.2476896900004185e-05, "loss": 0.8338, "step": 4889 }, { "epoch": 0.4378532654317533, "grad_norm": 0.9110593979964402, "learning_rate": 1.247408688191305e-05, "loss": 0.8224, "step": 4890 }, { "epoch": 0.4379428059768314, "grad_norm": 0.9884299234095539, "learning_rate": 1.2471276655709788e-05, "loss": 0.8014, "step": 4891 }, { "epoch": 0.4380323465219095, "grad_norm": 0.9723874547467449, "learning_rate": 1.2468466221630787e-05, "loss": 0.8338, "step": 4892 }, { "epoch": 0.4381218870669875, "grad_norm": 0.9351858518365292, "learning_rate": 1.2465655579912453e-05, "loss": 0.8033, "step": 4893 }, { "epoch": 0.4382114276120656, "grad_norm": 0.9688692987112623, "learning_rate": 1.2462844730791203e-05, "loss": 0.8242, "step": 4894 }, { "epoch": 0.43830096815714364, "grad_norm": 0.940403444449089, "learning_rate": 1.2460033674503484e-05, "loss": 0.8321, "step": 4895 }, { "epoch": 0.43839050870222174, "grad_norm": 0.9515037006589611, "learning_rate": 1.2457222411285745e-05, "loss": 0.8176, "step": 4896 }, { "epoch": 0.4384800492472998, "grad_norm": 0.8863816820392372, "learning_rate": 1.2454410941374469e-05, "loss": 0.8396, "step": 4897 }, { "epoch": 0.43856958979237787, "grad_norm": 1.0309614707191332, "learning_rate": 1.2451599265006138e-05, "loss": 0.8648, "step": 4898 }, { "epoch": 0.4386591303374559, "grad_norm": 1.004976265427351, "learning_rate": 1.2448787382417269e-05, "loss": 0.8449, "step": 4899 }, { "epoch": 0.438748670882534, "grad_norm": 1.0998398318460583, "learning_rate": 1.2445975293844383e-05, "loss": 0.8284, "step": 4900 }, { "epoch": 0.4388382114276121, "grad_norm": 0.8518638399587809, "learning_rate": 1.2443162999524027e-05, "loss": 0.7896, "step": 4901 }, { "epoch": 0.43892775197269013, "grad_norm": 0.9732771886053379, "learning_rate": 1.244035049969276e-05, "loss": 0.9182, "step": 4902 }, { "epoch": 0.4390172925177682, "grad_norm": 0.9420961100348079, "learning_rate": 1.2437537794587163e-05, "loss": 0.8357, "step": 4903 }, { "epoch": 0.43910683306284626, "grad_norm": 0.9314178207335466, "learning_rate": 1.243472488444383e-05, "loss": 0.8404, "step": 4904 }, { "epoch": 0.43919637360792435, "grad_norm": 0.9242272731296275, "learning_rate": 1.2431911769499372e-05, "loss": 0.8412, "step": 4905 }, { "epoch": 0.4392859141530024, "grad_norm": 0.9995187945094381, "learning_rate": 1.2429098449990423e-05, "loss": 0.8139, "step": 4906 }, { "epoch": 0.4393754546980805, "grad_norm": 0.8723115759546067, "learning_rate": 1.2426284926153627e-05, "loss": 0.8221, "step": 4907 }, { "epoch": 0.4394649952431585, "grad_norm": 0.9078321455272073, "learning_rate": 1.242347119822565e-05, "loss": 0.9048, "step": 4908 }, { "epoch": 0.4395545357882366, "grad_norm": 1.020795044404057, "learning_rate": 1.2420657266443172e-05, "loss": 0.7848, "step": 4909 }, { "epoch": 0.4396440763333147, "grad_norm": 1.0316547736063222, "learning_rate": 1.2417843131042894e-05, "loss": 0.8401, "step": 4910 }, { "epoch": 0.43973361687839274, "grad_norm": 0.9798567204117883, "learning_rate": 1.2415028792261529e-05, "loss": 0.8512, "step": 4911 }, { "epoch": 0.43982315742347083, "grad_norm": 1.0317789462671159, "learning_rate": 1.2412214250335815e-05, "loss": 0.834, "step": 4912 }, { "epoch": 0.43991269796854887, "grad_norm": 0.8923677523236467, "learning_rate": 1.24093995055025e-05, "loss": 0.8545, "step": 4913 }, { "epoch": 0.44000223851362696, "grad_norm": 0.8500561079129809, "learning_rate": 1.2406584557998347e-05, "loss": 0.8319, "step": 4914 }, { "epoch": 0.440091779058705, "grad_norm": 0.9447697684315952, "learning_rate": 1.240376940806014e-05, "loss": 0.8887, "step": 4915 }, { "epoch": 0.4401813196037831, "grad_norm": 0.8556924786478811, "learning_rate": 1.240095405592469e-05, "loss": 0.8691, "step": 4916 }, { "epoch": 0.44027086014886113, "grad_norm": 0.9621086002716165, "learning_rate": 1.2398138501828806e-05, "loss": 0.8927, "step": 4917 }, { "epoch": 0.4403604006939392, "grad_norm": 1.0390473445427628, "learning_rate": 1.2395322746009323e-05, "loss": 0.8319, "step": 4918 }, { "epoch": 0.4404499412390173, "grad_norm": 0.9989174376592065, "learning_rate": 1.2392506788703103e-05, "loss": 0.8595, "step": 4919 }, { "epoch": 0.44053948178409535, "grad_norm": 0.8543573444440964, "learning_rate": 1.2389690630147004e-05, "loss": 0.8381, "step": 4920 }, { "epoch": 0.44062902232917345, "grad_norm": 0.9305264194522433, "learning_rate": 1.2386874270577918e-05, "loss": 0.811, "step": 4921 }, { "epoch": 0.4407185628742515, "grad_norm": 0.9129230439157308, "learning_rate": 1.2384057710232747e-05, "loss": 0.8336, "step": 4922 }, { "epoch": 0.4408081034193296, "grad_norm": 1.021099217122807, "learning_rate": 1.2381240949348407e-05, "loss": 0.8498, "step": 4923 }, { "epoch": 0.4408976439644076, "grad_norm": 0.9470821676390883, "learning_rate": 1.2378423988161843e-05, "loss": 0.8161, "step": 4924 }, { "epoch": 0.4409871845094857, "grad_norm": 1.062166192138627, "learning_rate": 1.2375606826910001e-05, "loss": 0.8032, "step": 4925 }, { "epoch": 0.44107672505456375, "grad_norm": 1.184185396934736, "learning_rate": 1.2372789465829853e-05, "loss": 0.864, "step": 4926 }, { "epoch": 0.44116626559964184, "grad_norm": 0.9313128033183719, "learning_rate": 1.2369971905158389e-05, "loss": 0.8264, "step": 4927 }, { "epoch": 0.44125580614471993, "grad_norm": 1.0072106537347125, "learning_rate": 1.2367154145132609e-05, "loss": 0.7892, "step": 4928 }, { "epoch": 0.44134534668979797, "grad_norm": 0.9670566761518096, "learning_rate": 1.2364336185989538e-05, "loss": 0.88, "step": 4929 }, { "epoch": 0.44143488723487606, "grad_norm": 0.8704635214354152, "learning_rate": 1.2361518027966213e-05, "loss": 0.8261, "step": 4930 }, { "epoch": 0.4415244277799541, "grad_norm": 0.951435724989145, "learning_rate": 1.2358699671299685e-05, "loss": 0.7763, "step": 4931 }, { "epoch": 0.4416139683250322, "grad_norm": 1.0785750820972066, "learning_rate": 1.2355881116227028e-05, "loss": 0.8176, "step": 4932 }, { "epoch": 0.44170350887011023, "grad_norm": 1.2443039006183512, "learning_rate": 1.2353062362985329e-05, "loss": 0.8671, "step": 4933 }, { "epoch": 0.4417930494151883, "grad_norm": 0.885145197162479, "learning_rate": 1.2350243411811691e-05, "loss": 0.8548, "step": 4934 }, { "epoch": 0.44188258996026636, "grad_norm": 0.9248300110923634, "learning_rate": 1.2347424262943235e-05, "loss": 0.8323, "step": 4935 }, { "epoch": 0.44197213050534445, "grad_norm": 0.9126143159353564, "learning_rate": 1.2344604916617102e-05, "loss": 0.8299, "step": 4936 }, { "epoch": 0.44206167105042254, "grad_norm": 1.0233899041280883, "learning_rate": 1.2341785373070442e-05, "loss": 0.8526, "step": 4937 }, { "epoch": 0.4421512115955006, "grad_norm": 0.950629001855324, "learning_rate": 1.2338965632540428e-05, "loss": 0.733, "step": 4938 }, { "epoch": 0.4422407521405787, "grad_norm": 1.032065747350411, "learning_rate": 1.2336145695264247e-05, "loss": 0.8403, "step": 4939 }, { "epoch": 0.4423302926856567, "grad_norm": 0.8957667603492465, "learning_rate": 1.2333325561479106e-05, "loss": 0.8039, "step": 4940 }, { "epoch": 0.4424198332307348, "grad_norm": 0.9266927363038796, "learning_rate": 1.2330505231422219e-05, "loss": 0.835, "step": 4941 }, { "epoch": 0.44250937377581284, "grad_norm": 1.0413648888592126, "learning_rate": 1.2327684705330825e-05, "loss": 0.8436, "step": 4942 }, { "epoch": 0.44259891432089093, "grad_norm": 0.8938951236187573, "learning_rate": 1.2324863983442184e-05, "loss": 0.8329, "step": 4943 }, { "epoch": 0.44268845486596897, "grad_norm": 0.8915640194846318, "learning_rate": 1.2322043065993556e-05, "loss": 0.8559, "step": 4944 }, { "epoch": 0.44277799541104707, "grad_norm": 0.9845823567038516, "learning_rate": 1.2319221953222232e-05, "loss": 0.7961, "step": 4945 }, { "epoch": 0.44286753595612516, "grad_norm": 0.9210903379218929, "learning_rate": 1.2316400645365518e-05, "loss": 0.8894, "step": 4946 }, { "epoch": 0.4429570765012032, "grad_norm": 0.9067395159248738, "learning_rate": 1.2313579142660727e-05, "loss": 0.8569, "step": 4947 }, { "epoch": 0.4430466170462813, "grad_norm": 0.8280645533265587, "learning_rate": 1.2310757445345199e-05, "loss": 0.8803, "step": 4948 }, { "epoch": 0.4431361575913593, "grad_norm": 0.9893197676544135, "learning_rate": 1.2307935553656288e-05, "loss": 0.8423, "step": 4949 }, { "epoch": 0.4432256981364374, "grad_norm": 0.9962626141771275, "learning_rate": 1.2305113467831356e-05, "loss": 0.8947, "step": 4950 }, { "epoch": 0.44331523868151546, "grad_norm": 1.0520409540531206, "learning_rate": 1.230229118810779e-05, "loss": 0.8478, "step": 4951 }, { "epoch": 0.44340477922659355, "grad_norm": 0.9643138102887774, "learning_rate": 1.2299468714722993e-05, "loss": 0.8375, "step": 4952 }, { "epoch": 0.4434943197716716, "grad_norm": 0.936928678486314, "learning_rate": 1.229664604791438e-05, "loss": 0.8371, "step": 4953 }, { "epoch": 0.4435838603167497, "grad_norm": 0.9232266086815668, "learning_rate": 1.2293823187919388e-05, "loss": 0.8445, "step": 4954 }, { "epoch": 0.44367340086182777, "grad_norm": 1.0084610926388942, "learning_rate": 1.2291000134975462e-05, "loss": 0.8885, "step": 4955 }, { "epoch": 0.4437629414069058, "grad_norm": 0.8668814579577111, "learning_rate": 1.228817688932007e-05, "loss": 0.8227, "step": 4956 }, { "epoch": 0.4438524819519839, "grad_norm": 0.9969676529499523, "learning_rate": 1.2285353451190696e-05, "loss": 0.8772, "step": 4957 }, { "epoch": 0.44394202249706194, "grad_norm": 0.9952897460148868, "learning_rate": 1.2282529820824837e-05, "loss": 0.8021, "step": 4958 }, { "epoch": 0.44403156304214003, "grad_norm": 0.9623233913075947, "learning_rate": 1.2279705998460008e-05, "loss": 0.8399, "step": 4959 }, { "epoch": 0.44412110358721807, "grad_norm": 0.9248963466963206, "learning_rate": 1.2276881984333738e-05, "loss": 0.846, "step": 4960 }, { "epoch": 0.44421064413229616, "grad_norm": 1.0384997867644628, "learning_rate": 1.2274057778683574e-05, "loss": 0.8739, "step": 4961 }, { "epoch": 0.4443001846773742, "grad_norm": 1.1292807330123693, "learning_rate": 1.2271233381747082e-05, "loss": 0.8295, "step": 4962 }, { "epoch": 0.4443897252224523, "grad_norm": 0.9459633704729262, "learning_rate": 1.2268408793761839e-05, "loss": 0.9017, "step": 4963 }, { "epoch": 0.4444792657675304, "grad_norm": 1.035876805320125, "learning_rate": 1.2265584014965439e-05, "loss": 0.9068, "step": 4964 }, { "epoch": 0.4445688063126084, "grad_norm": 0.9490420753535469, "learning_rate": 1.2262759045595497e-05, "loss": 0.8556, "step": 4965 }, { "epoch": 0.4446583468576865, "grad_norm": 0.9694011833638522, "learning_rate": 1.2259933885889636e-05, "loss": 0.8797, "step": 4966 }, { "epoch": 0.44474788740276455, "grad_norm": 0.8440779978244876, "learning_rate": 1.2257108536085502e-05, "loss": 0.8496, "step": 4967 }, { "epoch": 0.44483742794784265, "grad_norm": 0.8780891172009224, "learning_rate": 1.2254282996420755e-05, "loss": 0.795, "step": 4968 }, { "epoch": 0.4449269684929207, "grad_norm": 1.0158621285964142, "learning_rate": 1.2251457267133065e-05, "loss": 0.8642, "step": 4969 }, { "epoch": 0.4450165090379988, "grad_norm": 0.8798105786000459, "learning_rate": 1.224863134846013e-05, "loss": 0.7793, "step": 4970 }, { "epoch": 0.4451060495830768, "grad_norm": 1.016548071332554, "learning_rate": 1.224580524063965e-05, "loss": 0.8275, "step": 4971 }, { "epoch": 0.4451955901281549, "grad_norm": 1.0451586237062631, "learning_rate": 1.2242978943909352e-05, "loss": 0.8224, "step": 4972 }, { "epoch": 0.445285130673233, "grad_norm": 0.9168964105503629, "learning_rate": 1.2240152458506975e-05, "loss": 0.8517, "step": 4973 }, { "epoch": 0.44537467121831104, "grad_norm": 0.9804228743898941, "learning_rate": 1.2237325784670272e-05, "loss": 0.835, "step": 4974 }, { "epoch": 0.44546421176338913, "grad_norm": 0.9384641262746605, "learning_rate": 1.2234498922637017e-05, "loss": 0.8186, "step": 4975 }, { "epoch": 0.44555375230846717, "grad_norm": 1.1958438975670578, "learning_rate": 1.2231671872644995e-05, "loss": 0.8754, "step": 4976 }, { "epoch": 0.44564329285354526, "grad_norm": 0.8264561709159851, "learning_rate": 1.2228844634932005e-05, "loss": 0.8065, "step": 4977 }, { "epoch": 0.4457328333986233, "grad_norm": 1.1828203154188872, "learning_rate": 1.2226017209735867e-05, "loss": 0.8772, "step": 4978 }, { "epoch": 0.4458223739437014, "grad_norm": 0.9188282304430239, "learning_rate": 1.2223189597294419e-05, "loss": 0.8193, "step": 4979 }, { "epoch": 0.4459119144887794, "grad_norm": 1.0597947556272984, "learning_rate": 1.2220361797845504e-05, "loss": 0.867, "step": 4980 }, { "epoch": 0.4460014550338575, "grad_norm": 0.9156626830669515, "learning_rate": 1.2217533811626988e-05, "loss": 0.8115, "step": 4981 }, { "epoch": 0.4460909955789356, "grad_norm": 1.0755799364806071, "learning_rate": 1.2214705638876757e-05, "loss": 0.9017, "step": 4982 }, { "epoch": 0.44618053612401365, "grad_norm": 0.8569980167747868, "learning_rate": 1.2211877279832704e-05, "loss": 0.8334, "step": 4983 }, { "epoch": 0.44627007666909174, "grad_norm": 1.0293003650500874, "learning_rate": 1.2209048734732742e-05, "loss": 0.8303, "step": 4984 }, { "epoch": 0.4463596172141698, "grad_norm": 0.9230990220496825, "learning_rate": 1.22062200038148e-05, "loss": 0.8172, "step": 4985 }, { "epoch": 0.44644915775924787, "grad_norm": 0.8764458824383204, "learning_rate": 1.2203391087316821e-05, "loss": 0.8459, "step": 4986 }, { "epoch": 0.4465386983043259, "grad_norm": 0.8885519757059731, "learning_rate": 1.2200561985476762e-05, "loss": 0.8104, "step": 4987 }, { "epoch": 0.446628238849404, "grad_norm": 1.2263345648822574, "learning_rate": 1.21977326985326e-05, "loss": 0.7992, "step": 4988 }, { "epoch": 0.44671777939448204, "grad_norm": 1.3156167415923492, "learning_rate": 1.2194903226722328e-05, "loss": 0.86, "step": 4989 }, { "epoch": 0.44680731993956013, "grad_norm": 1.029644122199806, "learning_rate": 1.2192073570283947e-05, "loss": 0.8398, "step": 4990 }, { "epoch": 0.4468968604846382, "grad_norm": 1.1465405872284344, "learning_rate": 1.218924372945548e-05, "loss": 0.8523, "step": 4991 }, { "epoch": 0.44698640102971626, "grad_norm": 0.9363230774719453, "learning_rate": 1.2186413704474964e-05, "loss": 0.8589, "step": 4992 }, { "epoch": 0.44707594157479436, "grad_norm": 1.1094642252568914, "learning_rate": 1.2183583495580453e-05, "loss": 0.8392, "step": 4993 }, { "epoch": 0.4471654821198724, "grad_norm": 1.039528691212776, "learning_rate": 1.2180753103010015e-05, "loss": 0.8872, "step": 4994 }, { "epoch": 0.4472550226649505, "grad_norm": 0.9589759577702714, "learning_rate": 1.2177922527001734e-05, "loss": 0.885, "step": 4995 }, { "epoch": 0.4473445632100285, "grad_norm": 0.9175534626459917, "learning_rate": 1.2175091767793701e-05, "loss": 0.8294, "step": 4996 }, { "epoch": 0.4474341037551066, "grad_norm": 0.9449186173401207, "learning_rate": 1.217226082562404e-05, "loss": 0.8429, "step": 4997 }, { "epoch": 0.44752364430018465, "grad_norm": 1.024442959510463, "learning_rate": 1.216942970073088e-05, "loss": 0.8517, "step": 4998 }, { "epoch": 0.44761318484526275, "grad_norm": 0.9028098604588379, "learning_rate": 1.216659839335236e-05, "loss": 0.8256, "step": 4999 }, { "epoch": 0.44770272539034084, "grad_norm": 1.0690473628953165, "learning_rate": 1.2163766903726645e-05, "loss": 0.8876, "step": 5000 }, { "epoch": 0.4477922659354189, "grad_norm": 0.9830249177088282, "learning_rate": 1.2160935232091908e-05, "loss": 0.858, "step": 5001 }, { "epoch": 0.44788180648049697, "grad_norm": 0.9521578278960316, "learning_rate": 1.215810337868634e-05, "loss": 0.8528, "step": 5002 }, { "epoch": 0.447971347025575, "grad_norm": 1.1102308415365079, "learning_rate": 1.2155271343748151e-05, "loss": 0.9412, "step": 5003 }, { "epoch": 0.4480608875706531, "grad_norm": 0.9455047671976106, "learning_rate": 1.2152439127515558e-05, "loss": 0.8757, "step": 5004 }, { "epoch": 0.44815042811573114, "grad_norm": 0.9175710622762737, "learning_rate": 1.21496067302268e-05, "loss": 0.8845, "step": 5005 }, { "epoch": 0.44823996866080923, "grad_norm": 0.9735470488833643, "learning_rate": 1.2146774152120128e-05, "loss": 0.858, "step": 5006 }, { "epoch": 0.44832950920588727, "grad_norm": 0.9515093341645374, "learning_rate": 1.2143941393433813e-05, "loss": 0.8402, "step": 5007 }, { "epoch": 0.44841904975096536, "grad_norm": 1.075878236999601, "learning_rate": 1.214110845440613e-05, "loss": 0.8303, "step": 5008 }, { "epoch": 0.44850859029604345, "grad_norm": 1.0775874471880393, "learning_rate": 1.2138275335275387e-05, "loss": 0.8854, "step": 5009 }, { "epoch": 0.4485981308411215, "grad_norm": 0.9838436071431359, "learning_rate": 1.2135442036279885e-05, "loss": 0.8022, "step": 5010 }, { "epoch": 0.4486876713861996, "grad_norm": 0.8715541070223323, "learning_rate": 1.2132608557657961e-05, "loss": 0.838, "step": 5011 }, { "epoch": 0.4487772119312776, "grad_norm": 0.920424404198566, "learning_rate": 1.2129774899647955e-05, "loss": 0.909, "step": 5012 }, { "epoch": 0.4488667524763557, "grad_norm": 1.2476885087120966, "learning_rate": 1.2126941062488222e-05, "loss": 0.8564, "step": 5013 }, { "epoch": 0.44895629302143375, "grad_norm": 0.9691536001664944, "learning_rate": 1.212410704641714e-05, "loss": 0.8518, "step": 5014 }, { "epoch": 0.44904583356651184, "grad_norm": 0.8748288122334094, "learning_rate": 1.2121272851673094e-05, "loss": 0.7749, "step": 5015 }, { "epoch": 0.4491353741115899, "grad_norm": 0.9663868279098115, "learning_rate": 1.211843847849449e-05, "loss": 0.8512, "step": 5016 }, { "epoch": 0.449224914656668, "grad_norm": 0.8963985780723251, "learning_rate": 1.2115603927119744e-05, "loss": 0.8501, "step": 5017 }, { "epoch": 0.44931445520174607, "grad_norm": 0.9513994561232421, "learning_rate": 1.2112769197787288e-05, "loss": 0.8996, "step": 5018 }, { "epoch": 0.4494039957468241, "grad_norm": 0.9265616177900434, "learning_rate": 1.2109934290735572e-05, "loss": 0.8653, "step": 5019 }, { "epoch": 0.4494935362919022, "grad_norm": 0.906233274036701, "learning_rate": 1.2107099206203061e-05, "loss": 0.8348, "step": 5020 }, { "epoch": 0.44958307683698023, "grad_norm": 0.9562366941701399, "learning_rate": 1.210426394442823e-05, "loss": 0.9177, "step": 5021 }, { "epoch": 0.4496726173820583, "grad_norm": 0.9691958071315939, "learning_rate": 1.2101428505649578e-05, "loss": 0.8193, "step": 5022 }, { "epoch": 0.44976215792713636, "grad_norm": 0.8736806838470305, "learning_rate": 1.20985928901056e-05, "loss": 0.8036, "step": 5023 }, { "epoch": 0.44985169847221446, "grad_norm": 0.9464448440226463, "learning_rate": 1.209575709803483e-05, "loss": 0.8988, "step": 5024 }, { "epoch": 0.4499412390172925, "grad_norm": 0.9527657137557682, "learning_rate": 1.2092921129675806e-05, "loss": 0.9084, "step": 5025 }, { "epoch": 0.4500307795623706, "grad_norm": 0.8756618404720549, "learning_rate": 1.209008498526707e-05, "loss": 0.8439, "step": 5026 }, { "epoch": 0.4501203201074487, "grad_norm": 1.1309710970522686, "learning_rate": 1.20872486650472e-05, "loss": 0.8143, "step": 5027 }, { "epoch": 0.4502098606525267, "grad_norm": 0.9583075346596197, "learning_rate": 1.2084412169254776e-05, "loss": 0.7868, "step": 5028 }, { "epoch": 0.4502994011976048, "grad_norm": 0.8817625853100305, "learning_rate": 1.2081575498128389e-05, "loss": 0.8208, "step": 5029 }, { "epoch": 0.45038894174268285, "grad_norm": 1.0500416547906286, "learning_rate": 1.2078738651906657e-05, "loss": 0.8986, "step": 5030 }, { "epoch": 0.45047848228776094, "grad_norm": 0.9426854713842331, "learning_rate": 1.2075901630828201e-05, "loss": 0.8589, "step": 5031 }, { "epoch": 0.450568022832839, "grad_norm": 0.8786347066571443, "learning_rate": 1.2073064435131665e-05, "loss": 0.8035, "step": 5032 }, { "epoch": 0.45065756337791707, "grad_norm": 1.085281747249534, "learning_rate": 1.2070227065055707e-05, "loss": 0.8606, "step": 5033 }, { "epoch": 0.4507471039229951, "grad_norm": 0.8858325152944133, "learning_rate": 1.2067389520838993e-05, "loss": 0.8499, "step": 5034 }, { "epoch": 0.4508366444680732, "grad_norm": 0.978484079414316, "learning_rate": 1.2064551802720206e-05, "loss": 0.813, "step": 5035 }, { "epoch": 0.4509261850131513, "grad_norm": 0.9669722128065028, "learning_rate": 1.2061713910938055e-05, "loss": 0.8472, "step": 5036 }, { "epoch": 0.45101572555822933, "grad_norm": 0.9185166457149108, "learning_rate": 1.2058875845731246e-05, "loss": 0.8386, "step": 5037 }, { "epoch": 0.4511052661033074, "grad_norm": 0.8713806588817347, "learning_rate": 1.2056037607338507e-05, "loss": 0.7731, "step": 5038 }, { "epoch": 0.45119480664838546, "grad_norm": 0.9426875708284569, "learning_rate": 1.2053199195998588e-05, "loss": 0.8327, "step": 5039 }, { "epoch": 0.45128434719346355, "grad_norm": 1.0103196506640122, "learning_rate": 1.2050360611950245e-05, "loss": 0.8511, "step": 5040 }, { "epoch": 0.4513738877385416, "grad_norm": 0.8751700931689197, "learning_rate": 1.2047521855432245e-05, "loss": 0.807, "step": 5041 }, { "epoch": 0.4514634282836197, "grad_norm": 0.9615425688301869, "learning_rate": 1.204468292668338e-05, "loss": 0.836, "step": 5042 }, { "epoch": 0.4515529688286977, "grad_norm": 1.0200752562956759, "learning_rate": 1.2041843825942452e-05, "loss": 0.8344, "step": 5043 }, { "epoch": 0.4516425093737758, "grad_norm": 1.0404064713980927, "learning_rate": 1.2039004553448272e-05, "loss": 0.8832, "step": 5044 }, { "epoch": 0.4517320499188539, "grad_norm": 1.1332377058641891, "learning_rate": 1.2036165109439675e-05, "loss": 0.8583, "step": 5045 }, { "epoch": 0.45182159046393194, "grad_norm": 0.8467009664062819, "learning_rate": 1.2033325494155505e-05, "loss": 0.7623, "step": 5046 }, { "epoch": 0.45191113100901004, "grad_norm": 0.8533850006801387, "learning_rate": 1.203048570783462e-05, "loss": 0.8314, "step": 5047 }, { "epoch": 0.4520006715540881, "grad_norm": 0.8891812973518456, "learning_rate": 1.2027645750715894e-05, "loss": 0.8855, "step": 5048 }, { "epoch": 0.45209021209916617, "grad_norm": 0.9720290914438073, "learning_rate": 1.2024805623038214e-05, "loss": 0.8662, "step": 5049 }, { "epoch": 0.4521797526442442, "grad_norm": 0.952712247465624, "learning_rate": 1.2021965325040483e-05, "loss": 0.8539, "step": 5050 }, { "epoch": 0.4522692931893223, "grad_norm": 0.9706085889264378, "learning_rate": 1.2019124856961619e-05, "loss": 0.8322, "step": 5051 }, { "epoch": 0.45235883373440033, "grad_norm": 0.9439906860264836, "learning_rate": 1.2016284219040555e-05, "loss": 0.8252, "step": 5052 }, { "epoch": 0.4524483742794784, "grad_norm": 1.1548567721346144, "learning_rate": 1.2013443411516228e-05, "loss": 0.8043, "step": 5053 }, { "epoch": 0.4525379148245565, "grad_norm": 1.0843588209208894, "learning_rate": 1.2010602434627603e-05, "loss": 0.891, "step": 5054 }, { "epoch": 0.45262745536963456, "grad_norm": 0.991124332534902, "learning_rate": 1.2007761288613655e-05, "loss": 0.8852, "step": 5055 }, { "epoch": 0.45271699591471265, "grad_norm": 1.0093878302310264, "learning_rate": 1.200491997371337e-05, "loss": 0.8069, "step": 5056 }, { "epoch": 0.4528065364597907, "grad_norm": 1.2559665885680094, "learning_rate": 1.2002078490165747e-05, "loss": 0.8714, "step": 5057 }, { "epoch": 0.4528960770048688, "grad_norm": 1.0262695295007849, "learning_rate": 1.1999236838209812e-05, "loss": 0.8129, "step": 5058 }, { "epoch": 0.4529856175499468, "grad_norm": 0.8968808186122779, "learning_rate": 1.1996395018084581e-05, "loss": 0.8495, "step": 5059 }, { "epoch": 0.4530751580950249, "grad_norm": 0.9031510893666217, "learning_rate": 1.1993553030029115e-05, "loss": 0.8235, "step": 5060 }, { "epoch": 0.45316469864010295, "grad_norm": 0.8860369160878107, "learning_rate": 1.199071087428246e-05, "loss": 0.8462, "step": 5061 }, { "epoch": 0.45325423918518104, "grad_norm": 0.8163072797688293, "learning_rate": 1.1987868551083693e-05, "loss": 0.8635, "step": 5062 }, { "epoch": 0.45334377973025913, "grad_norm": 0.9075454864860621, "learning_rate": 1.1985026060671903e-05, "loss": 0.8916, "step": 5063 }, { "epoch": 0.45343332027533717, "grad_norm": 1.0349385187091114, "learning_rate": 1.1982183403286186e-05, "loss": 0.8544, "step": 5064 }, { "epoch": 0.45352286082041526, "grad_norm": 0.9843618909773673, "learning_rate": 1.1979340579165664e-05, "loss": 0.853, "step": 5065 }, { "epoch": 0.4536124013654933, "grad_norm": 1.104034288122838, "learning_rate": 1.1976497588549462e-05, "loss": 0.8408, "step": 5066 }, { "epoch": 0.4537019419105714, "grad_norm": 0.9659715076829284, "learning_rate": 1.1973654431676724e-05, "loss": 0.8183, "step": 5067 }, { "epoch": 0.45379148245564943, "grad_norm": 0.8930634387351026, "learning_rate": 1.1970811108786604e-05, "loss": 0.855, "step": 5068 }, { "epoch": 0.4538810230007275, "grad_norm": 0.9434983981650649, "learning_rate": 1.196796762011828e-05, "loss": 0.814, "step": 5069 }, { "epoch": 0.45397056354580556, "grad_norm": 0.8963944008765798, "learning_rate": 1.196512396591093e-05, "loss": 0.8658, "step": 5070 }, { "epoch": 0.45406010409088365, "grad_norm": 1.0549202590809732, "learning_rate": 1.1962280146403757e-05, "loss": 0.8881, "step": 5071 }, { "epoch": 0.45414964463596175, "grad_norm": 0.9763210000438982, "learning_rate": 1.1959436161835971e-05, "loss": 0.8121, "step": 5072 }, { "epoch": 0.4542391851810398, "grad_norm": 0.8721554302357839, "learning_rate": 1.1956592012446802e-05, "loss": 0.8405, "step": 5073 }, { "epoch": 0.4543287257261179, "grad_norm": 0.9252083745266856, "learning_rate": 1.1953747698475488e-05, "loss": 0.8765, "step": 5074 }, { "epoch": 0.4544182662711959, "grad_norm": 0.8973514272673419, "learning_rate": 1.1950903220161286e-05, "loss": 0.8193, "step": 5075 }, { "epoch": 0.454507806816274, "grad_norm": 0.9574005373761372, "learning_rate": 1.194805857774346e-05, "loss": 0.862, "step": 5076 }, { "epoch": 0.45459734736135204, "grad_norm": 0.8591463346459571, "learning_rate": 1.1945213771461295e-05, "loss": 0.8689, "step": 5077 }, { "epoch": 0.45468688790643014, "grad_norm": 1.0080441587426137, "learning_rate": 1.1942368801554087e-05, "loss": 0.8481, "step": 5078 }, { "epoch": 0.4547764284515082, "grad_norm": 0.8771598341755856, "learning_rate": 1.1939523668261144e-05, "loss": 0.8337, "step": 5079 }, { "epoch": 0.45486596899658627, "grad_norm": 0.9629426693749846, "learning_rate": 1.1936678371821792e-05, "loss": 0.872, "step": 5080 }, { "epoch": 0.45495550954166436, "grad_norm": 0.949637300793534, "learning_rate": 1.1933832912475365e-05, "loss": 0.8725, "step": 5081 }, { "epoch": 0.4550450500867424, "grad_norm": 0.9462173293029135, "learning_rate": 1.1930987290461217e-05, "loss": 0.8229, "step": 5082 }, { "epoch": 0.4551345906318205, "grad_norm": 1.1939740197640527, "learning_rate": 1.1928141506018708e-05, "loss": 0.879, "step": 5083 }, { "epoch": 0.4552241311768985, "grad_norm": 1.066220151447293, "learning_rate": 1.1925295559387222e-05, "loss": 0.8385, "step": 5084 }, { "epoch": 0.4553136717219766, "grad_norm": 0.9959461896673916, "learning_rate": 1.1922449450806148e-05, "loss": 0.8122, "step": 5085 }, { "epoch": 0.45540321226705466, "grad_norm": 0.8973387329768747, "learning_rate": 1.1919603180514888e-05, "loss": 0.7462, "step": 5086 }, { "epoch": 0.45549275281213275, "grad_norm": 1.2050414135735144, "learning_rate": 1.1916756748752862e-05, "loss": 0.8801, "step": 5087 }, { "epoch": 0.4555822933572108, "grad_norm": 1.0826626837115767, "learning_rate": 1.191391015575951e-05, "loss": 0.8144, "step": 5088 }, { "epoch": 0.4556718339022889, "grad_norm": 0.9510047669372982, "learning_rate": 1.1911063401774268e-05, "loss": 0.8414, "step": 5089 }, { "epoch": 0.455761374447367, "grad_norm": 0.9026229596587412, "learning_rate": 1.1908216487036602e-05, "loss": 0.8454, "step": 5090 }, { "epoch": 0.455850914992445, "grad_norm": 0.9845568415979884, "learning_rate": 1.1905369411785985e-05, "loss": 0.7829, "step": 5091 }, { "epoch": 0.4559404555375231, "grad_norm": 2.0467374846165822, "learning_rate": 1.1902522176261897e-05, "loss": 0.8519, "step": 5092 }, { "epoch": 0.45602999608260114, "grad_norm": 0.904289742905037, "learning_rate": 1.189967478070385e-05, "loss": 0.7921, "step": 5093 }, { "epoch": 0.45611953662767923, "grad_norm": 1.0154336909027895, "learning_rate": 1.1896827225351347e-05, "loss": 0.919, "step": 5094 }, { "epoch": 0.45620907717275727, "grad_norm": 0.9394985334954377, "learning_rate": 1.1893979510443918e-05, "loss": 0.8153, "step": 5095 }, { "epoch": 0.45629861771783536, "grad_norm": 0.9789784836354222, "learning_rate": 1.1891131636221107e-05, "loss": 0.8504, "step": 5096 }, { "epoch": 0.4563881582629134, "grad_norm": 0.84521230857603, "learning_rate": 1.1888283602922466e-05, "loss": 0.8045, "step": 5097 }, { "epoch": 0.4564776988079915, "grad_norm": 0.9077526319778918, "learning_rate": 1.1885435410787558e-05, "loss": 0.8424, "step": 5098 }, { "epoch": 0.4565672393530696, "grad_norm": 0.9812593693485887, "learning_rate": 1.188258706005597e-05, "loss": 0.8637, "step": 5099 }, { "epoch": 0.4566567798981476, "grad_norm": 1.1596095294244895, "learning_rate": 1.1879738550967295e-05, "loss": 0.7684, "step": 5100 }, { "epoch": 0.4567463204432257, "grad_norm": 0.9701778638188038, "learning_rate": 1.1876889883761136e-05, "loss": 0.8413, "step": 5101 }, { "epoch": 0.45683586098830375, "grad_norm": 0.9801421940186926, "learning_rate": 1.1874041058677115e-05, "loss": 0.8595, "step": 5102 }, { "epoch": 0.45692540153338185, "grad_norm": 1.0641837438955042, "learning_rate": 1.187119207595487e-05, "loss": 0.8564, "step": 5103 }, { "epoch": 0.4570149420784599, "grad_norm": 0.934191559272803, "learning_rate": 1.1868342935834043e-05, "loss": 0.7732, "step": 5104 }, { "epoch": 0.457104482623538, "grad_norm": 0.9082685161884676, "learning_rate": 1.1865493638554298e-05, "loss": 0.8364, "step": 5105 }, { "epoch": 0.457194023168616, "grad_norm": 0.926208624429448, "learning_rate": 1.1862644184355307e-05, "loss": 0.8419, "step": 5106 }, { "epoch": 0.4572835637136941, "grad_norm": 0.9167973198404721, "learning_rate": 1.1859794573476757e-05, "loss": 0.8095, "step": 5107 }, { "epoch": 0.4573731042587722, "grad_norm": 1.2181386293302687, "learning_rate": 1.1856944806158348e-05, "loss": 0.8321, "step": 5108 }, { "epoch": 0.45746264480385024, "grad_norm": 1.3809273745703932, "learning_rate": 1.1854094882639792e-05, "loss": 0.8362, "step": 5109 }, { "epoch": 0.45755218534892833, "grad_norm": 0.8974751594210281, "learning_rate": 1.1851244803160818e-05, "loss": 0.8863, "step": 5110 }, { "epoch": 0.45764172589400637, "grad_norm": 1.1250431284468114, "learning_rate": 1.1848394567961163e-05, "loss": 0.8926, "step": 5111 }, { "epoch": 0.45773126643908446, "grad_norm": 0.9693129387220083, "learning_rate": 1.1845544177280581e-05, "loss": 0.8296, "step": 5112 }, { "epoch": 0.4578208069841625, "grad_norm": 1.082412697049325, "learning_rate": 1.1842693631358835e-05, "loss": 0.8589, "step": 5113 }, { "epoch": 0.4579103475292406, "grad_norm": 0.9978118112796628, "learning_rate": 1.1839842930435707e-05, "loss": 0.8798, "step": 5114 }, { "epoch": 0.45799988807431863, "grad_norm": 0.963276683881721, "learning_rate": 1.183699207475099e-05, "loss": 0.8816, "step": 5115 }, { "epoch": 0.4580894286193967, "grad_norm": 0.9160337842433388, "learning_rate": 1.183414106454448e-05, "loss": 0.879, "step": 5116 }, { "epoch": 0.4581789691644748, "grad_norm": 0.9436601993934784, "learning_rate": 1.1831289900056005e-05, "loss": 0.8024, "step": 5117 }, { "epoch": 0.45826850970955285, "grad_norm": 1.0221115122408104, "learning_rate": 1.182843858152539e-05, "loss": 0.8323, "step": 5118 }, { "epoch": 0.45835805025463094, "grad_norm": 0.9255867967578555, "learning_rate": 1.1825587109192478e-05, "loss": 0.8009, "step": 5119 }, { "epoch": 0.458447590799709, "grad_norm": 0.9910793504294045, "learning_rate": 1.182273548329713e-05, "loss": 0.8418, "step": 5120 }, { "epoch": 0.4585371313447871, "grad_norm": 1.0377871238486296, "learning_rate": 1.1819883704079214e-05, "loss": 0.8156, "step": 5121 }, { "epoch": 0.4586266718898651, "grad_norm": 0.9929060557856917, "learning_rate": 1.1817031771778607e-05, "loss": 0.8313, "step": 5122 }, { "epoch": 0.4587162124349432, "grad_norm": 1.033280629583944, "learning_rate": 1.1814179686635213e-05, "loss": 0.8734, "step": 5123 }, { "epoch": 0.45880575298002124, "grad_norm": 0.9225136025281566, "learning_rate": 1.1811327448888933e-05, "loss": 0.8464, "step": 5124 }, { "epoch": 0.45889529352509933, "grad_norm": 1.1199092820297125, "learning_rate": 1.180847505877969e-05, "loss": 0.8247, "step": 5125 }, { "epoch": 0.4589848340701774, "grad_norm": 0.9189131256374843, "learning_rate": 1.180562251654742e-05, "loss": 0.8147, "step": 5126 }, { "epoch": 0.45907437461525546, "grad_norm": 1.076456297574399, "learning_rate": 1.1802769822432068e-05, "loss": 0.8545, "step": 5127 }, { "epoch": 0.45916391516033356, "grad_norm": 1.0370810627361717, "learning_rate": 1.1799916976673589e-05, "loss": 0.893, "step": 5128 }, { "epoch": 0.4592534557054116, "grad_norm": 1.1895294582205767, "learning_rate": 1.1797063979511964e-05, "loss": 0.732, "step": 5129 }, { "epoch": 0.4593429962504897, "grad_norm": 1.0168987893379378, "learning_rate": 1.1794210831187174e-05, "loss": 0.8667, "step": 5130 }, { "epoch": 0.4594325367955677, "grad_norm": 0.8843046531508095, "learning_rate": 1.1791357531939211e-05, "loss": 0.7884, "step": 5131 }, { "epoch": 0.4595220773406458, "grad_norm": 0.9614994258948922, "learning_rate": 1.1788504082008093e-05, "loss": 0.9285, "step": 5132 }, { "epoch": 0.45961161788572386, "grad_norm": 0.9687012149307961, "learning_rate": 1.1785650481633841e-05, "loss": 0.8307, "step": 5133 }, { "epoch": 0.45970115843080195, "grad_norm": 1.0292601584866816, "learning_rate": 1.1782796731056487e-05, "loss": 0.8554, "step": 5134 }, { "epoch": 0.45979069897588004, "grad_norm": 0.9430173864660034, "learning_rate": 1.1779942830516083e-05, "loss": 0.8545, "step": 5135 }, { "epoch": 0.4598802395209581, "grad_norm": 1.0922230750751905, "learning_rate": 1.1777088780252688e-05, "loss": 0.7958, "step": 5136 }, { "epoch": 0.45996978006603617, "grad_norm": 0.905540854383264, "learning_rate": 1.1774234580506374e-05, "loss": 0.884, "step": 5137 }, { "epoch": 0.4600593206111142, "grad_norm": 0.9490256894100174, "learning_rate": 1.1771380231517231e-05, "loss": 0.8821, "step": 5138 }, { "epoch": 0.4601488611561923, "grad_norm": 0.9087771187164633, "learning_rate": 1.1768525733525356e-05, "loss": 0.8391, "step": 5139 }, { "epoch": 0.46023840170127034, "grad_norm": 0.8544891362161957, "learning_rate": 1.1765671086770855e-05, "loss": 0.8323, "step": 5140 }, { "epoch": 0.46032794224634843, "grad_norm": 0.8918519262694055, "learning_rate": 1.1762816291493862e-05, "loss": 0.8632, "step": 5141 }, { "epoch": 0.46041748279142647, "grad_norm": 0.9406230848093153, "learning_rate": 1.1759961347934505e-05, "loss": 0.7528, "step": 5142 }, { "epoch": 0.46050702333650456, "grad_norm": 0.9507773370465508, "learning_rate": 1.175710625633293e-05, "loss": 0.8197, "step": 5143 }, { "epoch": 0.46059656388158265, "grad_norm": 1.0554815453727129, "learning_rate": 1.1754251016929308e-05, "loss": 0.8339, "step": 5144 }, { "epoch": 0.4606861044266607, "grad_norm": 0.9793904364171417, "learning_rate": 1.1751395629963806e-05, "loss": 0.8699, "step": 5145 }, { "epoch": 0.4607756449717388, "grad_norm": 0.8967591221463577, "learning_rate": 1.1748540095676609e-05, "loss": 0.8494, "step": 5146 }, { "epoch": 0.4608651855168168, "grad_norm": 1.0052731232315066, "learning_rate": 1.1745684414307919e-05, "loss": 0.8263, "step": 5147 }, { "epoch": 0.4609547260618949, "grad_norm": 1.0766045531543664, "learning_rate": 1.1742828586097945e-05, "loss": 0.8371, "step": 5148 }, { "epoch": 0.46104426660697295, "grad_norm": 0.9870349156594975, "learning_rate": 1.1739972611286908e-05, "loss": 0.8153, "step": 5149 }, { "epoch": 0.46113380715205105, "grad_norm": 1.1063983806107454, "learning_rate": 1.1737116490115046e-05, "loss": 0.8194, "step": 5150 }, { "epoch": 0.4612233476971291, "grad_norm": 1.2037774365506213, "learning_rate": 1.1734260222822606e-05, "loss": 0.8096, "step": 5151 }, { "epoch": 0.4613128882422072, "grad_norm": 0.9391945356514018, "learning_rate": 1.1731403809649847e-05, "loss": 0.855, "step": 5152 }, { "epoch": 0.46140242878728527, "grad_norm": 0.9320474389705087, "learning_rate": 1.1728547250837042e-05, "loss": 0.879, "step": 5153 }, { "epoch": 0.4614919693323633, "grad_norm": 0.9235509884755432, "learning_rate": 1.1725690546624475e-05, "loss": 0.831, "step": 5154 }, { "epoch": 0.4615815098774414, "grad_norm": 1.0247354495996444, "learning_rate": 1.172283369725244e-05, "loss": 0.8569, "step": 5155 }, { "epoch": 0.46167105042251944, "grad_norm": 0.9262693555228466, "learning_rate": 1.1719976702961253e-05, "loss": 0.8696, "step": 5156 }, { "epoch": 0.46176059096759753, "grad_norm": 0.8628726572076715, "learning_rate": 1.1717119563991228e-05, "loss": 0.8643, "step": 5157 }, { "epoch": 0.46185013151267557, "grad_norm": 0.9568026555805302, "learning_rate": 1.1714262280582703e-05, "loss": 0.8583, "step": 5158 }, { "epoch": 0.46193967205775366, "grad_norm": 0.8971088630736636, "learning_rate": 1.1711404852976019e-05, "loss": 0.8681, "step": 5159 }, { "epoch": 0.4620292126028317, "grad_norm": 0.8600036959318377, "learning_rate": 1.1708547281411535e-05, "loss": 0.8564, "step": 5160 }, { "epoch": 0.4621187531479098, "grad_norm": 0.9427869977381004, "learning_rate": 1.1705689566129624e-05, "loss": 0.8509, "step": 5161 }, { "epoch": 0.4622082936929879, "grad_norm": 0.951829353340297, "learning_rate": 1.1702831707370662e-05, "loss": 0.8261, "step": 5162 }, { "epoch": 0.4622978342380659, "grad_norm": 0.9416540892702926, "learning_rate": 1.169997370537505e-05, "loss": 0.8263, "step": 5163 }, { "epoch": 0.462387374783144, "grad_norm": 0.9606430843275499, "learning_rate": 1.1697115560383186e-05, "loss": 0.8713, "step": 5164 }, { "epoch": 0.46247691532822205, "grad_norm": 1.0073479095188298, "learning_rate": 1.1694257272635494e-05, "loss": 0.7987, "step": 5165 }, { "epoch": 0.46256645587330014, "grad_norm": 1.290285035991937, "learning_rate": 1.1691398842372398e-05, "loss": 0.8418, "step": 5166 }, { "epoch": 0.4626559964183782, "grad_norm": 0.903550390064754, "learning_rate": 1.1688540269834346e-05, "loss": 0.8542, "step": 5167 }, { "epoch": 0.46274553696345627, "grad_norm": 0.9029447663126926, "learning_rate": 1.1685681555261788e-05, "loss": 0.7914, "step": 5168 }, { "epoch": 0.4628350775085343, "grad_norm": 0.9613588116446969, "learning_rate": 1.168282269889519e-05, "loss": 0.8754, "step": 5169 }, { "epoch": 0.4629246180536124, "grad_norm": 2.1611118205773168, "learning_rate": 1.1679963700975031e-05, "loss": 0.8822, "step": 5170 }, { "epoch": 0.4630141585986905, "grad_norm": 0.9144672855589865, "learning_rate": 1.1677104561741801e-05, "loss": 0.828, "step": 5171 }, { "epoch": 0.46310369914376853, "grad_norm": 0.9906521946167535, "learning_rate": 1.1674245281436001e-05, "loss": 0.8563, "step": 5172 }, { "epoch": 0.4631932396888466, "grad_norm": 0.9277476759628505, "learning_rate": 1.1671385860298141e-05, "loss": 0.8661, "step": 5173 }, { "epoch": 0.46328278023392466, "grad_norm": 0.9793713132605708, "learning_rate": 1.166852629856875e-05, "loss": 0.8088, "step": 5174 }, { "epoch": 0.46337232077900276, "grad_norm": 0.9893867605180711, "learning_rate": 1.1665666596488368e-05, "loss": 0.857, "step": 5175 }, { "epoch": 0.4634618613240808, "grad_norm": 0.9843420190742015, "learning_rate": 1.1662806754297533e-05, "loss": 0.8846, "step": 5176 }, { "epoch": 0.4635514018691589, "grad_norm": 0.9594780492778098, "learning_rate": 1.1659946772236817e-05, "loss": 0.8368, "step": 5177 }, { "epoch": 0.4636409424142369, "grad_norm": 0.8638476420391954, "learning_rate": 1.1657086650546788e-05, "loss": 0.8056, "step": 5178 }, { "epoch": 0.463730482959315, "grad_norm": 1.0030252752936568, "learning_rate": 1.1654226389468026e-05, "loss": 0.9096, "step": 5179 }, { "epoch": 0.4638200235043931, "grad_norm": 0.9642909848922914, "learning_rate": 1.1651365989241132e-05, "loss": 0.8064, "step": 5180 }, { "epoch": 0.46390956404947115, "grad_norm": 0.947620777082785, "learning_rate": 1.1648505450106716e-05, "loss": 0.8621, "step": 5181 }, { "epoch": 0.46399910459454924, "grad_norm": 0.8862060660325074, "learning_rate": 1.164564477230539e-05, "loss": 0.8082, "step": 5182 }, { "epoch": 0.4640886451396273, "grad_norm": 1.0577093037378118, "learning_rate": 1.164278395607779e-05, "loss": 0.9186, "step": 5183 }, { "epoch": 0.46417818568470537, "grad_norm": 1.0455588028285767, "learning_rate": 1.1639923001664557e-05, "loss": 0.88, "step": 5184 }, { "epoch": 0.4642677262297834, "grad_norm": 1.0200264244446493, "learning_rate": 1.1637061909306344e-05, "loss": 0.8856, "step": 5185 }, { "epoch": 0.4643572667748615, "grad_norm": 0.9924458646803535, "learning_rate": 1.1634200679243816e-05, "loss": 0.8404, "step": 5186 }, { "epoch": 0.46444680731993954, "grad_norm": 0.9239883304364668, "learning_rate": 1.1631339311717655e-05, "loss": 0.8553, "step": 5187 }, { "epoch": 0.46453634786501763, "grad_norm": 1.1516541175943453, "learning_rate": 1.1628477806968547e-05, "loss": 0.8624, "step": 5188 }, { "epoch": 0.4646258884100957, "grad_norm": 0.8792909404843124, "learning_rate": 1.1625616165237193e-05, "loss": 0.787, "step": 5189 }, { "epoch": 0.46471542895517376, "grad_norm": 0.8970280224538852, "learning_rate": 1.1622754386764303e-05, "loss": 0.8196, "step": 5190 }, { "epoch": 0.46480496950025185, "grad_norm": 0.9805322893003451, "learning_rate": 1.1619892471790604e-05, "loss": 0.8214, "step": 5191 }, { "epoch": 0.4648945100453299, "grad_norm": 0.9689784369555546, "learning_rate": 1.1617030420556828e-05, "loss": 0.8824, "step": 5192 }, { "epoch": 0.464984050590408, "grad_norm": 1.0666476611090014, "learning_rate": 1.1614168233303721e-05, "loss": 0.8795, "step": 5193 }, { "epoch": 0.465073591135486, "grad_norm": 0.9748989040614077, "learning_rate": 1.1611305910272046e-05, "loss": 0.8099, "step": 5194 }, { "epoch": 0.4651631316805641, "grad_norm": 0.9859242649711023, "learning_rate": 1.1608443451702565e-05, "loss": 0.8432, "step": 5195 }, { "epoch": 0.46525267222564215, "grad_norm": 0.9022376373477163, "learning_rate": 1.1605580857836063e-05, "loss": 0.7724, "step": 5196 }, { "epoch": 0.46534221277072024, "grad_norm": 0.9676367478826834, "learning_rate": 1.1602718128913333e-05, "loss": 0.8558, "step": 5197 }, { "epoch": 0.46543175331579834, "grad_norm": 0.919082370067064, "learning_rate": 1.1599855265175174e-05, "loss": 0.8599, "step": 5198 }, { "epoch": 0.4655212938608764, "grad_norm": 0.984967753640184, "learning_rate": 1.1596992266862408e-05, "loss": 0.875, "step": 5199 }, { "epoch": 0.46561083440595447, "grad_norm": 0.8645127895422162, "learning_rate": 1.1594129134215852e-05, "loss": 0.8127, "step": 5200 }, { "epoch": 0.4657003749510325, "grad_norm": 0.9754446044729783, "learning_rate": 1.1591265867476351e-05, "loss": 0.8826, "step": 5201 }, { "epoch": 0.4657899154961106, "grad_norm": 0.8628398572804993, "learning_rate": 1.1588402466884751e-05, "loss": 0.8535, "step": 5202 }, { "epoch": 0.46587945604118863, "grad_norm": 1.0215292997433247, "learning_rate": 1.1585538932681909e-05, "loss": 0.8635, "step": 5203 }, { "epoch": 0.4659689965862667, "grad_norm": 0.8733754395936902, "learning_rate": 1.15826752651087e-05, "loss": 0.8165, "step": 5204 }, { "epoch": 0.46605853713134476, "grad_norm": 1.3161709859107618, "learning_rate": 1.1579811464406005e-05, "loss": 0.8091, "step": 5205 }, { "epoch": 0.46614807767642286, "grad_norm": 1.006162528928787, "learning_rate": 1.1576947530814717e-05, "loss": 0.8355, "step": 5206 }, { "epoch": 0.46623761822150095, "grad_norm": 0.915255026807611, "learning_rate": 1.1574083464575744e-05, "loss": 0.849, "step": 5207 }, { "epoch": 0.466327158766579, "grad_norm": 0.9330250055520248, "learning_rate": 1.1571219265929997e-05, "loss": 0.8982, "step": 5208 }, { "epoch": 0.4664166993116571, "grad_norm": 0.9291402823972272, "learning_rate": 1.1568354935118407e-05, "loss": 0.7836, "step": 5209 }, { "epoch": 0.4665062398567351, "grad_norm": 1.0190295482150298, "learning_rate": 1.1565490472381908e-05, "loss": 0.825, "step": 5210 }, { "epoch": 0.4665957804018132, "grad_norm": 0.8710376182816013, "learning_rate": 1.1562625877961458e-05, "loss": 0.805, "step": 5211 }, { "epoch": 0.46668532094689125, "grad_norm": 0.9739225021879631, "learning_rate": 1.1559761152098005e-05, "loss": 0.832, "step": 5212 }, { "epoch": 0.46677486149196934, "grad_norm": 0.9713575139842687, "learning_rate": 1.1556896295032531e-05, "loss": 0.8717, "step": 5213 }, { "epoch": 0.4668644020370474, "grad_norm": 0.9989192148678726, "learning_rate": 1.155403130700601e-05, "loss": 0.8226, "step": 5214 }, { "epoch": 0.46695394258212547, "grad_norm": 0.9551333948031926, "learning_rate": 1.1551166188259445e-05, "loss": 0.8249, "step": 5215 }, { "epoch": 0.46704348312720356, "grad_norm": 0.9115808532065003, "learning_rate": 1.154830093903383e-05, "loss": 0.8501, "step": 5216 }, { "epoch": 0.4671330236722816, "grad_norm": 1.0123276888428039, "learning_rate": 1.1545435559570186e-05, "loss": 0.865, "step": 5217 }, { "epoch": 0.4672225642173597, "grad_norm": 0.97820114146102, "learning_rate": 1.154257005010954e-05, "loss": 0.8277, "step": 5218 }, { "epoch": 0.46731210476243773, "grad_norm": 1.1312219882918144, "learning_rate": 1.153970441089293e-05, "loss": 0.8106, "step": 5219 }, { "epoch": 0.4674016453075158, "grad_norm": 0.9306810239962637, "learning_rate": 1.1536838642161398e-05, "loss": 0.8438, "step": 5220 }, { "epoch": 0.46749118585259386, "grad_norm": 0.8812904992510471, "learning_rate": 1.1533972744156012e-05, "loss": 0.8261, "step": 5221 }, { "epoch": 0.46758072639767195, "grad_norm": 0.8968564150309098, "learning_rate": 1.1531106717117834e-05, "loss": 0.8034, "step": 5222 }, { "epoch": 0.46767026694275, "grad_norm": 0.9611065536353001, "learning_rate": 1.1528240561287951e-05, "loss": 0.9246, "step": 5223 }, { "epoch": 0.4677598074878281, "grad_norm": 0.9497328001822333, "learning_rate": 1.1525374276907451e-05, "loss": 0.8114, "step": 5224 }, { "epoch": 0.4678493480329062, "grad_norm": 0.8867585082037666, "learning_rate": 1.1522507864217438e-05, "loss": 0.8775, "step": 5225 }, { "epoch": 0.4679388885779842, "grad_norm": 0.9621814364210087, "learning_rate": 1.1519641323459024e-05, "loss": 0.7805, "step": 5226 }, { "epoch": 0.4680284291230623, "grad_norm": 1.030873953249709, "learning_rate": 1.1516774654873335e-05, "loss": 0.8962, "step": 5227 }, { "epoch": 0.46811796966814034, "grad_norm": 1.0035366362948042, "learning_rate": 1.1513907858701503e-05, "loss": 0.7912, "step": 5228 }, { "epoch": 0.46820751021321844, "grad_norm": 0.9992846537933064, "learning_rate": 1.1511040935184676e-05, "loss": 0.8769, "step": 5229 }, { "epoch": 0.4682970507582965, "grad_norm": 0.9545721801065259, "learning_rate": 1.150817388456401e-05, "loss": 0.8308, "step": 5230 }, { "epoch": 0.46838659130337457, "grad_norm": 0.9945648840769662, "learning_rate": 1.1505306707080673e-05, "loss": 0.8391, "step": 5231 }, { "epoch": 0.4684761318484526, "grad_norm": 0.990723166997344, "learning_rate": 1.1502439402975842e-05, "loss": 0.8269, "step": 5232 }, { "epoch": 0.4685656723935307, "grad_norm": 0.9558470924019299, "learning_rate": 1.14995719724907e-05, "loss": 0.7981, "step": 5233 }, { "epoch": 0.4686552129386088, "grad_norm": 0.9746648704838762, "learning_rate": 1.149670441586645e-05, "loss": 0.8895, "step": 5234 }, { "epoch": 0.4687447534836868, "grad_norm": 0.9025191809931887, "learning_rate": 1.1493836733344307e-05, "loss": 0.8466, "step": 5235 }, { "epoch": 0.4688342940287649, "grad_norm": 0.92594171384849, "learning_rate": 1.1490968925165482e-05, "loss": 0.844, "step": 5236 }, { "epoch": 0.46892383457384296, "grad_norm": 0.9293577437869477, "learning_rate": 1.1488100991571211e-05, "loss": 0.8687, "step": 5237 }, { "epoch": 0.46901337511892105, "grad_norm": 0.9521795691404596, "learning_rate": 1.1485232932802737e-05, "loss": 0.8584, "step": 5238 }, { "epoch": 0.4691029156639991, "grad_norm": 0.9629467247553939, "learning_rate": 1.1482364749101305e-05, "loss": 0.8044, "step": 5239 }, { "epoch": 0.4691924562090772, "grad_norm": 1.0842044339258565, "learning_rate": 1.147949644070818e-05, "loss": 0.8668, "step": 5240 }, { "epoch": 0.4692819967541552, "grad_norm": 0.946594935878108, "learning_rate": 1.147662800786464e-05, "loss": 0.8767, "step": 5241 }, { "epoch": 0.4693715372992333, "grad_norm": 1.0023315680115483, "learning_rate": 1.147375945081196e-05, "loss": 0.8026, "step": 5242 }, { "epoch": 0.4694610778443114, "grad_norm": 1.1560960046529032, "learning_rate": 1.1470890769791438e-05, "loss": 0.8832, "step": 5243 }, { "epoch": 0.46955061838938944, "grad_norm": 0.9770830932469063, "learning_rate": 1.1468021965044378e-05, "loss": 0.9262, "step": 5244 }, { "epoch": 0.46964015893446753, "grad_norm": 0.9627355111775144, "learning_rate": 1.1465153036812094e-05, "loss": 0.8343, "step": 5245 }, { "epoch": 0.46972969947954557, "grad_norm": 1.088746897026918, "learning_rate": 1.1462283985335911e-05, "loss": 0.8421, "step": 5246 }, { "epoch": 0.46981924002462366, "grad_norm": 0.9432298660556239, "learning_rate": 1.1459414810857164e-05, "loss": 0.8156, "step": 5247 }, { "epoch": 0.4699087805697017, "grad_norm": 0.902636627672977, "learning_rate": 1.1456545513617199e-05, "loss": 0.8539, "step": 5248 }, { "epoch": 0.4699983211147798, "grad_norm": 1.0367685513016893, "learning_rate": 1.1453676093857372e-05, "loss": 0.8088, "step": 5249 }, { "epoch": 0.47008786165985783, "grad_norm": 1.0050452151289615, "learning_rate": 1.145080655181905e-05, "loss": 0.8384, "step": 5250 }, { "epoch": 0.4701774022049359, "grad_norm": 0.9021852078817674, "learning_rate": 1.1447936887743607e-05, "loss": 0.8838, "step": 5251 }, { "epoch": 0.470266942750014, "grad_norm": 0.9414374375219461, "learning_rate": 1.1445067101872434e-05, "loss": 0.837, "step": 5252 }, { "epoch": 0.47035648329509205, "grad_norm": 0.845039856388987, "learning_rate": 1.1442197194446922e-05, "loss": 0.7847, "step": 5253 }, { "epoch": 0.47044602384017015, "grad_norm": 0.8964249772572265, "learning_rate": 1.1439327165708486e-05, "loss": 0.8417, "step": 5254 }, { "epoch": 0.4705355643852482, "grad_norm": 1.0019252989596703, "learning_rate": 1.1436457015898536e-05, "loss": 0.861, "step": 5255 }, { "epoch": 0.4706251049303263, "grad_norm": 0.8803237976162153, "learning_rate": 1.1433586745258503e-05, "loss": 0.8227, "step": 5256 }, { "epoch": 0.4707146454754043, "grad_norm": 0.994006604480196, "learning_rate": 1.1430716354029825e-05, "loss": 0.8874, "step": 5257 }, { "epoch": 0.4708041860204824, "grad_norm": 0.90857179307338, "learning_rate": 1.142784584245395e-05, "loss": 0.8868, "step": 5258 }, { "epoch": 0.47089372656556044, "grad_norm": 0.9197600515747429, "learning_rate": 1.1424975210772336e-05, "loss": 0.7741, "step": 5259 }, { "epoch": 0.47098326711063854, "grad_norm": 0.9763544796916975, "learning_rate": 1.1422104459226449e-05, "loss": 0.822, "step": 5260 }, { "epoch": 0.47107280765571663, "grad_norm": 0.9549436020294078, "learning_rate": 1.141923358805777e-05, "loss": 0.7983, "step": 5261 }, { "epoch": 0.47116234820079467, "grad_norm": 1.5787089199491313, "learning_rate": 1.1416362597507789e-05, "loss": 0.7917, "step": 5262 }, { "epoch": 0.47125188874587276, "grad_norm": 0.8864948746732515, "learning_rate": 1.1413491487817998e-05, "loss": 0.8122, "step": 5263 }, { "epoch": 0.4713414292909508, "grad_norm": 1.0090843716660998, "learning_rate": 1.141062025922991e-05, "loss": 0.8489, "step": 5264 }, { "epoch": 0.4714309698360289, "grad_norm": 0.9471400267410108, "learning_rate": 1.1407748911985045e-05, "loss": 0.8311, "step": 5265 }, { "epoch": 0.4715205103811069, "grad_norm": 0.9706015020485944, "learning_rate": 1.1404877446324928e-05, "loss": 0.8219, "step": 5266 }, { "epoch": 0.471610050926185, "grad_norm": 0.9396569204242291, "learning_rate": 1.1402005862491094e-05, "loss": 0.8228, "step": 5267 }, { "epoch": 0.47169959147126306, "grad_norm": 0.998623564237825, "learning_rate": 1.1399134160725103e-05, "loss": 0.8738, "step": 5268 }, { "epoch": 0.47178913201634115, "grad_norm": 1.0201159456173834, "learning_rate": 1.1396262341268503e-05, "loss": 0.8165, "step": 5269 }, { "epoch": 0.47187867256141924, "grad_norm": 1.186615338368212, "learning_rate": 1.139339040436286e-05, "loss": 0.8468, "step": 5270 }, { "epoch": 0.4719682131064973, "grad_norm": 0.9723645916864768, "learning_rate": 1.1390518350249762e-05, "loss": 0.819, "step": 5271 }, { "epoch": 0.4720577536515754, "grad_norm": 1.0045527476200615, "learning_rate": 1.138764617917079e-05, "loss": 0.748, "step": 5272 }, { "epoch": 0.4721472941966534, "grad_norm": 1.0257631215986733, "learning_rate": 1.1384773891367544e-05, "loss": 0.8178, "step": 5273 }, { "epoch": 0.4722368347417315, "grad_norm": 0.9091084448257286, "learning_rate": 1.138190148708163e-05, "loss": 0.846, "step": 5274 }, { "epoch": 0.47232637528680954, "grad_norm": 0.9087784936486357, "learning_rate": 1.1379028966554669e-05, "loss": 0.8278, "step": 5275 }, { "epoch": 0.47241591583188763, "grad_norm": 0.9196960278653166, "learning_rate": 1.1376156330028281e-05, "loss": 0.9109, "step": 5276 }, { "epoch": 0.47250545637696567, "grad_norm": 1.1089444181201233, "learning_rate": 1.137328357774411e-05, "loss": 0.8534, "step": 5277 }, { "epoch": 0.47259499692204376, "grad_norm": 0.9387743007259313, "learning_rate": 1.1370410709943798e-05, "loss": 0.789, "step": 5278 }, { "epoch": 0.47268453746712186, "grad_norm": 1.0193530898892347, "learning_rate": 1.1367537726869003e-05, "loss": 0.7974, "step": 5279 }, { "epoch": 0.4727740780121999, "grad_norm": 0.9508104504992084, "learning_rate": 1.1364664628761391e-05, "loss": 0.8522, "step": 5280 }, { "epoch": 0.472863618557278, "grad_norm": 0.8999457405036402, "learning_rate": 1.1361791415862637e-05, "loss": 0.812, "step": 5281 }, { "epoch": 0.472953159102356, "grad_norm": 0.9339050837264762, "learning_rate": 1.1358918088414427e-05, "loss": 0.7978, "step": 5282 }, { "epoch": 0.4730426996474341, "grad_norm": 0.9910159270876545, "learning_rate": 1.1356044646658455e-05, "loss": 0.8413, "step": 5283 }, { "epoch": 0.47313224019251215, "grad_norm": 1.0551357894416837, "learning_rate": 1.1353171090836427e-05, "loss": 0.8935, "step": 5284 }, { "epoch": 0.47322178073759025, "grad_norm": 0.861611819650731, "learning_rate": 1.1350297421190058e-05, "loss": 0.8156, "step": 5285 }, { "epoch": 0.4733113212826683, "grad_norm": 0.9325490434811302, "learning_rate": 1.1347423637961067e-05, "loss": 0.8779, "step": 5286 }, { "epoch": 0.4734008618277464, "grad_norm": 0.9469048301140901, "learning_rate": 1.1344549741391193e-05, "loss": 0.7747, "step": 5287 }, { "epoch": 0.47349040237282447, "grad_norm": 0.9306991540983031, "learning_rate": 1.1341675731722175e-05, "loss": 0.7733, "step": 5288 }, { "epoch": 0.4735799429179025, "grad_norm": 0.9949786844847933, "learning_rate": 1.1338801609195769e-05, "loss": 0.8305, "step": 5289 }, { "epoch": 0.4736694834629806, "grad_norm": 0.8816959185076279, "learning_rate": 1.133592737405373e-05, "loss": 0.8267, "step": 5290 }, { "epoch": 0.47375902400805864, "grad_norm": 0.9132957126578524, "learning_rate": 1.1333053026537837e-05, "loss": 0.835, "step": 5291 }, { "epoch": 0.47384856455313673, "grad_norm": 1.0847778536707446, "learning_rate": 1.1330178566889866e-05, "loss": 0.904, "step": 5292 }, { "epoch": 0.47393810509821477, "grad_norm": 0.9214369869536464, "learning_rate": 1.132730399535161e-05, "loss": 0.7884, "step": 5293 }, { "epoch": 0.47402764564329286, "grad_norm": 0.8969466418700005, "learning_rate": 1.1324429312164866e-05, "loss": 0.8416, "step": 5294 }, { "epoch": 0.4741171861883709, "grad_norm": 0.9569458733886527, "learning_rate": 1.1321554517571447e-05, "loss": 0.9044, "step": 5295 }, { "epoch": 0.474206726733449, "grad_norm": 0.8659255960461684, "learning_rate": 1.1318679611813166e-05, "loss": 0.8621, "step": 5296 }, { "epoch": 0.4742962672785271, "grad_norm": 0.9560804327918336, "learning_rate": 1.1315804595131849e-05, "loss": 0.8404, "step": 5297 }, { "epoch": 0.4743858078236051, "grad_norm": 0.9075687691457829, "learning_rate": 1.1312929467769345e-05, "loss": 0.8109, "step": 5298 }, { "epoch": 0.4744753483686832, "grad_norm": 0.9819848432450048, "learning_rate": 1.1310054229967488e-05, "loss": 0.8254, "step": 5299 }, { "epoch": 0.47456488891376125, "grad_norm": 0.9753265965935348, "learning_rate": 1.1307178881968139e-05, "loss": 0.7944, "step": 5300 }, { "epoch": 0.47465442945883934, "grad_norm": 0.8925099788153175, "learning_rate": 1.1304303424013163e-05, "loss": 0.8204, "step": 5301 }, { "epoch": 0.4747439700039174, "grad_norm": 0.9341656523766776, "learning_rate": 1.1301427856344433e-05, "loss": 0.8194, "step": 5302 }, { "epoch": 0.4748335105489955, "grad_norm": 1.1056015584190042, "learning_rate": 1.1298552179203834e-05, "loss": 0.8962, "step": 5303 }, { "epoch": 0.4749230510940735, "grad_norm": 0.8560590125901193, "learning_rate": 1.1295676392833254e-05, "loss": 0.7768, "step": 5304 }, { "epoch": 0.4750125916391516, "grad_norm": 0.9260195831733976, "learning_rate": 1.1292800497474601e-05, "loss": 0.8857, "step": 5305 }, { "epoch": 0.4751021321842297, "grad_norm": 0.9641733702747228, "learning_rate": 1.1289924493369782e-05, "loss": 0.8098, "step": 5306 }, { "epoch": 0.47519167272930773, "grad_norm": 1.100794042172999, "learning_rate": 1.1287048380760719e-05, "loss": 0.8201, "step": 5307 }, { "epoch": 0.4752812132743858, "grad_norm": 1.0504780829263825, "learning_rate": 1.1284172159889339e-05, "loss": 0.8227, "step": 5308 }, { "epoch": 0.47537075381946386, "grad_norm": 0.8424012057000588, "learning_rate": 1.1281295830997583e-05, "loss": 0.7736, "step": 5309 }, { "epoch": 0.47546029436454196, "grad_norm": 0.9109376046758272, "learning_rate": 1.1278419394327395e-05, "loss": 0.8218, "step": 5310 }, { "epoch": 0.47554983490962, "grad_norm": 0.9450459875303185, "learning_rate": 1.1275542850120735e-05, "loss": 0.8689, "step": 5311 }, { "epoch": 0.4756393754546981, "grad_norm": 1.0344027311439215, "learning_rate": 1.1272666198619567e-05, "loss": 0.8271, "step": 5312 }, { "epoch": 0.4757289159997761, "grad_norm": 0.9823633530088418, "learning_rate": 1.1269789440065864e-05, "loss": 0.8202, "step": 5313 }, { "epoch": 0.4758184565448542, "grad_norm": 0.9071368910867725, "learning_rate": 1.1266912574701612e-05, "loss": 0.8894, "step": 5314 }, { "epoch": 0.4759079970899323, "grad_norm": 0.9419612514456097, "learning_rate": 1.1264035602768803e-05, "loss": 0.7942, "step": 5315 }, { "epoch": 0.47599753763501035, "grad_norm": 0.885260171135509, "learning_rate": 1.1261158524509438e-05, "loss": 0.8187, "step": 5316 }, { "epoch": 0.47608707818008844, "grad_norm": 0.9217523928714639, "learning_rate": 1.125828134016553e-05, "loss": 0.8655, "step": 5317 }, { "epoch": 0.4761766187251665, "grad_norm": 0.9415069331724792, "learning_rate": 1.1255404049979093e-05, "loss": 0.8531, "step": 5318 }, { "epoch": 0.47626615927024457, "grad_norm": 1.0909940732086685, "learning_rate": 1.125252665419216e-05, "loss": 0.8186, "step": 5319 }, { "epoch": 0.4763556998153226, "grad_norm": 0.9677106078542571, "learning_rate": 1.1249649153046767e-05, "loss": 0.8627, "step": 5320 }, { "epoch": 0.4764452403604007, "grad_norm": 0.9648149993541045, "learning_rate": 1.1246771546784956e-05, "loss": 0.843, "step": 5321 }, { "epoch": 0.47653478090547874, "grad_norm": 0.8976075828437056, "learning_rate": 1.1243893835648789e-05, "loss": 0.8248, "step": 5322 }, { "epoch": 0.47662432145055683, "grad_norm": 1.2291949984363775, "learning_rate": 1.1241016019880326e-05, "loss": 0.91, "step": 5323 }, { "epoch": 0.4767138619956349, "grad_norm": 0.8890010688239305, "learning_rate": 1.1238138099721637e-05, "loss": 0.7462, "step": 5324 }, { "epoch": 0.47680340254071296, "grad_norm": 0.9409122006338185, "learning_rate": 1.1235260075414809e-05, "loss": 0.8526, "step": 5325 }, { "epoch": 0.47689294308579105, "grad_norm": 0.9744496690637215, "learning_rate": 1.1232381947201928e-05, "loss": 0.7907, "step": 5326 }, { "epoch": 0.4769824836308691, "grad_norm": 0.8940813575429236, "learning_rate": 1.1229503715325087e-05, "loss": 0.8231, "step": 5327 }, { "epoch": 0.4770720241759472, "grad_norm": 0.9807049036041172, "learning_rate": 1.1226625380026407e-05, "loss": 0.8874, "step": 5328 }, { "epoch": 0.4771615647210252, "grad_norm": 0.9880819562569806, "learning_rate": 1.1223746941547997e-05, "loss": 0.8332, "step": 5329 }, { "epoch": 0.4772511052661033, "grad_norm": 0.9843482983780973, "learning_rate": 1.122086840013198e-05, "loss": 0.8834, "step": 5330 }, { "epoch": 0.47734064581118135, "grad_norm": 1.1282045937893888, "learning_rate": 1.1217989756020494e-05, "loss": 0.841, "step": 5331 }, { "epoch": 0.47743018635625945, "grad_norm": 1.0002996211313124, "learning_rate": 1.1215111009455677e-05, "loss": 0.8522, "step": 5332 }, { "epoch": 0.47751972690133754, "grad_norm": 1.0550676589162387, "learning_rate": 1.1212232160679682e-05, "loss": 0.8034, "step": 5333 }, { "epoch": 0.4776092674464156, "grad_norm": 0.9614182501843216, "learning_rate": 1.1209353209934666e-05, "loss": 0.7838, "step": 5334 }, { "epoch": 0.47769880799149367, "grad_norm": 0.9089386146544342, "learning_rate": 1.12064741574628e-05, "loss": 0.8519, "step": 5335 }, { "epoch": 0.4777883485365717, "grad_norm": 0.9701278563413372, "learning_rate": 1.1203595003506261e-05, "loss": 0.7866, "step": 5336 }, { "epoch": 0.4778778890816498, "grad_norm": 0.8681129107144012, "learning_rate": 1.120071574830723e-05, "loss": 0.793, "step": 5337 }, { "epoch": 0.47796742962672784, "grad_norm": 1.0124925356339436, "learning_rate": 1.1197836392107906e-05, "loss": 0.8155, "step": 5338 }, { "epoch": 0.47805697017180593, "grad_norm": 0.9986727757610999, "learning_rate": 1.1194956935150488e-05, "loss": 0.8516, "step": 5339 }, { "epoch": 0.47814651071688397, "grad_norm": 1.1592841084519814, "learning_rate": 1.1192077377677185e-05, "loss": 0.8658, "step": 5340 }, { "epoch": 0.47823605126196206, "grad_norm": 0.9301736508772399, "learning_rate": 1.1189197719930215e-05, "loss": 0.7669, "step": 5341 }, { "epoch": 0.47832559180704015, "grad_norm": 0.9743045436127313, "learning_rate": 1.118631796215181e-05, "loss": 0.8376, "step": 5342 }, { "epoch": 0.4784151323521182, "grad_norm": 0.8881080948058735, "learning_rate": 1.1183438104584208e-05, "loss": 0.8218, "step": 5343 }, { "epoch": 0.4785046728971963, "grad_norm": 0.9321029195240634, "learning_rate": 1.1180558147469645e-05, "loss": 0.8418, "step": 5344 }, { "epoch": 0.4785942134422743, "grad_norm": 0.9691402667829844, "learning_rate": 1.117767809105038e-05, "loss": 0.8968, "step": 5345 }, { "epoch": 0.4786837539873524, "grad_norm": 1.0052834874806957, "learning_rate": 1.1174797935568668e-05, "loss": 0.8599, "step": 5346 }, { "epoch": 0.47877329453243045, "grad_norm": 0.8892325473464305, "learning_rate": 1.1171917681266789e-05, "loss": 0.846, "step": 5347 }, { "epoch": 0.47886283507750854, "grad_norm": 0.9193999652398854, "learning_rate": 1.1169037328387005e-05, "loss": 0.8371, "step": 5348 }, { "epoch": 0.4789523756225866, "grad_norm": 1.0398757816722264, "learning_rate": 1.1166156877171614e-05, "loss": 0.861, "step": 5349 }, { "epoch": 0.47904191616766467, "grad_norm": 0.9689115400559034, "learning_rate": 1.116327632786291e-05, "loss": 0.805, "step": 5350 }, { "epoch": 0.47913145671274276, "grad_norm": 0.9849120376974381, "learning_rate": 1.1160395680703187e-05, "loss": 0.8045, "step": 5351 }, { "epoch": 0.4792209972578208, "grad_norm": 0.8844120702938685, "learning_rate": 1.1157514935934762e-05, "loss": 0.825, "step": 5352 }, { "epoch": 0.4793105378028989, "grad_norm": 1.073761274866184, "learning_rate": 1.1154634093799957e-05, "loss": 0.8266, "step": 5353 }, { "epoch": 0.47940007834797693, "grad_norm": 0.932521315664131, "learning_rate": 1.1151753154541087e-05, "loss": 0.8292, "step": 5354 }, { "epoch": 0.479489618893055, "grad_norm": 0.9619430248145938, "learning_rate": 1.1148872118400503e-05, "loss": 0.8719, "step": 5355 }, { "epoch": 0.47957915943813306, "grad_norm": 0.9666819666110861, "learning_rate": 1.1145990985620533e-05, "loss": 0.8504, "step": 5356 }, { "epoch": 0.47966869998321116, "grad_norm": 0.92643309206178, "learning_rate": 1.1143109756443537e-05, "loss": 0.8137, "step": 5357 }, { "epoch": 0.4797582405282892, "grad_norm": 0.9137776345501791, "learning_rate": 1.114022843111188e-05, "loss": 0.8639, "step": 5358 }, { "epoch": 0.4798477810733673, "grad_norm": 0.8833077466944291, "learning_rate": 1.1137347009867916e-05, "loss": 0.8054, "step": 5359 }, { "epoch": 0.4799373216184454, "grad_norm": 0.9434909319503336, "learning_rate": 1.1134465492954028e-05, "loss": 0.8058, "step": 5360 }, { "epoch": 0.4800268621635234, "grad_norm": 0.8345190191062126, "learning_rate": 1.1131583880612605e-05, "loss": 0.7599, "step": 5361 }, { "epoch": 0.4801164027086015, "grad_norm": 0.908944382634431, "learning_rate": 1.112870217308603e-05, "loss": 0.8206, "step": 5362 }, { "epoch": 0.48020594325367955, "grad_norm": 1.3502354468938789, "learning_rate": 1.1125820370616704e-05, "loss": 0.8862, "step": 5363 }, { "epoch": 0.48029548379875764, "grad_norm": 1.0129019353336564, "learning_rate": 1.112293847344704e-05, "loss": 0.8011, "step": 5364 }, { "epoch": 0.4803850243438357, "grad_norm": 0.9548234925075353, "learning_rate": 1.1120056481819452e-05, "loss": 0.7599, "step": 5365 }, { "epoch": 0.48047456488891377, "grad_norm": 0.9132712187569844, "learning_rate": 1.111717439597636e-05, "loss": 0.8075, "step": 5366 }, { "epoch": 0.4805641054339918, "grad_norm": 0.9296303720271222, "learning_rate": 1.11142922161602e-05, "loss": 0.7982, "step": 5367 }, { "epoch": 0.4806536459790699, "grad_norm": 0.912759742432175, "learning_rate": 1.1111409942613408e-05, "loss": 0.8247, "step": 5368 }, { "epoch": 0.480743186524148, "grad_norm": 1.00883625490817, "learning_rate": 1.1108527575578436e-05, "loss": 0.8052, "step": 5369 }, { "epoch": 0.48083272706922603, "grad_norm": 1.0415350359980045, "learning_rate": 1.1105645115297736e-05, "loss": 0.8119, "step": 5370 }, { "epoch": 0.4809222676143041, "grad_norm": 0.932589819911679, "learning_rate": 1.1102762562013771e-05, "loss": 0.8567, "step": 5371 }, { "epoch": 0.48101180815938216, "grad_norm": 0.9726114043484292, "learning_rate": 1.1099879915969014e-05, "loss": 0.8348, "step": 5372 }, { "epoch": 0.48110134870446025, "grad_norm": 0.9355877473825388, "learning_rate": 1.1096997177405942e-05, "loss": 0.8597, "step": 5373 }, { "epoch": 0.4811908892495383, "grad_norm": 0.8686910496285023, "learning_rate": 1.1094114346567045e-05, "loss": 0.8975, "step": 5374 }, { "epoch": 0.4812804297946164, "grad_norm": 1.2004109116118875, "learning_rate": 1.1091231423694808e-05, "loss": 0.8308, "step": 5375 }, { "epoch": 0.4813699703396944, "grad_norm": 0.9355256448265609, "learning_rate": 1.1088348409031744e-05, "loss": 0.7949, "step": 5376 }, { "epoch": 0.4814595108847725, "grad_norm": 1.2255405327531765, "learning_rate": 1.108546530282036e-05, "loss": 0.8366, "step": 5377 }, { "epoch": 0.4815490514298506, "grad_norm": 0.8870258691877803, "learning_rate": 1.1082582105303169e-05, "loss": 0.7971, "step": 5378 }, { "epoch": 0.48163859197492864, "grad_norm": 0.8868642407447398, "learning_rate": 1.1079698816722698e-05, "loss": 0.8412, "step": 5379 }, { "epoch": 0.48172813252000674, "grad_norm": 0.9891033673540561, "learning_rate": 1.1076815437321484e-05, "loss": 0.8475, "step": 5380 }, { "epoch": 0.4818176730650848, "grad_norm": 1.0043119362438915, "learning_rate": 1.1073931967342062e-05, "loss": 0.8415, "step": 5381 }, { "epoch": 0.48190721361016287, "grad_norm": 0.918227254841356, "learning_rate": 1.1071048407026983e-05, "loss": 0.8186, "step": 5382 }, { "epoch": 0.4819967541552409, "grad_norm": 1.1826903954689694, "learning_rate": 1.1068164756618807e-05, "loss": 0.8629, "step": 5383 }, { "epoch": 0.482086294700319, "grad_norm": 0.9166736532192641, "learning_rate": 1.1065281016360086e-05, "loss": 0.8627, "step": 5384 }, { "epoch": 0.48217583524539703, "grad_norm": 0.9504508792568868, "learning_rate": 1.1062397186493402e-05, "loss": 0.8634, "step": 5385 }, { "epoch": 0.4822653757904751, "grad_norm": 1.0813402837060233, "learning_rate": 1.1059513267261326e-05, "loss": 0.8333, "step": 5386 }, { "epoch": 0.4823549163355532, "grad_norm": 0.8371839805303918, "learning_rate": 1.1056629258906443e-05, "loss": 0.7965, "step": 5387 }, { "epoch": 0.48244445688063126, "grad_norm": 0.8953452109452927, "learning_rate": 1.1053745161671358e-05, "loss": 0.8327, "step": 5388 }, { "epoch": 0.48253399742570935, "grad_norm": 0.9580487965469747, "learning_rate": 1.105086097579866e-05, "loss": 0.8031, "step": 5389 }, { "epoch": 0.4826235379707874, "grad_norm": 1.067431629750438, "learning_rate": 1.1047976701530958e-05, "loss": 0.8997, "step": 5390 }, { "epoch": 0.4827130785158655, "grad_norm": 0.9467978736790414, "learning_rate": 1.1045092339110877e-05, "loss": 0.7866, "step": 5391 }, { "epoch": 0.4828026190609435, "grad_norm": 0.9110524929768579, "learning_rate": 1.1042207888781031e-05, "loss": 0.8394, "step": 5392 }, { "epoch": 0.4828921596060216, "grad_norm": 1.0658681871401243, "learning_rate": 1.1039323350784052e-05, "loss": 0.8576, "step": 5393 }, { "epoch": 0.48298170015109965, "grad_norm": 0.8631443704125721, "learning_rate": 1.1036438725362584e-05, "loss": 0.8834, "step": 5394 }, { "epoch": 0.48307124069617774, "grad_norm": 0.9527470188076916, "learning_rate": 1.1033554012759265e-05, "loss": 0.8576, "step": 5395 }, { "epoch": 0.48316078124125583, "grad_norm": 0.9112305561373075, "learning_rate": 1.1030669213216749e-05, "loss": 0.8741, "step": 5396 }, { "epoch": 0.48325032178633387, "grad_norm": 1.0056935939331737, "learning_rate": 1.10277843269777e-05, "loss": 0.8603, "step": 5397 }, { "epoch": 0.48333986233141196, "grad_norm": 0.9301890561895692, "learning_rate": 1.1024899354284782e-05, "loss": 0.9033, "step": 5398 }, { "epoch": 0.48342940287649, "grad_norm": 1.0743897791097297, "learning_rate": 1.1022014295380669e-05, "loss": 0.8539, "step": 5399 }, { "epoch": 0.4835189434215681, "grad_norm": 0.8898366429043726, "learning_rate": 1.1019129150508046e-05, "loss": 0.823, "step": 5400 }, { "epoch": 0.48360848396664613, "grad_norm": 0.854253191833201, "learning_rate": 1.10162439199096e-05, "loss": 0.829, "step": 5401 }, { "epoch": 0.4836980245117242, "grad_norm": 0.8529121732026164, "learning_rate": 1.1013358603828023e-05, "loss": 0.8047, "step": 5402 }, { "epoch": 0.48378756505680226, "grad_norm": 1.067078131272578, "learning_rate": 1.1010473202506027e-05, "loss": 0.8607, "step": 5403 }, { "epoch": 0.48387710560188035, "grad_norm": 0.9187570510517816, "learning_rate": 1.1007587716186319e-05, "loss": 0.8643, "step": 5404 }, { "epoch": 0.48396664614695845, "grad_norm": 0.8726738048027067, "learning_rate": 1.100470214511161e-05, "loss": 0.875, "step": 5405 }, { "epoch": 0.4840561866920365, "grad_norm": 0.936790486933609, "learning_rate": 1.1001816489524636e-05, "loss": 0.825, "step": 5406 }, { "epoch": 0.4841457272371146, "grad_norm": 1.0581603299640654, "learning_rate": 1.0998930749668122e-05, "loss": 0.8593, "step": 5407 }, { "epoch": 0.4842352677821926, "grad_norm": 0.8844108711404719, "learning_rate": 1.0996044925784805e-05, "loss": 0.8809, "step": 5408 }, { "epoch": 0.4843248083272707, "grad_norm": 0.9428760047661111, "learning_rate": 1.0993159018117436e-05, "loss": 0.8317, "step": 5409 }, { "epoch": 0.48441434887234874, "grad_norm": 0.887983642764275, "learning_rate": 1.0990273026908771e-05, "loss": 0.8451, "step": 5410 }, { "epoch": 0.48450388941742684, "grad_norm": 0.9647754581659951, "learning_rate": 1.0987386952401562e-05, "loss": 0.8375, "step": 5411 }, { "epoch": 0.4845934299625049, "grad_norm": 0.9140107956337725, "learning_rate": 1.0984500794838582e-05, "loss": 0.8524, "step": 5412 }, { "epoch": 0.48468297050758297, "grad_norm": 0.9900890472653607, "learning_rate": 1.0981614554462604e-05, "loss": 0.8663, "step": 5413 }, { "epoch": 0.48477251105266106, "grad_norm": 0.9227293071651652, "learning_rate": 1.0978728231516404e-05, "loss": 0.8496, "step": 5414 }, { "epoch": 0.4848620515977391, "grad_norm": 0.8583150348441748, "learning_rate": 1.097584182624278e-05, "loss": 0.829, "step": 5415 }, { "epoch": 0.4849515921428172, "grad_norm": 1.0536022862678383, "learning_rate": 1.0972955338884521e-05, "loss": 0.8111, "step": 5416 }, { "epoch": 0.4850411326878952, "grad_norm": 0.8411416135608347, "learning_rate": 1.0970068769684425e-05, "loss": 0.7849, "step": 5417 }, { "epoch": 0.4851306732329733, "grad_norm": 0.9529371851790517, "learning_rate": 1.0967182118885309e-05, "loss": 0.7718, "step": 5418 }, { "epoch": 0.48522021377805136, "grad_norm": 1.352909888161765, "learning_rate": 1.0964295386729984e-05, "loss": 0.8397, "step": 5419 }, { "epoch": 0.48530975432312945, "grad_norm": 1.0854258816340463, "learning_rate": 1.0961408573461272e-05, "loss": 0.8103, "step": 5420 }, { "epoch": 0.4853992948682075, "grad_norm": 0.9264208832796995, "learning_rate": 1.0958521679322007e-05, "loss": 0.8398, "step": 5421 }, { "epoch": 0.4854888354132856, "grad_norm": 0.9325702894583737, "learning_rate": 1.0955634704555021e-05, "loss": 0.8468, "step": 5422 }, { "epoch": 0.4855783759583637, "grad_norm": 0.9220840111458508, "learning_rate": 1.0952747649403155e-05, "loss": 0.8158, "step": 5423 }, { "epoch": 0.4856679165034417, "grad_norm": 0.8874265474025742, "learning_rate": 1.0949860514109265e-05, "loss": 0.8323, "step": 5424 }, { "epoch": 0.4857574570485198, "grad_norm": 0.9688237992333443, "learning_rate": 1.09469732989162e-05, "loss": 0.9099, "step": 5425 }, { "epoch": 0.48584699759359784, "grad_norm": 0.9426734855077556, "learning_rate": 1.094408600406683e-05, "loss": 0.7316, "step": 5426 }, { "epoch": 0.48593653813867593, "grad_norm": 0.8908555974865792, "learning_rate": 1.0941198629804022e-05, "loss": 0.8018, "step": 5427 }, { "epoch": 0.48602607868375397, "grad_norm": 0.9598272351460286, "learning_rate": 1.093831117637065e-05, "loss": 0.8116, "step": 5428 }, { "epoch": 0.48611561922883206, "grad_norm": 0.9617190541527875, "learning_rate": 1.0935423644009602e-05, "loss": 0.8229, "step": 5429 }, { "epoch": 0.4862051597739101, "grad_norm": 0.9275113141684821, "learning_rate": 1.0932536032963764e-05, "loss": 0.8691, "step": 5430 }, { "epoch": 0.4862947003189882, "grad_norm": 0.8661965046068462, "learning_rate": 1.0929648343476033e-05, "loss": 0.8572, "step": 5431 }, { "epoch": 0.4863842408640663, "grad_norm": 0.9486107758254199, "learning_rate": 1.0926760575789315e-05, "loss": 0.7991, "step": 5432 }, { "epoch": 0.4864737814091443, "grad_norm": 0.9542512562255702, "learning_rate": 1.0923872730146514e-05, "loss": 0.7692, "step": 5433 }, { "epoch": 0.4865633219542224, "grad_norm": 0.9106043017178462, "learning_rate": 1.0920984806790555e-05, "loss": 0.7474, "step": 5434 }, { "epoch": 0.48665286249930045, "grad_norm": 0.9694032467390674, "learning_rate": 1.0918096805964349e-05, "loss": 0.803, "step": 5435 }, { "epoch": 0.48674240304437855, "grad_norm": 0.9212763675758805, "learning_rate": 1.0915208727910835e-05, "loss": 0.8476, "step": 5436 }, { "epoch": 0.4868319435894566, "grad_norm": 0.9251803197997206, "learning_rate": 1.0912320572872948e-05, "loss": 0.8984, "step": 5437 }, { "epoch": 0.4869214841345347, "grad_norm": 1.0582315687264296, "learning_rate": 1.090943234109362e-05, "loss": 0.8716, "step": 5438 }, { "epoch": 0.4870110246796127, "grad_norm": 0.9623755970767499, "learning_rate": 1.0906544032815811e-05, "loss": 0.8118, "step": 5439 }, { "epoch": 0.4871005652246908, "grad_norm": 1.0814228233221772, "learning_rate": 1.0903655648282476e-05, "loss": 0.8055, "step": 5440 }, { "epoch": 0.4871901057697689, "grad_norm": 0.8804642863839655, "learning_rate": 1.0900767187736566e-05, "loss": 0.8195, "step": 5441 }, { "epoch": 0.48727964631484694, "grad_norm": 0.9163496790548435, "learning_rate": 1.0897878651421058e-05, "loss": 0.8669, "step": 5442 }, { "epoch": 0.48736918685992503, "grad_norm": 0.9423984611808082, "learning_rate": 1.0894990039578925e-05, "loss": 0.8683, "step": 5443 }, { "epoch": 0.48745872740500307, "grad_norm": 1.0493576747564988, "learning_rate": 1.0892101352453143e-05, "loss": 0.8463, "step": 5444 }, { "epoch": 0.48754826795008116, "grad_norm": 0.9991593377221076, "learning_rate": 1.0889212590286709e-05, "loss": 0.8297, "step": 5445 }, { "epoch": 0.4876378084951592, "grad_norm": 0.9548597681163442, "learning_rate": 1.0886323753322605e-05, "loss": 0.8356, "step": 5446 }, { "epoch": 0.4877273490402373, "grad_norm": 0.9207230704892666, "learning_rate": 1.0883434841803833e-05, "loss": 0.8576, "step": 5447 }, { "epoch": 0.4878168895853153, "grad_norm": 0.9687441810145719, "learning_rate": 1.0880545855973405e-05, "loss": 0.8455, "step": 5448 }, { "epoch": 0.4879064301303934, "grad_norm": 0.9611885396337851, "learning_rate": 1.0877656796074328e-05, "loss": 0.845, "step": 5449 }, { "epoch": 0.4879959706754715, "grad_norm": 0.9652694901842425, "learning_rate": 1.0874767662349618e-05, "loss": 0.8563, "step": 5450 }, { "epoch": 0.48808551122054955, "grad_norm": 1.0009151789637358, "learning_rate": 1.0871878455042308e-05, "loss": 0.7752, "step": 5451 }, { "epoch": 0.48817505176562764, "grad_norm": 0.8885801711557764, "learning_rate": 1.086898917439542e-05, "loss": 0.8046, "step": 5452 }, { "epoch": 0.4882645923107057, "grad_norm": 0.9320281469574639, "learning_rate": 1.0866099820651996e-05, "loss": 0.8198, "step": 5453 }, { "epoch": 0.4883541328557838, "grad_norm": 0.9087009745454233, "learning_rate": 1.0863210394055079e-05, "loss": 0.8053, "step": 5454 }, { "epoch": 0.4884436734008618, "grad_norm": 0.9031195754697942, "learning_rate": 1.0860320894847713e-05, "loss": 0.7949, "step": 5455 }, { "epoch": 0.4885332139459399, "grad_norm": 0.9154585308593028, "learning_rate": 1.085743132327296e-05, "loss": 0.8731, "step": 5456 }, { "epoch": 0.48862275449101794, "grad_norm": 1.0024831952038893, "learning_rate": 1.0854541679573876e-05, "loss": 0.8445, "step": 5457 }, { "epoch": 0.48871229503609603, "grad_norm": 0.9692453837554905, "learning_rate": 1.0851651963993533e-05, "loss": 0.8406, "step": 5458 }, { "epoch": 0.4888018355811741, "grad_norm": 1.2416721180199812, "learning_rate": 1.0848762176775001e-05, "loss": 0.8821, "step": 5459 }, { "epoch": 0.48889137612625216, "grad_norm": 0.9533333988824881, "learning_rate": 1.084587231816136e-05, "loss": 0.8044, "step": 5460 }, { "epoch": 0.48898091667133026, "grad_norm": 0.8934345258848013, "learning_rate": 1.0842982388395696e-05, "loss": 0.8389, "step": 5461 }, { "epoch": 0.4890704572164083, "grad_norm": 0.9778464424481985, "learning_rate": 1.08400923877211e-05, "loss": 0.8735, "step": 5462 }, { "epoch": 0.4891599977614864, "grad_norm": 0.9846564271636775, "learning_rate": 1.083720231638067e-05, "loss": 0.9005, "step": 5463 }, { "epoch": 0.4892495383065644, "grad_norm": 1.0014082779988795, "learning_rate": 1.0834312174617509e-05, "loss": 0.8457, "step": 5464 }, { "epoch": 0.4893390788516425, "grad_norm": 0.9847442605117007, "learning_rate": 1.0831421962674729e-05, "loss": 0.8439, "step": 5465 }, { "epoch": 0.48942861939672055, "grad_norm": 0.8398461484457317, "learning_rate": 1.082853168079544e-05, "loss": 0.8055, "step": 5466 }, { "epoch": 0.48951815994179865, "grad_norm": 1.0501031710764737, "learning_rate": 1.082564132922277e-05, "loss": 0.8088, "step": 5467 }, { "epoch": 0.48960770048687674, "grad_norm": 0.890401726837827, "learning_rate": 1.0822750908199836e-05, "loss": 0.7975, "step": 5468 }, { "epoch": 0.4896972410319548, "grad_norm": 0.89436920611807, "learning_rate": 1.081986041796978e-05, "loss": 0.8339, "step": 5469 }, { "epoch": 0.48978678157703287, "grad_norm": 0.8833076851602016, "learning_rate": 1.0816969858775741e-05, "loss": 0.7754, "step": 5470 }, { "epoch": 0.4898763221221109, "grad_norm": 0.9504792994868865, "learning_rate": 1.0814079230860852e-05, "loss": 0.9009, "step": 5471 }, { "epoch": 0.489965862667189, "grad_norm": 0.9232147500495367, "learning_rate": 1.0811188534468275e-05, "loss": 0.8757, "step": 5472 }, { "epoch": 0.49005540321226704, "grad_norm": 0.9223899908993878, "learning_rate": 1.0808297769841166e-05, "loss": 0.8665, "step": 5473 }, { "epoch": 0.49014494375734513, "grad_norm": 0.9853322266596035, "learning_rate": 1.0805406937222676e-05, "loss": 0.8355, "step": 5474 }, { "epoch": 0.49023448430242317, "grad_norm": 0.9425596539241807, "learning_rate": 1.0802516036855983e-05, "loss": 0.8365, "step": 5475 }, { "epoch": 0.49032402484750126, "grad_norm": 0.9844582230165297, "learning_rate": 1.0799625068984255e-05, "loss": 0.9095, "step": 5476 }, { "epoch": 0.49041356539257935, "grad_norm": 0.9007098675792026, "learning_rate": 1.0796734033850668e-05, "loss": 0.8599, "step": 5477 }, { "epoch": 0.4905031059376574, "grad_norm": 0.9123440669615296, "learning_rate": 1.0793842931698417e-05, "loss": 0.825, "step": 5478 }, { "epoch": 0.4905926464827355, "grad_norm": 1.1960439609104925, "learning_rate": 1.0790951762770682e-05, "loss": 0.8434, "step": 5479 }, { "epoch": 0.4906821870278135, "grad_norm": 0.9569361757493331, "learning_rate": 1.078806052731066e-05, "loss": 0.7619, "step": 5480 }, { "epoch": 0.4907717275728916, "grad_norm": 0.8872471020010495, "learning_rate": 1.078516922556156e-05, "loss": 0.7861, "step": 5481 }, { "epoch": 0.49086126811796965, "grad_norm": 0.9458726876412398, "learning_rate": 1.0782277857766581e-05, "loss": 0.8843, "step": 5482 }, { "epoch": 0.49095080866304774, "grad_norm": 1.083925636241174, "learning_rate": 1.0779386424168937e-05, "loss": 0.7807, "step": 5483 }, { "epoch": 0.4910403492081258, "grad_norm": 0.9381578970844493, "learning_rate": 1.0776494925011847e-05, "loss": 0.7161, "step": 5484 }, { "epoch": 0.4911298897532039, "grad_norm": 1.1475326197011941, "learning_rate": 1.0773603360538533e-05, "loss": 0.8387, "step": 5485 }, { "epoch": 0.49121943029828197, "grad_norm": 0.9731840756642766, "learning_rate": 1.0770711730992227e-05, "loss": 0.8387, "step": 5486 }, { "epoch": 0.49130897084336, "grad_norm": 0.9327109535566369, "learning_rate": 1.076782003661616e-05, "loss": 0.8517, "step": 5487 }, { "epoch": 0.4913985113884381, "grad_norm": 1.080701126343337, "learning_rate": 1.0764928277653577e-05, "loss": 0.8129, "step": 5488 }, { "epoch": 0.49148805193351613, "grad_norm": 0.8484044701000448, "learning_rate": 1.0762036454347717e-05, "loss": 0.7872, "step": 5489 }, { "epoch": 0.4915775924785942, "grad_norm": 0.943703289221061, "learning_rate": 1.0759144566941836e-05, "loss": 0.8399, "step": 5490 }, { "epoch": 0.49166713302367226, "grad_norm": 0.8652998713933185, "learning_rate": 1.0756252615679185e-05, "loss": 0.8282, "step": 5491 }, { "epoch": 0.49175667356875036, "grad_norm": 0.9269031885564512, "learning_rate": 1.0753360600803031e-05, "loss": 0.8445, "step": 5492 }, { "epoch": 0.4918462141138284, "grad_norm": 0.9782034134416089, "learning_rate": 1.0750468522556637e-05, "loss": 0.8271, "step": 5493 }, { "epoch": 0.4919357546589065, "grad_norm": 0.9373824050014058, "learning_rate": 1.0747576381183276e-05, "loss": 0.7676, "step": 5494 }, { "epoch": 0.4920252952039846, "grad_norm": 0.8154979274825064, "learning_rate": 1.0744684176926228e-05, "loss": 0.83, "step": 5495 }, { "epoch": 0.4921148357490626, "grad_norm": 0.9451577016550945, "learning_rate": 1.0741791910028771e-05, "loss": 0.8754, "step": 5496 }, { "epoch": 0.4922043762941407, "grad_norm": 1.0658792237455592, "learning_rate": 1.0738899580734198e-05, "loss": 0.8898, "step": 5497 }, { "epoch": 0.49229391683921875, "grad_norm": 0.9137708607556491, "learning_rate": 1.0736007189285798e-05, "loss": 0.8099, "step": 5498 }, { "epoch": 0.49238345738429684, "grad_norm": 0.8727540825069028, "learning_rate": 1.0733114735926872e-05, "loss": 0.8386, "step": 5499 }, { "epoch": 0.4924729979293749, "grad_norm": 1.047064906110096, "learning_rate": 1.0730222220900727e-05, "loss": 0.821, "step": 5500 }, { "epoch": 0.49256253847445297, "grad_norm": 0.9554680456909219, "learning_rate": 1.0727329644450663e-05, "loss": 0.8287, "step": 5501 }, { "epoch": 0.492652079019531, "grad_norm": 0.8786220082870211, "learning_rate": 1.0724437006820002e-05, "loss": 0.7562, "step": 5502 }, { "epoch": 0.4927416195646091, "grad_norm": 0.9247342505870936, "learning_rate": 1.0721544308252063e-05, "loss": 0.8719, "step": 5503 }, { "epoch": 0.4928311601096872, "grad_norm": 0.9465102164555348, "learning_rate": 1.0718651548990165e-05, "loss": 0.8918, "step": 5504 }, { "epoch": 0.49292070065476523, "grad_norm": 0.9479122816958343, "learning_rate": 1.0715758729277643e-05, "loss": 0.8812, "step": 5505 }, { "epoch": 0.4930102411998433, "grad_norm": 0.8911732447472639, "learning_rate": 1.0712865849357827e-05, "loss": 0.8545, "step": 5506 }, { "epoch": 0.49309978174492136, "grad_norm": 0.9742305594792593, "learning_rate": 1.0709972909474057e-05, "loss": 0.7397, "step": 5507 }, { "epoch": 0.49318932228999945, "grad_norm": 0.9459926111461088, "learning_rate": 1.0707079909869684e-05, "loss": 0.8108, "step": 5508 }, { "epoch": 0.4932788628350775, "grad_norm": 0.9597562079227918, "learning_rate": 1.0704186850788053e-05, "loss": 0.7896, "step": 5509 }, { "epoch": 0.4933684033801556, "grad_norm": 0.9065494028787365, "learning_rate": 1.0701293732472515e-05, "loss": 0.9132, "step": 5510 }, { "epoch": 0.4934579439252336, "grad_norm": 0.9814941647116594, "learning_rate": 1.0698400555166435e-05, "loss": 0.7841, "step": 5511 }, { "epoch": 0.4935474844703117, "grad_norm": 0.9736616534815627, "learning_rate": 1.0695507319113177e-05, "loss": 0.8373, "step": 5512 }, { "epoch": 0.4936370250153898, "grad_norm": 0.9177502222837732, "learning_rate": 1.069261402455611e-05, "loss": 0.8069, "step": 5513 }, { "epoch": 0.49372656556046784, "grad_norm": 0.8493040567039404, "learning_rate": 1.0689720671738606e-05, "loss": 0.8192, "step": 5514 }, { "epoch": 0.49381610610554594, "grad_norm": 0.9808387690139264, "learning_rate": 1.0686827260904049e-05, "loss": 0.8178, "step": 5515 }, { "epoch": 0.493905646650624, "grad_norm": 0.927205558067573, "learning_rate": 1.068393379229582e-05, "loss": 0.8075, "step": 5516 }, { "epoch": 0.49399518719570207, "grad_norm": 0.8637030062662195, "learning_rate": 1.068104026615731e-05, "loss": 0.7892, "step": 5517 }, { "epoch": 0.4940847277407801, "grad_norm": 0.940182825973553, "learning_rate": 1.0678146682731911e-05, "loss": 0.8264, "step": 5518 }, { "epoch": 0.4941742682858582, "grad_norm": 0.9075927089055923, "learning_rate": 1.0675253042263023e-05, "loss": 0.7972, "step": 5519 }, { "epoch": 0.49426380883093624, "grad_norm": 0.9865319126017165, "learning_rate": 1.067235934499405e-05, "loss": 0.8615, "step": 5520 }, { "epoch": 0.49435334937601433, "grad_norm": 0.9386120717990005, "learning_rate": 1.06694655911684e-05, "loss": 0.8258, "step": 5521 }, { "epoch": 0.4944428899210924, "grad_norm": 1.0466269900529461, "learning_rate": 1.0666571781029487e-05, "loss": 0.872, "step": 5522 }, { "epoch": 0.49453243046617046, "grad_norm": 1.0367848702168003, "learning_rate": 1.0663677914820724e-05, "loss": 0.8437, "step": 5523 }, { "epoch": 0.49462197101124855, "grad_norm": 0.9883651468063838, "learning_rate": 1.0660783992785542e-05, "loss": 0.8498, "step": 5524 }, { "epoch": 0.4947115115563266, "grad_norm": 1.0617241774989314, "learning_rate": 1.0657890015167363e-05, "loss": 0.809, "step": 5525 }, { "epoch": 0.4948010521014047, "grad_norm": 0.9285536384560348, "learning_rate": 1.0654995982209617e-05, "loss": 0.8019, "step": 5526 }, { "epoch": 0.4948905926464827, "grad_norm": 0.9514566484270098, "learning_rate": 1.0652101894155749e-05, "loss": 0.7813, "step": 5527 }, { "epoch": 0.4949801331915608, "grad_norm": 0.9939600922643165, "learning_rate": 1.0649207751249188e-05, "loss": 0.8168, "step": 5528 }, { "epoch": 0.49506967373663885, "grad_norm": 0.9418871835676739, "learning_rate": 1.064631355373339e-05, "loss": 0.8876, "step": 5529 }, { "epoch": 0.49515921428171694, "grad_norm": 0.8950925752811717, "learning_rate": 1.0643419301851804e-05, "loss": 0.8431, "step": 5530 }, { "epoch": 0.49524875482679503, "grad_norm": 0.9492929151239158, "learning_rate": 1.064052499584788e-05, "loss": 0.8969, "step": 5531 }, { "epoch": 0.49533829537187307, "grad_norm": 1.0364137086068224, "learning_rate": 1.063763063596508e-05, "loss": 0.8907, "step": 5532 }, { "epoch": 0.49542783591695116, "grad_norm": 0.9529723244730066, "learning_rate": 1.0634736222446873e-05, "loss": 0.8545, "step": 5533 }, { "epoch": 0.4955173764620292, "grad_norm": 0.9419168354959362, "learning_rate": 1.0631841755536719e-05, "loss": 0.8461, "step": 5534 }, { "epoch": 0.4956069170071073, "grad_norm": 0.9464035725028185, "learning_rate": 1.0628947235478098e-05, "loss": 0.844, "step": 5535 }, { "epoch": 0.49569645755218533, "grad_norm": 0.898385316526279, "learning_rate": 1.0626052662514484e-05, "loss": 0.805, "step": 5536 }, { "epoch": 0.4957859980972634, "grad_norm": 0.914898577258325, "learning_rate": 1.0623158036889361e-05, "loss": 0.8842, "step": 5537 }, { "epoch": 0.49587553864234146, "grad_norm": 0.909692154896812, "learning_rate": 1.0620263358846212e-05, "loss": 0.8223, "step": 5538 }, { "epoch": 0.49596507918741956, "grad_norm": 1.04479570880073, "learning_rate": 1.0617368628628533e-05, "loss": 0.8922, "step": 5539 }, { "epoch": 0.49605461973249765, "grad_norm": 1.0875278809468085, "learning_rate": 1.0614473846479815e-05, "loss": 0.8521, "step": 5540 }, { "epoch": 0.4961441602775757, "grad_norm": 1.1454456565138784, "learning_rate": 1.0611579012643562e-05, "loss": 0.826, "step": 5541 }, { "epoch": 0.4962337008226538, "grad_norm": 0.9765731232229852, "learning_rate": 1.0608684127363274e-05, "loss": 0.8179, "step": 5542 }, { "epoch": 0.4963232413677318, "grad_norm": 0.8943157055690566, "learning_rate": 1.060578919088246e-05, "loss": 0.8398, "step": 5543 }, { "epoch": 0.4964127819128099, "grad_norm": 1.0658644563426445, "learning_rate": 1.0602894203444634e-05, "loss": 0.8558, "step": 5544 }, { "epoch": 0.49650232245788795, "grad_norm": 0.9847572261317876, "learning_rate": 1.0599999165293314e-05, "loss": 0.8497, "step": 5545 }, { "epoch": 0.49659186300296604, "grad_norm": 1.0896747613966464, "learning_rate": 1.0597104076672016e-05, "loss": 0.8338, "step": 5546 }, { "epoch": 0.4966814035480441, "grad_norm": 1.0182416176095934, "learning_rate": 1.059420893782427e-05, "loss": 0.8664, "step": 5547 }, { "epoch": 0.49677094409312217, "grad_norm": 1.1166118919325323, "learning_rate": 1.0591313748993605e-05, "loss": 0.8607, "step": 5548 }, { "epoch": 0.49686048463820026, "grad_norm": 0.8848185870243289, "learning_rate": 1.0588418510423554e-05, "loss": 0.8193, "step": 5549 }, { "epoch": 0.4969500251832783, "grad_norm": 0.8671343187828997, "learning_rate": 1.0585523222357657e-05, "loss": 0.871, "step": 5550 }, { "epoch": 0.4970395657283564, "grad_norm": 1.2493932025967889, "learning_rate": 1.0582627885039454e-05, "loss": 0.8987, "step": 5551 }, { "epoch": 0.49712910627343443, "grad_norm": 0.9142869154250017, "learning_rate": 1.057973249871249e-05, "loss": 0.891, "step": 5552 }, { "epoch": 0.4972186468185125, "grad_norm": 1.0455783016839881, "learning_rate": 1.057683706362032e-05, "loss": 0.7795, "step": 5553 }, { "epoch": 0.49730818736359056, "grad_norm": 0.9029708844988072, "learning_rate": 1.0573941580006494e-05, "loss": 0.8062, "step": 5554 }, { "epoch": 0.49739772790866865, "grad_norm": 1.102077207288504, "learning_rate": 1.0571046048114573e-05, "loss": 0.857, "step": 5555 }, { "epoch": 0.4974872684537467, "grad_norm": 1.0723032582385812, "learning_rate": 1.0568150468188119e-05, "loss": 0.8173, "step": 5556 }, { "epoch": 0.4975768089988248, "grad_norm": 0.8992947317065803, "learning_rate": 1.0565254840470703e-05, "loss": 0.897, "step": 5557 }, { "epoch": 0.4976663495439029, "grad_norm": 0.8563259862578821, "learning_rate": 1.0562359165205884e-05, "loss": 0.8282, "step": 5558 }, { "epoch": 0.4977558900889809, "grad_norm": 1.0838005421630286, "learning_rate": 1.055946344263725e-05, "loss": 0.8351, "step": 5559 }, { "epoch": 0.497845430634059, "grad_norm": 0.8947836046135957, "learning_rate": 1.0556567673008376e-05, "loss": 0.7789, "step": 5560 }, { "epoch": 0.49793497117913704, "grad_norm": 0.9492163018068949, "learning_rate": 1.0553671856562836e-05, "loss": 0.7946, "step": 5561 }, { "epoch": 0.49802451172421514, "grad_norm": 0.9692056874135808, "learning_rate": 1.0550775993544232e-05, "loss": 0.8414, "step": 5562 }, { "epoch": 0.4981140522692932, "grad_norm": 0.9531500419874599, "learning_rate": 1.0547880084196142e-05, "loss": 0.8732, "step": 5563 }, { "epoch": 0.49820359281437127, "grad_norm": 0.9010628502269036, "learning_rate": 1.0544984128762165e-05, "loss": 0.8387, "step": 5564 }, { "epoch": 0.4982931333594493, "grad_norm": 0.8789157813395974, "learning_rate": 1.0542088127485896e-05, "loss": 0.8, "step": 5565 }, { "epoch": 0.4983826739045274, "grad_norm": 1.011682194798431, "learning_rate": 1.0539192080610947e-05, "loss": 0.7437, "step": 5566 }, { "epoch": 0.4984722144496055, "grad_norm": 0.9118967096553129, "learning_rate": 1.0536295988380914e-05, "loss": 0.7881, "step": 5567 }, { "epoch": 0.4985617549946835, "grad_norm": 0.8865521741903999, "learning_rate": 1.053339985103941e-05, "loss": 0.8251, "step": 5568 }, { "epoch": 0.4986512955397616, "grad_norm": 0.9113690821592623, "learning_rate": 1.0530503668830048e-05, "loss": 0.8346, "step": 5569 }, { "epoch": 0.49874083608483966, "grad_norm": 1.0489648222913055, "learning_rate": 1.0527607441996445e-05, "loss": 0.8174, "step": 5570 }, { "epoch": 0.49883037662991775, "grad_norm": 1.0291688569295008, "learning_rate": 1.0524711170782225e-05, "loss": 0.8194, "step": 5571 }, { "epoch": 0.4989199171749958, "grad_norm": 1.0336236844813076, "learning_rate": 1.0521814855431011e-05, "loss": 0.8136, "step": 5572 }, { "epoch": 0.4990094577200739, "grad_norm": 1.0606173896180067, "learning_rate": 1.0518918496186431e-05, "loss": 0.8554, "step": 5573 }, { "epoch": 0.4990989982651519, "grad_norm": 0.8971481616756988, "learning_rate": 1.051602209329212e-05, "loss": 0.8178, "step": 5574 }, { "epoch": 0.49918853881023, "grad_norm": 0.9570364411683916, "learning_rate": 1.0513125646991712e-05, "loss": 0.7802, "step": 5575 }, { "epoch": 0.4992780793553081, "grad_norm": 1.0126625278202206, "learning_rate": 1.0510229157528844e-05, "loss": 0.8646, "step": 5576 }, { "epoch": 0.49936761990038614, "grad_norm": 0.8540957518673419, "learning_rate": 1.0507332625147164e-05, "loss": 0.8116, "step": 5577 }, { "epoch": 0.49945716044546423, "grad_norm": 0.9162209421878439, "learning_rate": 1.0504436050090316e-05, "loss": 0.7847, "step": 5578 }, { "epoch": 0.49954670099054227, "grad_norm": 0.9317959070033501, "learning_rate": 1.050153943260195e-05, "loss": 0.8294, "step": 5579 }, { "epoch": 0.49963624153562036, "grad_norm": 0.9519429346333402, "learning_rate": 1.0498642772925724e-05, "loss": 0.8569, "step": 5580 }, { "epoch": 0.4997257820806984, "grad_norm": 0.9345127321472849, "learning_rate": 1.0495746071305293e-05, "loss": 0.833, "step": 5581 }, { "epoch": 0.4998153226257765, "grad_norm": 0.889599219493746, "learning_rate": 1.0492849327984316e-05, "loss": 0.8278, "step": 5582 }, { "epoch": 0.49990486317085453, "grad_norm": 0.9514754041890596, "learning_rate": 1.048995254320646e-05, "loss": 0.8555, "step": 5583 }, { "epoch": 0.4999944037159326, "grad_norm": 0.8351257042788979, "learning_rate": 1.0487055717215394e-05, "loss": 0.7965, "step": 5584 }, { "epoch": 0.5000839442610107, "grad_norm": 0.9838067955483186, "learning_rate": 1.0484158850254787e-05, "loss": 0.8135, "step": 5585 }, { "epoch": 0.5001734848060888, "grad_norm": 0.9840477103044719, "learning_rate": 1.0481261942568315e-05, "loss": 0.8436, "step": 5586 }, { "epoch": 0.5002630253511668, "grad_norm": 1.0105657456983554, "learning_rate": 1.0478364994399659e-05, "loss": 0.8229, "step": 5587 }, { "epoch": 0.5003525658962449, "grad_norm": 0.9179908232062686, "learning_rate": 1.0475468005992495e-05, "loss": 0.834, "step": 5588 }, { "epoch": 0.5004421064413229, "grad_norm": 1.02529697336232, "learning_rate": 1.0472570977590513e-05, "loss": 0.8151, "step": 5589 }, { "epoch": 0.5005316469864011, "grad_norm": 0.9481129392812507, "learning_rate": 1.0469673909437404e-05, "loss": 0.859, "step": 5590 }, { "epoch": 0.5006211875314791, "grad_norm": 0.9670658230081969, "learning_rate": 1.0466776801776852e-05, "loss": 0.7935, "step": 5591 }, { "epoch": 0.5007107280765571, "grad_norm": 0.8756442609641216, "learning_rate": 1.0463879654852556e-05, "loss": 0.8568, "step": 5592 }, { "epoch": 0.5008002686216352, "grad_norm": 0.918635184947109, "learning_rate": 1.0460982468908218e-05, "loss": 0.8199, "step": 5593 }, { "epoch": 0.5008898091667133, "grad_norm": 0.9524984967249571, "learning_rate": 1.0458085244187537e-05, "loss": 0.8024, "step": 5594 }, { "epoch": 0.5009793497117914, "grad_norm": 1.0065523010054314, "learning_rate": 1.0455187980934213e-05, "loss": 0.7811, "step": 5595 }, { "epoch": 0.5010688902568694, "grad_norm": 1.0549528396337997, "learning_rate": 1.0452290679391965e-05, "loss": 0.9018, "step": 5596 }, { "epoch": 0.5011584308019476, "grad_norm": 1.2804268216657473, "learning_rate": 1.0449393339804497e-05, "loss": 0.9132, "step": 5597 }, { "epoch": 0.5012479713470256, "grad_norm": 0.9024326326719799, "learning_rate": 1.0446495962415527e-05, "loss": 0.8817, "step": 5598 }, { "epoch": 0.5013375118921036, "grad_norm": 0.9381537264660221, "learning_rate": 1.044359854746877e-05, "loss": 0.8095, "step": 5599 }, { "epoch": 0.5014270524371817, "grad_norm": 0.9619689625656384, "learning_rate": 1.0440701095207948e-05, "loss": 0.8326, "step": 5600 }, { "epoch": 0.5015165929822598, "grad_norm": 0.9192958598831402, "learning_rate": 1.0437803605876785e-05, "loss": 0.8526, "step": 5601 }, { "epoch": 0.5016061335273378, "grad_norm": 1.2233757488710706, "learning_rate": 1.0434906079719014e-05, "loss": 0.7907, "step": 5602 }, { "epoch": 0.5016956740724159, "grad_norm": 0.942825602122367, "learning_rate": 1.0432008516978358e-05, "loss": 0.8525, "step": 5603 }, { "epoch": 0.501785214617494, "grad_norm": 0.9435089972127314, "learning_rate": 1.0429110917898553e-05, "loss": 0.8047, "step": 5604 }, { "epoch": 0.5018747551625721, "grad_norm": 1.0978093484309475, "learning_rate": 1.0426213282723337e-05, "loss": 0.8861, "step": 5605 }, { "epoch": 0.5019642957076501, "grad_norm": 1.0168516978382383, "learning_rate": 1.0423315611696447e-05, "loss": 0.8368, "step": 5606 }, { "epoch": 0.5020538362527281, "grad_norm": 1.0276617535045034, "learning_rate": 1.0420417905061629e-05, "loss": 0.7736, "step": 5607 }, { "epoch": 0.5021433767978063, "grad_norm": 0.9254192355055794, "learning_rate": 1.0417520163062627e-05, "loss": 0.8441, "step": 5608 }, { "epoch": 0.5022329173428843, "grad_norm": 0.9339239083227069, "learning_rate": 1.0414622385943187e-05, "loss": 0.883, "step": 5609 }, { "epoch": 0.5023224578879624, "grad_norm": 1.001848237517298, "learning_rate": 1.0411724573947065e-05, "loss": 0.8217, "step": 5610 }, { "epoch": 0.5024119984330404, "grad_norm": 1.009722822791706, "learning_rate": 1.0408826727318014e-05, "loss": 0.7443, "step": 5611 }, { "epoch": 0.5025015389781186, "grad_norm": 0.9294132603640504, "learning_rate": 1.0405928846299789e-05, "loss": 0.8289, "step": 5612 }, { "epoch": 0.5025910795231966, "grad_norm": 1.0226094017551006, "learning_rate": 1.0403030931136154e-05, "loss": 0.8607, "step": 5613 }, { "epoch": 0.5026806200682746, "grad_norm": 1.026875333863931, "learning_rate": 1.0400132982070868e-05, "loss": 0.8383, "step": 5614 }, { "epoch": 0.5027701606133528, "grad_norm": 0.957124238308067, "learning_rate": 1.03972349993477e-05, "loss": 0.8264, "step": 5615 }, { "epoch": 0.5028597011584308, "grad_norm": 0.9094004290289938, "learning_rate": 1.039433698321042e-05, "loss": 0.7768, "step": 5616 }, { "epoch": 0.5029492417035089, "grad_norm": 1.0335680484552405, "learning_rate": 1.03914389339028e-05, "loss": 0.8495, "step": 5617 }, { "epoch": 0.5030387822485869, "grad_norm": 0.9146139794142809, "learning_rate": 1.038854085166861e-05, "loss": 0.8259, "step": 5618 }, { "epoch": 0.503128322793665, "grad_norm": 0.9034191594328688, "learning_rate": 1.0385642736751627e-05, "loss": 0.8347, "step": 5619 }, { "epoch": 0.5032178633387431, "grad_norm": 0.8810751238713469, "learning_rate": 1.0382744589395638e-05, "loss": 0.8192, "step": 5620 }, { "epoch": 0.5033074038838211, "grad_norm": 0.9044205690734595, "learning_rate": 1.0379846409844421e-05, "loss": 0.8528, "step": 5621 }, { "epoch": 0.5033969444288993, "grad_norm": 0.9125155840134336, "learning_rate": 1.0376948198341759e-05, "loss": 0.7811, "step": 5622 }, { "epoch": 0.5034864849739773, "grad_norm": 0.9727289194209204, "learning_rate": 1.0374049955131444e-05, "loss": 0.8097, "step": 5623 }, { "epoch": 0.5035760255190553, "grad_norm": 0.9816898852134502, "learning_rate": 1.0371151680457268e-05, "loss": 0.8437, "step": 5624 }, { "epoch": 0.5036655660641334, "grad_norm": 0.9214796603260058, "learning_rate": 1.0368253374563018e-05, "loss": 0.8542, "step": 5625 }, { "epoch": 0.5037551066092115, "grad_norm": 0.9384612439202347, "learning_rate": 1.0365355037692498e-05, "loss": 0.8498, "step": 5626 }, { "epoch": 0.5038446471542896, "grad_norm": 0.9018324365561423, "learning_rate": 1.03624566700895e-05, "loss": 0.8254, "step": 5627 }, { "epoch": 0.5039341876993676, "grad_norm": 0.9668605876223945, "learning_rate": 1.035955827199783e-05, "loss": 0.8035, "step": 5628 }, { "epoch": 0.5040237282444456, "grad_norm": 1.359179921382918, "learning_rate": 1.035665984366129e-05, "loss": 0.8322, "step": 5629 }, { "epoch": 0.5041132687895238, "grad_norm": 0.9970508299282815, "learning_rate": 1.0353761385323684e-05, "loss": 0.8453, "step": 5630 }, { "epoch": 0.5042028093346018, "grad_norm": 1.0323354953995, "learning_rate": 1.0350862897228823e-05, "loss": 0.8107, "step": 5631 }, { "epoch": 0.5042923498796799, "grad_norm": 0.8931896203139051, "learning_rate": 1.034796437962052e-05, "loss": 0.8403, "step": 5632 }, { "epoch": 0.504381890424758, "grad_norm": 0.9268168685818163, "learning_rate": 1.034506583274259e-05, "loss": 0.8649, "step": 5633 }, { "epoch": 0.504471430969836, "grad_norm": 0.9183292408588057, "learning_rate": 1.0342167256838842e-05, "loss": 0.8149, "step": 5634 }, { "epoch": 0.5045609715149141, "grad_norm": 0.8823094840847446, "learning_rate": 1.03392686521531e-05, "loss": 0.8016, "step": 5635 }, { "epoch": 0.5046505120599921, "grad_norm": 1.1527839163753102, "learning_rate": 1.0336370018929187e-05, "loss": 0.8267, "step": 5636 }, { "epoch": 0.5047400526050703, "grad_norm": 0.8892244683324797, "learning_rate": 1.0333471357410923e-05, "loss": 0.8084, "step": 5637 }, { "epoch": 0.5048295931501483, "grad_norm": 0.9485727730380542, "learning_rate": 1.0330572667842135e-05, "loss": 0.8378, "step": 5638 }, { "epoch": 0.5049191336952263, "grad_norm": 0.9234940048982382, "learning_rate": 1.032767395046665e-05, "loss": 0.8688, "step": 5639 }, { "epoch": 0.5050086742403045, "grad_norm": 0.9838577263077368, "learning_rate": 1.0324775205528304e-05, "loss": 0.8507, "step": 5640 }, { "epoch": 0.5050982147853825, "grad_norm": 0.9531037132937147, "learning_rate": 1.0321876433270922e-05, "loss": 0.8585, "step": 5641 }, { "epoch": 0.5051877553304606, "grad_norm": 0.8929780630608709, "learning_rate": 1.0318977633938346e-05, "loss": 0.8638, "step": 5642 }, { "epoch": 0.5052772958755386, "grad_norm": 0.8506284264992997, "learning_rate": 1.0316078807774408e-05, "loss": 0.8169, "step": 5643 }, { "epoch": 0.5053668364206167, "grad_norm": 0.8347783881841847, "learning_rate": 1.0313179955022952e-05, "loss": 0.8382, "step": 5644 }, { "epoch": 0.5054563769656948, "grad_norm": 1.4144879409799278, "learning_rate": 1.0310281075927822e-05, "loss": 0.8749, "step": 5645 }, { "epoch": 0.5055459175107728, "grad_norm": 0.8344064144632978, "learning_rate": 1.0307382170732853e-05, "loss": 0.7805, "step": 5646 }, { "epoch": 0.5056354580558509, "grad_norm": 0.8962987598690999, "learning_rate": 1.0304483239681904e-05, "loss": 0.8313, "step": 5647 }, { "epoch": 0.505724998600929, "grad_norm": 0.9163357440818541, "learning_rate": 1.0301584283018813e-05, "loss": 0.826, "step": 5648 }, { "epoch": 0.505814539146007, "grad_norm": 0.8676967806028741, "learning_rate": 1.0298685300987434e-05, "loss": 0.8369, "step": 5649 }, { "epoch": 0.5059040796910851, "grad_norm": 0.9160550273681637, "learning_rate": 1.0295786293831624e-05, "loss": 0.8024, "step": 5650 }, { "epoch": 0.5059936202361632, "grad_norm": 0.9878481845130673, "learning_rate": 1.0292887261795233e-05, "loss": 0.8119, "step": 5651 }, { "epoch": 0.5060831607812413, "grad_norm": 1.0061763393403576, "learning_rate": 1.0289988205122118e-05, "loss": 0.8377, "step": 5652 }, { "epoch": 0.5061727013263193, "grad_norm": 0.9854810788964102, "learning_rate": 1.0287089124056144e-05, "loss": 0.7974, "step": 5653 }, { "epoch": 0.5062622418713973, "grad_norm": 0.8535170441105268, "learning_rate": 1.0284190018841167e-05, "loss": 0.8137, "step": 5654 }, { "epoch": 0.5063517824164755, "grad_norm": 1.1565294516679432, "learning_rate": 1.028129088972105e-05, "loss": 0.827, "step": 5655 }, { "epoch": 0.5064413229615535, "grad_norm": 0.9241579902072041, "learning_rate": 1.0278391736939664e-05, "loss": 0.7765, "step": 5656 }, { "epoch": 0.5065308635066316, "grad_norm": 0.928523994970865, "learning_rate": 1.027549256074087e-05, "loss": 0.759, "step": 5657 }, { "epoch": 0.5066204040517097, "grad_norm": 1.0271839089472052, "learning_rate": 1.027259336136854e-05, "loss": 0.8178, "step": 5658 }, { "epoch": 0.5067099445967878, "grad_norm": 1.0768939092417489, "learning_rate": 1.0269694139066541e-05, "loss": 0.8353, "step": 5659 }, { "epoch": 0.5067994851418658, "grad_norm": 0.8800049778567998, "learning_rate": 1.0266794894078753e-05, "loss": 0.8561, "step": 5660 }, { "epoch": 0.5068890256869438, "grad_norm": 0.9707451025794508, "learning_rate": 1.026389562664905e-05, "loss": 0.8394, "step": 5661 }, { "epoch": 0.506978566232022, "grad_norm": 1.0759671146740297, "learning_rate": 1.0260996337021302e-05, "loss": 0.8249, "step": 5662 }, { "epoch": 0.5070681067771, "grad_norm": 0.8665457638347268, "learning_rate": 1.0258097025439397e-05, "loss": 0.8097, "step": 5663 }, { "epoch": 0.507157647322178, "grad_norm": 0.9510320250166213, "learning_rate": 1.025519769214721e-05, "loss": 0.7812, "step": 5664 }, { "epoch": 0.5072471878672561, "grad_norm": 0.9347308824193438, "learning_rate": 1.0252298337388625e-05, "loss": 0.8104, "step": 5665 }, { "epoch": 0.5073367284123342, "grad_norm": 0.9148271693553773, "learning_rate": 1.0249398961407523e-05, "loss": 0.8807, "step": 5666 }, { "epoch": 0.5074262689574123, "grad_norm": 0.8817225109044043, "learning_rate": 1.0246499564447796e-05, "loss": 0.835, "step": 5667 }, { "epoch": 0.5075158095024903, "grad_norm": 0.9314648841176935, "learning_rate": 1.024360014675333e-05, "loss": 0.8253, "step": 5668 }, { "epoch": 0.5076053500475685, "grad_norm": 0.9322579440048805, "learning_rate": 1.024070070856801e-05, "loss": 0.8481, "step": 5669 }, { "epoch": 0.5076948905926465, "grad_norm": 0.9171280928073983, "learning_rate": 1.0237801250135733e-05, "loss": 0.798, "step": 5670 }, { "epoch": 0.5077844311377245, "grad_norm": 1.0105736625965325, "learning_rate": 1.023490177170039e-05, "loss": 0.822, "step": 5671 }, { "epoch": 0.5078739716828026, "grad_norm": 0.992994003637592, "learning_rate": 1.0232002273505877e-05, "loss": 0.8141, "step": 5672 }, { "epoch": 0.5079635122278807, "grad_norm": 0.9460741283881552, "learning_rate": 1.0229102755796083e-05, "loss": 0.8254, "step": 5673 }, { "epoch": 0.5080530527729588, "grad_norm": 0.9585354497853238, "learning_rate": 1.0226203218814916e-05, "loss": 0.8493, "step": 5674 }, { "epoch": 0.5081425933180368, "grad_norm": 0.989025540061667, "learning_rate": 1.0223303662806274e-05, "loss": 0.863, "step": 5675 }, { "epoch": 0.5082321338631149, "grad_norm": 1.188395277964902, "learning_rate": 1.0220404088014049e-05, "loss": 0.8485, "step": 5676 }, { "epoch": 0.508321674408193, "grad_norm": 0.926224335645974, "learning_rate": 1.0217504494682155e-05, "loss": 0.8358, "step": 5677 }, { "epoch": 0.508411214953271, "grad_norm": 0.9688517555777614, "learning_rate": 1.021460488305449e-05, "loss": 0.8255, "step": 5678 }, { "epoch": 0.508500755498349, "grad_norm": 1.1015507395449424, "learning_rate": 1.0211705253374962e-05, "loss": 0.8861, "step": 5679 }, { "epoch": 0.5085902960434272, "grad_norm": 1.0239398094009546, "learning_rate": 1.020880560588748e-05, "loss": 0.8873, "step": 5680 }, { "epoch": 0.5086798365885052, "grad_norm": 0.999686397918096, "learning_rate": 1.0205905940835948e-05, "loss": 0.8627, "step": 5681 }, { "epoch": 0.5087693771335833, "grad_norm": 0.9894233074326406, "learning_rate": 1.0203006258464276e-05, "loss": 0.8355, "step": 5682 }, { "epoch": 0.5088589176786613, "grad_norm": 0.8910249281615787, "learning_rate": 1.0200106559016387e-05, "loss": 0.8322, "step": 5683 }, { "epoch": 0.5089484582237395, "grad_norm": 0.945274786165729, "learning_rate": 1.0197206842736182e-05, "loss": 0.7911, "step": 5684 }, { "epoch": 0.5090379987688175, "grad_norm": 0.9649104404992614, "learning_rate": 1.0194307109867579e-05, "loss": 0.8103, "step": 5685 }, { "epoch": 0.5091275393138955, "grad_norm": 1.0349309359091792, "learning_rate": 1.0191407360654497e-05, "loss": 0.8337, "step": 5686 }, { "epoch": 0.5092170798589737, "grad_norm": 0.9101053504996527, "learning_rate": 1.0188507595340852e-05, "loss": 0.8481, "step": 5687 }, { "epoch": 0.5093066204040517, "grad_norm": 0.9521517634457553, "learning_rate": 1.0185607814170561e-05, "loss": 0.8125, "step": 5688 }, { "epoch": 0.5093961609491298, "grad_norm": 1.046247005505056, "learning_rate": 1.0182708017387545e-05, "loss": 0.8921, "step": 5689 }, { "epoch": 0.5094857014942078, "grad_norm": 0.9736685537150398, "learning_rate": 1.0179808205235728e-05, "loss": 0.8925, "step": 5690 }, { "epoch": 0.509575242039286, "grad_norm": 0.87872915848908, "learning_rate": 1.017690837795903e-05, "loss": 0.8558, "step": 5691 }, { "epoch": 0.509664782584364, "grad_norm": 0.9504394403276015, "learning_rate": 1.0174008535801377e-05, "loss": 0.8354, "step": 5692 }, { "epoch": 0.509754323129442, "grad_norm": 0.8978687322180984, "learning_rate": 1.017110867900669e-05, "loss": 0.8089, "step": 5693 }, { "epoch": 0.5098438636745202, "grad_norm": 0.9358443916739222, "learning_rate": 1.01682088078189e-05, "loss": 0.7893, "step": 5694 }, { "epoch": 0.5099334042195982, "grad_norm": 0.9790316375246025, "learning_rate": 1.0165308922481934e-05, "loss": 0.8402, "step": 5695 }, { "epoch": 0.5100229447646762, "grad_norm": 0.9277316035542695, "learning_rate": 1.0162409023239718e-05, "loss": 0.778, "step": 5696 }, { "epoch": 0.5101124853097543, "grad_norm": 0.9432252138560028, "learning_rate": 1.0159509110336185e-05, "loss": 0.8598, "step": 5697 }, { "epoch": 0.5102020258548324, "grad_norm": 0.9254987079443144, "learning_rate": 1.0156609184015267e-05, "loss": 0.8437, "step": 5698 }, { "epoch": 0.5102915663999105, "grad_norm": 0.9366227658038045, "learning_rate": 1.0153709244520896e-05, "loss": 0.8759, "step": 5699 }, { "epoch": 0.5103811069449885, "grad_norm": 1.0391836448213363, "learning_rate": 1.0150809292096999e-05, "loss": 0.8872, "step": 5700 }, { "epoch": 0.5104706474900665, "grad_norm": 1.0040360243389448, "learning_rate": 1.014790932698752e-05, "loss": 0.8191, "step": 5701 }, { "epoch": 0.5105601880351447, "grad_norm": 0.8388867553743186, "learning_rate": 1.014500934943639e-05, "loss": 0.8282, "step": 5702 }, { "epoch": 0.5106497285802227, "grad_norm": 0.8999765401073014, "learning_rate": 1.0142109359687542e-05, "loss": 0.8168, "step": 5703 }, { "epoch": 0.5107392691253008, "grad_norm": 0.9977696804638951, "learning_rate": 1.0139209357984923e-05, "loss": 0.8091, "step": 5704 }, { "epoch": 0.5108288096703789, "grad_norm": 0.9896412627993668, "learning_rate": 1.0136309344572465e-05, "loss": 0.9126, "step": 5705 }, { "epoch": 0.510918350215457, "grad_norm": 0.9361163637436256, "learning_rate": 1.0133409319694107e-05, "loss": 0.8287, "step": 5706 }, { "epoch": 0.511007890760535, "grad_norm": 0.8668929837122763, "learning_rate": 1.0130509283593795e-05, "loss": 0.8654, "step": 5707 }, { "epoch": 0.511097431305613, "grad_norm": 1.006381330493986, "learning_rate": 1.0127609236515466e-05, "loss": 0.8569, "step": 5708 }, { "epoch": 0.5111869718506912, "grad_norm": 0.9339336685237292, "learning_rate": 1.012470917870306e-05, "loss": 0.8114, "step": 5709 }, { "epoch": 0.5112765123957692, "grad_norm": 0.9337146448938235, "learning_rate": 1.0121809110400531e-05, "loss": 0.844, "step": 5710 }, { "epoch": 0.5113660529408472, "grad_norm": 0.9208362710189054, "learning_rate": 1.0118909031851814e-05, "loss": 0.8212, "step": 5711 }, { "epoch": 0.5114555934859254, "grad_norm": 1.013699665444257, "learning_rate": 1.0116008943300852e-05, "loss": 0.8707, "step": 5712 }, { "epoch": 0.5115451340310034, "grad_norm": 0.957475425496647, "learning_rate": 1.0113108844991603e-05, "loss": 0.7581, "step": 5713 }, { "epoch": 0.5116346745760815, "grad_norm": 1.1032190404957256, "learning_rate": 1.0110208737168004e-05, "loss": 0.766, "step": 5714 }, { "epoch": 0.5117242151211595, "grad_norm": 0.9372153697871768, "learning_rate": 1.0107308620074e-05, "loss": 0.7944, "step": 5715 }, { "epoch": 0.5118137556662377, "grad_norm": 0.9115984906035624, "learning_rate": 1.0104408493953553e-05, "loss": 0.829, "step": 5716 }, { "epoch": 0.5119032962113157, "grad_norm": 1.0455012754565474, "learning_rate": 1.01015083590506e-05, "loss": 0.882, "step": 5717 }, { "epoch": 0.5119928367563937, "grad_norm": 0.9267008740134861, "learning_rate": 1.0098608215609093e-05, "loss": 0.865, "step": 5718 }, { "epoch": 0.5120823773014718, "grad_norm": 0.8458052458653388, "learning_rate": 1.0095708063872987e-05, "loss": 0.8746, "step": 5719 }, { "epoch": 0.5121719178465499, "grad_norm": 0.950756241340812, "learning_rate": 1.009280790408623e-05, "loss": 0.8434, "step": 5720 }, { "epoch": 0.512261458391628, "grad_norm": 0.9365497973498069, "learning_rate": 1.0089907736492775e-05, "loss": 0.8047, "step": 5721 }, { "epoch": 0.512350998936706, "grad_norm": 0.8619053666743478, "learning_rate": 1.008700756133657e-05, "loss": 0.8396, "step": 5722 }, { "epoch": 0.5124405394817841, "grad_norm": 1.0703625203452427, "learning_rate": 1.0084107378861576e-05, "loss": 0.8563, "step": 5723 }, { "epoch": 0.5125300800268622, "grad_norm": 1.1073593472492376, "learning_rate": 1.0081207189311744e-05, "loss": 0.8995, "step": 5724 }, { "epoch": 0.5126196205719402, "grad_norm": 1.026812377376004, "learning_rate": 1.0078306992931026e-05, "loss": 0.8336, "step": 5725 }, { "epoch": 0.5127091611170183, "grad_norm": 0.9046759000873378, "learning_rate": 1.007540678996338e-05, "loss": 0.8437, "step": 5726 }, { "epoch": 0.5127987016620964, "grad_norm": 0.8416369367749211, "learning_rate": 1.0072506580652761e-05, "loss": 0.8447, "step": 5727 }, { "epoch": 0.5128882422071744, "grad_norm": 0.9347753644556931, "learning_rate": 1.0069606365243123e-05, "loss": 0.8211, "step": 5728 }, { "epoch": 0.5129777827522525, "grad_norm": 0.9252804213041933, "learning_rate": 1.0066706143978426e-05, "loss": 0.818, "step": 5729 }, { "epoch": 0.5130673232973306, "grad_norm": 0.960000271088663, "learning_rate": 1.0063805917102625e-05, "loss": 0.8279, "step": 5730 }, { "epoch": 0.5131568638424087, "grad_norm": 0.9850243958833704, "learning_rate": 1.0060905684859676e-05, "loss": 0.8201, "step": 5731 }, { "epoch": 0.5132464043874867, "grad_norm": 0.982282049149327, "learning_rate": 1.0058005447493543e-05, "loss": 0.8912, "step": 5732 }, { "epoch": 0.5133359449325647, "grad_norm": 1.2195788138486465, "learning_rate": 1.0055105205248179e-05, "loss": 0.8399, "step": 5733 }, { "epoch": 0.5134254854776429, "grad_norm": 0.8911986986009016, "learning_rate": 1.0052204958367543e-05, "loss": 0.7993, "step": 5734 }, { "epoch": 0.5135150260227209, "grad_norm": 0.882611232844248, "learning_rate": 1.0049304707095601e-05, "loss": 0.829, "step": 5735 }, { "epoch": 0.513604566567799, "grad_norm": 0.8881986387566153, "learning_rate": 1.0046404451676301e-05, "loss": 0.7916, "step": 5736 }, { "epoch": 0.513694107112877, "grad_norm": 1.1044456942046341, "learning_rate": 1.0043504192353617e-05, "loss": 0.7922, "step": 5737 }, { "epoch": 0.5137836476579551, "grad_norm": 0.9337394092539986, "learning_rate": 1.0040603929371497e-05, "loss": 0.8108, "step": 5738 }, { "epoch": 0.5138731882030332, "grad_norm": 0.8626039663294794, "learning_rate": 1.0037703662973908e-05, "loss": 0.8106, "step": 5739 }, { "epoch": 0.5139627287481112, "grad_norm": 0.9295625376946628, "learning_rate": 1.003480339340481e-05, "loss": 0.8095, "step": 5740 }, { "epoch": 0.5140522692931894, "grad_norm": 0.9708669394459352, "learning_rate": 1.0031903120908164e-05, "loss": 0.878, "step": 5741 }, { "epoch": 0.5141418098382674, "grad_norm": 0.9724627017908292, "learning_rate": 1.002900284572793e-05, "loss": 0.855, "step": 5742 }, { "epoch": 0.5142313503833454, "grad_norm": 0.9994421806819294, "learning_rate": 1.0026102568108073e-05, "loss": 0.8142, "step": 5743 }, { "epoch": 0.5143208909284235, "grad_norm": 0.9993004262931823, "learning_rate": 1.0023202288292552e-05, "loss": 0.822, "step": 5744 }, { "epoch": 0.5144104314735016, "grad_norm": 0.9415485291817184, "learning_rate": 1.002030200652533e-05, "loss": 0.8269, "step": 5745 }, { "epoch": 0.5144999720185797, "grad_norm": 0.8186878339858695, "learning_rate": 1.0017401723050373e-05, "loss": 0.7756, "step": 5746 }, { "epoch": 0.5145895125636577, "grad_norm": 1.0452318836621595, "learning_rate": 1.0014501438111634e-05, "loss": 0.9124, "step": 5747 }, { "epoch": 0.5146790531087359, "grad_norm": 0.9936390653578736, "learning_rate": 1.0011601151953086e-05, "loss": 0.8271, "step": 5748 }, { "epoch": 0.5147685936538139, "grad_norm": 1.1648323143979917, "learning_rate": 1.0008700864818684e-05, "loss": 0.8792, "step": 5749 }, { "epoch": 0.5148581341988919, "grad_norm": 0.9241611919959483, "learning_rate": 1.0005800576952394e-05, "loss": 0.8315, "step": 5750 }, { "epoch": 0.51494767474397, "grad_norm": 0.9072341229587688, "learning_rate": 1.0002900288598178e-05, "loss": 0.8351, "step": 5751 }, { "epoch": 0.5150372152890481, "grad_norm": 0.9174708563684035, "learning_rate": 1e-05, "loss": 0.837, "step": 5752 }, { "epoch": 0.5151267558341261, "grad_norm": 0.9925731320875039, "learning_rate": 9.997099711401824e-06, "loss": 0.8403, "step": 5753 }, { "epoch": 0.5152162963792042, "grad_norm": 0.9004261651982192, "learning_rate": 9.994199423047606e-06, "loss": 0.8183, "step": 5754 }, { "epoch": 0.5153058369242822, "grad_norm": 0.93199335448373, "learning_rate": 9.991299135181321e-06, "loss": 0.8466, "step": 5755 }, { "epoch": 0.5153953774693604, "grad_norm": 1.0153260567968183, "learning_rate": 9.988398848046919e-06, "loss": 0.8504, "step": 5756 }, { "epoch": 0.5154849180144384, "grad_norm": 0.9423223317027095, "learning_rate": 9.985498561888368e-06, "loss": 0.8282, "step": 5757 }, { "epoch": 0.5155744585595164, "grad_norm": 0.8948109955018279, "learning_rate": 9.98259827694963e-06, "loss": 0.7583, "step": 5758 }, { "epoch": 0.5156639991045946, "grad_norm": 0.9254455779568171, "learning_rate": 9.979697993474671e-06, "loss": 0.809, "step": 5759 }, { "epoch": 0.5157535396496726, "grad_norm": 0.9417150871515682, "learning_rate": 9.97679771170745e-06, "loss": 0.7941, "step": 5760 }, { "epoch": 0.5158430801947507, "grad_norm": 1.0699868697742625, "learning_rate": 9.973897431891932e-06, "loss": 0.8488, "step": 5761 }, { "epoch": 0.5159326207398287, "grad_norm": 0.9390206129846707, "learning_rate": 9.970997154272072e-06, "loss": 0.7701, "step": 5762 }, { "epoch": 0.5160221612849069, "grad_norm": 1.1670611888440607, "learning_rate": 9.96809687909184e-06, "loss": 0.8077, "step": 5763 }, { "epoch": 0.5161117018299849, "grad_norm": 0.93986931395637, "learning_rate": 9.965196606595193e-06, "loss": 0.7987, "step": 5764 }, { "epoch": 0.5162012423750629, "grad_norm": 0.9488163763663193, "learning_rate": 9.962296337026094e-06, "loss": 0.8759, "step": 5765 }, { "epoch": 0.5162907829201411, "grad_norm": 0.9771767832165023, "learning_rate": 9.959396070628508e-06, "loss": 0.8141, "step": 5766 }, { "epoch": 0.5163803234652191, "grad_norm": 0.9869727106733011, "learning_rate": 9.956495807646388e-06, "loss": 0.8101, "step": 5767 }, { "epoch": 0.5164698640102972, "grad_norm": 0.8908464818278669, "learning_rate": 9.9535955483237e-06, "loss": 0.8084, "step": 5768 }, { "epoch": 0.5165594045553752, "grad_norm": 0.8874484652013991, "learning_rate": 9.950695292904402e-06, "loss": 0.8367, "step": 5769 }, { "epoch": 0.5166489451004533, "grad_norm": 0.9108127737525271, "learning_rate": 9.947795041632457e-06, "loss": 0.8373, "step": 5770 }, { "epoch": 0.5167384856455314, "grad_norm": 0.9571377336666812, "learning_rate": 9.944894794751823e-06, "loss": 0.8499, "step": 5771 }, { "epoch": 0.5168280261906094, "grad_norm": 1.1099525240620316, "learning_rate": 9.941994552506462e-06, "loss": 0.7452, "step": 5772 }, { "epoch": 0.5169175667356875, "grad_norm": 0.9098656941131136, "learning_rate": 9.939094315140325e-06, "loss": 0.8004, "step": 5773 }, { "epoch": 0.5170071072807656, "grad_norm": 0.9335460843107476, "learning_rate": 9.93619408289738e-06, "loss": 0.8006, "step": 5774 }, { "epoch": 0.5170966478258436, "grad_norm": 1.3956266387998333, "learning_rate": 9.933293856021576e-06, "loss": 0.7372, "step": 5775 }, { "epoch": 0.5171861883709217, "grad_norm": 0.8927887634140342, "learning_rate": 9.930393634756877e-06, "loss": 0.8273, "step": 5776 }, { "epoch": 0.5172757289159998, "grad_norm": 0.9540092831208936, "learning_rate": 9.927493419347246e-06, "loss": 0.8389, "step": 5777 }, { "epoch": 0.5173652694610779, "grad_norm": 0.9908822230225323, "learning_rate": 9.924593210036623e-06, "loss": 0.8303, "step": 5778 }, { "epoch": 0.5174548100061559, "grad_norm": 1.0847490114962337, "learning_rate": 9.921693007068977e-06, "loss": 0.8537, "step": 5779 }, { "epoch": 0.5175443505512339, "grad_norm": 1.0196824876649921, "learning_rate": 9.91879281068826e-06, "loss": 0.8464, "step": 5780 }, { "epoch": 0.5176338910963121, "grad_norm": 1.1038941403223972, "learning_rate": 9.915892621138424e-06, "loss": 0.8336, "step": 5781 }, { "epoch": 0.5177234316413901, "grad_norm": 1.120034624716666, "learning_rate": 9.91299243866343e-06, "loss": 0.7779, "step": 5782 }, { "epoch": 0.5178129721864682, "grad_norm": 0.9763755628576826, "learning_rate": 9.910092263507232e-06, "loss": 0.8868, "step": 5783 }, { "epoch": 0.5179025127315463, "grad_norm": 0.9171080967412689, "learning_rate": 9.907192095913773e-06, "loss": 0.8597, "step": 5784 }, { "epoch": 0.5179920532766243, "grad_norm": 0.9999311668989066, "learning_rate": 9.904291936127015e-06, "loss": 0.8253, "step": 5785 }, { "epoch": 0.5180815938217024, "grad_norm": 0.9092114075698892, "learning_rate": 9.901391784390909e-06, "loss": 0.869, "step": 5786 }, { "epoch": 0.5181711343667804, "grad_norm": 1.0014071092524957, "learning_rate": 9.898491640949403e-06, "loss": 0.8785, "step": 5787 }, { "epoch": 0.5182606749118586, "grad_norm": 0.9659893298273803, "learning_rate": 9.895591506046452e-06, "loss": 0.9111, "step": 5788 }, { "epoch": 0.5183502154569366, "grad_norm": 0.8925807927291977, "learning_rate": 9.892691379926001e-06, "loss": 0.8425, "step": 5789 }, { "epoch": 0.5184397560020146, "grad_norm": 1.0341940291575344, "learning_rate": 9.889791262832e-06, "loss": 0.8685, "step": 5790 }, { "epoch": 0.5185292965470927, "grad_norm": 0.9134144583256875, "learning_rate": 9.886891155008399e-06, "loss": 0.8611, "step": 5791 }, { "epoch": 0.5186188370921708, "grad_norm": 1.13660767906761, "learning_rate": 9.883991056699146e-06, "loss": 0.8552, "step": 5792 }, { "epoch": 0.5187083776372489, "grad_norm": 1.0032537207678789, "learning_rate": 9.881090968148191e-06, "loss": 0.7851, "step": 5793 }, { "epoch": 0.5187979181823269, "grad_norm": 0.9405576966840509, "learning_rate": 9.878190889599474e-06, "loss": 0.8579, "step": 5794 }, { "epoch": 0.518887458727405, "grad_norm": 0.9557382015855391, "learning_rate": 9.875290821296942e-06, "loss": 0.8435, "step": 5795 }, { "epoch": 0.5189769992724831, "grad_norm": 0.9156785052087755, "learning_rate": 9.872390763484538e-06, "loss": 0.8649, "step": 5796 }, { "epoch": 0.5190665398175611, "grad_norm": 1.2040477439596808, "learning_rate": 9.869490716406206e-06, "loss": 0.8434, "step": 5797 }, { "epoch": 0.5191560803626392, "grad_norm": 0.958297624494633, "learning_rate": 9.866590680305895e-06, "loss": 0.7794, "step": 5798 }, { "epoch": 0.5192456209077173, "grad_norm": 0.922798772163918, "learning_rate": 9.86369065542754e-06, "loss": 0.8695, "step": 5799 }, { "epoch": 0.5193351614527953, "grad_norm": 0.8531889104717889, "learning_rate": 9.860790642015082e-06, "loss": 0.8483, "step": 5800 }, { "epoch": 0.5194247019978734, "grad_norm": 0.9521186125760788, "learning_rate": 9.85789064031246e-06, "loss": 0.803, "step": 5801 }, { "epoch": 0.5195142425429515, "grad_norm": 0.8869257286067888, "learning_rate": 9.854990650563613e-06, "loss": 0.8486, "step": 5802 }, { "epoch": 0.5196037830880296, "grad_norm": 1.1014017692037756, "learning_rate": 9.852090673012482e-06, "loss": 0.822, "step": 5803 }, { "epoch": 0.5196933236331076, "grad_norm": 1.0737055563651632, "learning_rate": 9.849190707903007e-06, "loss": 0.7752, "step": 5804 }, { "epoch": 0.5197828641781856, "grad_norm": 0.9536781436060574, "learning_rate": 9.84629075547911e-06, "loss": 0.8319, "step": 5805 }, { "epoch": 0.5198724047232638, "grad_norm": 0.9381833726928932, "learning_rate": 9.843390815984737e-06, "loss": 0.7517, "step": 5806 }, { "epoch": 0.5199619452683418, "grad_norm": 0.8759706209252847, "learning_rate": 9.840490889663817e-06, "loss": 0.8317, "step": 5807 }, { "epoch": 0.5200514858134199, "grad_norm": 0.9640580984642807, "learning_rate": 9.837590976760283e-06, "loss": 0.8835, "step": 5808 }, { "epoch": 0.5201410263584979, "grad_norm": 1.009547746810493, "learning_rate": 9.834691077518068e-06, "loss": 0.8799, "step": 5809 }, { "epoch": 0.520230566903576, "grad_norm": 1.1403178367916549, "learning_rate": 9.831791192181107e-06, "loss": 0.831, "step": 5810 }, { "epoch": 0.5203201074486541, "grad_norm": 0.9709791692659347, "learning_rate": 9.828891320993314e-06, "loss": 0.8828, "step": 5811 }, { "epoch": 0.5204096479937321, "grad_norm": 0.9492608794938069, "learning_rate": 9.825991464198628e-06, "loss": 0.7922, "step": 5812 }, { "epoch": 0.5204991885388103, "grad_norm": 0.8908573527246928, "learning_rate": 9.823091622040974e-06, "loss": 0.8059, "step": 5813 }, { "epoch": 0.5205887290838883, "grad_norm": 1.0320786474095176, "learning_rate": 9.820191794764274e-06, "loss": 0.8714, "step": 5814 }, { "epoch": 0.5206782696289664, "grad_norm": 0.9610565202266272, "learning_rate": 9.81729198261246e-06, "loss": 0.8588, "step": 5815 }, { "epoch": 0.5207678101740444, "grad_norm": 0.8562654924389672, "learning_rate": 9.814392185829444e-06, "loss": 0.8537, "step": 5816 }, { "epoch": 0.5208573507191225, "grad_norm": 1.055423235434543, "learning_rate": 9.81149240465915e-06, "loss": 0.8416, "step": 5817 }, { "epoch": 0.5209468912642006, "grad_norm": 0.9748861425554027, "learning_rate": 9.808592639345504e-06, "loss": 0.8368, "step": 5818 }, { "epoch": 0.5210364318092786, "grad_norm": 0.9743042085775417, "learning_rate": 9.805692890132423e-06, "loss": 0.7703, "step": 5819 }, { "epoch": 0.5211259723543568, "grad_norm": 0.9601745869020492, "learning_rate": 9.802793157263821e-06, "loss": 0.797, "step": 5820 }, { "epoch": 0.5212155128994348, "grad_norm": 0.862573726985379, "learning_rate": 9.79989344098362e-06, "loss": 0.7494, "step": 5821 }, { "epoch": 0.5213050534445128, "grad_norm": 1.0086230219854315, "learning_rate": 9.796993741535726e-06, "loss": 0.8229, "step": 5822 }, { "epoch": 0.5213945939895909, "grad_norm": 0.9932146565246411, "learning_rate": 9.794094059164056e-06, "loss": 0.8176, "step": 5823 }, { "epoch": 0.521484134534669, "grad_norm": 1.0233090281124761, "learning_rate": 9.791194394112525e-06, "loss": 0.8183, "step": 5824 }, { "epoch": 0.5215736750797471, "grad_norm": 0.9047110649219527, "learning_rate": 9.78829474662504e-06, "loss": 0.7938, "step": 5825 }, { "epoch": 0.5216632156248251, "grad_norm": 0.9643590634414291, "learning_rate": 9.785395116945515e-06, "loss": 0.8335, "step": 5826 }, { "epoch": 0.5217527561699031, "grad_norm": 0.9109070172157157, "learning_rate": 9.78249550531785e-06, "loss": 0.8688, "step": 5827 }, { "epoch": 0.5218422967149813, "grad_norm": 0.9280636625788184, "learning_rate": 9.779595911985954e-06, "loss": 0.8263, "step": 5828 }, { "epoch": 0.5219318372600593, "grad_norm": 1.0010278639508368, "learning_rate": 9.77669633719373e-06, "loss": 0.8282, "step": 5829 }, { "epoch": 0.5220213778051374, "grad_norm": 0.8771063715473473, "learning_rate": 9.773796781185084e-06, "loss": 0.8606, "step": 5830 }, { "epoch": 0.5221109183502155, "grad_norm": 0.9673609818196243, "learning_rate": 9.770897244203917e-06, "loss": 0.7821, "step": 5831 }, { "epoch": 0.5222004588952935, "grad_norm": 0.9669044391018786, "learning_rate": 9.767997726494128e-06, "loss": 0.8729, "step": 5832 }, { "epoch": 0.5222899994403716, "grad_norm": 0.998191793887367, "learning_rate": 9.765098228299613e-06, "loss": 0.8338, "step": 5833 }, { "epoch": 0.5223795399854496, "grad_norm": 0.9104302996897441, "learning_rate": 9.76219874986427e-06, "loss": 0.8369, "step": 5834 }, { "epoch": 0.5224690805305278, "grad_norm": 0.938814262845783, "learning_rate": 9.759299291431991e-06, "loss": 0.8259, "step": 5835 }, { "epoch": 0.5225586210756058, "grad_norm": 1.0108825982163623, "learning_rate": 9.756399853246672e-06, "loss": 0.8425, "step": 5836 }, { "epoch": 0.5226481616206838, "grad_norm": 0.9187759251757708, "learning_rate": 9.75350043555221e-06, "loss": 0.8019, "step": 5837 }, { "epoch": 0.522737702165762, "grad_norm": 0.962780516682919, "learning_rate": 9.750601038592478e-06, "loss": 0.8494, "step": 5838 }, { "epoch": 0.52282724271084, "grad_norm": 0.9889256985992765, "learning_rate": 9.74770166261138e-06, "loss": 0.8476, "step": 5839 }, { "epoch": 0.5229167832559181, "grad_norm": 0.8721242794662549, "learning_rate": 9.744802307852794e-06, "loss": 0.813, "step": 5840 }, { "epoch": 0.5230063238009961, "grad_norm": 0.9829273370527312, "learning_rate": 9.741902974560606e-06, "loss": 0.8195, "step": 5841 }, { "epoch": 0.5230958643460742, "grad_norm": 0.9267601283305458, "learning_rate": 9.739003662978696e-06, "loss": 0.8517, "step": 5842 }, { "epoch": 0.5231854048911523, "grad_norm": 0.9356977703447937, "learning_rate": 9.736104373350957e-06, "loss": 0.8121, "step": 5843 }, { "epoch": 0.5232749454362303, "grad_norm": 0.906517004014335, "learning_rate": 9.733205105921249e-06, "loss": 0.8461, "step": 5844 }, { "epoch": 0.5233644859813084, "grad_norm": 1.071582804729008, "learning_rate": 9.73030586093346e-06, "loss": 0.8602, "step": 5845 }, { "epoch": 0.5234540265263865, "grad_norm": 0.9396011870599469, "learning_rate": 9.727406638631466e-06, "loss": 0.8123, "step": 5846 }, { "epoch": 0.5235435670714645, "grad_norm": 1.0834775431671524, "learning_rate": 9.724507439259134e-06, "loss": 0.7939, "step": 5847 }, { "epoch": 0.5236331076165426, "grad_norm": 0.9895616449266376, "learning_rate": 9.721608263060341e-06, "loss": 0.8079, "step": 5848 }, { "epoch": 0.5237226481616207, "grad_norm": 1.0284430091483427, "learning_rate": 9.718709110278953e-06, "loss": 0.8691, "step": 5849 }, { "epoch": 0.5238121887066988, "grad_norm": 0.9378828482220959, "learning_rate": 9.715809981158836e-06, "loss": 0.8599, "step": 5850 }, { "epoch": 0.5239017292517768, "grad_norm": 0.958264172084436, "learning_rate": 9.712910875943858e-06, "loss": 0.9014, "step": 5851 }, { "epoch": 0.5239912697968548, "grad_norm": 0.9000173078012683, "learning_rate": 9.710011794877883e-06, "loss": 0.8595, "step": 5852 }, { "epoch": 0.524080810341933, "grad_norm": 0.897967087837684, "learning_rate": 9.707112738204769e-06, "loss": 0.8385, "step": 5853 }, { "epoch": 0.524170350887011, "grad_norm": 1.0923630971545393, "learning_rate": 9.704213706168381e-06, "loss": 0.8401, "step": 5854 }, { "epoch": 0.5242598914320891, "grad_norm": 0.8980382154203161, "learning_rate": 9.701314699012569e-06, "loss": 0.7639, "step": 5855 }, { "epoch": 0.5243494319771672, "grad_norm": 1.050974782470295, "learning_rate": 9.69841571698119e-06, "loss": 0.8238, "step": 5856 }, { "epoch": 0.5244389725222453, "grad_norm": 0.9293012855746424, "learning_rate": 9.6955167603181e-06, "loss": 0.7849, "step": 5857 }, { "epoch": 0.5245285130673233, "grad_norm": 0.9193257407579004, "learning_rate": 9.692617829267147e-06, "loss": 0.8078, "step": 5858 }, { "epoch": 0.5246180536124013, "grad_norm": 0.9842900025812967, "learning_rate": 9.689718924072184e-06, "loss": 0.9049, "step": 5859 }, { "epoch": 0.5247075941574795, "grad_norm": 0.8688706019357838, "learning_rate": 9.68682004497705e-06, "loss": 0.8015, "step": 5860 }, { "epoch": 0.5247971347025575, "grad_norm": 0.8617228832212375, "learning_rate": 9.683921192225596e-06, "loss": 0.8389, "step": 5861 }, { "epoch": 0.5248866752476355, "grad_norm": 1.0529376621541577, "learning_rate": 9.681022366061659e-06, "loss": 0.816, "step": 5862 }, { "epoch": 0.5249762157927136, "grad_norm": 1.1120128795394109, "learning_rate": 9.678123566729078e-06, "loss": 0.8329, "step": 5863 }, { "epoch": 0.5250657563377917, "grad_norm": 0.9781772771080847, "learning_rate": 9.675224794471703e-06, "loss": 0.8432, "step": 5864 }, { "epoch": 0.5251552968828698, "grad_norm": 1.0737763735797068, "learning_rate": 9.672326049533352e-06, "loss": 0.8463, "step": 5865 }, { "epoch": 0.5252448374279478, "grad_norm": 1.040466234950394, "learning_rate": 9.669427332157868e-06, "loss": 0.824, "step": 5866 }, { "epoch": 0.525334377973026, "grad_norm": 0.991979661483553, "learning_rate": 9.66652864258908e-06, "loss": 0.8468, "step": 5867 }, { "epoch": 0.525423918518104, "grad_norm": 0.9651207998820273, "learning_rate": 9.663629981070815e-06, "loss": 0.8446, "step": 5868 }, { "epoch": 0.525513459063182, "grad_norm": 0.9339693636194671, "learning_rate": 9.660731347846899e-06, "loss": 0.8904, "step": 5869 }, { "epoch": 0.5256029996082601, "grad_norm": 0.8727878749777708, "learning_rate": 9.657832743161163e-06, "loss": 0.8245, "step": 5870 }, { "epoch": 0.5256925401533382, "grad_norm": 0.9374149933582777, "learning_rate": 9.654934167257414e-06, "loss": 0.8451, "step": 5871 }, { "epoch": 0.5257820806984163, "grad_norm": 1.0209935149944833, "learning_rate": 9.652035620379481e-06, "loss": 0.8422, "step": 5872 }, { "epoch": 0.5258716212434943, "grad_norm": 0.8385862080796269, "learning_rate": 9.649137102771178e-06, "loss": 0.813, "step": 5873 }, { "epoch": 0.5259611617885724, "grad_norm": 1.0830530141041017, "learning_rate": 9.646238614676317e-06, "loss": 0.8091, "step": 5874 }, { "epoch": 0.5260507023336505, "grad_norm": 1.0029460249718387, "learning_rate": 9.643340156338715e-06, "loss": 0.8123, "step": 5875 }, { "epoch": 0.5261402428787285, "grad_norm": 0.9312124029916633, "learning_rate": 9.640441728002174e-06, "loss": 0.8922, "step": 5876 }, { "epoch": 0.5262297834238066, "grad_norm": 0.9656688193923817, "learning_rate": 9.637543329910502e-06, "loss": 0.8671, "step": 5877 }, { "epoch": 0.5263193239688847, "grad_norm": 0.9920643649023224, "learning_rate": 9.634644962307504e-06, "loss": 0.8126, "step": 5878 }, { "epoch": 0.5264088645139627, "grad_norm": 0.9772624868750944, "learning_rate": 9.631746625436982e-06, "loss": 0.8031, "step": 5879 }, { "epoch": 0.5264984050590408, "grad_norm": 1.0461560217261032, "learning_rate": 9.628848319542735e-06, "loss": 0.7995, "step": 5880 }, { "epoch": 0.5265879456041188, "grad_norm": 0.9015005268891336, "learning_rate": 9.625950044868559e-06, "loss": 0.8713, "step": 5881 }, { "epoch": 0.526677486149197, "grad_norm": 0.8757632478291026, "learning_rate": 9.623051801658245e-06, "loss": 0.765, "step": 5882 }, { "epoch": 0.526767026694275, "grad_norm": 0.9170787962195963, "learning_rate": 9.620153590155582e-06, "loss": 0.851, "step": 5883 }, { "epoch": 0.526856567239353, "grad_norm": 0.9564374513522406, "learning_rate": 9.617255410604363e-06, "loss": 0.8252, "step": 5884 }, { "epoch": 0.5269461077844312, "grad_norm": 1.1994945033261033, "learning_rate": 9.614357263248373e-06, "loss": 0.849, "step": 5885 }, { "epoch": 0.5270356483295092, "grad_norm": 1.0433561718584206, "learning_rate": 9.611459148331394e-06, "loss": 0.7924, "step": 5886 }, { "epoch": 0.5271251888745873, "grad_norm": 1.0110916663792813, "learning_rate": 9.608561066097204e-06, "loss": 0.8193, "step": 5887 }, { "epoch": 0.5272147294196653, "grad_norm": 1.0017813043442196, "learning_rate": 9.605663016789583e-06, "loss": 0.8262, "step": 5888 }, { "epoch": 0.5273042699647434, "grad_norm": 0.977004953919621, "learning_rate": 9.602765000652302e-06, "loss": 0.9001, "step": 5889 }, { "epoch": 0.5273938105098215, "grad_norm": 0.8933301301050861, "learning_rate": 9.599867017929132e-06, "loss": 0.8464, "step": 5890 }, { "epoch": 0.5274833510548995, "grad_norm": 0.8873789637488516, "learning_rate": 9.596969068863848e-06, "loss": 0.8121, "step": 5891 }, { "epoch": 0.5275728915999777, "grad_norm": 1.057371966520408, "learning_rate": 9.594071153700214e-06, "loss": 0.8781, "step": 5892 }, { "epoch": 0.5276624321450557, "grad_norm": 0.879912330116897, "learning_rate": 9.591173272681991e-06, "loss": 0.8676, "step": 5893 }, { "epoch": 0.5277519726901337, "grad_norm": 0.9142383452943076, "learning_rate": 9.588275426052938e-06, "loss": 0.8122, "step": 5894 }, { "epoch": 0.5278415132352118, "grad_norm": 1.1722459101116416, "learning_rate": 9.585377614056815e-06, "loss": 0.8526, "step": 5895 }, { "epoch": 0.5279310537802899, "grad_norm": 0.8721268557421681, "learning_rate": 9.582479836937374e-06, "loss": 0.8005, "step": 5896 }, { "epoch": 0.528020594325368, "grad_norm": 1.2818764862468448, "learning_rate": 9.579582094938376e-06, "loss": 0.8596, "step": 5897 }, { "epoch": 0.528110134870446, "grad_norm": 1.0004753610546644, "learning_rate": 9.576684388303556e-06, "loss": 0.7527, "step": 5898 }, { "epoch": 0.528199675415524, "grad_norm": 1.001270208239588, "learning_rate": 9.573786717276666e-06, "loss": 0.8083, "step": 5899 }, { "epoch": 0.5282892159606022, "grad_norm": 0.9370050809879644, "learning_rate": 9.57088908210145e-06, "loss": 0.8172, "step": 5900 }, { "epoch": 0.5283787565056802, "grad_norm": 0.9054228452562488, "learning_rate": 9.567991483021645e-06, "loss": 0.8392, "step": 5901 }, { "epoch": 0.5284682970507583, "grad_norm": 0.8555623272616262, "learning_rate": 9.565093920280987e-06, "loss": 0.7998, "step": 5902 }, { "epoch": 0.5285578375958364, "grad_norm": 1.018713883126122, "learning_rate": 9.562196394123218e-06, "loss": 0.8316, "step": 5903 }, { "epoch": 0.5286473781409144, "grad_norm": 0.9523901582592875, "learning_rate": 9.559298904792054e-06, "loss": 0.8373, "step": 5904 }, { "epoch": 0.5287369186859925, "grad_norm": 0.8967443151989319, "learning_rate": 9.556401452531233e-06, "loss": 0.8583, "step": 5905 }, { "epoch": 0.5288264592310705, "grad_norm": 0.9086604107382693, "learning_rate": 9.553504037584477e-06, "loss": 0.8283, "step": 5906 }, { "epoch": 0.5289159997761487, "grad_norm": 1.0015736747287498, "learning_rate": 9.550606660195505e-06, "loss": 0.8727, "step": 5907 }, { "epoch": 0.5290055403212267, "grad_norm": 1.0364730088975551, "learning_rate": 9.54770932060804e-06, "loss": 0.7718, "step": 5908 }, { "epoch": 0.5290950808663047, "grad_norm": 0.8904222557705052, "learning_rate": 9.544812019065788e-06, "loss": 0.802, "step": 5909 }, { "epoch": 0.5291846214113829, "grad_norm": 0.9847830813716536, "learning_rate": 9.541914755812467e-06, "loss": 0.8198, "step": 5910 }, { "epoch": 0.5292741619564609, "grad_norm": 0.9178633612998542, "learning_rate": 9.539017531091783e-06, "loss": 0.8454, "step": 5911 }, { "epoch": 0.529363702501539, "grad_norm": 1.073505987836565, "learning_rate": 9.536120345147445e-06, "loss": 0.7644, "step": 5912 }, { "epoch": 0.529453243046617, "grad_norm": 0.9696948823977192, "learning_rate": 9.53322319822315e-06, "loss": 0.8664, "step": 5913 }, { "epoch": 0.5295427835916952, "grad_norm": 0.9112097401699266, "learning_rate": 9.530326090562601e-06, "loss": 0.8097, "step": 5914 }, { "epoch": 0.5296323241367732, "grad_norm": 1.0830901050727209, "learning_rate": 9.52742902240949e-06, "loss": 0.8251, "step": 5915 }, { "epoch": 0.5297218646818512, "grad_norm": 0.8476665862605041, "learning_rate": 9.524531994007507e-06, "loss": 0.8284, "step": 5916 }, { "epoch": 0.5298114052269293, "grad_norm": 1.2405957055038048, "learning_rate": 9.521635005600344e-06, "loss": 0.8194, "step": 5917 }, { "epoch": 0.5299009457720074, "grad_norm": 0.8614393102738275, "learning_rate": 9.518738057431686e-06, "loss": 0.7579, "step": 5918 }, { "epoch": 0.5299904863170855, "grad_norm": 0.9711279153362193, "learning_rate": 9.515841149745217e-06, "loss": 0.832, "step": 5919 }, { "epoch": 0.5300800268621635, "grad_norm": 0.9264237769122139, "learning_rate": 9.51294428278461e-06, "loss": 0.8264, "step": 5920 }, { "epoch": 0.5301695674072416, "grad_norm": 0.8799422181900238, "learning_rate": 9.510047456793543e-06, "loss": 0.8328, "step": 5921 }, { "epoch": 0.5302591079523197, "grad_norm": 0.9281002726805655, "learning_rate": 9.507150672015687e-06, "loss": 0.834, "step": 5922 }, { "epoch": 0.5303486484973977, "grad_norm": 0.9010245385258798, "learning_rate": 9.504253928694709e-06, "loss": 0.8858, "step": 5923 }, { "epoch": 0.5304381890424757, "grad_norm": 0.9295371224915867, "learning_rate": 9.50135722707428e-06, "loss": 0.8543, "step": 5924 }, { "epoch": 0.5305277295875539, "grad_norm": 1.1408579285553133, "learning_rate": 9.498460567398052e-06, "loss": 0.8506, "step": 5925 }, { "epoch": 0.5306172701326319, "grad_norm": 0.9468427776598466, "learning_rate": 9.495563949909688e-06, "loss": 0.8362, "step": 5926 }, { "epoch": 0.53070681067771, "grad_norm": 0.89914433013515, "learning_rate": 9.49266737485284e-06, "loss": 0.8723, "step": 5927 }, { "epoch": 0.5307963512227881, "grad_norm": 0.8735474485756044, "learning_rate": 9.489770842471158e-06, "loss": 0.7985, "step": 5928 }, { "epoch": 0.5308858917678662, "grad_norm": 0.8879425203951726, "learning_rate": 9.48687435300829e-06, "loss": 0.8036, "step": 5929 }, { "epoch": 0.5309754323129442, "grad_norm": 1.000887088725653, "learning_rate": 9.483977906707885e-06, "loss": 0.8318, "step": 5930 }, { "epoch": 0.5310649728580222, "grad_norm": 0.9204547605272929, "learning_rate": 9.48108150381357e-06, "loss": 0.8261, "step": 5931 }, { "epoch": 0.5311545134031004, "grad_norm": 1.013985459865476, "learning_rate": 9.478185144568992e-06, "loss": 0.7753, "step": 5932 }, { "epoch": 0.5312440539481784, "grad_norm": 0.9244743658158767, "learning_rate": 9.475288829217779e-06, "loss": 0.833, "step": 5933 }, { "epoch": 0.5313335944932565, "grad_norm": 0.9694281714991287, "learning_rate": 9.472392558003556e-06, "loss": 0.8178, "step": 5934 }, { "epoch": 0.5314231350383345, "grad_norm": 0.9276933295528008, "learning_rate": 9.469496331169959e-06, "loss": 0.812, "step": 5935 }, { "epoch": 0.5315126755834126, "grad_norm": 0.8877859249764296, "learning_rate": 9.466600148960597e-06, "loss": 0.7791, "step": 5936 }, { "epoch": 0.5316022161284907, "grad_norm": 0.9931187359424636, "learning_rate": 9.46370401161909e-06, "loss": 0.8104, "step": 5937 }, { "epoch": 0.5316917566735687, "grad_norm": 0.9454492064923877, "learning_rate": 9.460807919389056e-06, "loss": 0.8412, "step": 5938 }, { "epoch": 0.5317812972186469, "grad_norm": 0.9023761699647791, "learning_rate": 9.457911872514102e-06, "loss": 0.8394, "step": 5939 }, { "epoch": 0.5318708377637249, "grad_norm": 0.9712189697710407, "learning_rate": 9.455015871237836e-06, "loss": 0.7986, "step": 5940 }, { "epoch": 0.5319603783088029, "grad_norm": 0.9531808597072575, "learning_rate": 9.452119915803863e-06, "loss": 0.7965, "step": 5941 }, { "epoch": 0.532049918853881, "grad_norm": 1.0028651978019683, "learning_rate": 9.449224006455773e-06, "loss": 0.7851, "step": 5942 }, { "epoch": 0.5321394593989591, "grad_norm": 0.9838576945272512, "learning_rate": 9.446328143437165e-06, "loss": 0.8739, "step": 5943 }, { "epoch": 0.5322289999440372, "grad_norm": 1.146795578743294, "learning_rate": 9.443432326991627e-06, "loss": 0.8112, "step": 5944 }, { "epoch": 0.5323185404891152, "grad_norm": 0.9662979337446806, "learning_rate": 9.44053655736275e-06, "loss": 0.8503, "step": 5945 }, { "epoch": 0.5324080810341933, "grad_norm": 0.8736412417807512, "learning_rate": 9.437640834794118e-06, "loss": 0.7599, "step": 5946 }, { "epoch": 0.5324976215792714, "grad_norm": 1.1570262924970436, "learning_rate": 9.434745159529302e-06, "loss": 0.8406, "step": 5947 }, { "epoch": 0.5325871621243494, "grad_norm": 0.9460882706711075, "learning_rate": 9.431849531811883e-06, "loss": 0.8448, "step": 5948 }, { "epoch": 0.5326767026694275, "grad_norm": 0.8477959073111798, "learning_rate": 9.42895395188543e-06, "loss": 0.8371, "step": 5949 }, { "epoch": 0.5327662432145056, "grad_norm": 0.9765876344319901, "learning_rate": 9.426058419993507e-06, "loss": 0.812, "step": 5950 }, { "epoch": 0.5328557837595836, "grad_norm": 1.001455336352303, "learning_rate": 9.423162936379681e-06, "loss": 0.9003, "step": 5951 }, { "epoch": 0.5329453243046617, "grad_norm": 0.8967252943651437, "learning_rate": 9.420267501287512e-06, "loss": 0.7933, "step": 5952 }, { "epoch": 0.5330348648497397, "grad_norm": 0.9572983931016393, "learning_rate": 9.41737211496055e-06, "loss": 0.8932, "step": 5953 }, { "epoch": 0.5331244053948179, "grad_norm": 0.9244813543203914, "learning_rate": 9.414476777642347e-06, "loss": 0.8539, "step": 5954 }, { "epoch": 0.5332139459398959, "grad_norm": 0.910680283768178, "learning_rate": 9.411581489576447e-06, "loss": 0.8487, "step": 5955 }, { "epoch": 0.5333034864849739, "grad_norm": 0.9249565045236668, "learning_rate": 9.408686251006395e-06, "loss": 0.8367, "step": 5956 }, { "epoch": 0.5333930270300521, "grad_norm": 1.0795302881979438, "learning_rate": 9.405791062175735e-06, "loss": 0.8371, "step": 5957 }, { "epoch": 0.5334825675751301, "grad_norm": 0.9819786441181229, "learning_rate": 9.402895923327987e-06, "loss": 0.8581, "step": 5958 }, { "epoch": 0.5335721081202082, "grad_norm": 0.8953573174441338, "learning_rate": 9.400000834706692e-06, "loss": 0.8141, "step": 5959 }, { "epoch": 0.5336616486652862, "grad_norm": 1.0973721942494425, "learning_rate": 9.39710579655537e-06, "loss": 0.8322, "step": 5960 }, { "epoch": 0.5337511892103644, "grad_norm": 1.1538945971376044, "learning_rate": 9.394210809117543e-06, "loss": 0.8537, "step": 5961 }, { "epoch": 0.5338407297554424, "grad_norm": 1.1898809745085779, "learning_rate": 9.391315872636728e-06, "loss": 0.8641, "step": 5962 }, { "epoch": 0.5339302703005204, "grad_norm": 1.0622005854470915, "learning_rate": 9.388420987356443e-06, "loss": 0.8464, "step": 5963 }, { "epoch": 0.5340198108455986, "grad_norm": 0.8516865198026251, "learning_rate": 9.385526153520186e-06, "loss": 0.8011, "step": 5964 }, { "epoch": 0.5341093513906766, "grad_norm": 1.0475635214203323, "learning_rate": 9.38263137137147e-06, "loss": 0.8776, "step": 5965 }, { "epoch": 0.5341988919357546, "grad_norm": 0.9767623830341828, "learning_rate": 9.379736641153791e-06, "loss": 0.8245, "step": 5966 }, { "epoch": 0.5342884324808327, "grad_norm": 0.9102020121864909, "learning_rate": 9.376841963110644e-06, "loss": 0.8575, "step": 5967 }, { "epoch": 0.5343779730259108, "grad_norm": 0.9261795506019623, "learning_rate": 9.373947337485521e-06, "loss": 0.8006, "step": 5968 }, { "epoch": 0.5344675135709889, "grad_norm": 1.28416824632598, "learning_rate": 9.371052764521907e-06, "loss": 0.8332, "step": 5969 }, { "epoch": 0.5345570541160669, "grad_norm": 0.9542747370790804, "learning_rate": 9.368158244463286e-06, "loss": 0.8321, "step": 5970 }, { "epoch": 0.534646594661145, "grad_norm": 1.0603271293390129, "learning_rate": 9.36526377755313e-06, "loss": 0.9048, "step": 5971 }, { "epoch": 0.5347361352062231, "grad_norm": 0.9778998649183257, "learning_rate": 9.36236936403492e-06, "loss": 0.8629, "step": 5972 }, { "epoch": 0.5348256757513011, "grad_norm": 1.0174339162479573, "learning_rate": 9.359475004152122e-06, "loss": 0.8743, "step": 5973 }, { "epoch": 0.5349152162963792, "grad_norm": 0.8805067028232537, "learning_rate": 9.3565806981482e-06, "loss": 0.8148, "step": 5974 }, { "epoch": 0.5350047568414573, "grad_norm": 1.164120370745813, "learning_rate": 9.353686446266611e-06, "loss": 0.8059, "step": 5975 }, { "epoch": 0.5350942973865354, "grad_norm": 1.169431984103252, "learning_rate": 9.350792248750814e-06, "loss": 0.8258, "step": 5976 }, { "epoch": 0.5351838379316134, "grad_norm": 0.9313698513720441, "learning_rate": 9.347898105844255e-06, "loss": 0.7955, "step": 5977 }, { "epoch": 0.5352733784766914, "grad_norm": 0.8731411992557386, "learning_rate": 9.345004017790382e-06, "loss": 0.8359, "step": 5978 }, { "epoch": 0.5353629190217696, "grad_norm": 0.971084304185613, "learning_rate": 9.34210998483264e-06, "loss": 0.8077, "step": 5979 }, { "epoch": 0.5354524595668476, "grad_norm": 0.8641044510659649, "learning_rate": 9.339216007214462e-06, "loss": 0.8119, "step": 5980 }, { "epoch": 0.5355420001119257, "grad_norm": 0.949280292360375, "learning_rate": 9.336322085179277e-06, "loss": 0.8465, "step": 5981 }, { "epoch": 0.5356315406570038, "grad_norm": 0.990935611199923, "learning_rate": 9.333428218970517e-06, "loss": 0.788, "step": 5982 }, { "epoch": 0.5357210812020818, "grad_norm": 1.0069845958750356, "learning_rate": 9.3305344088316e-06, "loss": 0.799, "step": 5983 }, { "epoch": 0.5358106217471599, "grad_norm": 0.9946052293714969, "learning_rate": 9.327640655005951e-06, "loss": 0.8509, "step": 5984 }, { "epoch": 0.5359001622922379, "grad_norm": 0.9468533198482125, "learning_rate": 9.32474695773698e-06, "loss": 0.8135, "step": 5985 }, { "epoch": 0.5359897028373161, "grad_norm": 0.8364696718689509, "learning_rate": 9.32185331726809e-06, "loss": 0.8379, "step": 5986 }, { "epoch": 0.5360792433823941, "grad_norm": 0.9321307070844654, "learning_rate": 9.318959733842692e-06, "loss": 0.8512, "step": 5987 }, { "epoch": 0.5361687839274721, "grad_norm": 0.9354559714736931, "learning_rate": 9.316066207704184e-06, "loss": 0.8322, "step": 5988 }, { "epoch": 0.5362583244725502, "grad_norm": 0.8951707236626564, "learning_rate": 9.313172739095951e-06, "loss": 0.7512, "step": 5989 }, { "epoch": 0.5363478650176283, "grad_norm": 0.8374592721818452, "learning_rate": 9.310279328261399e-06, "loss": 0.7794, "step": 5990 }, { "epoch": 0.5364374055627064, "grad_norm": 0.9050864282223952, "learning_rate": 9.307385975443893e-06, "loss": 0.8814, "step": 5991 }, { "epoch": 0.5365269461077844, "grad_norm": 0.9324966903121951, "learning_rate": 9.304492680886825e-06, "loss": 0.8098, "step": 5992 }, { "epoch": 0.5366164866528625, "grad_norm": 0.9007864185018738, "learning_rate": 9.301599444833567e-06, "loss": 0.8965, "step": 5993 }, { "epoch": 0.5367060271979406, "grad_norm": 1.0909618926944298, "learning_rate": 9.298706267527487e-06, "loss": 0.8201, "step": 5994 }, { "epoch": 0.5367955677430186, "grad_norm": 0.9169705679735011, "learning_rate": 9.295813149211954e-06, "loss": 0.8238, "step": 5995 }, { "epoch": 0.5368851082880967, "grad_norm": 1.0327180026288332, "learning_rate": 9.292920090130321e-06, "loss": 0.8059, "step": 5996 }, { "epoch": 0.5369746488331748, "grad_norm": 0.8921533366183012, "learning_rate": 9.290027090525945e-06, "loss": 0.8144, "step": 5997 }, { "epoch": 0.5370641893782528, "grad_norm": 0.9570832935594241, "learning_rate": 9.287134150642175e-06, "loss": 0.8778, "step": 5998 }, { "epoch": 0.5371537299233309, "grad_norm": 1.0479740740578811, "learning_rate": 9.284241270722359e-06, "loss": 0.8456, "step": 5999 }, { "epoch": 0.537243270468409, "grad_norm": 1.01345398155251, "learning_rate": 9.281348451009837e-06, "loss": 0.8433, "step": 6000 }, { "epoch": 0.5373328110134871, "grad_norm": 0.8383155947726476, "learning_rate": 9.27845569174794e-06, "loss": 0.8402, "step": 6001 }, { "epoch": 0.5374223515585651, "grad_norm": 0.9074713301212324, "learning_rate": 9.275562993180001e-06, "loss": 0.8262, "step": 6002 }, { "epoch": 0.5375118921036431, "grad_norm": 0.8861145166620412, "learning_rate": 9.272670355549338e-06, "loss": 0.8326, "step": 6003 }, { "epoch": 0.5376014326487213, "grad_norm": 0.9868744579591654, "learning_rate": 9.269777779099276e-06, "loss": 0.8334, "step": 6004 }, { "epoch": 0.5376909731937993, "grad_norm": 0.9599648381030712, "learning_rate": 9.266885264073128e-06, "loss": 0.8421, "step": 6005 }, { "epoch": 0.5377805137388774, "grad_norm": 0.9698199949636004, "learning_rate": 9.263992810714203e-06, "loss": 0.7773, "step": 6006 }, { "epoch": 0.5378700542839554, "grad_norm": 0.836509847908596, "learning_rate": 9.261100419265807e-06, "loss": 0.8102, "step": 6007 }, { "epoch": 0.5379595948290335, "grad_norm": 0.9083574463951704, "learning_rate": 9.258208089971232e-06, "loss": 0.8196, "step": 6008 }, { "epoch": 0.5380491353741116, "grad_norm": 1.0146169089608352, "learning_rate": 9.255315823073775e-06, "loss": 0.7759, "step": 6009 }, { "epoch": 0.5381386759191896, "grad_norm": 1.0770066218035734, "learning_rate": 9.252423618816724e-06, "loss": 0.8305, "step": 6010 }, { "epoch": 0.5382282164642678, "grad_norm": 0.9674553879815466, "learning_rate": 9.249531477443365e-06, "loss": 0.8136, "step": 6011 }, { "epoch": 0.5383177570093458, "grad_norm": 0.8719930878061914, "learning_rate": 9.246639399196972e-06, "loss": 0.8446, "step": 6012 }, { "epoch": 0.5384072975544238, "grad_norm": 0.9843557122170272, "learning_rate": 9.243747384320816e-06, "loss": 0.831, "step": 6013 }, { "epoch": 0.5384968380995019, "grad_norm": 1.0931369512169629, "learning_rate": 9.240855433058166e-06, "loss": 0.8147, "step": 6014 }, { "epoch": 0.53858637864458, "grad_norm": 0.9026738447887179, "learning_rate": 9.237963545652286e-06, "loss": 0.8075, "step": 6015 }, { "epoch": 0.5386759191896581, "grad_norm": 0.9153457022947674, "learning_rate": 9.235071722346424e-06, "loss": 0.8583, "step": 6016 }, { "epoch": 0.5387654597347361, "grad_norm": 0.9809408440018338, "learning_rate": 9.232179963383843e-06, "loss": 0.8261, "step": 6017 }, { "epoch": 0.5388550002798143, "grad_norm": 0.989652558914238, "learning_rate": 9.229288269007776e-06, "loss": 0.8018, "step": 6018 }, { "epoch": 0.5389445408248923, "grad_norm": 1.0141192984038463, "learning_rate": 9.226396639461468e-06, "loss": 0.7657, "step": 6019 }, { "epoch": 0.5390340813699703, "grad_norm": 1.010602187843883, "learning_rate": 9.223505074988157e-06, "loss": 0.8858, "step": 6020 }, { "epoch": 0.5391236219150484, "grad_norm": 0.892782109658125, "learning_rate": 9.220613575831066e-06, "loss": 0.828, "step": 6021 }, { "epoch": 0.5392131624601265, "grad_norm": 0.9867203714512734, "learning_rate": 9.21772214223342e-06, "loss": 0.8245, "step": 6022 }, { "epoch": 0.5393027030052046, "grad_norm": 1.1217624986525767, "learning_rate": 9.214830774438447e-06, "loss": 0.7392, "step": 6023 }, { "epoch": 0.5393922435502826, "grad_norm": 1.1171142759317856, "learning_rate": 9.211939472689342e-06, "loss": 0.8141, "step": 6024 }, { "epoch": 0.5394817840953606, "grad_norm": 0.9035446392146811, "learning_rate": 9.209048237229321e-06, "loss": 0.8976, "step": 6025 }, { "epoch": 0.5395713246404388, "grad_norm": 1.0852354666805286, "learning_rate": 9.206157068301587e-06, "loss": 0.8649, "step": 6026 }, { "epoch": 0.5396608651855168, "grad_norm": 0.9175996699269234, "learning_rate": 9.203265966149332e-06, "loss": 0.8235, "step": 6027 }, { "epoch": 0.5397504057305949, "grad_norm": 0.8985212666519329, "learning_rate": 9.20037493101575e-06, "loss": 0.7998, "step": 6028 }, { "epoch": 0.539839946275673, "grad_norm": 0.9233585778380023, "learning_rate": 9.197483963144024e-06, "loss": 0.8538, "step": 6029 }, { "epoch": 0.539929486820751, "grad_norm": 1.0046108348134133, "learning_rate": 9.194593062777328e-06, "loss": 0.8532, "step": 6030 }, { "epoch": 0.5400190273658291, "grad_norm": 0.9342094258349481, "learning_rate": 9.191702230158838e-06, "loss": 0.7779, "step": 6031 }, { "epoch": 0.5401085679109071, "grad_norm": 1.0025370734767913, "learning_rate": 9.188811465531725e-06, "loss": 0.8522, "step": 6032 }, { "epoch": 0.5401981084559853, "grad_norm": 0.8850759096374533, "learning_rate": 9.185920769139148e-06, "loss": 0.835, "step": 6033 }, { "epoch": 0.5402876490010633, "grad_norm": 0.8613278081126127, "learning_rate": 9.183030141224265e-06, "loss": 0.8205, "step": 6034 }, { "epoch": 0.5403771895461413, "grad_norm": 0.9378977307033007, "learning_rate": 9.180139582030222e-06, "loss": 0.7789, "step": 6035 }, { "epoch": 0.5404667300912195, "grad_norm": 0.9113057673799452, "learning_rate": 9.177249091800167e-06, "loss": 0.7987, "step": 6036 }, { "epoch": 0.5405562706362975, "grad_norm": 1.0267154210330178, "learning_rate": 9.174358670777232e-06, "loss": 0.8308, "step": 6037 }, { "epoch": 0.5406458111813756, "grad_norm": 0.9575922009271098, "learning_rate": 9.17146831920456e-06, "loss": 0.8408, "step": 6038 }, { "epoch": 0.5407353517264536, "grad_norm": 1.014334853712665, "learning_rate": 9.168578037325275e-06, "loss": 0.8732, "step": 6039 }, { "epoch": 0.5408248922715317, "grad_norm": 0.9568020936634541, "learning_rate": 9.165687825382493e-06, "loss": 0.856, "step": 6040 }, { "epoch": 0.5409144328166098, "grad_norm": 0.9287275228688183, "learning_rate": 9.162797683619333e-06, "loss": 0.8664, "step": 6041 }, { "epoch": 0.5410039733616878, "grad_norm": 1.0345732045531393, "learning_rate": 9.159907612278904e-06, "loss": 0.8845, "step": 6042 }, { "epoch": 0.5410935139067659, "grad_norm": 0.9456855050235818, "learning_rate": 9.157017611604306e-06, "loss": 0.7896, "step": 6043 }, { "epoch": 0.541183054451844, "grad_norm": 1.1644985646671988, "learning_rate": 9.154127681838642e-06, "loss": 0.867, "step": 6044 }, { "epoch": 0.541272594996922, "grad_norm": 1.0316801563374332, "learning_rate": 9.151237823225004e-06, "loss": 0.8065, "step": 6045 }, { "epoch": 0.5413621355420001, "grad_norm": 0.9379106499281991, "learning_rate": 9.14834803600647e-06, "loss": 0.8701, "step": 6046 }, { "epoch": 0.5414516760870782, "grad_norm": 0.9439221009556342, "learning_rate": 9.145458320426126e-06, "loss": 0.7924, "step": 6047 }, { "epoch": 0.5415412166321563, "grad_norm": 0.958022645054481, "learning_rate": 9.142568676727043e-06, "loss": 0.8598, "step": 6048 }, { "epoch": 0.5416307571772343, "grad_norm": 0.9371390650033266, "learning_rate": 9.139679105152285e-06, "loss": 0.8891, "step": 6049 }, { "epoch": 0.5417202977223123, "grad_norm": 1.0467191447126114, "learning_rate": 9.136789605944926e-06, "loss": 0.8046, "step": 6050 }, { "epoch": 0.5418098382673905, "grad_norm": 1.0179290967763104, "learning_rate": 9.133900179348008e-06, "loss": 0.8553, "step": 6051 }, { "epoch": 0.5418993788124685, "grad_norm": 1.0165602746373275, "learning_rate": 9.131010825604581e-06, "loss": 0.831, "step": 6052 }, { "epoch": 0.5419889193575466, "grad_norm": 0.8956327535125366, "learning_rate": 9.128121544957694e-06, "loss": 0.8407, "step": 6053 }, { "epoch": 0.5420784599026247, "grad_norm": 0.9574699122065526, "learning_rate": 9.125232337650382e-06, "loss": 0.8089, "step": 6054 }, { "epoch": 0.5421680004477027, "grad_norm": 0.8962931259112521, "learning_rate": 9.122343203925674e-06, "loss": 0.82, "step": 6055 }, { "epoch": 0.5422575409927808, "grad_norm": 0.9417108963585707, "learning_rate": 9.1194541440266e-06, "loss": 0.8307, "step": 6056 }, { "epoch": 0.5423470815378588, "grad_norm": 1.0121605947568275, "learning_rate": 9.11656515819617e-06, "loss": 0.8371, "step": 6057 }, { "epoch": 0.542436622082937, "grad_norm": 0.8890636147088912, "learning_rate": 9.113676246677397e-06, "loss": 0.7889, "step": 6058 }, { "epoch": 0.542526162628015, "grad_norm": 1.0230472478080912, "learning_rate": 9.110787409713295e-06, "loss": 0.8751, "step": 6059 }, { "epoch": 0.542615703173093, "grad_norm": 1.7828499583886108, "learning_rate": 9.107898647546855e-06, "loss": 0.8069, "step": 6060 }, { "epoch": 0.5427052437181711, "grad_norm": 0.8784478988492977, "learning_rate": 9.105009960421078e-06, "loss": 0.7716, "step": 6061 }, { "epoch": 0.5427947842632492, "grad_norm": 0.8500551213183383, "learning_rate": 9.102121348578945e-06, "loss": 0.7995, "step": 6062 }, { "epoch": 0.5428843248083273, "grad_norm": 0.9684694095905027, "learning_rate": 9.099232812263436e-06, "loss": 0.8419, "step": 6063 }, { "epoch": 0.5429738653534053, "grad_norm": 1.0055519255021597, "learning_rate": 9.096344351717528e-06, "loss": 0.8136, "step": 6064 }, { "epoch": 0.5430634058984835, "grad_norm": 1.0303591059031552, "learning_rate": 9.093455967184188e-06, "loss": 0.7835, "step": 6065 }, { "epoch": 0.5431529464435615, "grad_norm": 0.9206225357158364, "learning_rate": 9.090567658906381e-06, "loss": 0.8127, "step": 6066 }, { "epoch": 0.5432424869886395, "grad_norm": 1.5089140014009896, "learning_rate": 9.087679427127059e-06, "loss": 0.8357, "step": 6067 }, { "epoch": 0.5433320275337176, "grad_norm": 0.88387917478891, "learning_rate": 9.084791272089167e-06, "loss": 0.7903, "step": 6068 }, { "epoch": 0.5434215680787957, "grad_norm": 1.2478442414487725, "learning_rate": 9.081903194035653e-06, "loss": 0.8463, "step": 6069 }, { "epoch": 0.5435111086238738, "grad_norm": 0.9586143254105307, "learning_rate": 9.079015193209447e-06, "loss": 0.8077, "step": 6070 }, { "epoch": 0.5436006491689518, "grad_norm": 0.918830581257915, "learning_rate": 9.076127269853486e-06, "loss": 0.7762, "step": 6071 }, { "epoch": 0.5436901897140299, "grad_norm": 0.9791065848298622, "learning_rate": 9.07323942421069e-06, "loss": 0.8571, "step": 6072 }, { "epoch": 0.543779730259108, "grad_norm": 1.1067430162198335, "learning_rate": 9.07035165652397e-06, "loss": 0.8555, "step": 6073 }, { "epoch": 0.543869270804186, "grad_norm": 0.9142661318842814, "learning_rate": 9.06746396703624e-06, "loss": 0.8131, "step": 6074 }, { "epoch": 0.543958811349264, "grad_norm": 0.8827728075773811, "learning_rate": 9.064576355990401e-06, "loss": 0.7949, "step": 6075 }, { "epoch": 0.5440483518943422, "grad_norm": 0.9554625020502936, "learning_rate": 9.06168882362935e-06, "loss": 0.7611, "step": 6076 }, { "epoch": 0.5441378924394202, "grad_norm": 1.16519233349128, "learning_rate": 9.058801370195985e-06, "loss": 0.8029, "step": 6077 }, { "epoch": 0.5442274329844983, "grad_norm": 0.9267329252653302, "learning_rate": 9.055913995933174e-06, "loss": 0.838, "step": 6078 }, { "epoch": 0.5443169735295763, "grad_norm": 1.0075344199258005, "learning_rate": 9.053026701083801e-06, "loss": 0.783, "step": 6079 }, { "epoch": 0.5444065140746545, "grad_norm": 1.0423944995912053, "learning_rate": 9.050139485890738e-06, "loss": 0.8357, "step": 6080 }, { "epoch": 0.5444960546197325, "grad_norm": 1.132718910259074, "learning_rate": 9.047252350596846e-06, "loss": 0.8228, "step": 6081 }, { "epoch": 0.5445855951648105, "grad_norm": 0.9703651213886199, "learning_rate": 9.044365295444982e-06, "loss": 0.7845, "step": 6082 }, { "epoch": 0.5446751357098887, "grad_norm": 0.9247358938486525, "learning_rate": 9.041478320677998e-06, "loss": 0.8411, "step": 6083 }, { "epoch": 0.5447646762549667, "grad_norm": 0.8687925445298574, "learning_rate": 9.03859142653873e-06, "loss": 0.8731, "step": 6084 }, { "epoch": 0.5448542168000448, "grad_norm": 1.00217629126635, "learning_rate": 9.035704613270017e-06, "loss": 0.8484, "step": 6085 }, { "epoch": 0.5449437573451228, "grad_norm": 0.9221122411670781, "learning_rate": 9.032817881114693e-06, "loss": 0.866, "step": 6086 }, { "epoch": 0.5450332978902009, "grad_norm": 0.880489882005139, "learning_rate": 9.029931230315576e-06, "loss": 0.8278, "step": 6087 }, { "epoch": 0.545122838435279, "grad_norm": 1.1401667376112792, "learning_rate": 9.027044661115486e-06, "loss": 0.8377, "step": 6088 }, { "epoch": 0.545212378980357, "grad_norm": 0.9991133873198594, "learning_rate": 9.024158173757224e-06, "loss": 0.788, "step": 6089 }, { "epoch": 0.5453019195254352, "grad_norm": 0.9579178704542471, "learning_rate": 9.021271768483598e-06, "loss": 0.8386, "step": 6090 }, { "epoch": 0.5453914600705132, "grad_norm": 0.9786053192302293, "learning_rate": 9.018385445537398e-06, "loss": 0.7855, "step": 6091 }, { "epoch": 0.5454810006155912, "grad_norm": 0.9813045172794426, "learning_rate": 9.01549920516142e-06, "loss": 0.8037, "step": 6092 }, { "epoch": 0.5455705411606693, "grad_norm": 0.9362038553652263, "learning_rate": 9.012613047598438e-06, "loss": 0.8303, "step": 6093 }, { "epoch": 0.5456600817057474, "grad_norm": 0.9616343493600813, "learning_rate": 9.009726973091234e-06, "loss": 0.835, "step": 6094 }, { "epoch": 0.5457496222508255, "grad_norm": 0.8928570254426087, "learning_rate": 9.006840981882565e-06, "loss": 0.8472, "step": 6095 }, { "epoch": 0.5458391627959035, "grad_norm": 0.9172940273859472, "learning_rate": 9.003955074215198e-06, "loss": 0.8819, "step": 6096 }, { "epoch": 0.5459287033409815, "grad_norm": 0.891450200365495, "learning_rate": 9.001069250331881e-06, "loss": 0.8112, "step": 6097 }, { "epoch": 0.5460182438860597, "grad_norm": 0.8892763216395327, "learning_rate": 8.998183510475366e-06, "loss": 0.8281, "step": 6098 }, { "epoch": 0.5461077844311377, "grad_norm": 1.134286937055993, "learning_rate": 8.995297854888394e-06, "loss": 0.8206, "step": 6099 }, { "epoch": 0.5461973249762158, "grad_norm": 0.9333884074444031, "learning_rate": 8.992412283813688e-06, "loss": 0.778, "step": 6100 }, { "epoch": 0.5462868655212939, "grad_norm": 0.9858286755603527, "learning_rate": 8.989526797493977e-06, "loss": 0.8396, "step": 6101 }, { "epoch": 0.546376406066372, "grad_norm": 1.035325237236071, "learning_rate": 8.986641396171978e-06, "loss": 0.8274, "step": 6102 }, { "epoch": 0.54646594661145, "grad_norm": 0.9934779067972035, "learning_rate": 8.983756080090402e-06, "loss": 0.8527, "step": 6103 }, { "epoch": 0.546555487156528, "grad_norm": 0.9183399284842207, "learning_rate": 8.980870849491955e-06, "loss": 0.8117, "step": 6104 }, { "epoch": 0.5466450277016062, "grad_norm": 0.9335294561635452, "learning_rate": 8.977985704619334e-06, "loss": 0.8342, "step": 6105 }, { "epoch": 0.5467345682466842, "grad_norm": 1.2339940146158423, "learning_rate": 8.975100645715221e-06, "loss": 0.8056, "step": 6106 }, { "epoch": 0.5468241087917622, "grad_norm": 0.9915305601950564, "learning_rate": 8.972215673022303e-06, "loss": 0.7813, "step": 6107 }, { "epoch": 0.5469136493368404, "grad_norm": 0.9385719164843029, "learning_rate": 8.969330786783253e-06, "loss": 0.7597, "step": 6108 }, { "epoch": 0.5470031898819184, "grad_norm": 0.9087642302529276, "learning_rate": 8.966445987240738e-06, "loss": 0.821, "step": 6109 }, { "epoch": 0.5470927304269965, "grad_norm": 0.9145735034292245, "learning_rate": 8.963561274637423e-06, "loss": 0.8396, "step": 6110 }, { "epoch": 0.5471822709720745, "grad_norm": 0.9826819093462108, "learning_rate": 8.960676649215951e-06, "loss": 0.824, "step": 6111 }, { "epoch": 0.5472718115171527, "grad_norm": 1.1327670985795641, "learning_rate": 8.95779211121897e-06, "loss": 0.813, "step": 6112 }, { "epoch": 0.5473613520622307, "grad_norm": 0.890713563932156, "learning_rate": 8.954907660889126e-06, "loss": 0.7882, "step": 6113 }, { "epoch": 0.5474508926073087, "grad_norm": 1.0619352154870587, "learning_rate": 8.952023298469042e-06, "loss": 0.8295, "step": 6114 }, { "epoch": 0.5475404331523868, "grad_norm": 0.9567964901525606, "learning_rate": 8.949139024201343e-06, "loss": 0.8062, "step": 6115 }, { "epoch": 0.5476299736974649, "grad_norm": 0.927551938283949, "learning_rate": 8.946254838328647e-06, "loss": 0.8098, "step": 6116 }, { "epoch": 0.547719514242543, "grad_norm": 1.006534327834802, "learning_rate": 8.943370741093558e-06, "loss": 0.7956, "step": 6117 }, { "epoch": 0.547809054787621, "grad_norm": 0.9596685327015084, "learning_rate": 8.940486732738677e-06, "loss": 0.8554, "step": 6118 }, { "epoch": 0.5478985953326991, "grad_norm": 1.1857216365890464, "learning_rate": 8.937602813506602e-06, "loss": 0.8046, "step": 6119 }, { "epoch": 0.5479881358777772, "grad_norm": 1.0159856478786566, "learning_rate": 8.934718983639916e-06, "loss": 0.8431, "step": 6120 }, { "epoch": 0.5480776764228552, "grad_norm": 0.8572100177979363, "learning_rate": 8.9318352433812e-06, "loss": 0.8124, "step": 6121 }, { "epoch": 0.5481672169679332, "grad_norm": 0.9531498731708228, "learning_rate": 8.928951592973019e-06, "loss": 0.8534, "step": 6122 }, { "epoch": 0.5482567575130114, "grad_norm": 0.9002591951125026, "learning_rate": 8.926068032657941e-06, "loss": 0.8072, "step": 6123 }, { "epoch": 0.5483462980580894, "grad_norm": 1.0402074850858871, "learning_rate": 8.923184562678518e-06, "loss": 0.8228, "step": 6124 }, { "epoch": 0.5484358386031675, "grad_norm": 0.8654098910589669, "learning_rate": 8.920301183277302e-06, "loss": 0.7496, "step": 6125 }, { "epoch": 0.5485253791482456, "grad_norm": 0.920346943428762, "learning_rate": 8.917417894696836e-06, "loss": 0.7711, "step": 6126 }, { "epoch": 0.5486149196933237, "grad_norm": 0.9643760138678554, "learning_rate": 8.914534697179645e-06, "loss": 0.7897, "step": 6127 }, { "epoch": 0.5487044602384017, "grad_norm": 0.9221495857476754, "learning_rate": 8.911651590968259e-06, "loss": 0.8137, "step": 6128 }, { "epoch": 0.5487940007834797, "grad_norm": 1.4247809736387251, "learning_rate": 8.908768576305194e-06, "loss": 0.8431, "step": 6129 }, { "epoch": 0.5488835413285579, "grad_norm": 0.9022339320391835, "learning_rate": 8.905885653432958e-06, "loss": 0.7817, "step": 6130 }, { "epoch": 0.5489730818736359, "grad_norm": 0.9327807027686045, "learning_rate": 8.90300282259406e-06, "loss": 0.7895, "step": 6131 }, { "epoch": 0.549062622418714, "grad_norm": 1.0316982796621155, "learning_rate": 8.90012008403099e-06, "loss": 0.8055, "step": 6132 }, { "epoch": 0.549152162963792, "grad_norm": 0.956859619071835, "learning_rate": 8.897237437986232e-06, "loss": 0.8247, "step": 6133 }, { "epoch": 0.5492417035088701, "grad_norm": 1.0195351839694258, "learning_rate": 8.894354884702266e-06, "loss": 0.8206, "step": 6134 }, { "epoch": 0.5493312440539482, "grad_norm": 1.0926993323248415, "learning_rate": 8.891472424421567e-06, "loss": 0.833, "step": 6135 }, { "epoch": 0.5494207845990262, "grad_norm": 0.9933041596725648, "learning_rate": 8.888590057386593e-06, "loss": 0.8707, "step": 6136 }, { "epoch": 0.5495103251441044, "grad_norm": 1.0408213093660634, "learning_rate": 8.885707783839805e-06, "loss": 0.8288, "step": 6137 }, { "epoch": 0.5495998656891824, "grad_norm": 0.9655884580923807, "learning_rate": 8.882825604023644e-06, "loss": 0.8717, "step": 6138 }, { "epoch": 0.5496894062342604, "grad_norm": 0.9274310023582122, "learning_rate": 8.879943518180551e-06, "loss": 0.8129, "step": 6139 }, { "epoch": 0.5497789467793385, "grad_norm": 0.9744971834880503, "learning_rate": 8.877061526552961e-06, "loss": 0.8403, "step": 6140 }, { "epoch": 0.5498684873244166, "grad_norm": 0.9567821253355189, "learning_rate": 8.874179629383298e-06, "loss": 0.8345, "step": 6141 }, { "epoch": 0.5499580278694947, "grad_norm": 0.8831138620077232, "learning_rate": 8.871297826913974e-06, "loss": 0.8022, "step": 6142 }, { "epoch": 0.5500475684145727, "grad_norm": 0.9677391175527981, "learning_rate": 8.8684161193874e-06, "loss": 0.8022, "step": 6143 }, { "epoch": 0.5501371089596508, "grad_norm": 0.8662411398480176, "learning_rate": 8.865534507045974e-06, "loss": 0.809, "step": 6144 }, { "epoch": 0.5502266495047289, "grad_norm": 0.9449368851220822, "learning_rate": 8.862652990132085e-06, "loss": 0.8265, "step": 6145 }, { "epoch": 0.5503161900498069, "grad_norm": 0.8688978157837339, "learning_rate": 8.859771568888126e-06, "loss": 0.8304, "step": 6146 }, { "epoch": 0.550405730594885, "grad_norm": 0.924076373047901, "learning_rate": 8.856890243556463e-06, "loss": 0.7413, "step": 6147 }, { "epoch": 0.5504952711399631, "grad_norm": 1.170850785514499, "learning_rate": 8.854009014379472e-06, "loss": 0.8439, "step": 6148 }, { "epoch": 0.5505848116850411, "grad_norm": 0.9227805288993206, "learning_rate": 8.851127881599504e-06, "loss": 0.8131, "step": 6149 }, { "epoch": 0.5506743522301192, "grad_norm": 0.9903403612308835, "learning_rate": 8.848246845458915e-06, "loss": 0.8405, "step": 6150 }, { "epoch": 0.5507638927751972, "grad_norm": 1.0264125070402255, "learning_rate": 8.845365906200048e-06, "loss": 0.8616, "step": 6151 }, { "epoch": 0.5508534333202754, "grad_norm": 1.0278846394831378, "learning_rate": 8.842485064065238e-06, "loss": 0.838, "step": 6152 }, { "epoch": 0.5509429738653534, "grad_norm": 0.9603266249808516, "learning_rate": 8.839604319296815e-06, "loss": 0.8271, "step": 6153 }, { "epoch": 0.5510325144104314, "grad_norm": 0.8748500258920642, "learning_rate": 8.836723672137096e-06, "loss": 0.8221, "step": 6154 }, { "epoch": 0.5511220549555096, "grad_norm": 0.9306339854890584, "learning_rate": 8.833843122828388e-06, "loss": 0.7819, "step": 6155 }, { "epoch": 0.5512115955005876, "grad_norm": 0.9823302735405507, "learning_rate": 8.830962671612998e-06, "loss": 0.851, "step": 6156 }, { "epoch": 0.5513011360456657, "grad_norm": 0.8791709851271489, "learning_rate": 8.828082318733216e-06, "loss": 0.8188, "step": 6157 }, { "epoch": 0.5513906765907437, "grad_norm": 0.9929130025992706, "learning_rate": 8.825202064431332e-06, "loss": 0.8408, "step": 6158 }, { "epoch": 0.5514802171358218, "grad_norm": 0.9532564623484906, "learning_rate": 8.822321908949627e-06, "loss": 0.8232, "step": 6159 }, { "epoch": 0.5515697576808999, "grad_norm": 0.959490306887631, "learning_rate": 8.819441852530358e-06, "loss": 0.859, "step": 6160 }, { "epoch": 0.5516592982259779, "grad_norm": 0.9786472754942536, "learning_rate": 8.816561895415796e-06, "loss": 0.8591, "step": 6161 }, { "epoch": 0.5517488387710561, "grad_norm": 0.9461115139049723, "learning_rate": 8.81368203784819e-06, "loss": 0.7565, "step": 6162 }, { "epoch": 0.5518383793161341, "grad_norm": 0.978793636802868, "learning_rate": 8.810802280069786e-06, "loss": 0.829, "step": 6163 }, { "epoch": 0.5519279198612121, "grad_norm": 0.9396127601080151, "learning_rate": 8.807922622322817e-06, "loss": 0.8325, "step": 6164 }, { "epoch": 0.5520174604062902, "grad_norm": 0.9049368968935767, "learning_rate": 8.805043064849519e-06, "loss": 0.7863, "step": 6165 }, { "epoch": 0.5521070009513683, "grad_norm": 0.9931101610609135, "learning_rate": 8.802163607892098e-06, "loss": 0.8186, "step": 6166 }, { "epoch": 0.5521965414964464, "grad_norm": 0.9630011474588609, "learning_rate": 8.79928425169277e-06, "loss": 0.8679, "step": 6167 }, { "epoch": 0.5522860820415244, "grad_norm": 1.1174275411614434, "learning_rate": 8.79640499649374e-06, "loss": 0.8462, "step": 6168 }, { "epoch": 0.5523756225866024, "grad_norm": 1.0085656191663102, "learning_rate": 8.793525842537201e-06, "loss": 0.8418, "step": 6169 }, { "epoch": 0.5524651631316806, "grad_norm": 0.8877752143694859, "learning_rate": 8.790646790065337e-06, "loss": 0.85, "step": 6170 }, { "epoch": 0.5525547036767586, "grad_norm": 0.9303797202738328, "learning_rate": 8.787767839320323e-06, "loss": 0.7623, "step": 6171 }, { "epoch": 0.5526442442218367, "grad_norm": 1.1451139285953147, "learning_rate": 8.784888990544327e-06, "loss": 0.8153, "step": 6172 }, { "epoch": 0.5527337847669148, "grad_norm": 1.2084127994541511, "learning_rate": 8.78201024397951e-06, "loss": 0.7856, "step": 6173 }, { "epoch": 0.5528233253119929, "grad_norm": 0.9025103880629155, "learning_rate": 8.779131599868022e-06, "loss": 0.8725, "step": 6174 }, { "epoch": 0.5529128658570709, "grad_norm": 0.9286201240276614, "learning_rate": 8.776253058452006e-06, "loss": 0.8151, "step": 6175 }, { "epoch": 0.5530024064021489, "grad_norm": 0.926513286270532, "learning_rate": 8.773374619973598e-06, "loss": 0.8742, "step": 6176 }, { "epoch": 0.5530919469472271, "grad_norm": 0.8748821873801832, "learning_rate": 8.770496284674915e-06, "loss": 0.8237, "step": 6177 }, { "epoch": 0.5531814874923051, "grad_norm": 1.1027547928068657, "learning_rate": 8.767618052798077e-06, "loss": 0.8009, "step": 6178 }, { "epoch": 0.5532710280373832, "grad_norm": 1.076919625435243, "learning_rate": 8.764739924585194e-06, "loss": 0.7971, "step": 6179 }, { "epoch": 0.5533605685824613, "grad_norm": 1.1749975923790266, "learning_rate": 8.761861900278365e-06, "loss": 0.8377, "step": 6180 }, { "epoch": 0.5534501091275393, "grad_norm": 1.010278443904513, "learning_rate": 8.75898398011968e-06, "loss": 0.842, "step": 6181 }, { "epoch": 0.5535396496726174, "grad_norm": 0.8683879065811744, "learning_rate": 8.756106164351214e-06, "loss": 0.825, "step": 6182 }, { "epoch": 0.5536291902176954, "grad_norm": 0.956779918576646, "learning_rate": 8.753228453215047e-06, "loss": 0.8426, "step": 6183 }, { "epoch": 0.5537187307627736, "grad_norm": 0.9092970863369139, "learning_rate": 8.750350846953235e-06, "loss": 0.8432, "step": 6184 }, { "epoch": 0.5538082713078516, "grad_norm": 0.9309671617713405, "learning_rate": 8.747473345807841e-06, "loss": 0.8066, "step": 6185 }, { "epoch": 0.5538978118529296, "grad_norm": 0.9785680066165925, "learning_rate": 8.744595950020907e-06, "loss": 0.8724, "step": 6186 }, { "epoch": 0.5539873523980077, "grad_norm": 0.8164713560957905, "learning_rate": 8.741718659834474e-06, "loss": 0.8189, "step": 6187 }, { "epoch": 0.5540768929430858, "grad_norm": 1.0056619059372478, "learning_rate": 8.738841475490563e-06, "loss": 0.8976, "step": 6188 }, { "epoch": 0.5541664334881639, "grad_norm": 0.882164065857934, "learning_rate": 8.735964397231199e-06, "loss": 0.7703, "step": 6189 }, { "epoch": 0.5542559740332419, "grad_norm": 1.1138187036940759, "learning_rate": 8.73308742529839e-06, "loss": 0.8019, "step": 6190 }, { "epoch": 0.55434551457832, "grad_norm": 0.9142994305137074, "learning_rate": 8.730210559934137e-06, "loss": 0.8373, "step": 6191 }, { "epoch": 0.5544350551233981, "grad_norm": 0.8813696840477887, "learning_rate": 8.72733380138044e-06, "loss": 0.8239, "step": 6192 }, { "epoch": 0.5545245956684761, "grad_norm": 0.9571352757648829, "learning_rate": 8.724457149879268e-06, "loss": 0.8438, "step": 6193 }, { "epoch": 0.5546141362135542, "grad_norm": 0.972279501527995, "learning_rate": 8.721580605672608e-06, "loss": 0.7665, "step": 6194 }, { "epoch": 0.5547036767586323, "grad_norm": 0.8714358931234844, "learning_rate": 8.71870416900242e-06, "loss": 0.8473, "step": 6195 }, { "epoch": 0.5547932173037103, "grad_norm": 0.8164731871718228, "learning_rate": 8.715827840110665e-06, "loss": 0.837, "step": 6196 }, { "epoch": 0.5548827578487884, "grad_norm": 0.9220563929238569, "learning_rate": 8.712951619239288e-06, "loss": 0.8068, "step": 6197 }, { "epoch": 0.5549722983938665, "grad_norm": 1.0476642672712828, "learning_rate": 8.710075506630223e-06, "loss": 0.8184, "step": 6198 }, { "epoch": 0.5550618389389446, "grad_norm": 0.9375961096878558, "learning_rate": 8.7071995025254e-06, "loss": 0.8266, "step": 6199 }, { "epoch": 0.5551513794840226, "grad_norm": 0.9012721110369738, "learning_rate": 8.704323607166747e-06, "loss": 0.807, "step": 6200 }, { "epoch": 0.5552409200291006, "grad_norm": 0.9201837888685435, "learning_rate": 8.701447820796169e-06, "loss": 0.862, "step": 6201 }, { "epoch": 0.5553304605741788, "grad_norm": 1.0076213703126056, "learning_rate": 8.698572143655568e-06, "loss": 0.8602, "step": 6202 }, { "epoch": 0.5554200011192568, "grad_norm": 0.9482457601212124, "learning_rate": 8.69569657598684e-06, "loss": 0.8085, "step": 6203 }, { "epoch": 0.5555095416643349, "grad_norm": 0.9478277847660875, "learning_rate": 8.692821118031864e-06, "loss": 0.8223, "step": 6204 }, { "epoch": 0.5555990822094129, "grad_norm": 0.9002180666735875, "learning_rate": 8.689945770032514e-06, "loss": 0.8501, "step": 6205 }, { "epoch": 0.555688622754491, "grad_norm": 0.9412683268785947, "learning_rate": 8.687070532230657e-06, "loss": 0.7888, "step": 6206 }, { "epoch": 0.5557781632995691, "grad_norm": 0.9272483580666643, "learning_rate": 8.684195404868149e-06, "loss": 0.8159, "step": 6207 }, { "epoch": 0.5558677038446471, "grad_norm": 0.9922701102619413, "learning_rate": 8.68132038818684e-06, "loss": 0.8347, "step": 6208 }, { "epoch": 0.5559572443897253, "grad_norm": 0.8911024837830945, "learning_rate": 8.67844548242856e-06, "loss": 0.8031, "step": 6209 }, { "epoch": 0.5560467849348033, "grad_norm": 0.8782596279670537, "learning_rate": 8.675570687835138e-06, "loss": 0.8612, "step": 6210 }, { "epoch": 0.5561363254798813, "grad_norm": 1.0878373284880813, "learning_rate": 8.672696004648391e-06, "loss": 0.7436, "step": 6211 }, { "epoch": 0.5562258660249594, "grad_norm": 1.0790367172382063, "learning_rate": 8.669821433110133e-06, "loss": 0.819, "step": 6212 }, { "epoch": 0.5563154065700375, "grad_norm": 0.9120162340914998, "learning_rate": 8.666946973462163e-06, "loss": 0.8582, "step": 6213 }, { "epoch": 0.5564049471151156, "grad_norm": 0.9198809554701761, "learning_rate": 8.664072625946271e-06, "loss": 0.8324, "step": 6214 }, { "epoch": 0.5564944876601936, "grad_norm": 0.9342727041133677, "learning_rate": 8.661198390804235e-06, "loss": 0.8583, "step": 6215 }, { "epoch": 0.5565840282052718, "grad_norm": 0.9073260590909366, "learning_rate": 8.658324268277827e-06, "loss": 0.8095, "step": 6216 }, { "epoch": 0.5566735687503498, "grad_norm": 1.0042041463652132, "learning_rate": 8.65545025860881e-06, "loss": 0.7842, "step": 6217 }, { "epoch": 0.5567631092954278, "grad_norm": 0.915306347297924, "learning_rate": 8.652576362038933e-06, "loss": 0.7842, "step": 6218 }, { "epoch": 0.5568526498405059, "grad_norm": 0.8691734379417024, "learning_rate": 8.649702578809948e-06, "loss": 0.8226, "step": 6219 }, { "epoch": 0.556942190385584, "grad_norm": 0.9779780690046866, "learning_rate": 8.646828909163574e-06, "loss": 0.7953, "step": 6220 }, { "epoch": 0.557031730930662, "grad_norm": 1.0478814875529516, "learning_rate": 8.643955353341546e-06, "loss": 0.8635, "step": 6221 }, { "epoch": 0.5571212714757401, "grad_norm": 0.8883695514944274, "learning_rate": 8.641081911585576e-06, "loss": 0.8042, "step": 6222 }, { "epoch": 0.5572108120208181, "grad_norm": 0.8839468462801209, "learning_rate": 8.638208584137366e-06, "loss": 0.7905, "step": 6223 }, { "epoch": 0.5573003525658963, "grad_norm": 1.0447527429385743, "learning_rate": 8.63533537123861e-06, "loss": 0.8512, "step": 6224 }, { "epoch": 0.5573898931109743, "grad_norm": 0.9852439677192617, "learning_rate": 8.632462273131002e-06, "loss": 0.8749, "step": 6225 }, { "epoch": 0.5574794336560523, "grad_norm": 0.9084443950532312, "learning_rate": 8.629589290056207e-06, "loss": 0.795, "step": 6226 }, { "epoch": 0.5575689742011305, "grad_norm": 0.9132888277667458, "learning_rate": 8.626716422255894e-06, "loss": 0.798, "step": 6227 }, { "epoch": 0.5576585147462085, "grad_norm": 0.9670570095211392, "learning_rate": 8.62384366997172e-06, "loss": 0.8315, "step": 6228 }, { "epoch": 0.5577480552912866, "grad_norm": 0.9864812394570586, "learning_rate": 8.620971033445335e-06, "loss": 0.7924, "step": 6229 }, { "epoch": 0.5578375958363646, "grad_norm": 0.980799412429873, "learning_rate": 8.618098512918373e-06, "loss": 0.8291, "step": 6230 }, { "epoch": 0.5579271363814428, "grad_norm": 0.884826647902106, "learning_rate": 8.615226108632461e-06, "loss": 0.8397, "step": 6231 }, { "epoch": 0.5580166769265208, "grad_norm": 1.1153769429476674, "learning_rate": 8.612353820829211e-06, "loss": 0.838, "step": 6232 }, { "epoch": 0.5581062174715988, "grad_norm": 1.0957122488552455, "learning_rate": 8.60948164975024e-06, "loss": 0.8462, "step": 6233 }, { "epoch": 0.558195758016677, "grad_norm": 0.9341638784553586, "learning_rate": 8.60660959563714e-06, "loss": 0.818, "step": 6234 }, { "epoch": 0.558285298561755, "grad_norm": 1.0499033901854964, "learning_rate": 8.6037376587315e-06, "loss": 0.765, "step": 6235 }, { "epoch": 0.558374839106833, "grad_norm": 0.947506906884927, "learning_rate": 8.600865839274902e-06, "loss": 0.7735, "step": 6236 }, { "epoch": 0.5584643796519111, "grad_norm": 0.9733665251533825, "learning_rate": 8.597994137508907e-06, "loss": 0.8701, "step": 6237 }, { "epoch": 0.5585539201969892, "grad_norm": 1.0607018295729944, "learning_rate": 8.595122553675075e-06, "loss": 0.8507, "step": 6238 }, { "epoch": 0.5586434607420673, "grad_norm": 1.0020744151736605, "learning_rate": 8.592251088014956e-06, "loss": 0.8464, "step": 6239 }, { "epoch": 0.5587330012871453, "grad_norm": 1.1974872131823473, "learning_rate": 8.589379740770091e-06, "loss": 0.8381, "step": 6240 }, { "epoch": 0.5588225418322234, "grad_norm": 0.989551388006751, "learning_rate": 8.586508512182006e-06, "loss": 0.8429, "step": 6241 }, { "epoch": 0.5589120823773015, "grad_norm": 1.011662099827941, "learning_rate": 8.583637402492216e-06, "loss": 0.8557, "step": 6242 }, { "epoch": 0.5590016229223795, "grad_norm": 0.9977999435781696, "learning_rate": 8.580766411942232e-06, "loss": 0.8312, "step": 6243 }, { "epoch": 0.5590911634674576, "grad_norm": 1.048007158168493, "learning_rate": 8.577895540773553e-06, "loss": 0.8197, "step": 6244 }, { "epoch": 0.5591807040125357, "grad_norm": 1.1161325246222016, "learning_rate": 8.575024789227666e-06, "loss": 0.854, "step": 6245 }, { "epoch": 0.5592702445576138, "grad_norm": 0.8877063141808497, "learning_rate": 8.572154157546051e-06, "loss": 0.7914, "step": 6246 }, { "epoch": 0.5593597851026918, "grad_norm": 0.9166527406783664, "learning_rate": 8.569283645970178e-06, "loss": 0.8468, "step": 6247 }, { "epoch": 0.5594493256477698, "grad_norm": 0.920340771529597, "learning_rate": 8.5664132547415e-06, "loss": 0.8366, "step": 6248 }, { "epoch": 0.559538866192848, "grad_norm": 0.9708546225007653, "learning_rate": 8.563542984101467e-06, "loss": 0.8107, "step": 6249 }, { "epoch": 0.559628406737926, "grad_norm": 1.325946252370619, "learning_rate": 8.560672834291518e-06, "loss": 0.853, "step": 6250 }, { "epoch": 0.5597179472830041, "grad_norm": 0.9623276638039914, "learning_rate": 8.557802805553076e-06, "loss": 0.7945, "step": 6251 }, { "epoch": 0.5598074878280822, "grad_norm": 0.8937795643793636, "learning_rate": 8.554932898127571e-06, "loss": 0.8265, "step": 6252 }, { "epoch": 0.5598970283731602, "grad_norm": 0.9130919495370106, "learning_rate": 8.552063112256395e-06, "loss": 0.8442, "step": 6253 }, { "epoch": 0.5599865689182383, "grad_norm": 0.995423876481945, "learning_rate": 8.549193448180952e-06, "loss": 0.8191, "step": 6254 }, { "epoch": 0.5600761094633163, "grad_norm": 0.8570026182790849, "learning_rate": 8.546323906142631e-06, "loss": 0.8437, "step": 6255 }, { "epoch": 0.5601656500083945, "grad_norm": 0.9476637818961786, "learning_rate": 8.543454486382803e-06, "loss": 0.7945, "step": 6256 }, { "epoch": 0.5602551905534725, "grad_norm": 0.9342135221862148, "learning_rate": 8.540585189142836e-06, "loss": 0.8631, "step": 6257 }, { "epoch": 0.5603447310985505, "grad_norm": 1.2577540954382154, "learning_rate": 8.537716014664095e-06, "loss": 0.8573, "step": 6258 }, { "epoch": 0.5604342716436286, "grad_norm": 0.8428549564493989, "learning_rate": 8.53484696318791e-06, "loss": 0.8216, "step": 6259 }, { "epoch": 0.5605238121887067, "grad_norm": 1.086904038516366, "learning_rate": 8.531978034955625e-06, "loss": 0.8018, "step": 6260 }, { "epoch": 0.5606133527337848, "grad_norm": 1.0124644505079914, "learning_rate": 8.529109230208565e-06, "loss": 0.8526, "step": 6261 }, { "epoch": 0.5607028932788628, "grad_norm": 0.8748819294738786, "learning_rate": 8.526240549188044e-06, "loss": 0.8389, "step": 6262 }, { "epoch": 0.560792433823941, "grad_norm": 0.9262398437097885, "learning_rate": 8.523371992135367e-06, "loss": 0.8036, "step": 6263 }, { "epoch": 0.560881974369019, "grad_norm": 0.9650725213494799, "learning_rate": 8.520503559291824e-06, "loss": 0.8757, "step": 6264 }, { "epoch": 0.560971514914097, "grad_norm": 0.9676504460598785, "learning_rate": 8.517635250898698e-06, "loss": 0.8239, "step": 6265 }, { "epoch": 0.5610610554591751, "grad_norm": 1.0244553865905783, "learning_rate": 8.514767067197267e-06, "loss": 0.866, "step": 6266 }, { "epoch": 0.5611505960042532, "grad_norm": 1.0292286637759693, "learning_rate": 8.511899008428789e-06, "loss": 0.9125, "step": 6267 }, { "epoch": 0.5612401365493312, "grad_norm": 1.0202115555546305, "learning_rate": 8.50903107483452e-06, "loss": 0.7988, "step": 6268 }, { "epoch": 0.5613296770944093, "grad_norm": 0.9343298931208839, "learning_rate": 8.506163266655696e-06, "loss": 0.8864, "step": 6269 }, { "epoch": 0.5614192176394874, "grad_norm": 0.9522285698390794, "learning_rate": 8.503295584133551e-06, "loss": 0.7618, "step": 6270 }, { "epoch": 0.5615087581845655, "grad_norm": 1.051380972133942, "learning_rate": 8.500428027509303e-06, "loss": 0.7719, "step": 6271 }, { "epoch": 0.5615982987296435, "grad_norm": 0.9333289788582795, "learning_rate": 8.497560597024161e-06, "loss": 0.7471, "step": 6272 }, { "epoch": 0.5616878392747215, "grad_norm": 0.9668336778699221, "learning_rate": 8.494693292919329e-06, "loss": 0.8639, "step": 6273 }, { "epoch": 0.5617773798197997, "grad_norm": 1.054468513323391, "learning_rate": 8.491826115435991e-06, "loss": 0.8597, "step": 6274 }, { "epoch": 0.5618669203648777, "grad_norm": 0.9438055834324189, "learning_rate": 8.488959064815326e-06, "loss": 0.8212, "step": 6275 }, { "epoch": 0.5619564609099558, "grad_norm": 0.8877640981945504, "learning_rate": 8.486092141298499e-06, "loss": 0.7586, "step": 6276 }, { "epoch": 0.5620460014550338, "grad_norm": 0.9028159078681367, "learning_rate": 8.483225345126668e-06, "loss": 0.8336, "step": 6277 }, { "epoch": 0.562135542000112, "grad_norm": 0.9892478788835701, "learning_rate": 8.480358676540976e-06, "loss": 0.85, "step": 6278 }, { "epoch": 0.56222508254519, "grad_norm": 0.8815015426588725, "learning_rate": 8.477492135782567e-06, "loss": 0.8169, "step": 6279 }, { "epoch": 0.562314623090268, "grad_norm": 0.961130718681417, "learning_rate": 8.47462572309255e-06, "loss": 0.8402, "step": 6280 }, { "epoch": 0.5624041636353462, "grad_norm": 0.8992772068149755, "learning_rate": 8.471759438712052e-06, "loss": 0.7792, "step": 6281 }, { "epoch": 0.5624937041804242, "grad_norm": 1.1618372639309393, "learning_rate": 8.468893282882167e-06, "loss": 0.8317, "step": 6282 }, { "epoch": 0.5625832447255023, "grad_norm": 1.1293455553953806, "learning_rate": 8.466027255843991e-06, "loss": 0.7772, "step": 6283 }, { "epoch": 0.5626727852705803, "grad_norm": 0.919329280776228, "learning_rate": 8.4631613578386e-06, "loss": 0.7939, "step": 6284 }, { "epoch": 0.5627623258156584, "grad_norm": 1.1266022537183649, "learning_rate": 8.460295589107075e-06, "loss": 0.7935, "step": 6285 }, { "epoch": 0.5628518663607365, "grad_norm": 0.9923958928527661, "learning_rate": 8.457429949890463e-06, "loss": 0.8228, "step": 6286 }, { "epoch": 0.5629414069058145, "grad_norm": 1.1803432317636555, "learning_rate": 8.454564440429816e-06, "loss": 0.9026, "step": 6287 }, { "epoch": 0.5630309474508927, "grad_norm": 1.0214736351306875, "learning_rate": 8.451699060966174e-06, "loss": 0.8211, "step": 6288 }, { "epoch": 0.5631204879959707, "grad_norm": 1.0469931734583096, "learning_rate": 8.44883381174056e-06, "loss": 0.826, "step": 6289 }, { "epoch": 0.5632100285410487, "grad_norm": 0.9813633606361105, "learning_rate": 8.445968692993993e-06, "loss": 0.765, "step": 6290 }, { "epoch": 0.5632995690861268, "grad_norm": 0.8924360160421672, "learning_rate": 8.443103704967474e-06, "loss": 0.8062, "step": 6291 }, { "epoch": 0.5633891096312049, "grad_norm": 1.069047030585701, "learning_rate": 8.440238847901996e-06, "loss": 0.8474, "step": 6292 }, { "epoch": 0.563478650176283, "grad_norm": 1.155684157588797, "learning_rate": 8.437374122038546e-06, "loss": 0.8194, "step": 6293 }, { "epoch": 0.563568190721361, "grad_norm": 1.0725241323363837, "learning_rate": 8.434509527618092e-06, "loss": 0.8584, "step": 6294 }, { "epoch": 0.563657731266439, "grad_norm": 1.2565606695097087, "learning_rate": 8.431645064881594e-06, "loss": 0.8313, "step": 6295 }, { "epoch": 0.5637472718115172, "grad_norm": 1.0805780074786737, "learning_rate": 8.428780734070006e-06, "loss": 0.8334, "step": 6296 }, { "epoch": 0.5638368123565952, "grad_norm": 0.8878792445544832, "learning_rate": 8.42591653542426e-06, "loss": 0.806, "step": 6297 }, { "epoch": 0.5639263529016733, "grad_norm": 0.9256005444527914, "learning_rate": 8.423052469185286e-06, "loss": 0.8193, "step": 6298 }, { "epoch": 0.5640158934467514, "grad_norm": 0.9034716500111684, "learning_rate": 8.420188535593996e-06, "loss": 0.8269, "step": 6299 }, { "epoch": 0.5641054339918294, "grad_norm": 1.0594733404489611, "learning_rate": 8.417324734891301e-06, "loss": 0.8821, "step": 6300 }, { "epoch": 0.5641949745369075, "grad_norm": 0.8624414916053894, "learning_rate": 8.414461067318095e-06, "loss": 0.8026, "step": 6301 }, { "epoch": 0.5642845150819855, "grad_norm": 0.9839236386878734, "learning_rate": 8.411597533115254e-06, "loss": 0.8079, "step": 6302 }, { "epoch": 0.5643740556270637, "grad_norm": 0.9478835720582433, "learning_rate": 8.408734132523652e-06, "loss": 0.766, "step": 6303 }, { "epoch": 0.5644635961721417, "grad_norm": 0.9419887145719844, "learning_rate": 8.405870865784151e-06, "loss": 0.7744, "step": 6304 }, { "epoch": 0.5645531367172197, "grad_norm": 1.0145689506221294, "learning_rate": 8.403007733137594e-06, "loss": 0.7575, "step": 6305 }, { "epoch": 0.5646426772622979, "grad_norm": 1.1383161605710237, "learning_rate": 8.400144734824826e-06, "loss": 0.8169, "step": 6306 }, { "epoch": 0.5647322178073759, "grad_norm": 0.9659362464312885, "learning_rate": 8.39728187108667e-06, "loss": 0.8482, "step": 6307 }, { "epoch": 0.564821758352454, "grad_norm": 0.9642635440547264, "learning_rate": 8.394419142163939e-06, "loss": 0.8089, "step": 6308 }, { "epoch": 0.564911298897532, "grad_norm": 0.879275208492966, "learning_rate": 8.391556548297436e-06, "loss": 0.8406, "step": 6309 }, { "epoch": 0.5650008394426101, "grad_norm": 0.8536728031521994, "learning_rate": 8.388694089727959e-06, "loss": 0.7758, "step": 6310 }, { "epoch": 0.5650903799876882, "grad_norm": 0.9331503864789821, "learning_rate": 8.385831766696279e-06, "loss": 0.8159, "step": 6311 }, { "epoch": 0.5651799205327662, "grad_norm": 0.8629547036750387, "learning_rate": 8.382969579443177e-06, "loss": 0.8261, "step": 6312 }, { "epoch": 0.5652694610778443, "grad_norm": 0.9001896629068947, "learning_rate": 8.380107528209399e-06, "loss": 0.8106, "step": 6313 }, { "epoch": 0.5653590016229224, "grad_norm": 0.9906884456529951, "learning_rate": 8.377245613235699e-06, "loss": 0.7837, "step": 6314 }, { "epoch": 0.5654485421680004, "grad_norm": 0.9728225599627576, "learning_rate": 8.374383834762808e-06, "loss": 0.8855, "step": 6315 }, { "epoch": 0.5655380827130785, "grad_norm": 0.9609543119900488, "learning_rate": 8.371522193031454e-06, "loss": 0.8275, "step": 6316 }, { "epoch": 0.5656276232581566, "grad_norm": 0.8921145513052312, "learning_rate": 8.368660688282345e-06, "loss": 0.8588, "step": 6317 }, { "epoch": 0.5657171638032347, "grad_norm": 0.8935251161290556, "learning_rate": 8.365799320756187e-06, "loss": 0.8146, "step": 6318 }, { "epoch": 0.5658067043483127, "grad_norm": 0.9750701636409439, "learning_rate": 8.362938090693659e-06, "loss": 0.8457, "step": 6319 }, { "epoch": 0.5658962448933907, "grad_norm": 0.9602093286477318, "learning_rate": 8.360076998335447e-06, "loss": 0.8957, "step": 6320 }, { "epoch": 0.5659857854384689, "grad_norm": 1.0545457341150943, "learning_rate": 8.357216043922213e-06, "loss": 0.8902, "step": 6321 }, { "epoch": 0.5660753259835469, "grad_norm": 0.9668876559946807, "learning_rate": 8.354355227694612e-06, "loss": 0.8031, "step": 6322 }, { "epoch": 0.566164866528625, "grad_norm": 1.0449311481568286, "learning_rate": 8.35149454989329e-06, "loss": 0.8825, "step": 6323 }, { "epoch": 0.5662544070737031, "grad_norm": 1.065080049911013, "learning_rate": 8.34863401075887e-06, "loss": 0.8464, "step": 6324 }, { "epoch": 0.5663439476187812, "grad_norm": 0.9333666840118511, "learning_rate": 8.345773610531977e-06, "loss": 0.8079, "step": 6325 }, { "epoch": 0.5664334881638592, "grad_norm": 0.9358923170069114, "learning_rate": 8.342913349453215e-06, "loss": 0.8982, "step": 6326 }, { "epoch": 0.5665230287089372, "grad_norm": 0.9468538845201266, "learning_rate": 8.340053227763185e-06, "loss": 0.8641, "step": 6327 }, { "epoch": 0.5666125692540154, "grad_norm": 0.9664123754273294, "learning_rate": 8.33719324570247e-06, "loss": 0.8562, "step": 6328 }, { "epoch": 0.5667021097990934, "grad_norm": 1.028823625710351, "learning_rate": 8.334333403511639e-06, "loss": 0.8183, "step": 6329 }, { "epoch": 0.5667916503441714, "grad_norm": 0.8809140045975414, "learning_rate": 8.331473701431252e-06, "loss": 0.8283, "step": 6330 }, { "epoch": 0.5668811908892495, "grad_norm": 0.9489945447787314, "learning_rate": 8.328614139701862e-06, "loss": 0.7886, "step": 6331 }, { "epoch": 0.5669707314343276, "grad_norm": 0.8947527328509491, "learning_rate": 8.325754718564e-06, "loss": 0.781, "step": 6332 }, { "epoch": 0.5670602719794057, "grad_norm": 0.9308975391859056, "learning_rate": 8.322895438258199e-06, "loss": 0.8244, "step": 6333 }, { "epoch": 0.5671498125244837, "grad_norm": 0.8941890182477599, "learning_rate": 8.32003629902497e-06, "loss": 0.781, "step": 6334 }, { "epoch": 0.5672393530695619, "grad_norm": 0.9445626037486022, "learning_rate": 8.317177301104811e-06, "loss": 0.8162, "step": 6335 }, { "epoch": 0.5673288936146399, "grad_norm": 0.9364256485348648, "learning_rate": 8.314318444738215e-06, "loss": 0.8367, "step": 6336 }, { "epoch": 0.5674184341597179, "grad_norm": 1.0186646115429732, "learning_rate": 8.311459730165656e-06, "loss": 0.8073, "step": 6337 }, { "epoch": 0.567507974704796, "grad_norm": 0.9269379907016525, "learning_rate": 8.308601157627602e-06, "loss": 0.857, "step": 6338 }, { "epoch": 0.5675975152498741, "grad_norm": 1.1458419880019848, "learning_rate": 8.305742727364513e-06, "loss": 0.7925, "step": 6339 }, { "epoch": 0.5676870557949522, "grad_norm": 0.9873054946086383, "learning_rate": 8.302884439616816e-06, "loss": 0.8747, "step": 6340 }, { "epoch": 0.5677765963400302, "grad_norm": 0.8765078432984779, "learning_rate": 8.300026294624954e-06, "loss": 0.8141, "step": 6341 }, { "epoch": 0.5678661368851083, "grad_norm": 0.9078290125772962, "learning_rate": 8.29716829262934e-06, "loss": 0.8448, "step": 6342 }, { "epoch": 0.5679556774301864, "grad_norm": 0.927880998843783, "learning_rate": 8.29431043387038e-06, "loss": 0.7881, "step": 6343 }, { "epoch": 0.5680452179752644, "grad_norm": 1.1669227884019917, "learning_rate": 8.291452718588463e-06, "loss": 0.7835, "step": 6344 }, { "epoch": 0.5681347585203425, "grad_norm": 0.9554323050690641, "learning_rate": 8.288595147023986e-06, "loss": 0.7974, "step": 6345 }, { "epoch": 0.5682242990654206, "grad_norm": 0.966222410846061, "learning_rate": 8.2857377194173e-06, "loss": 0.8178, "step": 6346 }, { "epoch": 0.5683138396104986, "grad_norm": 1.0177312919330403, "learning_rate": 8.282880436008775e-06, "loss": 0.8235, "step": 6347 }, { "epoch": 0.5684033801555767, "grad_norm": 0.9381770554222061, "learning_rate": 8.280023297038749e-06, "loss": 0.8336, "step": 6348 }, { "epoch": 0.5684929207006547, "grad_norm": 0.9366471872765141, "learning_rate": 8.277166302747561e-06, "loss": 0.791, "step": 6349 }, { "epoch": 0.5685824612457329, "grad_norm": 0.9228847464402782, "learning_rate": 8.274309453375531e-06, "loss": 0.8157, "step": 6350 }, { "epoch": 0.5686720017908109, "grad_norm": 0.9729525529110813, "learning_rate": 8.271452749162963e-06, "loss": 0.8396, "step": 6351 }, { "epoch": 0.5687615423358889, "grad_norm": 1.0205362211932345, "learning_rate": 8.268596190350158e-06, "loss": 0.821, "step": 6352 }, { "epoch": 0.5688510828809671, "grad_norm": 0.974124744068781, "learning_rate": 8.265739777177396e-06, "loss": 0.8805, "step": 6353 }, { "epoch": 0.5689406234260451, "grad_norm": 0.9701716927333254, "learning_rate": 8.262883509884956e-06, "loss": 0.8155, "step": 6354 }, { "epoch": 0.5690301639711232, "grad_norm": 0.9705558159976604, "learning_rate": 8.260027388713094e-06, "loss": 0.8059, "step": 6355 }, { "epoch": 0.5691197045162012, "grad_norm": 0.9356401565289704, "learning_rate": 8.257171413902059e-06, "loss": 0.8524, "step": 6356 }, { "epoch": 0.5692092450612793, "grad_norm": 0.9322176060700326, "learning_rate": 8.254315585692084e-06, "loss": 0.8478, "step": 6357 }, { "epoch": 0.5692987856063574, "grad_norm": 0.9408689083239092, "learning_rate": 8.251459904323393e-06, "loss": 0.8268, "step": 6358 }, { "epoch": 0.5693883261514354, "grad_norm": 0.8957832704443067, "learning_rate": 8.248604370036196e-06, "loss": 0.7986, "step": 6359 }, { "epoch": 0.5694778666965136, "grad_norm": 0.9490471795327067, "learning_rate": 8.245748983070693e-06, "loss": 0.8214, "step": 6360 }, { "epoch": 0.5695674072415916, "grad_norm": 0.9403271910691102, "learning_rate": 8.242893743667072e-06, "loss": 0.7984, "step": 6361 }, { "epoch": 0.5696569477866696, "grad_norm": 0.902707532529254, "learning_rate": 8.2400386520655e-06, "loss": 0.8711, "step": 6362 }, { "epoch": 0.5697464883317477, "grad_norm": 1.0091644067047132, "learning_rate": 8.237183708506143e-06, "loss": 0.8626, "step": 6363 }, { "epoch": 0.5698360288768258, "grad_norm": 1.053468676280152, "learning_rate": 8.234328913229146e-06, "loss": 0.8611, "step": 6364 }, { "epoch": 0.5699255694219039, "grad_norm": 1.1687070452933956, "learning_rate": 8.231474266474645e-06, "loss": 0.8012, "step": 6365 }, { "epoch": 0.5700151099669819, "grad_norm": 0.8917133144247367, "learning_rate": 8.228619768482769e-06, "loss": 0.8001, "step": 6366 }, { "epoch": 0.5701046505120599, "grad_norm": 1.0808158463700963, "learning_rate": 8.225765419493627e-06, "loss": 0.8764, "step": 6367 }, { "epoch": 0.5701941910571381, "grad_norm": 0.96984628550912, "learning_rate": 8.222911219747317e-06, "loss": 0.8085, "step": 6368 }, { "epoch": 0.5702837316022161, "grad_norm": 0.929311773688592, "learning_rate": 8.22005716948392e-06, "loss": 0.8144, "step": 6369 }, { "epoch": 0.5703732721472942, "grad_norm": 0.8930937783614732, "learning_rate": 8.217203268943516e-06, "loss": 0.8003, "step": 6370 }, { "epoch": 0.5704628126923723, "grad_norm": 0.9166396903722628, "learning_rate": 8.21434951836616e-06, "loss": 0.8428, "step": 6371 }, { "epoch": 0.5705523532374503, "grad_norm": 0.959572916067849, "learning_rate": 8.21149591799191e-06, "loss": 0.863, "step": 6372 }, { "epoch": 0.5706418937825284, "grad_norm": 0.9501301822664973, "learning_rate": 8.208642468060792e-06, "loss": 0.8455, "step": 6373 }, { "epoch": 0.5707314343276064, "grad_norm": 0.9964438343989324, "learning_rate": 8.20578916881283e-06, "loss": 0.8216, "step": 6374 }, { "epoch": 0.5708209748726846, "grad_norm": 0.9232752955271403, "learning_rate": 8.202936020488037e-06, "loss": 0.8515, "step": 6375 }, { "epoch": 0.5709105154177626, "grad_norm": 0.9249316687707083, "learning_rate": 8.200083023326411e-06, "loss": 0.7889, "step": 6376 }, { "epoch": 0.5710000559628406, "grad_norm": 0.9297897788584465, "learning_rate": 8.197230177567934e-06, "loss": 0.8087, "step": 6377 }, { "epoch": 0.5710895965079188, "grad_norm": 0.9190141912176004, "learning_rate": 8.194377483452585e-06, "loss": 0.876, "step": 6378 }, { "epoch": 0.5711791370529968, "grad_norm": 0.9488146510555675, "learning_rate": 8.191524941220314e-06, "loss": 0.7971, "step": 6379 }, { "epoch": 0.5712686775980749, "grad_norm": 0.9492160511809153, "learning_rate": 8.188672551111069e-06, "loss": 0.8126, "step": 6380 }, { "epoch": 0.5713582181431529, "grad_norm": 0.9828175792235969, "learning_rate": 8.18582031336479e-06, "loss": 0.8504, "step": 6381 }, { "epoch": 0.5714477586882311, "grad_norm": 0.9493665086304246, "learning_rate": 8.182968228221395e-06, "loss": 0.8504, "step": 6382 }, { "epoch": 0.5715372992333091, "grad_norm": 0.8942004582800138, "learning_rate": 8.180116295920791e-06, "loss": 0.8655, "step": 6383 }, { "epoch": 0.5716268397783871, "grad_norm": 1.1245025791386143, "learning_rate": 8.177264516702875e-06, "loss": 0.8007, "step": 6384 }, { "epoch": 0.5717163803234652, "grad_norm": 0.9423437924309437, "learning_rate": 8.174412890807526e-06, "loss": 0.804, "step": 6385 }, { "epoch": 0.5718059208685433, "grad_norm": 1.1063026202946518, "learning_rate": 8.171561418474611e-06, "loss": 0.8352, "step": 6386 }, { "epoch": 0.5718954614136214, "grad_norm": 0.9132765398672845, "learning_rate": 8.168710099943997e-06, "loss": 0.7676, "step": 6387 }, { "epoch": 0.5719850019586994, "grad_norm": 1.0723400965339867, "learning_rate": 8.16585893545552e-06, "loss": 0.8585, "step": 6388 }, { "epoch": 0.5720745425037775, "grad_norm": 1.0152464257254932, "learning_rate": 8.163007925249015e-06, "loss": 0.8333, "step": 6389 }, { "epoch": 0.5721640830488556, "grad_norm": 0.9333039063551101, "learning_rate": 8.160157069564296e-06, "loss": 0.8457, "step": 6390 }, { "epoch": 0.5722536235939336, "grad_norm": 1.0402027545695234, "learning_rate": 8.157306368641167e-06, "loss": 0.8335, "step": 6391 }, { "epoch": 0.5723431641390117, "grad_norm": 0.9721077719071174, "learning_rate": 8.15445582271942e-06, "loss": 0.7654, "step": 6392 }, { "epoch": 0.5724327046840898, "grad_norm": 0.8410843717096093, "learning_rate": 8.151605432038838e-06, "loss": 0.8538, "step": 6393 }, { "epoch": 0.5725222452291678, "grad_norm": 0.9166532596877174, "learning_rate": 8.148755196839186e-06, "loss": 0.8486, "step": 6394 }, { "epoch": 0.5726117857742459, "grad_norm": 0.9765252647307026, "learning_rate": 8.145905117360211e-06, "loss": 0.876, "step": 6395 }, { "epoch": 0.572701326319324, "grad_norm": 0.9196803160494078, "learning_rate": 8.143055193841656e-06, "loss": 0.8126, "step": 6396 }, { "epoch": 0.5727908668644021, "grad_norm": 0.9798055534653657, "learning_rate": 8.140205426523247e-06, "loss": 0.8051, "step": 6397 }, { "epoch": 0.5728804074094801, "grad_norm": 0.8965282644553115, "learning_rate": 8.137355815644694e-06, "loss": 0.7987, "step": 6398 }, { "epoch": 0.5729699479545581, "grad_norm": 1.1298951301128226, "learning_rate": 8.134506361445706e-06, "loss": 0.831, "step": 6399 }, { "epoch": 0.5730594884996363, "grad_norm": 0.9125273256387578, "learning_rate": 8.131657064165962e-06, "loss": 0.7493, "step": 6400 }, { "epoch": 0.5731490290447143, "grad_norm": 0.9229129690648799, "learning_rate": 8.128807924045134e-06, "loss": 0.833, "step": 6401 }, { "epoch": 0.5732385695897924, "grad_norm": 0.972788484064051, "learning_rate": 8.125958941322886e-06, "loss": 0.908, "step": 6402 }, { "epoch": 0.5733281101348704, "grad_norm": 0.9556157937056855, "learning_rate": 8.123110116238868e-06, "loss": 0.8068, "step": 6403 }, { "epoch": 0.5734176506799485, "grad_norm": 0.9523184605473805, "learning_rate": 8.120261449032707e-06, "loss": 0.8038, "step": 6404 }, { "epoch": 0.5735071912250266, "grad_norm": 0.9856344743866243, "learning_rate": 8.117412939944034e-06, "loss": 0.8833, "step": 6405 }, { "epoch": 0.5735967317701046, "grad_norm": 0.8931507032660019, "learning_rate": 8.114564589212446e-06, "loss": 0.8244, "step": 6406 }, { "epoch": 0.5736862723151828, "grad_norm": 0.9919698173016027, "learning_rate": 8.111716397077539e-06, "loss": 0.8333, "step": 6407 }, { "epoch": 0.5737758128602608, "grad_norm": 0.9469042309268327, "learning_rate": 8.108868363778894e-06, "loss": 0.8647, "step": 6408 }, { "epoch": 0.5738653534053388, "grad_norm": 0.915613720385012, "learning_rate": 8.106020489556083e-06, "loss": 0.768, "step": 6409 }, { "epoch": 0.5739548939504169, "grad_norm": 0.8861685913117677, "learning_rate": 8.103172774648658e-06, "loss": 0.7898, "step": 6410 }, { "epoch": 0.574044434495495, "grad_norm": 1.0054692895449444, "learning_rate": 8.100325219296155e-06, "loss": 0.8109, "step": 6411 }, { "epoch": 0.5741339750405731, "grad_norm": 1.1020898335691662, "learning_rate": 8.097477823738105e-06, "loss": 0.8545, "step": 6412 }, { "epoch": 0.5742235155856511, "grad_norm": 0.933284281829491, "learning_rate": 8.094630588214018e-06, "loss": 0.7864, "step": 6413 }, { "epoch": 0.5743130561307292, "grad_norm": 1.0150261148943502, "learning_rate": 8.0917835129634e-06, "loss": 0.8852, "step": 6414 }, { "epoch": 0.5744025966758073, "grad_norm": 0.8763502477388453, "learning_rate": 8.088936598225733e-06, "loss": 0.8007, "step": 6415 }, { "epoch": 0.5744921372208853, "grad_norm": 1.017271770795491, "learning_rate": 8.086089844240495e-06, "loss": 0.8185, "step": 6416 }, { "epoch": 0.5745816777659634, "grad_norm": 0.8606119400167056, "learning_rate": 8.08324325124714e-06, "loss": 0.816, "step": 6417 }, { "epoch": 0.5746712183110415, "grad_norm": 0.8028210954214167, "learning_rate": 8.080396819485117e-06, "loss": 0.8223, "step": 6418 }, { "epoch": 0.5747607588561195, "grad_norm": 0.8999896038255069, "learning_rate": 8.077550549193856e-06, "loss": 0.7793, "step": 6419 }, { "epoch": 0.5748502994011976, "grad_norm": 1.0418254285132305, "learning_rate": 8.07470444061278e-06, "loss": 0.8541, "step": 6420 }, { "epoch": 0.5749398399462756, "grad_norm": 1.1736254697129083, "learning_rate": 8.071858493981293e-06, "loss": 0.8359, "step": 6421 }, { "epoch": 0.5750293804913538, "grad_norm": 0.8641004437980728, "learning_rate": 8.069012709538786e-06, "loss": 0.8174, "step": 6422 }, { "epoch": 0.5751189210364318, "grad_norm": 0.9273547965900395, "learning_rate": 8.066167087524637e-06, "loss": 0.8892, "step": 6423 }, { "epoch": 0.5752084615815098, "grad_norm": 0.9911392613874068, "learning_rate": 8.063321628178211e-06, "loss": 0.7688, "step": 6424 }, { "epoch": 0.575298002126588, "grad_norm": 0.9121099453851654, "learning_rate": 8.060476331738856e-06, "loss": 0.8545, "step": 6425 }, { "epoch": 0.575387542671666, "grad_norm": 0.893294219833341, "learning_rate": 8.057631198445915e-06, "loss": 0.8352, "step": 6426 }, { "epoch": 0.5754770832167441, "grad_norm": 0.927612744048782, "learning_rate": 8.05478622853871e-06, "loss": 0.8077, "step": 6427 }, { "epoch": 0.5755666237618221, "grad_norm": 1.083147704610812, "learning_rate": 8.051941422256545e-06, "loss": 0.8447, "step": 6428 }, { "epoch": 0.5756561643069003, "grad_norm": 0.985976398837955, "learning_rate": 8.04909677983872e-06, "loss": 0.8281, "step": 6429 }, { "epoch": 0.5757457048519783, "grad_norm": 1.324972808071582, "learning_rate": 8.046252301524515e-06, "loss": 0.8585, "step": 6430 }, { "epoch": 0.5758352453970563, "grad_norm": 0.8813350433492376, "learning_rate": 8.043407987553198e-06, "loss": 0.8635, "step": 6431 }, { "epoch": 0.5759247859421345, "grad_norm": 0.8977041449262854, "learning_rate": 8.040563838164034e-06, "loss": 0.9122, "step": 6432 }, { "epoch": 0.5760143264872125, "grad_norm": 0.8833341800490374, "learning_rate": 8.03771985359625e-06, "loss": 0.7945, "step": 6433 }, { "epoch": 0.5761038670322906, "grad_norm": 1.0429128547361064, "learning_rate": 8.034876034089072e-06, "loss": 0.8227, "step": 6434 }, { "epoch": 0.5761934075773686, "grad_norm": 0.9408627580945907, "learning_rate": 8.032032379881723e-06, "loss": 0.8359, "step": 6435 }, { "epoch": 0.5762829481224467, "grad_norm": 0.9356627139783498, "learning_rate": 8.029188891213398e-06, "loss": 0.8658, "step": 6436 }, { "epoch": 0.5763724886675248, "grad_norm": 0.8854871291210523, "learning_rate": 8.02634556832328e-06, "loss": 0.8381, "step": 6437 }, { "epoch": 0.5764620292126028, "grad_norm": 0.9822465113106529, "learning_rate": 8.023502411450543e-06, "loss": 0.8306, "step": 6438 }, { "epoch": 0.5765515697576808, "grad_norm": 1.011816271212423, "learning_rate": 8.020659420834341e-06, "loss": 0.8017, "step": 6439 }, { "epoch": 0.576641110302759, "grad_norm": 0.8772504311750633, "learning_rate": 8.017816596713815e-06, "loss": 0.8144, "step": 6440 }, { "epoch": 0.576730650847837, "grad_norm": 0.9226348972623325, "learning_rate": 8.0149739393281e-06, "loss": 0.8433, "step": 6441 }, { "epoch": 0.5768201913929151, "grad_norm": 0.9778638608023535, "learning_rate": 8.012131448916309e-06, "loss": 0.8229, "step": 6442 }, { "epoch": 0.5769097319379932, "grad_norm": 0.9459635919096719, "learning_rate": 8.009289125717546e-06, "loss": 0.8086, "step": 6443 }, { "epoch": 0.5769992724830713, "grad_norm": 0.93327348591905, "learning_rate": 8.006446969970892e-06, "loss": 0.9015, "step": 6444 }, { "epoch": 0.5770888130281493, "grad_norm": 0.9880004412820398, "learning_rate": 8.00360498191542e-06, "loss": 0.8342, "step": 6445 }, { "epoch": 0.5771783535732273, "grad_norm": 0.9422495703148925, "learning_rate": 8.000763161790191e-06, "loss": 0.8364, "step": 6446 }, { "epoch": 0.5772678941183055, "grad_norm": 0.8795069728102327, "learning_rate": 7.997921509834253e-06, "loss": 0.7863, "step": 6447 }, { "epoch": 0.5773574346633835, "grad_norm": 0.943865611030886, "learning_rate": 7.995080026286632e-06, "loss": 0.8032, "step": 6448 }, { "epoch": 0.5774469752084616, "grad_norm": 0.9511911084077231, "learning_rate": 7.992238711386348e-06, "loss": 0.8403, "step": 6449 }, { "epoch": 0.5775365157535397, "grad_norm": 0.9308580558202565, "learning_rate": 7.9893975653724e-06, "loss": 0.8824, "step": 6450 }, { "epoch": 0.5776260562986177, "grad_norm": 0.9608729100307624, "learning_rate": 7.986556588483776e-06, "loss": 0.8445, "step": 6451 }, { "epoch": 0.5777155968436958, "grad_norm": 0.9172710655616992, "learning_rate": 7.983715780959448e-06, "loss": 0.7698, "step": 6452 }, { "epoch": 0.5778051373887738, "grad_norm": 1.3098724098321528, "learning_rate": 7.980875143038381e-06, "loss": 0.8138, "step": 6453 }, { "epoch": 0.577894677933852, "grad_norm": 0.9238457182182461, "learning_rate": 7.97803467495952e-06, "loss": 0.8468, "step": 6454 }, { "epoch": 0.57798421847893, "grad_norm": 1.0245239461000697, "learning_rate": 7.975194376961788e-06, "loss": 0.8694, "step": 6455 }, { "epoch": 0.578073759024008, "grad_norm": 1.0220135423769703, "learning_rate": 7.972354249284108e-06, "loss": 0.8636, "step": 6456 }, { "epoch": 0.5781632995690861, "grad_norm": 1.0211911329064685, "learning_rate": 7.969514292165383e-06, "loss": 0.8201, "step": 6457 }, { "epoch": 0.5782528401141642, "grad_norm": 0.9262164975198456, "learning_rate": 7.966674505844497e-06, "loss": 0.7861, "step": 6458 }, { "epoch": 0.5783423806592423, "grad_norm": 0.9313697274232522, "learning_rate": 7.963834890560325e-06, "loss": 0.8033, "step": 6459 }, { "epoch": 0.5784319212043203, "grad_norm": 0.873137817051088, "learning_rate": 7.960995446551733e-06, "loss": 0.8247, "step": 6460 }, { "epoch": 0.5785214617493984, "grad_norm": 0.9750954985854806, "learning_rate": 7.958156174057552e-06, "loss": 0.8028, "step": 6461 }, { "epoch": 0.5786110022944765, "grad_norm": 0.9167504155995849, "learning_rate": 7.955317073316622e-06, "loss": 0.8212, "step": 6462 }, { "epoch": 0.5787005428395545, "grad_norm": 0.9747949818731226, "learning_rate": 7.952478144567757e-06, "loss": 0.8006, "step": 6463 }, { "epoch": 0.5787900833846326, "grad_norm": 1.0470035388965684, "learning_rate": 7.949639388049758e-06, "loss": 0.8177, "step": 6464 }, { "epoch": 0.5788796239297107, "grad_norm": 0.9125953701278784, "learning_rate": 7.946800804001415e-06, "loss": 0.8712, "step": 6465 }, { "epoch": 0.5789691644747887, "grad_norm": 0.9367846908157579, "learning_rate": 7.943962392661494e-06, "loss": 0.8458, "step": 6466 }, { "epoch": 0.5790587050198668, "grad_norm": 1.0526166378394979, "learning_rate": 7.941124154268757e-06, "loss": 0.847, "step": 6467 }, { "epoch": 0.5791482455649449, "grad_norm": 1.0490506524031435, "learning_rate": 7.938286089061948e-06, "loss": 0.7942, "step": 6468 }, { "epoch": 0.579237786110023, "grad_norm": 0.9596918034821302, "learning_rate": 7.935448197279794e-06, "loss": 0.8194, "step": 6469 }, { "epoch": 0.579327326655101, "grad_norm": 0.8490260695587314, "learning_rate": 7.932610479161012e-06, "loss": 0.7803, "step": 6470 }, { "epoch": 0.579416867200179, "grad_norm": 0.9551718615986595, "learning_rate": 7.929772934944298e-06, "loss": 0.7898, "step": 6471 }, { "epoch": 0.5795064077452572, "grad_norm": 1.0487460697841804, "learning_rate": 7.926935564868338e-06, "loss": 0.8114, "step": 6472 }, { "epoch": 0.5795959482903352, "grad_norm": 0.9234694604258941, "learning_rate": 7.9240983691718e-06, "loss": 0.7906, "step": 6473 }, { "epoch": 0.5796854888354133, "grad_norm": 0.8535202562732565, "learning_rate": 7.921261348093346e-06, "loss": 0.6869, "step": 6474 }, { "epoch": 0.5797750293804913, "grad_norm": 1.0002062016364197, "learning_rate": 7.918424501871613e-06, "loss": 0.8363, "step": 6475 }, { "epoch": 0.5798645699255695, "grad_norm": 1.1356524774197536, "learning_rate": 7.915587830745229e-06, "loss": 0.7282, "step": 6476 }, { "epoch": 0.5799541104706475, "grad_norm": 1.038139873624416, "learning_rate": 7.912751334952803e-06, "loss": 0.8015, "step": 6477 }, { "epoch": 0.5800436510157255, "grad_norm": 0.8986751948493565, "learning_rate": 7.909915014732932e-06, "loss": 0.8199, "step": 6478 }, { "epoch": 0.5801331915608037, "grad_norm": 0.9738098402958514, "learning_rate": 7.907078870324197e-06, "loss": 0.8661, "step": 6479 }, { "epoch": 0.5802227321058817, "grad_norm": 1.3008857274170373, "learning_rate": 7.904242901965171e-06, "loss": 0.789, "step": 6480 }, { "epoch": 0.5803122726509597, "grad_norm": 0.986083315281657, "learning_rate": 7.901407109894406e-06, "loss": 0.8518, "step": 6481 }, { "epoch": 0.5804018131960378, "grad_norm": 0.9380068620334621, "learning_rate": 7.898571494350429e-06, "loss": 0.8424, "step": 6482 }, { "epoch": 0.5804913537411159, "grad_norm": 0.8927072984860644, "learning_rate": 7.895736055571771e-06, "loss": 0.8523, "step": 6483 }, { "epoch": 0.580580894286194, "grad_norm": 1.0035839803547761, "learning_rate": 7.892900793796942e-06, "loss": 0.7784, "step": 6484 }, { "epoch": 0.580670434831272, "grad_norm": 0.9005322808047742, "learning_rate": 7.890065709264428e-06, "loss": 0.7741, "step": 6485 }, { "epoch": 0.5807599753763502, "grad_norm": 0.9113774279138676, "learning_rate": 7.887230802212714e-06, "loss": 0.8099, "step": 6486 }, { "epoch": 0.5808495159214282, "grad_norm": 0.9053044789014115, "learning_rate": 7.884396072880262e-06, "loss": 0.762, "step": 6487 }, { "epoch": 0.5809390564665062, "grad_norm": 0.9767837015772131, "learning_rate": 7.881561521505515e-06, "loss": 0.83, "step": 6488 }, { "epoch": 0.5810285970115843, "grad_norm": 0.9567417666481594, "learning_rate": 7.878727148326908e-06, "loss": 0.8582, "step": 6489 }, { "epoch": 0.5811181375566624, "grad_norm": 0.8781653500168903, "learning_rate": 7.875892953582862e-06, "loss": 0.8291, "step": 6490 }, { "epoch": 0.5812076781017405, "grad_norm": 1.1064947378322958, "learning_rate": 7.873058937511781e-06, "loss": 0.8636, "step": 6491 }, { "epoch": 0.5812972186468185, "grad_norm": 1.0285956385870236, "learning_rate": 7.870225100352052e-06, "loss": 0.9082, "step": 6492 }, { "epoch": 0.5813867591918965, "grad_norm": 0.993031976140912, "learning_rate": 7.867391442342044e-06, "loss": 0.8146, "step": 6493 }, { "epoch": 0.5814762997369747, "grad_norm": 0.9430822387795121, "learning_rate": 7.864557963720116e-06, "loss": 0.7642, "step": 6494 }, { "epoch": 0.5815658402820527, "grad_norm": 0.8970087812352168, "learning_rate": 7.861724664724617e-06, "loss": 0.7885, "step": 6495 }, { "epoch": 0.5816553808271308, "grad_norm": 1.0270269115861046, "learning_rate": 7.858891545593871e-06, "loss": 0.8346, "step": 6496 }, { "epoch": 0.5817449213722089, "grad_norm": 1.033156520873014, "learning_rate": 7.85605860656619e-06, "loss": 0.8279, "step": 6497 }, { "epoch": 0.5818344619172869, "grad_norm": 0.9731242629229762, "learning_rate": 7.853225847879875e-06, "loss": 0.8459, "step": 6498 }, { "epoch": 0.581924002462365, "grad_norm": 0.9049429967492776, "learning_rate": 7.850393269773205e-06, "loss": 0.774, "step": 6499 }, { "epoch": 0.582013543007443, "grad_norm": 0.9130349754118905, "learning_rate": 7.847560872484444e-06, "loss": 0.8409, "step": 6500 }, { "epoch": 0.5821030835525212, "grad_norm": 1.0792733107916725, "learning_rate": 7.844728656251852e-06, "loss": 0.8298, "step": 6501 }, { "epoch": 0.5821926240975992, "grad_norm": 1.0025652996532626, "learning_rate": 7.841896621313662e-06, "loss": 0.8518, "step": 6502 }, { "epoch": 0.5822821646426772, "grad_norm": 0.985825961955646, "learning_rate": 7.839064767908097e-06, "loss": 0.8403, "step": 6503 }, { "epoch": 0.5823717051877554, "grad_norm": 0.9400057664501444, "learning_rate": 7.83623309627336e-06, "loss": 0.8043, "step": 6504 }, { "epoch": 0.5824612457328334, "grad_norm": 0.9606753748617977, "learning_rate": 7.833401606647644e-06, "loss": 0.8153, "step": 6505 }, { "epoch": 0.5825507862779115, "grad_norm": 0.9326425624792472, "learning_rate": 7.830570299269123e-06, "loss": 0.7684, "step": 6506 }, { "epoch": 0.5826403268229895, "grad_norm": 1.3163131549117357, "learning_rate": 7.827739174375959e-06, "loss": 0.8561, "step": 6507 }, { "epoch": 0.5827298673680676, "grad_norm": 0.8572810905846464, "learning_rate": 7.824908232206299e-06, "loss": 0.7907, "step": 6508 }, { "epoch": 0.5828194079131457, "grad_norm": 0.9435299220457629, "learning_rate": 7.822077472998271e-06, "loss": 0.8369, "step": 6509 }, { "epoch": 0.5829089484582237, "grad_norm": 0.9467162466302117, "learning_rate": 7.819246896989989e-06, "loss": 0.7816, "step": 6510 }, { "epoch": 0.5829984890033018, "grad_norm": 1.0073229335728897, "learning_rate": 7.816416504419549e-06, "loss": 0.8768, "step": 6511 }, { "epoch": 0.5830880295483799, "grad_norm": 0.9844796030669847, "learning_rate": 7.81358629552504e-06, "loss": 0.8806, "step": 6512 }, { "epoch": 0.5831775700934579, "grad_norm": 0.9864634159572417, "learning_rate": 7.810756270544522e-06, "loss": 0.8214, "step": 6513 }, { "epoch": 0.583267110638536, "grad_norm": 1.128980745728833, "learning_rate": 7.80792642971606e-06, "loss": 0.8028, "step": 6514 }, { "epoch": 0.5833566511836141, "grad_norm": 0.9273665232179712, "learning_rate": 7.805096773277677e-06, "loss": 0.8192, "step": 6515 }, { "epoch": 0.5834461917286922, "grad_norm": 0.9271952357233152, "learning_rate": 7.802267301467401e-06, "loss": 0.7855, "step": 6516 }, { "epoch": 0.5835357322737702, "grad_norm": 0.9739860436020333, "learning_rate": 7.799438014523241e-06, "loss": 0.8071, "step": 6517 }, { "epoch": 0.5836252728188482, "grad_norm": 0.8924882977585086, "learning_rate": 7.796608912683182e-06, "loss": 0.8319, "step": 6518 }, { "epoch": 0.5837148133639264, "grad_norm": 1.0414374688833659, "learning_rate": 7.793779996185201e-06, "loss": 0.8345, "step": 6519 }, { "epoch": 0.5838043539090044, "grad_norm": 0.9258732752281499, "learning_rate": 7.790951265267261e-06, "loss": 0.8539, "step": 6520 }, { "epoch": 0.5838938944540825, "grad_norm": 1.017117219658151, "learning_rate": 7.788122720167298e-06, "loss": 0.8369, "step": 6521 }, { "epoch": 0.5839834349991606, "grad_norm": 0.9179593805442967, "learning_rate": 7.785294361123244e-06, "loss": 0.7987, "step": 6522 }, { "epoch": 0.5840729755442386, "grad_norm": 0.9464236419664835, "learning_rate": 7.782466188373013e-06, "loss": 0.858, "step": 6523 }, { "epoch": 0.5841625160893167, "grad_norm": 1.0876593481477803, "learning_rate": 7.779638202154499e-06, "loss": 0.8192, "step": 6524 }, { "epoch": 0.5842520566343947, "grad_norm": 1.0477459710207044, "learning_rate": 7.776810402705586e-06, "loss": 0.8673, "step": 6525 }, { "epoch": 0.5843415971794729, "grad_norm": 0.9166272871051644, "learning_rate": 7.773982790264136e-06, "loss": 0.8423, "step": 6526 }, { "epoch": 0.5844311377245509, "grad_norm": 0.9351455763214804, "learning_rate": 7.771155365067996e-06, "loss": 0.7638, "step": 6527 }, { "epoch": 0.584520678269629, "grad_norm": 0.9139307395077264, "learning_rate": 7.768328127355008e-06, "loss": 0.8371, "step": 6528 }, { "epoch": 0.584610218814707, "grad_norm": 0.9134923663957756, "learning_rate": 7.765501077362985e-06, "loss": 0.8163, "step": 6529 }, { "epoch": 0.5846997593597851, "grad_norm": 1.0035874345575289, "learning_rate": 7.762674215329729e-06, "loss": 0.8271, "step": 6530 }, { "epoch": 0.5847892999048632, "grad_norm": 0.9569251843374329, "learning_rate": 7.759847541493028e-06, "loss": 0.8346, "step": 6531 }, { "epoch": 0.5848788404499412, "grad_norm": 0.9479052696463844, "learning_rate": 7.757021056090652e-06, "loss": 0.8051, "step": 6532 }, { "epoch": 0.5849683809950194, "grad_norm": 0.9111167163791011, "learning_rate": 7.754194759360353e-06, "loss": 0.8326, "step": 6533 }, { "epoch": 0.5850579215400974, "grad_norm": 1.0209096740325037, "learning_rate": 7.751368651539875e-06, "loss": 0.8456, "step": 6534 }, { "epoch": 0.5851474620851754, "grad_norm": 1.0542473094057696, "learning_rate": 7.748542732866937e-06, "loss": 0.8434, "step": 6535 }, { "epoch": 0.5852370026302535, "grad_norm": 0.9153273199910147, "learning_rate": 7.745717003579249e-06, "loss": 0.8424, "step": 6536 }, { "epoch": 0.5853265431753316, "grad_norm": 0.8816081227294614, "learning_rate": 7.742891463914501e-06, "loss": 0.8173, "step": 6537 }, { "epoch": 0.5854160837204097, "grad_norm": 0.9069138966185126, "learning_rate": 7.740066114110365e-06, "loss": 0.8031, "step": 6538 }, { "epoch": 0.5855056242654877, "grad_norm": 0.9351886346572181, "learning_rate": 7.737240954404506e-06, "loss": 0.7647, "step": 6539 }, { "epoch": 0.5855951648105658, "grad_norm": 0.9419244196572438, "learning_rate": 7.73441598503456e-06, "loss": 0.8174, "step": 6540 }, { "epoch": 0.5856847053556439, "grad_norm": 0.9256638253441095, "learning_rate": 7.731591206238166e-06, "loss": 0.841, "step": 6541 }, { "epoch": 0.5857742459007219, "grad_norm": 0.8859346176872049, "learning_rate": 7.728766618252921e-06, "loss": 0.7971, "step": 6542 }, { "epoch": 0.5858637864458, "grad_norm": 0.9077510648784982, "learning_rate": 7.725942221316428e-06, "loss": 0.8859, "step": 6543 }, { "epoch": 0.5859533269908781, "grad_norm": 1.0515475937348417, "learning_rate": 7.723118015666266e-06, "loss": 0.8045, "step": 6544 }, { "epoch": 0.5860428675359561, "grad_norm": 0.9256529641755565, "learning_rate": 7.720294001539996e-06, "loss": 0.7842, "step": 6545 }, { "epoch": 0.5861324080810342, "grad_norm": 0.8595442360291764, "learning_rate": 7.717470179175164e-06, "loss": 0.8309, "step": 6546 }, { "epoch": 0.5862219486261122, "grad_norm": 1.2250932395170544, "learning_rate": 7.714646548809309e-06, "loss": 0.7827, "step": 6547 }, { "epoch": 0.5863114891711904, "grad_norm": 0.9690867562603219, "learning_rate": 7.711823110679933e-06, "loss": 0.8023, "step": 6548 }, { "epoch": 0.5864010297162684, "grad_norm": 0.9103584777312022, "learning_rate": 7.708999865024541e-06, "loss": 0.7929, "step": 6549 }, { "epoch": 0.5864905702613464, "grad_norm": 0.9742186744014101, "learning_rate": 7.706176812080616e-06, "loss": 0.8751, "step": 6550 }, { "epoch": 0.5865801108064246, "grad_norm": 0.9601507606348049, "learning_rate": 7.703353952085622e-06, "loss": 0.7875, "step": 6551 }, { "epoch": 0.5866696513515026, "grad_norm": 0.9199750658989079, "learning_rate": 7.700531285277012e-06, "loss": 0.8012, "step": 6552 }, { "epoch": 0.5867591918965807, "grad_norm": 1.4820086786098718, "learning_rate": 7.697708811892214e-06, "loss": 0.7707, "step": 6553 }, { "epoch": 0.5868487324416587, "grad_norm": 0.9792661465131891, "learning_rate": 7.694886532168649e-06, "loss": 0.8054, "step": 6554 }, { "epoch": 0.5869382729867368, "grad_norm": 0.9310413611926751, "learning_rate": 7.692064446343717e-06, "loss": 0.8559, "step": 6555 }, { "epoch": 0.5870278135318149, "grad_norm": 0.9599698243040112, "learning_rate": 7.689242554654801e-06, "loss": 0.8214, "step": 6556 }, { "epoch": 0.5871173540768929, "grad_norm": 0.9910594408304109, "learning_rate": 7.686420857339274e-06, "loss": 0.8551, "step": 6557 }, { "epoch": 0.5872068946219711, "grad_norm": 0.978620586844061, "learning_rate": 7.683599354634488e-06, "loss": 0.8161, "step": 6558 }, { "epoch": 0.5872964351670491, "grad_norm": 0.9575334617658113, "learning_rate": 7.680778046777771e-06, "loss": 0.8348, "step": 6559 }, { "epoch": 0.5873859757121271, "grad_norm": 0.8698630229875397, "learning_rate": 7.677956934006447e-06, "loss": 0.8298, "step": 6560 }, { "epoch": 0.5874755162572052, "grad_norm": 0.8907208647401973, "learning_rate": 7.675136016557821e-06, "loss": 0.7853, "step": 6561 }, { "epoch": 0.5875650568022833, "grad_norm": 0.9846409857100126, "learning_rate": 7.672315294669176e-06, "loss": 0.8031, "step": 6562 }, { "epoch": 0.5876545973473614, "grad_norm": 1.0249112021708875, "learning_rate": 7.669494768577786e-06, "loss": 0.8438, "step": 6563 }, { "epoch": 0.5877441378924394, "grad_norm": 1.3690566969401612, "learning_rate": 7.6666744385209e-06, "loss": 0.8579, "step": 6564 }, { "epoch": 0.5878336784375174, "grad_norm": 0.8134557783595218, "learning_rate": 7.663854304735756e-06, "loss": 0.8096, "step": 6565 }, { "epoch": 0.5879232189825956, "grad_norm": 0.9140859010134188, "learning_rate": 7.661034367459574e-06, "loss": 0.8743, "step": 6566 }, { "epoch": 0.5880127595276736, "grad_norm": 0.9802241986350292, "learning_rate": 7.65821462692956e-06, "loss": 0.8385, "step": 6567 }, { "epoch": 0.5881023000727517, "grad_norm": 0.9339292607956735, "learning_rate": 7.6553950833829e-06, "loss": 0.8375, "step": 6568 }, { "epoch": 0.5881918406178298, "grad_norm": 0.9329896670811443, "learning_rate": 7.652575737056766e-06, "loss": 0.8057, "step": 6569 }, { "epoch": 0.5882813811629078, "grad_norm": 0.9600981929218849, "learning_rate": 7.649756588188312e-06, "loss": 0.7864, "step": 6570 }, { "epoch": 0.5883709217079859, "grad_norm": 0.8888557952450733, "learning_rate": 7.646937637014674e-06, "loss": 0.8099, "step": 6571 }, { "epoch": 0.5884604622530639, "grad_norm": 1.0498738590788856, "learning_rate": 7.644118883772975e-06, "loss": 0.8472, "step": 6572 }, { "epoch": 0.5885500027981421, "grad_norm": 0.9882924258730189, "learning_rate": 7.641300328700314e-06, "loss": 0.7891, "step": 6573 }, { "epoch": 0.5886395433432201, "grad_norm": 0.990703206817161, "learning_rate": 7.638481972033792e-06, "loss": 0.8337, "step": 6574 }, { "epoch": 0.5887290838882981, "grad_norm": 1.0976528913213919, "learning_rate": 7.635663814010464e-06, "loss": 0.8589, "step": 6575 }, { "epoch": 0.5888186244333763, "grad_norm": 0.9187144225746039, "learning_rate": 7.632845854867393e-06, "loss": 0.7801, "step": 6576 }, { "epoch": 0.5889081649784543, "grad_norm": 0.9524297820233272, "learning_rate": 7.630028094841615e-06, "loss": 0.7885, "step": 6577 }, { "epoch": 0.5889977055235324, "grad_norm": 1.024816718414577, "learning_rate": 7.627210534170149e-06, "loss": 0.7966, "step": 6578 }, { "epoch": 0.5890872460686104, "grad_norm": 1.0772812865675907, "learning_rate": 7.624393173090001e-06, "loss": 0.8591, "step": 6579 }, { "epoch": 0.5891767866136886, "grad_norm": 0.8780474771131742, "learning_rate": 7.621576011838163e-06, "loss": 0.7758, "step": 6580 }, { "epoch": 0.5892663271587666, "grad_norm": 0.9261027670092598, "learning_rate": 7.618759050651594e-06, "loss": 0.8416, "step": 6581 }, { "epoch": 0.5893558677038446, "grad_norm": 0.9577012711410549, "learning_rate": 7.615942289767257e-06, "loss": 0.8043, "step": 6582 }, { "epoch": 0.5894454082489227, "grad_norm": 0.9866846058374945, "learning_rate": 7.613125729422084e-06, "loss": 0.8264, "step": 6583 }, { "epoch": 0.5895349487940008, "grad_norm": 0.9347165768098294, "learning_rate": 7.610309369852997e-06, "loss": 0.8518, "step": 6584 }, { "epoch": 0.5896244893390788, "grad_norm": 1.058519527651719, "learning_rate": 7.607493211296902e-06, "loss": 0.8343, "step": 6585 }, { "epoch": 0.5897140298841569, "grad_norm": 0.9374464299727447, "learning_rate": 7.604677253990678e-06, "loss": 0.821, "step": 6586 }, { "epoch": 0.589803570429235, "grad_norm": 1.0949965730405946, "learning_rate": 7.601861498171197e-06, "loss": 0.812, "step": 6587 }, { "epoch": 0.5898931109743131, "grad_norm": 0.9141566656520829, "learning_rate": 7.599045944075312e-06, "loss": 0.8112, "step": 6588 }, { "epoch": 0.5899826515193911, "grad_norm": 0.9371639507897345, "learning_rate": 7.596230591939859e-06, "loss": 0.762, "step": 6589 }, { "epoch": 0.5900721920644691, "grad_norm": 0.9132152418441872, "learning_rate": 7.593415442001657e-06, "loss": 0.8568, "step": 6590 }, { "epoch": 0.5901617326095473, "grad_norm": 0.9313319111733054, "learning_rate": 7.590600494497507e-06, "loss": 0.8421, "step": 6591 }, { "epoch": 0.5902512731546253, "grad_norm": 0.8835951660845978, "learning_rate": 7.5877857496641885e-06, "loss": 0.8216, "step": 6592 }, { "epoch": 0.5903408136997034, "grad_norm": 1.0009783689524365, "learning_rate": 7.584971207738473e-06, "loss": 0.8697, "step": 6593 }, { "epoch": 0.5904303542447815, "grad_norm": 0.9959506517717879, "learning_rate": 7.582156868957106e-06, "loss": 0.8044, "step": 6594 }, { "epoch": 0.5905198947898596, "grad_norm": 0.9080331391788492, "learning_rate": 7.57934273355683e-06, "loss": 0.819, "step": 6595 }, { "epoch": 0.5906094353349376, "grad_norm": 1.0180685387012558, "learning_rate": 7.576528801774354e-06, "loss": 0.7967, "step": 6596 }, { "epoch": 0.5906989758800156, "grad_norm": 0.8637798463607022, "learning_rate": 7.5737150738463764e-06, "loss": 0.7979, "step": 6597 }, { "epoch": 0.5907885164250938, "grad_norm": 0.9856224075049617, "learning_rate": 7.5709015500095805e-06, "loss": 0.8151, "step": 6598 }, { "epoch": 0.5908780569701718, "grad_norm": 1.04226107780226, "learning_rate": 7.56808823050063e-06, "loss": 0.8121, "step": 6599 }, { "epoch": 0.5909675975152499, "grad_norm": 0.8948428969388661, "learning_rate": 7.565275115556171e-06, "loss": 0.7475, "step": 6600 }, { "epoch": 0.5910571380603279, "grad_norm": 0.9752955154662264, "learning_rate": 7.562462205412841e-06, "loss": 0.8382, "step": 6601 }, { "epoch": 0.591146678605406, "grad_norm": 1.0173754421185708, "learning_rate": 7.5596495003072426e-06, "loss": 0.8164, "step": 6602 }, { "epoch": 0.5912362191504841, "grad_norm": 1.0882351641410581, "learning_rate": 7.556837000475976e-06, "loss": 0.7666, "step": 6603 }, { "epoch": 0.5913257596955621, "grad_norm": 1.0796334098674405, "learning_rate": 7.554024706155621e-06, "loss": 0.8109, "step": 6604 }, { "epoch": 0.5914153002406403, "grad_norm": 1.0328346127183168, "learning_rate": 7.551212617582735e-06, "loss": 0.7747, "step": 6605 }, { "epoch": 0.5915048407857183, "grad_norm": 1.034656528708423, "learning_rate": 7.548400734993863e-06, "loss": 0.8313, "step": 6606 }, { "epoch": 0.5915943813307963, "grad_norm": 1.0502832269977334, "learning_rate": 7.545589058625537e-06, "loss": 0.8804, "step": 6607 }, { "epoch": 0.5916839218758744, "grad_norm": 0.971353790024091, "learning_rate": 7.542777588714256e-06, "loss": 0.8595, "step": 6608 }, { "epoch": 0.5917734624209525, "grad_norm": 1.0060084915729497, "learning_rate": 7.539966325496519e-06, "loss": 0.8414, "step": 6609 }, { "epoch": 0.5918630029660306, "grad_norm": 1.0547995092521565, "learning_rate": 7.537155269208799e-06, "loss": 0.8137, "step": 6610 }, { "epoch": 0.5919525435111086, "grad_norm": 0.9629636976761035, "learning_rate": 7.534344420087552e-06, "loss": 0.8485, "step": 6611 }, { "epoch": 0.5920420840561867, "grad_norm": 0.9220577037706273, "learning_rate": 7.5315337783692176e-06, "loss": 0.8281, "step": 6612 }, { "epoch": 0.5921316246012648, "grad_norm": 0.9431706916344231, "learning_rate": 7.528723344290218e-06, "loss": 0.8414, "step": 6613 }, { "epoch": 0.5922211651463428, "grad_norm": 1.1325729741379715, "learning_rate": 7.525913118086954e-06, "loss": 0.8066, "step": 6614 }, { "epoch": 0.5923107056914209, "grad_norm": 0.9693362756352698, "learning_rate": 7.523103099995818e-06, "loss": 0.8248, "step": 6615 }, { "epoch": 0.592400246236499, "grad_norm": 0.9243909930039331, "learning_rate": 7.520293290253178e-06, "loss": 0.8153, "step": 6616 }, { "epoch": 0.592489786781577, "grad_norm": 1.065544866218688, "learning_rate": 7.517483689095386e-06, "loss": 0.8501, "step": 6617 }, { "epoch": 0.5925793273266551, "grad_norm": 1.0177646318500573, "learning_rate": 7.514674296758779e-06, "loss": 0.8582, "step": 6618 }, { "epoch": 0.5926688678717331, "grad_norm": 1.0279589796450326, "learning_rate": 7.511865113479668e-06, "loss": 0.7605, "step": 6619 }, { "epoch": 0.5927584084168113, "grad_norm": 0.8331584710905736, "learning_rate": 7.509056139494357e-06, "loss": 0.7925, "step": 6620 }, { "epoch": 0.5928479489618893, "grad_norm": 0.9247971416921925, "learning_rate": 7.506247375039123e-06, "loss": 0.7744, "step": 6621 }, { "epoch": 0.5929374895069673, "grad_norm": 1.0188921667513617, "learning_rate": 7.503438820350236e-06, "loss": 0.8375, "step": 6622 }, { "epoch": 0.5930270300520455, "grad_norm": 0.9837046396693102, "learning_rate": 7.500630475663941e-06, "loss": 0.8269, "step": 6623 }, { "epoch": 0.5931165705971235, "grad_norm": 0.9483604334552367, "learning_rate": 7.497822341216465e-06, "loss": 0.7799, "step": 6624 }, { "epoch": 0.5932061111422016, "grad_norm": 0.8952056460498599, "learning_rate": 7.4950144172440195e-06, "loss": 0.8665, "step": 6625 }, { "epoch": 0.5932956516872796, "grad_norm": 0.9165907640280305, "learning_rate": 7.492206703982798e-06, "loss": 0.8057, "step": 6626 }, { "epoch": 0.5933851922323577, "grad_norm": 0.8811658502082405, "learning_rate": 7.4893992016689745e-06, "loss": 0.8174, "step": 6627 }, { "epoch": 0.5934747327774358, "grad_norm": 1.1397267300945662, "learning_rate": 7.4865919105387105e-06, "loss": 0.7778, "step": 6628 }, { "epoch": 0.5935642733225138, "grad_norm": 0.9572251559124834, "learning_rate": 7.483784830828147e-06, "loss": 0.8369, "step": 6629 }, { "epoch": 0.593653813867592, "grad_norm": 1.026993803232246, "learning_rate": 7.4809779627734016e-06, "loss": 0.7854, "step": 6630 }, { "epoch": 0.59374335441267, "grad_norm": 0.961356987297155, "learning_rate": 7.478171306610582e-06, "loss": 0.8637, "step": 6631 }, { "epoch": 0.593832894957748, "grad_norm": 0.9777332229615109, "learning_rate": 7.4753648625757735e-06, "loss": 0.8366, "step": 6632 }, { "epoch": 0.5939224355028261, "grad_norm": 1.0506171660220092, "learning_rate": 7.472558630905043e-06, "loss": 0.8419, "step": 6633 }, { "epoch": 0.5940119760479042, "grad_norm": 0.9638614869840575, "learning_rate": 7.469752611834451e-06, "loss": 0.8343, "step": 6634 }, { "epoch": 0.5941015165929823, "grad_norm": 1.2795868767348604, "learning_rate": 7.466946805600019e-06, "loss": 0.8001, "step": 6635 }, { "epoch": 0.5941910571380603, "grad_norm": 1.21051507582244, "learning_rate": 7.464141212437768e-06, "loss": 0.8571, "step": 6636 }, { "epoch": 0.5942805976831383, "grad_norm": 0.8602868087258023, "learning_rate": 7.461335832583695e-06, "loss": 0.8389, "step": 6637 }, { "epoch": 0.5943701382282165, "grad_norm": 0.950213010689888, "learning_rate": 7.458530666273779e-06, "loss": 0.791, "step": 6638 }, { "epoch": 0.5944596787732945, "grad_norm": 0.9833956455630151, "learning_rate": 7.455725713743979e-06, "loss": 0.8218, "step": 6639 }, { "epoch": 0.5945492193183726, "grad_norm": 1.1059489021895281, "learning_rate": 7.452920975230247e-06, "loss": 0.7433, "step": 6640 }, { "epoch": 0.5946387598634507, "grad_norm": 0.8681071021260057, "learning_rate": 7.450116450968497e-06, "loss": 0.8182, "step": 6641 }, { "epoch": 0.5947283004085288, "grad_norm": 1.0024667623804866, "learning_rate": 7.447312141194643e-06, "loss": 0.8157, "step": 6642 }, { "epoch": 0.5948178409536068, "grad_norm": 0.9719622416623019, "learning_rate": 7.444508046144574e-06, "loss": 0.8695, "step": 6643 }, { "epoch": 0.5949073814986848, "grad_norm": 0.9294176887348778, "learning_rate": 7.44170416605416e-06, "loss": 0.8214, "step": 6644 }, { "epoch": 0.594996922043763, "grad_norm": 0.9718671340228515, "learning_rate": 7.4389005011592575e-06, "loss": 0.823, "step": 6645 }, { "epoch": 0.595086462588841, "grad_norm": 0.9437341618383176, "learning_rate": 7.436097051695696e-06, "loss": 0.8371, "step": 6646 }, { "epoch": 0.595176003133919, "grad_norm": 0.9651042742065485, "learning_rate": 7.433293817899296e-06, "loss": 0.7833, "step": 6647 }, { "epoch": 0.5952655436789972, "grad_norm": 1.0371490080918673, "learning_rate": 7.430490800005854e-06, "loss": 0.8364, "step": 6648 }, { "epoch": 0.5953550842240752, "grad_norm": 1.0919618658274939, "learning_rate": 7.427687998251155e-06, "loss": 0.8315, "step": 6649 }, { "epoch": 0.5954446247691533, "grad_norm": 0.9848350557440565, "learning_rate": 7.424885412870959e-06, "loss": 0.8417, "step": 6650 }, { "epoch": 0.5955341653142313, "grad_norm": 0.9944695833333687, "learning_rate": 7.422083044101012e-06, "loss": 0.8116, "step": 6651 }, { "epoch": 0.5956237058593095, "grad_norm": 0.9530477781466355, "learning_rate": 7.419280892177037e-06, "loss": 0.808, "step": 6652 }, { "epoch": 0.5957132464043875, "grad_norm": 0.9184209224358462, "learning_rate": 7.416478957334743e-06, "loss": 0.7818, "step": 6653 }, { "epoch": 0.5958027869494655, "grad_norm": 0.9256800079559844, "learning_rate": 7.41367723980982e-06, "loss": 0.8011, "step": 6654 }, { "epoch": 0.5958923274945436, "grad_norm": 1.1317921681695022, "learning_rate": 7.410875739837939e-06, "loss": 0.8058, "step": 6655 }, { "epoch": 0.5959818680396217, "grad_norm": 1.0354288676675154, "learning_rate": 7.408074457654757e-06, "loss": 0.8077, "step": 6656 }, { "epoch": 0.5960714085846998, "grad_norm": 0.8750470024579546, "learning_rate": 7.405273393495904e-06, "loss": 0.8982, "step": 6657 }, { "epoch": 0.5961609491297778, "grad_norm": 0.9690781573025311, "learning_rate": 7.402472547596996e-06, "loss": 0.8708, "step": 6658 }, { "epoch": 0.5962504896748559, "grad_norm": 1.052874930621147, "learning_rate": 7.399671920193634e-06, "loss": 0.8308, "step": 6659 }, { "epoch": 0.596340030219934, "grad_norm": 0.9086861232804438, "learning_rate": 7.396871511521393e-06, "loss": 0.8532, "step": 6660 }, { "epoch": 0.596429570765012, "grad_norm": 0.9358289297314465, "learning_rate": 7.3940713218158415e-06, "loss": 0.7629, "step": 6661 }, { "epoch": 0.59651911131009, "grad_norm": 0.9562086191653743, "learning_rate": 7.3912713513125185e-06, "loss": 0.8101, "step": 6662 }, { "epoch": 0.5966086518551682, "grad_norm": 0.92783742408096, "learning_rate": 7.388471600246948e-06, "loss": 0.7502, "step": 6663 }, { "epoch": 0.5966981924002462, "grad_norm": 0.9655393488611334, "learning_rate": 7.385672068854636e-06, "loss": 0.8647, "step": 6664 }, { "epoch": 0.5967877329453243, "grad_norm": 0.9608357231144287, "learning_rate": 7.382872757371069e-06, "loss": 0.8135, "step": 6665 }, { "epoch": 0.5968772734904024, "grad_norm": 1.0700632918641717, "learning_rate": 7.380073666031717e-06, "loss": 0.8134, "step": 6666 }, { "epoch": 0.5969668140354805, "grad_norm": 0.9087431165462603, "learning_rate": 7.377274795072036e-06, "loss": 0.8256, "step": 6667 }, { "epoch": 0.5970563545805585, "grad_norm": 0.9564177881585979, "learning_rate": 7.374476144727446e-06, "loss": 0.8152, "step": 6668 }, { "epoch": 0.5971458951256365, "grad_norm": 0.9914061198177218, "learning_rate": 7.371677715233369e-06, "loss": 0.7979, "step": 6669 }, { "epoch": 0.5972354356707147, "grad_norm": 0.8803631437245757, "learning_rate": 7.368879506825197e-06, "loss": 0.7826, "step": 6670 }, { "epoch": 0.5973249762157927, "grad_norm": 0.9476148650530133, "learning_rate": 7.366081519738309e-06, "loss": 0.8275, "step": 6671 }, { "epoch": 0.5974145167608708, "grad_norm": 0.918048481029053, "learning_rate": 7.363283754208061e-06, "loss": 0.827, "step": 6672 }, { "epoch": 0.5975040573059488, "grad_norm": 0.907303951395112, "learning_rate": 7.36048621046979e-06, "loss": 0.8025, "step": 6673 }, { "epoch": 0.597593597851027, "grad_norm": 1.0078204047292332, "learning_rate": 7.357688888758816e-06, "loss": 0.8475, "step": 6674 }, { "epoch": 0.597683138396105, "grad_norm": 0.9428722770124248, "learning_rate": 7.354891789310441e-06, "loss": 0.8173, "step": 6675 }, { "epoch": 0.597772678941183, "grad_norm": 0.946691591417038, "learning_rate": 7.352094912359951e-06, "loss": 0.7932, "step": 6676 }, { "epoch": 0.5978622194862612, "grad_norm": 0.943795253530749, "learning_rate": 7.34929825814261e-06, "loss": 0.8224, "step": 6677 }, { "epoch": 0.5979517600313392, "grad_norm": 0.9063418757229791, "learning_rate": 7.346501826893662e-06, "loss": 0.8324, "step": 6678 }, { "epoch": 0.5980413005764172, "grad_norm": 0.9873556412072985, "learning_rate": 7.343705618848331e-06, "loss": 0.8447, "step": 6679 }, { "epoch": 0.5981308411214953, "grad_norm": 0.9258455842112123, "learning_rate": 7.340909634241827e-06, "loss": 0.818, "step": 6680 }, { "epoch": 0.5982203816665734, "grad_norm": 0.8572335827649304, "learning_rate": 7.338113873309338e-06, "loss": 0.838, "step": 6681 }, { "epoch": 0.5983099222116515, "grad_norm": 0.9954530222602024, "learning_rate": 7.335318336286038e-06, "loss": 0.8436, "step": 6682 }, { "epoch": 0.5983994627567295, "grad_norm": 0.9041859453918185, "learning_rate": 7.332523023407079e-06, "loss": 0.7962, "step": 6683 }, { "epoch": 0.5984890033018077, "grad_norm": 0.8860385183541469, "learning_rate": 7.329727934907587e-06, "loss": 0.8275, "step": 6684 }, { "epoch": 0.5985785438468857, "grad_norm": 0.8931839929902914, "learning_rate": 7.3269330710226805e-06, "loss": 0.8256, "step": 6685 }, { "epoch": 0.5986680843919637, "grad_norm": 0.9604929691741774, "learning_rate": 7.324138431987453e-06, "loss": 0.8283, "step": 6686 }, { "epoch": 0.5987576249370418, "grad_norm": 0.9843119965776497, "learning_rate": 7.321344018036978e-06, "loss": 0.77, "step": 6687 }, { "epoch": 0.5988471654821199, "grad_norm": 0.9563474242461167, "learning_rate": 7.318549829406318e-06, "loss": 0.7943, "step": 6688 }, { "epoch": 0.598936706027198, "grad_norm": 1.1590892639588533, "learning_rate": 7.3157558663305115e-06, "loss": 0.8356, "step": 6689 }, { "epoch": 0.599026246572276, "grad_norm": 1.0970422858005777, "learning_rate": 7.31296212904457e-06, "loss": 0.8649, "step": 6690 }, { "epoch": 0.599115787117354, "grad_norm": 0.8863977322658771, "learning_rate": 7.3101686177834994e-06, "loss": 0.8064, "step": 6691 }, { "epoch": 0.5992053276624322, "grad_norm": 0.8702991587939508, "learning_rate": 7.307375332782279e-06, "loss": 0.8127, "step": 6692 }, { "epoch": 0.5992948682075102, "grad_norm": 0.9648807291822504, "learning_rate": 7.3045822742758695e-06, "loss": 0.8397, "step": 6693 }, { "epoch": 0.5993844087525882, "grad_norm": 0.9349920075668192, "learning_rate": 7.301789442499222e-06, "loss": 0.7727, "step": 6694 }, { "epoch": 0.5994739492976664, "grad_norm": 0.9305387311643484, "learning_rate": 7.298996837687246e-06, "loss": 0.8084, "step": 6695 }, { "epoch": 0.5995634898427444, "grad_norm": 0.9995157712927442, "learning_rate": 7.2962044600748584e-06, "loss": 0.8876, "step": 6696 }, { "epoch": 0.5996530303878225, "grad_norm": 0.9479955580250966, "learning_rate": 7.293412309896939e-06, "loss": 0.8695, "step": 6697 }, { "epoch": 0.5997425709329005, "grad_norm": 0.8919622355662048, "learning_rate": 7.2906203873883575e-06, "loss": 0.851, "step": 6698 }, { "epoch": 0.5998321114779787, "grad_norm": 1.0292728865159777, "learning_rate": 7.287828692783957e-06, "loss": 0.8357, "step": 6699 }, { "epoch": 0.5999216520230567, "grad_norm": 1.0104647615684283, "learning_rate": 7.285037226318576e-06, "loss": 0.8509, "step": 6700 }, { "epoch": 0.6000111925681347, "grad_norm": 1.0808336523098174, "learning_rate": 7.282245988227011e-06, "loss": 0.8396, "step": 6701 }, { "epoch": 0.6001007331132129, "grad_norm": 1.0342034904317794, "learning_rate": 7.279454978744055e-06, "loss": 0.8498, "step": 6702 }, { "epoch": 0.6001902736582909, "grad_norm": 0.9691680708797978, "learning_rate": 7.2766641981044824e-06, "loss": 0.8723, "step": 6703 }, { "epoch": 0.600279814203369, "grad_norm": 0.9707257010299679, "learning_rate": 7.273873646543044e-06, "loss": 0.8498, "step": 6704 }, { "epoch": 0.600369354748447, "grad_norm": 0.9691527262924255, "learning_rate": 7.2710833242944725e-06, "loss": 0.7999, "step": 6705 }, { "epoch": 0.6004588952935251, "grad_norm": 0.9178934345650496, "learning_rate": 7.268293231593477e-06, "loss": 0.8249, "step": 6706 }, { "epoch": 0.6005484358386032, "grad_norm": 1.4312256263869274, "learning_rate": 7.265503368674754e-06, "loss": 0.8669, "step": 6707 }, { "epoch": 0.6006379763836812, "grad_norm": 0.9462345880915896, "learning_rate": 7.262713735772973e-06, "loss": 0.8166, "step": 6708 }, { "epoch": 0.6007275169287593, "grad_norm": 0.9028460380832712, "learning_rate": 7.259924333122795e-06, "loss": 0.8125, "step": 6709 }, { "epoch": 0.6008170574738374, "grad_norm": 0.9816289605610599, "learning_rate": 7.257135160958854e-06, "loss": 0.8665, "step": 6710 }, { "epoch": 0.6009065980189154, "grad_norm": 0.8754134057317976, "learning_rate": 7.254346219515766e-06, "loss": 0.7611, "step": 6711 }, { "epoch": 0.6009961385639935, "grad_norm": 1.0598595090404133, "learning_rate": 7.251557509028125e-06, "loss": 0.8248, "step": 6712 }, { "epoch": 0.6010856791090716, "grad_norm": 0.8642784349827424, "learning_rate": 7.24876902973051e-06, "loss": 0.8018, "step": 6713 }, { "epoch": 0.6011752196541497, "grad_norm": 0.9907867942271675, "learning_rate": 7.245980781857477e-06, "loss": 0.8709, "step": 6714 }, { "epoch": 0.6012647601992277, "grad_norm": 0.8646124239626344, "learning_rate": 7.2431927656435674e-06, "loss": 0.7726, "step": 6715 }, { "epoch": 0.6013543007443057, "grad_norm": 0.9684102420119395, "learning_rate": 7.240404981323301e-06, "loss": 0.8754, "step": 6716 }, { "epoch": 0.6014438412893839, "grad_norm": 1.2274486156391375, "learning_rate": 7.2376174291311745e-06, "loss": 0.8328, "step": 6717 }, { "epoch": 0.6015333818344619, "grad_norm": 0.8972417097653106, "learning_rate": 7.234830109301667e-06, "loss": 0.8228, "step": 6718 }, { "epoch": 0.60162292237954, "grad_norm": 0.8762515413788429, "learning_rate": 7.23204302206924e-06, "loss": 0.8035, "step": 6719 }, { "epoch": 0.6017124629246181, "grad_norm": 0.9617789857605489, "learning_rate": 7.2292561676683305e-06, "loss": 0.8427, "step": 6720 }, { "epoch": 0.6018020034696961, "grad_norm": 0.88656612031784, "learning_rate": 7.2264695463333655e-06, "loss": 0.8557, "step": 6721 }, { "epoch": 0.6018915440147742, "grad_norm": 1.016314347631598, "learning_rate": 7.223683158298748e-06, "loss": 0.8501, "step": 6722 }, { "epoch": 0.6019810845598522, "grad_norm": 1.041439772847646, "learning_rate": 7.220897003798852e-06, "loss": 0.8437, "step": 6723 }, { "epoch": 0.6020706251049304, "grad_norm": 0.9659979101272343, "learning_rate": 7.218111083068045e-06, "loss": 0.8442, "step": 6724 }, { "epoch": 0.6021601656500084, "grad_norm": 1.0182127079013046, "learning_rate": 7.215325396340669e-06, "loss": 0.8592, "step": 6725 }, { "epoch": 0.6022497061950864, "grad_norm": 0.8979152169909671, "learning_rate": 7.2125399438510425e-06, "loss": 0.8807, "step": 6726 }, { "epoch": 0.6023392467401645, "grad_norm": 1.1356164789921348, "learning_rate": 7.2097547258334795e-06, "loss": 0.8422, "step": 6727 }, { "epoch": 0.6024287872852426, "grad_norm": 0.8513699427146872, "learning_rate": 7.206969742522252e-06, "loss": 0.7909, "step": 6728 }, { "epoch": 0.6025183278303207, "grad_norm": 0.9632939873460633, "learning_rate": 7.2041849941516265e-06, "loss": 0.8544, "step": 6729 }, { "epoch": 0.6026078683753987, "grad_norm": 0.8382926913068143, "learning_rate": 7.201400480955849e-06, "loss": 0.8008, "step": 6730 }, { "epoch": 0.6026974089204769, "grad_norm": 0.9032306498260338, "learning_rate": 7.1986162031691444e-06, "loss": 0.8471, "step": 6731 }, { "epoch": 0.6027869494655549, "grad_norm": 0.883267776133902, "learning_rate": 7.195832161025717e-06, "loss": 0.7989, "step": 6732 }, { "epoch": 0.6028764900106329, "grad_norm": 1.2702912102214474, "learning_rate": 7.193048354759751e-06, "loss": 0.8532, "step": 6733 }, { "epoch": 0.602966030555711, "grad_norm": 0.8571704404845609, "learning_rate": 7.190264784605409e-06, "loss": 0.7914, "step": 6734 }, { "epoch": 0.6030555711007891, "grad_norm": 0.9251162783703075, "learning_rate": 7.187481450796834e-06, "loss": 0.8157, "step": 6735 }, { "epoch": 0.6031451116458671, "grad_norm": 0.865788056813636, "learning_rate": 7.184698353568157e-06, "loss": 0.846, "step": 6736 }, { "epoch": 0.6032346521909452, "grad_norm": 0.9089876256676758, "learning_rate": 7.181915493153481e-06, "loss": 0.8036, "step": 6737 }, { "epoch": 0.6033241927360233, "grad_norm": 0.9595590509752535, "learning_rate": 7.179132869786891e-06, "loss": 0.8487, "step": 6738 }, { "epoch": 0.6034137332811014, "grad_norm": 1.1187982975234714, "learning_rate": 7.17635048370245e-06, "loss": 0.8491, "step": 6739 }, { "epoch": 0.6035032738261794, "grad_norm": 0.9533087926917494, "learning_rate": 7.173568335134206e-06, "loss": 0.8214, "step": 6740 }, { "epoch": 0.6035928143712574, "grad_norm": 1.0862523658901713, "learning_rate": 7.17078642431618e-06, "loss": 0.8743, "step": 6741 }, { "epoch": 0.6036823549163356, "grad_norm": 0.915870231184967, "learning_rate": 7.1680047514823825e-06, "loss": 0.8061, "step": 6742 }, { "epoch": 0.6037718954614136, "grad_norm": 0.9560657262175546, "learning_rate": 7.165223316866798e-06, "loss": 0.8323, "step": 6743 }, { "epoch": 0.6038614360064917, "grad_norm": 0.8920262930261265, "learning_rate": 7.16244212070339e-06, "loss": 0.8416, "step": 6744 }, { "epoch": 0.6039509765515697, "grad_norm": 1.08993823761268, "learning_rate": 7.159661163226104e-06, "loss": 0.8375, "step": 6745 }, { "epoch": 0.6040405170966479, "grad_norm": 0.8851160925570674, "learning_rate": 7.1568804446688645e-06, "loss": 0.8261, "step": 6746 }, { "epoch": 0.6041300576417259, "grad_norm": 1.011993412186414, "learning_rate": 7.154099965265575e-06, "loss": 0.8684, "step": 6747 }, { "epoch": 0.6042195981868039, "grad_norm": 1.023509137951365, "learning_rate": 7.1513197252501245e-06, "loss": 0.8432, "step": 6748 }, { "epoch": 0.6043091387318821, "grad_norm": 0.8692464832146128, "learning_rate": 7.148539724856378e-06, "loss": 0.8312, "step": 6749 }, { "epoch": 0.6043986792769601, "grad_norm": 1.0411707740387286, "learning_rate": 7.1457599643181755e-06, "loss": 0.7863, "step": 6750 }, { "epoch": 0.6044882198220382, "grad_norm": 0.9524598571762559, "learning_rate": 7.1429804438693425e-06, "loss": 0.8633, "step": 6751 }, { "epoch": 0.6045777603671162, "grad_norm": 1.0453501650349868, "learning_rate": 7.140201163743686e-06, "loss": 0.8619, "step": 6752 }, { "epoch": 0.6046673009121943, "grad_norm": 1.0209115641420439, "learning_rate": 7.137422124174987e-06, "loss": 0.7627, "step": 6753 }, { "epoch": 0.6047568414572724, "grad_norm": 0.9259015777856562, "learning_rate": 7.134643325397015e-06, "loss": 0.8034, "step": 6754 }, { "epoch": 0.6048463820023504, "grad_norm": 0.9958054353112362, "learning_rate": 7.131864767643506e-06, "loss": 0.8359, "step": 6755 }, { "epoch": 0.6049359225474286, "grad_norm": 1.1006165283885907, "learning_rate": 7.1290864511481835e-06, "loss": 0.8368, "step": 6756 }, { "epoch": 0.6050254630925066, "grad_norm": 0.9276107694961492, "learning_rate": 7.126308376144756e-06, "loss": 0.8007, "step": 6757 }, { "epoch": 0.6051150036375846, "grad_norm": 0.9314082658021962, "learning_rate": 7.123530542866903e-06, "loss": 0.7508, "step": 6758 }, { "epoch": 0.6052045441826627, "grad_norm": 0.90920871652135, "learning_rate": 7.120752951548288e-06, "loss": 0.767, "step": 6759 }, { "epoch": 0.6052940847277408, "grad_norm": 0.9170420341977984, "learning_rate": 7.117975602422553e-06, "loss": 0.8014, "step": 6760 }, { "epoch": 0.6053836252728189, "grad_norm": 1.0240022312453534, "learning_rate": 7.115198495723318e-06, "loss": 0.8117, "step": 6761 }, { "epoch": 0.6054731658178969, "grad_norm": 1.0977739438037732, "learning_rate": 7.112421631684181e-06, "loss": 0.7939, "step": 6762 }, { "epoch": 0.6055627063629749, "grad_norm": 0.9642488022003376, "learning_rate": 7.109645010538731e-06, "loss": 0.8088, "step": 6763 }, { "epoch": 0.6056522469080531, "grad_norm": 1.2631811723370763, "learning_rate": 7.1068686325205215e-06, "loss": 0.8691, "step": 6764 }, { "epoch": 0.6057417874531311, "grad_norm": 1.1660910060559508, "learning_rate": 7.1040924978630974e-06, "loss": 0.83, "step": 6765 }, { "epoch": 0.6058313279982092, "grad_norm": 0.8817320401313201, "learning_rate": 7.101316606799975e-06, "loss": 0.7924, "step": 6766 }, { "epoch": 0.6059208685432873, "grad_norm": 0.8621917735252521, "learning_rate": 7.0985409595646516e-06, "loss": 0.8496, "step": 6767 }, { "epoch": 0.6060104090883653, "grad_norm": 1.007124592022082, "learning_rate": 7.095765556390606e-06, "loss": 0.7731, "step": 6768 }, { "epoch": 0.6060999496334434, "grad_norm": 0.9238102356886433, "learning_rate": 7.092990397511302e-06, "loss": 0.8181, "step": 6769 }, { "epoch": 0.6061894901785214, "grad_norm": 0.8927284417962302, "learning_rate": 7.0902154831601695e-06, "loss": 0.8173, "step": 6770 }, { "epoch": 0.6062790307235996, "grad_norm": 0.8856362681732389, "learning_rate": 7.0874408135706315e-06, "loss": 0.8478, "step": 6771 }, { "epoch": 0.6063685712686776, "grad_norm": 0.9438029066016451, "learning_rate": 7.084666388976081e-06, "loss": 0.8193, "step": 6772 }, { "epoch": 0.6064581118137556, "grad_norm": 0.9010187440439678, "learning_rate": 7.081892209609892e-06, "loss": 0.8461, "step": 6773 }, { "epoch": 0.6065476523588338, "grad_norm": 0.9354921288894348, "learning_rate": 7.079118275705419e-06, "loss": 0.8001, "step": 6774 }, { "epoch": 0.6066371929039118, "grad_norm": 0.9493644420823326, "learning_rate": 7.076344587496e-06, "loss": 0.8738, "step": 6775 }, { "epoch": 0.6067267334489899, "grad_norm": 0.9167574995660371, "learning_rate": 7.07357114521495e-06, "loss": 0.7922, "step": 6776 }, { "epoch": 0.6068162739940679, "grad_norm": 0.8692298853313202, "learning_rate": 7.070797949095556e-06, "loss": 0.7489, "step": 6777 }, { "epoch": 0.606905814539146, "grad_norm": 0.9053380214298313, "learning_rate": 7.068024999371095e-06, "loss": 0.8285, "step": 6778 }, { "epoch": 0.6069953550842241, "grad_norm": 0.9330431833163727, "learning_rate": 7.065252296274814e-06, "loss": 0.8005, "step": 6779 }, { "epoch": 0.6070848956293021, "grad_norm": 0.9705059825484632, "learning_rate": 7.062479840039946e-06, "loss": 0.7986, "step": 6780 }, { "epoch": 0.6071744361743802, "grad_norm": 0.9705909190178027, "learning_rate": 7.0597076308997034e-06, "loss": 0.8111, "step": 6781 }, { "epoch": 0.6072639767194583, "grad_norm": 1.0309824194669222, "learning_rate": 7.056935669087277e-06, "loss": 0.7869, "step": 6782 }, { "epoch": 0.6073535172645363, "grad_norm": 0.894120548259424, "learning_rate": 7.054163954835825e-06, "loss": 0.786, "step": 6783 }, { "epoch": 0.6074430578096144, "grad_norm": 1.061238307142784, "learning_rate": 7.051392488378503e-06, "loss": 0.8015, "step": 6784 }, { "epoch": 0.6075325983546925, "grad_norm": 0.9501622634075337, "learning_rate": 7.048621269948438e-06, "loss": 0.802, "step": 6785 }, { "epoch": 0.6076221388997706, "grad_norm": 0.9168641198693591, "learning_rate": 7.045850299778733e-06, "loss": 0.8194, "step": 6786 }, { "epoch": 0.6077116794448486, "grad_norm": 1.0918858102785411, "learning_rate": 7.043079578102476e-06, "loss": 0.8045, "step": 6787 }, { "epoch": 0.6078012199899266, "grad_norm": 1.0672341869298139, "learning_rate": 7.040309105152728e-06, "loss": 0.8329, "step": 6788 }, { "epoch": 0.6078907605350048, "grad_norm": 0.8444411628893715, "learning_rate": 7.037538881162531e-06, "loss": 0.7769, "step": 6789 }, { "epoch": 0.6079803010800828, "grad_norm": 0.8758887890100214, "learning_rate": 7.034768906364912e-06, "loss": 0.7843, "step": 6790 }, { "epoch": 0.6080698416251609, "grad_norm": 1.00796442573516, "learning_rate": 7.031999180992868e-06, "loss": 0.84, "step": 6791 }, { "epoch": 0.608159382170239, "grad_norm": 0.927288135364999, "learning_rate": 7.029229705279384e-06, "loss": 0.8348, "step": 6792 }, { "epoch": 0.608248922715317, "grad_norm": 0.9054651493961807, "learning_rate": 7.0264604794574155e-06, "loss": 0.8286, "step": 6793 }, { "epoch": 0.6083384632603951, "grad_norm": 1.0511370499245034, "learning_rate": 7.023691503759901e-06, "loss": 0.8012, "step": 6794 }, { "epoch": 0.6084280038054731, "grad_norm": 1.0075045636977054, "learning_rate": 7.020922778419755e-06, "loss": 0.8358, "step": 6795 }, { "epoch": 0.6085175443505513, "grad_norm": 1.0738314469363892, "learning_rate": 7.018154303669879e-06, "loss": 0.8208, "step": 6796 }, { "epoch": 0.6086070848956293, "grad_norm": 0.8992154147552629, "learning_rate": 7.015386079743148e-06, "loss": 0.7973, "step": 6797 }, { "epoch": 0.6086966254407074, "grad_norm": 0.9265629519323908, "learning_rate": 7.012618106872415e-06, "loss": 0.84, "step": 6798 }, { "epoch": 0.6087861659857854, "grad_norm": 1.0174308741519702, "learning_rate": 7.009850385290511e-06, "loss": 0.8086, "step": 6799 }, { "epoch": 0.6088757065308635, "grad_norm": 1.0137765206719926, "learning_rate": 7.007082915230247e-06, "loss": 0.8352, "step": 6800 }, { "epoch": 0.6089652470759416, "grad_norm": 0.9447487626276228, "learning_rate": 7.004315696924413e-06, "loss": 0.8178, "step": 6801 }, { "epoch": 0.6090547876210196, "grad_norm": 1.106207788450664, "learning_rate": 7.001548730605783e-06, "loss": 0.8438, "step": 6802 }, { "epoch": 0.6091443281660978, "grad_norm": 0.9621839841777069, "learning_rate": 6.998782016507104e-06, "loss": 0.7925, "step": 6803 }, { "epoch": 0.6092338687111758, "grad_norm": 1.0749028365733964, "learning_rate": 6.996015554861101e-06, "loss": 0.8665, "step": 6804 }, { "epoch": 0.6093234092562538, "grad_norm": 0.9405063950600747, "learning_rate": 6.993249345900479e-06, "loss": 0.8319, "step": 6805 }, { "epoch": 0.6094129498013319, "grad_norm": 0.9909146378814356, "learning_rate": 6.990483389857925e-06, "loss": 0.8145, "step": 6806 }, { "epoch": 0.60950249034641, "grad_norm": 1.0268234825413491, "learning_rate": 6.9877176869661e-06, "loss": 0.8364, "step": 6807 }, { "epoch": 0.6095920308914881, "grad_norm": 0.8998589348804009, "learning_rate": 6.984952237457647e-06, "loss": 0.8524, "step": 6808 }, { "epoch": 0.6096815714365661, "grad_norm": 1.1473573138804667, "learning_rate": 6.982187041565192e-06, "loss": 0.81, "step": 6809 }, { "epoch": 0.6097711119816442, "grad_norm": 1.1503370653504872, "learning_rate": 6.979422099521323e-06, "loss": 0.8473, "step": 6810 }, { "epoch": 0.6098606525267223, "grad_norm": 0.9821407158543782, "learning_rate": 6.976657411558625e-06, "loss": 0.8077, "step": 6811 }, { "epoch": 0.6099501930718003, "grad_norm": 0.9133375862379889, "learning_rate": 6.973892977909653e-06, "loss": 0.8041, "step": 6812 }, { "epoch": 0.6100397336168784, "grad_norm": 0.8861377590588402, "learning_rate": 6.971128798806943e-06, "loss": 0.8148, "step": 6813 }, { "epoch": 0.6101292741619565, "grad_norm": 1.0944972378656204, "learning_rate": 6.9683648744830116e-06, "loss": 0.7876, "step": 6814 }, { "epoch": 0.6102188147070345, "grad_norm": 0.9495173848486047, "learning_rate": 6.965601205170345e-06, "loss": 0.804, "step": 6815 }, { "epoch": 0.6103083552521126, "grad_norm": 0.8498113193007459, "learning_rate": 6.962837791101414e-06, "loss": 0.7816, "step": 6816 }, { "epoch": 0.6103978957971906, "grad_norm": 1.1359905441985596, "learning_rate": 6.960074632508672e-06, "loss": 0.8681, "step": 6817 }, { "epoch": 0.6104874363422688, "grad_norm": 1.006355382225876, "learning_rate": 6.957311729624547e-06, "loss": 0.8191, "step": 6818 }, { "epoch": 0.6105769768873468, "grad_norm": 0.9260517455408065, "learning_rate": 6.954549082681444e-06, "loss": 0.8607, "step": 6819 }, { "epoch": 0.6106665174324248, "grad_norm": 0.9079865484712613, "learning_rate": 6.951786691911751e-06, "loss": 0.8036, "step": 6820 }, { "epoch": 0.610756057977503, "grad_norm": 1.034135095062518, "learning_rate": 6.949024557547824e-06, "loss": 0.7989, "step": 6821 }, { "epoch": 0.610845598522581, "grad_norm": 1.0030756502315685, "learning_rate": 6.946262679822009e-06, "loss": 0.8914, "step": 6822 }, { "epoch": 0.6109351390676591, "grad_norm": 0.9761653421831348, "learning_rate": 6.943501058966626e-06, "loss": 0.7915, "step": 6823 }, { "epoch": 0.6110246796127371, "grad_norm": 0.9606151087322252, "learning_rate": 6.940739695213976e-06, "loss": 0.8313, "step": 6824 }, { "epoch": 0.6111142201578152, "grad_norm": 1.0641970251505146, "learning_rate": 6.937978588796335e-06, "loss": 0.8613, "step": 6825 }, { "epoch": 0.6112037607028933, "grad_norm": 0.897624345628596, "learning_rate": 6.935217739945954e-06, "loss": 0.8119, "step": 6826 }, { "epoch": 0.6112933012479713, "grad_norm": 0.9068447837199171, "learning_rate": 6.93245714889507e-06, "loss": 0.7537, "step": 6827 }, { "epoch": 0.6113828417930495, "grad_norm": 0.9847564102035016, "learning_rate": 6.929696815875893e-06, "loss": 0.8914, "step": 6828 }, { "epoch": 0.6114723823381275, "grad_norm": 0.9371524659151274, "learning_rate": 6.926936741120616e-06, "loss": 0.8237, "step": 6829 }, { "epoch": 0.6115619228832055, "grad_norm": 0.9829707351498234, "learning_rate": 6.924176924861406e-06, "loss": 0.8489, "step": 6830 }, { "epoch": 0.6116514634282836, "grad_norm": 1.0620218793538247, "learning_rate": 6.921417367330412e-06, "loss": 0.8163, "step": 6831 }, { "epoch": 0.6117410039733617, "grad_norm": 0.8732960722186752, "learning_rate": 6.918658068759754e-06, "loss": 0.8437, "step": 6832 }, { "epoch": 0.6118305445184398, "grad_norm": 0.9062623961789811, "learning_rate": 6.915899029381538e-06, "loss": 0.8509, "step": 6833 }, { "epoch": 0.6119200850635178, "grad_norm": 0.9963413592763147, "learning_rate": 6.913140249427845e-06, "loss": 0.837, "step": 6834 }, { "epoch": 0.6120096256085958, "grad_norm": 0.9275059773347846, "learning_rate": 6.910381729130737e-06, "loss": 0.8363, "step": 6835 }, { "epoch": 0.612099166153674, "grad_norm": 0.9880189286796315, "learning_rate": 6.907623468722253e-06, "loss": 0.8114, "step": 6836 }, { "epoch": 0.612188706698752, "grad_norm": 0.903996391206342, "learning_rate": 6.904865468434401e-06, "loss": 0.8347, "step": 6837 }, { "epoch": 0.6122782472438301, "grad_norm": 0.9311166715580739, "learning_rate": 6.902107728499181e-06, "loss": 0.8201, "step": 6838 }, { "epoch": 0.6123677877889082, "grad_norm": 0.9216111817813706, "learning_rate": 6.8993502491485635e-06, "loss": 0.804, "step": 6839 }, { "epoch": 0.6124573283339863, "grad_norm": 0.9836808978254274, "learning_rate": 6.8965930306144975e-06, "loss": 0.7938, "step": 6840 }, { "epoch": 0.6125468688790643, "grad_norm": 0.8504655980054405, "learning_rate": 6.893836073128912e-06, "loss": 0.8331, "step": 6841 }, { "epoch": 0.6126364094241423, "grad_norm": 0.966529725792513, "learning_rate": 6.891079376923721e-06, "loss": 0.8053, "step": 6842 }, { "epoch": 0.6127259499692205, "grad_norm": 1.0052820061933097, "learning_rate": 6.888322942230794e-06, "loss": 0.819, "step": 6843 }, { "epoch": 0.6128154905142985, "grad_norm": 1.320637517223321, "learning_rate": 6.885566769282004e-06, "loss": 0.8844, "step": 6844 }, { "epoch": 0.6129050310593765, "grad_norm": 0.8889108577666743, "learning_rate": 6.882810858309188e-06, "loss": 0.8173, "step": 6845 }, { "epoch": 0.6129945716044547, "grad_norm": 1.2271222308017888, "learning_rate": 6.880055209544165e-06, "loss": 0.7767, "step": 6846 }, { "epoch": 0.6130841121495327, "grad_norm": 0.8886368236160457, "learning_rate": 6.877299823218733e-06, "loss": 0.8327, "step": 6847 }, { "epoch": 0.6131736526946108, "grad_norm": 0.9217922255684089, "learning_rate": 6.874544699564662e-06, "loss": 0.8394, "step": 6848 }, { "epoch": 0.6132631932396888, "grad_norm": 0.9485812158502913, "learning_rate": 6.871789838813703e-06, "loss": 0.8189, "step": 6849 }, { "epoch": 0.613352733784767, "grad_norm": 0.9502680072342772, "learning_rate": 6.869035241197592e-06, "loss": 0.8269, "step": 6850 }, { "epoch": 0.613442274329845, "grad_norm": 0.9642240807913821, "learning_rate": 6.866280906948033e-06, "loss": 0.8582, "step": 6851 }, { "epoch": 0.613531814874923, "grad_norm": 0.9286782234210027, "learning_rate": 6.863526836296712e-06, "loss": 0.8708, "step": 6852 }, { "epoch": 0.6136213554200011, "grad_norm": 1.0352072759505886, "learning_rate": 6.860773029475294e-06, "loss": 0.8491, "step": 6853 }, { "epoch": 0.6137108959650792, "grad_norm": 0.9466367146690194, "learning_rate": 6.858019486715418e-06, "loss": 0.846, "step": 6854 }, { "epoch": 0.6138004365101573, "grad_norm": 1.2503195140129573, "learning_rate": 6.855266208248702e-06, "loss": 0.7971, "step": 6855 }, { "epoch": 0.6138899770552353, "grad_norm": 1.0566208566015043, "learning_rate": 6.852513194306747e-06, "loss": 0.821, "step": 6856 }, { "epoch": 0.6139795176003134, "grad_norm": 1.0060041063347442, "learning_rate": 6.849760445121125e-06, "loss": 0.7427, "step": 6857 }, { "epoch": 0.6140690581453915, "grad_norm": 0.8554891426553833, "learning_rate": 6.847007960923391e-06, "loss": 0.8045, "step": 6858 }, { "epoch": 0.6141585986904695, "grad_norm": 0.9598180820536625, "learning_rate": 6.8442557419450695e-06, "loss": 0.8775, "step": 6859 }, { "epoch": 0.6142481392355476, "grad_norm": 1.1525574158109388, "learning_rate": 6.841503788417671e-06, "loss": 0.911, "step": 6860 }, { "epoch": 0.6143376797806257, "grad_norm": 1.0095088867488953, "learning_rate": 6.838752100572681e-06, "loss": 0.8605, "step": 6861 }, { "epoch": 0.6144272203257037, "grad_norm": 0.967964622099855, "learning_rate": 6.836000678641564e-06, "loss": 0.8147, "step": 6862 }, { "epoch": 0.6145167608707818, "grad_norm": 0.9034640468361166, "learning_rate": 6.833249522855761e-06, "loss": 0.8441, "step": 6863 }, { "epoch": 0.6146063014158599, "grad_norm": 1.1077748605927817, "learning_rate": 6.8304986334466884e-06, "loss": 0.7902, "step": 6864 }, { "epoch": 0.614695841960938, "grad_norm": 0.9632528179217105, "learning_rate": 6.827748010645741e-06, "loss": 0.7862, "step": 6865 }, { "epoch": 0.614785382506016, "grad_norm": 0.9939275199605824, "learning_rate": 6.824997654684293e-06, "loss": 0.7966, "step": 6866 }, { "epoch": 0.614874923051094, "grad_norm": 0.865792451368466, "learning_rate": 6.822247565793697e-06, "loss": 0.8151, "step": 6867 }, { "epoch": 0.6149644635961722, "grad_norm": 0.9642295863345405, "learning_rate": 6.819497744205277e-06, "loss": 0.7918, "step": 6868 }, { "epoch": 0.6150540041412502, "grad_norm": 0.9643131661546134, "learning_rate": 6.816748190150351e-06, "loss": 0.7813, "step": 6869 }, { "epoch": 0.6151435446863283, "grad_norm": 0.9927479306142228, "learning_rate": 6.813998903860185e-06, "loss": 0.8235, "step": 6870 }, { "epoch": 0.6152330852314063, "grad_norm": 0.9502333299458386, "learning_rate": 6.811249885566052e-06, "loss": 0.8339, "step": 6871 }, { "epoch": 0.6153226257764844, "grad_norm": 1.013974031261673, "learning_rate": 6.808501135499188e-06, "loss": 0.8522, "step": 6872 }, { "epoch": 0.6154121663215625, "grad_norm": 0.9717613484258886, "learning_rate": 6.805752653890808e-06, "loss": 0.836, "step": 6873 }, { "epoch": 0.6155017068666405, "grad_norm": 0.9318239743237516, "learning_rate": 6.8030044409721075e-06, "loss": 0.8409, "step": 6874 }, { "epoch": 0.6155912474117187, "grad_norm": 0.933077348792681, "learning_rate": 6.8002564969742536e-06, "loss": 0.83, "step": 6875 }, { "epoch": 0.6156807879567967, "grad_norm": 1.017866329656441, "learning_rate": 6.797508822128394e-06, "loss": 0.8314, "step": 6876 }, { "epoch": 0.6157703285018747, "grad_norm": 0.8392712677159732, "learning_rate": 6.794761416665658e-06, "loss": 0.8175, "step": 6877 }, { "epoch": 0.6158598690469528, "grad_norm": 0.9751822042556472, "learning_rate": 6.792014280817148e-06, "loss": 0.7722, "step": 6878 }, { "epoch": 0.6159494095920309, "grad_norm": 0.9200636083122777, "learning_rate": 6.789267414813941e-06, "loss": 0.7774, "step": 6879 }, { "epoch": 0.616038950137109, "grad_norm": 0.926848633195427, "learning_rate": 6.786520818887099e-06, "loss": 0.7998, "step": 6880 }, { "epoch": 0.616128490682187, "grad_norm": 0.8968750644806187, "learning_rate": 6.783774493267652e-06, "loss": 0.8031, "step": 6881 }, { "epoch": 0.6162180312272652, "grad_norm": 1.0149494608617347, "learning_rate": 6.781028438186612e-06, "loss": 0.8453, "step": 6882 }, { "epoch": 0.6163075717723432, "grad_norm": 0.9959631115275402, "learning_rate": 6.778282653874973e-06, "loss": 0.8398, "step": 6883 }, { "epoch": 0.6163971123174212, "grad_norm": 1.0214234469190036, "learning_rate": 6.775537140563697e-06, "loss": 0.8105, "step": 6884 }, { "epoch": 0.6164866528624993, "grad_norm": 1.1338735595503195, "learning_rate": 6.772791898483733e-06, "loss": 0.7981, "step": 6885 }, { "epoch": 0.6165761934075774, "grad_norm": 1.1192560690531386, "learning_rate": 6.770046927865994e-06, "loss": 0.8236, "step": 6886 }, { "epoch": 0.6166657339526554, "grad_norm": 0.9651837968638589, "learning_rate": 6.767302228941383e-06, "loss": 0.8426, "step": 6887 }, { "epoch": 0.6167552744977335, "grad_norm": 1.0193860652520395, "learning_rate": 6.764557801940771e-06, "loss": 0.8396, "step": 6888 }, { "epoch": 0.6168448150428115, "grad_norm": 0.9589074193360166, "learning_rate": 6.761813647095017e-06, "loss": 0.7701, "step": 6889 }, { "epoch": 0.6169343555878897, "grad_norm": 0.869034794592788, "learning_rate": 6.759069764634945e-06, "loss": 0.8201, "step": 6890 }, { "epoch": 0.6170238961329677, "grad_norm": 1.0762124881577682, "learning_rate": 6.756326154791366e-06, "loss": 0.8426, "step": 6891 }, { "epoch": 0.6171134366780457, "grad_norm": 0.9141353749945169, "learning_rate": 6.753582817795059e-06, "loss": 0.7611, "step": 6892 }, { "epoch": 0.6172029772231239, "grad_norm": 1.0150873853170288, "learning_rate": 6.750839753876785e-06, "loss": 0.8231, "step": 6893 }, { "epoch": 0.6172925177682019, "grad_norm": 0.9834024375726643, "learning_rate": 6.748096963267285e-06, "loss": 0.7864, "step": 6894 }, { "epoch": 0.61738205831328, "grad_norm": 1.0855608962192123, "learning_rate": 6.745354446197267e-06, "loss": 0.8113, "step": 6895 }, { "epoch": 0.617471598858358, "grad_norm": 0.9414822700422412, "learning_rate": 6.742612202897436e-06, "loss": 0.8109, "step": 6896 }, { "epoch": 0.6175611394034362, "grad_norm": 0.9680316424205204, "learning_rate": 6.7398702335984436e-06, "loss": 0.8829, "step": 6897 }, { "epoch": 0.6176506799485142, "grad_norm": 1.0812810028008988, "learning_rate": 6.737128538530946e-06, "loss": 0.8583, "step": 6898 }, { "epoch": 0.6177402204935922, "grad_norm": 1.0590551586059418, "learning_rate": 6.734387117925562e-06, "loss": 0.8877, "step": 6899 }, { "epoch": 0.6178297610386704, "grad_norm": 0.979256916353065, "learning_rate": 6.731645972012892e-06, "loss": 0.8201, "step": 6900 }, { "epoch": 0.6179193015837484, "grad_norm": 1.0382208878098076, "learning_rate": 6.728905101023512e-06, "loss": 0.7922, "step": 6901 }, { "epoch": 0.6180088421288265, "grad_norm": 1.1022771248009708, "learning_rate": 6.72616450518798e-06, "loss": 0.8799, "step": 6902 }, { "epoch": 0.6180983826739045, "grad_norm": 0.9648599272837541, "learning_rate": 6.723424184736816e-06, "loss": 0.8403, "step": 6903 }, { "epoch": 0.6181879232189826, "grad_norm": 0.9387292978873782, "learning_rate": 6.720684139900534e-06, "loss": 0.777, "step": 6904 }, { "epoch": 0.6182774637640607, "grad_norm": 0.9265779630393162, "learning_rate": 6.717944370909616e-06, "loss": 0.7941, "step": 6905 }, { "epoch": 0.6183670043091387, "grad_norm": 0.9676528923748605, "learning_rate": 6.715204877994521e-06, "loss": 0.7885, "step": 6906 }, { "epoch": 0.6184565448542167, "grad_norm": 0.976180815737479, "learning_rate": 6.712465661385692e-06, "loss": 0.8423, "step": 6907 }, { "epoch": 0.6185460853992949, "grad_norm": 0.9178080878821915, "learning_rate": 6.7097267213135345e-06, "loss": 0.856, "step": 6908 }, { "epoch": 0.6186356259443729, "grad_norm": 0.9602544438249079, "learning_rate": 6.7069880580084415e-06, "loss": 0.8349, "step": 6909 }, { "epoch": 0.618725166489451, "grad_norm": 1.0568022353132074, "learning_rate": 6.704249671700785e-06, "loss": 0.8378, "step": 6910 }, { "epoch": 0.6188147070345291, "grad_norm": 0.980577698820659, "learning_rate": 6.7015115626209035e-06, "loss": 0.7735, "step": 6911 }, { "epoch": 0.6189042475796072, "grad_norm": 0.9636637853010859, "learning_rate": 6.698773730999124e-06, "loss": 0.8016, "step": 6912 }, { "epoch": 0.6189937881246852, "grad_norm": 1.017534359227623, "learning_rate": 6.696036177065741e-06, "loss": 0.8146, "step": 6913 }, { "epoch": 0.6190833286697632, "grad_norm": 0.8993718344330415, "learning_rate": 6.693298901051026e-06, "loss": 0.7568, "step": 6914 }, { "epoch": 0.6191728692148414, "grad_norm": 0.9688416220533915, "learning_rate": 6.6905619031852295e-06, "loss": 0.8061, "step": 6915 }, { "epoch": 0.6192624097599194, "grad_norm": 0.8858830956937263, "learning_rate": 6.687825183698584e-06, "loss": 0.814, "step": 6916 }, { "epoch": 0.6193519503049975, "grad_norm": 0.9819610668615756, "learning_rate": 6.6850887428212905e-06, "loss": 0.8299, "step": 6917 }, { "epoch": 0.6194414908500756, "grad_norm": 0.9661020088389787, "learning_rate": 6.682352580783531e-06, "loss": 0.7784, "step": 6918 }, { "epoch": 0.6195310313951536, "grad_norm": 0.9307415978375252, "learning_rate": 6.679616697815461e-06, "loss": 0.8198, "step": 6919 }, { "epoch": 0.6196205719402317, "grad_norm": 0.9326523306181576, "learning_rate": 6.6768810941472116e-06, "loss": 0.8397, "step": 6920 }, { "epoch": 0.6197101124853097, "grad_norm": 0.9229164439507436, "learning_rate": 6.674145770008897e-06, "loss": 0.8311, "step": 6921 }, { "epoch": 0.6197996530303879, "grad_norm": 0.9913502736986972, "learning_rate": 6.671410725630601e-06, "loss": 0.8489, "step": 6922 }, { "epoch": 0.6198891935754659, "grad_norm": 0.9688240168021084, "learning_rate": 6.668675961242389e-06, "loss": 0.8363, "step": 6923 }, { "epoch": 0.6199787341205439, "grad_norm": 1.1158828057836605, "learning_rate": 6.665941477074301e-06, "loss": 0.7921, "step": 6924 }, { "epoch": 0.620068274665622, "grad_norm": 0.9509261521658446, "learning_rate": 6.663207273356351e-06, "loss": 0.8568, "step": 6925 }, { "epoch": 0.6201578152107001, "grad_norm": 0.8768836420587605, "learning_rate": 6.660473350318529e-06, "loss": 0.8369, "step": 6926 }, { "epoch": 0.6202473557557782, "grad_norm": 1.2551258932128715, "learning_rate": 6.657739708190807e-06, "loss": 0.7632, "step": 6927 }, { "epoch": 0.6203368963008562, "grad_norm": 0.9901217755491433, "learning_rate": 6.655006347203128e-06, "loss": 0.7513, "step": 6928 }, { "epoch": 0.6204264368459343, "grad_norm": 1.0833368989169174, "learning_rate": 6.65227326758542e-06, "loss": 0.8451, "step": 6929 }, { "epoch": 0.6205159773910124, "grad_norm": 0.9589829133918134, "learning_rate": 6.64954046956757e-06, "loss": 0.8637, "step": 6930 }, { "epoch": 0.6206055179360904, "grad_norm": 0.9133983633630999, "learning_rate": 6.64680795337946e-06, "loss": 0.8165, "step": 6931 }, { "epoch": 0.6206950584811685, "grad_norm": 0.9470503976085424, "learning_rate": 6.644075719250938e-06, "loss": 0.7981, "step": 6932 }, { "epoch": 0.6207845990262466, "grad_norm": 0.9474078496994895, "learning_rate": 6.6413437674118294e-06, "loss": 0.8313, "step": 6933 }, { "epoch": 0.6208741395713246, "grad_norm": 0.9600453895542121, "learning_rate": 6.638612098091937e-06, "loss": 0.7844, "step": 6934 }, { "epoch": 0.6209636801164027, "grad_norm": 1.0466134362250887, "learning_rate": 6.635880711521047e-06, "loss": 0.833, "step": 6935 }, { "epoch": 0.6210532206614808, "grad_norm": 0.9306107934701259, "learning_rate": 6.633149607928901e-06, "loss": 0.808, "step": 6936 }, { "epoch": 0.6211427612065589, "grad_norm": 0.9945883448958968, "learning_rate": 6.630418787545243e-06, "loss": 0.8369, "step": 6937 }, { "epoch": 0.6212323017516369, "grad_norm": 0.9763299397349428, "learning_rate": 6.627688250599775e-06, "loss": 0.7748, "step": 6938 }, { "epoch": 0.6213218422967149, "grad_norm": 0.9357816321731504, "learning_rate": 6.6249579973221835e-06, "loss": 0.7637, "step": 6939 }, { "epoch": 0.6214113828417931, "grad_norm": 0.9993866346409703, "learning_rate": 6.622228027942128e-06, "loss": 0.8251, "step": 6940 }, { "epoch": 0.6215009233868711, "grad_norm": 0.8438082927325912, "learning_rate": 6.619498342689241e-06, "loss": 0.826, "step": 6941 }, { "epoch": 0.6215904639319492, "grad_norm": 1.0756882144698472, "learning_rate": 6.616768941793134e-06, "loss": 0.8046, "step": 6942 }, { "epoch": 0.6216800044770272, "grad_norm": 0.9174458383614348, "learning_rate": 6.614039825483404e-06, "loss": 0.7874, "step": 6943 }, { "epoch": 0.6217695450221054, "grad_norm": 1.011929825062502, "learning_rate": 6.611310993989608e-06, "loss": 0.8627, "step": 6944 }, { "epoch": 0.6218590855671834, "grad_norm": 0.9423977343702019, "learning_rate": 6.608582447541292e-06, "loss": 0.8992, "step": 6945 }, { "epoch": 0.6219486261122614, "grad_norm": 1.011846296837944, "learning_rate": 6.605854186367965e-06, "loss": 0.8497, "step": 6946 }, { "epoch": 0.6220381666573396, "grad_norm": 1.0506651538098284, "learning_rate": 6.603126210699124e-06, "loss": 0.8206, "step": 6947 }, { "epoch": 0.6221277072024176, "grad_norm": 0.9285843562525606, "learning_rate": 6.600398520764237e-06, "loss": 0.8282, "step": 6948 }, { "epoch": 0.6222172477474956, "grad_norm": 0.9607051754572172, "learning_rate": 6.597671116792745e-06, "loss": 0.8058, "step": 6949 }, { "epoch": 0.6223067882925737, "grad_norm": 0.9890252866269129, "learning_rate": 6.594943999014076e-06, "loss": 0.8094, "step": 6950 }, { "epoch": 0.6223963288376518, "grad_norm": 0.8990318309790466, "learning_rate": 6.592217167657622e-06, "loss": 0.8138, "step": 6951 }, { "epoch": 0.6224858693827299, "grad_norm": 0.9081771484960499, "learning_rate": 6.589490622952752e-06, "loss": 0.7966, "step": 6952 }, { "epoch": 0.6225754099278079, "grad_norm": 1.0070436836417715, "learning_rate": 6.58676436512882e-06, "loss": 0.7962, "step": 6953 }, { "epoch": 0.6226649504728861, "grad_norm": 0.9720927437013402, "learning_rate": 6.5840383944151445e-06, "loss": 0.8819, "step": 6954 }, { "epoch": 0.6227544910179641, "grad_norm": 1.0375517597490889, "learning_rate": 6.581312711041026e-06, "loss": 0.7754, "step": 6955 }, { "epoch": 0.6228440315630421, "grad_norm": 1.0379279831505968, "learning_rate": 6.578587315235747e-06, "loss": 0.8557, "step": 6956 }, { "epoch": 0.6229335721081202, "grad_norm": 0.9224595131341047, "learning_rate": 6.57586220722855e-06, "loss": 0.7874, "step": 6957 }, { "epoch": 0.6230231126531983, "grad_norm": 0.9467945030732978, "learning_rate": 6.573137387248665e-06, "loss": 0.808, "step": 6958 }, { "epoch": 0.6231126531982764, "grad_norm": 0.9282129015331446, "learning_rate": 6.570412855525298e-06, "loss": 0.8461, "step": 6959 }, { "epoch": 0.6232021937433544, "grad_norm": 1.1037637998533718, "learning_rate": 6.567688612287625e-06, "loss": 0.8699, "step": 6960 }, { "epoch": 0.6232917342884324, "grad_norm": 1.1526537444983902, "learning_rate": 6.564964657764799e-06, "loss": 0.8363, "step": 6961 }, { "epoch": 0.6233812748335106, "grad_norm": 0.9879430777158691, "learning_rate": 6.562240992185958e-06, "loss": 0.8543, "step": 6962 }, { "epoch": 0.6234708153785886, "grad_norm": 1.0523246735197505, "learning_rate": 6.559517615780196e-06, "loss": 0.8247, "step": 6963 }, { "epoch": 0.6235603559236667, "grad_norm": 1.0222867303386085, "learning_rate": 6.556794528776602e-06, "loss": 0.8497, "step": 6964 }, { "epoch": 0.6236498964687448, "grad_norm": 0.9675485034821362, "learning_rate": 6.5540717314042335e-06, "loss": 0.8109, "step": 6965 }, { "epoch": 0.6237394370138228, "grad_norm": 1.0138915662654804, "learning_rate": 6.55134922389212e-06, "loss": 0.772, "step": 6966 }, { "epoch": 0.6238289775589009, "grad_norm": 0.9169558364325726, "learning_rate": 6.548627006469276e-06, "loss": 0.7997, "step": 6967 }, { "epoch": 0.6239185181039789, "grad_norm": 0.9635406898239307, "learning_rate": 6.545905079364678e-06, "loss": 0.8174, "step": 6968 }, { "epoch": 0.6240080586490571, "grad_norm": 0.8759907940326553, "learning_rate": 6.543183442807286e-06, "loss": 0.794, "step": 6969 }, { "epoch": 0.6240975991941351, "grad_norm": 1.0452357140960589, "learning_rate": 6.5404620970260415e-06, "loss": 0.7996, "step": 6970 }, { "epoch": 0.6241871397392131, "grad_norm": 1.0831881655381301, "learning_rate": 6.53774104224985e-06, "loss": 0.8551, "step": 6971 }, { "epoch": 0.6242766802842913, "grad_norm": 0.9401628630064085, "learning_rate": 6.5350202787076e-06, "loss": 0.8486, "step": 6972 }, { "epoch": 0.6243662208293693, "grad_norm": 0.9214762040281522, "learning_rate": 6.532299806628156e-06, "loss": 0.8516, "step": 6973 }, { "epoch": 0.6244557613744474, "grad_norm": 0.9372033980921437, "learning_rate": 6.5295796262403495e-06, "loss": 0.8612, "step": 6974 }, { "epoch": 0.6245453019195254, "grad_norm": 0.9106012685365585, "learning_rate": 6.526859737772996e-06, "loss": 0.8402, "step": 6975 }, { "epoch": 0.6246348424646035, "grad_norm": 1.1983038972864293, "learning_rate": 6.524140141454881e-06, "loss": 0.8196, "step": 6976 }, { "epoch": 0.6247243830096816, "grad_norm": 0.9953063885433927, "learning_rate": 6.5214208375147724e-06, "loss": 0.8149, "step": 6977 }, { "epoch": 0.6248139235547596, "grad_norm": 0.9980508760077046, "learning_rate": 6.5187018261814095e-06, "loss": 0.8176, "step": 6978 }, { "epoch": 0.6249034640998377, "grad_norm": 1.0294898215156598, "learning_rate": 6.515983107683504e-06, "loss": 0.8245, "step": 6979 }, { "epoch": 0.6249930046449158, "grad_norm": 0.9257275591198185, "learning_rate": 6.513264682249742e-06, "loss": 0.8562, "step": 6980 }, { "epoch": 0.6250825451899938, "grad_norm": 0.9088170244784146, "learning_rate": 6.510546550108796e-06, "loss": 0.8124, "step": 6981 }, { "epoch": 0.6251720857350719, "grad_norm": 0.9861942164958212, "learning_rate": 6.5078287114893015e-06, "loss": 0.7892, "step": 6982 }, { "epoch": 0.62526162628015, "grad_norm": 1.058059275598195, "learning_rate": 6.5051111666198755e-06, "loss": 0.865, "step": 6983 }, { "epoch": 0.6253511668252281, "grad_norm": 1.2328752780266363, "learning_rate": 6.502393915729113e-06, "loss": 0.8153, "step": 6984 }, { "epoch": 0.6254407073703061, "grad_norm": 1.0939841326469963, "learning_rate": 6.499676959045574e-06, "loss": 0.8182, "step": 6985 }, { "epoch": 0.6255302479153841, "grad_norm": 1.099747295072245, "learning_rate": 6.496960296797803e-06, "loss": 0.8017, "step": 6986 }, { "epoch": 0.6256197884604623, "grad_norm": 1.0775712758955602, "learning_rate": 6.494243929214316e-06, "loss": 0.8652, "step": 6987 }, { "epoch": 0.6257093290055403, "grad_norm": 0.9977971528439035, "learning_rate": 6.491527856523604e-06, "loss": 0.7569, "step": 6988 }, { "epoch": 0.6257988695506184, "grad_norm": 0.8882053653961417, "learning_rate": 6.488812078954142e-06, "loss": 0.8828, "step": 6989 }, { "epoch": 0.6258884100956965, "grad_norm": 0.9484766998567198, "learning_rate": 6.486096596734359e-06, "loss": 0.8026, "step": 6990 }, { "epoch": 0.6259779506407745, "grad_norm": 0.8378392888188896, "learning_rate": 6.483381410092682e-06, "loss": 0.7673, "step": 6991 }, { "epoch": 0.6260674911858526, "grad_norm": 1.096451967419823, "learning_rate": 6.480666519257501e-06, "loss": 0.7578, "step": 6992 }, { "epoch": 0.6261570317309306, "grad_norm": 0.9486263979420991, "learning_rate": 6.4779519244571845e-06, "loss": 0.8699, "step": 6993 }, { "epoch": 0.6262465722760088, "grad_norm": 0.8086644362649459, "learning_rate": 6.4752376259200725e-06, "loss": 0.7386, "step": 6994 }, { "epoch": 0.6263361128210868, "grad_norm": 1.001818834213824, "learning_rate": 6.472523623874491e-06, "loss": 0.857, "step": 6995 }, { "epoch": 0.6264256533661648, "grad_norm": 0.9435270729308571, "learning_rate": 6.4698099185487216e-06, "loss": 0.792, "step": 6996 }, { "epoch": 0.6265151939112429, "grad_norm": 0.9544299872650338, "learning_rate": 6.467096510171039e-06, "loss": 0.8109, "step": 6997 }, { "epoch": 0.626604734456321, "grad_norm": 1.0097359641564345, "learning_rate": 6.464383398969687e-06, "loss": 0.7991, "step": 6998 }, { "epoch": 0.6266942750013991, "grad_norm": 0.8577228621325402, "learning_rate": 6.461670585172881e-06, "loss": 0.8297, "step": 6999 }, { "epoch": 0.6267838155464771, "grad_norm": 0.9322767207522131, "learning_rate": 6.458958069008816e-06, "loss": 0.7479, "step": 7000 }, { "epoch": 0.6268733560915553, "grad_norm": 0.8817747808095097, "learning_rate": 6.456245850705658e-06, "loss": 0.8036, "step": 7001 }, { "epoch": 0.6269628966366333, "grad_norm": 1.0804966438465284, "learning_rate": 6.453533930491551e-06, "loss": 0.8275, "step": 7002 }, { "epoch": 0.6270524371817113, "grad_norm": 0.9639257986317283, "learning_rate": 6.4508223085946105e-06, "loss": 0.8324, "step": 7003 }, { "epoch": 0.6271419777267894, "grad_norm": 1.0462596674159441, "learning_rate": 6.448110985242935e-06, "loss": 0.74, "step": 7004 }, { "epoch": 0.6272315182718675, "grad_norm": 0.8968321892284347, "learning_rate": 6.44539996066459e-06, "loss": 0.7746, "step": 7005 }, { "epoch": 0.6273210588169456, "grad_norm": 0.9274040059790555, "learning_rate": 6.442689235087615e-06, "loss": 0.7564, "step": 7006 }, { "epoch": 0.6274105993620236, "grad_norm": 0.9702677854887344, "learning_rate": 6.4399788087400285e-06, "loss": 0.8232, "step": 7007 }, { "epoch": 0.6275001399071017, "grad_norm": 0.9872340770929972, "learning_rate": 6.437268681849824e-06, "loss": 0.845, "step": 7008 }, { "epoch": 0.6275896804521798, "grad_norm": 0.8409612140230771, "learning_rate": 6.4345588546449675e-06, "loss": 0.7976, "step": 7009 }, { "epoch": 0.6276792209972578, "grad_norm": 0.9822073508036436, "learning_rate": 6.431849327353401e-06, "loss": 0.8187, "step": 7010 }, { "epoch": 0.6277687615423359, "grad_norm": 1.0096700515470856, "learning_rate": 6.429140100203046e-06, "loss": 0.7993, "step": 7011 }, { "epoch": 0.627858302087414, "grad_norm": 1.033353444634398, "learning_rate": 6.4264311734217855e-06, "loss": 0.8278, "step": 7012 }, { "epoch": 0.627947842632492, "grad_norm": 1.0193287839705376, "learning_rate": 6.423722547237491e-06, "loss": 0.7762, "step": 7013 }, { "epoch": 0.6280373831775701, "grad_norm": 0.8986007300315252, "learning_rate": 6.421014221878001e-06, "loss": 0.8822, "step": 7014 }, { "epoch": 0.6281269237226481, "grad_norm": 0.9828611344085806, "learning_rate": 6.418306197571129e-06, "loss": 0.8941, "step": 7015 }, { "epoch": 0.6282164642677263, "grad_norm": 0.9953716396986332, "learning_rate": 6.4155984745446754e-06, "loss": 0.8558, "step": 7016 }, { "epoch": 0.6283060048128043, "grad_norm": 0.9986898618648041, "learning_rate": 6.412891053026391e-06, "loss": 0.8363, "step": 7017 }, { "epoch": 0.6283955453578823, "grad_norm": 1.0150457948423264, "learning_rate": 6.410183933244023e-06, "loss": 0.8416, "step": 7018 }, { "epoch": 0.6284850859029605, "grad_norm": 1.0407326145251505, "learning_rate": 6.407477115425287e-06, "loss": 0.8183, "step": 7019 }, { "epoch": 0.6285746264480385, "grad_norm": 0.8840504436937133, "learning_rate": 6.404770599797867e-06, "loss": 0.8004, "step": 7020 }, { "epoch": 0.6286641669931166, "grad_norm": 1.0336948076215837, "learning_rate": 6.402064386589426e-06, "loss": 0.7982, "step": 7021 }, { "epoch": 0.6287537075381946, "grad_norm": 0.948788830946544, "learning_rate": 6.3993584760276105e-06, "loss": 0.824, "step": 7022 }, { "epoch": 0.6288432480832727, "grad_norm": 0.9751187453063612, "learning_rate": 6.396652868340021e-06, "loss": 0.816, "step": 7023 }, { "epoch": 0.6289327886283508, "grad_norm": 0.8392067383607967, "learning_rate": 6.393947563754253e-06, "loss": 0.7413, "step": 7024 }, { "epoch": 0.6290223291734288, "grad_norm": 0.8762935430495283, "learning_rate": 6.391242562497864e-06, "loss": 0.8087, "step": 7025 }, { "epoch": 0.629111869718507, "grad_norm": 1.021277806652637, "learning_rate": 6.3885378647983896e-06, "loss": 0.8523, "step": 7026 }, { "epoch": 0.629201410263585, "grad_norm": 0.9301618998883415, "learning_rate": 6.385833470883345e-06, "loss": 0.7941, "step": 7027 }, { "epoch": 0.629290950808663, "grad_norm": 0.9445009272389847, "learning_rate": 6.383129380980209e-06, "loss": 0.8171, "step": 7028 }, { "epoch": 0.6293804913537411, "grad_norm": 0.9296986104138628, "learning_rate": 6.380425595316442e-06, "loss": 0.8268, "step": 7029 }, { "epoch": 0.6294700318988192, "grad_norm": 1.136819963228013, "learning_rate": 6.377722114119478e-06, "loss": 0.7714, "step": 7030 }, { "epoch": 0.6295595724438973, "grad_norm": 1.0700816909027835, "learning_rate": 6.375018937616727e-06, "loss": 0.8638, "step": 7031 }, { "epoch": 0.6296491129889753, "grad_norm": 0.9959267207377598, "learning_rate": 6.372316066035571e-06, "loss": 0.7659, "step": 7032 }, { "epoch": 0.6297386535340533, "grad_norm": 1.1157612145949023, "learning_rate": 6.369613499603368e-06, "loss": 0.7986, "step": 7033 }, { "epoch": 0.6298281940791315, "grad_norm": 0.9349567258103602, "learning_rate": 6.3669112385474445e-06, "loss": 0.7471, "step": 7034 }, { "epoch": 0.6299177346242095, "grad_norm": 1.56993488926938, "learning_rate": 6.364209283095108e-06, "loss": 0.8598, "step": 7035 }, { "epoch": 0.6300072751692876, "grad_norm": 0.971158141512142, "learning_rate": 6.361507633473638e-06, "loss": 0.8137, "step": 7036 }, { "epoch": 0.6300968157143657, "grad_norm": 1.3686136629563976, "learning_rate": 6.358806289910291e-06, "loss": 0.8253, "step": 7037 }, { "epoch": 0.6301863562594437, "grad_norm": 0.953686542401127, "learning_rate": 6.356105252632294e-06, "loss": 0.8578, "step": 7038 }, { "epoch": 0.6302758968045218, "grad_norm": 0.9369165940752319, "learning_rate": 6.353404521866848e-06, "loss": 0.8119, "step": 7039 }, { "epoch": 0.6303654373495998, "grad_norm": 0.8767765889042951, "learning_rate": 6.350704097841129e-06, "loss": 0.7958, "step": 7040 }, { "epoch": 0.630454977894678, "grad_norm": 0.9396009894944904, "learning_rate": 6.348003980782291e-06, "loss": 0.7842, "step": 7041 }, { "epoch": 0.630544518439756, "grad_norm": 0.9546018855617986, "learning_rate": 6.345304170917454e-06, "loss": 0.8027, "step": 7042 }, { "epoch": 0.630634058984834, "grad_norm": 0.9206310639484706, "learning_rate": 6.342604668473724e-06, "loss": 0.7856, "step": 7043 }, { "epoch": 0.6307235995299122, "grad_norm": 0.9995689861637845, "learning_rate": 6.339905473678172e-06, "loss": 0.8362, "step": 7044 }, { "epoch": 0.6308131400749902, "grad_norm": 0.960493144933787, "learning_rate": 6.337206586757842e-06, "loss": 0.8005, "step": 7045 }, { "epoch": 0.6309026806200683, "grad_norm": 1.0543854825983656, "learning_rate": 6.334508007939759e-06, "loss": 0.7975, "step": 7046 }, { "epoch": 0.6309922211651463, "grad_norm": 0.9669013494214188, "learning_rate": 6.3318097374509165e-06, "loss": 0.8243, "step": 7047 }, { "epoch": 0.6310817617102245, "grad_norm": 1.0782000958648794, "learning_rate": 6.329111775518284e-06, "loss": 0.7859, "step": 7048 }, { "epoch": 0.6311713022553025, "grad_norm": 1.099900654164012, "learning_rate": 6.326414122368814e-06, "loss": 0.7603, "step": 7049 }, { "epoch": 0.6312608428003805, "grad_norm": 1.0095604448697453, "learning_rate": 6.323716778229411e-06, "loss": 0.8405, "step": 7050 }, { "epoch": 0.6313503833454586, "grad_norm": 1.0175641576941423, "learning_rate": 6.32101974332697e-06, "loss": 0.8192, "step": 7051 }, { "epoch": 0.6314399238905367, "grad_norm": 0.987528811422066, "learning_rate": 6.318323017888364e-06, "loss": 0.8226, "step": 7052 }, { "epoch": 0.6315294644356148, "grad_norm": 0.9824681983423325, "learning_rate": 6.315626602140425e-06, "loss": 0.8892, "step": 7053 }, { "epoch": 0.6316190049806928, "grad_norm": 0.9255146575702566, "learning_rate": 6.31293049630997e-06, "loss": 0.7946, "step": 7054 }, { "epoch": 0.6317085455257709, "grad_norm": 0.9963842637196021, "learning_rate": 6.310234700623794e-06, "loss": 0.8486, "step": 7055 }, { "epoch": 0.631798086070849, "grad_norm": 0.9984743381961027, "learning_rate": 6.307539215308644e-06, "loss": 0.869, "step": 7056 }, { "epoch": 0.631887626615927, "grad_norm": 0.9173593219005337, "learning_rate": 6.304844040591263e-06, "loss": 0.8366, "step": 7057 }, { "epoch": 0.631977167161005, "grad_norm": 0.8843740764529929, "learning_rate": 6.302149176698361e-06, "loss": 0.8006, "step": 7058 }, { "epoch": 0.6320667077060832, "grad_norm": 0.9183723247563366, "learning_rate": 6.29945462385662e-06, "loss": 0.8269, "step": 7059 }, { "epoch": 0.6321562482511612, "grad_norm": 0.9629639512738353, "learning_rate": 6.296760382292699e-06, "loss": 0.809, "step": 7060 }, { "epoch": 0.6322457887962393, "grad_norm": 1.0282789616588772, "learning_rate": 6.294066452233225e-06, "loss": 0.7885, "step": 7061 }, { "epoch": 0.6323353293413174, "grad_norm": 1.1493429401219268, "learning_rate": 6.291372833904805e-06, "loss": 0.8562, "step": 7062 }, { "epoch": 0.6324248698863955, "grad_norm": 0.9884442962337517, "learning_rate": 6.288679527534014e-06, "loss": 0.8642, "step": 7063 }, { "epoch": 0.6325144104314735, "grad_norm": 0.907608382429478, "learning_rate": 6.28598653334741e-06, "loss": 0.8487, "step": 7064 }, { "epoch": 0.6326039509765515, "grad_norm": 0.91711785399273, "learning_rate": 6.283293851571515e-06, "loss": 0.8354, "step": 7065 }, { "epoch": 0.6326934915216297, "grad_norm": 0.9097698053028457, "learning_rate": 6.280601482432831e-06, "loss": 0.8078, "step": 7066 }, { "epoch": 0.6327830320667077, "grad_norm": 0.848249897616302, "learning_rate": 6.277909426157829e-06, "loss": 0.7687, "step": 7067 }, { "epoch": 0.6328725726117858, "grad_norm": 0.8598755819340291, "learning_rate": 6.275217682972957e-06, "loss": 0.8479, "step": 7068 }, { "epoch": 0.6329621131568638, "grad_norm": 0.9244989216750829, "learning_rate": 6.272526253104634e-06, "loss": 0.8345, "step": 7069 }, { "epoch": 0.6330516537019419, "grad_norm": 0.9866361305073986, "learning_rate": 6.269835136779257e-06, "loss": 0.8581, "step": 7070 }, { "epoch": 0.63314119424702, "grad_norm": 0.9035743965467674, "learning_rate": 6.267144334223194e-06, "loss": 0.8186, "step": 7071 }, { "epoch": 0.633230734792098, "grad_norm": 0.8869064088197958, "learning_rate": 6.264453845662785e-06, "loss": 0.8365, "step": 7072 }, { "epoch": 0.6333202753371762, "grad_norm": 1.0018109078297015, "learning_rate": 6.261763671324345e-06, "loss": 0.8401, "step": 7073 }, { "epoch": 0.6334098158822542, "grad_norm": 1.0271065329642635, "learning_rate": 6.259073811434162e-06, "loss": 0.8972, "step": 7074 }, { "epoch": 0.6334993564273322, "grad_norm": 1.0018144845395736, "learning_rate": 6.256384266218498e-06, "loss": 0.8914, "step": 7075 }, { "epoch": 0.6335888969724103, "grad_norm": 0.8854649655012243, "learning_rate": 6.253695035903598e-06, "loss": 0.7767, "step": 7076 }, { "epoch": 0.6336784375174884, "grad_norm": 0.9337421350337655, "learning_rate": 6.2510061207156566e-06, "loss": 0.835, "step": 7077 }, { "epoch": 0.6337679780625665, "grad_norm": 0.9895732225870155, "learning_rate": 6.2483175208808634e-06, "loss": 0.7911, "step": 7078 }, { "epoch": 0.6338575186076445, "grad_norm": 0.9358033205276218, "learning_rate": 6.245629236625376e-06, "loss": 0.8796, "step": 7079 }, { "epoch": 0.6339470591527226, "grad_norm": 1.0673187047237729, "learning_rate": 6.2429412681753224e-06, "loss": 0.8294, "step": 7080 }, { "epoch": 0.6340365996978007, "grad_norm": 1.0116146000516488, "learning_rate": 6.240253615756805e-06, "loss": 0.7313, "step": 7081 }, { "epoch": 0.6341261402428787, "grad_norm": 0.9465729691199621, "learning_rate": 6.237566279595908e-06, "loss": 0.7974, "step": 7082 }, { "epoch": 0.6342156807879568, "grad_norm": 0.9165613820924751, "learning_rate": 6.2348792599186695e-06, "loss": 0.8165, "step": 7083 }, { "epoch": 0.6343052213330349, "grad_norm": 0.8921114534459816, "learning_rate": 6.232192556951118e-06, "loss": 0.8104, "step": 7084 }, { "epoch": 0.634394761878113, "grad_norm": 0.9957749597806583, "learning_rate": 6.2295061709192505e-06, "loss": 0.8351, "step": 7085 }, { "epoch": 0.634484302423191, "grad_norm": 0.9988394246241886, "learning_rate": 6.226820102049038e-06, "loss": 0.792, "step": 7086 }, { "epoch": 0.634573842968269, "grad_norm": 1.126694794481609, "learning_rate": 6.224134350566424e-06, "loss": 0.8273, "step": 7087 }, { "epoch": 0.6346633835133472, "grad_norm": 1.0466130894346675, "learning_rate": 6.2214489166973235e-06, "loss": 0.8551, "step": 7088 }, { "epoch": 0.6347529240584252, "grad_norm": 0.9081586423033162, "learning_rate": 6.218763800667625e-06, "loss": 0.8493, "step": 7089 }, { "epoch": 0.6348424646035032, "grad_norm": 0.9909629787937567, "learning_rate": 6.216079002703193e-06, "loss": 0.7679, "step": 7090 }, { "epoch": 0.6349320051485814, "grad_norm": 1.0170584340470656, "learning_rate": 6.2133945230298675e-06, "loss": 0.872, "step": 7091 }, { "epoch": 0.6350215456936594, "grad_norm": 0.9251934026850186, "learning_rate": 6.210710361873453e-06, "loss": 0.8173, "step": 7092 }, { "epoch": 0.6351110862387375, "grad_norm": 0.9398167104802502, "learning_rate": 6.208026519459738e-06, "loss": 0.8485, "step": 7093 }, { "epoch": 0.6352006267838155, "grad_norm": 0.9658341075945795, "learning_rate": 6.205342996014474e-06, "loss": 0.7861, "step": 7094 }, { "epoch": 0.6352901673288937, "grad_norm": 0.9222276685307285, "learning_rate": 6.20265979176339e-06, "loss": 0.8395, "step": 7095 }, { "epoch": 0.6353797078739717, "grad_norm": 0.988598157977415, "learning_rate": 6.199976906932188e-06, "loss": 0.8352, "step": 7096 }, { "epoch": 0.6354692484190497, "grad_norm": 0.884046723470731, "learning_rate": 6.197294341746549e-06, "loss": 0.8349, "step": 7097 }, { "epoch": 0.6355587889641279, "grad_norm": 1.056173020712827, "learning_rate": 6.194612096432119e-06, "loss": 0.8415, "step": 7098 }, { "epoch": 0.6356483295092059, "grad_norm": 0.9460365365340243, "learning_rate": 6.191930171214517e-06, "loss": 0.7828, "step": 7099 }, { "epoch": 0.635737870054284, "grad_norm": 0.9250268128591796, "learning_rate": 6.189248566319339e-06, "loss": 0.8066, "step": 7100 }, { "epoch": 0.635827410599362, "grad_norm": 0.8633541502219753, "learning_rate": 6.1865672819721545e-06, "loss": 0.8139, "step": 7101 }, { "epoch": 0.6359169511444401, "grad_norm": 1.038272444169285, "learning_rate": 6.1838863183985e-06, "loss": 0.8798, "step": 7102 }, { "epoch": 0.6360064916895182, "grad_norm": 1.0582820831312751, "learning_rate": 6.181205675823896e-06, "loss": 0.9162, "step": 7103 }, { "epoch": 0.6360960322345962, "grad_norm": 0.9831825362479607, "learning_rate": 6.17852535447383e-06, "loss": 0.7819, "step": 7104 }, { "epoch": 0.6361855727796742, "grad_norm": 1.009434581984189, "learning_rate": 6.175845354573753e-06, "loss": 0.8788, "step": 7105 }, { "epoch": 0.6362751133247524, "grad_norm": 0.9803034126725186, "learning_rate": 6.173165676349103e-06, "loss": 0.7996, "step": 7106 }, { "epoch": 0.6363646538698304, "grad_norm": 0.9697130406576904, "learning_rate": 6.170486320025287e-06, "loss": 0.822, "step": 7107 }, { "epoch": 0.6364541944149085, "grad_norm": 1.0090211000430436, "learning_rate": 6.1678072858276805e-06, "loss": 0.8733, "step": 7108 }, { "epoch": 0.6365437349599866, "grad_norm": 0.9968511454912732, "learning_rate": 6.165128573981642e-06, "loss": 0.7939, "step": 7109 }, { "epoch": 0.6366332755050647, "grad_norm": 0.8891754667407683, "learning_rate": 6.16245018471249e-06, "loss": 0.8345, "step": 7110 }, { "epoch": 0.6367228160501427, "grad_norm": 1.0813200606014772, "learning_rate": 6.159772118245518e-06, "loss": 0.8952, "step": 7111 }, { "epoch": 0.6368123565952207, "grad_norm": 0.8896963469459684, "learning_rate": 6.157094374806005e-06, "loss": 0.7993, "step": 7112 }, { "epoch": 0.6369018971402989, "grad_norm": 0.9093560424284486, "learning_rate": 6.154416954619189e-06, "loss": 0.8348, "step": 7113 }, { "epoch": 0.6369914376853769, "grad_norm": 0.9125174015568591, "learning_rate": 6.1517398579102885e-06, "loss": 0.8531, "step": 7114 }, { "epoch": 0.637080978230455, "grad_norm": 1.036580688636479, "learning_rate": 6.149063084904492e-06, "loss": 0.8027, "step": 7115 }, { "epoch": 0.6371705187755331, "grad_norm": 1.0198915751417055, "learning_rate": 6.1463866358269575e-06, "loss": 0.863, "step": 7116 }, { "epoch": 0.6372600593206111, "grad_norm": 0.911278093190435, "learning_rate": 6.143710510902821e-06, "loss": 0.8349, "step": 7117 }, { "epoch": 0.6373495998656892, "grad_norm": 0.9389083708685582, "learning_rate": 6.14103471035719e-06, "loss": 0.7833, "step": 7118 }, { "epoch": 0.6374391404107672, "grad_norm": 0.9317400669709383, "learning_rate": 6.138359234415146e-06, "loss": 0.7714, "step": 7119 }, { "epoch": 0.6375286809558454, "grad_norm": 0.9333156352087469, "learning_rate": 6.135684083301738e-06, "loss": 0.8253, "step": 7120 }, { "epoch": 0.6376182215009234, "grad_norm": 0.9576604584757233, "learning_rate": 6.133009257241993e-06, "loss": 0.8249, "step": 7121 }, { "epoch": 0.6377077620460014, "grad_norm": 0.9825255573724618, "learning_rate": 6.130334756460907e-06, "loss": 0.7953, "step": 7122 }, { "epoch": 0.6377973025910795, "grad_norm": 0.939528790669359, "learning_rate": 6.1276605811834485e-06, "loss": 0.8208, "step": 7123 }, { "epoch": 0.6378868431361576, "grad_norm": 0.9057899481825418, "learning_rate": 6.124986731634566e-06, "loss": 0.8007, "step": 7124 }, { "epoch": 0.6379763836812357, "grad_norm": 1.0010044825025657, "learning_rate": 6.122313208039172e-06, "loss": 0.7948, "step": 7125 }, { "epoch": 0.6380659242263137, "grad_norm": 0.9091039513398609, "learning_rate": 6.119640010622157e-06, "loss": 0.8229, "step": 7126 }, { "epoch": 0.6381554647713918, "grad_norm": 0.9333508445959617, "learning_rate": 6.116967139608377e-06, "loss": 0.8466, "step": 7127 }, { "epoch": 0.6382450053164699, "grad_norm": 0.9862860070584623, "learning_rate": 6.114294595222667e-06, "loss": 0.8567, "step": 7128 }, { "epoch": 0.6383345458615479, "grad_norm": 0.9312588972876452, "learning_rate": 6.111622377689832e-06, "loss": 0.8367, "step": 7129 }, { "epoch": 0.638424086406626, "grad_norm": 1.052348515470203, "learning_rate": 6.108950487234653e-06, "loss": 0.7828, "step": 7130 }, { "epoch": 0.6385136269517041, "grad_norm": 0.9531537235051175, "learning_rate": 6.106278924081883e-06, "loss": 0.8478, "step": 7131 }, { "epoch": 0.6386031674967821, "grad_norm": 0.8989914778417699, "learning_rate": 6.103607688456237e-06, "loss": 0.826, "step": 7132 }, { "epoch": 0.6386927080418602, "grad_norm": 1.0080488468202637, "learning_rate": 6.100936780582416e-06, "loss": 0.7689, "step": 7133 }, { "epoch": 0.6387822485869383, "grad_norm": 0.9350502948572023, "learning_rate": 6.098266200685088e-06, "loss": 0.8203, "step": 7134 }, { "epoch": 0.6388717891320164, "grad_norm": 0.9842593086685824, "learning_rate": 6.095595948988888e-06, "loss": 0.7946, "step": 7135 }, { "epoch": 0.6389613296770944, "grad_norm": 0.9891581732191831, "learning_rate": 6.092926025718438e-06, "loss": 0.8316, "step": 7136 }, { "epoch": 0.6390508702221724, "grad_norm": 0.9484838801193954, "learning_rate": 6.090256431098323e-06, "loss": 0.7725, "step": 7137 }, { "epoch": 0.6391404107672506, "grad_norm": 0.9314814210895521, "learning_rate": 6.087587165353088e-06, "loss": 0.8316, "step": 7138 }, { "epoch": 0.6392299513123286, "grad_norm": 1.0177008987835385, "learning_rate": 6.084918228707275e-06, "loss": 0.7743, "step": 7139 }, { "epoch": 0.6393194918574067, "grad_norm": 1.035713651930231, "learning_rate": 6.0822496213853825e-06, "loss": 0.85, "step": 7140 }, { "epoch": 0.6394090324024847, "grad_norm": 0.9038513971622288, "learning_rate": 6.079581343611885e-06, "loss": 0.8487, "step": 7141 }, { "epoch": 0.6394985729475628, "grad_norm": 1.2458075230027799, "learning_rate": 6.076913395611231e-06, "loss": 0.7505, "step": 7142 }, { "epoch": 0.6395881134926409, "grad_norm": 1.116459577247019, "learning_rate": 6.074245777607835e-06, "loss": 0.8345, "step": 7143 }, { "epoch": 0.6396776540377189, "grad_norm": 1.01744219089649, "learning_rate": 6.071578489826091e-06, "loss": 0.7972, "step": 7144 }, { "epoch": 0.6397671945827971, "grad_norm": 0.95228955360677, "learning_rate": 6.068911532490364e-06, "loss": 0.817, "step": 7145 }, { "epoch": 0.6398567351278751, "grad_norm": 1.2074015714991864, "learning_rate": 6.066244905824988e-06, "loss": 0.8308, "step": 7146 }, { "epoch": 0.6399462756729531, "grad_norm": 0.9560156162127247, "learning_rate": 6.0635786100542745e-06, "loss": 0.821, "step": 7147 }, { "epoch": 0.6400358162180312, "grad_norm": 1.1504852039171807, "learning_rate": 6.060912645402499e-06, "loss": 0.8489, "step": 7148 }, { "epoch": 0.6401253567631093, "grad_norm": 0.9827147740012714, "learning_rate": 6.058247012093915e-06, "loss": 0.7824, "step": 7149 }, { "epoch": 0.6402148973081874, "grad_norm": 1.0186738854413284, "learning_rate": 6.055581710352744e-06, "loss": 0.8264, "step": 7150 }, { "epoch": 0.6403044378532654, "grad_norm": 0.8655650413701632, "learning_rate": 6.0529167404031905e-06, "loss": 0.7659, "step": 7151 }, { "epoch": 0.6403939783983436, "grad_norm": 0.9091217544442665, "learning_rate": 6.050252102469417e-06, "loss": 0.8397, "step": 7152 }, { "epoch": 0.6404835189434216, "grad_norm": 1.0390417148127027, "learning_rate": 6.0475877967755685e-06, "loss": 0.8498, "step": 7153 }, { "epoch": 0.6405730594884996, "grad_norm": 0.9426177171933874, "learning_rate": 6.044923823545752e-06, "loss": 0.8181, "step": 7154 }, { "epoch": 0.6406626000335777, "grad_norm": 0.9821242494428377, "learning_rate": 6.042260183004054e-06, "loss": 0.8743, "step": 7155 }, { "epoch": 0.6407521405786558, "grad_norm": 0.9647892315068132, "learning_rate": 6.039596875374531e-06, "loss": 0.8243, "step": 7156 }, { "epoch": 0.6408416811237339, "grad_norm": 0.9633553608802812, "learning_rate": 6.036933900881217e-06, "loss": 0.7716, "step": 7157 }, { "epoch": 0.6409312216688119, "grad_norm": 1.0457879016168352, "learning_rate": 6.0342712597481105e-06, "loss": 0.8639, "step": 7158 }, { "epoch": 0.6410207622138899, "grad_norm": 0.9128032712085212, "learning_rate": 6.0316089521991775e-06, "loss": 0.8616, "step": 7159 }, { "epoch": 0.6411103027589681, "grad_norm": 1.0471197353760968, "learning_rate": 6.02894697845837e-06, "loss": 0.7589, "step": 7160 }, { "epoch": 0.6411998433040461, "grad_norm": 0.961378897080113, "learning_rate": 6.0262853387496e-06, "loss": 0.781, "step": 7161 }, { "epoch": 0.6412893838491242, "grad_norm": 0.893724743883784, "learning_rate": 6.023624033296758e-06, "loss": 0.797, "step": 7162 }, { "epoch": 0.6413789243942023, "grad_norm": 1.0501690257706617, "learning_rate": 6.020963062323706e-06, "loss": 0.8116, "step": 7163 }, { "epoch": 0.6414684649392803, "grad_norm": 0.9558225785946043, "learning_rate": 6.0183024260542785e-06, "loss": 0.7995, "step": 7164 }, { "epoch": 0.6415580054843584, "grad_norm": 1.1114284541026738, "learning_rate": 6.015642124712271e-06, "loss": 0.8196, "step": 7165 }, { "epoch": 0.6416475460294364, "grad_norm": 0.966595041604333, "learning_rate": 6.012982158521465e-06, "loss": 0.8478, "step": 7166 }, { "epoch": 0.6417370865745146, "grad_norm": 0.9409852265125261, "learning_rate": 6.010322527705608e-06, "loss": 0.8279, "step": 7167 }, { "epoch": 0.6418266271195926, "grad_norm": 1.0095614291749664, "learning_rate": 6.007663232488418e-06, "loss": 0.8369, "step": 7168 }, { "epoch": 0.6419161676646706, "grad_norm": 0.9007443120439561, "learning_rate": 6.00500427309359e-06, "loss": 0.8349, "step": 7169 }, { "epoch": 0.6420057082097488, "grad_norm": 0.9422820299417833, "learning_rate": 6.002345649744781e-06, "loss": 0.8071, "step": 7170 }, { "epoch": 0.6420952487548268, "grad_norm": 1.0150018023033678, "learning_rate": 5.999687362665627e-06, "loss": 0.8147, "step": 7171 }, { "epoch": 0.6421847892999049, "grad_norm": 1.0140194360928527, "learning_rate": 5.997029412079738e-06, "loss": 0.8263, "step": 7172 }, { "epoch": 0.6422743298449829, "grad_norm": 0.949623263854444, "learning_rate": 5.994371798210692e-06, "loss": 0.8555, "step": 7173 }, { "epoch": 0.642363870390061, "grad_norm": 0.9855417455915145, "learning_rate": 5.991714521282035e-06, "loss": 0.8175, "step": 7174 }, { "epoch": 0.6424534109351391, "grad_norm": 1.037590227964297, "learning_rate": 5.989057581517295e-06, "loss": 0.8452, "step": 7175 }, { "epoch": 0.6425429514802171, "grad_norm": 0.8604341734739114, "learning_rate": 5.986400979139957e-06, "loss": 0.784, "step": 7176 }, { "epoch": 0.6426324920252952, "grad_norm": 0.9310382953543267, "learning_rate": 5.9837447143734875e-06, "loss": 0.8141, "step": 7177 }, { "epoch": 0.6427220325703733, "grad_norm": 0.9051739014057687, "learning_rate": 5.981088787441327e-06, "loss": 0.8018, "step": 7178 }, { "epoch": 0.6428115731154513, "grad_norm": 0.9704794002918998, "learning_rate": 5.978433198566882e-06, "loss": 0.8116, "step": 7179 }, { "epoch": 0.6429011136605294, "grad_norm": 0.9853586782721148, "learning_rate": 5.975777947973532e-06, "loss": 0.8369, "step": 7180 }, { "epoch": 0.6429906542056075, "grad_norm": 1.0386183403366624, "learning_rate": 5.973123035884626e-06, "loss": 0.8452, "step": 7181 }, { "epoch": 0.6430801947506856, "grad_norm": 0.9445704628522498, "learning_rate": 5.9704684625234875e-06, "loss": 0.8314, "step": 7182 }, { "epoch": 0.6431697352957636, "grad_norm": 1.159075662613163, "learning_rate": 5.967814228113409e-06, "loss": 0.8297, "step": 7183 }, { "epoch": 0.6432592758408416, "grad_norm": 0.9384246141434094, "learning_rate": 5.9651603328776606e-06, "loss": 0.8138, "step": 7184 }, { "epoch": 0.6433488163859198, "grad_norm": 0.9605118880508731, "learning_rate": 5.962506777039476e-06, "loss": 0.8429, "step": 7185 }, { "epoch": 0.6434383569309978, "grad_norm": 0.9702023425419578, "learning_rate": 5.959853560822066e-06, "loss": 0.8113, "step": 7186 }, { "epoch": 0.6435278974760759, "grad_norm": 1.0023719681192862, "learning_rate": 5.957200684448607e-06, "loss": 0.8283, "step": 7187 }, { "epoch": 0.643617438021154, "grad_norm": 0.9313455166754111, "learning_rate": 5.954548148142254e-06, "loss": 0.817, "step": 7188 }, { "epoch": 0.643706978566232, "grad_norm": 0.9833967254459589, "learning_rate": 5.951895952126125e-06, "loss": 0.8319, "step": 7189 }, { "epoch": 0.6437965191113101, "grad_norm": 1.1617818403893587, "learning_rate": 5.949244096623317e-06, "loss": 0.8048, "step": 7190 }, { "epoch": 0.6438860596563881, "grad_norm": 0.9104065046674543, "learning_rate": 5.9465925818569e-06, "loss": 0.8482, "step": 7191 }, { "epoch": 0.6439756002014663, "grad_norm": 0.9767775990518126, "learning_rate": 5.9439414080499015e-06, "loss": 0.8607, "step": 7192 }, { "epoch": 0.6440651407465443, "grad_norm": 0.904576624021083, "learning_rate": 5.9412905754253355e-06, "loss": 0.7803, "step": 7193 }, { "epoch": 0.6441546812916223, "grad_norm": 0.9633275396002314, "learning_rate": 5.93864008420618e-06, "loss": 0.8629, "step": 7194 }, { "epoch": 0.6442442218367004, "grad_norm": 1.2767285607196206, "learning_rate": 5.935989934615386e-06, "loss": 0.8908, "step": 7195 }, { "epoch": 0.6443337623817785, "grad_norm": 0.9308928192757084, "learning_rate": 5.933340126875872e-06, "loss": 0.8048, "step": 7196 }, { "epoch": 0.6444233029268566, "grad_norm": 0.9551597668923183, "learning_rate": 5.930690661210543e-06, "loss": 0.8162, "step": 7197 }, { "epoch": 0.6445128434719346, "grad_norm": 0.9789550251092578, "learning_rate": 5.928041537842248e-06, "loss": 0.8172, "step": 7198 }, { "epoch": 0.6446023840170128, "grad_norm": 0.9479853693176701, "learning_rate": 5.925392756993831e-06, "loss": 0.7687, "step": 7199 }, { "epoch": 0.6446919245620908, "grad_norm": 1.0231335816437772, "learning_rate": 5.922744318888098e-06, "loss": 0.8441, "step": 7200 }, { "epoch": 0.6447814651071688, "grad_norm": 1.188463200990797, "learning_rate": 5.920096223747827e-06, "loss": 0.8319, "step": 7201 }, { "epoch": 0.6448710056522469, "grad_norm": 0.9779486640050746, "learning_rate": 5.917448471795766e-06, "loss": 0.7791, "step": 7202 }, { "epoch": 0.644960546197325, "grad_norm": 0.9008776441958956, "learning_rate": 5.914801063254636e-06, "loss": 0.793, "step": 7203 }, { "epoch": 0.645050086742403, "grad_norm": 0.9745691340883105, "learning_rate": 5.912153998347124e-06, "loss": 0.7704, "step": 7204 }, { "epoch": 0.6451396272874811, "grad_norm": 1.0076331206242952, "learning_rate": 5.909507277295901e-06, "loss": 0.8067, "step": 7205 }, { "epoch": 0.6452291678325592, "grad_norm": 1.1650058535900636, "learning_rate": 5.906860900323595e-06, "loss": 0.832, "step": 7206 }, { "epoch": 0.6453187083776373, "grad_norm": 0.9766281368525203, "learning_rate": 5.904214867652811e-06, "loss": 0.8002, "step": 7207 }, { "epoch": 0.6454082489227153, "grad_norm": 0.9564607747482262, "learning_rate": 5.901569179506128e-06, "loss": 0.7919, "step": 7208 }, { "epoch": 0.6454977894677933, "grad_norm": 1.0127029590676444, "learning_rate": 5.8989238361060875e-06, "loss": 0.7635, "step": 7209 }, { "epoch": 0.6455873300128715, "grad_norm": 0.9958602327563781, "learning_rate": 5.896278837675209e-06, "loss": 0.8196, "step": 7210 }, { "epoch": 0.6456768705579495, "grad_norm": 0.9124020522899216, "learning_rate": 5.893634184435983e-06, "loss": 0.7982, "step": 7211 }, { "epoch": 0.6457664111030276, "grad_norm": 1.0866859959192143, "learning_rate": 5.8909898766108684e-06, "loss": 0.8619, "step": 7212 }, { "epoch": 0.6458559516481056, "grad_norm": 0.9870892615201993, "learning_rate": 5.888345914422298e-06, "loss": 0.7717, "step": 7213 }, { "epoch": 0.6459454921931838, "grad_norm": 1.0000316986711761, "learning_rate": 5.885702298092666e-06, "loss": 0.8308, "step": 7214 }, { "epoch": 0.6460350327382618, "grad_norm": 0.9043873536344031, "learning_rate": 5.883059027844351e-06, "loss": 0.8369, "step": 7215 }, { "epoch": 0.6461245732833398, "grad_norm": 0.8583354639025819, "learning_rate": 5.880416103899696e-06, "loss": 0.7811, "step": 7216 }, { "epoch": 0.646214113828418, "grad_norm": 1.317675241875174, "learning_rate": 5.8777735264810094e-06, "loss": 0.8048, "step": 7217 }, { "epoch": 0.646303654373496, "grad_norm": 0.9707293840339516, "learning_rate": 5.875131295810589e-06, "loss": 0.8198, "step": 7218 }, { "epoch": 0.646393194918574, "grad_norm": 0.9194514313679033, "learning_rate": 5.872489412110674e-06, "loss": 0.8597, "step": 7219 }, { "epoch": 0.6464827354636521, "grad_norm": 0.9735621729259253, "learning_rate": 5.869847875603503e-06, "loss": 0.8087, "step": 7220 }, { "epoch": 0.6465722760087302, "grad_norm": 1.0838850774189066, "learning_rate": 5.8672066865112685e-06, "loss": 0.8692, "step": 7221 }, { "epoch": 0.6466618165538083, "grad_norm": 1.009304888140753, "learning_rate": 5.8645658450561416e-06, "loss": 0.8223, "step": 7222 }, { "epoch": 0.6467513570988863, "grad_norm": 0.9989737903762165, "learning_rate": 5.8619253514602556e-06, "loss": 0.7876, "step": 7223 }, { "epoch": 0.6468408976439645, "grad_norm": 0.9976155510178509, "learning_rate": 5.859285205945733e-06, "loss": 0.7904, "step": 7224 }, { "epoch": 0.6469304381890425, "grad_norm": 0.9929203579267685, "learning_rate": 5.856645408734638e-06, "loss": 0.8615, "step": 7225 }, { "epoch": 0.6470199787341205, "grad_norm": 1.0254432144742018, "learning_rate": 5.85400596004903e-06, "loss": 0.8928, "step": 7226 }, { "epoch": 0.6471095192791986, "grad_norm": 1.1345862247066796, "learning_rate": 5.851366860110932e-06, "loss": 0.8315, "step": 7227 }, { "epoch": 0.6471990598242767, "grad_norm": 0.90370862634329, "learning_rate": 5.848728109142334e-06, "loss": 0.8342, "step": 7228 }, { "epoch": 0.6472886003693548, "grad_norm": 0.9381508430840132, "learning_rate": 5.8460897073652015e-06, "loss": 0.8101, "step": 7229 }, { "epoch": 0.6473781409144328, "grad_norm": 0.9350766262316815, "learning_rate": 5.843451655001464e-06, "loss": 0.8336, "step": 7230 }, { "epoch": 0.6474676814595108, "grad_norm": 0.9321846547337961, "learning_rate": 5.8408139522730265e-06, "loss": 0.845, "step": 7231 }, { "epoch": 0.647557222004589, "grad_norm": 0.9186505726259667, "learning_rate": 5.838176599401768e-06, "loss": 0.7761, "step": 7232 }, { "epoch": 0.647646762549667, "grad_norm": 0.9786971563505652, "learning_rate": 5.835539596609532e-06, "loss": 0.7763, "step": 7233 }, { "epoch": 0.6477363030947451, "grad_norm": 1.0628465920264336, "learning_rate": 5.832902944118135e-06, "loss": 0.8505, "step": 7234 }, { "epoch": 0.6478258436398232, "grad_norm": 0.9768446858649646, "learning_rate": 5.830266642149362e-06, "loss": 0.8335, "step": 7235 }, { "epoch": 0.6479153841849012, "grad_norm": 0.9984463672413127, "learning_rate": 5.827630690924971e-06, "loss": 0.7947, "step": 7236 }, { "epoch": 0.6480049247299793, "grad_norm": 0.9474223630463614, "learning_rate": 5.824995090666691e-06, "loss": 0.859, "step": 7237 }, { "epoch": 0.6480944652750573, "grad_norm": 0.954657300039822, "learning_rate": 5.822359841596217e-06, "loss": 0.7942, "step": 7238 }, { "epoch": 0.6481840058201355, "grad_norm": 0.9348212461987404, "learning_rate": 5.819724943935221e-06, "loss": 0.7745, "step": 7239 }, { "epoch": 0.6482735463652135, "grad_norm": 0.9635219207958502, "learning_rate": 5.817090397905344e-06, "loss": 0.818, "step": 7240 }, { "epoch": 0.6483630869102915, "grad_norm": 0.9205815714730823, "learning_rate": 5.814456203728187e-06, "loss": 0.8774, "step": 7241 }, { "epoch": 0.6484526274553697, "grad_norm": 1.122501330240104, "learning_rate": 5.811822361625332e-06, "loss": 0.7974, "step": 7242 }, { "epoch": 0.6485421680004477, "grad_norm": 0.9573772729650327, "learning_rate": 5.809188871818336e-06, "loss": 0.8192, "step": 7243 }, { "epoch": 0.6486317085455258, "grad_norm": 0.9481248937414388, "learning_rate": 5.806555734528714e-06, "loss": 0.7772, "step": 7244 }, { "epoch": 0.6487212490906038, "grad_norm": 0.9320865521008098, "learning_rate": 5.80392294997796e-06, "loss": 0.7951, "step": 7245 }, { "epoch": 0.648810789635682, "grad_norm": 0.8818783685751009, "learning_rate": 5.801290518387537e-06, "loss": 0.8433, "step": 7246 }, { "epoch": 0.64890033018076, "grad_norm": 1.023759001844589, "learning_rate": 5.798658439978869e-06, "loss": 0.8102, "step": 7247 }, { "epoch": 0.648989870725838, "grad_norm": 1.0071736682873758, "learning_rate": 5.796026714973359e-06, "loss": 0.8412, "step": 7248 }, { "epoch": 0.6490794112709161, "grad_norm": 1.0351473701760932, "learning_rate": 5.793395343592385e-06, "loss": 0.7889, "step": 7249 }, { "epoch": 0.6491689518159942, "grad_norm": 1.016197515940743, "learning_rate": 5.7907643260572875e-06, "loss": 0.8187, "step": 7250 }, { "epoch": 0.6492584923610722, "grad_norm": 0.9696778506381909, "learning_rate": 5.788133662589382e-06, "loss": 0.84, "step": 7251 }, { "epoch": 0.6493480329061503, "grad_norm": 0.94084882398586, "learning_rate": 5.7855033534099425e-06, "loss": 0.8667, "step": 7252 }, { "epoch": 0.6494375734512284, "grad_norm": 0.9351150703275226, "learning_rate": 5.7828733987402284e-06, "loss": 0.8024, "step": 7253 }, { "epoch": 0.6495271139963065, "grad_norm": 0.9506930238154522, "learning_rate": 5.780243798801457e-06, "loss": 0.8023, "step": 7254 }, { "epoch": 0.6496166545413845, "grad_norm": 0.9114792652505668, "learning_rate": 5.777614553814831e-06, "loss": 0.7516, "step": 7255 }, { "epoch": 0.6497061950864625, "grad_norm": 1.0480896483002362, "learning_rate": 5.774985664001509e-06, "loss": 0.8266, "step": 7256 }, { "epoch": 0.6497957356315407, "grad_norm": 0.9926862948361596, "learning_rate": 5.772357129582629e-06, "loss": 0.8198, "step": 7257 }, { "epoch": 0.6498852761766187, "grad_norm": 0.8901943400916471, "learning_rate": 5.769728950779285e-06, "loss": 0.8185, "step": 7258 }, { "epoch": 0.6499748167216968, "grad_norm": 0.9875713262285745, "learning_rate": 5.7671011278125585e-06, "loss": 0.7837, "step": 7259 }, { "epoch": 0.6500643572667748, "grad_norm": 0.9220289211211775, "learning_rate": 5.764473660903487e-06, "loss": 0.8755, "step": 7260 }, { "epoch": 0.650153897811853, "grad_norm": 0.8803950878695499, "learning_rate": 5.761846550273093e-06, "loss": 0.7992, "step": 7261 }, { "epoch": 0.650243438356931, "grad_norm": 0.927495976983483, "learning_rate": 5.759219796142359e-06, "loss": 0.8004, "step": 7262 }, { "epoch": 0.650332978902009, "grad_norm": 1.4579481319230319, "learning_rate": 5.756593398732233e-06, "loss": 0.8024, "step": 7263 }, { "epoch": 0.6504225194470872, "grad_norm": 1.2132165909122088, "learning_rate": 5.753967358263643e-06, "loss": 0.7864, "step": 7264 }, { "epoch": 0.6505120599921652, "grad_norm": 0.9576310212225297, "learning_rate": 5.7513416749574815e-06, "loss": 0.7887, "step": 7265 }, { "epoch": 0.6506016005372433, "grad_norm": 0.8860313128664239, "learning_rate": 5.7487163490346085e-06, "loss": 0.8131, "step": 7266 }, { "epoch": 0.6506911410823213, "grad_norm": 1.1811899705765134, "learning_rate": 5.746091380715868e-06, "loss": 0.8778, "step": 7267 }, { "epoch": 0.6507806816273994, "grad_norm": 1.0877782977768105, "learning_rate": 5.743466770222061e-06, "loss": 0.7988, "step": 7268 }, { "epoch": 0.6508702221724775, "grad_norm": 1.1262100601484728, "learning_rate": 5.740842517773956e-06, "loss": 0.818, "step": 7269 }, { "epoch": 0.6509597627175555, "grad_norm": 1.0768584097694722, "learning_rate": 5.738218623592298e-06, "loss": 0.799, "step": 7270 }, { "epoch": 0.6510493032626337, "grad_norm": 1.1667712587143617, "learning_rate": 5.735595087897803e-06, "loss": 0.8638, "step": 7271 }, { "epoch": 0.6511388438077117, "grad_norm": 0.9581948460663813, "learning_rate": 5.732971910911152e-06, "loss": 0.8233, "step": 7272 }, { "epoch": 0.6512283843527897, "grad_norm": 1.0270593977740592, "learning_rate": 5.730349092852997e-06, "loss": 0.8239, "step": 7273 }, { "epoch": 0.6513179248978678, "grad_norm": 0.9712164456055157, "learning_rate": 5.727726633943964e-06, "loss": 0.8078, "step": 7274 }, { "epoch": 0.6514074654429459, "grad_norm": 0.9423601377653869, "learning_rate": 5.725104534404646e-06, "loss": 0.8135, "step": 7275 }, { "epoch": 0.651497005988024, "grad_norm": 0.9442813646573733, "learning_rate": 5.722482794455602e-06, "loss": 0.8191, "step": 7276 }, { "epoch": 0.651586546533102, "grad_norm": 0.8872418669916327, "learning_rate": 5.719861414317367e-06, "loss": 0.8288, "step": 7277 }, { "epoch": 0.65167608707818, "grad_norm": 0.8626985857443107, "learning_rate": 5.717240394210442e-06, "loss": 0.7957, "step": 7278 }, { "epoch": 0.6517656276232582, "grad_norm": 1.054914101541487, "learning_rate": 5.714619734355298e-06, "loss": 0.8814, "step": 7279 }, { "epoch": 0.6518551681683362, "grad_norm": 0.9446433944216591, "learning_rate": 5.711999434972378e-06, "loss": 0.7696, "step": 7280 }, { "epoch": 0.6519447087134143, "grad_norm": 0.8603795016955215, "learning_rate": 5.7093794962820925e-06, "loss": 0.749, "step": 7281 }, { "epoch": 0.6520342492584924, "grad_norm": 1.0383504863392488, "learning_rate": 5.7067599185048204e-06, "loss": 0.8141, "step": 7282 }, { "epoch": 0.6521237898035704, "grad_norm": 0.8879828846981133, "learning_rate": 5.704140701860915e-06, "loss": 0.7909, "step": 7283 }, { "epoch": 0.6522133303486485, "grad_norm": 0.993997998095104, "learning_rate": 5.701521846570695e-06, "loss": 0.849, "step": 7284 }, { "epoch": 0.6523028708937265, "grad_norm": 1.0302388385306116, "learning_rate": 5.698903352854449e-06, "loss": 0.8142, "step": 7285 }, { "epoch": 0.6523924114388047, "grad_norm": 0.8626413153787811, "learning_rate": 5.696285220932439e-06, "loss": 0.7896, "step": 7286 }, { "epoch": 0.6524819519838827, "grad_norm": 1.0077702194034917, "learning_rate": 5.693667451024889e-06, "loss": 0.8235, "step": 7287 }, { "epoch": 0.6525714925289607, "grad_norm": 0.9593947820404494, "learning_rate": 5.6910500433519995e-06, "loss": 0.8141, "step": 7288 }, { "epoch": 0.6526610330740389, "grad_norm": 1.0530256479896194, "learning_rate": 5.688432998133941e-06, "loss": 0.8382, "step": 7289 }, { "epoch": 0.6527505736191169, "grad_norm": 1.0857446826979422, "learning_rate": 5.685816315590848e-06, "loss": 0.8245, "step": 7290 }, { "epoch": 0.652840114164195, "grad_norm": 0.9081781559056147, "learning_rate": 5.6831999959428274e-06, "loss": 0.8588, "step": 7291 }, { "epoch": 0.652929654709273, "grad_norm": 1.0702134686456568, "learning_rate": 5.680584039409955e-06, "loss": 0.7975, "step": 7292 }, { "epoch": 0.6530191952543511, "grad_norm": 0.9725826135016893, "learning_rate": 5.67796844621228e-06, "loss": 0.8543, "step": 7293 }, { "epoch": 0.6531087357994292, "grad_norm": 0.9043360823316526, "learning_rate": 5.675353216569813e-06, "loss": 0.833, "step": 7294 }, { "epoch": 0.6531982763445072, "grad_norm": 1.0435938332836103, "learning_rate": 5.6727383507025455e-06, "loss": 0.8969, "step": 7295 }, { "epoch": 0.6532878168895853, "grad_norm": 0.9219433006794562, "learning_rate": 5.670123848830419e-06, "loss": 0.8136, "step": 7296 }, { "epoch": 0.6533773574346634, "grad_norm": 0.8801659900234023, "learning_rate": 5.667509711173368e-06, "loss": 0.7641, "step": 7297 }, { "epoch": 0.6534668979797414, "grad_norm": 0.8752097531235958, "learning_rate": 5.664895937951282e-06, "loss": 0.8104, "step": 7298 }, { "epoch": 0.6535564385248195, "grad_norm": 1.040426289631077, "learning_rate": 5.662282529384022e-06, "loss": 0.8574, "step": 7299 }, { "epoch": 0.6536459790698976, "grad_norm": 0.9083628838296266, "learning_rate": 5.659669485691426e-06, "loss": 0.7939, "step": 7300 }, { "epoch": 0.6537355196149757, "grad_norm": 0.8598224016711261, "learning_rate": 5.657056807093284e-06, "loss": 0.7586, "step": 7301 }, { "epoch": 0.6538250601600537, "grad_norm": 1.2002828320899779, "learning_rate": 5.654444493809368e-06, "loss": 0.8542, "step": 7302 }, { "epoch": 0.6539146007051317, "grad_norm": 0.8828257344525524, "learning_rate": 5.6518325460594235e-06, "loss": 0.8246, "step": 7303 }, { "epoch": 0.6540041412502099, "grad_norm": 0.8802968442790198, "learning_rate": 5.649220964063156e-06, "loss": 0.8404, "step": 7304 }, { "epoch": 0.6540936817952879, "grad_norm": 1.1403222437605716, "learning_rate": 5.646609748040241e-06, "loss": 0.8712, "step": 7305 }, { "epoch": 0.654183222340366, "grad_norm": 0.924228868014137, "learning_rate": 5.643998898210334e-06, "loss": 0.7553, "step": 7306 }, { "epoch": 0.6542727628854441, "grad_norm": 0.861392971177566, "learning_rate": 5.641388414793041e-06, "loss": 0.7965, "step": 7307 }, { "epoch": 0.6543623034305222, "grad_norm": 0.9086766161455917, "learning_rate": 5.638778298007947e-06, "loss": 0.8237, "step": 7308 }, { "epoch": 0.6544518439756002, "grad_norm": 0.9135696362139765, "learning_rate": 5.636168548074613e-06, "loss": 0.7975, "step": 7309 }, { "epoch": 0.6545413845206782, "grad_norm": 0.9470629582502175, "learning_rate": 5.633559165212561e-06, "loss": 0.7867, "step": 7310 }, { "epoch": 0.6546309250657564, "grad_norm": 1.0727319680281828, "learning_rate": 5.630950149641288e-06, "loss": 0.818, "step": 7311 }, { "epoch": 0.6547204656108344, "grad_norm": 1.0391089595803378, "learning_rate": 5.628341501580246e-06, "loss": 0.8036, "step": 7312 }, { "epoch": 0.6548100061559124, "grad_norm": 0.8858640118657765, "learning_rate": 5.625733221248872e-06, "loss": 0.7807, "step": 7313 }, { "epoch": 0.6548995467009905, "grad_norm": 1.1314388010321084, "learning_rate": 5.623125308866559e-06, "loss": 0.84, "step": 7314 }, { "epoch": 0.6549890872460686, "grad_norm": 1.0415218186624804, "learning_rate": 5.620517764652688e-06, "loss": 0.7979, "step": 7315 }, { "epoch": 0.6550786277911467, "grad_norm": 0.9047649313690294, "learning_rate": 5.617910588826591e-06, "loss": 0.8513, "step": 7316 }, { "epoch": 0.6551681683362247, "grad_norm": 0.9875673380913135, "learning_rate": 5.61530378160758e-06, "loss": 0.8096, "step": 7317 }, { "epoch": 0.6552577088813029, "grad_norm": 0.9073749978676923, "learning_rate": 5.612697343214921e-06, "loss": 0.7871, "step": 7318 }, { "epoch": 0.6553472494263809, "grad_norm": 0.9735949957372808, "learning_rate": 5.610091273867864e-06, "loss": 0.8511, "step": 7319 }, { "epoch": 0.6554367899714589, "grad_norm": 1.05692911885129, "learning_rate": 5.607485573785621e-06, "loss": 0.7992, "step": 7320 }, { "epoch": 0.655526330516537, "grad_norm": 0.8816779120713112, "learning_rate": 5.6048802431873805e-06, "loss": 0.8113, "step": 7321 }, { "epoch": 0.6556158710616151, "grad_norm": 0.9116957227301384, "learning_rate": 5.602275282292298e-06, "loss": 0.7703, "step": 7322 }, { "epoch": 0.6557054116066932, "grad_norm": 1.1308285974169854, "learning_rate": 5.599670691319481e-06, "loss": 0.8961, "step": 7323 }, { "epoch": 0.6557949521517712, "grad_norm": 1.0660618767927528, "learning_rate": 5.597066470488027e-06, "loss": 0.7863, "step": 7324 }, { "epoch": 0.6558844926968493, "grad_norm": 0.9815065313578324, "learning_rate": 5.594462620016994e-06, "loss": 0.8215, "step": 7325 }, { "epoch": 0.6559740332419274, "grad_norm": 0.9316705052277169, "learning_rate": 5.5918591401254085e-06, "loss": 0.7851, "step": 7326 }, { "epoch": 0.6560635737870054, "grad_norm": 0.997563720851311, "learning_rate": 5.589256031032264e-06, "loss": 0.8207, "step": 7327 }, { "epoch": 0.6561531143320835, "grad_norm": 1.0479683239328443, "learning_rate": 5.586653292956536e-06, "loss": 0.7714, "step": 7328 }, { "epoch": 0.6562426548771616, "grad_norm": 0.9523185888104463, "learning_rate": 5.5840509261171486e-06, "loss": 0.8355, "step": 7329 }, { "epoch": 0.6563321954222396, "grad_norm": 0.9286015896025172, "learning_rate": 5.581448930733007e-06, "loss": 0.8496, "step": 7330 }, { "epoch": 0.6564217359673177, "grad_norm": 0.9173572916632705, "learning_rate": 5.578847307022981e-06, "loss": 0.8393, "step": 7331 }, { "epoch": 0.6565112765123957, "grad_norm": 0.857358620175035, "learning_rate": 5.576246055205914e-06, "loss": 0.8114, "step": 7332 }, { "epoch": 0.6566008170574739, "grad_norm": 0.9077333187836812, "learning_rate": 5.573645175500611e-06, "loss": 0.7975, "step": 7333 }, { "epoch": 0.6566903576025519, "grad_norm": 0.9581889495963095, "learning_rate": 5.571044668125853e-06, "loss": 0.8214, "step": 7334 }, { "epoch": 0.6567798981476299, "grad_norm": 0.920453248188504, "learning_rate": 5.568444533300385e-06, "loss": 0.8618, "step": 7335 }, { "epoch": 0.6568694386927081, "grad_norm": 0.9550485409666128, "learning_rate": 5.565844771242922e-06, "loss": 0.822, "step": 7336 }, { "epoch": 0.6569589792377861, "grad_norm": 0.9996630610352414, "learning_rate": 5.563245382172147e-06, "loss": 0.7952, "step": 7337 }, { "epoch": 0.6570485197828642, "grad_norm": 0.9356980566323745, "learning_rate": 5.560646366306712e-06, "loss": 0.8154, "step": 7338 }, { "epoch": 0.6571380603279422, "grad_norm": 1.0726701922099506, "learning_rate": 5.558047723865239e-06, "loss": 0.7855, "step": 7339 }, { "epoch": 0.6572276008730203, "grad_norm": 1.0545881007799642, "learning_rate": 5.555449455066315e-06, "loss": 0.7662, "step": 7340 }, { "epoch": 0.6573171414180984, "grad_norm": 1.073182487184013, "learning_rate": 5.5528515601285e-06, "loss": 0.807, "step": 7341 }, { "epoch": 0.6574066819631764, "grad_norm": 0.9233745154352145, "learning_rate": 5.550254039270319e-06, "loss": 0.8548, "step": 7342 }, { "epoch": 0.6574962225082546, "grad_norm": 1.1331102846919707, "learning_rate": 5.5476568927102695e-06, "loss": 0.8148, "step": 7343 }, { "epoch": 0.6575857630533326, "grad_norm": 0.9484729438441103, "learning_rate": 5.545060120666812e-06, "loss": 0.7938, "step": 7344 }, { "epoch": 0.6576753035984106, "grad_norm": 1.0418646396880868, "learning_rate": 5.542463723358381e-06, "loss": 0.7911, "step": 7345 }, { "epoch": 0.6577648441434887, "grad_norm": 0.9366589963970824, "learning_rate": 5.539867701003375e-06, "loss": 0.8214, "step": 7346 }, { "epoch": 0.6578543846885668, "grad_norm": 1.005938824071756, "learning_rate": 5.537272053820164e-06, "loss": 0.7835, "step": 7347 }, { "epoch": 0.6579439252336449, "grad_norm": 0.9463366603634388, "learning_rate": 5.534676782027085e-06, "loss": 0.8174, "step": 7348 }, { "epoch": 0.6580334657787229, "grad_norm": 0.8785162720719428, "learning_rate": 5.532081885842448e-06, "loss": 0.7461, "step": 7349 }, { "epoch": 0.6581230063238009, "grad_norm": 0.912922919231555, "learning_rate": 5.529487365484516e-06, "loss": 0.6957, "step": 7350 }, { "epoch": 0.6582125468688791, "grad_norm": 0.9390820851009614, "learning_rate": 5.5268932211715405e-06, "loss": 0.761, "step": 7351 }, { "epoch": 0.6583020874139571, "grad_norm": 0.9343757444995957, "learning_rate": 5.524299453121733e-06, "loss": 0.7784, "step": 7352 }, { "epoch": 0.6583916279590352, "grad_norm": 0.8952348721815246, "learning_rate": 5.521706061553269e-06, "loss": 0.7844, "step": 7353 }, { "epoch": 0.6584811685041133, "grad_norm": 0.9362303813039331, "learning_rate": 5.519113046684299e-06, "loss": 0.7859, "step": 7354 }, { "epoch": 0.6585707090491913, "grad_norm": 0.97334749441531, "learning_rate": 5.51652040873294e-06, "loss": 0.8309, "step": 7355 }, { "epoch": 0.6586602495942694, "grad_norm": 0.925735718346601, "learning_rate": 5.513928147917267e-06, "loss": 0.7839, "step": 7356 }, { "epoch": 0.6587497901393474, "grad_norm": 0.9905775656146384, "learning_rate": 5.511336264455342e-06, "loss": 0.8016, "step": 7357 }, { "epoch": 0.6588393306844256, "grad_norm": 0.9238283605969263, "learning_rate": 5.5087447585651855e-06, "loss": 0.8087, "step": 7358 }, { "epoch": 0.6589288712295036, "grad_norm": 0.9086216972085446, "learning_rate": 5.5061536304647835e-06, "loss": 0.8004, "step": 7359 }, { "epoch": 0.6590184117745816, "grad_norm": 0.9595912388366051, "learning_rate": 5.5035628803720975e-06, "loss": 0.826, "step": 7360 }, { "epoch": 0.6591079523196598, "grad_norm": 1.0393704241714115, "learning_rate": 5.500972508505044e-06, "loss": 0.8234, "step": 7361 }, { "epoch": 0.6591974928647378, "grad_norm": 1.02795833513756, "learning_rate": 5.498382515081522e-06, "loss": 0.8097, "step": 7362 }, { "epoch": 0.6592870334098159, "grad_norm": 0.9272335499815851, "learning_rate": 5.495792900319394e-06, "loss": 0.8145, "step": 7363 }, { "epoch": 0.6593765739548939, "grad_norm": 0.8750219249462865, "learning_rate": 5.49320366443649e-06, "loss": 0.8318, "step": 7364 }, { "epoch": 0.6594661144999721, "grad_norm": 0.9796148929627807, "learning_rate": 5.490614807650608e-06, "loss": 0.7887, "step": 7365 }, { "epoch": 0.6595556550450501, "grad_norm": 0.9348057511564736, "learning_rate": 5.488026330179518e-06, "loss": 0.787, "step": 7366 }, { "epoch": 0.6596451955901281, "grad_norm": 0.9453429997748579, "learning_rate": 5.485438232240945e-06, "loss": 0.7794, "step": 7367 }, { "epoch": 0.6597347361352062, "grad_norm": 1.068253877384166, "learning_rate": 5.4828505140525934e-06, "loss": 0.79, "step": 7368 }, { "epoch": 0.6598242766802843, "grad_norm": 1.0380227832377775, "learning_rate": 5.480263175832139e-06, "loss": 0.8142, "step": 7369 }, { "epoch": 0.6599138172253624, "grad_norm": 0.9372313527571041, "learning_rate": 5.477676217797219e-06, "loss": 0.8475, "step": 7370 }, { "epoch": 0.6600033577704404, "grad_norm": 0.9346860805213398, "learning_rate": 5.475089640165442e-06, "loss": 0.8564, "step": 7371 }, { "epoch": 0.6600928983155185, "grad_norm": 0.8790765893486525, "learning_rate": 5.4725034431543756e-06, "loss": 0.7789, "step": 7372 }, { "epoch": 0.6601824388605966, "grad_norm": 0.975050367019759, "learning_rate": 5.469917626981565e-06, "loss": 0.8363, "step": 7373 }, { "epoch": 0.6602719794056746, "grad_norm": 0.9341446083390239, "learning_rate": 5.467332191864518e-06, "loss": 0.8228, "step": 7374 }, { "epoch": 0.6603615199507527, "grad_norm": 1.0216750756924287, "learning_rate": 5.464747138020721e-06, "loss": 0.8228, "step": 7375 }, { "epoch": 0.6604510604958308, "grad_norm": 0.9535301817885083, "learning_rate": 5.462162465667614e-06, "loss": 0.8317, "step": 7376 }, { "epoch": 0.6605406010409088, "grad_norm": 1.0612222195749201, "learning_rate": 5.459578175022617e-06, "loss": 0.8887, "step": 7377 }, { "epoch": 0.6606301415859869, "grad_norm": 1.0040719151911934, "learning_rate": 5.456994266303106e-06, "loss": 0.8495, "step": 7378 }, { "epoch": 0.660719682131065, "grad_norm": 0.9258434973711982, "learning_rate": 5.454410739726432e-06, "loss": 0.8124, "step": 7379 }, { "epoch": 0.6608092226761431, "grad_norm": 0.9208020724616062, "learning_rate": 5.451827595509913e-06, "loss": 0.8114, "step": 7380 }, { "epoch": 0.6608987632212211, "grad_norm": 1.07516496174586, "learning_rate": 5.449244833870833e-06, "loss": 0.8055, "step": 7381 }, { "epoch": 0.6609883037662991, "grad_norm": 0.8967501594127987, "learning_rate": 5.446662455026454e-06, "loss": 0.7525, "step": 7382 }, { "epoch": 0.6610778443113773, "grad_norm": 0.9244881211489062, "learning_rate": 5.44408045919399e-06, "loss": 0.7816, "step": 7383 }, { "epoch": 0.6611673848564553, "grad_norm": 0.9281095189801799, "learning_rate": 5.441498846590629e-06, "loss": 0.8198, "step": 7384 }, { "epoch": 0.6612569254015334, "grad_norm": 1.0485567872463224, "learning_rate": 5.438917617433532e-06, "loss": 0.8423, "step": 7385 }, { "epoch": 0.6613464659466114, "grad_norm": 0.9064092006717579, "learning_rate": 5.436336771939821e-06, "loss": 0.8123, "step": 7386 }, { "epoch": 0.6614360064916895, "grad_norm": 1.0272951103997416, "learning_rate": 5.433756310326586e-06, "loss": 0.8108, "step": 7387 }, { "epoch": 0.6615255470367676, "grad_norm": 0.9531636994194864, "learning_rate": 5.431176232810898e-06, "loss": 0.8352, "step": 7388 }, { "epoch": 0.6616150875818456, "grad_norm": 0.9323118695868394, "learning_rate": 5.428596539609772e-06, "loss": 0.7742, "step": 7389 }, { "epoch": 0.6617046281269238, "grad_norm": 0.9803015530768365, "learning_rate": 5.4260172309402085e-06, "loss": 0.8335, "step": 7390 }, { "epoch": 0.6617941686720018, "grad_norm": 0.9418941710611308, "learning_rate": 5.423438307019169e-06, "loss": 0.8281, "step": 7391 }, { "epoch": 0.6618837092170798, "grad_norm": 0.9845151213632618, "learning_rate": 5.4208597680635866e-06, "loss": 0.8257, "step": 7392 }, { "epoch": 0.6619732497621579, "grad_norm": 1.0003396445470178, "learning_rate": 5.418281614290359e-06, "loss": 0.7762, "step": 7393 }, { "epoch": 0.662062790307236, "grad_norm": 1.0231459160384764, "learning_rate": 5.4157038459163515e-06, "loss": 0.8461, "step": 7394 }, { "epoch": 0.6621523308523141, "grad_norm": 1.148048352112647, "learning_rate": 5.413126463158396e-06, "loss": 0.8049, "step": 7395 }, { "epoch": 0.6622418713973921, "grad_norm": 0.9039517470063846, "learning_rate": 5.410549466233297e-06, "loss": 0.797, "step": 7396 }, { "epoch": 0.6623314119424702, "grad_norm": 0.9367403668381581, "learning_rate": 5.407972855357819e-06, "loss": 0.8003, "step": 7397 }, { "epoch": 0.6624209524875483, "grad_norm": 0.9250594654974407, "learning_rate": 5.405396630748702e-06, "loss": 0.8773, "step": 7398 }, { "epoch": 0.6625104930326263, "grad_norm": 0.9361369106503858, "learning_rate": 5.402820792622649e-06, "loss": 0.8627, "step": 7399 }, { "epoch": 0.6626000335777044, "grad_norm": 1.0570686595577783, "learning_rate": 5.400245341196328e-06, "loss": 0.8532, "step": 7400 }, { "epoch": 0.6626895741227825, "grad_norm": 1.0686440827197978, "learning_rate": 5.397670276686382e-06, "loss": 0.8126, "step": 7401 }, { "epoch": 0.6627791146678605, "grad_norm": 1.0660252880584309, "learning_rate": 5.395095599309413e-06, "loss": 0.8324, "step": 7402 }, { "epoch": 0.6628686552129386, "grad_norm": 0.9822559808903727, "learning_rate": 5.3925213092819965e-06, "loss": 0.7998, "step": 7403 }, { "epoch": 0.6629581957580166, "grad_norm": 0.8760922304305017, "learning_rate": 5.3899474068206725e-06, "loss": 0.7846, "step": 7404 }, { "epoch": 0.6630477363030948, "grad_norm": 1.196021242665968, "learning_rate": 5.387373892141951e-06, "loss": 0.8572, "step": 7405 }, { "epoch": 0.6631372768481728, "grad_norm": 0.937323005649023, "learning_rate": 5.384800765462306e-06, "loss": 0.8224, "step": 7406 }, { "epoch": 0.6632268173932508, "grad_norm": 0.9594156966736437, "learning_rate": 5.38222802699818e-06, "loss": 0.8823, "step": 7407 }, { "epoch": 0.663316357938329, "grad_norm": 0.9240782072387356, "learning_rate": 5.379655676965984e-06, "loss": 0.8021, "step": 7408 }, { "epoch": 0.663405898483407, "grad_norm": 1.103379866416763, "learning_rate": 5.377083715582099e-06, "loss": 0.841, "step": 7409 }, { "epoch": 0.6634954390284851, "grad_norm": 0.9378425975227233, "learning_rate": 5.374512143062866e-06, "loss": 0.8633, "step": 7410 }, { "epoch": 0.6635849795735631, "grad_norm": 0.9839088072058982, "learning_rate": 5.371940959624597e-06, "loss": 0.7524, "step": 7411 }, { "epoch": 0.6636745201186413, "grad_norm": 1.0283064649620686, "learning_rate": 5.3693701654835735e-06, "loss": 0.8125, "step": 7412 }, { "epoch": 0.6637640606637193, "grad_norm": 0.9561122233175984, "learning_rate": 5.366799760856043e-06, "loss": 0.8867, "step": 7413 }, { "epoch": 0.6638536012087973, "grad_norm": 0.9094966346450513, "learning_rate": 5.364229745958218e-06, "loss": 0.7903, "step": 7414 }, { "epoch": 0.6639431417538755, "grad_norm": 0.9375052478403006, "learning_rate": 5.361660121006284e-06, "loss": 0.8034, "step": 7415 }, { "epoch": 0.6640326822989535, "grad_norm": 1.0156641108497368, "learning_rate": 5.359090886216377e-06, "loss": 0.7981, "step": 7416 }, { "epoch": 0.6641222228440316, "grad_norm": 0.990134380946852, "learning_rate": 5.356522041804626e-06, "loss": 0.8524, "step": 7417 }, { "epoch": 0.6642117633891096, "grad_norm": 0.9710696815249935, "learning_rate": 5.353953587987109e-06, "loss": 0.8195, "step": 7418 }, { "epoch": 0.6643013039341877, "grad_norm": 0.9889608822371967, "learning_rate": 5.351385524979876e-06, "loss": 0.7835, "step": 7419 }, { "epoch": 0.6643908444792658, "grad_norm": 0.9702546289702244, "learning_rate": 5.348817852998949e-06, "loss": 0.8224, "step": 7420 }, { "epoch": 0.6644803850243438, "grad_norm": 0.9923977026236258, "learning_rate": 5.346250572260302e-06, "loss": 0.78, "step": 7421 }, { "epoch": 0.6645699255694218, "grad_norm": 1.0137221815382094, "learning_rate": 5.343683682979888e-06, "loss": 0.8515, "step": 7422 }, { "epoch": 0.6646594661145, "grad_norm": 0.9959043052367144, "learning_rate": 5.341117185373634e-06, "loss": 0.7979, "step": 7423 }, { "epoch": 0.664749006659578, "grad_norm": 0.9258300976257043, "learning_rate": 5.338551079657419e-06, "loss": 0.8231, "step": 7424 }, { "epoch": 0.6648385472046561, "grad_norm": 1.0122937954894073, "learning_rate": 5.335985366047098e-06, "loss": 0.8322, "step": 7425 }, { "epoch": 0.6649280877497342, "grad_norm": 0.8692660357547991, "learning_rate": 5.3334200447584925e-06, "loss": 0.8363, "step": 7426 }, { "epoch": 0.6650176282948123, "grad_norm": 0.9662689035173921, "learning_rate": 5.330855116007383e-06, "loss": 0.8617, "step": 7427 }, { "epoch": 0.6651071688398903, "grad_norm": 1.0555477563212419, "learning_rate": 5.328290580009521e-06, "loss": 0.8572, "step": 7428 }, { "epoch": 0.6651967093849683, "grad_norm": 0.9278839810834268, "learning_rate": 5.325726436980636e-06, "loss": 0.778, "step": 7429 }, { "epoch": 0.6652862499300465, "grad_norm": 0.9520589004017551, "learning_rate": 5.32316268713641e-06, "loss": 0.8313, "step": 7430 }, { "epoch": 0.6653757904751245, "grad_norm": 0.964236093657248, "learning_rate": 5.320599330692504e-06, "loss": 0.8138, "step": 7431 }, { "epoch": 0.6654653310202026, "grad_norm": 0.8835533186185465, "learning_rate": 5.3180363678645285e-06, "loss": 0.806, "step": 7432 }, { "epoch": 0.6655548715652807, "grad_norm": 0.9188977601670071, "learning_rate": 5.315473798868076e-06, "loss": 0.8231, "step": 7433 }, { "epoch": 0.6656444121103587, "grad_norm": 0.9848968220683548, "learning_rate": 5.312911623918704e-06, "loss": 0.7825, "step": 7434 }, { "epoch": 0.6657339526554368, "grad_norm": 0.9161923475666346, "learning_rate": 5.310349843231926e-06, "loss": 0.832, "step": 7435 }, { "epoch": 0.6658234932005148, "grad_norm": 0.9961997730664517, "learning_rate": 5.307788457023243e-06, "loss": 0.8295, "step": 7436 }, { "epoch": 0.665913033745593, "grad_norm": 1.0153435195784282, "learning_rate": 5.305227465508107e-06, "loss": 0.8045, "step": 7437 }, { "epoch": 0.666002574290671, "grad_norm": 1.1160008437698334, "learning_rate": 5.302666868901936e-06, "loss": 0.856, "step": 7438 }, { "epoch": 0.666092114835749, "grad_norm": 1.1498082171922674, "learning_rate": 5.300106667420118e-06, "loss": 0.7827, "step": 7439 }, { "epoch": 0.6661816553808271, "grad_norm": 0.9583276176036425, "learning_rate": 5.297546861278013e-06, "loss": 0.8602, "step": 7440 }, { "epoch": 0.6662711959259052, "grad_norm": 0.9778976042348785, "learning_rate": 5.2949874506909385e-06, "loss": 0.8518, "step": 7441 }, { "epoch": 0.6663607364709833, "grad_norm": 0.935504297809276, "learning_rate": 5.292428435874195e-06, "loss": 0.7922, "step": 7442 }, { "epoch": 0.6664502770160613, "grad_norm": 1.0672104663771147, "learning_rate": 5.289869817043026e-06, "loss": 0.762, "step": 7443 }, { "epoch": 0.6665398175611394, "grad_norm": 0.9363078994811208, "learning_rate": 5.2873115944126606e-06, "loss": 0.7891, "step": 7444 }, { "epoch": 0.6666293581062175, "grad_norm": 1.1372459074536938, "learning_rate": 5.284753768198285e-06, "loss": 0.8297, "step": 7445 }, { "epoch": 0.6667188986512955, "grad_norm": 1.1146322193239877, "learning_rate": 5.282196338615058e-06, "loss": 0.8785, "step": 7446 }, { "epoch": 0.6668084391963736, "grad_norm": 0.910814751170856, "learning_rate": 5.279639305878097e-06, "loss": 0.806, "step": 7447 }, { "epoch": 0.6668979797414517, "grad_norm": 1.0879951855120666, "learning_rate": 5.2770826702025026e-06, "loss": 0.8098, "step": 7448 }, { "epoch": 0.6669875202865297, "grad_norm": 0.9997031961300487, "learning_rate": 5.274526431803319e-06, "loss": 0.8259, "step": 7449 }, { "epoch": 0.6670770608316078, "grad_norm": 0.9030495880992627, "learning_rate": 5.271970590895575e-06, "loss": 0.8347, "step": 7450 }, { "epoch": 0.6671666013766859, "grad_norm": 0.9516472055111511, "learning_rate": 5.269415147694257e-06, "loss": 0.8108, "step": 7451 }, { "epoch": 0.667256141921764, "grad_norm": 1.0032231346281724, "learning_rate": 5.266860102414319e-06, "loss": 0.7891, "step": 7452 }, { "epoch": 0.667345682466842, "grad_norm": 1.0672145005540397, "learning_rate": 5.2643054552706886e-06, "loss": 0.7555, "step": 7453 }, { "epoch": 0.66743522301192, "grad_norm": 0.9194535133331326, "learning_rate": 5.26175120647825e-06, "loss": 0.8336, "step": 7454 }, { "epoch": 0.6675247635569982, "grad_norm": 1.0245375535142969, "learning_rate": 5.259197356251857e-06, "loss": 0.7985, "step": 7455 }, { "epoch": 0.6676143041020762, "grad_norm": 1.0146071621606474, "learning_rate": 5.256643904806335e-06, "loss": 0.7828, "step": 7456 }, { "epoch": 0.6677038446471543, "grad_norm": 1.007079229211384, "learning_rate": 5.254090852356473e-06, "loss": 0.8687, "step": 7457 }, { "epoch": 0.6677933851922323, "grad_norm": 0.9560089345908145, "learning_rate": 5.251538199117019e-06, "loss": 0.8076, "step": 7458 }, { "epoch": 0.6678829257373105, "grad_norm": 0.9064796277733639, "learning_rate": 5.2489859453027e-06, "loss": 0.8397, "step": 7459 }, { "epoch": 0.6679724662823885, "grad_norm": 0.9507284224000625, "learning_rate": 5.2464340911282005e-06, "loss": 0.877, "step": 7460 }, { "epoch": 0.6680620068274665, "grad_norm": 0.9925223455270268, "learning_rate": 5.243882636808175e-06, "loss": 0.8553, "step": 7461 }, { "epoch": 0.6681515473725447, "grad_norm": 0.9320338382056615, "learning_rate": 5.241331582557244e-06, "loss": 0.8152, "step": 7462 }, { "epoch": 0.6682410879176227, "grad_norm": 1.0672021840139518, "learning_rate": 5.238780928589992e-06, "loss": 0.8128, "step": 7463 }, { "epoch": 0.6683306284627007, "grad_norm": 1.0342361305984835, "learning_rate": 5.2362306751209745e-06, "loss": 0.858, "step": 7464 }, { "epoch": 0.6684201690077788, "grad_norm": 1.0742472055273755, "learning_rate": 5.233680822364708e-06, "loss": 0.7745, "step": 7465 }, { "epoch": 0.6685097095528569, "grad_norm": 0.9156740426985743, "learning_rate": 5.231131370535678e-06, "loss": 0.7509, "step": 7466 }, { "epoch": 0.668599250097935, "grad_norm": 1.044312950847101, "learning_rate": 5.228582319848338e-06, "loss": 0.851, "step": 7467 }, { "epoch": 0.668688790643013, "grad_norm": 0.997023360533407, "learning_rate": 5.226033670517104e-06, "loss": 0.8289, "step": 7468 }, { "epoch": 0.6687783311880912, "grad_norm": 0.9026166514642375, "learning_rate": 5.2234854227563605e-06, "loss": 0.8204, "step": 7469 }, { "epoch": 0.6688678717331692, "grad_norm": 0.9300533345495118, "learning_rate": 5.220937576780458e-06, "loss": 0.8679, "step": 7470 }, { "epoch": 0.6689574122782472, "grad_norm": 0.887498342420645, "learning_rate": 5.218390132803715e-06, "loss": 0.8381, "step": 7471 }, { "epoch": 0.6690469528233253, "grad_norm": 0.9994265287385279, "learning_rate": 5.215843091040409e-06, "loss": 0.814, "step": 7472 }, { "epoch": 0.6691364933684034, "grad_norm": 1.0229330462446404, "learning_rate": 5.213296451704794e-06, "loss": 0.8132, "step": 7473 }, { "epoch": 0.6692260339134815, "grad_norm": 0.8760383238576165, "learning_rate": 5.210750215011082e-06, "loss": 0.8303, "step": 7474 }, { "epoch": 0.6693155744585595, "grad_norm": 0.9215127312523042, "learning_rate": 5.20820438117346e-06, "loss": 0.7983, "step": 7475 }, { "epoch": 0.6694051150036375, "grad_norm": 1.0585305059620385, "learning_rate": 5.205658950406062e-06, "loss": 0.8278, "step": 7476 }, { "epoch": 0.6694946555487157, "grad_norm": 0.9146819254230831, "learning_rate": 5.203113922923013e-06, "loss": 0.7584, "step": 7477 }, { "epoch": 0.6695841960937937, "grad_norm": 0.8999781515085166, "learning_rate": 5.2005692989383906e-06, "loss": 0.8531, "step": 7478 }, { "epoch": 0.6696737366388718, "grad_norm": 1.058164729069856, "learning_rate": 5.198025078666238e-06, "loss": 0.8349, "step": 7479 }, { "epoch": 0.6697632771839499, "grad_norm": 0.9490043094538922, "learning_rate": 5.195481262320572e-06, "loss": 0.8558, "step": 7480 }, { "epoch": 0.6698528177290279, "grad_norm": 0.955446385343134, "learning_rate": 5.1929378501153605e-06, "loss": 0.8335, "step": 7481 }, { "epoch": 0.669942358274106, "grad_norm": 1.038726251441477, "learning_rate": 5.1903948422645504e-06, "loss": 0.8178, "step": 7482 }, { "epoch": 0.670031898819184, "grad_norm": 0.9287181574941412, "learning_rate": 5.1878522389820564e-06, "loss": 0.7642, "step": 7483 }, { "epoch": 0.6701214393642622, "grad_norm": 0.9212863177157766, "learning_rate": 5.18531004048175e-06, "loss": 0.8077, "step": 7484 }, { "epoch": 0.6702109799093402, "grad_norm": 1.1388234273804994, "learning_rate": 5.1827682469774734e-06, "loss": 0.8636, "step": 7485 }, { "epoch": 0.6703005204544182, "grad_norm": 1.0371924260488867, "learning_rate": 5.180226858683037e-06, "loss": 0.8238, "step": 7486 }, { "epoch": 0.6703900609994964, "grad_norm": 0.8976472394533889, "learning_rate": 5.177685875812208e-06, "loss": 0.8342, "step": 7487 }, { "epoch": 0.6704796015445744, "grad_norm": 1.0051818038805378, "learning_rate": 5.17514529857873e-06, "loss": 0.8455, "step": 7488 }, { "epoch": 0.6705691420896525, "grad_norm": 0.9237004532306378, "learning_rate": 5.1726051271963015e-06, "loss": 0.8098, "step": 7489 }, { "epoch": 0.6706586826347305, "grad_norm": 0.9706467095901712, "learning_rate": 5.170065361878603e-06, "loss": 0.7624, "step": 7490 }, { "epoch": 0.6707482231798086, "grad_norm": 0.9171849596374392, "learning_rate": 5.167526002839269e-06, "loss": 0.8188, "step": 7491 }, { "epoch": 0.6708377637248867, "grad_norm": 0.8471845347852861, "learning_rate": 5.1649870502918985e-06, "loss": 0.7975, "step": 7492 }, { "epoch": 0.6709273042699647, "grad_norm": 1.016625239331802, "learning_rate": 5.1624485044500605e-06, "loss": 0.756, "step": 7493 }, { "epoch": 0.6710168448150428, "grad_norm": 0.9335033752202787, "learning_rate": 5.15991036552729e-06, "loss": 0.7968, "step": 7494 }, { "epoch": 0.6711063853601209, "grad_norm": 0.9242766071517782, "learning_rate": 5.157372633737083e-06, "loss": 0.8168, "step": 7495 }, { "epoch": 0.6711959259051989, "grad_norm": 1.0171395304765374, "learning_rate": 5.1548353092929136e-06, "loss": 0.804, "step": 7496 }, { "epoch": 0.671285466450277, "grad_norm": 0.9029617088719335, "learning_rate": 5.152298392408214e-06, "loss": 0.8441, "step": 7497 }, { "epoch": 0.6713750069953551, "grad_norm": 0.9249425769070901, "learning_rate": 5.149761883296371e-06, "loss": 0.7588, "step": 7498 }, { "epoch": 0.6714645475404332, "grad_norm": 1.0316346555021108, "learning_rate": 5.1472257821707535e-06, "loss": 0.777, "step": 7499 }, { "epoch": 0.6715540880855112, "grad_norm": 0.9954084167613391, "learning_rate": 5.144690089244689e-06, "loss": 0.8317, "step": 7500 }, { "epoch": 0.6716436286305892, "grad_norm": 0.9314948110388149, "learning_rate": 5.142154804731469e-06, "loss": 0.7828, "step": 7501 }, { "epoch": 0.6717331691756674, "grad_norm": 1.0439696949968391, "learning_rate": 5.139619928844364e-06, "loss": 0.7926, "step": 7502 }, { "epoch": 0.6718227097207454, "grad_norm": 1.0055186706159907, "learning_rate": 5.137085461796588e-06, "loss": 0.8162, "step": 7503 }, { "epoch": 0.6719122502658235, "grad_norm": 0.9531468359933698, "learning_rate": 5.134551403801336e-06, "loss": 0.814, "step": 7504 }, { "epoch": 0.6720017908109016, "grad_norm": 0.9346789551081877, "learning_rate": 5.132017755071765e-06, "loss": 0.8596, "step": 7505 }, { "epoch": 0.6720913313559796, "grad_norm": 0.9810943822605321, "learning_rate": 5.129484515820998e-06, "loss": 0.8174, "step": 7506 }, { "epoch": 0.6721808719010577, "grad_norm": 0.9382684512531585, "learning_rate": 5.126951686262117e-06, "loss": 0.7857, "step": 7507 }, { "epoch": 0.6722704124461357, "grad_norm": 1.017698161369508, "learning_rate": 5.124419266608189e-06, "loss": 0.8067, "step": 7508 }, { "epoch": 0.6723599529912139, "grad_norm": 0.9317609377325201, "learning_rate": 5.12188725707222e-06, "loss": 0.8028, "step": 7509 }, { "epoch": 0.6724494935362919, "grad_norm": 2.204019561288505, "learning_rate": 5.1193556578671975e-06, "loss": 0.8299, "step": 7510 }, { "epoch": 0.67253903408137, "grad_norm": 0.9608891975316171, "learning_rate": 5.116824469206074e-06, "loss": 0.8108, "step": 7511 }, { "epoch": 0.672628574626448, "grad_norm": 0.9592661850599546, "learning_rate": 5.114293691301763e-06, "loss": 0.7898, "step": 7512 }, { "epoch": 0.6727181151715261, "grad_norm": 0.8534904469281832, "learning_rate": 5.111763324367145e-06, "loss": 0.7313, "step": 7513 }, { "epoch": 0.6728076557166042, "grad_norm": 0.9547537367829539, "learning_rate": 5.109233368615067e-06, "loss": 0.8272, "step": 7514 }, { "epoch": 0.6728971962616822, "grad_norm": 0.9204370604994293, "learning_rate": 5.106703824258341e-06, "loss": 0.8234, "step": 7515 }, { "epoch": 0.6729867368067604, "grad_norm": 1.0066684713887253, "learning_rate": 5.104174691509743e-06, "loss": 0.8718, "step": 7516 }, { "epoch": 0.6730762773518384, "grad_norm": 0.9002281534564198, "learning_rate": 5.101645970582017e-06, "loss": 0.8566, "step": 7517 }, { "epoch": 0.6731658178969164, "grad_norm": 0.9639191628074935, "learning_rate": 5.099117661687868e-06, "loss": 0.8473, "step": 7518 }, { "epoch": 0.6732553584419945, "grad_norm": 1.0826106820159531, "learning_rate": 5.096589765039972e-06, "loss": 0.8362, "step": 7519 }, { "epoch": 0.6733448989870726, "grad_norm": 1.2107454223334093, "learning_rate": 5.0940622808509645e-06, "loss": 0.8497, "step": 7520 }, { "epoch": 0.6734344395321507, "grad_norm": 0.9917261812284213, "learning_rate": 5.091535209333453e-06, "loss": 0.8256, "step": 7521 }, { "epoch": 0.6735239800772287, "grad_norm": 0.890952544661504, "learning_rate": 5.089008550700003e-06, "loss": 0.8132, "step": 7522 }, { "epoch": 0.6736135206223068, "grad_norm": 1.1269947104198972, "learning_rate": 5.086482305163152e-06, "loss": 0.7769, "step": 7523 }, { "epoch": 0.6737030611673849, "grad_norm": 1.027993473330337, "learning_rate": 5.083956472935397e-06, "loss": 0.7511, "step": 7524 }, { "epoch": 0.6737926017124629, "grad_norm": 0.9969078447399685, "learning_rate": 5.081431054229202e-06, "loss": 0.7811, "step": 7525 }, { "epoch": 0.673882142257541, "grad_norm": 0.8810772464371397, "learning_rate": 5.078906049257e-06, "loss": 0.7871, "step": 7526 }, { "epoch": 0.6739716828026191, "grad_norm": 0.9474611161696102, "learning_rate": 5.076381458231185e-06, "loss": 0.8011, "step": 7527 }, { "epoch": 0.6740612233476971, "grad_norm": 0.9252084659679431, "learning_rate": 5.073857281364116e-06, "loss": 0.8167, "step": 7528 }, { "epoch": 0.6741507638927752, "grad_norm": 0.9965973227698414, "learning_rate": 5.07133351886812e-06, "loss": 0.7971, "step": 7529 }, { "epoch": 0.6742403044378532, "grad_norm": 1.1247928276409016, "learning_rate": 5.068810170955487e-06, "loss": 0.8136, "step": 7530 }, { "epoch": 0.6743298449829314, "grad_norm": 0.9034461762130201, "learning_rate": 5.066287237838474e-06, "loss": 0.8632, "step": 7531 }, { "epoch": 0.6744193855280094, "grad_norm": 0.9173709650549339, "learning_rate": 5.063764719729301e-06, "loss": 0.769, "step": 7532 }, { "epoch": 0.6745089260730874, "grad_norm": 0.916455163389593, "learning_rate": 5.061242616840154e-06, "loss": 0.8209, "step": 7533 }, { "epoch": 0.6745984666181656, "grad_norm": 1.0292300635573934, "learning_rate": 5.058720929383184e-06, "loss": 0.7841, "step": 7534 }, { "epoch": 0.6746880071632436, "grad_norm": 0.8243488510378381, "learning_rate": 5.0561996575705105e-06, "loss": 0.7918, "step": 7535 }, { "epoch": 0.6747775477083217, "grad_norm": 1.0169033932694205, "learning_rate": 5.053678801614205e-06, "loss": 0.8172, "step": 7536 }, { "epoch": 0.6748670882533997, "grad_norm": 0.9965161218062968, "learning_rate": 5.051158361726322e-06, "loss": 0.8438, "step": 7537 }, { "epoch": 0.6749566287984778, "grad_norm": 0.8794659720767055, "learning_rate": 5.048638338118873e-06, "loss": 0.7932, "step": 7538 }, { "epoch": 0.6750461693435559, "grad_norm": 0.8973243245104964, "learning_rate": 5.04611873100383e-06, "loss": 0.8217, "step": 7539 }, { "epoch": 0.6751357098886339, "grad_norm": 0.9468084820196486, "learning_rate": 5.043599540593138e-06, "loss": 0.7828, "step": 7540 }, { "epoch": 0.6752252504337121, "grad_norm": 1.056046061891345, "learning_rate": 5.041080767098705e-06, "loss": 0.7768, "step": 7541 }, { "epoch": 0.6753147909787901, "grad_norm": 0.9138748765882617, "learning_rate": 5.038562410732393e-06, "loss": 0.7956, "step": 7542 }, { "epoch": 0.6754043315238681, "grad_norm": 0.9397286561832034, "learning_rate": 5.03604447170604e-06, "loss": 0.7927, "step": 7543 }, { "epoch": 0.6754938720689462, "grad_norm": 0.9026669017995445, "learning_rate": 5.033526950231453e-06, "loss": 0.8251, "step": 7544 }, { "epoch": 0.6755834126140243, "grad_norm": 0.8667935152454881, "learning_rate": 5.031009846520396e-06, "loss": 0.7354, "step": 7545 }, { "epoch": 0.6756729531591024, "grad_norm": 1.002266487198374, "learning_rate": 5.028493160784602e-06, "loss": 0.7544, "step": 7546 }, { "epoch": 0.6757624937041804, "grad_norm": 0.968580537296959, "learning_rate": 5.025976893235758e-06, "loss": 0.8059, "step": 7547 }, { "epoch": 0.6758520342492584, "grad_norm": 0.9158189331992367, "learning_rate": 5.023461044085529e-06, "loss": 0.8023, "step": 7548 }, { "epoch": 0.6759415747943366, "grad_norm": 0.9462203703567819, "learning_rate": 5.020945613545536e-06, "loss": 0.8449, "step": 7549 }, { "epoch": 0.6760311153394146, "grad_norm": 0.9572721266398347, "learning_rate": 5.018430601827376e-06, "loss": 0.7574, "step": 7550 }, { "epoch": 0.6761206558844927, "grad_norm": 1.018689468576089, "learning_rate": 5.015916009142605e-06, "loss": 0.84, "step": 7551 }, { "epoch": 0.6762101964295708, "grad_norm": 0.9829722911945921, "learning_rate": 5.013401835702733e-06, "loss": 0.7998, "step": 7552 }, { "epoch": 0.6762997369746488, "grad_norm": 0.9625691457898718, "learning_rate": 5.0108880817192504e-06, "loss": 0.8175, "step": 7553 }, { "epoch": 0.6763892775197269, "grad_norm": 0.8573185535742263, "learning_rate": 5.008374747403604e-06, "loss": 0.8173, "step": 7554 }, { "epoch": 0.6764788180648049, "grad_norm": 0.9488532289103421, "learning_rate": 5.005861832967203e-06, "loss": 0.8708, "step": 7555 }, { "epoch": 0.6765683586098831, "grad_norm": 0.9615133905289535, "learning_rate": 5.003349338621435e-06, "loss": 0.8109, "step": 7556 }, { "epoch": 0.6766578991549611, "grad_norm": 1.3226853279419994, "learning_rate": 5.00083726457764e-06, "loss": 0.7571, "step": 7557 }, { "epoch": 0.6767474397000391, "grad_norm": 0.9291071429946327, "learning_rate": 4.998325611047121e-06, "loss": 0.8047, "step": 7558 }, { "epoch": 0.6768369802451173, "grad_norm": 1.036769219474491, "learning_rate": 4.995814378241153e-06, "loss": 0.8127, "step": 7559 }, { "epoch": 0.6769265207901953, "grad_norm": 1.0757566848200948, "learning_rate": 4.993303566370971e-06, "loss": 0.8041, "step": 7560 }, { "epoch": 0.6770160613352734, "grad_norm": 0.9068336875308911, "learning_rate": 4.9907931756477744e-06, "loss": 0.7835, "step": 7561 }, { "epoch": 0.6771056018803514, "grad_norm": 1.1994711857198965, "learning_rate": 4.98828320628274e-06, "loss": 0.8362, "step": 7562 }, { "epoch": 0.6771951424254296, "grad_norm": 0.9641376951626224, "learning_rate": 4.985773658486988e-06, "loss": 0.832, "step": 7563 }, { "epoch": 0.6772846829705076, "grad_norm": 0.9431638330572186, "learning_rate": 4.983264532471614e-06, "loss": 0.8303, "step": 7564 }, { "epoch": 0.6773742235155856, "grad_norm": 0.9746218243758481, "learning_rate": 4.980755828447681e-06, "loss": 0.8495, "step": 7565 }, { "epoch": 0.6774637640606637, "grad_norm": 0.9471577352587641, "learning_rate": 4.97824754662621e-06, "loss": 0.7492, "step": 7566 }, { "epoch": 0.6775533046057418, "grad_norm": 1.1106370398062748, "learning_rate": 4.975739687218188e-06, "loss": 0.8122, "step": 7567 }, { "epoch": 0.6776428451508199, "grad_norm": 1.2833976317718927, "learning_rate": 4.973232250434579e-06, "loss": 0.8209, "step": 7568 }, { "epoch": 0.6777323856958979, "grad_norm": 1.2006940702092859, "learning_rate": 4.970725236486288e-06, "loss": 0.7849, "step": 7569 }, { "epoch": 0.677821926240976, "grad_norm": 1.0565416637083969, "learning_rate": 4.968218645584202e-06, "loss": 0.806, "step": 7570 }, { "epoch": 0.6779114667860541, "grad_norm": 0.8591469239214488, "learning_rate": 4.9657124779391656e-06, "loss": 0.8174, "step": 7571 }, { "epoch": 0.6780010073311321, "grad_norm": 1.0623254200749057, "learning_rate": 4.963206733761991e-06, "loss": 0.8163, "step": 7572 }, { "epoch": 0.6780905478762101, "grad_norm": 0.8791705025294435, "learning_rate": 4.9607014132634515e-06, "loss": 0.6918, "step": 7573 }, { "epoch": 0.6781800884212883, "grad_norm": 0.926894440955406, "learning_rate": 4.958196516654288e-06, "loss": 0.8165, "step": 7574 }, { "epoch": 0.6782696289663663, "grad_norm": 1.0592807743841275, "learning_rate": 4.955692044145203e-06, "loss": 0.7589, "step": 7575 }, { "epoch": 0.6783591695114444, "grad_norm": 0.9047740776223159, "learning_rate": 4.953187995946867e-06, "loss": 0.7469, "step": 7576 }, { "epoch": 0.6784487100565225, "grad_norm": 0.9388423372826439, "learning_rate": 4.950684372269909e-06, "loss": 0.7952, "step": 7577 }, { "epoch": 0.6785382506016006, "grad_norm": 0.9816996176824034, "learning_rate": 4.948181173324928e-06, "loss": 0.8015, "step": 7578 }, { "epoch": 0.6786277911466786, "grad_norm": 0.9624851839084834, "learning_rate": 4.945678399322484e-06, "loss": 0.8457, "step": 7579 }, { "epoch": 0.6787173316917566, "grad_norm": 1.0522278971321075, "learning_rate": 4.943176050473104e-06, "loss": 0.8185, "step": 7580 }, { "epoch": 0.6788068722368348, "grad_norm": 1.0076229725927537, "learning_rate": 4.940674126987275e-06, "loss": 0.7723, "step": 7581 }, { "epoch": 0.6788964127819128, "grad_norm": 0.9804275497429261, "learning_rate": 4.938172629075451e-06, "loss": 0.7914, "step": 7582 }, { "epoch": 0.6789859533269909, "grad_norm": 0.9147226724079263, "learning_rate": 4.9356715569480515e-06, "loss": 0.792, "step": 7583 }, { "epoch": 0.6790754938720689, "grad_norm": 0.9089921563894359, "learning_rate": 4.933170910815457e-06, "loss": 0.7909, "step": 7584 }, { "epoch": 0.679165034417147, "grad_norm": 1.0983847021340474, "learning_rate": 4.930670690888014e-06, "loss": 0.8087, "step": 7585 }, { "epoch": 0.6792545749622251, "grad_norm": 0.9549856195768626, "learning_rate": 4.928170897376034e-06, "loss": 0.8119, "step": 7586 }, { "epoch": 0.6793441155073031, "grad_norm": 1.1262802207785858, "learning_rate": 4.925671530489789e-06, "loss": 0.8091, "step": 7587 }, { "epoch": 0.6794336560523813, "grad_norm": 1.11050910004489, "learning_rate": 4.92317259043952e-06, "loss": 0.8128, "step": 7588 }, { "epoch": 0.6795231965974593, "grad_norm": 0.9483210252979469, "learning_rate": 4.920674077435429e-06, "loss": 0.8119, "step": 7589 }, { "epoch": 0.6796127371425373, "grad_norm": 0.9822993840827573, "learning_rate": 4.918175991687681e-06, "loss": 0.8002, "step": 7590 }, { "epoch": 0.6797022776876154, "grad_norm": 1.0490399036840128, "learning_rate": 4.915678333406411e-06, "loss": 0.791, "step": 7591 }, { "epoch": 0.6797918182326935, "grad_norm": 1.130388825297679, "learning_rate": 4.913181102801709e-06, "loss": 0.7744, "step": 7592 }, { "epoch": 0.6798813587777716, "grad_norm": 1.0292039283944732, "learning_rate": 4.910684300083638e-06, "loss": 0.8222, "step": 7593 }, { "epoch": 0.6799708993228496, "grad_norm": 1.2080676388083944, "learning_rate": 4.908187925462217e-06, "loss": 0.7731, "step": 7594 }, { "epoch": 0.6800604398679277, "grad_norm": 1.0555071494191735, "learning_rate": 4.9056919791474415e-06, "loss": 0.853, "step": 7595 }, { "epoch": 0.6801499804130058, "grad_norm": 0.9471430476366979, "learning_rate": 4.90319646134925e-06, "loss": 0.8401, "step": 7596 }, { "epoch": 0.6802395209580838, "grad_norm": 1.0824350837631747, "learning_rate": 4.900701372277561e-06, "loss": 0.8634, "step": 7597 }, { "epoch": 0.6803290615031619, "grad_norm": 0.8713173419290678, "learning_rate": 4.898206712142258e-06, "loss": 0.8134, "step": 7598 }, { "epoch": 0.68041860204824, "grad_norm": 0.935352956437394, "learning_rate": 4.895712481153181e-06, "loss": 0.8112, "step": 7599 }, { "epoch": 0.680508142593318, "grad_norm": 0.9719918837772018, "learning_rate": 4.893218679520137e-06, "loss": 0.7741, "step": 7600 }, { "epoch": 0.6805976831383961, "grad_norm": 0.9990795148284224, "learning_rate": 4.8907253074529e-06, "loss": 0.8095, "step": 7601 }, { "epoch": 0.6806872236834741, "grad_norm": 1.0801695996720977, "learning_rate": 4.888232365161198e-06, "loss": 0.8335, "step": 7602 }, { "epoch": 0.6807767642285523, "grad_norm": 0.9341204358069326, "learning_rate": 4.885739852854726e-06, "loss": 0.8284, "step": 7603 }, { "epoch": 0.6808663047736303, "grad_norm": 1.2744498853275787, "learning_rate": 4.883247770743157e-06, "loss": 0.8952, "step": 7604 }, { "epoch": 0.6809558453187083, "grad_norm": 0.915741769965082, "learning_rate": 4.88075611903611e-06, "loss": 0.8081, "step": 7605 }, { "epoch": 0.6810453858637865, "grad_norm": 0.9328446516789622, "learning_rate": 4.878264897943181e-06, "loss": 0.8362, "step": 7606 }, { "epoch": 0.6811349264088645, "grad_norm": 0.9173323000262648, "learning_rate": 4.875774107673915e-06, "loss": 0.7871, "step": 7607 }, { "epoch": 0.6812244669539426, "grad_norm": 0.8990433187101706, "learning_rate": 4.8732837484378325e-06, "loss": 0.7947, "step": 7608 }, { "epoch": 0.6813140074990206, "grad_norm": 0.9484370283842838, "learning_rate": 4.87079382044441e-06, "loss": 0.8109, "step": 7609 }, { "epoch": 0.6814035480440988, "grad_norm": 0.95573197457004, "learning_rate": 4.868304323903102e-06, "loss": 0.8214, "step": 7610 }, { "epoch": 0.6814930885891768, "grad_norm": 0.9646586834778087, "learning_rate": 4.865815259023311e-06, "loss": 0.8678, "step": 7611 }, { "epoch": 0.6815826291342548, "grad_norm": 0.9859063125363904, "learning_rate": 4.863326626014413e-06, "loss": 0.8347, "step": 7612 }, { "epoch": 0.681672169679333, "grad_norm": 0.9365612863709317, "learning_rate": 4.860838425085737e-06, "loss": 0.8505, "step": 7613 }, { "epoch": 0.681761710224411, "grad_norm": 1.0297270398875087, "learning_rate": 4.858350656446585e-06, "loss": 0.8118, "step": 7614 }, { "epoch": 0.681851250769489, "grad_norm": 1.1197022641287375, "learning_rate": 4.855863320306218e-06, "loss": 0.8261, "step": 7615 }, { "epoch": 0.6819407913145671, "grad_norm": 1.0158166462829825, "learning_rate": 4.853376416873867e-06, "loss": 0.8152, "step": 7616 }, { "epoch": 0.6820303318596452, "grad_norm": 0.9716624722409757, "learning_rate": 4.850889946358727e-06, "loss": 0.8037, "step": 7617 }, { "epoch": 0.6821198724047233, "grad_norm": 0.9569106891818252, "learning_rate": 4.848403908969939e-06, "loss": 0.788, "step": 7618 }, { "epoch": 0.6822094129498013, "grad_norm": 1.0607843232233458, "learning_rate": 4.845918304916628e-06, "loss": 0.828, "step": 7619 }, { "epoch": 0.6822989534948793, "grad_norm": 0.9026834339520259, "learning_rate": 4.843433134407874e-06, "loss": 0.7685, "step": 7620 }, { "epoch": 0.6823884940399575, "grad_norm": 0.9285624327160942, "learning_rate": 4.840948397652716e-06, "loss": 0.7488, "step": 7621 }, { "epoch": 0.6824780345850355, "grad_norm": 0.8952129327182544, "learning_rate": 4.838464094860175e-06, "loss": 0.8249, "step": 7622 }, { "epoch": 0.6825675751301136, "grad_norm": 0.9354090487094243, "learning_rate": 4.835980226239211e-06, "loss": 0.8642, "step": 7623 }, { "epoch": 0.6826571156751917, "grad_norm": 0.9063402268475304, "learning_rate": 4.833496791998762e-06, "loss": 0.7941, "step": 7624 }, { "epoch": 0.6827466562202698, "grad_norm": 1.054187429198294, "learning_rate": 4.831013792347727e-06, "loss": 0.8467, "step": 7625 }, { "epoch": 0.6828361967653478, "grad_norm": 1.0246144621520605, "learning_rate": 4.828531227494968e-06, "loss": 0.8106, "step": 7626 }, { "epoch": 0.6829257373104258, "grad_norm": 1.0523314318923513, "learning_rate": 4.826049097649309e-06, "loss": 0.7716, "step": 7627 }, { "epoch": 0.683015277855504, "grad_norm": 0.9751922511718797, "learning_rate": 4.82356740301954e-06, "loss": 0.8885, "step": 7628 }, { "epoch": 0.683104818400582, "grad_norm": 0.9465294192145584, "learning_rate": 4.8210861438144126e-06, "loss": 0.8249, "step": 7629 }, { "epoch": 0.68319435894566, "grad_norm": 0.9712886638650309, "learning_rate": 4.818605320242642e-06, "loss": 0.8189, "step": 7630 }, { "epoch": 0.6832838994907382, "grad_norm": 1.0970167104956374, "learning_rate": 4.816124932512908e-06, "loss": 0.8201, "step": 7631 }, { "epoch": 0.6833734400358162, "grad_norm": 1.0249335430441642, "learning_rate": 4.813644980833851e-06, "loss": 0.7223, "step": 7632 }, { "epoch": 0.6834629805808943, "grad_norm": 0.9901811056507533, "learning_rate": 4.811165465414077e-06, "loss": 0.8557, "step": 7633 }, { "epoch": 0.6835525211259723, "grad_norm": 0.961801284809666, "learning_rate": 4.808686386462156e-06, "loss": 0.8099, "step": 7634 }, { "epoch": 0.6836420616710505, "grad_norm": 1.2401348633004983, "learning_rate": 4.806207744186619e-06, "loss": 0.7904, "step": 7635 }, { "epoch": 0.6837316022161285, "grad_norm": 0.845660549421603, "learning_rate": 4.803729538795962e-06, "loss": 0.7643, "step": 7636 }, { "epoch": 0.6838211427612065, "grad_norm": 0.9830017878832183, "learning_rate": 4.801251770498643e-06, "loss": 0.7951, "step": 7637 }, { "epoch": 0.6839106833062846, "grad_norm": 0.8870034294269233, "learning_rate": 4.798774439503083e-06, "loss": 0.7595, "step": 7638 }, { "epoch": 0.6840002238513627, "grad_norm": 0.9963640308409181, "learning_rate": 4.796297546017669e-06, "loss": 0.7931, "step": 7639 }, { "epoch": 0.6840897643964408, "grad_norm": 1.0535424027199087, "learning_rate": 4.793821090250748e-06, "loss": 0.8176, "step": 7640 }, { "epoch": 0.6841793049415188, "grad_norm": 1.039270292588724, "learning_rate": 4.79134507241063e-06, "loss": 0.7515, "step": 7641 }, { "epoch": 0.6842688454865969, "grad_norm": 1.006781845763195, "learning_rate": 4.788869492705593e-06, "loss": 0.8068, "step": 7642 }, { "epoch": 0.684358386031675, "grad_norm": 1.0622641299399038, "learning_rate": 4.786394351343872e-06, "loss": 0.7972, "step": 7643 }, { "epoch": 0.684447926576753, "grad_norm": 0.9646098817166341, "learning_rate": 4.783919648533668e-06, "loss": 0.8354, "step": 7644 }, { "epoch": 0.6845374671218311, "grad_norm": 0.9250212951935629, "learning_rate": 4.781445384483145e-06, "loss": 0.8548, "step": 7645 }, { "epoch": 0.6846270076669092, "grad_norm": 1.155151162348634, "learning_rate": 4.7789715594004325e-06, "loss": 0.7811, "step": 7646 }, { "epoch": 0.6847165482119872, "grad_norm": 1.592338376503375, "learning_rate": 4.776498173493618e-06, "loss": 0.846, "step": 7647 }, { "epoch": 0.6848060887570653, "grad_norm": 0.9848437779403034, "learning_rate": 4.774025226970754e-06, "loss": 0.7672, "step": 7648 }, { "epoch": 0.6848956293021434, "grad_norm": 0.9969095880015193, "learning_rate": 4.7715527200398595e-06, "loss": 0.8169, "step": 7649 }, { "epoch": 0.6849851698472215, "grad_norm": 0.916716849341787, "learning_rate": 4.769080652908915e-06, "loss": 0.852, "step": 7650 }, { "epoch": 0.6850747103922995, "grad_norm": 0.9379705083690661, "learning_rate": 4.766609025785853e-06, "loss": 0.8082, "step": 7651 }, { "epoch": 0.6851642509373775, "grad_norm": 0.9536911403075226, "learning_rate": 4.76413783887859e-06, "loss": 0.768, "step": 7652 }, { "epoch": 0.6852537914824557, "grad_norm": 0.9892955448658866, "learning_rate": 4.761667092394989e-06, "loss": 0.7919, "step": 7653 }, { "epoch": 0.6853433320275337, "grad_norm": 1.0063083527315473, "learning_rate": 4.759196786542882e-06, "loss": 0.7875, "step": 7654 }, { "epoch": 0.6854328725726118, "grad_norm": 0.9789605535837096, "learning_rate": 4.756726921530067e-06, "loss": 0.8304, "step": 7655 }, { "epoch": 0.6855224131176898, "grad_norm": 1.2433125894198715, "learning_rate": 4.754257497564293e-06, "loss": 0.8564, "step": 7656 }, { "epoch": 0.685611953662768, "grad_norm": 0.9290662614153107, "learning_rate": 4.75178851485328e-06, "loss": 0.8424, "step": 7657 }, { "epoch": 0.685701494207846, "grad_norm": 0.9566815537682959, "learning_rate": 4.7493199736047205e-06, "loss": 0.8465, "step": 7658 }, { "epoch": 0.685791034752924, "grad_norm": 0.9071667309646064, "learning_rate": 4.746851874026253e-06, "loss": 0.7891, "step": 7659 }, { "epoch": 0.6858805752980022, "grad_norm": 0.9806487861828885, "learning_rate": 4.744384216325487e-06, "loss": 0.7876, "step": 7660 }, { "epoch": 0.6859701158430802, "grad_norm": 1.0407761021696793, "learning_rate": 4.7419170007099975e-06, "loss": 0.7453, "step": 7661 }, { "epoch": 0.6860596563881582, "grad_norm": 0.9562445846799578, "learning_rate": 4.7394502273873135e-06, "loss": 0.8163, "step": 7662 }, { "epoch": 0.6861491969332363, "grad_norm": 1.0444108755044383, "learning_rate": 4.736983896564928e-06, "loss": 0.7591, "step": 7663 }, { "epoch": 0.6862387374783144, "grad_norm": 0.9683041362144316, "learning_rate": 4.734518008450312e-06, "loss": 0.8287, "step": 7664 }, { "epoch": 0.6863282780233925, "grad_norm": 1.098163438299449, "learning_rate": 4.732052563250882e-06, "loss": 0.7516, "step": 7665 }, { "epoch": 0.6864178185684705, "grad_norm": 0.8173488978592476, "learning_rate": 4.729587561174027e-06, "loss": 0.7569, "step": 7666 }, { "epoch": 0.6865073591135487, "grad_norm": 0.9584898496040919, "learning_rate": 4.727123002427089e-06, "loss": 0.7873, "step": 7667 }, { "epoch": 0.6865968996586267, "grad_norm": 0.926146504490765, "learning_rate": 4.72465888721738e-06, "loss": 0.7789, "step": 7668 }, { "epoch": 0.6866864402037047, "grad_norm": 1.2639116589026649, "learning_rate": 4.722195215752171e-06, "loss": 0.8264, "step": 7669 }, { "epoch": 0.6867759807487828, "grad_norm": 0.9533545296032012, "learning_rate": 4.719731988238706e-06, "loss": 0.801, "step": 7670 }, { "epoch": 0.6868655212938609, "grad_norm": 1.040869138732014, "learning_rate": 4.717269204884178e-06, "loss": 0.7383, "step": 7671 }, { "epoch": 0.686955061838939, "grad_norm": 1.0037610729874973, "learning_rate": 4.714806865895756e-06, "loss": 0.8485, "step": 7672 }, { "epoch": 0.687044602384017, "grad_norm": 0.9178281339532298, "learning_rate": 4.712344971480551e-06, "loss": 0.8039, "step": 7673 }, { "epoch": 0.687134142929095, "grad_norm": 0.9039723582972342, "learning_rate": 4.7098835218456585e-06, "loss": 0.8248, "step": 7674 }, { "epoch": 0.6872236834741732, "grad_norm": 1.0837590541970161, "learning_rate": 4.707422517198119e-06, "loss": 0.8412, "step": 7675 }, { "epoch": 0.6873132240192512, "grad_norm": 1.0356362102668961, "learning_rate": 4.704961957744956e-06, "loss": 0.7524, "step": 7676 }, { "epoch": 0.6874027645643292, "grad_norm": 0.9750985610759151, "learning_rate": 4.702501843693141e-06, "loss": 0.7638, "step": 7677 }, { "epoch": 0.6874923051094074, "grad_norm": 0.9644398739455586, "learning_rate": 4.7000421752496055e-06, "loss": 0.8151, "step": 7678 }, { "epoch": 0.6875818456544854, "grad_norm": 1.1516897239170134, "learning_rate": 4.69758295262125e-06, "loss": 0.7597, "step": 7679 }, { "epoch": 0.6876713861995635, "grad_norm": 0.8791866827091167, "learning_rate": 4.695124176014938e-06, "loss": 0.8384, "step": 7680 }, { "epoch": 0.6877609267446415, "grad_norm": 0.9294702399020952, "learning_rate": 4.692665845637493e-06, "loss": 0.7844, "step": 7681 }, { "epoch": 0.6878504672897197, "grad_norm": 0.965780943257856, "learning_rate": 4.690207961695702e-06, "loss": 0.8198, "step": 7682 }, { "epoch": 0.6879400078347977, "grad_norm": 0.972430293034956, "learning_rate": 4.687750524396314e-06, "loss": 0.864, "step": 7683 }, { "epoch": 0.6880295483798757, "grad_norm": 0.8944555470932781, "learning_rate": 4.685293533946042e-06, "loss": 0.7956, "step": 7684 }, { "epoch": 0.6881190889249539, "grad_norm": 0.8972459279295408, "learning_rate": 4.6828369905515565e-06, "loss": 0.778, "step": 7685 }, { "epoch": 0.6882086294700319, "grad_norm": 0.9209436696056206, "learning_rate": 4.680380894419499e-06, "loss": 0.8245, "step": 7686 }, { "epoch": 0.68829817001511, "grad_norm": 0.9071930465407908, "learning_rate": 4.677925245756464e-06, "loss": 0.8328, "step": 7687 }, { "epoch": 0.688387710560188, "grad_norm": 0.9713506291746641, "learning_rate": 4.675470044769015e-06, "loss": 0.8059, "step": 7688 }, { "epoch": 0.6884772511052661, "grad_norm": 1.0322890781315812, "learning_rate": 4.673015291663674e-06, "loss": 0.8349, "step": 7689 }, { "epoch": 0.6885667916503442, "grad_norm": 0.885311818496522, "learning_rate": 4.6705609866469286e-06, "loss": 0.8, "step": 7690 }, { "epoch": 0.6886563321954222, "grad_norm": 0.8888731075690266, "learning_rate": 4.668107129925225e-06, "loss": 0.7692, "step": 7691 }, { "epoch": 0.6887458727405003, "grad_norm": 0.8856236156532676, "learning_rate": 4.665653721704975e-06, "loss": 0.823, "step": 7692 }, { "epoch": 0.6888354132855784, "grad_norm": 0.8516781050595209, "learning_rate": 4.6632007621925514e-06, "loss": 0.8173, "step": 7693 }, { "epoch": 0.6889249538306564, "grad_norm": 0.8760734530128444, "learning_rate": 4.660748251594288e-06, "loss": 0.7824, "step": 7694 }, { "epoch": 0.6890144943757345, "grad_norm": 0.9777360637584239, "learning_rate": 4.658296190116482e-06, "loss": 0.7815, "step": 7695 }, { "epoch": 0.6891040349208126, "grad_norm": 0.9474816684833404, "learning_rate": 4.6558445779653946e-06, "loss": 0.7974, "step": 7696 }, { "epoch": 0.6891935754658907, "grad_norm": 1.0740336970510092, "learning_rate": 4.653393415347246e-06, "loss": 0.8154, "step": 7697 }, { "epoch": 0.6892831160109687, "grad_norm": 0.8660586746066604, "learning_rate": 4.650942702468219e-06, "loss": 0.7534, "step": 7698 }, { "epoch": 0.6893726565560467, "grad_norm": 0.8909767852453753, "learning_rate": 4.648492439534463e-06, "loss": 0.7966, "step": 7699 }, { "epoch": 0.6894621971011249, "grad_norm": 0.9220333075557067, "learning_rate": 4.646042626752083e-06, "loss": 0.7921, "step": 7700 }, { "epoch": 0.6895517376462029, "grad_norm": 0.9098350959629002, "learning_rate": 4.6435932643271496e-06, "loss": 0.8346, "step": 7701 }, { "epoch": 0.689641278191281, "grad_norm": 0.8965014505742412, "learning_rate": 4.641144352465697e-06, "loss": 0.8193, "step": 7702 }, { "epoch": 0.6897308187363591, "grad_norm": 0.9234633268070609, "learning_rate": 4.638695891373718e-06, "loss": 0.856, "step": 7703 }, { "epoch": 0.6898203592814371, "grad_norm": 0.9374769186639121, "learning_rate": 4.6362478812571746e-06, "loss": 0.7961, "step": 7704 }, { "epoch": 0.6899098998265152, "grad_norm": 1.0058684765401815, "learning_rate": 4.633800322321972e-06, "loss": 0.7304, "step": 7705 }, { "epoch": 0.6899994403715932, "grad_norm": 0.9423020288073447, "learning_rate": 4.631353214774003e-06, "loss": 0.8515, "step": 7706 }, { "epoch": 0.6900889809166714, "grad_norm": 0.9047967924996625, "learning_rate": 4.628906558819106e-06, "loss": 0.7752, "step": 7707 }, { "epoch": 0.6901785214617494, "grad_norm": 0.9308127260698004, "learning_rate": 4.626460354663088e-06, "loss": 0.8291, "step": 7708 }, { "epoch": 0.6902680620068274, "grad_norm": 0.9687661406505695, "learning_rate": 4.624014602511714e-06, "loss": 0.8376, "step": 7709 }, { "epoch": 0.6903576025519055, "grad_norm": 0.9622354488294877, "learning_rate": 4.621569302570715e-06, "loss": 0.8254, "step": 7710 }, { "epoch": 0.6904471430969836, "grad_norm": 1.040135557361331, "learning_rate": 4.6191244550457735e-06, "loss": 0.7294, "step": 7711 }, { "epoch": 0.6905366836420617, "grad_norm": 1.0002495013137138, "learning_rate": 4.616680060142552e-06, "loss": 0.803, "step": 7712 }, { "epoch": 0.6906262241871397, "grad_norm": 0.9219391263097959, "learning_rate": 4.614236118066662e-06, "loss": 0.8666, "step": 7713 }, { "epoch": 0.6907157647322179, "grad_norm": 1.0065588862821842, "learning_rate": 4.611792629023677e-06, "loss": 0.8229, "step": 7714 }, { "epoch": 0.6908053052772959, "grad_norm": 1.002724892031868, "learning_rate": 4.6093495932191425e-06, "loss": 0.8133, "step": 7715 }, { "epoch": 0.6908948458223739, "grad_norm": 0.9097203547323798, "learning_rate": 4.60690701085855e-06, "loss": 0.8113, "step": 7716 }, { "epoch": 0.690984386367452, "grad_norm": 0.9833261082368646, "learning_rate": 4.604464882147362e-06, "loss": 0.7726, "step": 7717 }, { "epoch": 0.6910739269125301, "grad_norm": 0.9135802813590834, "learning_rate": 4.602023207291008e-06, "loss": 0.827, "step": 7718 }, { "epoch": 0.6911634674576081, "grad_norm": 0.9523278917149808, "learning_rate": 4.599581986494872e-06, "loss": 0.8425, "step": 7719 }, { "epoch": 0.6912530080026862, "grad_norm": 1.0003514879124802, "learning_rate": 4.5971412199643005e-06, "loss": 0.7713, "step": 7720 }, { "epoch": 0.6913425485477643, "grad_norm": 1.0838163936618297, "learning_rate": 4.594700907904608e-06, "loss": 0.8341, "step": 7721 }, { "epoch": 0.6914320890928424, "grad_norm": 1.015604191759641, "learning_rate": 4.5922610505210566e-06, "loss": 0.8376, "step": 7722 }, { "epoch": 0.6915216296379204, "grad_norm": 0.8546784471872672, "learning_rate": 4.58982164801888e-06, "loss": 0.805, "step": 7723 }, { "epoch": 0.6916111701829984, "grad_norm": 1.0385395725943547, "learning_rate": 4.587382700603279e-06, "loss": 0.838, "step": 7724 }, { "epoch": 0.6917007107280766, "grad_norm": 0.9078574954047972, "learning_rate": 4.584944208479407e-06, "loss": 0.8336, "step": 7725 }, { "epoch": 0.6917902512731546, "grad_norm": 1.2275033869971155, "learning_rate": 4.582506171852386e-06, "loss": 0.7917, "step": 7726 }, { "epoch": 0.6918797918182327, "grad_norm": 0.9339527676210228, "learning_rate": 4.58006859092729e-06, "loss": 0.8336, "step": 7727 }, { "epoch": 0.6919693323633107, "grad_norm": 0.938958883099842, "learning_rate": 4.57763146590916e-06, "loss": 0.8114, "step": 7728 }, { "epoch": 0.6920588729083889, "grad_norm": 0.921114466993026, "learning_rate": 4.575194797002999e-06, "loss": 0.823, "step": 7729 }, { "epoch": 0.6921484134534669, "grad_norm": 0.8848028670625154, "learning_rate": 4.572758584413777e-06, "loss": 0.8281, "step": 7730 }, { "epoch": 0.6922379539985449, "grad_norm": 0.9286944268121133, "learning_rate": 4.5703228283464165e-06, "loss": 0.7904, "step": 7731 }, { "epoch": 0.6923274945436231, "grad_norm": 0.952864911808103, "learning_rate": 4.56788752900581e-06, "loss": 0.8471, "step": 7732 }, { "epoch": 0.6924170350887011, "grad_norm": 0.9699577288490665, "learning_rate": 4.565452686596799e-06, "loss": 0.7812, "step": 7733 }, { "epoch": 0.6925065756337792, "grad_norm": 0.911285786162714, "learning_rate": 4.563018301324199e-06, "loss": 0.8117, "step": 7734 }, { "epoch": 0.6925961161788572, "grad_norm": 1.0333532386576183, "learning_rate": 4.560584373392783e-06, "loss": 0.8293, "step": 7735 }, { "epoch": 0.6926856567239353, "grad_norm": 0.9697470828221156, "learning_rate": 4.558150903007278e-06, "loss": 0.8098, "step": 7736 }, { "epoch": 0.6927751972690134, "grad_norm": 0.9779244131744482, "learning_rate": 4.555717890372394e-06, "loss": 0.8948, "step": 7737 }, { "epoch": 0.6928647378140914, "grad_norm": 0.9590323306175039, "learning_rate": 4.553285335692776e-06, "loss": 0.832, "step": 7738 }, { "epoch": 0.6929542783591696, "grad_norm": 0.9484394434483989, "learning_rate": 4.550853239173047e-06, "loss": 0.7909, "step": 7739 }, { "epoch": 0.6930438189042476, "grad_norm": 0.9456791005515883, "learning_rate": 4.548421601017786e-06, "loss": 0.802, "step": 7740 }, { "epoch": 0.6931333594493256, "grad_norm": 1.0010515938153786, "learning_rate": 4.545990421431535e-06, "loss": 0.7364, "step": 7741 }, { "epoch": 0.6932228999944037, "grad_norm": 1.0210524712540772, "learning_rate": 4.543559700618792e-06, "loss": 0.7757, "step": 7742 }, { "epoch": 0.6933124405394818, "grad_norm": 1.0273529023242547, "learning_rate": 4.541129438784036e-06, "loss": 0.8012, "step": 7743 }, { "epoch": 0.6934019810845599, "grad_norm": 0.9829214825923223, "learning_rate": 4.538699636131676e-06, "loss": 0.8531, "step": 7744 }, { "epoch": 0.6934915216296379, "grad_norm": 1.0278999485409048, "learning_rate": 4.536270292866108e-06, "loss": 0.7943, "step": 7745 }, { "epoch": 0.6935810621747159, "grad_norm": 0.9454099504143345, "learning_rate": 4.533841409191677e-06, "loss": 0.8208, "step": 7746 }, { "epoch": 0.6936706027197941, "grad_norm": 0.9145060065432331, "learning_rate": 4.531412985312694e-06, "loss": 0.7971, "step": 7747 }, { "epoch": 0.6937601432648721, "grad_norm": 0.9528201805784614, "learning_rate": 4.528985021433431e-06, "loss": 0.8258, "step": 7748 }, { "epoch": 0.6938496838099502, "grad_norm": 1.0701720010775788, "learning_rate": 4.526557517758119e-06, "loss": 0.7632, "step": 7749 }, { "epoch": 0.6939392243550283, "grad_norm": 0.9576936634197529, "learning_rate": 4.524130474490953e-06, "loss": 0.8053, "step": 7750 }, { "epoch": 0.6940287649001063, "grad_norm": 1.082811699820177, "learning_rate": 4.521703891836087e-06, "loss": 0.8491, "step": 7751 }, { "epoch": 0.6941183054451844, "grad_norm": 0.9750481696007716, "learning_rate": 4.519277769997637e-06, "loss": 0.7557, "step": 7752 }, { "epoch": 0.6942078459902624, "grad_norm": 0.9152437304075808, "learning_rate": 4.516852109179682e-06, "loss": 0.8848, "step": 7753 }, { "epoch": 0.6942973865353406, "grad_norm": 0.9789031284351487, "learning_rate": 4.514426909586258e-06, "loss": 0.7547, "step": 7754 }, { "epoch": 0.6943869270804186, "grad_norm": 1.197333642551522, "learning_rate": 4.512002171421368e-06, "loss": 0.8332, "step": 7755 }, { "epoch": 0.6944764676254966, "grad_norm": 0.884446932583186, "learning_rate": 4.50957789488897e-06, "loss": 0.7517, "step": 7756 }, { "epoch": 0.6945660081705748, "grad_norm": 0.8833605930695296, "learning_rate": 4.507154080192989e-06, "loss": 0.8384, "step": 7757 }, { "epoch": 0.6946555487156528, "grad_norm": 1.0608193854329326, "learning_rate": 4.504730727537307e-06, "loss": 0.8073, "step": 7758 }, { "epoch": 0.6947450892607309, "grad_norm": 0.9729726466497831, "learning_rate": 4.502307837125769e-06, "loss": 0.7803, "step": 7759 }, { "epoch": 0.6948346298058089, "grad_norm": 1.023350043853379, "learning_rate": 4.49988540916218e-06, "loss": 0.821, "step": 7760 }, { "epoch": 0.694924170350887, "grad_norm": 0.86515487605003, "learning_rate": 4.497463443850307e-06, "loss": 0.8651, "step": 7761 }, { "epoch": 0.6950137108959651, "grad_norm": 0.9164565222107208, "learning_rate": 4.4950419413938785e-06, "loss": 0.8167, "step": 7762 }, { "epoch": 0.6951032514410431, "grad_norm": 0.9190754724173374, "learning_rate": 4.492620901996583e-06, "loss": 0.818, "step": 7763 }, { "epoch": 0.6951927919861212, "grad_norm": 0.9907111111052157, "learning_rate": 4.490200325862073e-06, "loss": 0.8471, "step": 7764 }, { "epoch": 0.6952823325311993, "grad_norm": 0.9532964344673837, "learning_rate": 4.48778021319395e-06, "loss": 0.7751, "step": 7765 }, { "epoch": 0.6953718730762773, "grad_norm": 0.9225948104435617, "learning_rate": 4.485360564195797e-06, "loss": 0.8216, "step": 7766 }, { "epoch": 0.6954614136213554, "grad_norm": 1.0067853398920983, "learning_rate": 4.482941379071142e-06, "loss": 0.819, "step": 7767 }, { "epoch": 0.6955509541664335, "grad_norm": 0.9196174655914519, "learning_rate": 4.480522658023479e-06, "loss": 0.7719, "step": 7768 }, { "epoch": 0.6956404947115116, "grad_norm": 0.9107983728077748, "learning_rate": 4.478104401256266e-06, "loss": 0.8021, "step": 7769 }, { "epoch": 0.6957300352565896, "grad_norm": 0.9294936855310101, "learning_rate": 4.475686608972918e-06, "loss": 0.7564, "step": 7770 }, { "epoch": 0.6958195758016676, "grad_norm": 0.8450624277835593, "learning_rate": 4.473269281376804e-06, "loss": 0.7805, "step": 7771 }, { "epoch": 0.6959091163467458, "grad_norm": 0.9795022319205948, "learning_rate": 4.470852418671271e-06, "loss": 0.8042, "step": 7772 }, { "epoch": 0.6959986568918238, "grad_norm": 0.9866579823969005, "learning_rate": 4.4684360210596154e-06, "loss": 0.815, "step": 7773 }, { "epoch": 0.6960881974369019, "grad_norm": 1.227092642163139, "learning_rate": 4.466020088745097e-06, "loss": 0.8008, "step": 7774 }, { "epoch": 0.69617773798198, "grad_norm": 0.9510083267097689, "learning_rate": 4.463604621930937e-06, "loss": 0.8126, "step": 7775 }, { "epoch": 0.696267278527058, "grad_norm": 0.9862930043956016, "learning_rate": 4.461189620820312e-06, "loss": 0.8338, "step": 7776 }, { "epoch": 0.6963568190721361, "grad_norm": 0.9418095761541713, "learning_rate": 4.458775085616363e-06, "loss": 0.805, "step": 7777 }, { "epoch": 0.6964463596172141, "grad_norm": 0.9525776491121389, "learning_rate": 4.4563610165221995e-06, "loss": 0.8112, "step": 7778 }, { "epoch": 0.6965359001622923, "grad_norm": 0.8948369026606042, "learning_rate": 4.4539474137408825e-06, "loss": 0.7846, "step": 7779 }, { "epoch": 0.6966254407073703, "grad_norm": 0.921271952153349, "learning_rate": 4.451534277475436e-06, "loss": 0.7583, "step": 7780 }, { "epoch": 0.6967149812524484, "grad_norm": 0.9456674920470037, "learning_rate": 4.44912160792885e-06, "loss": 0.7987, "step": 7781 }, { "epoch": 0.6968045217975264, "grad_norm": 1.0370215105300973, "learning_rate": 4.446709405304061e-06, "loss": 0.8429, "step": 7782 }, { "epoch": 0.6968940623426045, "grad_norm": 0.9629235140407333, "learning_rate": 4.444297669803981e-06, "loss": 0.8453, "step": 7783 }, { "epoch": 0.6969836028876826, "grad_norm": 0.9475338653041179, "learning_rate": 4.441886401631472e-06, "loss": 0.8113, "step": 7784 }, { "epoch": 0.6970731434327606, "grad_norm": 0.9810510231460815, "learning_rate": 4.439475600989372e-06, "loss": 0.8382, "step": 7785 }, { "epoch": 0.6971626839778388, "grad_norm": 0.9593807466063948, "learning_rate": 4.437065268080466e-06, "loss": 0.783, "step": 7786 }, { "epoch": 0.6972522245229168, "grad_norm": 0.9210281043068721, "learning_rate": 4.434655403107499e-06, "loss": 0.7818, "step": 7787 }, { "epoch": 0.6973417650679948, "grad_norm": 0.9531472552897879, "learning_rate": 4.432246006273183e-06, "loss": 0.8135, "step": 7788 }, { "epoch": 0.6974313056130729, "grad_norm": 0.9709806118305161, "learning_rate": 4.42983707778019e-06, "loss": 0.8077, "step": 7789 }, { "epoch": 0.697520846158151, "grad_norm": 0.9314103307693076, "learning_rate": 4.427428617831146e-06, "loss": 0.7729, "step": 7790 }, { "epoch": 0.6976103867032291, "grad_norm": 1.0016380002825547, "learning_rate": 4.4250206266286535e-06, "loss": 0.7844, "step": 7791 }, { "epoch": 0.6976999272483071, "grad_norm": 0.9146578802186879, "learning_rate": 4.422613104375259e-06, "loss": 0.7897, "step": 7792 }, { "epoch": 0.6977894677933852, "grad_norm": 1.0265693910687193, "learning_rate": 4.4202060512734736e-06, "loss": 0.8051, "step": 7793 }, { "epoch": 0.6978790083384633, "grad_norm": 1.0721668945427831, "learning_rate": 4.417799467525772e-06, "loss": 0.8924, "step": 7794 }, { "epoch": 0.6979685488835413, "grad_norm": 0.9693514671026972, "learning_rate": 4.415393353334588e-06, "loss": 0.8024, "step": 7795 }, { "epoch": 0.6980580894286194, "grad_norm": 1.0051226006411156, "learning_rate": 4.4129877089023135e-06, "loss": 0.813, "step": 7796 }, { "epoch": 0.6981476299736975, "grad_norm": 0.9714005803199256, "learning_rate": 4.410582534431313e-06, "loss": 0.8357, "step": 7797 }, { "epoch": 0.6982371705187755, "grad_norm": 1.0392086715695477, "learning_rate": 4.408177830123892e-06, "loss": 0.8025, "step": 7798 }, { "epoch": 0.6983267110638536, "grad_norm": 0.9476414130880175, "learning_rate": 4.40577359618233e-06, "loss": 0.7997, "step": 7799 }, { "epoch": 0.6984162516089316, "grad_norm": 0.9522635099772635, "learning_rate": 4.403369832808862e-06, "loss": 0.7633, "step": 7800 }, { "epoch": 0.6985057921540098, "grad_norm": 0.9691524164500117, "learning_rate": 4.400966540205688e-06, "loss": 0.8148, "step": 7801 }, { "epoch": 0.6985953326990878, "grad_norm": 1.0282249072193876, "learning_rate": 4.398563718574959e-06, "loss": 0.7545, "step": 7802 }, { "epoch": 0.6986848732441658, "grad_norm": 0.9949166864329059, "learning_rate": 4.396161368118803e-06, "loss": 0.8307, "step": 7803 }, { "epoch": 0.698774413789244, "grad_norm": 0.927874432714334, "learning_rate": 4.393759489039288e-06, "loss": 0.8376, "step": 7804 }, { "epoch": 0.698863954334322, "grad_norm": 0.9156546636999212, "learning_rate": 4.391358081538456e-06, "loss": 0.8065, "step": 7805 }, { "epoch": 0.6989534948794001, "grad_norm": 0.9450259623191144, "learning_rate": 4.388957145818305e-06, "loss": 0.7827, "step": 7806 }, { "epoch": 0.6990430354244781, "grad_norm": 0.9623017302044576, "learning_rate": 4.386556682080794e-06, "loss": 0.8143, "step": 7807 }, { "epoch": 0.6991325759695562, "grad_norm": 0.9092367335316479, "learning_rate": 4.384156690527842e-06, "loss": 0.8282, "step": 7808 }, { "epoch": 0.6992221165146343, "grad_norm": 1.0272497689897835, "learning_rate": 4.38175717136133e-06, "loss": 0.7684, "step": 7809 }, { "epoch": 0.6993116570597123, "grad_norm": 1.1460948266469708, "learning_rate": 4.379358124783096e-06, "loss": 0.807, "step": 7810 }, { "epoch": 0.6994011976047905, "grad_norm": 1.0209818011587013, "learning_rate": 4.37695955099494e-06, "loss": 0.8201, "step": 7811 }, { "epoch": 0.6994907381498685, "grad_norm": 0.981735900252388, "learning_rate": 4.3745614501986234e-06, "loss": 0.8194, "step": 7812 }, { "epoch": 0.6995802786949465, "grad_norm": 0.9906248491297962, "learning_rate": 4.372163822595866e-06, "loss": 0.7833, "step": 7813 }, { "epoch": 0.6996698192400246, "grad_norm": 0.9287905432843976, "learning_rate": 4.3697666683883475e-06, "loss": 0.7676, "step": 7814 }, { "epoch": 0.6997593597851027, "grad_norm": 0.9438507823490722, "learning_rate": 4.367369987777711e-06, "loss": 0.7796, "step": 7815 }, { "epoch": 0.6998489003301808, "grad_norm": 0.8883555693470103, "learning_rate": 4.364973780965556e-06, "loss": 0.7951, "step": 7816 }, { "epoch": 0.6999384408752588, "grad_norm": 1.0572897438650468, "learning_rate": 4.362578048153442e-06, "loss": 0.8651, "step": 7817 }, { "epoch": 0.7000279814203368, "grad_norm": 0.8475297817693815, "learning_rate": 4.3601827895428926e-06, "loss": 0.8149, "step": 7818 }, { "epoch": 0.700117521965415, "grad_norm": 0.9092678173475608, "learning_rate": 4.357788005335389e-06, "loss": 0.8225, "step": 7819 }, { "epoch": 0.700207062510493, "grad_norm": 0.8893830545386018, "learning_rate": 4.355393695732371e-06, "loss": 0.814, "step": 7820 }, { "epoch": 0.7002966030555711, "grad_norm": 0.9032946005309451, "learning_rate": 4.352999860935243e-06, "loss": 0.8169, "step": 7821 }, { "epoch": 0.7003861436006492, "grad_norm": 0.9481847724172328, "learning_rate": 4.3506065011453645e-06, "loss": 0.8141, "step": 7822 }, { "epoch": 0.7004756841457273, "grad_norm": 0.9442256078249543, "learning_rate": 4.348213616564057e-06, "loss": 0.7861, "step": 7823 }, { "epoch": 0.7005652246908053, "grad_norm": 0.9324253264008724, "learning_rate": 4.345821207392605e-06, "loss": 0.7409, "step": 7824 }, { "epoch": 0.7006547652358833, "grad_norm": 1.197510646973898, "learning_rate": 4.343429273832242e-06, "loss": 0.8429, "step": 7825 }, { "epoch": 0.7007443057809615, "grad_norm": 0.9549727419731101, "learning_rate": 4.3410378160841785e-06, "loss": 0.7685, "step": 7826 }, { "epoch": 0.7008338463260395, "grad_norm": 0.8876965354083094, "learning_rate": 4.338646834349573e-06, "loss": 0.7565, "step": 7827 }, { "epoch": 0.7009233868711175, "grad_norm": 0.9820012132776125, "learning_rate": 4.336256328829547e-06, "loss": 0.8042, "step": 7828 }, { "epoch": 0.7010129274161957, "grad_norm": 1.026695065438196, "learning_rate": 4.333866299725182e-06, "loss": 0.8188, "step": 7829 }, { "epoch": 0.7011024679612737, "grad_norm": 0.9066142969266826, "learning_rate": 4.331476747237524e-06, "loss": 0.8242, "step": 7830 }, { "epoch": 0.7011920085063518, "grad_norm": 1.0006242470898197, "learning_rate": 4.3290876715675625e-06, "loss": 0.8117, "step": 7831 }, { "epoch": 0.7012815490514298, "grad_norm": 1.0263349500593006, "learning_rate": 4.326699072916269e-06, "loss": 0.8366, "step": 7832 }, { "epoch": 0.701371089596508, "grad_norm": 0.8933530940189127, "learning_rate": 4.324310951484563e-06, "loss": 0.8038, "step": 7833 }, { "epoch": 0.701460630141586, "grad_norm": 0.9064932606278285, "learning_rate": 4.321923307473324e-06, "loss": 0.7977, "step": 7834 }, { "epoch": 0.701550170686664, "grad_norm": 0.8927858256687038, "learning_rate": 4.319536141083397e-06, "loss": 0.7973, "step": 7835 }, { "epoch": 0.7016397112317421, "grad_norm": 0.9699247794056655, "learning_rate": 4.3171494525155745e-06, "loss": 0.8298, "step": 7836 }, { "epoch": 0.7017292517768202, "grad_norm": 1.0082420296479497, "learning_rate": 4.314763241970622e-06, "loss": 0.8104, "step": 7837 }, { "epoch": 0.7018187923218983, "grad_norm": 0.9386219749028808, "learning_rate": 4.312377509649255e-06, "loss": 0.8301, "step": 7838 }, { "epoch": 0.7019083328669763, "grad_norm": 0.9924220137191154, "learning_rate": 4.309992255752161e-06, "loss": 0.8214, "step": 7839 }, { "epoch": 0.7019978734120544, "grad_norm": 0.893715958904001, "learning_rate": 4.307607480479977e-06, "loss": 0.7787, "step": 7840 }, { "epoch": 0.7020874139571325, "grad_norm": 1.1659680417144989, "learning_rate": 4.3052231840333055e-06, "loss": 0.7749, "step": 7841 }, { "epoch": 0.7021769545022105, "grad_norm": 1.1360113900712276, "learning_rate": 4.302839366612699e-06, "loss": 0.8193, "step": 7842 }, { "epoch": 0.7022664950472886, "grad_norm": 1.0902357207734859, "learning_rate": 4.300456028418679e-06, "loss": 0.7777, "step": 7843 }, { "epoch": 0.7023560355923667, "grad_norm": 0.9819442830933688, "learning_rate": 4.298073169651721e-06, "loss": 0.7683, "step": 7844 }, { "epoch": 0.7024455761374447, "grad_norm": 0.9699269300147558, "learning_rate": 4.295690790512271e-06, "loss": 0.7655, "step": 7845 }, { "epoch": 0.7025351166825228, "grad_norm": 1.318565760918533, "learning_rate": 4.293308891200727e-06, "loss": 0.7904, "step": 7846 }, { "epoch": 0.7026246572276009, "grad_norm": 0.8932380165667645, "learning_rate": 4.290927471917438e-06, "loss": 0.8129, "step": 7847 }, { "epoch": 0.702714197772679, "grad_norm": 0.8901047211548933, "learning_rate": 4.288546532862727e-06, "loss": 0.762, "step": 7848 }, { "epoch": 0.702803738317757, "grad_norm": 0.9531585357043469, "learning_rate": 4.2861660742368695e-06, "loss": 0.8109, "step": 7849 }, { "epoch": 0.702893278862835, "grad_norm": 1.0439964747070103, "learning_rate": 4.283786096240098e-06, "loss": 0.7944, "step": 7850 }, { "epoch": 0.7029828194079132, "grad_norm": 0.9514954874560548, "learning_rate": 4.281406599072616e-06, "loss": 0.8591, "step": 7851 }, { "epoch": 0.7030723599529912, "grad_norm": 0.9358611423209198, "learning_rate": 4.279027582934581e-06, "loss": 0.871, "step": 7852 }, { "epoch": 0.7031619004980693, "grad_norm": 1.1246277609593942, "learning_rate": 4.276649048026097e-06, "loss": 0.801, "step": 7853 }, { "epoch": 0.7032514410431473, "grad_norm": 0.9258365067213435, "learning_rate": 4.274270994547246e-06, "loss": 0.8183, "step": 7854 }, { "epoch": 0.7033409815882254, "grad_norm": 1.0393722817239102, "learning_rate": 4.27189342269806e-06, "loss": 0.7878, "step": 7855 }, { "epoch": 0.7034305221333035, "grad_norm": 1.0160081285434908, "learning_rate": 4.269516332678529e-06, "loss": 0.8369, "step": 7856 }, { "epoch": 0.7035200626783815, "grad_norm": 0.9505343277588941, "learning_rate": 4.267139724688618e-06, "loss": 0.8408, "step": 7857 }, { "epoch": 0.7036096032234597, "grad_norm": 1.084436095234868, "learning_rate": 4.2647635989282275e-06, "loss": 0.8662, "step": 7858 }, { "epoch": 0.7036991437685377, "grad_norm": 1.0138485545406704, "learning_rate": 4.262387955597233e-06, "loss": 0.9004, "step": 7859 }, { "epoch": 0.7037886843136157, "grad_norm": 1.0634460732207964, "learning_rate": 4.260012794895468e-06, "loss": 0.7773, "step": 7860 }, { "epoch": 0.7038782248586938, "grad_norm": 0.8835714399883221, "learning_rate": 4.257638117022721e-06, "loss": 0.8256, "step": 7861 }, { "epoch": 0.7039677654037719, "grad_norm": 0.9317028246628536, "learning_rate": 4.255263922178739e-06, "loss": 0.7913, "step": 7862 }, { "epoch": 0.70405730594885, "grad_norm": 0.9356333730773462, "learning_rate": 4.252890210563244e-06, "loss": 0.8488, "step": 7863 }, { "epoch": 0.704146846493928, "grad_norm": 0.9926217639873891, "learning_rate": 4.250516982375892e-06, "loss": 0.7833, "step": 7864 }, { "epoch": 0.7042363870390062, "grad_norm": 1.0265120533352918, "learning_rate": 4.248144237816315e-06, "loss": 0.8155, "step": 7865 }, { "epoch": 0.7043259275840842, "grad_norm": 1.0513806022086791, "learning_rate": 4.245771977084102e-06, "loss": 0.8546, "step": 7866 }, { "epoch": 0.7044154681291622, "grad_norm": 0.8821769700797972, "learning_rate": 4.243400200378798e-06, "loss": 0.7909, "step": 7867 }, { "epoch": 0.7045050086742403, "grad_norm": 1.0525150184734302, "learning_rate": 4.241028907899911e-06, "loss": 0.8223, "step": 7868 }, { "epoch": 0.7045945492193184, "grad_norm": 0.9560231789424795, "learning_rate": 4.238658099846905e-06, "loss": 0.8177, "step": 7869 }, { "epoch": 0.7046840897643964, "grad_norm": 0.9082779880232594, "learning_rate": 4.236287776419206e-06, "loss": 0.7921, "step": 7870 }, { "epoch": 0.7047736303094745, "grad_norm": 0.9209774468871164, "learning_rate": 4.233917937816195e-06, "loss": 0.782, "step": 7871 }, { "epoch": 0.7048631708545525, "grad_norm": 0.9904592439812467, "learning_rate": 4.231548584237219e-06, "loss": 0.7909, "step": 7872 }, { "epoch": 0.7049527113996307, "grad_norm": 0.8814979759439844, "learning_rate": 4.229179715881577e-06, "loss": 0.7606, "step": 7873 }, { "epoch": 0.7050422519447087, "grad_norm": 0.9607445067312818, "learning_rate": 4.226811332948534e-06, "loss": 0.787, "step": 7874 }, { "epoch": 0.7051317924897867, "grad_norm": 0.9329985025763997, "learning_rate": 4.224443435637307e-06, "loss": 0.8061, "step": 7875 }, { "epoch": 0.7052213330348649, "grad_norm": 0.958620912046123, "learning_rate": 4.222076024147077e-06, "loss": 0.755, "step": 7876 }, { "epoch": 0.7053108735799429, "grad_norm": 1.0273678410741975, "learning_rate": 4.219709098676984e-06, "loss": 0.7651, "step": 7877 }, { "epoch": 0.705400414125021, "grad_norm": 0.9759808936753737, "learning_rate": 4.2173426594261254e-06, "loss": 0.823, "step": 7878 }, { "epoch": 0.705489954670099, "grad_norm": 0.9248341649307702, "learning_rate": 4.214976706593559e-06, "loss": 0.7811, "step": 7879 }, { "epoch": 0.7055794952151772, "grad_norm": 1.008164766325714, "learning_rate": 4.2126112403782996e-06, "loss": 0.7852, "step": 7880 }, { "epoch": 0.7056690357602552, "grad_norm": 1.1703414622579127, "learning_rate": 4.210246260979323e-06, "loss": 0.8333, "step": 7881 }, { "epoch": 0.7057585763053332, "grad_norm": 0.9028509865637157, "learning_rate": 4.207881768595564e-06, "loss": 0.7843, "step": 7882 }, { "epoch": 0.7058481168504114, "grad_norm": 0.9838227996832009, "learning_rate": 4.205517763425916e-06, "loss": 0.7918, "step": 7883 }, { "epoch": 0.7059376573954894, "grad_norm": 1.0246423285432387, "learning_rate": 4.203154245669231e-06, "loss": 0.8413, "step": 7884 }, { "epoch": 0.7060271979405675, "grad_norm": 0.9534446735398414, "learning_rate": 4.2007912155243215e-06, "loss": 0.8007, "step": 7885 }, { "epoch": 0.7061167384856455, "grad_norm": 1.0098657084374096, "learning_rate": 4.198428673189956e-06, "loss": 0.7606, "step": 7886 }, { "epoch": 0.7062062790307236, "grad_norm": 0.9629509916070518, "learning_rate": 4.196066618864865e-06, "loss": 0.8154, "step": 7887 }, { "epoch": 0.7062958195758017, "grad_norm": 0.9379535700610556, "learning_rate": 4.193705052747737e-06, "loss": 0.8208, "step": 7888 }, { "epoch": 0.7063853601208797, "grad_norm": 0.9332714417153479, "learning_rate": 4.191343975037219e-06, "loss": 0.7736, "step": 7889 }, { "epoch": 0.7064749006659578, "grad_norm": 0.9653329514944474, "learning_rate": 4.18898338593192e-06, "loss": 0.7614, "step": 7890 }, { "epoch": 0.7065644412110359, "grad_norm": 0.957423444698433, "learning_rate": 4.1866232856304e-06, "loss": 0.7956, "step": 7891 }, { "epoch": 0.7066539817561139, "grad_norm": 0.9612204566749764, "learning_rate": 4.184263674331181e-06, "loss": 0.8145, "step": 7892 }, { "epoch": 0.706743522301192, "grad_norm": 1.1200635799006722, "learning_rate": 4.181904552232753e-06, "loss": 0.7996, "step": 7893 }, { "epoch": 0.7068330628462701, "grad_norm": 0.9031182189730383, "learning_rate": 4.179545919533555e-06, "loss": 0.8206, "step": 7894 }, { "epoch": 0.7069226033913482, "grad_norm": 1.0239648291935521, "learning_rate": 4.177187776431991e-06, "loss": 0.7711, "step": 7895 }, { "epoch": 0.7070121439364262, "grad_norm": 0.9937433912198858, "learning_rate": 4.174830123126412e-06, "loss": 0.8238, "step": 7896 }, { "epoch": 0.7071016844815042, "grad_norm": 0.9943993546463767, "learning_rate": 4.1724729598151414e-06, "loss": 0.859, "step": 7897 }, { "epoch": 0.7071912250265824, "grad_norm": 0.9703798066369955, "learning_rate": 4.170116286696452e-06, "loss": 0.7966, "step": 7898 }, { "epoch": 0.7072807655716604, "grad_norm": 1.0137997829982364, "learning_rate": 4.167760103968585e-06, "loss": 0.8332, "step": 7899 }, { "epoch": 0.7073703061167385, "grad_norm": 0.9205957086889073, "learning_rate": 4.165404411829733e-06, "loss": 0.8347, "step": 7900 }, { "epoch": 0.7074598466618166, "grad_norm": 0.9366670914366876, "learning_rate": 4.163049210478053e-06, "loss": 0.7775, "step": 7901 }, { "epoch": 0.7075493872068946, "grad_norm": 0.9648487891203498, "learning_rate": 4.160694500111648e-06, "loss": 0.7761, "step": 7902 }, { "epoch": 0.7076389277519727, "grad_norm": 0.9579742233533441, "learning_rate": 4.158340280928593e-06, "loss": 0.7935, "step": 7903 }, { "epoch": 0.7077284682970507, "grad_norm": 1.0387420342653628, "learning_rate": 4.155986553126914e-06, "loss": 0.7674, "step": 7904 }, { "epoch": 0.7078180088421289, "grad_norm": 0.9691208797090396, "learning_rate": 4.153633316904606e-06, "loss": 0.7907, "step": 7905 }, { "epoch": 0.7079075493872069, "grad_norm": 0.9089252736690521, "learning_rate": 4.151280572459615e-06, "loss": 0.8024, "step": 7906 }, { "epoch": 0.7079970899322849, "grad_norm": 1.0324167530110693, "learning_rate": 4.14892831998984e-06, "loss": 0.8468, "step": 7907 }, { "epoch": 0.708086630477363, "grad_norm": 0.9349287080606827, "learning_rate": 4.146576559693149e-06, "loss": 0.8407, "step": 7908 }, { "epoch": 0.7081761710224411, "grad_norm": 0.9035333457679109, "learning_rate": 4.144225291767361e-06, "loss": 0.8207, "step": 7909 }, { "epoch": 0.7082657115675192, "grad_norm": 1.0195723202710272, "learning_rate": 4.141874516410256e-06, "loss": 0.7465, "step": 7910 }, { "epoch": 0.7083552521125972, "grad_norm": 0.9400807474145519, "learning_rate": 4.139524233819581e-06, "loss": 0.8371, "step": 7911 }, { "epoch": 0.7084447926576753, "grad_norm": 0.9352587151199779, "learning_rate": 4.137174444193033e-06, "loss": 0.7829, "step": 7912 }, { "epoch": 0.7085343332027534, "grad_norm": 0.9689332243534247, "learning_rate": 4.134825147728262e-06, "loss": 0.8017, "step": 7913 }, { "epoch": 0.7086238737478314, "grad_norm": 1.0010788167056794, "learning_rate": 4.132476344622888e-06, "loss": 0.8122, "step": 7914 }, { "epoch": 0.7087134142929095, "grad_norm": 0.9236896594888196, "learning_rate": 4.130128035074482e-06, "loss": 0.8279, "step": 7915 }, { "epoch": 0.7088029548379876, "grad_norm": 0.985686777914297, "learning_rate": 4.127780219280574e-06, "loss": 0.7951, "step": 7916 }, { "epoch": 0.7088924953830656, "grad_norm": 0.863027795616886, "learning_rate": 4.125432897438666e-06, "loss": 0.7746, "step": 7917 }, { "epoch": 0.7089820359281437, "grad_norm": 1.011365398333579, "learning_rate": 4.123086069746195e-06, "loss": 0.7568, "step": 7918 }, { "epoch": 0.7090715764732218, "grad_norm": 0.9332451547221217, "learning_rate": 4.1207397364005715e-06, "loss": 0.7637, "step": 7919 }, { "epoch": 0.7091611170182999, "grad_norm": 1.0286177760694848, "learning_rate": 4.1183938975991644e-06, "loss": 0.7759, "step": 7920 }, { "epoch": 0.7092506575633779, "grad_norm": 1.007619922556112, "learning_rate": 4.116048553539296e-06, "loss": 0.8194, "step": 7921 }, { "epoch": 0.7093401981084559, "grad_norm": 1.0065814748096025, "learning_rate": 4.11370370441825e-06, "loss": 0.7677, "step": 7922 }, { "epoch": 0.7094297386535341, "grad_norm": 0.9501200430969721, "learning_rate": 4.111359350433265e-06, "loss": 0.7903, "step": 7923 }, { "epoch": 0.7095192791986121, "grad_norm": 0.958283816949814, "learning_rate": 4.109015491781542e-06, "loss": 0.7639, "step": 7924 }, { "epoch": 0.7096088197436902, "grad_norm": 0.960043186707044, "learning_rate": 4.106672128660241e-06, "loss": 0.8056, "step": 7925 }, { "epoch": 0.7096983602887682, "grad_norm": 1.220801612777027, "learning_rate": 4.104329261266474e-06, "loss": 0.7711, "step": 7926 }, { "epoch": 0.7097879008338464, "grad_norm": 1.0184681053866198, "learning_rate": 4.101986889797318e-06, "loss": 0.7886, "step": 7927 }, { "epoch": 0.7098774413789244, "grad_norm": 0.8984661512895591, "learning_rate": 4.099645014449805e-06, "loss": 0.7794, "step": 7928 }, { "epoch": 0.7099669819240024, "grad_norm": 0.9717266925490577, "learning_rate": 4.097303635420925e-06, "loss": 0.8038, "step": 7929 }, { "epoch": 0.7100565224690806, "grad_norm": 0.9656954729521396, "learning_rate": 4.094962752907628e-06, "loss": 0.7813, "step": 7930 }, { "epoch": 0.7101460630141586, "grad_norm": 0.9534951119145328, "learning_rate": 4.0926223671068235e-06, "loss": 0.7729, "step": 7931 }, { "epoch": 0.7102356035592367, "grad_norm": 0.9891020367400325, "learning_rate": 4.090282478215374e-06, "loss": 0.7888, "step": 7932 }, { "epoch": 0.7103251441043147, "grad_norm": 0.8510690078994433, "learning_rate": 4.087943086430104e-06, "loss": 0.7444, "step": 7933 }, { "epoch": 0.7104146846493928, "grad_norm": 0.9998491382768321, "learning_rate": 4.085604191947796e-06, "loss": 0.8064, "step": 7934 }, { "epoch": 0.7105042251944709, "grad_norm": 0.9318757738463096, "learning_rate": 4.0832657949651895e-06, "loss": 0.7532, "step": 7935 }, { "epoch": 0.7105937657395489, "grad_norm": 1.0046147108323689, "learning_rate": 4.080927895678984e-06, "loss": 0.8176, "step": 7936 }, { "epoch": 0.7106833062846271, "grad_norm": 0.8537130152515398, "learning_rate": 4.078590494285835e-06, "loss": 0.789, "step": 7937 }, { "epoch": 0.7107728468297051, "grad_norm": 0.9281267347927185, "learning_rate": 4.07625359098236e-06, "loss": 0.8354, "step": 7938 }, { "epoch": 0.7108623873747831, "grad_norm": 1.0736654965205041, "learning_rate": 4.073917185965126e-06, "loss": 0.8491, "step": 7939 }, { "epoch": 0.7109519279198612, "grad_norm": 0.9727907542381893, "learning_rate": 4.0715812794306685e-06, "loss": 0.7846, "step": 7940 }, { "epoch": 0.7110414684649393, "grad_norm": 0.9444498914137567, "learning_rate": 4.069245871575474e-06, "loss": 0.8057, "step": 7941 }, { "epoch": 0.7111310090100174, "grad_norm": 0.8752647310048607, "learning_rate": 4.066910962595992e-06, "loss": 0.7933, "step": 7942 }, { "epoch": 0.7112205495550954, "grad_norm": 1.3559600165104373, "learning_rate": 4.064576552688624e-06, "loss": 0.7946, "step": 7943 }, { "epoch": 0.7113100901001734, "grad_norm": 1.0482616943537175, "learning_rate": 4.062242642049735e-06, "loss": 0.791, "step": 7944 }, { "epoch": 0.7113996306452516, "grad_norm": 0.9705393193805912, "learning_rate": 4.05990923087565e-06, "loss": 0.7481, "step": 7945 }, { "epoch": 0.7114891711903296, "grad_norm": 0.9004170629753756, "learning_rate": 4.057576319362635e-06, "loss": 0.7807, "step": 7946 }, { "epoch": 0.7115787117354077, "grad_norm": 0.9892818435110183, "learning_rate": 4.0552439077069395e-06, "loss": 0.7834, "step": 7947 }, { "epoch": 0.7116682522804858, "grad_norm": 1.210605684692914, "learning_rate": 4.052911996104754e-06, "loss": 0.778, "step": 7948 }, { "epoch": 0.7117577928255638, "grad_norm": 0.9730425979151602, "learning_rate": 4.050580584752232e-06, "loss": 0.7394, "step": 7949 }, { "epoch": 0.7118473333706419, "grad_norm": 0.9459761549118909, "learning_rate": 4.048249673845487e-06, "loss": 0.8388, "step": 7950 }, { "epoch": 0.7119368739157199, "grad_norm": 1.0157323617866478, "learning_rate": 4.045919263580581e-06, "loss": 0.7967, "step": 7951 }, { "epoch": 0.7120264144607981, "grad_norm": 0.9730101016927492, "learning_rate": 4.043589354153541e-06, "loss": 0.8552, "step": 7952 }, { "epoch": 0.7121159550058761, "grad_norm": 1.066143243095896, "learning_rate": 4.041259945760357e-06, "loss": 0.8064, "step": 7953 }, { "epoch": 0.7122054955509541, "grad_norm": 1.0165902162100957, "learning_rate": 4.038931038596969e-06, "loss": 0.8303, "step": 7954 }, { "epoch": 0.7122950360960323, "grad_norm": 0.8840146987770654, "learning_rate": 4.03660263285928e-06, "loss": 0.8135, "step": 7955 }, { "epoch": 0.7123845766411103, "grad_norm": 0.899977793439772, "learning_rate": 4.034274728743141e-06, "loss": 0.8084, "step": 7956 }, { "epoch": 0.7124741171861884, "grad_norm": 0.9712973427485663, "learning_rate": 4.031947326444372e-06, "loss": 0.7998, "step": 7957 }, { "epoch": 0.7125636577312664, "grad_norm": 0.8740344209898921, "learning_rate": 4.029620426158742e-06, "loss": 0.8192, "step": 7958 }, { "epoch": 0.7126531982763445, "grad_norm": 0.9557553227333138, "learning_rate": 4.02729402808199e-06, "loss": 0.817, "step": 7959 }, { "epoch": 0.7127427388214226, "grad_norm": 0.9432002157250226, "learning_rate": 4.0249681324098e-06, "loss": 0.7738, "step": 7960 }, { "epoch": 0.7128322793665006, "grad_norm": 1.0066602902148762, "learning_rate": 4.022642739337824e-06, "loss": 0.8013, "step": 7961 }, { "epoch": 0.7129218199115787, "grad_norm": 0.957436151866808, "learning_rate": 4.020317849061658e-06, "loss": 0.829, "step": 7962 }, { "epoch": 0.7130113604566568, "grad_norm": 1.0110497044311084, "learning_rate": 4.017993461776869e-06, "loss": 0.7691, "step": 7963 }, { "epoch": 0.7131009010017348, "grad_norm": 0.9986891888961377, "learning_rate": 4.0156695776789736e-06, "loss": 0.8277, "step": 7964 }, { "epoch": 0.7131904415468129, "grad_norm": 1.183629025985562, "learning_rate": 4.013346196963455e-06, "loss": 0.7621, "step": 7965 }, { "epoch": 0.713279982091891, "grad_norm": 1.0496257323601976, "learning_rate": 4.01102331982575e-06, "loss": 0.7961, "step": 7966 }, { "epoch": 0.7133695226369691, "grad_norm": 1.0149202691127053, "learning_rate": 4.0087009464612426e-06, "loss": 0.7539, "step": 7967 }, { "epoch": 0.7134590631820471, "grad_norm": 0.9588487319642659, "learning_rate": 4.006379077065288e-06, "loss": 0.8085, "step": 7968 }, { "epoch": 0.7135486037271251, "grad_norm": 0.9272255713327721, "learning_rate": 4.004057711833193e-06, "loss": 0.7819, "step": 7969 }, { "epoch": 0.7136381442722033, "grad_norm": 0.9325813789105739, "learning_rate": 4.001736850960222e-06, "loss": 0.8246, "step": 7970 }, { "epoch": 0.7137276848172813, "grad_norm": 1.010335752112128, "learning_rate": 3.999416494641604e-06, "loss": 0.8179, "step": 7971 }, { "epoch": 0.7138172253623594, "grad_norm": 0.8859424094485543, "learning_rate": 3.997096643072519e-06, "loss": 0.8206, "step": 7972 }, { "epoch": 0.7139067659074375, "grad_norm": 0.9282613048012488, "learning_rate": 3.9947772964481e-06, "loss": 0.7969, "step": 7973 }, { "epoch": 0.7139963064525156, "grad_norm": 0.9419502149154005, "learning_rate": 3.992458454963445e-06, "loss": 0.8046, "step": 7974 }, { "epoch": 0.7140858469975936, "grad_norm": 1.017906708025248, "learning_rate": 3.990140118813608e-06, "loss": 0.8518, "step": 7975 }, { "epoch": 0.7141753875426716, "grad_norm": 0.8846642506074188, "learning_rate": 3.9878222881936e-06, "loss": 0.8395, "step": 7976 }, { "epoch": 0.7142649280877498, "grad_norm": 0.9024168813494983, "learning_rate": 3.98550496329839e-06, "loss": 0.7929, "step": 7977 }, { "epoch": 0.7143544686328278, "grad_norm": 0.9371200847778404, "learning_rate": 3.983188144322903e-06, "loss": 0.7665, "step": 7978 }, { "epoch": 0.7144440091779058, "grad_norm": 0.9591274244960174, "learning_rate": 3.980871831462021e-06, "loss": 0.7747, "step": 7979 }, { "epoch": 0.7145335497229839, "grad_norm": 1.0776678325139482, "learning_rate": 3.978556024910587e-06, "loss": 0.8369, "step": 7980 }, { "epoch": 0.714623090268062, "grad_norm": 0.9513783281966097, "learning_rate": 3.976240724863397e-06, "loss": 0.8396, "step": 7981 }, { "epoch": 0.7147126308131401, "grad_norm": 0.9722841782002879, "learning_rate": 3.973925931515209e-06, "loss": 0.8576, "step": 7982 }, { "epoch": 0.7148021713582181, "grad_norm": 1.0040634137016153, "learning_rate": 3.971611645060733e-06, "loss": 0.8162, "step": 7983 }, { "epoch": 0.7148917119032963, "grad_norm": 0.9237933316661637, "learning_rate": 3.969297865694641e-06, "loss": 0.763, "step": 7984 }, { "epoch": 0.7149812524483743, "grad_norm": 0.9499048967072253, "learning_rate": 3.966984593611562e-06, "loss": 0.7532, "step": 7985 }, { "epoch": 0.7150707929934523, "grad_norm": 1.1389897159918918, "learning_rate": 3.964671829006077e-06, "loss": 0.8011, "step": 7986 }, { "epoch": 0.7151603335385304, "grad_norm": 1.0261115567855932, "learning_rate": 3.962359572072731e-06, "loss": 0.8149, "step": 7987 }, { "epoch": 0.7152498740836085, "grad_norm": 1.1441028080738103, "learning_rate": 3.960047823006024e-06, "loss": 0.8059, "step": 7988 }, { "epoch": 0.7153394146286866, "grad_norm": 1.160074525132683, "learning_rate": 3.957736582000411e-06, "loss": 0.7693, "step": 7989 }, { "epoch": 0.7154289551737646, "grad_norm": 0.9672790373711078, "learning_rate": 3.955425849250306e-06, "loss": 0.7985, "step": 7990 }, { "epoch": 0.7155184957188427, "grad_norm": 0.9215136614405941, "learning_rate": 3.953115624950082e-06, "loss": 0.8289, "step": 7991 }, { "epoch": 0.7156080362639208, "grad_norm": 0.9938147266252024, "learning_rate": 3.950805909294067e-06, "loss": 0.8112, "step": 7992 }, { "epoch": 0.7156975768089988, "grad_norm": 1.0276848101316947, "learning_rate": 3.9484967024765455e-06, "loss": 0.8623, "step": 7993 }, { "epoch": 0.7157871173540769, "grad_norm": 0.9755649065242386, "learning_rate": 3.946188004691761e-06, "loss": 0.8213, "step": 7994 }, { "epoch": 0.715876657899155, "grad_norm": 1.0073359299386058, "learning_rate": 3.943879816133915e-06, "loss": 0.757, "step": 7995 }, { "epoch": 0.715966198444233, "grad_norm": 0.9160595124822789, "learning_rate": 3.941572136997164e-06, "loss": 0.7846, "step": 7996 }, { "epoch": 0.7160557389893111, "grad_norm": 0.9371290887147591, "learning_rate": 3.939264967475621e-06, "loss": 0.7846, "step": 7997 }, { "epoch": 0.7161452795343891, "grad_norm": 0.8692792357205559, "learning_rate": 3.936958307763359e-06, "loss": 0.805, "step": 7998 }, { "epoch": 0.7162348200794673, "grad_norm": 0.905193356552538, "learning_rate": 3.934652158054411e-06, "loss": 0.8427, "step": 7999 }, { "epoch": 0.7163243606245453, "grad_norm": 1.032344251142508, "learning_rate": 3.93234651854275e-06, "loss": 0.7878, "step": 8000 }, { "epoch": 0.7164139011696233, "grad_norm": 1.0087972887651344, "learning_rate": 3.930041389422331e-06, "loss": 0.7603, "step": 8001 }, { "epoch": 0.7165034417147015, "grad_norm": 0.9059150175063786, "learning_rate": 3.927736770887051e-06, "loss": 0.7677, "step": 8002 }, { "epoch": 0.7165929822597795, "grad_norm": 0.92748998457732, "learning_rate": 3.925432663130765e-06, "loss": 0.8345, "step": 8003 }, { "epoch": 0.7166825228048576, "grad_norm": 1.1181317201951209, "learning_rate": 3.9231290663472885e-06, "loss": 0.8411, "step": 8004 }, { "epoch": 0.7167720633499356, "grad_norm": 0.9313268099527412, "learning_rate": 3.920825980730396e-06, "loss": 0.8042, "step": 8005 }, { "epoch": 0.7168616038950137, "grad_norm": 0.9946674575222921, "learning_rate": 3.918523406473805e-06, "loss": 0.7948, "step": 8006 }, { "epoch": 0.7169511444400918, "grad_norm": 0.9288893990107036, "learning_rate": 3.916221343771211e-06, "loss": 0.8106, "step": 8007 }, { "epoch": 0.7170406849851698, "grad_norm": 0.9053957623973863, "learning_rate": 3.913919792816252e-06, "loss": 0.765, "step": 8008 }, { "epoch": 0.717130225530248, "grad_norm": 1.0021944640227343, "learning_rate": 3.911618753802526e-06, "loss": 0.7994, "step": 8009 }, { "epoch": 0.717219766075326, "grad_norm": 1.0033825936963994, "learning_rate": 3.909318226923595e-06, "loss": 0.7544, "step": 8010 }, { "epoch": 0.717309306620404, "grad_norm": 1.2158394849756007, "learning_rate": 3.9070182123729635e-06, "loss": 0.849, "step": 8011 }, { "epoch": 0.7173988471654821, "grad_norm": 0.9184268916762881, "learning_rate": 3.904718710344101e-06, "loss": 0.7671, "step": 8012 }, { "epoch": 0.7174883877105602, "grad_norm": 0.9125189166497218, "learning_rate": 3.9024197210304415e-06, "loss": 0.7582, "step": 8013 }, { "epoch": 0.7175779282556383, "grad_norm": 1.0523067583606374, "learning_rate": 3.900121244625366e-06, "loss": 0.8017, "step": 8014 }, { "epoch": 0.7176674688007163, "grad_norm": 1.0256777600298561, "learning_rate": 3.897823281322212e-06, "loss": 0.8007, "step": 8015 }, { "epoch": 0.7177570093457943, "grad_norm": 1.0869108154789842, "learning_rate": 3.895525831314282e-06, "loss": 0.8211, "step": 8016 }, { "epoch": 0.7178465498908725, "grad_norm": 1.2495996134599217, "learning_rate": 3.893228894794824e-06, "loss": 0.795, "step": 8017 }, { "epoch": 0.7179360904359505, "grad_norm": 0.9950749795943328, "learning_rate": 3.8909324719570465e-06, "loss": 0.8463, "step": 8018 }, { "epoch": 0.7180256309810286, "grad_norm": 0.95745753774366, "learning_rate": 3.888636562994126e-06, "loss": 0.7838, "step": 8019 }, { "epoch": 0.7181151715261067, "grad_norm": 1.0524359568894825, "learning_rate": 3.886341168099182e-06, "loss": 0.7946, "step": 8020 }, { "epoch": 0.7182047120711847, "grad_norm": 0.9278183375525393, "learning_rate": 3.884046287465301e-06, "loss": 0.8409, "step": 8021 }, { "epoch": 0.7182942526162628, "grad_norm": 0.9364164843220585, "learning_rate": 3.881751921285511e-06, "loss": 0.8118, "step": 8022 }, { "epoch": 0.7183837931613408, "grad_norm": 1.0136311894022136, "learning_rate": 3.879458069752814e-06, "loss": 0.8571, "step": 8023 }, { "epoch": 0.718473333706419, "grad_norm": 1.0017969451364805, "learning_rate": 3.877164733060154e-06, "loss": 0.7609, "step": 8024 }, { "epoch": 0.718562874251497, "grad_norm": 0.8972703093890377, "learning_rate": 3.874871911400449e-06, "loss": 0.7891, "step": 8025 }, { "epoch": 0.718652414796575, "grad_norm": 1.0252206317435733, "learning_rate": 3.872579604966561e-06, "loss": 0.8407, "step": 8026 }, { "epoch": 0.7187419553416532, "grad_norm": 0.9459550098041324, "learning_rate": 3.870287813951307e-06, "loss": 0.8029, "step": 8027 }, { "epoch": 0.7188314958867312, "grad_norm": 1.15623606566502, "learning_rate": 3.867996538547466e-06, "loss": 0.8109, "step": 8028 }, { "epoch": 0.7189210364318093, "grad_norm": 1.0318236631971909, "learning_rate": 3.865705778947774e-06, "loss": 0.8068, "step": 8029 }, { "epoch": 0.7190105769768873, "grad_norm": 0.9899560999344845, "learning_rate": 3.863415535344922e-06, "loss": 0.7837, "step": 8030 }, { "epoch": 0.7191001175219655, "grad_norm": 0.9112563641482628, "learning_rate": 3.861125807931555e-06, "loss": 0.8358, "step": 8031 }, { "epoch": 0.7191896580670435, "grad_norm": 0.9849194139074654, "learning_rate": 3.858836596900286e-06, "loss": 0.8483, "step": 8032 }, { "epoch": 0.7192791986121215, "grad_norm": 0.8952514405515637, "learning_rate": 3.856547902443668e-06, "loss": 0.7663, "step": 8033 }, { "epoch": 0.7193687391571996, "grad_norm": 0.9354199453808414, "learning_rate": 3.85425972475422e-06, "loss": 0.7526, "step": 8034 }, { "epoch": 0.7194582797022777, "grad_norm": 1.0062265731282394, "learning_rate": 3.8519720640244174e-06, "loss": 0.7762, "step": 8035 }, { "epoch": 0.7195478202473558, "grad_norm": 0.9562340083177004, "learning_rate": 3.8496849204466906e-06, "loss": 0.7831, "step": 8036 }, { "epoch": 0.7196373607924338, "grad_norm": 0.997124203732317, "learning_rate": 3.847398294213425e-06, "loss": 0.8162, "step": 8037 }, { "epoch": 0.7197269013375119, "grad_norm": 0.865633526356508, "learning_rate": 3.845112185516966e-06, "loss": 0.8034, "step": 8038 }, { "epoch": 0.71981644188259, "grad_norm": 0.9179973980461723, "learning_rate": 3.842826594549612e-06, "loss": 0.8267, "step": 8039 }, { "epoch": 0.719905982427668, "grad_norm": 0.9644592330231287, "learning_rate": 3.840541521503622e-06, "loss": 0.794, "step": 8040 }, { "epoch": 0.719995522972746, "grad_norm": 0.9008078217844488, "learning_rate": 3.838256966571207e-06, "loss": 0.811, "step": 8041 }, { "epoch": 0.7200850635178242, "grad_norm": 0.9649580178942744, "learning_rate": 3.835972929944537e-06, "loss": 0.7526, "step": 8042 }, { "epoch": 0.7201746040629022, "grad_norm": 1.2026079919165602, "learning_rate": 3.833689411815736e-06, "loss": 0.8408, "step": 8043 }, { "epoch": 0.7202641446079803, "grad_norm": 0.8753704204773811, "learning_rate": 3.831406412376889e-06, "loss": 0.8079, "step": 8044 }, { "epoch": 0.7203536851530584, "grad_norm": 0.8803886757760556, "learning_rate": 3.829123931820031e-06, "loss": 0.7928, "step": 8045 }, { "epoch": 0.7204432256981365, "grad_norm": 0.9163872344600541, "learning_rate": 3.8268419703371605e-06, "loss": 0.7831, "step": 8046 }, { "epoch": 0.7205327662432145, "grad_norm": 0.8961934150094862, "learning_rate": 3.824560528120227e-06, "loss": 0.8116, "step": 8047 }, { "epoch": 0.7206223067882925, "grad_norm": 0.8796961345182243, "learning_rate": 3.822279605361138e-06, "loss": 0.8211, "step": 8048 }, { "epoch": 0.7207118473333707, "grad_norm": 0.9527637654143722, "learning_rate": 3.819999202251756e-06, "loss": 0.7861, "step": 8049 }, { "epoch": 0.7208013878784487, "grad_norm": 1.0914293678368323, "learning_rate": 3.817719318983903e-06, "loss": 0.8075, "step": 8050 }, { "epoch": 0.7208909284235268, "grad_norm": 0.9175555823292989, "learning_rate": 3.815439955749355e-06, "loss": 0.7916, "step": 8051 }, { "epoch": 0.7209804689686048, "grad_norm": 0.9683923944958911, "learning_rate": 3.8131611127398436e-06, "loss": 0.7975, "step": 8052 }, { "epoch": 0.7210700095136829, "grad_norm": 1.1111248589629346, "learning_rate": 3.810882790147059e-06, "loss": 0.8446, "step": 8053 }, { "epoch": 0.721159550058761, "grad_norm": 0.866830034766766, "learning_rate": 3.8086049881626453e-06, "loss": 0.757, "step": 8054 }, { "epoch": 0.721249090603839, "grad_norm": 0.9333112125504118, "learning_rate": 3.8063277069782047e-06, "loss": 0.7649, "step": 8055 }, { "epoch": 0.7213386311489172, "grad_norm": 0.9682247942287495, "learning_rate": 3.8040509467852926e-06, "loss": 0.7856, "step": 8056 }, { "epoch": 0.7214281716939952, "grad_norm": 1.0255023826794731, "learning_rate": 3.8017747077754252e-06, "loss": 0.7964, "step": 8057 }, { "epoch": 0.7215177122390732, "grad_norm": 1.0998535628079154, "learning_rate": 3.79949899014007e-06, "loss": 0.7649, "step": 8058 }, { "epoch": 0.7216072527841513, "grad_norm": 1.0307667273747294, "learning_rate": 3.797223794070659e-06, "loss": 0.8281, "step": 8059 }, { "epoch": 0.7216967933292294, "grad_norm": 0.9274621816833512, "learning_rate": 3.794949119758562e-06, "loss": 0.7855, "step": 8060 }, { "epoch": 0.7217863338743075, "grad_norm": 0.9866631774880115, "learning_rate": 3.792674967395128e-06, "loss": 0.8064, "step": 8061 }, { "epoch": 0.7218758744193855, "grad_norm": 0.8910589452060911, "learning_rate": 3.7904013371716485e-06, "loss": 0.8252, "step": 8062 }, { "epoch": 0.7219654149644636, "grad_norm": 1.0353101918553995, "learning_rate": 3.788128229279373e-06, "loss": 0.8544, "step": 8063 }, { "epoch": 0.7220549555095417, "grad_norm": 1.1380655138446412, "learning_rate": 3.7858556439095073e-06, "loss": 0.7387, "step": 8064 }, { "epoch": 0.7221444960546197, "grad_norm": 1.0295647522710205, "learning_rate": 3.7835835812532194e-06, "loss": 0.834, "step": 8065 }, { "epoch": 0.7222340365996978, "grad_norm": 0.9888002351891818, "learning_rate": 3.781312041501616e-06, "loss": 0.8066, "step": 8066 }, { "epoch": 0.7223235771447759, "grad_norm": 1.062838979211488, "learning_rate": 3.779041024845782e-06, "loss": 0.8262, "step": 8067 }, { "epoch": 0.722413117689854, "grad_norm": 0.9898503839660485, "learning_rate": 3.7767705314767444e-06, "loss": 0.8307, "step": 8068 }, { "epoch": 0.722502658234932, "grad_norm": 1.28546456795325, "learning_rate": 3.77450056158549e-06, "loss": 0.7595, "step": 8069 }, { "epoch": 0.72259219878001, "grad_norm": 0.9323703622844991, "learning_rate": 3.7722311153629654e-06, "loss": 0.8278, "step": 8070 }, { "epoch": 0.7226817393250882, "grad_norm": 1.0569201472244942, "learning_rate": 3.7699621930000617e-06, "loss": 0.8052, "step": 8071 }, { "epoch": 0.7227712798701662, "grad_norm": 0.8216402297424501, "learning_rate": 3.7676937946876324e-06, "loss": 0.8542, "step": 8072 }, { "epoch": 0.7228608204152442, "grad_norm": 1.0250025859374998, "learning_rate": 3.7654259206164956e-06, "loss": 0.8111, "step": 8073 }, { "epoch": 0.7229503609603224, "grad_norm": 0.9852012841158032, "learning_rate": 3.763158570977413e-06, "loss": 0.8629, "step": 8074 }, { "epoch": 0.7230399015054004, "grad_norm": 0.9445473731434566, "learning_rate": 3.7608917459611083e-06, "loss": 0.7752, "step": 8075 }, { "epoch": 0.7231294420504785, "grad_norm": 0.8854110373307847, "learning_rate": 3.7586254457582615e-06, "loss": 0.7903, "step": 8076 }, { "epoch": 0.7232189825955565, "grad_norm": 1.0782779709867207, "learning_rate": 3.7563596705595006e-06, "loss": 0.818, "step": 8077 }, { "epoch": 0.7233085231406347, "grad_norm": 1.2375401834473274, "learning_rate": 3.754094420555414e-06, "loss": 0.8289, "step": 8078 }, { "epoch": 0.7233980636857127, "grad_norm": 0.8681756008706156, "learning_rate": 3.7518296959365542e-06, "loss": 0.7578, "step": 8079 }, { "epoch": 0.7234876042307907, "grad_norm": 0.9224835569109648, "learning_rate": 3.74956549689342e-06, "loss": 0.8005, "step": 8080 }, { "epoch": 0.7235771447758689, "grad_norm": 0.9455182529160145, "learning_rate": 3.7473018236164715e-06, "loss": 0.806, "step": 8081 }, { "epoch": 0.7236666853209469, "grad_norm": 1.0293908593967704, "learning_rate": 3.7450386762961145e-06, "loss": 0.8459, "step": 8082 }, { "epoch": 0.723756225866025, "grad_norm": 0.9239862937412785, "learning_rate": 3.742776055122721e-06, "loss": 0.7544, "step": 8083 }, { "epoch": 0.723845766411103, "grad_norm": 0.9302986884047778, "learning_rate": 3.7405139602866146e-06, "loss": 0.8017, "step": 8084 }, { "epoch": 0.7239353069561811, "grad_norm": 0.9108004997925813, "learning_rate": 3.7382523919780732e-06, "loss": 0.7921, "step": 8085 }, { "epoch": 0.7240248475012592, "grad_norm": 1.0932783116028644, "learning_rate": 3.735991350387339e-06, "loss": 0.7476, "step": 8086 }, { "epoch": 0.7241143880463372, "grad_norm": 0.9335525755226466, "learning_rate": 3.733730835704603e-06, "loss": 0.8144, "step": 8087 }, { "epoch": 0.7242039285914152, "grad_norm": 1.0327189325007675, "learning_rate": 3.731470848120006e-06, "loss": 0.8083, "step": 8088 }, { "epoch": 0.7242934691364934, "grad_norm": 0.9840277520250201, "learning_rate": 3.7292113878236537e-06, "loss": 0.8565, "step": 8089 }, { "epoch": 0.7243830096815714, "grad_norm": 1.0670242787033324, "learning_rate": 3.7269524550056045e-06, "loss": 0.8032, "step": 8090 }, { "epoch": 0.7244725502266495, "grad_norm": 1.0434151671534462, "learning_rate": 3.724694049855869e-06, "loss": 0.8287, "step": 8091 }, { "epoch": 0.7245620907717276, "grad_norm": 0.9723349670942161, "learning_rate": 3.7224361725644285e-06, "loss": 0.8419, "step": 8092 }, { "epoch": 0.7246516313168057, "grad_norm": 1.023858203091878, "learning_rate": 3.7201788233211965e-06, "loss": 0.8014, "step": 8093 }, { "epoch": 0.7247411718618837, "grad_norm": 1.0487199548722101, "learning_rate": 3.717922002316059e-06, "loss": 0.7957, "step": 8094 }, { "epoch": 0.7248307124069617, "grad_norm": 0.9784360260851159, "learning_rate": 3.7156657097388493e-06, "loss": 0.8156, "step": 8095 }, { "epoch": 0.7249202529520399, "grad_norm": 0.9399933535671251, "learning_rate": 3.7134099457793625e-06, "loss": 0.826, "step": 8096 }, { "epoch": 0.7250097934971179, "grad_norm": 0.8641708179418636, "learning_rate": 3.7111547106273448e-06, "loss": 0.8166, "step": 8097 }, { "epoch": 0.725099334042196, "grad_norm": 1.4610792170627893, "learning_rate": 3.7089000044724997e-06, "loss": 0.7741, "step": 8098 }, { "epoch": 0.7251888745872741, "grad_norm": 0.9777267329792186, "learning_rate": 3.706645827504485e-06, "loss": 0.8278, "step": 8099 }, { "epoch": 0.7252784151323521, "grad_norm": 0.8817027317092367, "learning_rate": 3.7043921799129145e-06, "loss": 0.8039, "step": 8100 }, { "epoch": 0.7253679556774302, "grad_norm": 0.9451867599978477, "learning_rate": 3.7021390618873587e-06, "loss": 0.7534, "step": 8101 }, { "epoch": 0.7254574962225082, "grad_norm": 0.9700691957493127, "learning_rate": 3.6998864736173425e-06, "loss": 0.811, "step": 8102 }, { "epoch": 0.7255470367675864, "grad_norm": 0.9743847232684555, "learning_rate": 3.697634415292346e-06, "loss": 0.8426, "step": 8103 }, { "epoch": 0.7256365773126644, "grad_norm": 0.9698954052937843, "learning_rate": 3.695382887101805e-06, "loss": 0.8023, "step": 8104 }, { "epoch": 0.7257261178577424, "grad_norm": 0.9104559750589593, "learning_rate": 3.69313188923511e-06, "loss": 0.7699, "step": 8105 }, { "epoch": 0.7258156584028205, "grad_norm": 0.905320770822798, "learning_rate": 3.690881421881609e-06, "loss": 0.8161, "step": 8106 }, { "epoch": 0.7259051989478986, "grad_norm": 0.893614236010859, "learning_rate": 3.6886314852306025e-06, "loss": 0.8037, "step": 8107 }, { "epoch": 0.7259947394929767, "grad_norm": 1.0413342803925896, "learning_rate": 3.686382079471349e-06, "loss": 0.7249, "step": 8108 }, { "epoch": 0.7260842800380547, "grad_norm": 0.9729440231252229, "learning_rate": 3.684133204793061e-06, "loss": 0.7961, "step": 8109 }, { "epoch": 0.7261738205831328, "grad_norm": 1.127014078092048, "learning_rate": 3.6818848613849056e-06, "loss": 0.7793, "step": 8110 }, { "epoch": 0.7262633611282109, "grad_norm": 0.9326193113021066, "learning_rate": 3.679637049436008e-06, "loss": 0.8047, "step": 8111 }, { "epoch": 0.7263529016732889, "grad_norm": 0.9335242891812642, "learning_rate": 3.677389769135444e-06, "loss": 0.8234, "step": 8112 }, { "epoch": 0.726442442218367, "grad_norm": 0.9085982521571315, "learning_rate": 3.6751430206722506e-06, "loss": 0.8111, "step": 8113 }, { "epoch": 0.7265319827634451, "grad_norm": 0.9367893990429778, "learning_rate": 3.672896804235414e-06, "loss": 0.8501, "step": 8114 }, { "epoch": 0.7266215233085231, "grad_norm": 1.0104586939624385, "learning_rate": 3.6706511200138807e-06, "loss": 0.8373, "step": 8115 }, { "epoch": 0.7267110638536012, "grad_norm": 0.9166733516354795, "learning_rate": 3.66840596819655e-06, "loss": 0.7828, "step": 8116 }, { "epoch": 0.7268006043986793, "grad_norm": 0.8962816712159897, "learning_rate": 3.666161348972277e-06, "loss": 0.7993, "step": 8117 }, { "epoch": 0.7268901449437574, "grad_norm": 0.9996413295607297, "learning_rate": 3.6639172625298703e-06, "loss": 0.8286, "step": 8118 }, { "epoch": 0.7269796854888354, "grad_norm": 1.0523346870678778, "learning_rate": 3.661673709058099e-06, "loss": 0.7637, "step": 8119 }, { "epoch": 0.7270692260339134, "grad_norm": 1.0060814989737494, "learning_rate": 3.6594306887456744e-06, "loss": 0.8259, "step": 8120 }, { "epoch": 0.7271587665789916, "grad_norm": 1.021275517182984, "learning_rate": 3.657188201781282e-06, "loss": 0.8362, "step": 8121 }, { "epoch": 0.7272483071240696, "grad_norm": 0.986303850086801, "learning_rate": 3.654946248353548e-06, "loss": 0.8212, "step": 8122 }, { "epoch": 0.7273378476691477, "grad_norm": 1.083852868023282, "learning_rate": 3.6527048286510604e-06, "loss": 0.8105, "step": 8123 }, { "epoch": 0.7274273882142257, "grad_norm": 1.081898490144128, "learning_rate": 3.650463942862357e-06, "loss": 0.8135, "step": 8124 }, { "epoch": 0.7275169287593038, "grad_norm": 0.9548603089428934, "learning_rate": 3.648223591175939e-06, "loss": 0.8197, "step": 8125 }, { "epoch": 0.7276064693043819, "grad_norm": 1.0177733505314792, "learning_rate": 3.6459837737802484e-06, "loss": 0.8166, "step": 8126 }, { "epoch": 0.7276960098494599, "grad_norm": 0.891809750796513, "learning_rate": 3.643744490863699e-06, "loss": 0.8006, "step": 8127 }, { "epoch": 0.7277855503945381, "grad_norm": 1.0402400784847927, "learning_rate": 3.6415057426146504e-06, "loss": 0.8434, "step": 8128 }, { "epoch": 0.7278750909396161, "grad_norm": 1.0160968105500245, "learning_rate": 3.6392675292214185e-06, "loss": 0.8672, "step": 8129 }, { "epoch": 0.7279646314846941, "grad_norm": 1.0061379297165198, "learning_rate": 3.637029850872277e-06, "loss": 0.8495, "step": 8130 }, { "epoch": 0.7280541720297722, "grad_norm": 1.0300151491318532, "learning_rate": 3.634792707755447e-06, "loss": 0.7943, "step": 8131 }, { "epoch": 0.7281437125748503, "grad_norm": 1.0234193868385502, "learning_rate": 3.6325561000591082e-06, "loss": 0.7727, "step": 8132 }, { "epoch": 0.7282332531199284, "grad_norm": 1.0127265095035938, "learning_rate": 3.6303200279714033e-06, "loss": 0.8421, "step": 8133 }, { "epoch": 0.7283227936650064, "grad_norm": 0.875944777756982, "learning_rate": 3.6280844916804214e-06, "loss": 0.7335, "step": 8134 }, { "epoch": 0.7284123342100846, "grad_norm": 0.9626628362535917, "learning_rate": 3.6258494913742083e-06, "loss": 0.8109, "step": 8135 }, { "epoch": 0.7285018747551626, "grad_norm": 0.9409644227030993, "learning_rate": 3.6236150272407677e-06, "loss": 0.7647, "step": 8136 }, { "epoch": 0.7285914153002406, "grad_norm": 0.8959154973684125, "learning_rate": 3.6213810994680487e-06, "loss": 0.8267, "step": 8137 }, { "epoch": 0.7286809558453187, "grad_norm": 0.9467169304626755, "learning_rate": 3.619147708243965e-06, "loss": 0.8219, "step": 8138 }, { "epoch": 0.7287704963903968, "grad_norm": 1.0299178111567786, "learning_rate": 3.61691485375638e-06, "loss": 0.8373, "step": 8139 }, { "epoch": 0.7288600369354749, "grad_norm": 1.018567088123488, "learning_rate": 3.61468253619312e-06, "loss": 0.8778, "step": 8140 }, { "epoch": 0.7289495774805529, "grad_norm": 1.0796847496302113, "learning_rate": 3.612450755741962e-06, "loss": 0.8083, "step": 8141 }, { "epoch": 0.7290391180256309, "grad_norm": 0.8703934020805227, "learning_rate": 3.6102195125906257e-06, "loss": 0.8146, "step": 8142 }, { "epoch": 0.7291286585707091, "grad_norm": 1.0162685599689851, "learning_rate": 3.6079888069268034e-06, "loss": 0.8821, "step": 8143 }, { "epoch": 0.7292181991157871, "grad_norm": 0.9106763349275075, "learning_rate": 3.6057586389381326e-06, "loss": 0.7899, "step": 8144 }, { "epoch": 0.7293077396608652, "grad_norm": 0.9198185770642722, "learning_rate": 3.6035290088122043e-06, "loss": 0.8742, "step": 8145 }, { "epoch": 0.7293972802059433, "grad_norm": 0.9760022955830061, "learning_rate": 3.6012999167365746e-06, "loss": 0.8226, "step": 8146 }, { "epoch": 0.7294868207510213, "grad_norm": 0.9515393113527117, "learning_rate": 3.599071362898748e-06, "loss": 0.8136, "step": 8147 }, { "epoch": 0.7295763612960994, "grad_norm": 0.9762557578995822, "learning_rate": 3.5968433474861777e-06, "loss": 0.8362, "step": 8148 }, { "epoch": 0.7296659018411774, "grad_norm": 0.9915788824158718, "learning_rate": 3.5946158706862776e-06, "loss": 0.7976, "step": 8149 }, { "epoch": 0.7297554423862556, "grad_norm": 1.0579424716388717, "learning_rate": 3.592388932686417e-06, "loss": 0.768, "step": 8150 }, { "epoch": 0.7298449829313336, "grad_norm": 0.9470715447523234, "learning_rate": 3.5901625336739167e-06, "loss": 0.7374, "step": 8151 }, { "epoch": 0.7299345234764116, "grad_norm": 0.9238397973121099, "learning_rate": 3.587936673836062e-06, "loss": 0.8296, "step": 8152 }, { "epoch": 0.7300240640214898, "grad_norm": 0.9939585721757632, "learning_rate": 3.585711353360076e-06, "loss": 0.7991, "step": 8153 }, { "epoch": 0.7301136045665678, "grad_norm": 0.9569908555159269, "learning_rate": 3.583486572433149e-06, "loss": 0.8115, "step": 8154 }, { "epoch": 0.7302031451116459, "grad_norm": 0.8713856542147157, "learning_rate": 3.5812623312424223e-06, "loss": 0.8193, "step": 8155 }, { "epoch": 0.7302926856567239, "grad_norm": 0.8595706673677564, "learning_rate": 3.57903862997499e-06, "loss": 0.7501, "step": 8156 }, { "epoch": 0.730382226201802, "grad_norm": 0.9593305369643707, "learning_rate": 3.5768154688179056e-06, "loss": 0.8195, "step": 8157 }, { "epoch": 0.7304717667468801, "grad_norm": 0.9067085189796982, "learning_rate": 3.5745928479581726e-06, "loss": 0.8047, "step": 8158 }, { "epoch": 0.7305613072919581, "grad_norm": 0.9318275857191267, "learning_rate": 3.57237076758275e-06, "loss": 0.8187, "step": 8159 }, { "epoch": 0.7306508478370362, "grad_norm": 0.8903758729513356, "learning_rate": 3.5701492278785543e-06, "loss": 0.8344, "step": 8160 }, { "epoch": 0.7307403883821143, "grad_norm": 0.956221432404543, "learning_rate": 3.567928229032451e-06, "loss": 0.8488, "step": 8161 }, { "epoch": 0.7308299289271923, "grad_norm": 0.9263486732008133, "learning_rate": 3.565707771231265e-06, "loss": 0.7958, "step": 8162 }, { "epoch": 0.7309194694722704, "grad_norm": 0.9723045277236428, "learning_rate": 3.5634878546617746e-06, "loss": 0.7907, "step": 8163 }, { "epoch": 0.7310090100173485, "grad_norm": 0.9952755754990151, "learning_rate": 3.561268479510711e-06, "loss": 0.8065, "step": 8164 }, { "epoch": 0.7310985505624266, "grad_norm": 0.9237053099001223, "learning_rate": 3.5590496459647605e-06, "loss": 0.803, "step": 8165 }, { "epoch": 0.7311880911075046, "grad_norm": 0.9788055402294681, "learning_rate": 3.5568313542105648e-06, "loss": 0.8774, "step": 8166 }, { "epoch": 0.7312776316525826, "grad_norm": 0.9583853180914002, "learning_rate": 3.554613604434719e-06, "loss": 0.8117, "step": 8167 }, { "epoch": 0.7313671721976608, "grad_norm": 0.975711563757069, "learning_rate": 3.552396396823774e-06, "loss": 0.806, "step": 8168 }, { "epoch": 0.7314567127427388, "grad_norm": 1.0225837677503467, "learning_rate": 3.550179731564233e-06, "loss": 0.852, "step": 8169 }, { "epoch": 0.7315462532878169, "grad_norm": 0.9560903169115398, "learning_rate": 3.547963608842554e-06, "loss": 0.8125, "step": 8170 }, { "epoch": 0.731635793832895, "grad_norm": 0.9767869818696021, "learning_rate": 3.5457480288451516e-06, "loss": 0.8126, "step": 8171 }, { "epoch": 0.731725334377973, "grad_norm": 1.1207696260589222, "learning_rate": 3.5435329917583926e-06, "loss": 0.8212, "step": 8172 }, { "epoch": 0.7318148749230511, "grad_norm": 1.1384089660751477, "learning_rate": 3.541318497768599e-06, "loss": 0.7952, "step": 8173 }, { "epoch": 0.7319044154681291, "grad_norm": 0.9187041238646247, "learning_rate": 3.5391045470620454e-06, "loss": 0.7865, "step": 8174 }, { "epoch": 0.7319939560132073, "grad_norm": 0.9678929992019771, "learning_rate": 3.536891139824964e-06, "loss": 0.8374, "step": 8175 }, { "epoch": 0.7320834965582853, "grad_norm": 0.9958138999542374, "learning_rate": 3.5346782762435383e-06, "loss": 0.8149, "step": 8176 }, { "epoch": 0.7321730371033633, "grad_norm": 0.9293242013789866, "learning_rate": 3.5324659565039078e-06, "loss": 0.8544, "step": 8177 }, { "epoch": 0.7322625776484414, "grad_norm": 0.9155204742731905, "learning_rate": 3.5302541807921644e-06, "loss": 0.7882, "step": 8178 }, { "epoch": 0.7323521181935195, "grad_norm": 1.043025098766824, "learning_rate": 3.5280429492943602e-06, "loss": 0.8203, "step": 8179 }, { "epoch": 0.7324416587385976, "grad_norm": 0.8430319324190177, "learning_rate": 3.525832262196486e-06, "loss": 0.7749, "step": 8180 }, { "epoch": 0.7325311992836756, "grad_norm": 1.0473199493498606, "learning_rate": 3.523622119684509e-06, "loss": 0.7535, "step": 8181 }, { "epoch": 0.7326207398287538, "grad_norm": 1.0438194644799894, "learning_rate": 3.5214125219443328e-06, "loss": 0.8316, "step": 8182 }, { "epoch": 0.7327102803738318, "grad_norm": 0.9940286324236198, "learning_rate": 3.5192034691618247e-06, "loss": 0.7927, "step": 8183 }, { "epoch": 0.7327998209189098, "grad_norm": 1.0244578356254466, "learning_rate": 3.5169949615228016e-06, "loss": 0.7781, "step": 8184 }, { "epoch": 0.7328893614639879, "grad_norm": 0.9664108261905939, "learning_rate": 3.514786999213039e-06, "loss": 0.8569, "step": 8185 }, { "epoch": 0.732978902009066, "grad_norm": 0.903907094128431, "learning_rate": 3.512579582418254e-06, "loss": 0.7914, "step": 8186 }, { "epoch": 0.733068442554144, "grad_norm": 1.0154010232820232, "learning_rate": 3.510372711324138e-06, "loss": 0.8677, "step": 8187 }, { "epoch": 0.7331579830992221, "grad_norm": 1.1255327513168671, "learning_rate": 3.5081663861163217e-06, "loss": 0.8379, "step": 8188 }, { "epoch": 0.7332475236443002, "grad_norm": 0.958109867809121, "learning_rate": 3.5059606069803932e-06, "loss": 0.8045, "step": 8189 }, { "epoch": 0.7333370641893783, "grad_norm": 1.1612352560884336, "learning_rate": 3.5037553741019005e-06, "loss": 0.7996, "step": 8190 }, { "epoch": 0.7334266047344563, "grad_norm": 1.1031352294145034, "learning_rate": 3.501550687666333e-06, "loss": 0.7995, "step": 8191 }, { "epoch": 0.7335161452795343, "grad_norm": 0.9585411595654155, "learning_rate": 3.4993465478591447e-06, "loss": 0.7996, "step": 8192 }, { "epoch": 0.7336056858246125, "grad_norm": 0.9563541893254343, "learning_rate": 3.4971429548657377e-06, "loss": 0.8341, "step": 8193 }, { "epoch": 0.7336952263696905, "grad_norm": 0.8964672147630822, "learning_rate": 3.4949399088714776e-06, "loss": 0.8141, "step": 8194 }, { "epoch": 0.7337847669147686, "grad_norm": 1.2767614375930394, "learning_rate": 3.492737410061675e-06, "loss": 0.7792, "step": 8195 }, { "epoch": 0.7338743074598466, "grad_norm": 0.9594438537396436, "learning_rate": 3.490535458621599e-06, "loss": 0.8206, "step": 8196 }, { "epoch": 0.7339638480049248, "grad_norm": 1.0504216911660342, "learning_rate": 3.488334054736464e-06, "loss": 0.7863, "step": 8197 }, { "epoch": 0.7340533885500028, "grad_norm": 1.052226561582015, "learning_rate": 3.4861331985914504e-06, "loss": 0.7882, "step": 8198 }, { "epoch": 0.7341429290950808, "grad_norm": 1.0533131509887879, "learning_rate": 3.483932890371681e-06, "loss": 0.7675, "step": 8199 }, { "epoch": 0.734232469640159, "grad_norm": 0.9560068421951312, "learning_rate": 3.481733130262246e-06, "loss": 0.8335, "step": 8200 }, { "epoch": 0.734322010185237, "grad_norm": 0.8713721730947953, "learning_rate": 3.4795339184481824e-06, "loss": 0.812, "step": 8201 }, { "epoch": 0.734411550730315, "grad_norm": 1.0522418663993582, "learning_rate": 3.4773352551144746e-06, "loss": 0.8525, "step": 8202 }, { "epoch": 0.7345010912753931, "grad_norm": 1.4915087502357198, "learning_rate": 3.4751371404460688e-06, "loss": 0.8156, "step": 8203 }, { "epoch": 0.7345906318204712, "grad_norm": 1.0304608190093065, "learning_rate": 3.472939574627865e-06, "loss": 0.7916, "step": 8204 }, { "epoch": 0.7346801723655493, "grad_norm": 0.9986926282138018, "learning_rate": 3.4707425578447106e-06, "loss": 0.8014, "step": 8205 }, { "epoch": 0.7347697129106273, "grad_norm": 1.0481875015911823, "learning_rate": 3.4685460902814183e-06, "loss": 0.822, "step": 8206 }, { "epoch": 0.7348592534557055, "grad_norm": 0.880734983485274, "learning_rate": 3.4663501721227487e-06, "loss": 0.7807, "step": 8207 }, { "epoch": 0.7349487940007835, "grad_norm": 0.9243619858786883, "learning_rate": 3.464154803553408e-06, "loss": 0.8437, "step": 8208 }, { "epoch": 0.7350383345458615, "grad_norm": 0.9124630219284329, "learning_rate": 3.4619599847580675e-06, "loss": 0.8485, "step": 8209 }, { "epoch": 0.7351278750909396, "grad_norm": 1.084160882421561, "learning_rate": 3.459765715921346e-06, "loss": 0.7848, "step": 8210 }, { "epoch": 0.7352174156360177, "grad_norm": 0.870437785952094, "learning_rate": 3.4575719972278177e-06, "loss": 0.7736, "step": 8211 }, { "epoch": 0.7353069561810958, "grad_norm": 0.9367461683382013, "learning_rate": 3.4553788288620193e-06, "loss": 0.805, "step": 8212 }, { "epoch": 0.7353964967261738, "grad_norm": 1.0280746350918715, "learning_rate": 3.4531862110084236e-06, "loss": 0.8273, "step": 8213 }, { "epoch": 0.7354860372712518, "grad_norm": 0.9744816678769256, "learning_rate": 3.4509941438514707e-06, "loss": 0.7713, "step": 8214 }, { "epoch": 0.73557557781633, "grad_norm": 1.047145189395263, "learning_rate": 3.448802627575548e-06, "loss": 0.8391, "step": 8215 }, { "epoch": 0.735665118361408, "grad_norm": 1.2554424241102236, "learning_rate": 3.446611662364999e-06, "loss": 0.8733, "step": 8216 }, { "epoch": 0.7357546589064861, "grad_norm": 0.9275410024035535, "learning_rate": 3.4444212484041194e-06, "loss": 0.8506, "step": 8217 }, { "epoch": 0.7358441994515642, "grad_norm": 0.8703564766269303, "learning_rate": 3.4422313858771683e-06, "loss": 0.8021, "step": 8218 }, { "epoch": 0.7359337399966422, "grad_norm": 0.9375397539595276, "learning_rate": 3.4400420749683395e-06, "loss": 0.7777, "step": 8219 }, { "epoch": 0.7360232805417203, "grad_norm": 0.9163161668713934, "learning_rate": 3.4378533158617954e-06, "loss": 0.7603, "step": 8220 }, { "epoch": 0.7361128210867983, "grad_norm": 0.9211102763528913, "learning_rate": 3.4356651087416447e-06, "loss": 0.7876, "step": 8221 }, { "epoch": 0.7362023616318765, "grad_norm": 1.1034000847252037, "learning_rate": 3.4334774537919547e-06, "loss": 0.7386, "step": 8222 }, { "epoch": 0.7362919021769545, "grad_norm": 0.9352820283075752, "learning_rate": 3.4312903511967432e-06, "loss": 0.8079, "step": 8223 }, { "epoch": 0.7363814427220325, "grad_norm": 0.9345038174381147, "learning_rate": 3.429103801139981e-06, "loss": 0.7898, "step": 8224 }, { "epoch": 0.7364709832671107, "grad_norm": 0.9202191709313174, "learning_rate": 3.426917803805595e-06, "loss": 0.8209, "step": 8225 }, { "epoch": 0.7365605238121887, "grad_norm": 1.0134994917447437, "learning_rate": 3.424732359377464e-06, "loss": 0.8231, "step": 8226 }, { "epoch": 0.7366500643572668, "grad_norm": 0.9265484595296583, "learning_rate": 3.422547468039419e-06, "loss": 0.7356, "step": 8227 }, { "epoch": 0.7367396049023448, "grad_norm": 1.124737798633363, "learning_rate": 3.420363129975248e-06, "loss": 0.8426, "step": 8228 }, { "epoch": 0.736829145447423, "grad_norm": 1.108652783298048, "learning_rate": 3.4181793453686885e-06, "loss": 0.8504, "step": 8229 }, { "epoch": 0.736918685992501, "grad_norm": 0.9386557896952514, "learning_rate": 3.4159961144034347e-06, "loss": 0.8181, "step": 8230 }, { "epoch": 0.737008226537579, "grad_norm": 1.0556093076937823, "learning_rate": 3.4138134372631327e-06, "loss": 0.7706, "step": 8231 }, { "epoch": 0.7370977670826571, "grad_norm": 0.98189646961218, "learning_rate": 3.4116313141313815e-06, "loss": 0.8141, "step": 8232 }, { "epoch": 0.7371873076277352, "grad_norm": 1.791806398031152, "learning_rate": 3.409449745191735e-06, "loss": 0.7728, "step": 8233 }, { "epoch": 0.7372768481728132, "grad_norm": 0.9903009613172046, "learning_rate": 3.4072687306276995e-06, "loss": 0.8258, "step": 8234 }, { "epoch": 0.7373663887178913, "grad_norm": 1.0208752947815167, "learning_rate": 3.4050882706227338e-06, "loss": 0.8204, "step": 8235 }, { "epoch": 0.7374559292629694, "grad_norm": 1.1574628832313596, "learning_rate": 3.4029083653602535e-06, "loss": 0.8684, "step": 8236 }, { "epoch": 0.7375454698080475, "grad_norm": 0.9469080357704458, "learning_rate": 3.4007290150236214e-06, "loss": 0.834, "step": 8237 }, { "epoch": 0.7376350103531255, "grad_norm": 0.9880452783230189, "learning_rate": 3.3985502197961605e-06, "loss": 0.8487, "step": 8238 }, { "epoch": 0.7377245508982035, "grad_norm": 7.943878173367735, "learning_rate": 3.3963719798611474e-06, "loss": 0.8229, "step": 8239 }, { "epoch": 0.7378140914432817, "grad_norm": 0.9053647361343301, "learning_rate": 3.394194295401796e-06, "loss": 0.7973, "step": 8240 }, { "epoch": 0.7379036319883597, "grad_norm": 0.9493044923646228, "learning_rate": 3.3920171666012978e-06, "loss": 0.8365, "step": 8241 }, { "epoch": 0.7379931725334378, "grad_norm": 1.0500582612165192, "learning_rate": 3.3898405936427814e-06, "loss": 0.808, "step": 8242 }, { "epoch": 0.7380827130785159, "grad_norm": 1.0450332562406779, "learning_rate": 3.387664576709335e-06, "loss": 0.8018, "step": 8243 }, { "epoch": 0.738172253623594, "grad_norm": 0.9578536646991703, "learning_rate": 3.3854891159839965e-06, "loss": 0.838, "step": 8244 }, { "epoch": 0.738261794168672, "grad_norm": 1.041649060160065, "learning_rate": 3.383314211649761e-06, "loss": 0.8394, "step": 8245 }, { "epoch": 0.73835133471375, "grad_norm": 0.9661561659406418, "learning_rate": 3.3811398638895697e-06, "loss": 0.8153, "step": 8246 }, { "epoch": 0.7384408752588282, "grad_norm": 0.974278763520402, "learning_rate": 3.3789660728863204e-06, "loss": 0.8168, "step": 8247 }, { "epoch": 0.7385304158039062, "grad_norm": 0.9414369291356697, "learning_rate": 3.376792838822873e-06, "loss": 0.8015, "step": 8248 }, { "epoch": 0.7386199563489843, "grad_norm": 0.9573638806725929, "learning_rate": 3.3746201618820286e-06, "loss": 0.7854, "step": 8249 }, { "epoch": 0.7387094968940623, "grad_norm": 0.9534568312356602, "learning_rate": 3.37244804224655e-06, "loss": 0.7748, "step": 8250 }, { "epoch": 0.7387990374391404, "grad_norm": 0.9728086830258825, "learning_rate": 3.3702764800991405e-06, "loss": 0.8363, "step": 8251 }, { "epoch": 0.7388885779842185, "grad_norm": 0.9153508446705969, "learning_rate": 3.3681054756224697e-06, "loss": 0.7498, "step": 8252 }, { "epoch": 0.7389781185292965, "grad_norm": 0.9573004423223903, "learning_rate": 3.3659350289991523e-06, "loss": 0.8511, "step": 8253 }, { "epoch": 0.7390676590743747, "grad_norm": 1.01705265721414, "learning_rate": 3.363765140411763e-06, "loss": 0.7911, "step": 8254 }, { "epoch": 0.7391571996194527, "grad_norm": 0.9584731284549515, "learning_rate": 3.361595810042827e-06, "loss": 0.7737, "step": 8255 }, { "epoch": 0.7392467401645307, "grad_norm": 0.9995214922800939, "learning_rate": 3.3594270380748205e-06, "loss": 0.789, "step": 8256 }, { "epoch": 0.7393362807096088, "grad_norm": 0.8860714813982932, "learning_rate": 3.3572588246901694e-06, "loss": 0.807, "step": 8257 }, { "epoch": 0.7394258212546869, "grad_norm": 0.9595655434043556, "learning_rate": 3.3550911700712594e-06, "loss": 0.8274, "step": 8258 }, { "epoch": 0.739515361799765, "grad_norm": 0.9403281503000278, "learning_rate": 3.352924074400422e-06, "loss": 0.8342, "step": 8259 }, { "epoch": 0.739604902344843, "grad_norm": 1.04432666518782, "learning_rate": 3.3507575378599555e-06, "loss": 0.809, "step": 8260 }, { "epoch": 0.7396944428899211, "grad_norm": 0.9242927600914393, "learning_rate": 3.3485915606320986e-06, "loss": 0.8265, "step": 8261 }, { "epoch": 0.7397839834349992, "grad_norm": 1.0292663424614346, "learning_rate": 3.346426142899043e-06, "loss": 0.8165, "step": 8262 }, { "epoch": 0.7398735239800772, "grad_norm": 0.9259371979760177, "learning_rate": 3.3442612848429368e-06, "loss": 0.7998, "step": 8263 }, { "epoch": 0.7399630645251553, "grad_norm": 0.9071690403584297, "learning_rate": 3.342096986645883e-06, "loss": 0.8313, "step": 8264 }, { "epoch": 0.7400526050702334, "grad_norm": 0.9716342819850818, "learning_rate": 3.339933248489932e-06, "loss": 0.7908, "step": 8265 }, { "epoch": 0.7401421456153114, "grad_norm": 0.9131591642538947, "learning_rate": 3.337770070557095e-06, "loss": 0.7915, "step": 8266 }, { "epoch": 0.7402316861603895, "grad_norm": 0.9469414609468296, "learning_rate": 3.3356074530293325e-06, "loss": 0.8017, "step": 8267 }, { "epoch": 0.7403212267054675, "grad_norm": 0.9528157289533642, "learning_rate": 3.3334453960885514e-06, "loss": 0.8155, "step": 8268 }, { "epoch": 0.7404107672505457, "grad_norm": 1.0458907385901584, "learning_rate": 3.3312838999166187e-06, "loss": 0.8195, "step": 8269 }, { "epoch": 0.7405003077956237, "grad_norm": 1.2002683688510003, "learning_rate": 3.3291229646953525e-06, "loss": 0.7916, "step": 8270 }, { "epoch": 0.7405898483407017, "grad_norm": 1.0474982791372098, "learning_rate": 3.326962590606522e-06, "loss": 0.7822, "step": 8271 }, { "epoch": 0.7406793888857799, "grad_norm": 0.9641383347371756, "learning_rate": 3.3248027778318593e-06, "loss": 0.8016, "step": 8272 }, { "epoch": 0.7407689294308579, "grad_norm": 0.8769795023598519, "learning_rate": 3.322643526553031e-06, "loss": 0.7627, "step": 8273 }, { "epoch": 0.740858469975936, "grad_norm": 0.9256998559472221, "learning_rate": 3.3204848369516697e-06, "loss": 0.8313, "step": 8274 }, { "epoch": 0.740948010521014, "grad_norm": 0.9738964176071728, "learning_rate": 3.3183267092093563e-06, "loss": 0.8203, "step": 8275 }, { "epoch": 0.7410375510660921, "grad_norm": 1.013427918713693, "learning_rate": 3.316169143507628e-06, "loss": 0.8653, "step": 8276 }, { "epoch": 0.7411270916111702, "grad_norm": 0.9601294184935731, "learning_rate": 3.3140121400279702e-06, "loss": 0.7914, "step": 8277 }, { "epoch": 0.7412166321562482, "grad_norm": 0.9400360606224476, "learning_rate": 3.3118556989518237e-06, "loss": 0.8516, "step": 8278 }, { "epoch": 0.7413061727013264, "grad_norm": 0.9453311104685804, "learning_rate": 3.30969982046058e-06, "loss": 0.8288, "step": 8279 }, { "epoch": 0.7413957132464044, "grad_norm": 0.9454192638486407, "learning_rate": 3.307544504735587e-06, "loss": 0.7747, "step": 8280 }, { "epoch": 0.7414852537914824, "grad_norm": 1.0597763387997936, "learning_rate": 3.305389751958141e-06, "loss": 0.8186, "step": 8281 }, { "epoch": 0.7415747943365605, "grad_norm": 1.1142102209547495, "learning_rate": 3.3032355623094936e-06, "loss": 0.8569, "step": 8282 }, { "epoch": 0.7416643348816386, "grad_norm": 0.9599979307735721, "learning_rate": 3.301081935970848e-06, "loss": 0.8257, "step": 8283 }, { "epoch": 0.7417538754267167, "grad_norm": 1.3132447208396074, "learning_rate": 3.2989288731233592e-06, "loss": 0.8129, "step": 8284 }, { "epoch": 0.7418434159717947, "grad_norm": 0.9748835229342114, "learning_rate": 3.2967763739481383e-06, "loss": 0.8263, "step": 8285 }, { "epoch": 0.7419329565168727, "grad_norm": 0.9959029124218913, "learning_rate": 3.2946244386262438e-06, "loss": 0.7881, "step": 8286 }, { "epoch": 0.7420224970619509, "grad_norm": 0.9290108029565046, "learning_rate": 3.292473067338691e-06, "loss": 0.7884, "step": 8287 }, { "epoch": 0.7421120376070289, "grad_norm": 1.0188359216583187, "learning_rate": 3.2903222602664464e-06, "loss": 0.7809, "step": 8288 }, { "epoch": 0.742201578152107, "grad_norm": 0.9196518046064179, "learning_rate": 3.2881720175904274e-06, "loss": 0.825, "step": 8289 }, { "epoch": 0.7422911186971851, "grad_norm": 0.9527855218030232, "learning_rate": 3.286022339491508e-06, "loss": 0.7736, "step": 8290 }, { "epoch": 0.7423806592422632, "grad_norm": 0.877622294462354, "learning_rate": 3.283873226150509e-06, "loss": 0.8266, "step": 8291 }, { "epoch": 0.7424701997873412, "grad_norm": 0.9837888578373631, "learning_rate": 3.281724677748209e-06, "loss": 0.7498, "step": 8292 }, { "epoch": 0.7425597403324192, "grad_norm": 0.9672394340247271, "learning_rate": 3.279576694465336e-06, "loss": 0.8009, "step": 8293 }, { "epoch": 0.7426492808774974, "grad_norm": 0.9810656893013797, "learning_rate": 3.277429276482572e-06, "loss": 0.7649, "step": 8294 }, { "epoch": 0.7427388214225754, "grad_norm": 0.9511414330818468, "learning_rate": 3.2752824239805504e-06, "loss": 0.7725, "step": 8295 }, { "epoch": 0.7428283619676534, "grad_norm": 0.9908390841167228, "learning_rate": 3.273136137139857e-06, "loss": 0.8288, "step": 8296 }, { "epoch": 0.7429179025127316, "grad_norm": 0.8869941995233392, "learning_rate": 3.270990416141031e-06, "loss": 0.7828, "step": 8297 }, { "epoch": 0.7430074430578096, "grad_norm": 1.0694115423000443, "learning_rate": 3.268845261164564e-06, "loss": 0.8165, "step": 8298 }, { "epoch": 0.7430969836028877, "grad_norm": 0.9404369614063781, "learning_rate": 3.2667006723909014e-06, "loss": 0.7785, "step": 8299 }, { "epoch": 0.7431865241479657, "grad_norm": 1.1396583546098442, "learning_rate": 3.2645566500004334e-06, "loss": 0.787, "step": 8300 }, { "epoch": 0.7432760646930439, "grad_norm": 0.9255876397539137, "learning_rate": 3.262413194173507e-06, "loss": 0.7973, "step": 8301 }, { "epoch": 0.7433656052381219, "grad_norm": 1.0034392427249323, "learning_rate": 3.2602703050904315e-06, "loss": 0.8044, "step": 8302 }, { "epoch": 0.7434551457831999, "grad_norm": 0.9071280343504933, "learning_rate": 3.258127982931454e-06, "loss": 0.7853, "step": 8303 }, { "epoch": 0.743544686328278, "grad_norm": 0.9165223718636158, "learning_rate": 3.255986227876782e-06, "loss": 0.8288, "step": 8304 }, { "epoch": 0.7436342268733561, "grad_norm": 0.9089590849353633, "learning_rate": 3.2538450401065745e-06, "loss": 0.7967, "step": 8305 }, { "epoch": 0.7437237674184342, "grad_norm": 0.8895118995758451, "learning_rate": 3.251704419800935e-06, "loss": 0.8232, "step": 8306 }, { "epoch": 0.7438133079635122, "grad_norm": 1.3388120775999515, "learning_rate": 3.249564367139926e-06, "loss": 0.7757, "step": 8307 }, { "epoch": 0.7439028485085903, "grad_norm": 0.9431627672642698, "learning_rate": 3.247424882303568e-06, "loss": 0.7998, "step": 8308 }, { "epoch": 0.7439923890536684, "grad_norm": 0.9477567715952222, "learning_rate": 3.245285965471824e-06, "loss": 0.795, "step": 8309 }, { "epoch": 0.7440819295987464, "grad_norm": 1.0388825824578114, "learning_rate": 3.243147616824617e-06, "loss": 0.7755, "step": 8310 }, { "epoch": 0.7441714701438245, "grad_norm": 0.9623783182561384, "learning_rate": 3.2410098365418098e-06, "loss": 0.8691, "step": 8311 }, { "epoch": 0.7442610106889026, "grad_norm": 0.9858334468392603, "learning_rate": 3.2388726248032297e-06, "loss": 0.8367, "step": 8312 }, { "epoch": 0.7443505512339806, "grad_norm": 1.0151182194361599, "learning_rate": 3.236735981788649e-06, "loss": 0.8263, "step": 8313 }, { "epoch": 0.7444400917790587, "grad_norm": 0.9238849114024604, "learning_rate": 3.2345999076778e-06, "loss": 0.7988, "step": 8314 }, { "epoch": 0.7445296323241368, "grad_norm": 1.289704456511458, "learning_rate": 3.2324644026503614e-06, "loss": 0.8274, "step": 8315 }, { "epoch": 0.7446191728692149, "grad_norm": 0.9765533074547375, "learning_rate": 3.2303294668859674e-06, "loss": 0.8381, "step": 8316 }, { "epoch": 0.7447087134142929, "grad_norm": 0.952563596789054, "learning_rate": 3.2281951005641954e-06, "loss": 0.8342, "step": 8317 }, { "epoch": 0.7447982539593709, "grad_norm": 0.9178244778747817, "learning_rate": 3.2260613038645837e-06, "loss": 0.7686, "step": 8318 }, { "epoch": 0.7448877945044491, "grad_norm": 1.0643299747917072, "learning_rate": 3.223928076966617e-06, "loss": 0.8913, "step": 8319 }, { "epoch": 0.7449773350495271, "grad_norm": 0.9637961667295508, "learning_rate": 3.221795420049744e-06, "loss": 0.7909, "step": 8320 }, { "epoch": 0.7450668755946052, "grad_norm": 0.9344466150189342, "learning_rate": 3.2196633332933535e-06, "loss": 0.8107, "step": 8321 }, { "epoch": 0.7451564161396832, "grad_norm": 0.9367505553082477, "learning_rate": 3.2175318168767853e-06, "loss": 0.7866, "step": 8322 }, { "epoch": 0.7452459566847613, "grad_norm": 1.1725460898164488, "learning_rate": 3.2154008709793392e-06, "loss": 0.8304, "step": 8323 }, { "epoch": 0.7453354972298394, "grad_norm": 1.1318478775889105, "learning_rate": 3.213270495780264e-06, "loss": 0.7718, "step": 8324 }, { "epoch": 0.7454250377749174, "grad_norm": 0.9221397698631435, "learning_rate": 3.211140691458754e-06, "loss": 0.8466, "step": 8325 }, { "epoch": 0.7455145783199956, "grad_norm": 0.9973973311867955, "learning_rate": 3.20901145819397e-06, "loss": 0.8552, "step": 8326 }, { "epoch": 0.7456041188650736, "grad_norm": 1.0122414036798697, "learning_rate": 3.206882796165015e-06, "loss": 0.7386, "step": 8327 }, { "epoch": 0.7456936594101516, "grad_norm": 0.9556754250325548, "learning_rate": 3.20475470555094e-06, "loss": 0.783, "step": 8328 }, { "epoch": 0.7457831999552297, "grad_norm": 0.9425563684471793, "learning_rate": 3.2026271865307544e-06, "loss": 0.7687, "step": 8329 }, { "epoch": 0.7458727405003078, "grad_norm": 0.9309863280223316, "learning_rate": 3.2005002392834196e-06, "loss": 0.8104, "step": 8330 }, { "epoch": 0.7459622810453859, "grad_norm": 1.0441749089542567, "learning_rate": 3.1983738639878483e-06, "loss": 0.7847, "step": 8331 }, { "epoch": 0.7460518215904639, "grad_norm": 0.9056614491582808, "learning_rate": 3.1962480608229017e-06, "loss": 0.8024, "step": 8332 }, { "epoch": 0.746141362135542, "grad_norm": 0.9073466972676839, "learning_rate": 3.1941228299673965e-06, "loss": 0.802, "step": 8333 }, { "epoch": 0.7462309026806201, "grad_norm": 0.9784283026300207, "learning_rate": 3.1919981716001016e-06, "loss": 0.8014, "step": 8334 }, { "epoch": 0.7463204432256981, "grad_norm": 0.9926652964775603, "learning_rate": 3.1898740858997346e-06, "loss": 0.8004, "step": 8335 }, { "epoch": 0.7464099837707762, "grad_norm": 1.054792116854774, "learning_rate": 3.1877505730449677e-06, "loss": 0.8271, "step": 8336 }, { "epoch": 0.7464995243158543, "grad_norm": 0.8924070783798097, "learning_rate": 3.185627633214424e-06, "loss": 0.7897, "step": 8337 }, { "epoch": 0.7465890648609323, "grad_norm": 1.0089858966306902, "learning_rate": 3.1835052665866774e-06, "loss": 0.7608, "step": 8338 }, { "epoch": 0.7466786054060104, "grad_norm": 1.0251058748060222, "learning_rate": 3.181383473340254e-06, "loss": 0.7893, "step": 8339 }, { "epoch": 0.7467681459510884, "grad_norm": 0.9161314403257049, "learning_rate": 3.1792622536536333e-06, "loss": 0.817, "step": 8340 }, { "epoch": 0.7468576864961666, "grad_norm": 1.0804600589225808, "learning_rate": 3.1771416077052454e-06, "loss": 0.8287, "step": 8341 }, { "epoch": 0.7469472270412446, "grad_norm": 1.0656056986648266, "learning_rate": 3.1750215356734716e-06, "loss": 0.8455, "step": 8342 }, { "epoch": 0.7470367675863226, "grad_norm": 0.9757441302350184, "learning_rate": 3.172902037736646e-06, "loss": 0.7786, "step": 8343 }, { "epoch": 0.7471263081314008, "grad_norm": 1.143715192036921, "learning_rate": 3.1707831140730538e-06, "loss": 0.8079, "step": 8344 }, { "epoch": 0.7472158486764788, "grad_norm": 0.8733022446612078, "learning_rate": 3.16866476486093e-06, "loss": 0.7849, "step": 8345 }, { "epoch": 0.7473053892215569, "grad_norm": 1.0435459056011203, "learning_rate": 3.1665469902784664e-06, "loss": 0.8446, "step": 8346 }, { "epoch": 0.7473949297666349, "grad_norm": 0.9662162117237109, "learning_rate": 3.1644297905038012e-06, "loss": 0.8088, "step": 8347 }, { "epoch": 0.7474844703117131, "grad_norm": 0.977194786880148, "learning_rate": 3.1623131657150275e-06, "loss": 0.8111, "step": 8348 }, { "epoch": 0.7475740108567911, "grad_norm": 0.9072252917028317, "learning_rate": 3.160197116090188e-06, "loss": 0.7743, "step": 8349 }, { "epoch": 0.7476635514018691, "grad_norm": 0.9288365160415224, "learning_rate": 3.158081641807278e-06, "loss": 0.7983, "step": 8350 }, { "epoch": 0.7477530919469473, "grad_norm": 1.0802897151028696, "learning_rate": 3.155966743044244e-06, "loss": 0.8651, "step": 8351 }, { "epoch": 0.7478426324920253, "grad_norm": 1.1862089531882507, "learning_rate": 3.1538524199789853e-06, "loss": 0.8152, "step": 8352 }, { "epoch": 0.7479321730371034, "grad_norm": 1.3582774338340178, "learning_rate": 3.1517386727893516e-06, "loss": 0.8241, "step": 8353 }, { "epoch": 0.7480217135821814, "grad_norm": 0.8494561354719177, "learning_rate": 3.1496255016531483e-06, "loss": 0.7855, "step": 8354 }, { "epoch": 0.7481112541272595, "grad_norm": 0.9490660304926756, "learning_rate": 3.147512906748117e-06, "loss": 0.765, "step": 8355 }, { "epoch": 0.7482007946723376, "grad_norm": 1.1648714895839236, "learning_rate": 3.145400888251974e-06, "loss": 0.8246, "step": 8356 }, { "epoch": 0.7482903352174156, "grad_norm": 0.9618483499329786, "learning_rate": 3.1432894463423704e-06, "loss": 0.7783, "step": 8357 }, { "epoch": 0.7483798757624937, "grad_norm": 0.9957417654626594, "learning_rate": 3.141178581196914e-06, "loss": 0.8149, "step": 8358 }, { "epoch": 0.7484694163075718, "grad_norm": 0.9365264971830646, "learning_rate": 3.139068292993168e-06, "loss": 0.8468, "step": 8359 }, { "epoch": 0.7485589568526498, "grad_norm": 1.0186652536239151, "learning_rate": 3.1369585819086366e-06, "loss": 0.7706, "step": 8360 }, { "epoch": 0.7486484973977279, "grad_norm": 1.0573329532095415, "learning_rate": 3.1348494481207812e-06, "loss": 0.8164, "step": 8361 }, { "epoch": 0.748738037942806, "grad_norm": 0.8885511023560523, "learning_rate": 3.1327408918070224e-06, "loss": 0.7116, "step": 8362 }, { "epoch": 0.7488275784878841, "grad_norm": 0.9595784244249952, "learning_rate": 3.130632913144721e-06, "loss": 0.7563, "step": 8363 }, { "epoch": 0.7489171190329621, "grad_norm": 0.9106877528156478, "learning_rate": 3.128525512311195e-06, "loss": 0.7873, "step": 8364 }, { "epoch": 0.7490066595780401, "grad_norm": 0.9477113948004295, "learning_rate": 3.1264186894837123e-06, "loss": 0.8328, "step": 8365 }, { "epoch": 0.7490962001231183, "grad_norm": 0.908704787715687, "learning_rate": 3.124312444839488e-06, "loss": 0.744, "step": 8366 }, { "epoch": 0.7491857406681963, "grad_norm": 0.9710452999531055, "learning_rate": 3.122206778555691e-06, "loss": 0.7876, "step": 8367 }, { "epoch": 0.7492752812132744, "grad_norm": 1.255770075813098, "learning_rate": 3.1201016908094518e-06, "loss": 0.8078, "step": 8368 }, { "epoch": 0.7493648217583525, "grad_norm": 1.0848826269333336, "learning_rate": 3.1179971817778374e-06, "loss": 0.8222, "step": 8369 }, { "epoch": 0.7494543623034305, "grad_norm": 1.0097975940198562, "learning_rate": 3.115893251637877e-06, "loss": 0.8263, "step": 8370 }, { "epoch": 0.7495439028485086, "grad_norm": 0.9733497416007205, "learning_rate": 3.1137899005665405e-06, "loss": 0.8324, "step": 8371 }, { "epoch": 0.7496334433935866, "grad_norm": 0.9777212017128502, "learning_rate": 3.1116871287407567e-06, "loss": 0.7789, "step": 8372 }, { "epoch": 0.7497229839386648, "grad_norm": 0.9912906253252568, "learning_rate": 3.109584936337402e-06, "loss": 0.7699, "step": 8373 }, { "epoch": 0.7498125244837428, "grad_norm": 0.9842793058831869, "learning_rate": 3.1074833235333123e-06, "loss": 0.8075, "step": 8374 }, { "epoch": 0.7499020650288208, "grad_norm": 0.9809210354261523, "learning_rate": 3.1053822905052643e-06, "loss": 0.8368, "step": 8375 }, { "epoch": 0.7499916055738989, "grad_norm": 1.1409334755096951, "learning_rate": 3.1032818374299934e-06, "loss": 0.8064, "step": 8376 }, { "epoch": 0.750081146118977, "grad_norm": 1.3023911315722125, "learning_rate": 3.1011819644841766e-06, "loss": 0.8194, "step": 8377 }, { "epoch": 0.7501706866640551, "grad_norm": 1.0037651508068588, "learning_rate": 3.099082671844452e-06, "loss": 0.7731, "step": 8378 }, { "epoch": 0.7502602272091331, "grad_norm": 1.0820138743440917, "learning_rate": 3.096983959687403e-06, "loss": 0.8363, "step": 8379 }, { "epoch": 0.7503497677542112, "grad_norm": 0.9876822930021091, "learning_rate": 3.09488582818957e-06, "loss": 0.8233, "step": 8380 }, { "epoch": 0.7504393082992893, "grad_norm": 0.9042398358661778, "learning_rate": 3.092788277527443e-06, "loss": 0.7778, "step": 8381 }, { "epoch": 0.7505288488443673, "grad_norm": 1.0156283836289686, "learning_rate": 3.090691307877455e-06, "loss": 0.7972, "step": 8382 }, { "epoch": 0.7506183893894454, "grad_norm": 1.0536718460703651, "learning_rate": 3.088594919415998e-06, "loss": 0.7673, "step": 8383 }, { "epoch": 0.7507079299345235, "grad_norm": 1.2055677266840024, "learning_rate": 3.086499112319414e-06, "loss": 0.7942, "step": 8384 }, { "epoch": 0.7507974704796015, "grad_norm": 1.0473403562125527, "learning_rate": 3.084403886763997e-06, "loss": 0.755, "step": 8385 }, { "epoch": 0.7508870110246796, "grad_norm": 1.19806486344036, "learning_rate": 3.082309242925985e-06, "loss": 0.8151, "step": 8386 }, { "epoch": 0.7509765515697577, "grad_norm": 0.914038864349572, "learning_rate": 3.0802151809815826e-06, "loss": 0.8028, "step": 8387 }, { "epoch": 0.7510660921148358, "grad_norm": 0.9571492498524934, "learning_rate": 3.0781217011069274e-06, "loss": 0.8048, "step": 8388 }, { "epoch": 0.7511556326599138, "grad_norm": 0.9892350382828197, "learning_rate": 3.076028803478118e-06, "loss": 0.8104, "step": 8389 }, { "epoch": 0.7512451732049918, "grad_norm": 0.8525923809982836, "learning_rate": 3.073936488271203e-06, "loss": 0.7166, "step": 8390 }, { "epoch": 0.75133471375007, "grad_norm": 0.9949521001063397, "learning_rate": 3.07184475566218e-06, "loss": 0.7811, "step": 8391 }, { "epoch": 0.751424254295148, "grad_norm": 0.9475080141521904, "learning_rate": 3.0697536058269993e-06, "loss": 0.8069, "step": 8392 }, { "epoch": 0.7515137948402261, "grad_norm": 1.0350788817504553, "learning_rate": 3.0676630389415617e-06, "loss": 0.7953, "step": 8393 }, { "epoch": 0.7516033353853041, "grad_norm": 1.0370464847915886, "learning_rate": 3.0655730551817188e-06, "loss": 0.8295, "step": 8394 }, { "epoch": 0.7516928759303823, "grad_norm": 1.0298302371131696, "learning_rate": 3.063483654723274e-06, "loss": 0.7865, "step": 8395 }, { "epoch": 0.7517824164754603, "grad_norm": 0.901665792095309, "learning_rate": 3.0613948377419787e-06, "loss": 0.8001, "step": 8396 }, { "epoch": 0.7518719570205383, "grad_norm": 0.9113509578230589, "learning_rate": 3.05930660441354e-06, "loss": 0.8123, "step": 8397 }, { "epoch": 0.7519614975656165, "grad_norm": 1.0283593087312342, "learning_rate": 3.0572189549136124e-06, "loss": 0.7833, "step": 8398 }, { "epoch": 0.7520510381106945, "grad_norm": 1.0107979620028282, "learning_rate": 3.0551318894178004e-06, "loss": 0.8597, "step": 8399 }, { "epoch": 0.7521405786557726, "grad_norm": 0.9031408046901314, "learning_rate": 3.0530454081016637e-06, "loss": 0.8204, "step": 8400 }, { "epoch": 0.7522301192008506, "grad_norm": 0.9897790600934279, "learning_rate": 3.0509595111407086e-06, "loss": 0.7766, "step": 8401 }, { "epoch": 0.7523196597459287, "grad_norm": 0.9047802970964851, "learning_rate": 3.048874198710394e-06, "loss": 0.7555, "step": 8402 }, { "epoch": 0.7524092002910068, "grad_norm": 1.1169892624186948, "learning_rate": 3.0467894709861313e-06, "loss": 0.8214, "step": 8403 }, { "epoch": 0.7524987408360848, "grad_norm": 0.9671192783308427, "learning_rate": 3.044705328143279e-06, "loss": 0.8121, "step": 8404 }, { "epoch": 0.752588281381163, "grad_norm": 1.0940444370670752, "learning_rate": 3.0426217703571505e-06, "loss": 0.8324, "step": 8405 }, { "epoch": 0.752677821926241, "grad_norm": 1.000378236730116, "learning_rate": 3.0405387978030054e-06, "loss": 0.7435, "step": 8406 }, { "epoch": 0.752767362471319, "grad_norm": 0.9281160540672754, "learning_rate": 3.0384564106560586e-06, "loss": 0.7912, "step": 8407 }, { "epoch": 0.7528569030163971, "grad_norm": 0.9446364400380358, "learning_rate": 3.0363746090914723e-06, "loss": 0.8419, "step": 8408 }, { "epoch": 0.7529464435614752, "grad_norm": 0.995166448511503, "learning_rate": 3.034293393284362e-06, "loss": 0.83, "step": 8409 }, { "epoch": 0.7530359841065533, "grad_norm": 0.9448969415260732, "learning_rate": 3.032212763409792e-06, "loss": 0.7756, "step": 8410 }, { "epoch": 0.7531255246516313, "grad_norm": 1.0395443660310903, "learning_rate": 3.030132719642779e-06, "loss": 0.8314, "step": 8411 }, { "epoch": 0.7532150651967093, "grad_norm": 1.0004157133881215, "learning_rate": 3.028053262158288e-06, "loss": 0.8237, "step": 8412 }, { "epoch": 0.7533046057417875, "grad_norm": 0.9262750826013264, "learning_rate": 3.025974391131238e-06, "loss": 0.8387, "step": 8413 }, { "epoch": 0.7533941462868655, "grad_norm": 0.9543870966400738, "learning_rate": 3.023896106736498e-06, "loss": 0.8457, "step": 8414 }, { "epoch": 0.7534836868319436, "grad_norm": 0.998653815959704, "learning_rate": 3.021818409148879e-06, "loss": 0.8097, "step": 8415 }, { "epoch": 0.7535732273770217, "grad_norm": 0.9807664080726413, "learning_rate": 3.0197412985431584e-06, "loss": 0.7657, "step": 8416 }, { "epoch": 0.7536627679220997, "grad_norm": 0.9939136806932819, "learning_rate": 3.0176647750940526e-06, "loss": 0.8155, "step": 8417 }, { "epoch": 0.7537523084671778, "grad_norm": 1.0442352207198478, "learning_rate": 3.0155888389762334e-06, "loss": 0.7517, "step": 8418 }, { "epoch": 0.7538418490122558, "grad_norm": 0.9184536765693112, "learning_rate": 3.0135134903643204e-06, "loss": 0.7366, "step": 8419 }, { "epoch": 0.753931389557334, "grad_norm": 1.2751573368812699, "learning_rate": 3.0114387294328896e-06, "loss": 0.755, "step": 8420 }, { "epoch": 0.754020930102412, "grad_norm": 0.922604355428223, "learning_rate": 3.0093645563564523e-06, "loss": 0.8097, "step": 8421 }, { "epoch": 0.75411047064749, "grad_norm": 0.9469872345746867, "learning_rate": 3.007290971309491e-06, "loss": 0.795, "step": 8422 }, { "epoch": 0.7542000111925682, "grad_norm": 0.9309059319052506, "learning_rate": 3.0052179744664265e-06, "loss": 0.8012, "step": 8423 }, { "epoch": 0.7542895517376462, "grad_norm": 0.9867986761268118, "learning_rate": 3.003145566001632e-06, "loss": 0.7759, "step": 8424 }, { "epoch": 0.7543790922827243, "grad_norm": 1.1042394309888715, "learning_rate": 3.0010737460894346e-06, "loss": 0.8355, "step": 8425 }, { "epoch": 0.7544686328278023, "grad_norm": 0.949768977194555, "learning_rate": 2.9990025149041035e-06, "loss": 0.783, "step": 8426 }, { "epoch": 0.7545581733728804, "grad_norm": 1.0032264365816737, "learning_rate": 2.996931872619864e-06, "loss": 0.7943, "step": 8427 }, { "epoch": 0.7546477139179585, "grad_norm": 1.0038270446779733, "learning_rate": 2.994861819410897e-06, "loss": 0.8153, "step": 8428 }, { "epoch": 0.7547372544630365, "grad_norm": 0.8706594779828497, "learning_rate": 2.992792355451326e-06, "loss": 0.7565, "step": 8429 }, { "epoch": 0.7548267950081146, "grad_norm": 0.9378330973836738, "learning_rate": 2.9907234809152306e-06, "loss": 0.8377, "step": 8430 }, { "epoch": 0.7549163355531927, "grad_norm": 0.9910203596675979, "learning_rate": 2.988655195976632e-06, "loss": 0.8116, "step": 8431 }, { "epoch": 0.7550058760982707, "grad_norm": 0.9453265425499182, "learning_rate": 2.9865875008095114e-06, "loss": 0.7542, "step": 8432 }, { "epoch": 0.7550954166433488, "grad_norm": 1.0254615842493653, "learning_rate": 2.9845203955877913e-06, "loss": 0.8086, "step": 8433 }, { "epoch": 0.7551849571884269, "grad_norm": 0.9252447191795289, "learning_rate": 2.9824538804853577e-06, "loss": 0.8318, "step": 8434 }, { "epoch": 0.755274497733505, "grad_norm": 0.8951620879551295, "learning_rate": 2.980387955676035e-06, "loss": 0.8203, "step": 8435 }, { "epoch": 0.755364038278583, "grad_norm": 1.0639385900118499, "learning_rate": 2.9783226213336058e-06, "loss": 0.836, "step": 8436 }, { "epoch": 0.755453578823661, "grad_norm": 0.9748128156033251, "learning_rate": 2.976257877631793e-06, "loss": 0.8872, "step": 8437 }, { "epoch": 0.7555431193687392, "grad_norm": 1.0606488709344801, "learning_rate": 2.9741937247442797e-06, "loss": 0.8086, "step": 8438 }, { "epoch": 0.7556326599138172, "grad_norm": 0.9444580818823709, "learning_rate": 2.9721301628446954e-06, "loss": 0.8125, "step": 8439 }, { "epoch": 0.7557222004588953, "grad_norm": 1.0933936311538284, "learning_rate": 2.970067192106617e-06, "loss": 0.8006, "step": 8440 }, { "epoch": 0.7558117410039734, "grad_norm": 1.0285499377574334, "learning_rate": 2.968004812703583e-06, "loss": 0.7836, "step": 8441 }, { "epoch": 0.7559012815490515, "grad_norm": 1.0843926935492594, "learning_rate": 2.965943024809067e-06, "loss": 0.8278, "step": 8442 }, { "epoch": 0.7559908220941295, "grad_norm": 0.9450764761526654, "learning_rate": 2.963881828596502e-06, "loss": 0.759, "step": 8443 }, { "epoch": 0.7560803626392075, "grad_norm": 0.9966843234082728, "learning_rate": 2.9618212242392687e-06, "loss": 0.8432, "step": 8444 }, { "epoch": 0.7561699031842857, "grad_norm": 0.9892037217108254, "learning_rate": 2.9597612119106977e-06, "loss": 0.8362, "step": 8445 }, { "epoch": 0.7562594437293637, "grad_norm": 1.0925176311561264, "learning_rate": 2.957701791784069e-06, "loss": 0.7332, "step": 8446 }, { "epoch": 0.7563489842744417, "grad_norm": 1.0633876066346057, "learning_rate": 2.9556429640326236e-06, "loss": 0.7839, "step": 8447 }, { "epoch": 0.7564385248195198, "grad_norm": 0.9854212257470317, "learning_rate": 2.953584728829533e-06, "loss": 0.7987, "step": 8448 }, { "epoch": 0.7565280653645979, "grad_norm": 0.9155916635040601, "learning_rate": 2.951527086347933e-06, "loss": 0.7909, "step": 8449 }, { "epoch": 0.756617605909676, "grad_norm": 0.8965571066251745, "learning_rate": 2.9494700367609054e-06, "loss": 0.8122, "step": 8450 }, { "epoch": 0.756707146454754, "grad_norm": 0.9888177551818268, "learning_rate": 2.947413580241483e-06, "loss": 0.7871, "step": 8451 }, { "epoch": 0.7567966869998322, "grad_norm": 0.9124739663774152, "learning_rate": 2.9453577169626467e-06, "loss": 0.8412, "step": 8452 }, { "epoch": 0.7568862275449102, "grad_norm": 0.9745502663343589, "learning_rate": 2.9433024470973316e-06, "loss": 0.7841, "step": 8453 }, { "epoch": 0.7569757680899882, "grad_norm": 1.061356773543484, "learning_rate": 2.9412477708184182e-06, "loss": 0.864, "step": 8454 }, { "epoch": 0.7570653086350663, "grad_norm": 1.0667769891271757, "learning_rate": 2.9391936882987415e-06, "loss": 0.8043, "step": 8455 }, { "epoch": 0.7571548491801444, "grad_norm": 1.075565501366885, "learning_rate": 2.9371401997110817e-06, "loss": 0.7653, "step": 8456 }, { "epoch": 0.7572443897252225, "grad_norm": 1.0313659440218141, "learning_rate": 2.9350873052281713e-06, "loss": 0.8536, "step": 8457 }, { "epoch": 0.7573339302703005, "grad_norm": 1.007847630917006, "learning_rate": 2.933035005022696e-06, "loss": 0.8135, "step": 8458 }, { "epoch": 0.7574234708153786, "grad_norm": 0.9265063600269383, "learning_rate": 2.930983299267286e-06, "loss": 0.8207, "step": 8459 }, { "epoch": 0.7575130113604567, "grad_norm": 0.9868171272970366, "learning_rate": 2.9289321881345257e-06, "loss": 0.7892, "step": 8460 }, { "epoch": 0.7576025519055347, "grad_norm": 0.9485422584043367, "learning_rate": 2.9268816717969475e-06, "loss": 0.8266, "step": 8461 }, { "epoch": 0.7576920924506128, "grad_norm": 1.0525898326478405, "learning_rate": 2.924831750427033e-06, "loss": 0.8465, "step": 8462 }, { "epoch": 0.7577816329956909, "grad_norm": 0.9486414196052384, "learning_rate": 2.922782424197216e-06, "loss": 0.8417, "step": 8463 }, { "epoch": 0.7578711735407689, "grad_norm": 0.8798180519380967, "learning_rate": 2.920733693279879e-06, "loss": 0.8156, "step": 8464 }, { "epoch": 0.757960714085847, "grad_norm": 1.056914318111195, "learning_rate": 2.9186855578473537e-06, "loss": 0.7809, "step": 8465 }, { "epoch": 0.758050254630925, "grad_norm": 0.8849132676657065, "learning_rate": 2.9166380180719243e-06, "loss": 0.7481, "step": 8466 }, { "epoch": 0.7581397951760032, "grad_norm": 1.0730683733670445, "learning_rate": 2.914591074125822e-06, "loss": 0.82, "step": 8467 }, { "epoch": 0.7582293357210812, "grad_norm": 0.9620797464507241, "learning_rate": 2.9125447261812302e-06, "loss": 0.8565, "step": 8468 }, { "epoch": 0.7583188762661592, "grad_norm": 0.9857609816079624, "learning_rate": 2.9104989744102784e-06, "loss": 0.8337, "step": 8469 }, { "epoch": 0.7584084168112374, "grad_norm": 1.0004489367076872, "learning_rate": 2.9084538189850508e-06, "loss": 0.8004, "step": 8470 }, { "epoch": 0.7584979573563154, "grad_norm": 1.0499643798689031, "learning_rate": 2.9064092600775797e-06, "loss": 0.836, "step": 8471 }, { "epoch": 0.7585874979013935, "grad_norm": 0.8951616811749844, "learning_rate": 2.9043652978598446e-06, "loss": 0.7894, "step": 8472 }, { "epoch": 0.7586770384464715, "grad_norm": 1.1106370355419521, "learning_rate": 2.902321932503779e-06, "loss": 0.7958, "step": 8473 }, { "epoch": 0.7587665789915496, "grad_norm": 1.0214101881203135, "learning_rate": 2.9002791641812657e-06, "loss": 0.7891, "step": 8474 }, { "epoch": 0.7588561195366277, "grad_norm": 0.9800844037363966, "learning_rate": 2.8982369930641275e-06, "loss": 0.8227, "step": 8475 }, { "epoch": 0.7589456600817057, "grad_norm": 0.8756657185287371, "learning_rate": 2.896195419324154e-06, "loss": 0.8011, "step": 8476 }, { "epoch": 0.7590352006267839, "grad_norm": 0.9430827458942261, "learning_rate": 2.8941544431330716e-06, "loss": 0.8252, "step": 8477 }, { "epoch": 0.7591247411718619, "grad_norm": 1.0000061697920135, "learning_rate": 2.8921140646625623e-06, "loss": 0.7649, "step": 8478 }, { "epoch": 0.7592142817169399, "grad_norm": 1.0134399891363568, "learning_rate": 2.8900742840842556e-06, "loss": 0.7902, "step": 8479 }, { "epoch": 0.759303822262018, "grad_norm": 0.9825489454156966, "learning_rate": 2.8880351015697337e-06, "loss": 0.7316, "step": 8480 }, { "epoch": 0.7593933628070961, "grad_norm": 0.9981363148845781, "learning_rate": 2.885996517290517e-06, "loss": 0.8486, "step": 8481 }, { "epoch": 0.7594829033521742, "grad_norm": 1.0997368388530167, "learning_rate": 2.8839585314180953e-06, "loss": 0.8207, "step": 8482 }, { "epoch": 0.7595724438972522, "grad_norm": 0.9021681120950972, "learning_rate": 2.8819211441238916e-06, "loss": 0.7708, "step": 8483 }, { "epoch": 0.7596619844423302, "grad_norm": 0.976818004156447, "learning_rate": 2.8798843555792857e-06, "loss": 0.8126, "step": 8484 }, { "epoch": 0.7597515249874084, "grad_norm": 1.0398684251879042, "learning_rate": 2.877848165955608e-06, "loss": 0.7914, "step": 8485 }, { "epoch": 0.7598410655324864, "grad_norm": 0.987795395646323, "learning_rate": 2.8758125754241306e-06, "loss": 0.801, "step": 8486 }, { "epoch": 0.7599306060775645, "grad_norm": 0.9012149443854951, "learning_rate": 2.8737775841560844e-06, "loss": 0.841, "step": 8487 }, { "epoch": 0.7600201466226426, "grad_norm": 0.9493776186737061, "learning_rate": 2.8717431923226424e-06, "loss": 0.7544, "step": 8488 }, { "epoch": 0.7601096871677206, "grad_norm": 1.0592989727412336, "learning_rate": 2.8697094000949376e-06, "loss": 0.8129, "step": 8489 }, { "epoch": 0.7601992277127987, "grad_norm": 1.0430297480155186, "learning_rate": 2.8676762076440414e-06, "loss": 0.8331, "step": 8490 }, { "epoch": 0.7602887682578767, "grad_norm": 0.9673249354304414, "learning_rate": 2.865643615140985e-06, "loss": 0.8501, "step": 8491 }, { "epoch": 0.7603783088029549, "grad_norm": 0.9106925941006304, "learning_rate": 2.8636116227567346e-06, "loss": 0.8095, "step": 8492 }, { "epoch": 0.7604678493480329, "grad_norm": 0.9417541724704757, "learning_rate": 2.8615802306622196e-06, "loss": 0.7601, "step": 8493 }, { "epoch": 0.760557389893111, "grad_norm": 1.0114451636520094, "learning_rate": 2.8595494390283106e-06, "loss": 0.8226, "step": 8494 }, { "epoch": 0.7606469304381891, "grad_norm": 1.0133891626469809, "learning_rate": 2.857519248025837e-06, "loss": 0.7733, "step": 8495 }, { "epoch": 0.7607364709832671, "grad_norm": 1.064258340458245, "learning_rate": 2.855489657825573e-06, "loss": 0.8499, "step": 8496 }, { "epoch": 0.7608260115283452, "grad_norm": 1.1668892552714307, "learning_rate": 2.8534606685982326e-06, "loss": 0.7892, "step": 8497 }, { "epoch": 0.7609155520734232, "grad_norm": 0.9722154078220108, "learning_rate": 2.8514322805144934e-06, "loss": 0.8226, "step": 8498 }, { "epoch": 0.7610050926185014, "grad_norm": 1.086653490234758, "learning_rate": 2.849404493744975e-06, "loss": 0.8292, "step": 8499 }, { "epoch": 0.7610946331635794, "grad_norm": 1.055138191046296, "learning_rate": 2.8473773084602463e-06, "loss": 0.7375, "step": 8500 }, { "epoch": 0.7611841737086574, "grad_norm": 0.88952226372476, "learning_rate": 2.8453507248308367e-06, "loss": 0.769, "step": 8501 }, { "epoch": 0.7612737142537355, "grad_norm": 0.9332149827773796, "learning_rate": 2.843324743027206e-06, "loss": 0.7784, "step": 8502 }, { "epoch": 0.7613632547988136, "grad_norm": 0.9916400972608397, "learning_rate": 2.841299363219776e-06, "loss": 0.8018, "step": 8503 }, { "epoch": 0.7614527953438917, "grad_norm": 0.9695069068441983, "learning_rate": 2.839274585578915e-06, "loss": 0.8025, "step": 8504 }, { "epoch": 0.7615423358889697, "grad_norm": 1.0313757240343981, "learning_rate": 2.837250410274942e-06, "loss": 0.8276, "step": 8505 }, { "epoch": 0.7616318764340478, "grad_norm": 1.2706135518112773, "learning_rate": 2.83522683747812e-06, "loss": 0.8045, "step": 8506 }, { "epoch": 0.7617214169791259, "grad_norm": 1.0372167997640347, "learning_rate": 2.833203867358675e-06, "loss": 0.8665, "step": 8507 }, { "epoch": 0.7618109575242039, "grad_norm": 0.9373774118567355, "learning_rate": 2.8311815000867628e-06, "loss": 0.8054, "step": 8508 }, { "epoch": 0.761900498069282, "grad_norm": 0.9770215701025963, "learning_rate": 2.8291597358325005e-06, "loss": 0.7938, "step": 8509 }, { "epoch": 0.7619900386143601, "grad_norm": 1.0808950831152446, "learning_rate": 2.8271385747659553e-06, "loss": 0.8106, "step": 8510 }, { "epoch": 0.7620795791594381, "grad_norm": 0.957244939947962, "learning_rate": 2.8251180170571378e-06, "loss": 0.7921, "step": 8511 }, { "epoch": 0.7621691197045162, "grad_norm": 1.0522251369668427, "learning_rate": 2.823098062876013e-06, "loss": 0.761, "step": 8512 }, { "epoch": 0.7622586602495943, "grad_norm": 1.030016738537877, "learning_rate": 2.821078712392491e-06, "loss": 0.8361, "step": 8513 }, { "epoch": 0.7623482007946724, "grad_norm": 1.0304283408745756, "learning_rate": 2.8190599657764338e-06, "loss": 0.8324, "step": 8514 }, { "epoch": 0.7624377413397504, "grad_norm": 1.126219644220213, "learning_rate": 2.8170418231976527e-06, "loss": 0.8628, "step": 8515 }, { "epoch": 0.7625272818848284, "grad_norm": 0.9299680228717754, "learning_rate": 2.8150242848259056e-06, "loss": 0.7911, "step": 8516 }, { "epoch": 0.7626168224299066, "grad_norm": 0.9169723825136841, "learning_rate": 2.813007350830902e-06, "loss": 0.8484, "step": 8517 }, { "epoch": 0.7627063629749846, "grad_norm": 1.092752544019851, "learning_rate": 2.810991021382299e-06, "loss": 0.8313, "step": 8518 }, { "epoch": 0.7627959035200627, "grad_norm": 0.8974944729752725, "learning_rate": 2.808975296649705e-06, "loss": 0.8121, "step": 8519 }, { "epoch": 0.7628854440651407, "grad_norm": 0.9710999246449759, "learning_rate": 2.8069601768026767e-06, "loss": 0.7903, "step": 8520 }, { "epoch": 0.7629749846102188, "grad_norm": 0.9326181821066316, "learning_rate": 2.804945662010716e-06, "loss": 0.768, "step": 8521 }, { "epoch": 0.7630645251552969, "grad_norm": 0.9739257624114248, "learning_rate": 2.802931752443282e-06, "loss": 0.8051, "step": 8522 }, { "epoch": 0.7631540657003749, "grad_norm": 1.024883664860179, "learning_rate": 2.800918448269775e-06, "loss": 0.8046, "step": 8523 }, { "epoch": 0.7632436062454531, "grad_norm": 0.9336058310182836, "learning_rate": 2.7989057496595486e-06, "loss": 0.8338, "step": 8524 }, { "epoch": 0.7633331467905311, "grad_norm": 0.9927860225759481, "learning_rate": 2.796893656781904e-06, "loss": 0.8195, "step": 8525 }, { "epoch": 0.7634226873356091, "grad_norm": 0.9551141362776654, "learning_rate": 2.7948821698060913e-06, "loss": 0.8288, "step": 8526 }, { "epoch": 0.7635122278806872, "grad_norm": 0.9972085425335548, "learning_rate": 2.792871288901312e-06, "loss": 0.8183, "step": 8527 }, { "epoch": 0.7636017684257653, "grad_norm": 1.1372744300789777, "learning_rate": 2.7908610142367144e-06, "loss": 0.8439, "step": 8528 }, { "epoch": 0.7636913089708434, "grad_norm": 0.8931759740171602, "learning_rate": 2.7888513459813958e-06, "loss": 0.8028, "step": 8529 }, { "epoch": 0.7637808495159214, "grad_norm": 0.9798708303713464, "learning_rate": 2.786842284304402e-06, "loss": 0.8384, "step": 8530 }, { "epoch": 0.7638703900609995, "grad_norm": 0.9291650963671254, "learning_rate": 2.78483382937473e-06, "loss": 0.7851, "step": 8531 }, { "epoch": 0.7639599306060776, "grad_norm": 0.8988447956775906, "learning_rate": 2.7828259813613256e-06, "loss": 0.78, "step": 8532 }, { "epoch": 0.7640494711511556, "grad_norm": 1.038150509737125, "learning_rate": 2.78081874043308e-06, "loss": 0.8476, "step": 8533 }, { "epoch": 0.7641390116962337, "grad_norm": 1.012935214353084, "learning_rate": 2.778812106758839e-06, "loss": 0.8279, "step": 8534 }, { "epoch": 0.7642285522413118, "grad_norm": 0.9427401280508876, "learning_rate": 2.776806080507387e-06, "loss": 0.7675, "step": 8535 }, { "epoch": 0.7643180927863898, "grad_norm": 1.2935688983187228, "learning_rate": 2.774800661847472e-06, "loss": 0.8107, "step": 8536 }, { "epoch": 0.7644076333314679, "grad_norm": 0.9651724826167283, "learning_rate": 2.7727958509477802e-06, "loss": 0.8223, "step": 8537 }, { "epoch": 0.7644971738765459, "grad_norm": 1.0190535489483015, "learning_rate": 2.7707916479769505e-06, "loss": 0.8645, "step": 8538 }, { "epoch": 0.7645867144216241, "grad_norm": 1.2364880327157546, "learning_rate": 2.7687880531035695e-06, "loss": 0.8928, "step": 8539 }, { "epoch": 0.7646762549667021, "grad_norm": 0.9650447301309327, "learning_rate": 2.766785066496176e-06, "loss": 0.8353, "step": 8540 }, { "epoch": 0.7647657955117801, "grad_norm": 1.1032427887851581, "learning_rate": 2.7647826883232486e-06, "loss": 0.8319, "step": 8541 }, { "epoch": 0.7648553360568583, "grad_norm": 0.9794727183701555, "learning_rate": 2.7627809187532207e-06, "loss": 0.8145, "step": 8542 }, { "epoch": 0.7649448766019363, "grad_norm": 1.0282006022761396, "learning_rate": 2.7607797579544817e-06, "loss": 0.8378, "step": 8543 }, { "epoch": 0.7650344171470144, "grad_norm": 1.0281428883831272, "learning_rate": 2.758779206095359e-06, "loss": 0.7916, "step": 8544 }, { "epoch": 0.7651239576920924, "grad_norm": 0.9233451945498561, "learning_rate": 2.756779263344135e-06, "loss": 0.8492, "step": 8545 }, { "epoch": 0.7652134982371706, "grad_norm": 0.9468718500892725, "learning_rate": 2.7547799298690327e-06, "loss": 0.8198, "step": 8546 }, { "epoch": 0.7653030387822486, "grad_norm": 1.0053103798748328, "learning_rate": 2.752781205838233e-06, "loss": 0.8293, "step": 8547 }, { "epoch": 0.7653925793273266, "grad_norm": 1.0536730418913196, "learning_rate": 2.7507830914198587e-06, "loss": 0.8053, "step": 8548 }, { "epoch": 0.7654821198724048, "grad_norm": 0.9979288096554082, "learning_rate": 2.74878558678199e-06, "loss": 0.8034, "step": 8549 }, { "epoch": 0.7655716604174828, "grad_norm": 0.9112349997538397, "learning_rate": 2.74678869209265e-06, "loss": 0.7328, "step": 8550 }, { "epoch": 0.7656612009625609, "grad_norm": 0.9286807378027299, "learning_rate": 2.7447924075198106e-06, "loss": 0.8197, "step": 8551 }, { "epoch": 0.7657507415076389, "grad_norm": 1.0547527058165194, "learning_rate": 2.7427967332313888e-06, "loss": 0.84, "step": 8552 }, { "epoch": 0.765840282052717, "grad_norm": 1.0300520442858276, "learning_rate": 2.7408016693952566e-06, "loss": 0.8293, "step": 8553 }, { "epoch": 0.7659298225977951, "grad_norm": 1.0297481236913109, "learning_rate": 2.73880721617923e-06, "loss": 0.8173, "step": 8554 }, { "epoch": 0.7660193631428731, "grad_norm": 1.0369250982490426, "learning_rate": 2.7368133737510805e-06, "loss": 0.7719, "step": 8555 }, { "epoch": 0.7661089036879511, "grad_norm": 0.9969617654196644, "learning_rate": 2.734820142278525e-06, "loss": 0.7703, "step": 8556 }, { "epoch": 0.7661984442330293, "grad_norm": 0.969076635777658, "learning_rate": 2.73282752192922e-06, "loss": 0.8118, "step": 8557 }, { "epoch": 0.7662879847781073, "grad_norm": 0.9400732764710739, "learning_rate": 2.730835512870783e-06, "loss": 0.7868, "step": 8558 }, { "epoch": 0.7663775253231854, "grad_norm": 0.9448544286211332, "learning_rate": 2.7288441152707745e-06, "loss": 0.8105, "step": 8559 }, { "epoch": 0.7664670658682635, "grad_norm": 0.9504827667935589, "learning_rate": 2.7268533292967026e-06, "loss": 0.7651, "step": 8560 }, { "epoch": 0.7665566064133416, "grad_norm": 0.9786735043913761, "learning_rate": 2.724863155116029e-06, "loss": 0.8043, "step": 8561 }, { "epoch": 0.7666461469584196, "grad_norm": 0.9500121806982903, "learning_rate": 2.7228735928961635e-06, "loss": 0.8419, "step": 8562 }, { "epoch": 0.7667356875034976, "grad_norm": 0.9330501277953063, "learning_rate": 2.7208846428044535e-06, "loss": 0.8081, "step": 8563 }, { "epoch": 0.7668252280485758, "grad_norm": 1.0052201909769134, "learning_rate": 2.7188963050082073e-06, "loss": 0.8322, "step": 8564 }, { "epoch": 0.7669147685936538, "grad_norm": 0.8694345321107568, "learning_rate": 2.7169085796746762e-06, "loss": 0.7741, "step": 8565 }, { "epoch": 0.7670043091387319, "grad_norm": 1.4664512120990643, "learning_rate": 2.71492146697106e-06, "loss": 0.8614, "step": 8566 }, { "epoch": 0.76709384968381, "grad_norm": 0.986184825106137, "learning_rate": 2.7129349670645157e-06, "loss": 0.8379, "step": 8567 }, { "epoch": 0.767183390228888, "grad_norm": 1.000471154686253, "learning_rate": 2.7109490801221327e-06, "loss": 0.8277, "step": 8568 }, { "epoch": 0.7672729307739661, "grad_norm": 0.938506133035383, "learning_rate": 2.7089638063109602e-06, "loss": 0.7814, "step": 8569 }, { "epoch": 0.7673624713190441, "grad_norm": 0.9630635762015237, "learning_rate": 2.706979145797992e-06, "loss": 0.7754, "step": 8570 }, { "epoch": 0.7674520118641223, "grad_norm": 0.8987277568057228, "learning_rate": 2.704995098750174e-06, "loss": 0.8084, "step": 8571 }, { "epoch": 0.7675415524092003, "grad_norm": 0.9122576028411004, "learning_rate": 2.703011665334395e-06, "loss": 0.7934, "step": 8572 }, { "epoch": 0.7676310929542783, "grad_norm": 1.0925721029013176, "learning_rate": 2.701028845717496e-06, "loss": 0.8161, "step": 8573 }, { "epoch": 0.7677206334993564, "grad_norm": 1.242002011819233, "learning_rate": 2.699046640066265e-06, "loss": 0.7935, "step": 8574 }, { "epoch": 0.7678101740444345, "grad_norm": 1.003307596054991, "learning_rate": 2.6970650485474393e-06, "loss": 0.8031, "step": 8575 }, { "epoch": 0.7678997145895126, "grad_norm": 0.9841436045079488, "learning_rate": 2.6950840713277037e-06, "loss": 0.7751, "step": 8576 }, { "epoch": 0.7679892551345906, "grad_norm": 0.9193788398396291, "learning_rate": 2.693103708573692e-06, "loss": 0.7863, "step": 8577 }, { "epoch": 0.7680787956796687, "grad_norm": 0.9064801866426804, "learning_rate": 2.6911239604519844e-06, "loss": 0.7897, "step": 8578 }, { "epoch": 0.7681683362247468, "grad_norm": 0.9564495393890579, "learning_rate": 2.6891448271291123e-06, "loss": 0.8131, "step": 8579 }, { "epoch": 0.7682578767698248, "grad_norm": 0.9185102027209784, "learning_rate": 2.687166308771554e-06, "loss": 0.7708, "step": 8580 }, { "epoch": 0.7683474173149029, "grad_norm": 0.9633800838468224, "learning_rate": 2.6851884055457343e-06, "loss": 0.7998, "step": 8581 }, { "epoch": 0.768436957859981, "grad_norm": 0.8807658926947759, "learning_rate": 2.68321111761803e-06, "loss": 0.7775, "step": 8582 }, { "epoch": 0.768526498405059, "grad_norm": 1.0827544074731394, "learning_rate": 2.6812344451547624e-06, "loss": 0.8483, "step": 8583 }, { "epoch": 0.7686160389501371, "grad_norm": 0.9802224528998077, "learning_rate": 2.6792583883222047e-06, "loss": 0.7957, "step": 8584 }, { "epoch": 0.7687055794952152, "grad_norm": 0.9359662322678803, "learning_rate": 2.6772829472865735e-06, "loss": 0.8064, "step": 8585 }, { "epoch": 0.7687951200402933, "grad_norm": 1.0056466047699095, "learning_rate": 2.6753081222140398e-06, "loss": 0.7758, "step": 8586 }, { "epoch": 0.7688846605853713, "grad_norm": 0.853240631544896, "learning_rate": 2.6733339132707172e-06, "loss": 0.8019, "step": 8587 }, { "epoch": 0.7689742011304493, "grad_norm": 0.9046056721612803, "learning_rate": 2.67136032062267e-06, "loss": 0.7818, "step": 8588 }, { "epoch": 0.7690637416755275, "grad_norm": 1.0591530838030743, "learning_rate": 2.669387344435912e-06, "loss": 0.7652, "step": 8589 }, { "epoch": 0.7691532822206055, "grad_norm": 0.9905679236283128, "learning_rate": 2.6674149848764006e-06, "loss": 0.8008, "step": 8590 }, { "epoch": 0.7692428227656836, "grad_norm": 0.8860448283298954, "learning_rate": 2.6654432421100472e-06, "loss": 0.7957, "step": 8591 }, { "epoch": 0.7693323633107616, "grad_norm": 1.0735306758648655, "learning_rate": 2.6634721163027076e-06, "loss": 0.7717, "step": 8592 }, { "epoch": 0.7694219038558398, "grad_norm": 1.2491509821121964, "learning_rate": 2.661501607620185e-06, "loss": 0.8152, "step": 8593 }, { "epoch": 0.7695114444009178, "grad_norm": 0.9978549067224076, "learning_rate": 2.6595317162282364e-06, "loss": 0.7837, "step": 8594 }, { "epoch": 0.7696009849459958, "grad_norm": 0.9619042373968957, "learning_rate": 2.657562442292556e-06, "loss": 0.7608, "step": 8595 }, { "epoch": 0.769690525491074, "grad_norm": 1.0156562907666078, "learning_rate": 2.655593785978794e-06, "loss": 0.8023, "step": 8596 }, { "epoch": 0.769780066036152, "grad_norm": 1.0592335041965242, "learning_rate": 2.6536257474525517e-06, "loss": 0.8163, "step": 8597 }, { "epoch": 0.76986960658123, "grad_norm": 1.056145414382865, "learning_rate": 2.651658326879373e-06, "loss": 0.7745, "step": 8598 }, { "epoch": 0.7699591471263081, "grad_norm": 0.9076650119450872, "learning_rate": 2.649691524424749e-06, "loss": 0.8355, "step": 8599 }, { "epoch": 0.7700486876713862, "grad_norm": 0.9457089439781576, "learning_rate": 2.6477253402541235e-06, "loss": 0.8201, "step": 8600 }, { "epoch": 0.7701382282164643, "grad_norm": 1.016497694013363, "learning_rate": 2.645759774532882e-06, "loss": 0.8064, "step": 8601 }, { "epoch": 0.7702277687615423, "grad_norm": 0.8821943593236058, "learning_rate": 2.643794827426359e-06, "loss": 0.7756, "step": 8602 }, { "epoch": 0.7703173093066205, "grad_norm": 1.0412009355697802, "learning_rate": 2.6418304990998466e-06, "loss": 0.7973, "step": 8603 }, { "epoch": 0.7704068498516985, "grad_norm": 1.0756177128252604, "learning_rate": 2.6398667897185758e-06, "loss": 0.8292, "step": 8604 }, { "epoch": 0.7704963903967765, "grad_norm": 1.0205422362926408, "learning_rate": 2.6379036994477282e-06, "loss": 0.7534, "step": 8605 }, { "epoch": 0.7705859309418546, "grad_norm": 0.9621012562133423, "learning_rate": 2.6359412284524266e-06, "loss": 0.8486, "step": 8606 }, { "epoch": 0.7706754714869327, "grad_norm": 0.9917432518805662, "learning_rate": 2.6339793768977527e-06, "loss": 0.8123, "step": 8607 }, { "epoch": 0.7707650120320108, "grad_norm": 0.9275201768664162, "learning_rate": 2.632018144948727e-06, "loss": 0.8086, "step": 8608 }, { "epoch": 0.7708545525770888, "grad_norm": 1.017321377763287, "learning_rate": 2.630057532770327e-06, "loss": 0.8096, "step": 8609 }, { "epoch": 0.7709440931221668, "grad_norm": 1.1293951149719799, "learning_rate": 2.628097540527471e-06, "loss": 0.8036, "step": 8610 }, { "epoch": 0.771033633667245, "grad_norm": 0.9229212662475096, "learning_rate": 2.626138168385031e-06, "loss": 0.7612, "step": 8611 }, { "epoch": 0.771123174212323, "grad_norm": 0.951957383852106, "learning_rate": 2.6241794165078162e-06, "loss": 0.7888, "step": 8612 }, { "epoch": 0.771212714757401, "grad_norm": 0.9839710966832312, "learning_rate": 2.622221285060592e-06, "loss": 0.7828, "step": 8613 }, { "epoch": 0.7713022553024792, "grad_norm": 0.9169230820937256, "learning_rate": 2.6202637742080684e-06, "loss": 0.8064, "step": 8614 }, { "epoch": 0.7713917958475572, "grad_norm": 0.9736040219292256, "learning_rate": 2.618306884114912e-06, "loss": 0.83, "step": 8615 }, { "epoch": 0.7714813363926353, "grad_norm": 1.0769632309149675, "learning_rate": 2.6163506149457274e-06, "loss": 0.8611, "step": 8616 }, { "epoch": 0.7715708769377133, "grad_norm": 0.9154192542953049, "learning_rate": 2.614394966865066e-06, "loss": 0.7357, "step": 8617 }, { "epoch": 0.7716604174827915, "grad_norm": 0.8808401062705739, "learning_rate": 2.612439940037431e-06, "loss": 0.7555, "step": 8618 }, { "epoch": 0.7717499580278695, "grad_norm": 0.9340499473620993, "learning_rate": 2.6104855346272763e-06, "loss": 0.8069, "step": 8619 }, { "epoch": 0.7718394985729475, "grad_norm": 0.919452992391543, "learning_rate": 2.6085317507989938e-06, "loss": 0.8092, "step": 8620 }, { "epoch": 0.7719290391180257, "grad_norm": 0.9815207998760314, "learning_rate": 2.606578588716937e-06, "loss": 0.7625, "step": 8621 }, { "epoch": 0.7720185796631037, "grad_norm": 1.0263188570960775, "learning_rate": 2.6046260485454e-06, "loss": 0.7891, "step": 8622 }, { "epoch": 0.7721081202081818, "grad_norm": 0.9985707736808533, "learning_rate": 2.602674130448617e-06, "loss": 0.8262, "step": 8623 }, { "epoch": 0.7721976607532598, "grad_norm": 0.9519739733291744, "learning_rate": 2.600722834590781e-06, "loss": 0.812, "step": 8624 }, { "epoch": 0.7722872012983379, "grad_norm": 1.0450559177726395, "learning_rate": 2.5987721611360273e-06, "loss": 0.7996, "step": 8625 }, { "epoch": 0.772376741843416, "grad_norm": 0.8817484448659242, "learning_rate": 2.5968221102484413e-06, "loss": 0.7654, "step": 8626 }, { "epoch": 0.772466282388494, "grad_norm": 1.0282925047988678, "learning_rate": 2.5948726820920544e-06, "loss": 0.8012, "step": 8627 }, { "epoch": 0.7725558229335721, "grad_norm": 0.9839203186674594, "learning_rate": 2.5929238768308474e-06, "loss": 0.7944, "step": 8628 }, { "epoch": 0.7726453634786502, "grad_norm": 0.9604644007233709, "learning_rate": 2.5909756946287458e-06, "loss": 0.7681, "step": 8629 }, { "epoch": 0.7727349040237282, "grad_norm": 0.9623383898855488, "learning_rate": 2.5890281356496237e-06, "loss": 0.8399, "step": 8630 }, { "epoch": 0.7728244445688063, "grad_norm": 1.1080714755499659, "learning_rate": 2.5870812000573066e-06, "loss": 0.7437, "step": 8631 }, { "epoch": 0.7729139851138844, "grad_norm": 0.9397860484066614, "learning_rate": 2.5851348880155612e-06, "loss": 0.8003, "step": 8632 }, { "epoch": 0.7730035256589625, "grad_norm": 0.8662947339045861, "learning_rate": 2.5831891996881063e-06, "loss": 0.7849, "step": 8633 }, { "epoch": 0.7730930662040405, "grad_norm": 0.9343143596506978, "learning_rate": 2.5812441352386076e-06, "loss": 0.7767, "step": 8634 }, { "epoch": 0.7731826067491185, "grad_norm": 1.043604919326881, "learning_rate": 2.579299694830676e-06, "loss": 0.8174, "step": 8635 }, { "epoch": 0.7732721472941967, "grad_norm": 0.9293076407546806, "learning_rate": 2.577355878627872e-06, "loss": 0.7409, "step": 8636 }, { "epoch": 0.7733616878392747, "grad_norm": 0.9715274564245949, "learning_rate": 2.5754126867937033e-06, "loss": 0.7939, "step": 8637 }, { "epoch": 0.7734512283843528, "grad_norm": 1.0010773935681752, "learning_rate": 2.5734701194916257e-06, "loss": 0.8328, "step": 8638 }, { "epoch": 0.7735407689294309, "grad_norm": 0.990816679536554, "learning_rate": 2.5715281768850397e-06, "loss": 0.7491, "step": 8639 }, { "epoch": 0.773630309474509, "grad_norm": 0.9479352493736147, "learning_rate": 2.5695868591372975e-06, "loss": 0.841, "step": 8640 }, { "epoch": 0.773719850019587, "grad_norm": 1.1221524788483135, "learning_rate": 2.5676461664116946e-06, "loss": 0.813, "step": 8641 }, { "epoch": 0.773809390564665, "grad_norm": 0.9913798272793234, "learning_rate": 2.5657060988714755e-06, "loss": 0.8236, "step": 8642 }, { "epoch": 0.7738989311097432, "grad_norm": 0.9334447308920091, "learning_rate": 2.5637666566798347e-06, "loss": 0.7842, "step": 8643 }, { "epoch": 0.7739884716548212, "grad_norm": 0.9167462086416046, "learning_rate": 2.5618278399999097e-06, "loss": 0.7818, "step": 8644 }, { "epoch": 0.7740780121998992, "grad_norm": 0.9553280856509895, "learning_rate": 2.5598896489947877e-06, "loss": 0.76, "step": 8645 }, { "epoch": 0.7741675527449773, "grad_norm": 0.8856535569954458, "learning_rate": 2.5579520838275027e-06, "loss": 0.8351, "step": 8646 }, { "epoch": 0.7742570932900554, "grad_norm": 1.044037521820496, "learning_rate": 2.556015144661038e-06, "loss": 0.7466, "step": 8647 }, { "epoch": 0.7743466338351335, "grad_norm": 1.0215521099309144, "learning_rate": 2.554078831658321e-06, "loss": 0.7982, "step": 8648 }, { "epoch": 0.7744361743802115, "grad_norm": 0.9898693360606619, "learning_rate": 2.552143144982232e-06, "loss": 0.8005, "step": 8649 }, { "epoch": 0.7745257149252897, "grad_norm": 1.6898269850714511, "learning_rate": 2.5502080847955857e-06, "loss": 0.752, "step": 8650 }, { "epoch": 0.7746152554703677, "grad_norm": 0.8779736606091741, "learning_rate": 2.5482736512611606e-06, "loss": 0.7768, "step": 8651 }, { "epoch": 0.7747047960154457, "grad_norm": 1.018725861726274, "learning_rate": 2.5463398445416744e-06, "loss": 0.8374, "step": 8652 }, { "epoch": 0.7747943365605238, "grad_norm": 0.8882064886574702, "learning_rate": 2.54440666479979e-06, "loss": 0.8264, "step": 8653 }, { "epoch": 0.7748838771056019, "grad_norm": 0.970085593463449, "learning_rate": 2.5424741121981243e-06, "loss": 0.848, "step": 8654 }, { "epoch": 0.77497341765068, "grad_norm": 1.1033980180626324, "learning_rate": 2.5405421868992318e-06, "loss": 0.8382, "step": 8655 }, { "epoch": 0.775062958195758, "grad_norm": 1.1602708014260483, "learning_rate": 2.538610889065619e-06, "loss": 0.7939, "step": 8656 }, { "epoch": 0.7751524987408361, "grad_norm": 0.9576070804071292, "learning_rate": 2.536680218859747e-06, "loss": 0.7762, "step": 8657 }, { "epoch": 0.7752420392859142, "grad_norm": 0.9755052719125886, "learning_rate": 2.534750176444015e-06, "loss": 0.8112, "step": 8658 }, { "epoch": 0.7753315798309922, "grad_norm": 0.9200417005596881, "learning_rate": 2.532820761980771e-06, "loss": 0.8018, "step": 8659 }, { "epoch": 0.7754211203760702, "grad_norm": 0.9136067069660301, "learning_rate": 2.5308919756323135e-06, "loss": 0.7958, "step": 8660 }, { "epoch": 0.7755106609211484, "grad_norm": 1.0342810366916786, "learning_rate": 2.52896381756088e-06, "loss": 0.825, "step": 8661 }, { "epoch": 0.7756002014662264, "grad_norm": 0.9800098989806436, "learning_rate": 2.5270362879286625e-06, "loss": 0.8397, "step": 8662 }, { "epoch": 0.7756897420113045, "grad_norm": 0.9277957497916605, "learning_rate": 2.5251093868978025e-06, "loss": 0.7876, "step": 8663 }, { "epoch": 0.7757792825563825, "grad_norm": 0.9164352253695796, "learning_rate": 2.523183114630382e-06, "loss": 0.7573, "step": 8664 }, { "epoch": 0.7758688231014607, "grad_norm": 1.0015859512173726, "learning_rate": 2.5212574712884375e-06, "loss": 0.8197, "step": 8665 }, { "epoch": 0.7759583636465387, "grad_norm": 1.000380341334665, "learning_rate": 2.5193324570339394e-06, "loss": 0.7878, "step": 8666 }, { "epoch": 0.7760479041916167, "grad_norm": 1.0145045593940272, "learning_rate": 2.5174080720288185e-06, "loss": 0.7861, "step": 8667 }, { "epoch": 0.7761374447366949, "grad_norm": 1.0461682919502595, "learning_rate": 2.515484316434943e-06, "loss": 0.8145, "step": 8668 }, { "epoch": 0.7762269852817729, "grad_norm": 1.0542460641806677, "learning_rate": 2.513561190414141e-06, "loss": 0.8029, "step": 8669 }, { "epoch": 0.776316525826851, "grad_norm": 0.8988684672442186, "learning_rate": 2.5116386941281745e-06, "loss": 0.7902, "step": 8670 }, { "epoch": 0.776406066371929, "grad_norm": 0.968524869194405, "learning_rate": 2.509716827738763e-06, "loss": 0.7804, "step": 8671 }, { "epoch": 0.7764956069170071, "grad_norm": 0.9576889324243132, "learning_rate": 2.507795591407559e-06, "loss": 0.8193, "step": 8672 }, { "epoch": 0.7765851474620852, "grad_norm": 0.9261099870586427, "learning_rate": 2.5058749852961763e-06, "loss": 0.7644, "step": 8673 }, { "epoch": 0.7766746880071632, "grad_norm": 0.8266529068042214, "learning_rate": 2.5039550095661647e-06, "loss": 0.713, "step": 8674 }, { "epoch": 0.7767642285522414, "grad_norm": 0.932759042317134, "learning_rate": 2.5020356643790342e-06, "loss": 0.8135, "step": 8675 }, { "epoch": 0.7768537690973194, "grad_norm": 0.9924550749297483, "learning_rate": 2.500116949896232e-06, "loss": 0.8383, "step": 8676 }, { "epoch": 0.7769433096423974, "grad_norm": 0.9772352184536196, "learning_rate": 2.4981988662791503e-06, "loss": 0.8462, "step": 8677 }, { "epoch": 0.7770328501874755, "grad_norm": 0.838097212027536, "learning_rate": 2.4962814136891324e-06, "loss": 0.6846, "step": 8678 }, { "epoch": 0.7771223907325536, "grad_norm": 0.8953152298828604, "learning_rate": 2.4943645922874704e-06, "loss": 0.8032, "step": 8679 }, { "epoch": 0.7772119312776317, "grad_norm": 0.9674460783736636, "learning_rate": 2.4924484022354003e-06, "loss": 0.7579, "step": 8680 }, { "epoch": 0.7773014718227097, "grad_norm": 1.0726600574331795, "learning_rate": 2.4905328436941024e-06, "loss": 0.8596, "step": 8681 }, { "epoch": 0.7773910123677877, "grad_norm": 0.9409641449162625, "learning_rate": 2.488617916824716e-06, "loss": 0.7395, "step": 8682 }, { "epoch": 0.7774805529128659, "grad_norm": 1.0238950124004955, "learning_rate": 2.4867036217883113e-06, "loss": 0.8299, "step": 8683 }, { "epoch": 0.7775700934579439, "grad_norm": 1.0944642125293673, "learning_rate": 2.4847899587459134e-06, "loss": 0.7797, "step": 8684 }, { "epoch": 0.777659634003022, "grad_norm": 1.0121175565873832, "learning_rate": 2.4828769278584952e-06, "loss": 0.864, "step": 8685 }, { "epoch": 0.7777491745481001, "grad_norm": 1.019195156547706, "learning_rate": 2.480964529286973e-06, "loss": 0.7968, "step": 8686 }, { "epoch": 0.7778387150931781, "grad_norm": 0.995580144602024, "learning_rate": 2.4790527631922125e-06, "loss": 0.7789, "step": 8687 }, { "epoch": 0.7779282556382562, "grad_norm": 0.9295724075330994, "learning_rate": 2.477141629735025e-06, "loss": 0.794, "step": 8688 }, { "epoch": 0.7780177961833342, "grad_norm": 0.9389635970758421, "learning_rate": 2.4752311290761688e-06, "loss": 0.842, "step": 8689 }, { "epoch": 0.7781073367284124, "grad_norm": 0.9013828602665985, "learning_rate": 2.473321261376349e-06, "loss": 0.7776, "step": 8690 }, { "epoch": 0.7781968772734904, "grad_norm": 0.9423517718232737, "learning_rate": 2.4714120267962173e-06, "loss": 0.7674, "step": 8691 }, { "epoch": 0.7782864178185684, "grad_norm": 0.9082063303114358, "learning_rate": 2.4695034254963734e-06, "loss": 0.8213, "step": 8692 }, { "epoch": 0.7783759583636466, "grad_norm": 1.0097603361206131, "learning_rate": 2.4675954576373608e-06, "loss": 0.7884, "step": 8693 }, { "epoch": 0.7784654989087246, "grad_norm": 1.0091109306547712, "learning_rate": 2.4656881233796725e-06, "loss": 0.7757, "step": 8694 }, { "epoch": 0.7785550394538027, "grad_norm": 0.9571119416811313, "learning_rate": 2.4637814228837474e-06, "loss": 0.8706, "step": 8695 }, { "epoch": 0.7786445799988807, "grad_norm": 0.9738450728011351, "learning_rate": 2.4618753563099705e-06, "loss": 0.8174, "step": 8696 }, { "epoch": 0.7787341205439589, "grad_norm": 0.96139900373006, "learning_rate": 2.4599699238186747e-06, "loss": 0.7825, "step": 8697 }, { "epoch": 0.7788236610890369, "grad_norm": 0.9695645553768653, "learning_rate": 2.4580651255701373e-06, "loss": 0.7718, "step": 8698 }, { "epoch": 0.7789132016341149, "grad_norm": 1.2431225661471146, "learning_rate": 2.456160961724585e-06, "loss": 0.7784, "step": 8699 }, { "epoch": 0.779002742179193, "grad_norm": 1.1021443597540335, "learning_rate": 2.45425743244219e-06, "loss": 0.826, "step": 8700 }, { "epoch": 0.7790922827242711, "grad_norm": 1.1710146063721463, "learning_rate": 2.4523545378830693e-06, "loss": 0.8076, "step": 8701 }, { "epoch": 0.7791818232693491, "grad_norm": 1.174422142434489, "learning_rate": 2.4504522782072905e-06, "loss": 0.8443, "step": 8702 }, { "epoch": 0.7792713638144272, "grad_norm": 0.9695273151706497, "learning_rate": 2.4485506535748658e-06, "loss": 0.7689, "step": 8703 }, { "epoch": 0.7793609043595053, "grad_norm": 1.3197561015419235, "learning_rate": 2.4466496641457483e-06, "loss": 0.7784, "step": 8704 }, { "epoch": 0.7794504449045834, "grad_norm": 0.9335034165544284, "learning_rate": 2.444749310079848e-06, "loss": 0.7907, "step": 8705 }, { "epoch": 0.7795399854496614, "grad_norm": 0.9685182898566673, "learning_rate": 2.4428495915370165e-06, "loss": 0.8384, "step": 8706 }, { "epoch": 0.7796295259947394, "grad_norm": 0.939756671441516, "learning_rate": 2.44095050867705e-06, "loss": 0.7957, "step": 8707 }, { "epoch": 0.7797190665398176, "grad_norm": 0.868974422322578, "learning_rate": 2.439052061659695e-06, "loss": 0.818, "step": 8708 }, { "epoch": 0.7798086070848956, "grad_norm": 0.9491204767267132, "learning_rate": 2.4371542506446446e-06, "loss": 0.7585, "step": 8709 }, { "epoch": 0.7798981476299737, "grad_norm": 0.9615964751158518, "learning_rate": 2.4352570757915273e-06, "loss": 0.8055, "step": 8710 }, { "epoch": 0.7799876881750518, "grad_norm": 1.1089166193866478, "learning_rate": 2.4333605372599355e-06, "loss": 0.8208, "step": 8711 }, { "epoch": 0.7800772287201299, "grad_norm": 0.9875479566492662, "learning_rate": 2.4314646352093997e-06, "loss": 0.8524, "step": 8712 }, { "epoch": 0.7801667692652079, "grad_norm": 1.0527589074095396, "learning_rate": 2.429569369799394e-06, "loss": 0.817, "step": 8713 }, { "epoch": 0.7802563098102859, "grad_norm": 0.9777616013113768, "learning_rate": 2.4276747411893464e-06, "loss": 0.8212, "step": 8714 }, { "epoch": 0.7803458503553641, "grad_norm": 0.9719598362925369, "learning_rate": 2.425780749538621e-06, "loss": 0.8217, "step": 8715 }, { "epoch": 0.7804353909004421, "grad_norm": 0.9838348057068431, "learning_rate": 2.4238873950065335e-06, "loss": 0.8156, "step": 8716 }, { "epoch": 0.7805249314455202, "grad_norm": 0.9378831730612278, "learning_rate": 2.421994677752353e-06, "loss": 0.7874, "step": 8717 }, { "epoch": 0.7806144719905982, "grad_norm": 0.9826384840592557, "learning_rate": 2.4201025979352857e-06, "loss": 0.781, "step": 8718 }, { "epoch": 0.7807040125356763, "grad_norm": 0.9987453383325858, "learning_rate": 2.418211155714486e-06, "loss": 0.8232, "step": 8719 }, { "epoch": 0.7807935530807544, "grad_norm": 0.9292814822179866, "learning_rate": 2.416320351249062e-06, "loss": 0.8008, "step": 8720 }, { "epoch": 0.7808830936258324, "grad_norm": 0.9817358904005138, "learning_rate": 2.4144301846980533e-06, "loss": 0.7657, "step": 8721 }, { "epoch": 0.7809726341709106, "grad_norm": 1.019932249549998, "learning_rate": 2.4125406562204554e-06, "loss": 0.8098, "step": 8722 }, { "epoch": 0.7810621747159886, "grad_norm": 0.9687145054773424, "learning_rate": 2.4106517659752148e-06, "loss": 0.8052, "step": 8723 }, { "epoch": 0.7811517152610666, "grad_norm": 1.0484305597442376, "learning_rate": 2.408763514121216e-06, "loss": 0.8214, "step": 8724 }, { "epoch": 0.7812412558061447, "grad_norm": 0.9894091661346851, "learning_rate": 2.406875900817297e-06, "loss": 0.7598, "step": 8725 }, { "epoch": 0.7813307963512228, "grad_norm": 1.043930387999368, "learning_rate": 2.4049889262222302e-06, "loss": 0.8381, "step": 8726 }, { "epoch": 0.7814203368963009, "grad_norm": 1.0237127488413573, "learning_rate": 2.4031025904947445e-06, "loss": 0.7931, "step": 8727 }, { "epoch": 0.7815098774413789, "grad_norm": 0.9325259836035235, "learning_rate": 2.40121689379351e-06, "loss": 0.8027, "step": 8728 }, { "epoch": 0.781599417986457, "grad_norm": 0.990468417700911, "learning_rate": 2.3993318362771512e-06, "loss": 0.7892, "step": 8729 }, { "epoch": 0.7816889585315351, "grad_norm": 0.862094648680522, "learning_rate": 2.3974474181042308e-06, "loss": 0.8154, "step": 8730 }, { "epoch": 0.7817784990766131, "grad_norm": 0.9584993276031846, "learning_rate": 2.3955636394332602e-06, "loss": 0.8547, "step": 8731 }, { "epoch": 0.7818680396216912, "grad_norm": 1.1391455130256374, "learning_rate": 2.393680500422695e-06, "loss": 0.8048, "step": 8732 }, { "epoch": 0.7819575801667693, "grad_norm": 0.9174764319765369, "learning_rate": 2.391798001230937e-06, "loss": 0.8021, "step": 8733 }, { "epoch": 0.7820471207118473, "grad_norm": 1.0884402618014684, "learning_rate": 2.3899161420163398e-06, "loss": 0.7758, "step": 8734 }, { "epoch": 0.7821366612569254, "grad_norm": 1.0108327963843435, "learning_rate": 2.3880349229371946e-06, "loss": 0.8339, "step": 8735 }, { "epoch": 0.7822262018020034, "grad_norm": 1.0535519498263965, "learning_rate": 2.386154344151752e-06, "loss": 0.8265, "step": 8736 }, { "epoch": 0.7823157423470816, "grad_norm": 1.1526385851654188, "learning_rate": 2.3842744058181923e-06, "loss": 0.7772, "step": 8737 }, { "epoch": 0.7824052828921596, "grad_norm": 0.9876798554462964, "learning_rate": 2.3823951080946516e-06, "loss": 0.8221, "step": 8738 }, { "epoch": 0.7824948234372376, "grad_norm": 0.9898243017443573, "learning_rate": 2.380516451139212e-06, "loss": 0.8665, "step": 8739 }, { "epoch": 0.7825843639823158, "grad_norm": 0.9042842363854605, "learning_rate": 2.378638435109898e-06, "loss": 0.7334, "step": 8740 }, { "epoch": 0.7826739045273938, "grad_norm": 0.9500000541992688, "learning_rate": 2.3767610601646798e-06, "loss": 0.8287, "step": 8741 }, { "epoch": 0.7827634450724719, "grad_norm": 0.9404061186406021, "learning_rate": 2.3748843264614864e-06, "loss": 0.8269, "step": 8742 }, { "epoch": 0.7828529856175499, "grad_norm": 1.0806817404180178, "learning_rate": 2.3730082341581715e-06, "loss": 0.8391, "step": 8743 }, { "epoch": 0.782942526162628, "grad_norm": 1.0017549328595794, "learning_rate": 2.3711327834125496e-06, "loss": 0.8186, "step": 8744 }, { "epoch": 0.7830320667077061, "grad_norm": 1.010073845125027, "learning_rate": 2.3692579743823783e-06, "loss": 0.8298, "step": 8745 }, { "epoch": 0.7831216072527841, "grad_norm": 0.9549761052982769, "learning_rate": 2.3673838072253597e-06, "loss": 0.8596, "step": 8746 }, { "epoch": 0.7832111477978623, "grad_norm": 1.0207355898438168, "learning_rate": 2.3655102820991425e-06, "loss": 0.7556, "step": 8747 }, { "epoch": 0.7833006883429403, "grad_norm": 0.9624512074377601, "learning_rate": 2.3636373991613225e-06, "loss": 0.8222, "step": 8748 }, { "epoch": 0.7833902288880183, "grad_norm": 1.0519269400205644, "learning_rate": 2.36176515856944e-06, "loss": 0.8427, "step": 8749 }, { "epoch": 0.7834797694330964, "grad_norm": 1.0441377955891853, "learning_rate": 2.3598935604809813e-06, "loss": 0.7933, "step": 8750 }, { "epoch": 0.7835693099781745, "grad_norm": 0.8989804098882979, "learning_rate": 2.3580226050533794e-06, "loss": 0.7599, "step": 8751 }, { "epoch": 0.7836588505232526, "grad_norm": 0.9307537421470434, "learning_rate": 2.3561522924440127e-06, "loss": 0.8091, "step": 8752 }, { "epoch": 0.7837483910683306, "grad_norm": 1.1672075585219102, "learning_rate": 2.354282622810207e-06, "loss": 0.8267, "step": 8753 }, { "epoch": 0.7838379316134086, "grad_norm": 0.9321849038589167, "learning_rate": 2.352413596309231e-06, "loss": 0.8009, "step": 8754 }, { "epoch": 0.7839274721584868, "grad_norm": 1.1401846708248895, "learning_rate": 2.3505452130983018e-06, "loss": 0.7609, "step": 8755 }, { "epoch": 0.7840170127035648, "grad_norm": 1.0086536537345712, "learning_rate": 2.348677473334583e-06, "loss": 0.8246, "step": 8756 }, { "epoch": 0.7841065532486429, "grad_norm": 1.0960619415607866, "learning_rate": 2.3468103771751816e-06, "loss": 0.7877, "step": 8757 }, { "epoch": 0.784196093793721, "grad_norm": 0.9493553029955952, "learning_rate": 2.344943924777151e-06, "loss": 0.7995, "step": 8758 }, { "epoch": 0.784285634338799, "grad_norm": 0.9784209525735732, "learning_rate": 2.3430781162974927e-06, "loss": 0.8373, "step": 8759 }, { "epoch": 0.7843751748838771, "grad_norm": 0.9656322004647505, "learning_rate": 2.3412129518931516e-06, "loss": 0.7997, "step": 8760 }, { "epoch": 0.7844647154289551, "grad_norm": 0.9494327500637705, "learning_rate": 2.3393484317210192e-06, "loss": 0.8395, "step": 8761 }, { "epoch": 0.7845542559740333, "grad_norm": 0.9726676120228631, "learning_rate": 2.3374845559379323e-06, "loss": 0.7657, "step": 8762 }, { "epoch": 0.7846437965191113, "grad_norm": 0.9296199793875822, "learning_rate": 2.3356213247006754e-06, "loss": 0.811, "step": 8763 }, { "epoch": 0.7847333370641894, "grad_norm": 1.0542117574625105, "learning_rate": 2.3337587381659753e-06, "loss": 0.7798, "step": 8764 }, { "epoch": 0.7848228776092675, "grad_norm": 1.0146806637316026, "learning_rate": 2.3318967964905103e-06, "loss": 0.8857, "step": 8765 }, { "epoch": 0.7849124181543455, "grad_norm": 0.9455980437618184, "learning_rate": 2.3300354998308972e-06, "loss": 0.7791, "step": 8766 }, { "epoch": 0.7850019586994236, "grad_norm": 0.8931486512684882, "learning_rate": 2.3281748483437038e-06, "loss": 0.8029, "step": 8767 }, { "epoch": 0.7850914992445016, "grad_norm": 0.9451687590167771, "learning_rate": 2.326314842185443e-06, "loss": 0.7983, "step": 8768 }, { "epoch": 0.7851810397895798, "grad_norm": 1.1189137871939476, "learning_rate": 2.3244554815125732e-06, "loss": 0.7663, "step": 8769 }, { "epoch": 0.7852705803346578, "grad_norm": 1.0008957545276058, "learning_rate": 2.3225967664814907e-06, "loss": 0.817, "step": 8770 }, { "epoch": 0.7853601208797358, "grad_norm": 1.0052841692325039, "learning_rate": 2.3207386972485522e-06, "loss": 0.8393, "step": 8771 }, { "epoch": 0.7854496614248139, "grad_norm": 0.9977413844293204, "learning_rate": 2.3188812739700506e-06, "loss": 0.7778, "step": 8772 }, { "epoch": 0.785539201969892, "grad_norm": 1.0365196226117177, "learning_rate": 2.317024496802226e-06, "loss": 0.8226, "step": 8773 }, { "epoch": 0.7856287425149701, "grad_norm": 1.1630771621425773, "learning_rate": 2.315168365901267e-06, "loss": 0.7628, "step": 8774 }, { "epoch": 0.7857182830600481, "grad_norm": 1.1337964037027655, "learning_rate": 2.3133128814232988e-06, "loss": 0.7727, "step": 8775 }, { "epoch": 0.7858078236051262, "grad_norm": 0.9107756358290092, "learning_rate": 2.3114580435244004e-06, "loss": 0.8009, "step": 8776 }, { "epoch": 0.7858973641502043, "grad_norm": 1.0592945362549007, "learning_rate": 2.309603852360599e-06, "loss": 0.7827, "step": 8777 }, { "epoch": 0.7859869046952823, "grad_norm": 0.926739927611948, "learning_rate": 2.3077503080878605e-06, "loss": 0.7934, "step": 8778 }, { "epoch": 0.7860764452403604, "grad_norm": 1.0627083194969547, "learning_rate": 2.3058974108621003e-06, "loss": 0.7852, "step": 8779 }, { "epoch": 0.7861659857854385, "grad_norm": 1.0507656737132343, "learning_rate": 2.304045160839179e-06, "loss": 0.799, "step": 8780 }, { "epoch": 0.7862555263305165, "grad_norm": 0.9610498882128611, "learning_rate": 2.3021935581748978e-06, "loss": 0.8268, "step": 8781 }, { "epoch": 0.7863450668755946, "grad_norm": 0.9235347897084841, "learning_rate": 2.3003426030250064e-06, "loss": 0.7788, "step": 8782 }, { "epoch": 0.7864346074206727, "grad_norm": 1.1127204532451767, "learning_rate": 2.298492295545207e-06, "loss": 0.8619, "step": 8783 }, { "epoch": 0.7865241479657508, "grad_norm": 0.9592300023531104, "learning_rate": 2.2966426358911387e-06, "loss": 0.8194, "step": 8784 }, { "epoch": 0.7866136885108288, "grad_norm": 1.0030501380640002, "learning_rate": 2.294793624218392e-06, "loss": 0.8237, "step": 8785 }, { "epoch": 0.7867032290559068, "grad_norm": 1.1028807924671524, "learning_rate": 2.2929452606824942e-06, "loss": 0.8019, "step": 8786 }, { "epoch": 0.786792769600985, "grad_norm": 0.9820773594965431, "learning_rate": 2.291097545438926e-06, "loss": 0.8298, "step": 8787 }, { "epoch": 0.786882310146063, "grad_norm": 1.0210859607733827, "learning_rate": 2.28925047864311e-06, "loss": 0.7826, "step": 8788 }, { "epoch": 0.7869718506911411, "grad_norm": 0.946124897122373, "learning_rate": 2.2874040604504155e-06, "loss": 0.8366, "step": 8789 }, { "epoch": 0.7870613912362191, "grad_norm": 0.8881763926749735, "learning_rate": 2.285558291016161e-06, "loss": 0.8333, "step": 8790 }, { "epoch": 0.7871509317812972, "grad_norm": 0.907528679723277, "learning_rate": 2.283713170495606e-06, "loss": 0.7641, "step": 8791 }, { "epoch": 0.7872404723263753, "grad_norm": 0.8472856997477702, "learning_rate": 2.281868699043951e-06, "loss": 0.7623, "step": 8792 }, { "epoch": 0.7873300128714533, "grad_norm": 1.0218533312874023, "learning_rate": 2.2800248768163515e-06, "loss": 0.8351, "step": 8793 }, { "epoch": 0.7874195534165315, "grad_norm": 1.0179797540669078, "learning_rate": 2.278181703967901e-06, "loss": 0.7974, "step": 8794 }, { "epoch": 0.7875090939616095, "grad_norm": 0.9597173283575153, "learning_rate": 2.2763391806536396e-06, "loss": 0.7763, "step": 8795 }, { "epoch": 0.7875986345066875, "grad_norm": 0.9481685366579282, "learning_rate": 2.2744973070285624e-06, "loss": 0.7761, "step": 8796 }, { "epoch": 0.7876881750517656, "grad_norm": 0.9775912472727575, "learning_rate": 2.2726560832475952e-06, "loss": 0.8477, "step": 8797 }, { "epoch": 0.7877777155968437, "grad_norm": 1.0247270170899692, "learning_rate": 2.270815509465616e-06, "loss": 0.822, "step": 8798 }, { "epoch": 0.7878672561419218, "grad_norm": 1.0284337356477788, "learning_rate": 2.2689755858374497e-06, "loss": 0.8085, "step": 8799 }, { "epoch": 0.7879567966869998, "grad_norm": 0.9872958708900921, "learning_rate": 2.2671363125178635e-06, "loss": 0.8171, "step": 8800 }, { "epoch": 0.788046337232078, "grad_norm": 0.9854110917532096, "learning_rate": 2.2652976896615684e-06, "loss": 0.8073, "step": 8801 }, { "epoch": 0.788135877777156, "grad_norm": 0.9548908592913122, "learning_rate": 2.263459717423233e-06, "loss": 0.7974, "step": 8802 }, { "epoch": 0.788225418322234, "grad_norm": 1.0316180066778164, "learning_rate": 2.2616223959574523e-06, "loss": 0.7137, "step": 8803 }, { "epoch": 0.7883149588673121, "grad_norm": 1.0002898413091361, "learning_rate": 2.2597857254187783e-06, "loss": 0.7828, "step": 8804 }, { "epoch": 0.7884044994123902, "grad_norm": 0.9420785300864903, "learning_rate": 2.257949705961706e-06, "loss": 0.7276, "step": 8805 }, { "epoch": 0.7884940399574683, "grad_norm": 0.9974899413573534, "learning_rate": 2.256114337740676e-06, "loss": 0.8173, "step": 8806 }, { "epoch": 0.7885835805025463, "grad_norm": 2.0828067823459424, "learning_rate": 2.2542796209100716e-06, "loss": 0.8174, "step": 8807 }, { "epoch": 0.7886731210476243, "grad_norm": 0.9630039001867484, "learning_rate": 2.252445555624225e-06, "loss": 0.8426, "step": 8808 }, { "epoch": 0.7887626615927025, "grad_norm": 1.311208445544868, "learning_rate": 2.2506121420374116e-06, "loss": 0.7796, "step": 8809 }, { "epoch": 0.7888522021377805, "grad_norm": 0.9750142899042925, "learning_rate": 2.2487793803038516e-06, "loss": 0.8166, "step": 8810 }, { "epoch": 0.7889417426828585, "grad_norm": 1.0921116353647442, "learning_rate": 2.2469472705777105e-06, "loss": 0.8466, "step": 8811 }, { "epoch": 0.7890312832279367, "grad_norm": 1.0269712688705133, "learning_rate": 2.245115813013101e-06, "loss": 0.8698, "step": 8812 }, { "epoch": 0.7891208237730147, "grad_norm": 0.9456547808577754, "learning_rate": 2.243285007764079e-06, "loss": 0.7754, "step": 8813 }, { "epoch": 0.7892103643180928, "grad_norm": 0.9033694835512854, "learning_rate": 2.241454854984645e-06, "loss": 0.7539, "step": 8814 }, { "epoch": 0.7892999048631708, "grad_norm": 1.0013098745570665, "learning_rate": 2.239625354828745e-06, "loss": 0.7933, "step": 8815 }, { "epoch": 0.789389445408249, "grad_norm": 0.9801371057762093, "learning_rate": 2.237796507450272e-06, "loss": 0.788, "step": 8816 }, { "epoch": 0.789478985953327, "grad_norm": 1.012532582309637, "learning_rate": 2.235968313003062e-06, "loss": 0.8404, "step": 8817 }, { "epoch": 0.789568526498405, "grad_norm": 0.9723675348770959, "learning_rate": 2.2341407716408957e-06, "loss": 0.8311, "step": 8818 }, { "epoch": 0.7896580670434832, "grad_norm": 1.0135581393841304, "learning_rate": 2.2323138835175027e-06, "loss": 0.8223, "step": 8819 }, { "epoch": 0.7897476075885612, "grad_norm": 1.0367338273330169, "learning_rate": 2.2304876487865524e-06, "loss": 0.8708, "step": 8820 }, { "epoch": 0.7898371481336393, "grad_norm": 0.9618332907325252, "learning_rate": 2.2286620676016624e-06, "loss": 0.7893, "step": 8821 }, { "epoch": 0.7899266886787173, "grad_norm": 0.9289094126116735, "learning_rate": 2.226837140116396e-06, "loss": 0.8499, "step": 8822 }, { "epoch": 0.7900162292237954, "grad_norm": 1.000094770991562, "learning_rate": 2.2250128664842574e-06, "loss": 0.7873, "step": 8823 }, { "epoch": 0.7901057697688735, "grad_norm": 1.13759964390537, "learning_rate": 2.223189246858701e-06, "loss": 0.8009, "step": 8824 }, { "epoch": 0.7901953103139515, "grad_norm": 1.0365054008241492, "learning_rate": 2.2213662813931224e-06, "loss": 0.7318, "step": 8825 }, { "epoch": 0.7902848508590296, "grad_norm": 1.045021585803547, "learning_rate": 2.2195439702408637e-06, "loss": 0.8301, "step": 8826 }, { "epoch": 0.7903743914041077, "grad_norm": 0.9923981155938767, "learning_rate": 2.2177223135552126e-06, "loss": 0.8186, "step": 8827 }, { "epoch": 0.7904639319491857, "grad_norm": 0.9499573148857462, "learning_rate": 2.2159013114894e-06, "loss": 0.7703, "step": 8828 }, { "epoch": 0.7905534724942638, "grad_norm": 0.9107601572419948, "learning_rate": 2.2140809641966066e-06, "loss": 0.7942, "step": 8829 }, { "epoch": 0.7906430130393419, "grad_norm": 1.0292158207018305, "learning_rate": 2.2122612718299443e-06, "loss": 0.8129, "step": 8830 }, { "epoch": 0.79073255358442, "grad_norm": 1.148734373913572, "learning_rate": 2.210442234542488e-06, "loss": 0.764, "step": 8831 }, { "epoch": 0.790822094129498, "grad_norm": 1.0255395463226753, "learning_rate": 2.208623852487248e-06, "loss": 0.783, "step": 8832 }, { "epoch": 0.790911634674576, "grad_norm": 0.970354735831638, "learning_rate": 2.206806125817179e-06, "loss": 0.7439, "step": 8833 }, { "epoch": 0.7910011752196542, "grad_norm": 1.0097089662494023, "learning_rate": 2.204989054685187e-06, "loss": 0.8105, "step": 8834 }, { "epoch": 0.7910907157647322, "grad_norm": 0.9924486497518542, "learning_rate": 2.20317263924411e-06, "loss": 0.8284, "step": 8835 }, { "epoch": 0.7911802563098103, "grad_norm": 0.9655739240678225, "learning_rate": 2.201356879646741e-06, "loss": 0.7851, "step": 8836 }, { "epoch": 0.7912697968548884, "grad_norm": 1.1058994333531043, "learning_rate": 2.1995417760458205e-06, "loss": 0.8207, "step": 8837 }, { "epoch": 0.7913593373999664, "grad_norm": 1.0276176191339472, "learning_rate": 2.197727328594026e-06, "loss": 0.8757, "step": 8838 }, { "epoch": 0.7914488779450445, "grad_norm": 1.101754550237192, "learning_rate": 2.1959135374439832e-06, "loss": 0.7547, "step": 8839 }, { "epoch": 0.7915384184901225, "grad_norm": 1.0565682849241695, "learning_rate": 2.194100402748266e-06, "loss": 0.8221, "step": 8840 }, { "epoch": 0.7916279590352007, "grad_norm": 1.0560905693258091, "learning_rate": 2.192287924659383e-06, "loss": 0.8038, "step": 8841 }, { "epoch": 0.7917174995802787, "grad_norm": 0.9582270705261191, "learning_rate": 2.190476103329796e-06, "loss": 0.8415, "step": 8842 }, { "epoch": 0.7918070401253567, "grad_norm": 1.0203777687067934, "learning_rate": 2.1886649389119084e-06, "loss": 0.7785, "step": 8843 }, { "epoch": 0.7918965806704348, "grad_norm": 0.9166993743527946, "learning_rate": 2.186854431558073e-06, "loss": 0.8424, "step": 8844 }, { "epoch": 0.7919861212155129, "grad_norm": 1.0649461166358671, "learning_rate": 2.185044581420586e-06, "loss": 0.8246, "step": 8845 }, { "epoch": 0.792075661760591, "grad_norm": 0.9771693558322117, "learning_rate": 2.183235388651679e-06, "loss": 0.8385, "step": 8846 }, { "epoch": 0.792165202305669, "grad_norm": 0.8922495877768892, "learning_rate": 2.181426853403538e-06, "loss": 0.7821, "step": 8847 }, { "epoch": 0.7922547428507472, "grad_norm": 0.9537647602461077, "learning_rate": 2.1796189758282917e-06, "loss": 0.7645, "step": 8848 }, { "epoch": 0.7923442833958252, "grad_norm": 1.0698966323901165, "learning_rate": 2.177811756078011e-06, "loss": 0.8485, "step": 8849 }, { "epoch": 0.7924338239409032, "grad_norm": 0.9191909929695842, "learning_rate": 2.176005194304718e-06, "loss": 0.828, "step": 8850 }, { "epoch": 0.7925233644859813, "grad_norm": 0.9389178614805662, "learning_rate": 2.1741992906603736e-06, "loss": 0.8278, "step": 8851 }, { "epoch": 0.7926129050310594, "grad_norm": 1.0511934024051504, "learning_rate": 2.1723940452968804e-06, "loss": 0.8288, "step": 8852 }, { "epoch": 0.7927024455761374, "grad_norm": 0.9805535410851165, "learning_rate": 2.1705894583660924e-06, "loss": 0.7853, "step": 8853 }, { "epoch": 0.7927919861212155, "grad_norm": 0.9429170712915458, "learning_rate": 2.168785530019806e-06, "loss": 0.8176, "step": 8854 }, { "epoch": 0.7928815266662936, "grad_norm": 0.9868937450701486, "learning_rate": 2.166982260409758e-06, "loss": 0.844, "step": 8855 }, { "epoch": 0.7929710672113717, "grad_norm": 0.9716747338511501, "learning_rate": 2.165179649687642e-06, "loss": 0.7673, "step": 8856 }, { "epoch": 0.7930606077564497, "grad_norm": 0.9581162629847151, "learning_rate": 2.16337769800508e-06, "loss": 0.7605, "step": 8857 }, { "epoch": 0.7931501483015277, "grad_norm": 0.9541289463155737, "learning_rate": 2.161576405513649e-06, "loss": 0.8244, "step": 8858 }, { "epoch": 0.7932396888466059, "grad_norm": 0.9793815598484333, "learning_rate": 2.159775772364868e-06, "loss": 0.7841, "step": 8859 }, { "epoch": 0.7933292293916839, "grad_norm": 0.9282901114744271, "learning_rate": 2.157975798710199e-06, "loss": 0.7872, "step": 8860 }, { "epoch": 0.793418769936762, "grad_norm": 0.9648132712096085, "learning_rate": 2.156176484701049e-06, "loss": 0.8012, "step": 8861 }, { "epoch": 0.79350831048184, "grad_norm": 1.1146016129785001, "learning_rate": 2.1543778304887786e-06, "loss": 0.764, "step": 8862 }, { "epoch": 0.7935978510269182, "grad_norm": 1.002962446634905, "learning_rate": 2.1525798362246743e-06, "loss": 0.8195, "step": 8863 }, { "epoch": 0.7936873915719962, "grad_norm": 0.9510714212136187, "learning_rate": 2.1507825020599827e-06, "loss": 0.8275, "step": 8864 }, { "epoch": 0.7937769321170742, "grad_norm": 0.9522354776051893, "learning_rate": 2.1489858281458886e-06, "loss": 0.7642, "step": 8865 }, { "epoch": 0.7938664726621524, "grad_norm": 1.0518491796279594, "learning_rate": 2.1471898146335223e-06, "loss": 0.784, "step": 8866 }, { "epoch": 0.7939560132072304, "grad_norm": 0.8939040459416904, "learning_rate": 2.1453944616739587e-06, "loss": 0.7981, "step": 8867 }, { "epoch": 0.7940455537523085, "grad_norm": 1.2042825864351896, "learning_rate": 2.1435997694182174e-06, "loss": 0.7992, "step": 8868 }, { "epoch": 0.7941350942973865, "grad_norm": 1.029196072443649, "learning_rate": 2.141805738017262e-06, "loss": 0.796, "step": 8869 }, { "epoch": 0.7942246348424646, "grad_norm": 0.9254921620057439, "learning_rate": 2.1400123676219995e-06, "loss": 0.773, "step": 8870 }, { "epoch": 0.7943141753875427, "grad_norm": 0.879414169996277, "learning_rate": 2.1382196583832838e-06, "loss": 0.7702, "step": 8871 }, { "epoch": 0.7944037159326207, "grad_norm": 1.0247312099828798, "learning_rate": 2.136427610451912e-06, "loss": 0.8186, "step": 8872 }, { "epoch": 0.7944932564776989, "grad_norm": 0.9132595674668879, "learning_rate": 2.1346362239786234e-06, "loss": 0.7755, "step": 8873 }, { "epoch": 0.7945827970227769, "grad_norm": 0.9751940031190726, "learning_rate": 2.1328454991141056e-06, "loss": 0.7762, "step": 8874 }, { "epoch": 0.7946723375678549, "grad_norm": 1.0362840050223117, "learning_rate": 2.1310554360089874e-06, "loss": 0.7835, "step": 8875 }, { "epoch": 0.794761878112933, "grad_norm": 1.0399341139820528, "learning_rate": 2.1292660348138427e-06, "loss": 0.8115, "step": 8876 }, { "epoch": 0.7948514186580111, "grad_norm": 0.9718358730389632, "learning_rate": 2.127477295679191e-06, "loss": 0.7593, "step": 8877 }, { "epoch": 0.7949409592030892, "grad_norm": 0.8816905002650758, "learning_rate": 2.1256892187554957e-06, "loss": 0.7934, "step": 8878 }, { "epoch": 0.7950304997481672, "grad_norm": 1.0407617644966412, "learning_rate": 2.1239018041931636e-06, "loss": 0.7352, "step": 8879 }, { "epoch": 0.7951200402932452, "grad_norm": 0.9152238276073582, "learning_rate": 2.122115052142545e-06, "loss": 0.7771, "step": 8880 }, { "epoch": 0.7952095808383234, "grad_norm": 0.9640855152198113, "learning_rate": 2.120328962753936e-06, "loss": 0.8457, "step": 8881 }, { "epoch": 0.7952991213834014, "grad_norm": 0.9292131675962387, "learning_rate": 2.1185435361775784e-06, "loss": 0.8155, "step": 8882 }, { "epoch": 0.7953886619284795, "grad_norm": 0.9879104841689254, "learning_rate": 2.116758772563654e-06, "loss": 0.7743, "step": 8883 }, { "epoch": 0.7954782024735576, "grad_norm": 0.9745975647515067, "learning_rate": 2.114974672062293e-06, "loss": 0.8067, "step": 8884 }, { "epoch": 0.7955677430186356, "grad_norm": 0.9658721146486333, "learning_rate": 2.1131912348235686e-06, "loss": 0.7727, "step": 8885 }, { "epoch": 0.7956572835637137, "grad_norm": 0.9305853784472153, "learning_rate": 2.111408460997495e-06, "loss": 0.8008, "step": 8886 }, { "epoch": 0.7957468241087917, "grad_norm": 0.8027246211015022, "learning_rate": 2.1096263507340364e-06, "loss": 0.7533, "step": 8887 }, { "epoch": 0.7958363646538699, "grad_norm": 1.0265060236723729, "learning_rate": 2.107844904183096e-06, "loss": 0.8112, "step": 8888 }, { "epoch": 0.7959259051989479, "grad_norm": 1.1915036697364327, "learning_rate": 2.1060641214945277e-06, "loss": 0.7856, "step": 8889 }, { "epoch": 0.7960154457440259, "grad_norm": 1.0771954716619745, "learning_rate": 2.1042840028181154e-06, "loss": 0.7831, "step": 8890 }, { "epoch": 0.7961049862891041, "grad_norm": 1.0424067683201574, "learning_rate": 2.1025045483036056e-06, "loss": 0.8087, "step": 8891 }, { "epoch": 0.7961945268341821, "grad_norm": 0.9047816933166197, "learning_rate": 2.100725758100678e-06, "loss": 0.7841, "step": 8892 }, { "epoch": 0.7962840673792602, "grad_norm": 1.1216797578559359, "learning_rate": 2.0989476323589577e-06, "loss": 0.8016, "step": 8893 }, { "epoch": 0.7963736079243382, "grad_norm": 0.9040761294760578, "learning_rate": 2.0971701712280157e-06, "loss": 0.8009, "step": 8894 }, { "epoch": 0.7964631484694163, "grad_norm": 0.9593532458777572, "learning_rate": 2.0953933748573686e-06, "loss": 0.8137, "step": 8895 }, { "epoch": 0.7965526890144944, "grad_norm": 0.9672365130167049, "learning_rate": 2.0936172433964696e-06, "loss": 0.7526, "step": 8896 }, { "epoch": 0.7966422295595724, "grad_norm": 0.9674436620913189, "learning_rate": 2.0918417769947207e-06, "loss": 0.8305, "step": 8897 }, { "epoch": 0.7967317701046505, "grad_norm": 1.022019894307431, "learning_rate": 2.0900669758014734e-06, "loss": 0.8555, "step": 8898 }, { "epoch": 0.7968213106497286, "grad_norm": 0.9647428995115607, "learning_rate": 2.0882928399660165e-06, "loss": 0.8157, "step": 8899 }, { "epoch": 0.7969108511948066, "grad_norm": 1.0221131979223792, "learning_rate": 2.0865193696375864e-06, "loss": 0.857, "step": 8900 }, { "epoch": 0.7970003917398847, "grad_norm": 1.086049933322398, "learning_rate": 2.0847465649653563e-06, "loss": 0.8008, "step": 8901 }, { "epoch": 0.7970899322849628, "grad_norm": 0.9341295784050314, "learning_rate": 2.082974426098452e-06, "loss": 0.7696, "step": 8902 }, { "epoch": 0.7971794728300409, "grad_norm": 1.014916977125944, "learning_rate": 2.081202953185937e-06, "loss": 0.8024, "step": 8903 }, { "epoch": 0.7972690133751189, "grad_norm": 1.1244790021383773, "learning_rate": 2.0794321463768275e-06, "loss": 0.7744, "step": 8904 }, { "epoch": 0.7973585539201969, "grad_norm": 1.0609299785461552, "learning_rate": 2.077662005820078e-06, "loss": 0.719, "step": 8905 }, { "epoch": 0.7974480944652751, "grad_norm": 0.8813540731635965, "learning_rate": 2.075892531664581e-06, "loss": 0.8562, "step": 8906 }, { "epoch": 0.7975376350103531, "grad_norm": 1.0129547571339297, "learning_rate": 2.0741237240591816e-06, "loss": 0.8284, "step": 8907 }, { "epoch": 0.7976271755554312, "grad_norm": 1.026752532385267, "learning_rate": 2.0723555831526664e-06, "loss": 0.7533, "step": 8908 }, { "epoch": 0.7977167161005093, "grad_norm": 1.0082881740929979, "learning_rate": 2.070588109093763e-06, "loss": 0.7799, "step": 8909 }, { "epoch": 0.7978062566455874, "grad_norm": 1.0222125820743238, "learning_rate": 2.068821302031151e-06, "loss": 0.859, "step": 8910 }, { "epoch": 0.7978957971906654, "grad_norm": 0.8406768313365586, "learning_rate": 2.0670551621134493e-06, "loss": 0.7588, "step": 8911 }, { "epoch": 0.7979853377357434, "grad_norm": 0.940715655258229, "learning_rate": 2.065289689489213e-06, "loss": 0.8142, "step": 8912 }, { "epoch": 0.7980748782808216, "grad_norm": 0.9908633358188143, "learning_rate": 2.063524884306951e-06, "loss": 0.7465, "step": 8913 }, { "epoch": 0.7981644188258996, "grad_norm": 0.9587871217646229, "learning_rate": 2.0617607467151122e-06, "loss": 0.7797, "step": 8914 }, { "epoch": 0.7982539593709777, "grad_norm": 0.9133146776474791, "learning_rate": 2.0599972768620903e-06, "loss": 0.831, "step": 8915 }, { "epoch": 0.7983434999160557, "grad_norm": 0.9565861570919098, "learning_rate": 2.058234474896227e-06, "loss": 0.7946, "step": 8916 }, { "epoch": 0.7984330404611338, "grad_norm": 0.9473138511540742, "learning_rate": 2.056472340965798e-06, "loss": 0.7643, "step": 8917 }, { "epoch": 0.7985225810062119, "grad_norm": 0.9523408903439556, "learning_rate": 2.05471087521903e-06, "loss": 0.7912, "step": 8918 }, { "epoch": 0.7986121215512899, "grad_norm": 1.0343372038253384, "learning_rate": 2.052950077804091e-06, "loss": 0.775, "step": 8919 }, { "epoch": 0.7987016620963681, "grad_norm": 0.8707703942501248, "learning_rate": 2.0511899488690955e-06, "loss": 0.8002, "step": 8920 }, { "epoch": 0.7987912026414461, "grad_norm": 1.084775374225361, "learning_rate": 2.049430488562095e-06, "loss": 0.806, "step": 8921 }, { "epoch": 0.7988807431865241, "grad_norm": 1.0462282722156777, "learning_rate": 2.0476716970310993e-06, "loss": 0.8717, "step": 8922 }, { "epoch": 0.7989702837316022, "grad_norm": 0.9354454866462218, "learning_rate": 2.0459135744240443e-06, "loss": 0.8414, "step": 8923 }, { "epoch": 0.7990598242766803, "grad_norm": 0.9536108262320696, "learning_rate": 2.0441561208888183e-06, "loss": 0.8418, "step": 8924 }, { "epoch": 0.7991493648217584, "grad_norm": 0.9476826742219421, "learning_rate": 2.0423993365732544e-06, "loss": 0.8164, "step": 8925 }, { "epoch": 0.7992389053668364, "grad_norm": 1.0427614582132403, "learning_rate": 2.040643221625126e-06, "loss": 0.761, "step": 8926 }, { "epoch": 0.7993284459119145, "grad_norm": 1.002837024706317, "learning_rate": 2.038887776192152e-06, "loss": 0.7974, "step": 8927 }, { "epoch": 0.7994179864569926, "grad_norm": 0.9533197788703245, "learning_rate": 2.037133000421997e-06, "loss": 0.7787, "step": 8928 }, { "epoch": 0.7995075270020706, "grad_norm": 1.0013106881363134, "learning_rate": 2.0353788944622643e-06, "loss": 0.7879, "step": 8929 }, { "epoch": 0.7995970675471487, "grad_norm": 1.2136168670820995, "learning_rate": 2.0336254584605053e-06, "loss": 0.8189, "step": 8930 }, { "epoch": 0.7996866080922268, "grad_norm": 1.0670456884612995, "learning_rate": 2.0318726925642116e-06, "loss": 0.8563, "step": 8931 }, { "epoch": 0.7997761486373048, "grad_norm": 1.0565358854782514, "learning_rate": 2.0301205969208227e-06, "loss": 0.8094, "step": 8932 }, { "epoch": 0.7998656891823829, "grad_norm": 0.9343082196443558, "learning_rate": 2.0283691716777166e-06, "loss": 0.7268, "step": 8933 }, { "epoch": 0.7999552297274609, "grad_norm": 0.9297123089909098, "learning_rate": 2.026618416982219e-06, "loss": 0.7345, "step": 8934 }, { "epoch": 0.8000447702725391, "grad_norm": 1.086878060051482, "learning_rate": 2.024868332981598e-06, "loss": 0.7605, "step": 8935 }, { "epoch": 0.8001343108176171, "grad_norm": 1.0291050125375094, "learning_rate": 2.0231189198230626e-06, "loss": 0.8069, "step": 8936 }, { "epoch": 0.8002238513626951, "grad_norm": 0.967943902030107, "learning_rate": 2.021370177653771e-06, "loss": 0.8317, "step": 8937 }, { "epoch": 0.8003133919077733, "grad_norm": 0.9247849716532679, "learning_rate": 2.019622106620819e-06, "loss": 0.8312, "step": 8938 }, { "epoch": 0.8004029324528513, "grad_norm": 1.0434874446655396, "learning_rate": 2.01787470687125e-06, "loss": 0.8228, "step": 8939 }, { "epoch": 0.8004924729979294, "grad_norm": 0.9616706510975945, "learning_rate": 2.016127978552049e-06, "loss": 0.825, "step": 8940 }, { "epoch": 0.8005820135430074, "grad_norm": 1.0410328633357984, "learning_rate": 2.014381921810147e-06, "loss": 0.7771, "step": 8941 }, { "epoch": 0.8006715540880855, "grad_norm": 0.9339190201088139, "learning_rate": 2.012636536792413e-06, "loss": 0.7539, "step": 8942 }, { "epoch": 0.8007610946331636, "grad_norm": 0.9730528227189571, "learning_rate": 2.0108918236456654e-06, "loss": 0.8447, "step": 8943 }, { "epoch": 0.8008506351782416, "grad_norm": 0.9509085097253794, "learning_rate": 2.0091477825166637e-06, "loss": 0.8135, "step": 8944 }, { "epoch": 0.8009401757233198, "grad_norm": 1.0525816281768863, "learning_rate": 2.007404413552112e-06, "loss": 0.8386, "step": 8945 }, { "epoch": 0.8010297162683978, "grad_norm": 0.94800814523983, "learning_rate": 2.005661716898654e-06, "loss": 0.8122, "step": 8946 }, { "epoch": 0.8011192568134758, "grad_norm": 0.9973954804133057, "learning_rate": 2.0039196927028813e-06, "loss": 0.8486, "step": 8947 }, { "epoch": 0.8012087973585539, "grad_norm": 0.9492069303956361, "learning_rate": 2.002178341111327e-06, "loss": 0.8136, "step": 8948 }, { "epoch": 0.801298337903632, "grad_norm": 0.8953903843025304, "learning_rate": 2.000437662270471e-06, "loss": 0.7751, "step": 8949 }, { "epoch": 0.8013878784487101, "grad_norm": 1.0461058352497297, "learning_rate": 1.998697656326729e-06, "loss": 0.7899, "step": 8950 }, { "epoch": 0.8014774189937881, "grad_norm": 1.0657325396807649, "learning_rate": 1.9969583234264635e-06, "loss": 0.8365, "step": 8951 }, { "epoch": 0.8015669595388661, "grad_norm": 1.0461092662547886, "learning_rate": 1.9952196637159858e-06, "loss": 0.7823, "step": 8952 }, { "epoch": 0.8016565000839443, "grad_norm": 1.0027703698822077, "learning_rate": 1.9934816773415457e-06, "loss": 0.7849, "step": 8953 }, { "epoch": 0.8017460406290223, "grad_norm": 0.912991787207638, "learning_rate": 1.9917443644493352e-06, "loss": 0.8102, "step": 8954 }, { "epoch": 0.8018355811741004, "grad_norm": 0.9053533387917336, "learning_rate": 1.9900077251854955e-06, "loss": 0.8204, "step": 8955 }, { "epoch": 0.8019251217191785, "grad_norm": 1.0362582077310218, "learning_rate": 1.9882717596961e-06, "loss": 0.836, "step": 8956 }, { "epoch": 0.8020146622642566, "grad_norm": 0.9850006238838357, "learning_rate": 1.986536468127175e-06, "loss": 0.7874, "step": 8957 }, { "epoch": 0.8021042028093346, "grad_norm": 1.1007564617007124, "learning_rate": 1.9848018506246904e-06, "loss": 0.7908, "step": 8958 }, { "epoch": 0.8021937433544126, "grad_norm": 1.0173632481032941, "learning_rate": 1.983067907334556e-06, "loss": 0.7868, "step": 8959 }, { "epoch": 0.8022832838994908, "grad_norm": 0.9528036035216374, "learning_rate": 1.9813346384026266e-06, "loss": 0.7655, "step": 8960 }, { "epoch": 0.8023728244445688, "grad_norm": 0.8933264277553895, "learning_rate": 1.9796020439746943e-06, "loss": 0.7737, "step": 8961 }, { "epoch": 0.8024623649896468, "grad_norm": 1.0618710977185855, "learning_rate": 1.9778701241965017e-06, "loss": 0.8419, "step": 8962 }, { "epoch": 0.802551905534725, "grad_norm": 0.9342727698834351, "learning_rate": 1.9761388792137303e-06, "loss": 0.7727, "step": 8963 }, { "epoch": 0.802641446079803, "grad_norm": 0.9471890502671254, "learning_rate": 1.9744083091720113e-06, "loss": 0.8067, "step": 8964 }, { "epoch": 0.8027309866248811, "grad_norm": 0.9572770499155245, "learning_rate": 1.972678414216912e-06, "loss": 0.8307, "step": 8965 }, { "epoch": 0.8028205271699591, "grad_norm": 1.0299877446627645, "learning_rate": 1.9709491944939485e-06, "loss": 0.7562, "step": 8966 }, { "epoch": 0.8029100677150373, "grad_norm": 1.3289082813277708, "learning_rate": 1.9692206501485724e-06, "loss": 0.727, "step": 8967 }, { "epoch": 0.8029996082601153, "grad_norm": 0.9556713289414934, "learning_rate": 1.967492781326186e-06, "loss": 0.8191, "step": 8968 }, { "epoch": 0.8030891488051933, "grad_norm": 0.9903434912599169, "learning_rate": 1.9657655881721272e-06, "loss": 0.7727, "step": 8969 }, { "epoch": 0.8031786893502714, "grad_norm": 0.9714167184849704, "learning_rate": 1.964039070831689e-06, "loss": 0.8093, "step": 8970 }, { "epoch": 0.8032682298953495, "grad_norm": 0.9616262558505945, "learning_rate": 1.9623132294501e-06, "loss": 0.762, "step": 8971 }, { "epoch": 0.8033577704404276, "grad_norm": 0.926812601022747, "learning_rate": 1.9605880641725273e-06, "loss": 0.7901, "step": 8972 }, { "epoch": 0.8034473109855056, "grad_norm": 0.973963233507621, "learning_rate": 1.958863575144089e-06, "loss": 0.7921, "step": 8973 }, { "epoch": 0.8035368515305837, "grad_norm": 0.9438660338460594, "learning_rate": 1.957139762509842e-06, "loss": 0.7852, "step": 8974 }, { "epoch": 0.8036263920756618, "grad_norm": 1.0748574364098864, "learning_rate": 1.955416626414787e-06, "loss": 0.8176, "step": 8975 }, { "epoch": 0.8037159326207398, "grad_norm": 1.0185709993695307, "learning_rate": 1.9536941670038745e-06, "loss": 0.8195, "step": 8976 }, { "epoch": 0.8038054731658179, "grad_norm": 0.9936771906554822, "learning_rate": 1.9519723844219875e-06, "loss": 0.7944, "step": 8977 }, { "epoch": 0.803895013710896, "grad_norm": 0.9189253310546502, "learning_rate": 1.950251278813956e-06, "loss": 0.8333, "step": 8978 }, { "epoch": 0.803984554255974, "grad_norm": 0.9987581782225002, "learning_rate": 1.948530850324556e-06, "loss": 0.8034, "step": 8979 }, { "epoch": 0.8040740948010521, "grad_norm": 1.0702214711120264, "learning_rate": 1.946811099098502e-06, "loss": 0.8843, "step": 8980 }, { "epoch": 0.8041636353461302, "grad_norm": 0.9673717659861186, "learning_rate": 1.9450920252804573e-06, "loss": 0.7986, "step": 8981 }, { "epoch": 0.8042531758912083, "grad_norm": 1.0194964070380141, "learning_rate": 1.943373629015022e-06, "loss": 0.8358, "step": 8982 }, { "epoch": 0.8043427164362863, "grad_norm": 0.9341431057917803, "learning_rate": 1.9416559104467425e-06, "loss": 0.8095, "step": 8983 }, { "epoch": 0.8044322569813643, "grad_norm": 0.9701940667830611, "learning_rate": 1.939938869720108e-06, "loss": 0.7714, "step": 8984 }, { "epoch": 0.8045217975264425, "grad_norm": 0.9386588546553041, "learning_rate": 1.9382225069795513e-06, "loss": 0.7655, "step": 8985 }, { "epoch": 0.8046113380715205, "grad_norm": 0.9701659143995255, "learning_rate": 1.936506822369446e-06, "loss": 0.7828, "step": 8986 }, { "epoch": 0.8047008786165986, "grad_norm": 1.0617948837381164, "learning_rate": 1.9347918160341105e-06, "loss": 0.816, "step": 8987 }, { "epoch": 0.8047904191616766, "grad_norm": 1.013032603969619, "learning_rate": 1.9330774881178047e-06, "loss": 0.7807, "step": 8988 }, { "epoch": 0.8048799597067547, "grad_norm": 0.9618764284654682, "learning_rate": 1.931363838764733e-06, "loss": 0.8189, "step": 8989 }, { "epoch": 0.8049695002518328, "grad_norm": 0.9992129156814863, "learning_rate": 1.9296508681190416e-06, "loss": 0.8257, "step": 8990 }, { "epoch": 0.8050590407969108, "grad_norm": 1.023404451436432, "learning_rate": 1.9279385763248214e-06, "loss": 0.8043, "step": 8991 }, { "epoch": 0.805148581341989, "grad_norm": 0.9402971998786305, "learning_rate": 1.926226963526103e-06, "loss": 0.793, "step": 8992 }, { "epoch": 0.805238121887067, "grad_norm": 1.0251500827856768, "learning_rate": 1.9245160298668632e-06, "loss": 0.7668, "step": 8993 }, { "epoch": 0.805327662432145, "grad_norm": 0.9440613142332002, "learning_rate": 1.9228057754910177e-06, "loss": 0.758, "step": 8994 }, { "epoch": 0.8054172029772231, "grad_norm": 0.9957975700036773, "learning_rate": 1.9210962005424305e-06, "loss": 0.7904, "step": 8995 }, { "epoch": 0.8055067435223012, "grad_norm": 1.0292565840136336, "learning_rate": 1.9193873051649036e-06, "loss": 0.7429, "step": 8996 }, { "epoch": 0.8055962840673793, "grad_norm": 0.9651295273645711, "learning_rate": 1.917679089502185e-06, "loss": 0.7496, "step": 8997 }, { "epoch": 0.8056858246124573, "grad_norm": 1.1209103157727258, "learning_rate": 1.9159715536979628e-06, "loss": 0.7708, "step": 8998 }, { "epoch": 0.8057753651575355, "grad_norm": 0.9491936847010795, "learning_rate": 1.91426469789587e-06, "loss": 0.7895, "step": 8999 }, { "epoch": 0.8058649057026135, "grad_norm": 0.9843853048397123, "learning_rate": 1.9125585222394814e-06, "loss": 0.8142, "step": 9000 }, { "epoch": 0.8059544462476915, "grad_norm": 0.9248184534867555, "learning_rate": 1.9108530268723167e-06, "loss": 0.7868, "step": 9001 }, { "epoch": 0.8060439867927696, "grad_norm": 2.4815782543065072, "learning_rate": 1.9091482119378346e-06, "loss": 0.7689, "step": 9002 }, { "epoch": 0.8061335273378477, "grad_norm": 0.9641198330162245, "learning_rate": 1.907444077579439e-06, "loss": 0.805, "step": 9003 }, { "epoch": 0.8062230678829257, "grad_norm": 0.9518216044462596, "learning_rate": 1.9057406239404786e-06, "loss": 0.7435, "step": 9004 }, { "epoch": 0.8063126084280038, "grad_norm": 1.0207956289552649, "learning_rate": 1.9040378511642355e-06, "loss": 0.7873, "step": 9005 }, { "epoch": 0.8064021489730818, "grad_norm": 0.9125418437638111, "learning_rate": 1.9023357593939485e-06, "loss": 0.7586, "step": 9006 }, { "epoch": 0.80649168951816, "grad_norm": 0.898497349168295, "learning_rate": 1.9006343487727896e-06, "loss": 0.8054, "step": 9007 }, { "epoch": 0.806581230063238, "grad_norm": 1.0471157932414012, "learning_rate": 1.8989336194438756e-06, "loss": 0.7878, "step": 9008 }, { "epoch": 0.806670770608316, "grad_norm": 0.9772046484422434, "learning_rate": 1.8972335715502687e-06, "loss": 0.8302, "step": 9009 }, { "epoch": 0.8067603111533942, "grad_norm": 0.8422422952886176, "learning_rate": 1.895534205234968e-06, "loss": 0.7769, "step": 9010 }, { "epoch": 0.8068498516984722, "grad_norm": 0.9477424942326121, "learning_rate": 1.8938355206409165e-06, "loss": 0.8012, "step": 9011 }, { "epoch": 0.8069393922435503, "grad_norm": 0.9926723351100647, "learning_rate": 1.892137517911008e-06, "loss": 0.8174, "step": 9012 }, { "epoch": 0.8070289327886283, "grad_norm": 0.9384424732056108, "learning_rate": 1.8904401971880703e-06, "loss": 0.8092, "step": 9013 }, { "epoch": 0.8071184733337065, "grad_norm": 1.1235009937124925, "learning_rate": 1.8887435586148772e-06, "loss": 0.8677, "step": 9014 }, { "epoch": 0.8072080138787845, "grad_norm": 1.334879947944591, "learning_rate": 1.8870476023341456e-06, "loss": 0.7917, "step": 9015 }, { "epoch": 0.8072975544238625, "grad_norm": 0.9836151490059016, "learning_rate": 1.8853523284885289e-06, "loss": 0.7931, "step": 9016 }, { "epoch": 0.8073870949689407, "grad_norm": 1.1408229155960663, "learning_rate": 1.88365773722063e-06, "loss": 0.8078, "step": 9017 }, { "epoch": 0.8074766355140187, "grad_norm": 0.9659798979190097, "learning_rate": 1.8819638286729946e-06, "loss": 0.8172, "step": 9018 }, { "epoch": 0.8075661760590968, "grad_norm": 0.8996621295845219, "learning_rate": 1.8802706029881091e-06, "loss": 0.7165, "step": 9019 }, { "epoch": 0.8076557166041748, "grad_norm": 0.8734692229072873, "learning_rate": 1.8785780603084025e-06, "loss": 0.7852, "step": 9020 }, { "epoch": 0.8077452571492529, "grad_norm": 0.9943321898823613, "learning_rate": 1.8768862007762412e-06, "loss": 0.8056, "step": 9021 }, { "epoch": 0.807834797694331, "grad_norm": 1.0360163371134286, "learning_rate": 1.8751950245339423e-06, "loss": 0.8224, "step": 9022 }, { "epoch": 0.807924338239409, "grad_norm": 0.9718435555357828, "learning_rate": 1.8735045317237587e-06, "loss": 0.7754, "step": 9023 }, { "epoch": 0.808013878784487, "grad_norm": 1.0950285025808022, "learning_rate": 1.8718147224878957e-06, "loss": 0.79, "step": 9024 }, { "epoch": 0.8081034193295652, "grad_norm": 0.9394027106476904, "learning_rate": 1.8701255969684894e-06, "loss": 0.7703, "step": 9025 }, { "epoch": 0.8081929598746432, "grad_norm": 0.9515404013027152, "learning_rate": 1.8684371553076286e-06, "loss": 0.8428, "step": 9026 }, { "epoch": 0.8082825004197213, "grad_norm": 0.951663853325569, "learning_rate": 1.8667493976473329e-06, "loss": 0.798, "step": 9027 }, { "epoch": 0.8083720409647994, "grad_norm": 0.9158310426768923, "learning_rate": 1.8650623241295751e-06, "loss": 0.7826, "step": 9028 }, { "epoch": 0.8084615815098775, "grad_norm": 0.9809458921575934, "learning_rate": 1.863375934896261e-06, "loss": 0.8296, "step": 9029 }, { "epoch": 0.8085511220549555, "grad_norm": 1.0488940449923219, "learning_rate": 1.8616902300892525e-06, "loss": 0.8786, "step": 9030 }, { "epoch": 0.8086406626000335, "grad_norm": 0.9310478720524025, "learning_rate": 1.8600052098503429e-06, "loss": 0.7921, "step": 9031 }, { "epoch": 0.8087302031451117, "grad_norm": 0.8923906876205239, "learning_rate": 1.8583208743212667e-06, "loss": 0.7494, "step": 9032 }, { "epoch": 0.8088197436901897, "grad_norm": 1.0374518233457615, "learning_rate": 1.856637223643708e-06, "loss": 0.8167, "step": 9033 }, { "epoch": 0.8089092842352678, "grad_norm": 0.9555493499064428, "learning_rate": 1.8549542579592894e-06, "loss": 0.7989, "step": 9034 }, { "epoch": 0.8089988247803459, "grad_norm": 0.9618246492738148, "learning_rate": 1.8532719774095754e-06, "loss": 0.7885, "step": 9035 }, { "epoch": 0.8090883653254239, "grad_norm": 1.0478558988006612, "learning_rate": 1.8515903821360748e-06, "loss": 0.7878, "step": 9036 }, { "epoch": 0.809177905870502, "grad_norm": 1.00081799822429, "learning_rate": 1.849909472280239e-06, "loss": 0.8131, "step": 9037 }, { "epoch": 0.80926744641558, "grad_norm": 0.9798296674399435, "learning_rate": 1.8482292479834585e-06, "loss": 0.7511, "step": 9038 }, { "epoch": 0.8093569869606582, "grad_norm": 0.9522369483262472, "learning_rate": 1.846549709387071e-06, "loss": 0.8029, "step": 9039 }, { "epoch": 0.8094465275057362, "grad_norm": 0.9777865284301476, "learning_rate": 1.8448708566323504e-06, "loss": 0.7802, "step": 9040 }, { "epoch": 0.8095360680508142, "grad_norm": 1.0360109034449243, "learning_rate": 1.84319268986052e-06, "loss": 0.8556, "step": 9041 }, { "epoch": 0.8096256085958923, "grad_norm": 0.9287199009580602, "learning_rate": 1.8415152092127385e-06, "loss": 0.8181, "step": 9042 }, { "epoch": 0.8097151491409704, "grad_norm": 1.0905452087378134, "learning_rate": 1.839838414830112e-06, "loss": 0.7808, "step": 9043 }, { "epoch": 0.8098046896860485, "grad_norm": 0.9269534373620009, "learning_rate": 1.838162306853687e-06, "loss": 0.7616, "step": 9044 }, { "epoch": 0.8098942302311265, "grad_norm": 1.1089000055966205, "learning_rate": 1.836486885424451e-06, "loss": 0.8305, "step": 9045 }, { "epoch": 0.8099837707762046, "grad_norm": 0.9528507633010177, "learning_rate": 1.834812150683336e-06, "loss": 0.7761, "step": 9046 }, { "epoch": 0.8100733113212827, "grad_norm": 0.9565485973385509, "learning_rate": 1.8331381027712148e-06, "loss": 0.8461, "step": 9047 }, { "epoch": 0.8101628518663607, "grad_norm": 1.0530314267042464, "learning_rate": 1.8314647418289033e-06, "loss": 0.8313, "step": 9048 }, { "epoch": 0.8102523924114388, "grad_norm": 0.9849000516574142, "learning_rate": 1.8297920679971593e-06, "loss": 0.8424, "step": 9049 }, { "epoch": 0.8103419329565169, "grad_norm": 0.8783724017762079, "learning_rate": 1.8281200814166811e-06, "loss": 0.7447, "step": 9050 }, { "epoch": 0.810431473501595, "grad_norm": 0.9625116514655508, "learning_rate": 1.8264487822281129e-06, "loss": 0.8088, "step": 9051 }, { "epoch": 0.810521014046673, "grad_norm": 0.8736516958671962, "learning_rate": 1.8247781705720368e-06, "loss": 0.7942, "step": 9052 }, { "epoch": 0.8106105545917511, "grad_norm": 0.8781099315761819, "learning_rate": 1.8231082465889816e-06, "loss": 0.7582, "step": 9053 }, { "epoch": 0.8107000951368292, "grad_norm": 0.9408665291923171, "learning_rate": 1.8214390104194146e-06, "loss": 0.776, "step": 9054 }, { "epoch": 0.8107896356819072, "grad_norm": 1.0173987332719001, "learning_rate": 1.819770462203746e-06, "loss": 0.8246, "step": 9055 }, { "epoch": 0.8108791762269852, "grad_norm": 0.8885261141706097, "learning_rate": 1.818102602082329e-06, "loss": 0.7424, "step": 9056 }, { "epoch": 0.8109687167720634, "grad_norm": 0.9017281813579983, "learning_rate": 1.816435430195459e-06, "loss": 0.7791, "step": 9057 }, { "epoch": 0.8110582573171414, "grad_norm": 0.9636264621245374, "learning_rate": 1.8147689466833751e-06, "loss": 0.8164, "step": 9058 }, { "epoch": 0.8111477978622195, "grad_norm": 1.022273926864252, "learning_rate": 1.8131031516862495e-06, "loss": 0.8404, "step": 9059 }, { "epoch": 0.8112373384072975, "grad_norm": 1.0339494915947232, "learning_rate": 1.8114380453442104e-06, "loss": 0.7785, "step": 9060 }, { "epoch": 0.8113268789523757, "grad_norm": 1.0753261623958086, "learning_rate": 1.8097736277973189e-06, "loss": 0.7878, "step": 9061 }, { "epoch": 0.8114164194974537, "grad_norm": 0.9721020292082916, "learning_rate": 1.8081098991855806e-06, "loss": 0.8314, "step": 9062 }, { "epoch": 0.8115059600425317, "grad_norm": 1.0037477020254262, "learning_rate": 1.8064468596489427e-06, "loss": 0.8409, "step": 9063 }, { "epoch": 0.8115955005876099, "grad_norm": 1.0850523193778836, "learning_rate": 1.8047845093272964e-06, "loss": 0.7395, "step": 9064 }, { "epoch": 0.8116850411326879, "grad_norm": 0.9214504427182194, "learning_rate": 1.8031228483604668e-06, "loss": 0.7985, "step": 9065 }, { "epoch": 0.811774581677766, "grad_norm": 0.9515984446943759, "learning_rate": 1.8014618768882341e-06, "loss": 0.8172, "step": 9066 }, { "epoch": 0.811864122222844, "grad_norm": 0.9326935235062704, "learning_rate": 1.7998015950503124e-06, "loss": 0.7632, "step": 9067 }, { "epoch": 0.8119536627679221, "grad_norm": 0.9241165834407706, "learning_rate": 1.7981420029863583e-06, "loss": 0.7857, "step": 9068 }, { "epoch": 0.8120432033130002, "grad_norm": 1.0706026999787563, "learning_rate": 1.796483100835974e-06, "loss": 0.7949, "step": 9069 }, { "epoch": 0.8121327438580782, "grad_norm": 1.009278519007451, "learning_rate": 1.7948248887386953e-06, "loss": 0.8273, "step": 9070 }, { "epoch": 0.8122222844031564, "grad_norm": 0.9863841231043998, "learning_rate": 1.7931673668340067e-06, "loss": 0.8525, "step": 9071 }, { "epoch": 0.8123118249482344, "grad_norm": 0.9207652243726682, "learning_rate": 1.7915105352613382e-06, "loss": 0.8076, "step": 9072 }, { "epoch": 0.8124013654933124, "grad_norm": 1.039691479378543, "learning_rate": 1.7898543941600545e-06, "loss": 0.7804, "step": 9073 }, { "epoch": 0.8124909060383905, "grad_norm": 1.023316502493222, "learning_rate": 1.7881989436694647e-06, "loss": 0.7553, "step": 9074 }, { "epoch": 0.8125804465834686, "grad_norm": 1.0037677411105437, "learning_rate": 1.7865441839288223e-06, "loss": 0.8536, "step": 9075 }, { "epoch": 0.8126699871285467, "grad_norm": 1.1567209641057226, "learning_rate": 1.7848901150773158e-06, "loss": 0.792, "step": 9076 }, { "epoch": 0.8127595276736247, "grad_norm": 1.0149760458111703, "learning_rate": 1.7832367372540782e-06, "loss": 0.8372, "step": 9077 }, { "epoch": 0.8128490682187027, "grad_norm": 1.018290762249544, "learning_rate": 1.7815840505981941e-06, "loss": 0.8011, "step": 9078 }, { "epoch": 0.8129386087637809, "grad_norm": 0.9643589252719816, "learning_rate": 1.7799320552486787e-06, "loss": 0.7837, "step": 9079 }, { "epoch": 0.8130281493088589, "grad_norm": 1.0683162125501195, "learning_rate": 1.7782807513444933e-06, "loss": 0.8417, "step": 9080 }, { "epoch": 0.813117689853937, "grad_norm": 1.04389470365152, "learning_rate": 1.7766301390245367e-06, "loss": 0.8626, "step": 9081 }, { "epoch": 0.8132072303990151, "grad_norm": 0.9898304079679849, "learning_rate": 1.7749802184276565e-06, "loss": 0.8385, "step": 9082 }, { "epoch": 0.8132967709440931, "grad_norm": 1.1054341022626595, "learning_rate": 1.7733309896926331e-06, "loss": 0.8031, "step": 9083 }, { "epoch": 0.8133863114891712, "grad_norm": 0.9326003788303517, "learning_rate": 1.7716824529582022e-06, "loss": 0.7452, "step": 9084 }, { "epoch": 0.8134758520342492, "grad_norm": 0.9437263567551972, "learning_rate": 1.7700346083630294e-06, "loss": 0.8014, "step": 9085 }, { "epoch": 0.8135653925793274, "grad_norm": 1.09413995171599, "learning_rate": 1.7683874560457293e-06, "loss": 0.6972, "step": 9086 }, { "epoch": 0.8136549331244054, "grad_norm": 0.9299337256106801, "learning_rate": 1.766740996144849e-06, "loss": 0.8263, "step": 9087 }, { "epoch": 0.8137444736694834, "grad_norm": 0.9965748048985237, "learning_rate": 1.7650952287988864e-06, "loss": 0.7311, "step": 9088 }, { "epoch": 0.8138340142145616, "grad_norm": 1.0780259608646732, "learning_rate": 1.763450154146279e-06, "loss": 0.7629, "step": 9089 }, { "epoch": 0.8139235547596396, "grad_norm": 1.0536936232897605, "learning_rate": 1.7618057723254e-06, "loss": 0.811, "step": 9090 }, { "epoch": 0.8140130953047177, "grad_norm": 0.950342848517687, "learning_rate": 1.7601620834745791e-06, "loss": 0.785, "step": 9091 }, { "epoch": 0.8141026358497957, "grad_norm": 0.9795322908673348, "learning_rate": 1.7585190877320712e-06, "loss": 0.8134, "step": 9092 }, { "epoch": 0.8141921763948738, "grad_norm": 0.9764738213255899, "learning_rate": 1.7568767852360802e-06, "loss": 0.8177, "step": 9093 }, { "epoch": 0.8142817169399519, "grad_norm": 0.9857086552388424, "learning_rate": 1.7552351761247521e-06, "loss": 0.8754, "step": 9094 }, { "epoch": 0.8143712574850299, "grad_norm": 1.059198494810535, "learning_rate": 1.7535942605361733e-06, "loss": 0.8001, "step": 9095 }, { "epoch": 0.814460798030108, "grad_norm": 1.0463127052209766, "learning_rate": 1.751954038608371e-06, "loss": 0.8165, "step": 9096 }, { "epoch": 0.8145503385751861, "grad_norm": 1.0947461984123277, "learning_rate": 1.7503145104793219e-06, "loss": 0.7826, "step": 9097 }, { "epoch": 0.8146398791202641, "grad_norm": 0.9464998720916529, "learning_rate": 1.7486756762869294e-06, "loss": 0.7874, "step": 9098 }, { "epoch": 0.8147294196653422, "grad_norm": 1.0015918885181458, "learning_rate": 1.7470375361690516e-06, "loss": 0.7692, "step": 9099 }, { "epoch": 0.8148189602104203, "grad_norm": 1.0306190451817843, "learning_rate": 1.7454000902634827e-06, "loss": 0.7904, "step": 9100 }, { "epoch": 0.8149085007554984, "grad_norm": 0.9261621358196472, "learning_rate": 1.7437633387079577e-06, "loss": 0.7835, "step": 9101 }, { "epoch": 0.8149980413005764, "grad_norm": 0.9538858836129114, "learning_rate": 1.7421272816401557e-06, "loss": 0.807, "step": 9102 }, { "epoch": 0.8150875818456544, "grad_norm": 0.9270530209460515, "learning_rate": 1.7404919191976976e-06, "loss": 0.8181, "step": 9103 }, { "epoch": 0.8151771223907326, "grad_norm": 1.117927887546288, "learning_rate": 1.7388572515181445e-06, "loss": 0.8074, "step": 9104 }, { "epoch": 0.8152666629358106, "grad_norm": 1.0353780028344353, "learning_rate": 1.7372232787389986e-06, "loss": 0.8107, "step": 9105 }, { "epoch": 0.8153562034808887, "grad_norm": 0.8845052714273995, "learning_rate": 1.7355900009977033e-06, "loss": 0.8136, "step": 9106 }, { "epoch": 0.8154457440259668, "grad_norm": 0.9914004997255265, "learning_rate": 1.7339574184316477e-06, "loss": 0.7549, "step": 9107 }, { "epoch": 0.8155352845710448, "grad_norm": 0.9340670857185971, "learning_rate": 1.7323255311781561e-06, "loss": 0.7961, "step": 9108 }, { "epoch": 0.8156248251161229, "grad_norm": 1.0394660673709044, "learning_rate": 1.730694339374499e-06, "loss": 0.8025, "step": 9109 }, { "epoch": 0.8157143656612009, "grad_norm": 1.0395710288181919, "learning_rate": 1.7290638431578877e-06, "loss": 0.7955, "step": 9110 }, { "epoch": 0.8158039062062791, "grad_norm": 0.9526695488679436, "learning_rate": 1.7274340426654723e-06, "loss": 0.7381, "step": 9111 }, { "epoch": 0.8158934467513571, "grad_norm": 1.0950757537838844, "learning_rate": 1.7258049380343478e-06, "loss": 0.789, "step": 9112 }, { "epoch": 0.8159829872964351, "grad_norm": 0.9263183840058052, "learning_rate": 1.724176529401549e-06, "loss": 0.8154, "step": 9113 }, { "epoch": 0.8160725278415132, "grad_norm": 0.9414962992120856, "learning_rate": 1.7225488169040517e-06, "loss": 0.8146, "step": 9114 }, { "epoch": 0.8161620683865913, "grad_norm": 1.1476430170162337, "learning_rate": 1.7209218006787743e-06, "loss": 0.8504, "step": 9115 }, { "epoch": 0.8162516089316694, "grad_norm": 0.9565402618708234, "learning_rate": 1.7192954808625761e-06, "loss": 0.8439, "step": 9116 }, { "epoch": 0.8163411494767474, "grad_norm": 0.9676346829665454, "learning_rate": 1.7176698575922578e-06, "loss": 0.8177, "step": 9117 }, { "epoch": 0.8164306900218256, "grad_norm": 1.056012494171971, "learning_rate": 1.7160449310045647e-06, "loss": 0.8021, "step": 9118 }, { "epoch": 0.8165202305669036, "grad_norm": 0.8938925752786546, "learning_rate": 1.7144207012361702e-06, "loss": 0.7927, "step": 9119 }, { "epoch": 0.8166097711119816, "grad_norm": 1.0012587382378806, "learning_rate": 1.7127971684237098e-06, "loss": 0.769, "step": 9120 }, { "epoch": 0.8166993116570597, "grad_norm": 0.9422885751322433, "learning_rate": 1.7111743327037456e-06, "loss": 0.7889, "step": 9121 }, { "epoch": 0.8167888522021378, "grad_norm": 1.0644087579372201, "learning_rate": 1.7095521942127858e-06, "loss": 0.8156, "step": 9122 }, { "epoch": 0.8168783927472159, "grad_norm": 1.037733722579703, "learning_rate": 1.7079307530872802e-06, "loss": 0.7964, "step": 9123 }, { "epoch": 0.8169679332922939, "grad_norm": 1.3007389805184877, "learning_rate": 1.7063100094636197e-06, "loss": 0.7785, "step": 9124 }, { "epoch": 0.817057473837372, "grad_norm": 1.0102029791112401, "learning_rate": 1.7046899634781288e-06, "loss": 0.841, "step": 9125 }, { "epoch": 0.8171470143824501, "grad_norm": 0.9301832897170735, "learning_rate": 1.7030706152670905e-06, "loss": 0.8352, "step": 9126 }, { "epoch": 0.8172365549275281, "grad_norm": 0.8942958971853152, "learning_rate": 1.7014519649667138e-06, "loss": 0.7373, "step": 9127 }, { "epoch": 0.8173260954726062, "grad_norm": 0.9558940281828195, "learning_rate": 1.699834012713155e-06, "loss": 0.824, "step": 9128 }, { "epoch": 0.8174156360176843, "grad_norm": 1.0510158365875901, "learning_rate": 1.6982167586425146e-06, "loss": 0.8194, "step": 9129 }, { "epoch": 0.8175051765627623, "grad_norm": 0.98709440994425, "learning_rate": 1.6966002028908246e-06, "loss": 0.7975, "step": 9130 }, { "epoch": 0.8175947171078404, "grad_norm": 1.0226995747486753, "learning_rate": 1.694984345594065e-06, "loss": 0.8143, "step": 9131 }, { "epoch": 0.8176842576529184, "grad_norm": 0.9771757651457715, "learning_rate": 1.6933691868881608e-06, "loss": 0.8534, "step": 9132 }, { "epoch": 0.8177737981979966, "grad_norm": 0.8734747723253398, "learning_rate": 1.6917547269089717e-06, "loss": 0.7403, "step": 9133 }, { "epoch": 0.8178633387430746, "grad_norm": 1.0412913925180924, "learning_rate": 1.6901409657923006e-06, "loss": 0.7687, "step": 9134 }, { "epoch": 0.8179528792881526, "grad_norm": 0.9167815390618094, "learning_rate": 1.6885279036738944e-06, "loss": 0.8025, "step": 9135 }, { "epoch": 0.8180424198332308, "grad_norm": 1.0294977935043785, "learning_rate": 1.6869155406894344e-06, "loss": 0.784, "step": 9136 }, { "epoch": 0.8181319603783088, "grad_norm": 1.1020025788532832, "learning_rate": 1.6853038769745466e-06, "loss": 0.8073, "step": 9137 }, { "epoch": 0.8182215009233869, "grad_norm": 0.9707815393568654, "learning_rate": 1.683692912664805e-06, "loss": 0.7648, "step": 9138 }, { "epoch": 0.8183110414684649, "grad_norm": 0.9603154102121777, "learning_rate": 1.6820826478957143e-06, "loss": 0.8094, "step": 9139 }, { "epoch": 0.818400582013543, "grad_norm": 0.9114923130754732, "learning_rate": 1.6804730828027272e-06, "loss": 0.7777, "step": 9140 }, { "epoch": 0.8184901225586211, "grad_norm": 1.0943239557510362, "learning_rate": 1.6788642175212321e-06, "loss": 0.7878, "step": 9141 }, { "epoch": 0.8185796631036991, "grad_norm": 0.9574895839061504, "learning_rate": 1.6772560521865633e-06, "loss": 0.815, "step": 9142 }, { "epoch": 0.8186692036487773, "grad_norm": 0.8948124875995853, "learning_rate": 1.6756485869339933e-06, "loss": 0.7436, "step": 9143 }, { "epoch": 0.8187587441938553, "grad_norm": 0.9198239299965093, "learning_rate": 1.6740418218987354e-06, "loss": 0.8164, "step": 9144 }, { "epoch": 0.8188482847389333, "grad_norm": 0.891341362014982, "learning_rate": 1.67243575721595e-06, "loss": 0.7542, "step": 9145 }, { "epoch": 0.8189378252840114, "grad_norm": 0.9044440709544029, "learning_rate": 1.6708303930207337e-06, "loss": 0.812, "step": 9146 }, { "epoch": 0.8190273658290895, "grad_norm": 0.9199080611609968, "learning_rate": 1.6692257294481208e-06, "loss": 0.8108, "step": 9147 }, { "epoch": 0.8191169063741676, "grad_norm": 0.9429174098182886, "learning_rate": 1.6676217666330907e-06, "loss": 0.801, "step": 9148 }, { "epoch": 0.8192064469192456, "grad_norm": 1.0290953756465717, "learning_rate": 1.666018504710566e-06, "loss": 0.8494, "step": 9149 }, { "epoch": 0.8192959874643236, "grad_norm": 1.0138900284964079, "learning_rate": 1.664415943815404e-06, "loss": 0.8642, "step": 9150 }, { "epoch": 0.8193855280094018, "grad_norm": 1.0057291841627611, "learning_rate": 1.6628140840824147e-06, "loss": 0.7483, "step": 9151 }, { "epoch": 0.8194750685544798, "grad_norm": 0.9543478609799284, "learning_rate": 1.6612129256463338e-06, "loss": 0.8043, "step": 9152 }, { "epoch": 0.8195646090995579, "grad_norm": 0.9841818418448113, "learning_rate": 1.659612468641847e-06, "loss": 0.817, "step": 9153 }, { "epoch": 0.819654149644636, "grad_norm": 0.9571140494556872, "learning_rate": 1.6580127132035817e-06, "loss": 0.7799, "step": 9154 }, { "epoch": 0.819743690189714, "grad_norm": 1.0088624085943356, "learning_rate": 1.6564136594661017e-06, "loss": 0.8152, "step": 9155 }, { "epoch": 0.8198332307347921, "grad_norm": 0.9464452231263504, "learning_rate": 1.654815307563914e-06, "loss": 0.7438, "step": 9156 }, { "epoch": 0.8199227712798701, "grad_norm": 1.0144955586733013, "learning_rate": 1.653217657631473e-06, "loss": 0.7847, "step": 9157 }, { "epoch": 0.8200123118249483, "grad_norm": 0.9768462844772362, "learning_rate": 1.65162070980316e-06, "loss": 0.7693, "step": 9158 }, { "epoch": 0.8201018523700263, "grad_norm": 1.0884047583704985, "learning_rate": 1.6500244642133078e-06, "loss": 0.7825, "step": 9159 }, { "epoch": 0.8201913929151043, "grad_norm": 0.9339344409165125, "learning_rate": 1.6484289209961879e-06, "loss": 0.8146, "step": 9160 }, { "epoch": 0.8202809334601825, "grad_norm": 0.9068256653221051, "learning_rate": 1.6468340802860117e-06, "loss": 0.7639, "step": 9161 }, { "epoch": 0.8203704740052605, "grad_norm": 0.9222521094481316, "learning_rate": 1.645239942216933e-06, "loss": 0.7643, "step": 9162 }, { "epoch": 0.8204600145503386, "grad_norm": 1.0492799877752386, "learning_rate": 1.6436465069230433e-06, "loss": 0.8327, "step": 9163 }, { "epoch": 0.8205495550954166, "grad_norm": 1.1293354754093645, "learning_rate": 1.6420537745383792e-06, "loss": 0.7911, "step": 9164 }, { "epoch": 0.8206390956404948, "grad_norm": 0.947684421848324, "learning_rate": 1.6404617451969164e-06, "loss": 0.8406, "step": 9165 }, { "epoch": 0.8207286361855728, "grad_norm": 0.9300337340114597, "learning_rate": 1.6388704190325689e-06, "loss": 0.7727, "step": 9166 }, { "epoch": 0.8208181767306508, "grad_norm": 1.0567045786637472, "learning_rate": 1.6372797961791963e-06, "loss": 0.7541, "step": 9167 }, { "epoch": 0.8209077172757289, "grad_norm": 1.1165041998293563, "learning_rate": 1.6356898767705954e-06, "loss": 0.8368, "step": 9168 }, { "epoch": 0.820997257820807, "grad_norm": 0.9232689996094305, "learning_rate": 1.6341006609405052e-06, "loss": 0.7968, "step": 9169 }, { "epoch": 0.821086798365885, "grad_norm": 1.0159918299533541, "learning_rate": 1.6325121488226048e-06, "loss": 0.8167, "step": 9170 }, { "epoch": 0.8211763389109631, "grad_norm": 0.9505530199345926, "learning_rate": 1.630924340550516e-06, "loss": 0.8125, "step": 9171 }, { "epoch": 0.8212658794560412, "grad_norm": 1.0239086428024038, "learning_rate": 1.6293372362577987e-06, "loss": 0.744, "step": 9172 }, { "epoch": 0.8213554200011193, "grad_norm": 0.9055839607828072, "learning_rate": 1.627750836077956e-06, "loss": 0.7712, "step": 9173 }, { "epoch": 0.8214449605461973, "grad_norm": 0.9687670522957229, "learning_rate": 1.62616514014443e-06, "loss": 0.8063, "step": 9174 }, { "epoch": 0.8215345010912753, "grad_norm": 0.960832720858117, "learning_rate": 1.6245801485906054e-06, "loss": 0.8012, "step": 9175 }, { "epoch": 0.8216240416363535, "grad_norm": 0.9516673183138459, "learning_rate": 1.6229958615498054e-06, "loss": 0.7827, "step": 9176 }, { "epoch": 0.8217135821814315, "grad_norm": 0.9873237747377506, "learning_rate": 1.6214122791552944e-06, "loss": 0.7949, "step": 9177 }, { "epoch": 0.8218031227265096, "grad_norm": 0.9839296072803839, "learning_rate": 1.6198294015402827e-06, "loss": 0.793, "step": 9178 }, { "epoch": 0.8218926632715877, "grad_norm": 1.0397672912973064, "learning_rate": 1.618247228837908e-06, "loss": 0.8768, "step": 9179 }, { "epoch": 0.8219822038166658, "grad_norm": 0.9561157259825637, "learning_rate": 1.6166657611812654e-06, "loss": 0.8666, "step": 9180 }, { "epoch": 0.8220717443617438, "grad_norm": 1.2394992987577216, "learning_rate": 1.6150849987033802e-06, "loss": 0.8312, "step": 9181 }, { "epoch": 0.8221612849068218, "grad_norm": 1.0349530451013844, "learning_rate": 1.6135049415372195e-06, "loss": 0.7656, "step": 9182 }, { "epoch": 0.8222508254519, "grad_norm": 1.0151965663059215, "learning_rate": 1.611925589815696e-06, "loss": 0.7905, "step": 9183 }, { "epoch": 0.822340365996978, "grad_norm": 0.9846615685850826, "learning_rate": 1.610346943671659e-06, "loss": 0.766, "step": 9184 }, { "epoch": 0.822429906542056, "grad_norm": 0.9091793031559229, "learning_rate": 1.6087690032378933e-06, "loss": 0.7719, "step": 9185 }, { "epoch": 0.8225194470871341, "grad_norm": 1.0666489614879002, "learning_rate": 1.6071917686471362e-06, "loss": 0.8238, "step": 9186 }, { "epoch": 0.8226089876322122, "grad_norm": 1.0723383842729917, "learning_rate": 1.605615240032059e-06, "loss": 0.7847, "step": 9187 }, { "epoch": 0.8226985281772903, "grad_norm": 0.9485737636242153, "learning_rate": 1.6040394175252716e-06, "loss": 0.8309, "step": 9188 }, { "epoch": 0.8227880687223683, "grad_norm": 1.0675102720548943, "learning_rate": 1.6024643012593322e-06, "loss": 0.7813, "step": 9189 }, { "epoch": 0.8228776092674465, "grad_norm": 1.0372239350562886, "learning_rate": 1.600889891366727e-06, "loss": 0.7881, "step": 9190 }, { "epoch": 0.8229671498125245, "grad_norm": 0.9858053192255403, "learning_rate": 1.5993161879798946e-06, "loss": 0.7443, "step": 9191 }, { "epoch": 0.8230566903576025, "grad_norm": 0.9889827557822123, "learning_rate": 1.597743191231207e-06, "loss": 0.7353, "step": 9192 }, { "epoch": 0.8231462309026806, "grad_norm": 1.0491242341336773, "learning_rate": 1.5961709012529836e-06, "loss": 0.8253, "step": 9193 }, { "epoch": 0.8232357714477587, "grad_norm": 1.1062179401072145, "learning_rate": 1.5945993181774788e-06, "loss": 0.828, "step": 9194 }, { "epoch": 0.8233253119928368, "grad_norm": 1.1394212359660891, "learning_rate": 1.5930284421368914e-06, "loss": 0.7919, "step": 9195 }, { "epoch": 0.8234148525379148, "grad_norm": 0.9651201321965137, "learning_rate": 1.5914582732633521e-06, "loss": 0.8341, "step": 9196 }, { "epoch": 0.823504393082993, "grad_norm": 1.0505817915242417, "learning_rate": 1.5898888116889433e-06, "loss": 0.8397, "step": 9197 }, { "epoch": 0.823593933628071, "grad_norm": 0.9858376974959439, "learning_rate": 1.588320057545678e-06, "loss": 0.8039, "step": 9198 }, { "epoch": 0.823683474173149, "grad_norm": 1.045461343998573, "learning_rate": 1.586752010965521e-06, "loss": 0.7997, "step": 9199 }, { "epoch": 0.8237730147182271, "grad_norm": 0.890178143421997, "learning_rate": 1.585184672080371e-06, "loss": 0.8287, "step": 9200 }, { "epoch": 0.8238625552633052, "grad_norm": 1.1799860808709588, "learning_rate": 1.5836180410220625e-06, "loss": 0.809, "step": 9201 }, { "epoch": 0.8239520958083832, "grad_norm": 1.0470790376313244, "learning_rate": 1.582052117922378e-06, "loss": 0.7813, "step": 9202 }, { "epoch": 0.8240416363534613, "grad_norm": 0.9427023414271469, "learning_rate": 1.5804869029130376e-06, "loss": 0.817, "step": 9203 }, { "epoch": 0.8241311768985393, "grad_norm": 0.9368987079816568, "learning_rate": 1.5789223961257005e-06, "loss": 0.7696, "step": 9204 }, { "epoch": 0.8242207174436175, "grad_norm": 1.005571873751868, "learning_rate": 1.5773585976919715e-06, "loss": 0.7678, "step": 9205 }, { "epoch": 0.8243102579886955, "grad_norm": 1.249774771628386, "learning_rate": 1.5757955077433929e-06, "loss": 0.7995, "step": 9206 }, { "epoch": 0.8243997985337735, "grad_norm": 0.9776102026288237, "learning_rate": 1.5742331264114418e-06, "loss": 0.7944, "step": 9207 }, { "epoch": 0.8244893390788517, "grad_norm": 0.9775295456228488, "learning_rate": 1.5726714538275422e-06, "loss": 0.8154, "step": 9208 }, { "epoch": 0.8245788796239297, "grad_norm": 0.9944785496211577, "learning_rate": 1.5711104901230589e-06, "loss": 0.8113, "step": 9209 }, { "epoch": 0.8246684201690078, "grad_norm": 1.0221743521165492, "learning_rate": 1.5695502354292913e-06, "loss": 0.825, "step": 9210 }, { "epoch": 0.8247579607140858, "grad_norm": 0.9390009504352447, "learning_rate": 1.56799068987749e-06, "loss": 0.7913, "step": 9211 }, { "epoch": 0.824847501259164, "grad_norm": 1.06663363657489, "learning_rate": 1.5664318535988322e-06, "loss": 0.8245, "step": 9212 }, { "epoch": 0.824937041804242, "grad_norm": 0.9765250631641107, "learning_rate": 1.564873726724444e-06, "loss": 0.7851, "step": 9213 }, { "epoch": 0.82502658234932, "grad_norm": 1.1271073533720082, "learning_rate": 1.563316309385391e-06, "loss": 0.832, "step": 9214 }, { "epoch": 0.8251161228943982, "grad_norm": 0.8825484903561781, "learning_rate": 1.561759601712677e-06, "loss": 0.775, "step": 9215 }, { "epoch": 0.8252056634394762, "grad_norm": 0.9666564524882509, "learning_rate": 1.5602036038372448e-06, "loss": 0.7703, "step": 9216 }, { "epoch": 0.8252952039845542, "grad_norm": 0.9592847466797919, "learning_rate": 1.558648315889988e-06, "loss": 0.7733, "step": 9217 }, { "epoch": 0.8253847445296323, "grad_norm": 1.0453895211312276, "learning_rate": 1.5570937380017248e-06, "loss": 0.7985, "step": 9218 }, { "epoch": 0.8254742850747104, "grad_norm": 1.0088715171851086, "learning_rate": 1.5555398703032232e-06, "loss": 0.8167, "step": 9219 }, { "epoch": 0.8255638256197885, "grad_norm": 1.1091935613251864, "learning_rate": 1.5539867129251895e-06, "loss": 0.7815, "step": 9220 }, { "epoch": 0.8256533661648665, "grad_norm": 0.9867309990735131, "learning_rate": 1.5524342659982705e-06, "loss": 0.8135, "step": 9221 }, { "epoch": 0.8257429067099445, "grad_norm": 1.037690460207771, "learning_rate": 1.5508825296530538e-06, "loss": 0.8167, "step": 9222 }, { "epoch": 0.8258324472550227, "grad_norm": 1.1443965799166527, "learning_rate": 1.549331504020064e-06, "loss": 0.7607, "step": 9223 }, { "epoch": 0.8259219878001007, "grad_norm": 0.9857718855950449, "learning_rate": 1.5477811892297711e-06, "loss": 0.8073, "step": 9224 }, { "epoch": 0.8260115283451788, "grad_norm": 0.8953967349383639, "learning_rate": 1.5462315854125809e-06, "loss": 0.7925, "step": 9225 }, { "epoch": 0.8261010688902569, "grad_norm": 1.1471323468408094, "learning_rate": 1.5446826926988413e-06, "loss": 0.79, "step": 9226 }, { "epoch": 0.826190609435335, "grad_norm": 0.9559289844372381, "learning_rate": 1.54313451121884e-06, "loss": 0.7574, "step": 9227 }, { "epoch": 0.826280149980413, "grad_norm": 0.9314018233105323, "learning_rate": 1.5415870411028055e-06, "loss": 0.8101, "step": 9228 }, { "epoch": 0.826369690525491, "grad_norm": 1.0730597275060598, "learning_rate": 1.5400402824809058e-06, "loss": 0.8546, "step": 9229 }, { "epoch": 0.8264592310705692, "grad_norm": 1.0304056660382788, "learning_rate": 1.538494235483249e-06, "loss": 0.8124, "step": 9230 }, { "epoch": 0.8265487716156472, "grad_norm": 1.0031773331721223, "learning_rate": 1.536948900239883e-06, "loss": 0.8088, "step": 9231 }, { "epoch": 0.8266383121607253, "grad_norm": 2.237930828070287, "learning_rate": 1.5354042768807976e-06, "loss": 0.7928, "step": 9232 }, { "epoch": 0.8267278527058034, "grad_norm": 1.012665118530479, "learning_rate": 1.5338603655359196e-06, "loss": 0.7512, "step": 9233 }, { "epoch": 0.8268173932508814, "grad_norm": 1.0529608926019303, "learning_rate": 1.53231716633512e-06, "loss": 0.8361, "step": 9234 }, { "epoch": 0.8269069337959595, "grad_norm": 0.9594525097366378, "learning_rate": 1.5307746794082067e-06, "loss": 0.7584, "step": 9235 }, { "epoch": 0.8269964743410375, "grad_norm": 0.9729025477307098, "learning_rate": 1.5292329048849286e-06, "loss": 0.8067, "step": 9236 }, { "epoch": 0.8270860148861157, "grad_norm": 0.9590582861006676, "learning_rate": 1.527691842894975e-06, "loss": 0.8043, "step": 9237 }, { "epoch": 0.8271755554311937, "grad_norm": 0.9811386062725952, "learning_rate": 1.5261514935679743e-06, "loss": 0.8501, "step": 9238 }, { "epoch": 0.8272650959762717, "grad_norm": 0.9329749402570326, "learning_rate": 1.5246118570334967e-06, "loss": 0.7679, "step": 9239 }, { "epoch": 0.8273546365213498, "grad_norm": 1.2280015508950233, "learning_rate": 1.5230729334210514e-06, "loss": 0.8507, "step": 9240 }, { "epoch": 0.8274441770664279, "grad_norm": 1.533775043861544, "learning_rate": 1.5215347228600863e-06, "loss": 0.8106, "step": 9241 }, { "epoch": 0.827533717611506, "grad_norm": 1.0116777039512166, "learning_rate": 1.5199972254799922e-06, "loss": 0.8491, "step": 9242 }, { "epoch": 0.827623258156584, "grad_norm": 0.8647606308651872, "learning_rate": 1.5184604414100968e-06, "loss": 0.7981, "step": 9243 }, { "epoch": 0.8277127987016621, "grad_norm": 0.9719311309186743, "learning_rate": 1.5169243707796732e-06, "loss": 0.7701, "step": 9244 }, { "epoch": 0.8278023392467402, "grad_norm": 0.9843673073988213, "learning_rate": 1.515389013717925e-06, "loss": 0.842, "step": 9245 }, { "epoch": 0.8278918797918182, "grad_norm": 1.0501890422294864, "learning_rate": 1.513854370354002e-06, "loss": 0.7878, "step": 9246 }, { "epoch": 0.8279814203368963, "grad_norm": 0.9890706852779633, "learning_rate": 1.5123204408169977e-06, "loss": 0.844, "step": 9247 }, { "epoch": 0.8280709608819744, "grad_norm": 1.0141820878578718, "learning_rate": 1.510787225235939e-06, "loss": 0.8258, "step": 9248 }, { "epoch": 0.8281605014270524, "grad_norm": 0.9799480830212686, "learning_rate": 1.509254723739797e-06, "loss": 0.7823, "step": 9249 }, { "epoch": 0.8282500419721305, "grad_norm": 1.024470028594685, "learning_rate": 1.5077229364574774e-06, "loss": 0.8335, "step": 9250 }, { "epoch": 0.8283395825172086, "grad_norm": 0.974423238209472, "learning_rate": 1.5061918635178307e-06, "loss": 0.743, "step": 9251 }, { "epoch": 0.8284291230622867, "grad_norm": 1.2024733000253067, "learning_rate": 1.5046615050496427e-06, "loss": 0.8302, "step": 9252 }, { "epoch": 0.8285186636073647, "grad_norm": 0.9296230197348008, "learning_rate": 1.503131861181647e-06, "loss": 0.7308, "step": 9253 }, { "epoch": 0.8286082041524427, "grad_norm": 1.0468800928722324, "learning_rate": 1.501602932042512e-06, "loss": 0.8, "step": 9254 }, { "epoch": 0.8286977446975209, "grad_norm": 1.0140774743385133, "learning_rate": 1.5000747177608454e-06, "loss": 0.8336, "step": 9255 }, { "epoch": 0.8287872852425989, "grad_norm": 1.0817005554862047, "learning_rate": 1.4985472184651927e-06, "loss": 0.7979, "step": 9256 }, { "epoch": 0.828876825787677, "grad_norm": 0.9296353754479479, "learning_rate": 1.4970204342840445e-06, "loss": 0.7689, "step": 9257 }, { "epoch": 0.828966366332755, "grad_norm": 1.0514594903982553, "learning_rate": 1.4954943653458265e-06, "loss": 0.8541, "step": 9258 }, { "epoch": 0.8290559068778331, "grad_norm": 1.0047022855627736, "learning_rate": 1.4939690117789107e-06, "loss": 0.8241, "step": 9259 }, { "epoch": 0.8291454474229112, "grad_norm": 0.9546997951028352, "learning_rate": 1.4924443737116057e-06, "loss": 0.7803, "step": 9260 }, { "epoch": 0.8292349879679892, "grad_norm": 1.0025659437999093, "learning_rate": 1.4909204512721542e-06, "loss": 0.7863, "step": 9261 }, { "epoch": 0.8293245285130674, "grad_norm": 1.021572472609321, "learning_rate": 1.4893972445887451e-06, "loss": 0.8062, "step": 9262 }, { "epoch": 0.8294140690581454, "grad_norm": 0.995831239039463, "learning_rate": 1.4878747537895067e-06, "loss": 0.822, "step": 9263 }, { "epoch": 0.8295036096032234, "grad_norm": 1.00713756948133, "learning_rate": 1.4863529790025033e-06, "loss": 0.7974, "step": 9264 }, { "epoch": 0.8295931501483015, "grad_norm": 0.9769879958124396, "learning_rate": 1.484831920355746e-06, "loss": 0.7848, "step": 9265 }, { "epoch": 0.8296826906933796, "grad_norm": 1.0131127152634398, "learning_rate": 1.4833115779771813e-06, "loss": 0.8145, "step": 9266 }, { "epoch": 0.8297722312384577, "grad_norm": 1.0208204266027203, "learning_rate": 1.4817919519946922e-06, "loss": 0.8004, "step": 9267 }, { "epoch": 0.8298617717835357, "grad_norm": 1.0645144060282736, "learning_rate": 1.4802730425361044e-06, "loss": 0.8782, "step": 9268 }, { "epoch": 0.8299513123286139, "grad_norm": 0.9571386361415842, "learning_rate": 1.4787548497291848e-06, "loss": 0.7679, "step": 9269 }, { "epoch": 0.8300408528736919, "grad_norm": 1.0541557617426582, "learning_rate": 1.4772373737016376e-06, "loss": 0.7635, "step": 9270 }, { "epoch": 0.8301303934187699, "grad_norm": 0.9478972451152083, "learning_rate": 1.4757206145811143e-06, "loss": 0.821, "step": 9271 }, { "epoch": 0.830219933963848, "grad_norm": 0.905756682549673, "learning_rate": 1.4742045724951914e-06, "loss": 0.7899, "step": 9272 }, { "epoch": 0.8303094745089261, "grad_norm": 0.9036745208599757, "learning_rate": 1.4726892475713972e-06, "loss": 0.7094, "step": 9273 }, { "epoch": 0.8303990150540042, "grad_norm": 0.9283629046460248, "learning_rate": 1.4711746399371952e-06, "loss": 0.8262, "step": 9274 }, { "epoch": 0.8304885555990822, "grad_norm": 1.06349578407657, "learning_rate": 1.46966074971999e-06, "loss": 0.8181, "step": 9275 }, { "epoch": 0.8305780961441602, "grad_norm": 1.0553265729155814, "learning_rate": 1.4681475770471254e-06, "loss": 0.8635, "step": 9276 }, { "epoch": 0.8306676366892384, "grad_norm": 1.0515576618831197, "learning_rate": 1.466635122045883e-06, "loss": 0.7972, "step": 9277 }, { "epoch": 0.8307571772343164, "grad_norm": 1.0474234154596638, "learning_rate": 1.4651233848434865e-06, "loss": 0.8418, "step": 9278 }, { "epoch": 0.8308467177793945, "grad_norm": 1.0402734793578703, "learning_rate": 1.4636123655670976e-06, "loss": 0.8143, "step": 9279 }, { "epoch": 0.8309362583244726, "grad_norm": 1.057697969715789, "learning_rate": 1.46210206434382e-06, "loss": 0.7857, "step": 9280 }, { "epoch": 0.8310257988695506, "grad_norm": 1.0150548984305776, "learning_rate": 1.4605924813006933e-06, "loss": 0.8479, "step": 9281 }, { "epoch": 0.8311153394146287, "grad_norm": 1.1847705855892516, "learning_rate": 1.4590836165647003e-06, "loss": 0.7807, "step": 9282 }, { "epoch": 0.8312048799597067, "grad_norm": 0.9834288682085118, "learning_rate": 1.457575470262762e-06, "loss": 0.8285, "step": 9283 }, { "epoch": 0.8312944205047849, "grad_norm": 0.932081910083776, "learning_rate": 1.4560680425217365e-06, "loss": 0.7469, "step": 9284 }, { "epoch": 0.8313839610498629, "grad_norm": 1.0037831945446654, "learning_rate": 1.4545613334684262e-06, "loss": 0.7566, "step": 9285 }, { "epoch": 0.8314735015949409, "grad_norm": 0.9652530058535604, "learning_rate": 1.453055343229568e-06, "loss": 0.8238, "step": 9286 }, { "epoch": 0.8315630421400191, "grad_norm": 1.0231788973172407, "learning_rate": 1.4515500719318432e-06, "loss": 0.7806, "step": 9287 }, { "epoch": 0.8316525826850971, "grad_norm": 1.0123969257649366, "learning_rate": 1.4500455197018703e-06, "loss": 0.7895, "step": 9288 }, { "epoch": 0.8317421232301752, "grad_norm": 0.9670798000656414, "learning_rate": 1.448541686666205e-06, "loss": 0.8152, "step": 9289 }, { "epoch": 0.8318316637752532, "grad_norm": 1.0400110586077793, "learning_rate": 1.447038572951347e-06, "loss": 0.8141, "step": 9290 }, { "epoch": 0.8319212043203313, "grad_norm": 1.0225821726761233, "learning_rate": 1.445536178683733e-06, "loss": 0.8163, "step": 9291 }, { "epoch": 0.8320107448654094, "grad_norm": 1.0627421404061446, "learning_rate": 1.4440345039897398e-06, "loss": 0.8295, "step": 9292 }, { "epoch": 0.8321002854104874, "grad_norm": 0.9494389265736692, "learning_rate": 1.4425335489956816e-06, "loss": 0.7581, "step": 9293 }, { "epoch": 0.8321898259555655, "grad_norm": 0.9597468636537545, "learning_rate": 1.4410333138278153e-06, "loss": 0.8062, "step": 9294 }, { "epoch": 0.8322793665006436, "grad_norm": 1.4483754003333227, "learning_rate": 1.4395337986123349e-06, "loss": 0.8566, "step": 9295 }, { "epoch": 0.8323689070457216, "grad_norm": 0.9990419177062249, "learning_rate": 1.4380350034753766e-06, "loss": 0.8201, "step": 9296 }, { "epoch": 0.8324584475907997, "grad_norm": 0.99089138471879, "learning_rate": 1.436536928543012e-06, "loss": 0.8225, "step": 9297 }, { "epoch": 0.8325479881358778, "grad_norm": 0.9454435431021928, "learning_rate": 1.435039573941256e-06, "loss": 0.8183, "step": 9298 }, { "epoch": 0.8326375286809559, "grad_norm": 0.9321080031700404, "learning_rate": 1.433542939796062e-06, "loss": 0.7969, "step": 9299 }, { "epoch": 0.8327270692260339, "grad_norm": 1.0628322003806028, "learning_rate": 1.4320470262333154e-06, "loss": 0.7814, "step": 9300 }, { "epoch": 0.8328166097711119, "grad_norm": 0.9396604359453226, "learning_rate": 1.430551833378856e-06, "loss": 0.764, "step": 9301 }, { "epoch": 0.8329061503161901, "grad_norm": 1.0256126135806607, "learning_rate": 1.4290573613584502e-06, "loss": 0.7862, "step": 9302 }, { "epoch": 0.8329956908612681, "grad_norm": 0.9503959428146012, "learning_rate": 1.4275636102978086e-06, "loss": 0.863, "step": 9303 }, { "epoch": 0.8330852314063462, "grad_norm": 1.0330595563695593, "learning_rate": 1.4260705803225838e-06, "loss": 0.8502, "step": 9304 }, { "epoch": 0.8331747719514243, "grad_norm": 0.9235747800319499, "learning_rate": 1.42457827155836e-06, "loss": 0.7862, "step": 9305 }, { "epoch": 0.8332643124965023, "grad_norm": 1.031778751467731, "learning_rate": 1.4230866841306645e-06, "loss": 0.8428, "step": 9306 }, { "epoch": 0.8333538530415804, "grad_norm": 0.942230496291779, "learning_rate": 1.4215958181649702e-06, "loss": 0.8061, "step": 9307 }, { "epoch": 0.8334433935866584, "grad_norm": 1.1862723260445964, "learning_rate": 1.4201056737866813e-06, "loss": 0.8087, "step": 9308 }, { "epoch": 0.8335329341317366, "grad_norm": 0.9128967073761088, "learning_rate": 1.4186162511211454e-06, "loss": 0.7819, "step": 9309 }, { "epoch": 0.8336224746768146, "grad_norm": 1.1165664576287428, "learning_rate": 1.4171275502936445e-06, "loss": 0.845, "step": 9310 }, { "epoch": 0.8337120152218926, "grad_norm": 1.2055046566607368, "learning_rate": 1.4156395714294048e-06, "loss": 0.8091, "step": 9311 }, { "epoch": 0.8338015557669707, "grad_norm": 0.9953980491177246, "learning_rate": 1.4141523146535886e-06, "loss": 0.7361, "step": 9312 }, { "epoch": 0.8338910963120488, "grad_norm": 1.0263834808837446, "learning_rate": 1.4126657800913023e-06, "loss": 0.7348, "step": 9313 }, { "epoch": 0.8339806368571269, "grad_norm": 0.8972194798939946, "learning_rate": 1.4111799678675875e-06, "loss": 0.8014, "step": 9314 }, { "epoch": 0.8340701774022049, "grad_norm": 0.9795184640822597, "learning_rate": 1.4096948781074282e-06, "loss": 0.8355, "step": 9315 }, { "epoch": 0.834159717947283, "grad_norm": 0.9818780129950107, "learning_rate": 1.4082105109357403e-06, "loss": 0.7737, "step": 9316 }, { "epoch": 0.8342492584923611, "grad_norm": 1.047338751982747, "learning_rate": 1.4067268664773849e-06, "loss": 0.7772, "step": 9317 }, { "epoch": 0.8343387990374391, "grad_norm": 0.9508092693964137, "learning_rate": 1.4052439448571608e-06, "loss": 0.7661, "step": 9318 }, { "epoch": 0.8344283395825172, "grad_norm": 0.9844442963406824, "learning_rate": 1.403761746199811e-06, "loss": 0.7726, "step": 9319 }, { "epoch": 0.8345178801275953, "grad_norm": 0.9420578197626516, "learning_rate": 1.402280270630013e-06, "loss": 0.7966, "step": 9320 }, { "epoch": 0.8346074206726734, "grad_norm": 0.9826005784325806, "learning_rate": 1.4007995182723778e-06, "loss": 0.7962, "step": 9321 }, { "epoch": 0.8346969612177514, "grad_norm": 1.1032863816983074, "learning_rate": 1.399319489251466e-06, "loss": 0.8509, "step": 9322 }, { "epoch": 0.8347865017628295, "grad_norm": 1.0446112990473353, "learning_rate": 1.397840183691771e-06, "loss": 0.789, "step": 9323 }, { "epoch": 0.8348760423079076, "grad_norm": 1.332732400053229, "learning_rate": 1.3963616017177262e-06, "loss": 0.8648, "step": 9324 }, { "epoch": 0.8349655828529856, "grad_norm": 1.0416975658695535, "learning_rate": 1.3948837434537087e-06, "loss": 0.7559, "step": 9325 }, { "epoch": 0.8350551233980636, "grad_norm": 1.0853684461501492, "learning_rate": 1.3934066090240306e-06, "loss": 0.8591, "step": 9326 }, { "epoch": 0.8351446639431418, "grad_norm": 1.063084288549793, "learning_rate": 1.391930198552941e-06, "loss": 0.827, "step": 9327 }, { "epoch": 0.8352342044882198, "grad_norm": 1.0863449712139108, "learning_rate": 1.3904545121646319e-06, "loss": 0.7476, "step": 9328 }, { "epoch": 0.8353237450332979, "grad_norm": 1.1092637286053433, "learning_rate": 1.3889795499832327e-06, "loss": 0.7889, "step": 9329 }, { "epoch": 0.8354132855783759, "grad_norm": 0.920590489969962, "learning_rate": 1.3875053121328142e-06, "loss": 0.7789, "step": 9330 }, { "epoch": 0.8355028261234541, "grad_norm": 1.257558246405282, "learning_rate": 1.3860317987373817e-06, "loss": 0.8177, "step": 9331 }, { "epoch": 0.8355923666685321, "grad_norm": 0.9926872985998035, "learning_rate": 1.384559009920885e-06, "loss": 0.8713, "step": 9332 }, { "epoch": 0.8356819072136101, "grad_norm": 0.9619027877371132, "learning_rate": 1.3830869458072083e-06, "loss": 0.7901, "step": 9333 }, { "epoch": 0.8357714477586883, "grad_norm": 1.006640321906385, "learning_rate": 1.3816156065201791e-06, "loss": 0.8241, "step": 9334 }, { "epoch": 0.8358609883037663, "grad_norm": 1.0515813212075948, "learning_rate": 1.3801449921835585e-06, "loss": 0.7816, "step": 9335 }, { "epoch": 0.8359505288488444, "grad_norm": 1.1303103018572864, "learning_rate": 1.3786751029210532e-06, "loss": 0.7634, "step": 9336 }, { "epoch": 0.8360400693939224, "grad_norm": 0.9616497506280306, "learning_rate": 1.377205938856303e-06, "loss": 0.775, "step": 9337 }, { "epoch": 0.8361296099390005, "grad_norm": 0.9979300401969639, "learning_rate": 1.3757375001128903e-06, "loss": 0.7357, "step": 9338 }, { "epoch": 0.8362191504840786, "grad_norm": 1.0464891806927004, "learning_rate": 1.3742697868143362e-06, "loss": 0.8075, "step": 9339 }, { "epoch": 0.8363086910291566, "grad_norm": 0.9893442732004429, "learning_rate": 1.3728027990840976e-06, "loss": 0.779, "step": 9340 }, { "epoch": 0.8363982315742348, "grad_norm": 0.9805203776882536, "learning_rate": 1.3713365370455744e-06, "loss": 0.8177, "step": 9341 }, { "epoch": 0.8364877721193128, "grad_norm": 0.9634067576564073, "learning_rate": 1.369871000822104e-06, "loss": 0.8197, "step": 9342 }, { "epoch": 0.8365773126643908, "grad_norm": 1.0091130284816987, "learning_rate": 1.3684061905369606e-06, "loss": 0.8026, "step": 9343 }, { "epoch": 0.8366668532094689, "grad_norm": 0.9888378950541664, "learning_rate": 1.3669421063133626e-06, "loss": 0.8234, "step": 9344 }, { "epoch": 0.836756393754547, "grad_norm": 1.0693551686954252, "learning_rate": 1.3654787482744601e-06, "loss": 0.8529, "step": 9345 }, { "epoch": 0.8368459342996251, "grad_norm": 1.115279424313577, "learning_rate": 1.3640161165433496e-06, "loss": 0.7989, "step": 9346 }, { "epoch": 0.8369354748447031, "grad_norm": 0.962231855705234, "learning_rate": 1.36255421124306e-06, "loss": 0.8294, "step": 9347 }, { "epoch": 0.8370250153897811, "grad_norm": 1.0958877398234657, "learning_rate": 1.3610930324965643e-06, "loss": 0.8553, "step": 9348 }, { "epoch": 0.8371145559348593, "grad_norm": 1.0083307553447438, "learning_rate": 1.3596325804267696e-06, "loss": 0.8021, "step": 9349 }, { "epoch": 0.8372040964799373, "grad_norm": 1.0260208232843464, "learning_rate": 1.3581728551565275e-06, "loss": 0.8414, "step": 9350 }, { "epoch": 0.8372936370250154, "grad_norm": 1.0181013358918505, "learning_rate": 1.3567138568086225e-06, "loss": 0.8092, "step": 9351 }, { "epoch": 0.8373831775700935, "grad_norm": 0.979063610338897, "learning_rate": 1.3552555855057825e-06, "loss": 0.8085, "step": 9352 }, { "epoch": 0.8374727181151715, "grad_norm": 0.9732686826197213, "learning_rate": 1.3537980413706742e-06, "loss": 0.7682, "step": 9353 }, { "epoch": 0.8375622586602496, "grad_norm": 1.0298868342013812, "learning_rate": 1.3523412245258948e-06, "loss": 0.8597, "step": 9354 }, { "epoch": 0.8376517992053276, "grad_norm": 1.0210219595458543, "learning_rate": 1.3508851350939934e-06, "loss": 0.8428, "step": 9355 }, { "epoch": 0.8377413397504058, "grad_norm": 0.9491656805823092, "learning_rate": 1.3494297731974494e-06, "loss": 0.7323, "step": 9356 }, { "epoch": 0.8378308802954838, "grad_norm": 1.020970130401167, "learning_rate": 1.3479751389586836e-06, "loss": 0.8393, "step": 9357 }, { "epoch": 0.8379204208405618, "grad_norm": 1.1289678333450825, "learning_rate": 1.3465212325000543e-06, "loss": 0.8579, "step": 9358 }, { "epoch": 0.83800996138564, "grad_norm": 0.9965978905279886, "learning_rate": 1.345068053943863e-06, "loss": 0.8405, "step": 9359 }, { "epoch": 0.838099501930718, "grad_norm": 1.1431984683165126, "learning_rate": 1.3436156034123383e-06, "loss": 0.7505, "step": 9360 }, { "epoch": 0.8381890424757961, "grad_norm": 0.9878026435496984, "learning_rate": 1.3421638810276615e-06, "loss": 0.8349, "step": 9361 }, { "epoch": 0.8382785830208741, "grad_norm": 0.9758222306741415, "learning_rate": 1.3407128869119469e-06, "loss": 0.8536, "step": 9362 }, { "epoch": 0.8383681235659523, "grad_norm": 0.9576380617256895, "learning_rate": 1.3392626211872462e-06, "loss": 0.763, "step": 9363 }, { "epoch": 0.8384576641110303, "grad_norm": 1.191263813692219, "learning_rate": 1.3378130839755533e-06, "loss": 0.8132, "step": 9364 }, { "epoch": 0.8385472046561083, "grad_norm": 0.9698702345466614, "learning_rate": 1.3363642753987938e-06, "loss": 0.8074, "step": 9365 }, { "epoch": 0.8386367452011864, "grad_norm": 1.3110961185862187, "learning_rate": 1.334916195578837e-06, "loss": 0.7987, "step": 9366 }, { "epoch": 0.8387262857462645, "grad_norm": 0.907938899406801, "learning_rate": 1.3334688446374944e-06, "loss": 0.808, "step": 9367 }, { "epoch": 0.8388158262913425, "grad_norm": 0.9478701278421511, "learning_rate": 1.3320222226965119e-06, "loss": 0.759, "step": 9368 }, { "epoch": 0.8389053668364206, "grad_norm": 1.0981178770515205, "learning_rate": 1.3305763298775732e-06, "loss": 0.8437, "step": 9369 }, { "epoch": 0.8389949073814987, "grad_norm": 0.9380162544959776, "learning_rate": 1.3291311663023055e-06, "loss": 0.809, "step": 9370 }, { "epoch": 0.8390844479265768, "grad_norm": 0.9163900798218562, "learning_rate": 1.327686732092265e-06, "loss": 0.8224, "step": 9371 }, { "epoch": 0.8391739884716548, "grad_norm": 1.0593361852416765, "learning_rate": 1.3262430273689542e-06, "loss": 0.8304, "step": 9372 }, { "epoch": 0.8392635290167328, "grad_norm": 1.065417561373481, "learning_rate": 1.3248000522538174e-06, "loss": 0.8049, "step": 9373 }, { "epoch": 0.839353069561811, "grad_norm": 1.0355679906606496, "learning_rate": 1.3233578068682295e-06, "loss": 0.8436, "step": 9374 }, { "epoch": 0.839442610106889, "grad_norm": 0.9814774237493498, "learning_rate": 1.3219162913335115e-06, "loss": 0.7769, "step": 9375 }, { "epoch": 0.8395321506519671, "grad_norm": 1.0055103996469403, "learning_rate": 1.320475505770913e-06, "loss": 0.7948, "step": 9376 }, { "epoch": 0.8396216911970452, "grad_norm": 0.9898926534419442, "learning_rate": 1.3190354503016312e-06, "loss": 0.8329, "step": 9377 }, { "epoch": 0.8397112317421233, "grad_norm": 1.3350544758262883, "learning_rate": 1.3175961250467962e-06, "loss": 0.7893, "step": 9378 }, { "epoch": 0.8398007722872013, "grad_norm": 1.0981814832288939, "learning_rate": 1.3161575301274832e-06, "loss": 0.8341, "step": 9379 }, { "epoch": 0.8398903128322793, "grad_norm": 0.9564197075065075, "learning_rate": 1.3147196656647044e-06, "loss": 0.8154, "step": 9380 }, { "epoch": 0.8399798533773575, "grad_norm": 0.9200920513060068, "learning_rate": 1.3132825317794019e-06, "loss": 0.8035, "step": 9381 }, { "epoch": 0.8400693939224355, "grad_norm": 1.0014036790061887, "learning_rate": 1.3118461285924643e-06, "loss": 0.8191, "step": 9382 }, { "epoch": 0.8401589344675136, "grad_norm": 0.9189910846009759, "learning_rate": 1.31041045622472e-06, "loss": 0.7865, "step": 9383 }, { "epoch": 0.8402484750125916, "grad_norm": 0.9199667695563013, "learning_rate": 1.3089755147969297e-06, "loss": 0.7727, "step": 9384 }, { "epoch": 0.8403380155576697, "grad_norm": 0.9918103194931872, "learning_rate": 1.3075413044297969e-06, "loss": 0.793, "step": 9385 }, { "epoch": 0.8404275561027478, "grad_norm": 1.107784823147746, "learning_rate": 1.3061078252439662e-06, "loss": 0.8187, "step": 9386 }, { "epoch": 0.8405170966478258, "grad_norm": 1.0663026137216804, "learning_rate": 1.3046750773600137e-06, "loss": 0.8763, "step": 9387 }, { "epoch": 0.840606637192904, "grad_norm": 1.1036165674997183, "learning_rate": 1.303243060898457e-06, "loss": 0.8243, "step": 9388 }, { "epoch": 0.840696177737982, "grad_norm": 1.0625703808868427, "learning_rate": 1.3018117759797543e-06, "loss": 0.7785, "step": 9389 }, { "epoch": 0.84078571828306, "grad_norm": 0.9465974307320747, "learning_rate": 1.3003812227243008e-06, "loss": 0.8323, "step": 9390 }, { "epoch": 0.8408752588281381, "grad_norm": 0.9440261369084691, "learning_rate": 1.2989514012524285e-06, "loss": 0.8345, "step": 9391 }, { "epoch": 0.8409647993732162, "grad_norm": 1.2503007591932942, "learning_rate": 1.2975223116844115e-06, "loss": 0.7603, "step": 9392 }, { "epoch": 0.8410543399182943, "grad_norm": 1.0417051341188797, "learning_rate": 1.2960939541404572e-06, "loss": 0.8147, "step": 9393 }, { "epoch": 0.8411438804633723, "grad_norm": 0.9618166110649607, "learning_rate": 1.2946663287407169e-06, "loss": 0.7755, "step": 9394 }, { "epoch": 0.8412334210084504, "grad_norm": 0.9972289893865883, "learning_rate": 1.2932394356052768e-06, "loss": 0.8042, "step": 9395 }, { "epoch": 0.8413229615535285, "grad_norm": 0.9231087083050765, "learning_rate": 1.2918132748541624e-06, "loss": 0.8008, "step": 9396 }, { "epoch": 0.8414125020986065, "grad_norm": 0.9449350453612199, "learning_rate": 1.2903878466073382e-06, "loss": 0.8029, "step": 9397 }, { "epoch": 0.8415020426436846, "grad_norm": 0.9424755848644499, "learning_rate": 1.2889631509847067e-06, "loss": 0.824, "step": 9398 }, { "epoch": 0.8415915831887627, "grad_norm": 1.0619739390075245, "learning_rate": 1.2875391881061072e-06, "loss": 0.8486, "step": 9399 }, { "epoch": 0.8416811237338407, "grad_norm": 1.0490472726070956, "learning_rate": 1.2861159580913207e-06, "loss": 0.8207, "step": 9400 }, { "epoch": 0.8417706642789188, "grad_norm": 0.9426405641807752, "learning_rate": 1.2846934610600636e-06, "loss": 0.8086, "step": 9401 }, { "epoch": 0.8418602048239968, "grad_norm": 0.9954250590077064, "learning_rate": 1.2832716971319914e-06, "loss": 0.7845, "step": 9402 }, { "epoch": 0.841949745369075, "grad_norm": 1.245168462987065, "learning_rate": 1.2818506664266993e-06, "loss": 0.8017, "step": 9403 }, { "epoch": 0.842039285914153, "grad_norm": 1.095330937115553, "learning_rate": 1.28043036906372e-06, "loss": 0.8345, "step": 9404 }, { "epoch": 0.842128826459231, "grad_norm": 1.272543221548517, "learning_rate": 1.2790108051625228e-06, "loss": 0.7979, "step": 9405 }, { "epoch": 0.8422183670043092, "grad_norm": 0.8837808468662629, "learning_rate": 1.277591974842517e-06, "loss": 0.8204, "step": 9406 }, { "epoch": 0.8423079075493872, "grad_norm": 0.9493494481412919, "learning_rate": 1.2761738782230516e-06, "loss": 0.796, "step": 9407 }, { "epoch": 0.8423974480944653, "grad_norm": 0.8743774614321845, "learning_rate": 1.274756515423411e-06, "loss": 0.7685, "step": 9408 }, { "epoch": 0.8424869886395433, "grad_norm": 0.982702787315466, "learning_rate": 1.2733398865628189e-06, "loss": 0.8363, "step": 9409 }, { "epoch": 0.8425765291846214, "grad_norm": 0.9253358754093878, "learning_rate": 1.2719239917604375e-06, "loss": 0.8104, "step": 9410 }, { "epoch": 0.8426660697296995, "grad_norm": 1.0292574948712812, "learning_rate": 1.2705088311353687e-06, "loss": 0.8227, "step": 9411 }, { "epoch": 0.8427556102747775, "grad_norm": 0.9871819714266331, "learning_rate": 1.26909440480665e-06, "loss": 0.8187, "step": 9412 }, { "epoch": 0.8428451508198557, "grad_norm": 0.9567690800315941, "learning_rate": 1.26768071289326e-06, "loss": 0.8322, "step": 9413 }, { "epoch": 0.8429346913649337, "grad_norm": 0.9415354074840518, "learning_rate": 1.266267755514109e-06, "loss": 0.8691, "step": 9414 }, { "epoch": 0.8430242319100117, "grad_norm": 0.9682124829935164, "learning_rate": 1.264855532788055e-06, "loss": 0.8335, "step": 9415 }, { "epoch": 0.8431137724550898, "grad_norm": 1.0098555381199852, "learning_rate": 1.263444044833889e-06, "loss": 0.7765, "step": 9416 }, { "epoch": 0.8432033130001679, "grad_norm": 0.9590123214488422, "learning_rate": 1.2620332917703404e-06, "loss": 0.7641, "step": 9417 }, { "epoch": 0.843292853545246, "grad_norm": 1.0822258328180616, "learning_rate": 1.2606232737160762e-06, "loss": 0.8152, "step": 9418 }, { "epoch": 0.843382394090324, "grad_norm": 1.019164939675285, "learning_rate": 1.259213990789705e-06, "loss": 0.7858, "step": 9419 }, { "epoch": 0.843471934635402, "grad_norm": 0.8942582572575131, "learning_rate": 1.2578054431097664e-06, "loss": 0.7348, "step": 9420 }, { "epoch": 0.8435614751804802, "grad_norm": 0.9997575570124957, "learning_rate": 1.2563976307947467e-06, "loss": 0.8008, "step": 9421 }, { "epoch": 0.8436510157255582, "grad_norm": 1.1527811447071072, "learning_rate": 1.2549905539630659e-06, "loss": 0.8068, "step": 9422 }, { "epoch": 0.8437405562706363, "grad_norm": 0.9740551403720202, "learning_rate": 1.253584212733081e-06, "loss": 0.8084, "step": 9423 }, { "epoch": 0.8438300968157144, "grad_norm": 1.1486142228829237, "learning_rate": 1.2521786072230935e-06, "loss": 0.8221, "step": 9424 }, { "epoch": 0.8439196373607925, "grad_norm": 0.9196442799200877, "learning_rate": 1.250773737551333e-06, "loss": 0.7709, "step": 9425 }, { "epoch": 0.8440091779058705, "grad_norm": 1.238847067470714, "learning_rate": 1.2493696038359726e-06, "loss": 0.7771, "step": 9426 }, { "epoch": 0.8440987184509485, "grad_norm": 1.0484319442145777, "learning_rate": 1.247966206195127e-06, "loss": 0.826, "step": 9427 }, { "epoch": 0.8441882589960267, "grad_norm": 0.974343115951212, "learning_rate": 1.2465635447468437e-06, "loss": 0.8022, "step": 9428 }, { "epoch": 0.8442777995411047, "grad_norm": 1.002867652709638, "learning_rate": 1.2451616196091109e-06, "loss": 0.7698, "step": 9429 }, { "epoch": 0.8443673400861827, "grad_norm": 0.9509962367359098, "learning_rate": 1.243760430899854e-06, "loss": 0.8201, "step": 9430 }, { "epoch": 0.8444568806312609, "grad_norm": 0.9946709587412615, "learning_rate": 1.2423599787369344e-06, "loss": 0.8055, "step": 9431 }, { "epoch": 0.8445464211763389, "grad_norm": 0.9465193414641924, "learning_rate": 1.2409602632381535e-06, "loss": 0.7314, "step": 9432 }, { "epoch": 0.844635961721417, "grad_norm": 0.9932042436517587, "learning_rate": 1.2395612845212534e-06, "loss": 0.7766, "step": 9433 }, { "epoch": 0.844725502266495, "grad_norm": 0.9025696476038836, "learning_rate": 1.2381630427039105e-06, "loss": 0.8107, "step": 9434 }, { "epoch": 0.8448150428115732, "grad_norm": 1.044463165232071, "learning_rate": 1.2367655379037424e-06, "loss": 0.8489, "step": 9435 }, { "epoch": 0.8449045833566512, "grad_norm": 0.9208071850649792, "learning_rate": 1.2353687702382978e-06, "loss": 0.7652, "step": 9436 }, { "epoch": 0.8449941239017292, "grad_norm": 0.9524663946012304, "learning_rate": 1.2339727398250711e-06, "loss": 0.7738, "step": 9437 }, { "epoch": 0.8450836644468073, "grad_norm": 1.0531635333020888, "learning_rate": 1.232577446781492e-06, "loss": 0.8289, "step": 9438 }, { "epoch": 0.8451732049918854, "grad_norm": 0.9994901936326652, "learning_rate": 1.2311828912249258e-06, "loss": 0.8259, "step": 9439 }, { "epoch": 0.8452627455369635, "grad_norm": 0.939832901007213, "learning_rate": 1.2297890732726814e-06, "loss": 0.7599, "step": 9440 }, { "epoch": 0.8453522860820415, "grad_norm": 1.1473839878539012, "learning_rate": 1.228395993042003e-06, "loss": 0.802, "step": 9441 }, { "epoch": 0.8454418266271196, "grad_norm": 0.9741412892641501, "learning_rate": 1.227003650650067e-06, "loss": 0.7797, "step": 9442 }, { "epoch": 0.8455313671721977, "grad_norm": 0.9893199232585013, "learning_rate": 1.2256120462139963e-06, "loss": 0.8683, "step": 9443 }, { "epoch": 0.8456209077172757, "grad_norm": 0.9008887564516423, "learning_rate": 1.2242211798508464e-06, "loss": 0.8504, "step": 9444 }, { "epoch": 0.8457104482623538, "grad_norm": 1.003194637913834, "learning_rate": 1.222831051677611e-06, "loss": 0.8603, "step": 9445 }, { "epoch": 0.8457999888074319, "grad_norm": 0.9742825520050253, "learning_rate": 1.2214416618112302e-06, "loss": 0.8413, "step": 9446 }, { "epoch": 0.8458895293525099, "grad_norm": 1.051911440744658, "learning_rate": 1.220053010368567e-06, "loss": 0.7739, "step": 9447 }, { "epoch": 0.845979069897588, "grad_norm": 1.1145617458488868, "learning_rate": 1.2186650974664337e-06, "loss": 0.7834, "step": 9448 }, { "epoch": 0.8460686104426661, "grad_norm": 1.1002011082265823, "learning_rate": 1.217277923221577e-06, "loss": 0.7859, "step": 9449 }, { "epoch": 0.8461581509877442, "grad_norm": 0.9507004583417564, "learning_rate": 1.215891487750681e-06, "loss": 0.8104, "step": 9450 }, { "epoch": 0.8462476915328222, "grad_norm": 1.0007878599485773, "learning_rate": 1.2145057911703683e-06, "loss": 0.8073, "step": 9451 }, { "epoch": 0.8463372320779002, "grad_norm": 0.9323226864633257, "learning_rate": 1.2131208335971988e-06, "loss": 0.7493, "step": 9452 }, { "epoch": 0.8464267726229784, "grad_norm": 1.0474800338546226, "learning_rate": 1.2117366151476716e-06, "loss": 0.8419, "step": 9453 }, { "epoch": 0.8465163131680564, "grad_norm": 1.0939916402224295, "learning_rate": 1.2103531359382214e-06, "loss": 0.7377, "step": 9454 }, { "epoch": 0.8466058537131345, "grad_norm": 1.082476535830715, "learning_rate": 1.208970396085223e-06, "loss": 0.8173, "step": 9455 }, { "epoch": 0.8466953942582125, "grad_norm": 0.9079978422558551, "learning_rate": 1.2075883957049862e-06, "loss": 0.7941, "step": 9456 }, { "epoch": 0.8467849348032906, "grad_norm": 1.0027050784110783, "learning_rate": 1.2062071349137627e-06, "loss": 0.8038, "step": 9457 }, { "epoch": 0.8468744753483687, "grad_norm": 1.0017879626356014, "learning_rate": 1.2048266138277388e-06, "loss": 0.7775, "step": 9458 }, { "epoch": 0.8469640158934467, "grad_norm": 0.9639095623977575, "learning_rate": 1.203446832563039e-06, "loss": 0.8161, "step": 9459 }, { "epoch": 0.8470535564385249, "grad_norm": 1.148007910766874, "learning_rate": 1.202067791235726e-06, "loss": 0.8203, "step": 9460 }, { "epoch": 0.8471430969836029, "grad_norm": 0.9264145345238843, "learning_rate": 1.2006894899618016e-06, "loss": 0.7906, "step": 9461 }, { "epoch": 0.8472326375286809, "grad_norm": 0.9712203921305085, "learning_rate": 1.199311928857202e-06, "loss": 0.8463, "step": 9462 }, { "epoch": 0.847322178073759, "grad_norm": 1.1841286242305693, "learning_rate": 1.1979351080378042e-06, "loss": 0.8327, "step": 9463 }, { "epoch": 0.8474117186188371, "grad_norm": 1.0864736445283076, "learning_rate": 1.1965590276194217e-06, "loss": 0.8094, "step": 9464 }, { "epoch": 0.8475012591639152, "grad_norm": 1.0196336839641365, "learning_rate": 1.1951836877178069e-06, "loss": 0.7741, "step": 9465 }, { "epoch": 0.8475907997089932, "grad_norm": 0.9720058616682495, "learning_rate": 1.1938090884486476e-06, "loss": 0.8134, "step": 9466 }, { "epoch": 0.8476803402540714, "grad_norm": 1.1150781787598198, "learning_rate": 1.19243522992757e-06, "loss": 0.8021, "step": 9467 }, { "epoch": 0.8477698807991494, "grad_norm": 0.9832396470614113, "learning_rate": 1.1910621122701405e-06, "loss": 0.7275, "step": 9468 }, { "epoch": 0.8478594213442274, "grad_norm": 1.0226878159083508, "learning_rate": 1.1896897355918602e-06, "loss": 0.8098, "step": 9469 }, { "epoch": 0.8479489618893055, "grad_norm": 0.9990860856360991, "learning_rate": 1.188318100008169e-06, "loss": 0.7484, "step": 9470 }, { "epoch": 0.8480385024343836, "grad_norm": 0.9634318291989801, "learning_rate": 1.1869472056344455e-06, "loss": 0.8267, "step": 9471 }, { "epoch": 0.8481280429794616, "grad_norm": 0.9654549232795181, "learning_rate": 1.1855770525860033e-06, "loss": 0.7816, "step": 9472 }, { "epoch": 0.8482175835245397, "grad_norm": 0.9800099967037483, "learning_rate": 1.1842076409780977e-06, "loss": 0.8137, "step": 9473 }, { "epoch": 0.8483071240696177, "grad_norm": 1.1763036579790658, "learning_rate": 1.1828389709259125e-06, "loss": 0.7718, "step": 9474 }, { "epoch": 0.8483966646146959, "grad_norm": 1.0435418595979582, "learning_rate": 1.1814710425445842e-06, "loss": 0.7594, "step": 9475 }, { "epoch": 0.8484862051597739, "grad_norm": 1.0307086210870409, "learning_rate": 1.180103855949174e-06, "loss": 0.8252, "step": 9476 }, { "epoch": 0.848575745704852, "grad_norm": 0.9537632326015479, "learning_rate": 1.1787374112546856e-06, "loss": 0.7966, "step": 9477 }, { "epoch": 0.8486652862499301, "grad_norm": 0.9158085342558261, "learning_rate": 1.1773717085760606e-06, "loss": 0.7885, "step": 9478 }, { "epoch": 0.8487548267950081, "grad_norm": 1.0227837995190376, "learning_rate": 1.1760067480281801e-06, "loss": 0.8439, "step": 9479 }, { "epoch": 0.8488443673400862, "grad_norm": 1.0152985357472795, "learning_rate": 1.1746425297258513e-06, "loss": 0.7914, "step": 9480 }, { "epoch": 0.8489339078851642, "grad_norm": 1.1448130848156446, "learning_rate": 1.1732790537838369e-06, "loss": 0.8052, "step": 9481 }, { "epoch": 0.8490234484302424, "grad_norm": 1.035450225749339, "learning_rate": 1.171916320316825e-06, "loss": 0.8109, "step": 9482 }, { "epoch": 0.8491129889753204, "grad_norm": 1.0379151041972199, "learning_rate": 1.170554329439444e-06, "loss": 0.7849, "step": 9483 }, { "epoch": 0.8492025295203984, "grad_norm": 1.1615330236744337, "learning_rate": 1.1691930812662622e-06, "loss": 0.837, "step": 9484 }, { "epoch": 0.8492920700654766, "grad_norm": 1.1092377424311615, "learning_rate": 1.1678325759117782e-06, "loss": 0.7911, "step": 9485 }, { "epoch": 0.8493816106105546, "grad_norm": 1.1254574497463448, "learning_rate": 1.1664728134904358e-06, "loss": 0.8349, "step": 9486 }, { "epoch": 0.8494711511556327, "grad_norm": 1.0225274445599308, "learning_rate": 1.1651137941166169e-06, "loss": 0.7369, "step": 9487 }, { "epoch": 0.8495606917007107, "grad_norm": 0.9453485120651803, "learning_rate": 1.1637555179046344e-06, "loss": 0.8088, "step": 9488 }, { "epoch": 0.8496502322457888, "grad_norm": 1.0195367076397395, "learning_rate": 1.1623979849687429e-06, "loss": 0.8143, "step": 9489 }, { "epoch": 0.8497397727908669, "grad_norm": 0.9988113740426068, "learning_rate": 1.161041195423136e-06, "loss": 0.8291, "step": 9490 }, { "epoch": 0.8498293133359449, "grad_norm": 1.0734114295013308, "learning_rate": 1.1596851493819383e-06, "loss": 0.783, "step": 9491 }, { "epoch": 0.849918853881023, "grad_norm": 0.9091074497815385, "learning_rate": 1.1583298469592185e-06, "loss": 0.8156, "step": 9492 }, { "epoch": 0.8500083944261011, "grad_norm": 0.9809529874139203, "learning_rate": 1.1569752882689766e-06, "loss": 0.775, "step": 9493 }, { "epoch": 0.8500979349711791, "grad_norm": 0.9963326851454292, "learning_rate": 1.1556214734251592e-06, "loss": 0.7643, "step": 9494 }, { "epoch": 0.8501874755162572, "grad_norm": 1.1062157967736226, "learning_rate": 1.154268402541644e-06, "loss": 0.7528, "step": 9495 }, { "epoch": 0.8502770160613353, "grad_norm": 1.260644312273016, "learning_rate": 1.1529160757322432e-06, "loss": 0.7993, "step": 9496 }, { "epoch": 0.8503665566064134, "grad_norm": 0.9599525588341968, "learning_rate": 1.1515644931107129e-06, "loss": 0.8222, "step": 9497 }, { "epoch": 0.8504560971514914, "grad_norm": 0.962076443847939, "learning_rate": 1.1502136547907417e-06, "loss": 0.746, "step": 9498 }, { "epoch": 0.8505456376965694, "grad_norm": 1.0153818162669237, "learning_rate": 1.1488635608859578e-06, "loss": 0.758, "step": 9499 }, { "epoch": 0.8506351782416476, "grad_norm": 0.9075039173324784, "learning_rate": 1.147514211509929e-06, "loss": 0.7841, "step": 9500 }, { "epoch": 0.8507247187867256, "grad_norm": 0.9724410596319404, "learning_rate": 1.1461656067761605e-06, "loss": 0.7745, "step": 9501 }, { "epoch": 0.8508142593318037, "grad_norm": 1.073459678086075, "learning_rate": 1.144817746798086e-06, "loss": 0.7831, "step": 9502 }, { "epoch": 0.8509037998768818, "grad_norm": 0.9569767801514034, "learning_rate": 1.1434706316890865e-06, "loss": 0.7383, "step": 9503 }, { "epoch": 0.8509933404219598, "grad_norm": 0.9286263866754826, "learning_rate": 1.1421242615624772e-06, "loss": 0.7479, "step": 9504 }, { "epoch": 0.8510828809670379, "grad_norm": 0.9538416711408632, "learning_rate": 1.1407786365315076e-06, "loss": 0.8277, "step": 9505 }, { "epoch": 0.8511724215121159, "grad_norm": 1.1806466862913678, "learning_rate": 1.1394337567093728e-06, "loss": 0.7816, "step": 9506 }, { "epoch": 0.8512619620571941, "grad_norm": 0.9716931585252794, "learning_rate": 1.1380896222091953e-06, "loss": 0.7992, "step": 9507 }, { "epoch": 0.8513515026022721, "grad_norm": 0.9679859806528697, "learning_rate": 1.1367462331440404e-06, "loss": 0.8258, "step": 9508 }, { "epoch": 0.8514410431473501, "grad_norm": 1.1031217507479882, "learning_rate": 1.135403589626909e-06, "loss": 0.814, "step": 9509 }, { "epoch": 0.8515305836924282, "grad_norm": 0.898820820986938, "learning_rate": 1.1340616917707415e-06, "loss": 0.7793, "step": 9510 }, { "epoch": 0.8516201242375063, "grad_norm": 1.0543523510164359, "learning_rate": 1.1327205396884123e-06, "loss": 0.8266, "step": 9511 }, { "epoch": 0.8517096647825844, "grad_norm": 0.9046389463712229, "learning_rate": 1.1313801334927355e-06, "loss": 0.8173, "step": 9512 }, { "epoch": 0.8517992053276624, "grad_norm": 1.3168328610196058, "learning_rate": 1.1300404732964621e-06, "loss": 0.789, "step": 9513 }, { "epoch": 0.8518887458727405, "grad_norm": 0.8792623060339958, "learning_rate": 1.1287015592122785e-06, "loss": 0.8108, "step": 9514 }, { "epoch": 0.8519782864178186, "grad_norm": 1.207497985153025, "learning_rate": 1.1273633913528115e-06, "loss": 0.769, "step": 9515 }, { "epoch": 0.8520678269628966, "grad_norm": 0.9927586457286008, "learning_rate": 1.1260259698306231e-06, "loss": 0.766, "step": 9516 }, { "epoch": 0.8521573675079747, "grad_norm": 0.9380127407786942, "learning_rate": 1.1246892947582121e-06, "loss": 0.8027, "step": 9517 }, { "epoch": 0.8522469080530528, "grad_norm": 0.9574665086858727, "learning_rate": 1.1233533662480156e-06, "loss": 0.767, "step": 9518 }, { "epoch": 0.8523364485981308, "grad_norm": 1.094419084597495, "learning_rate": 1.1220181844124078e-06, "loss": 0.8252, "step": 9519 }, { "epoch": 0.8524259891432089, "grad_norm": 1.0033028569413718, "learning_rate": 1.1206837493636992e-06, "loss": 0.8347, "step": 9520 }, { "epoch": 0.852515529688287, "grad_norm": 1.0517820300863763, "learning_rate": 1.1193500612141384e-06, "loss": 0.7481, "step": 9521 }, { "epoch": 0.8526050702333651, "grad_norm": 0.9510987503277331, "learning_rate": 1.1180171200759115e-06, "loss": 0.8137, "step": 9522 }, { "epoch": 0.8526946107784431, "grad_norm": 1.0004156751218516, "learning_rate": 1.11668492606114e-06, "loss": 0.8213, "step": 9523 }, { "epoch": 0.8527841513235211, "grad_norm": 1.095855759420275, "learning_rate": 1.1153534792818854e-06, "loss": 0.7361, "step": 9524 }, { "epoch": 0.8528736918685993, "grad_norm": 1.073705877262268, "learning_rate": 1.1140227798501435e-06, "loss": 0.7933, "step": 9525 }, { "epoch": 0.8529632324136773, "grad_norm": 1.0938905108679595, "learning_rate": 1.112692827877848e-06, "loss": 0.8222, "step": 9526 }, { "epoch": 0.8530527729587554, "grad_norm": 0.9016545239267576, "learning_rate": 1.111363623476871e-06, "loss": 0.7955, "step": 9527 }, { "epoch": 0.8531423135038334, "grad_norm": 0.9915350967422595, "learning_rate": 1.1100351667590203e-06, "loss": 0.8146, "step": 9528 }, { "epoch": 0.8532318540489116, "grad_norm": 0.9706926854106749, "learning_rate": 1.1087074578360424e-06, "loss": 0.7714, "step": 9529 }, { "epoch": 0.8533213945939896, "grad_norm": 0.9480052924543311, "learning_rate": 1.1073804968196189e-06, "loss": 0.8199, "step": 9530 }, { "epoch": 0.8534109351390676, "grad_norm": 1.040471103733673, "learning_rate": 1.1060542838213695e-06, "loss": 0.8255, "step": 9531 }, { "epoch": 0.8535004756841458, "grad_norm": 1.0316185773437594, "learning_rate": 1.1047288189528504e-06, "loss": 0.7649, "step": 9532 }, { "epoch": 0.8535900162292238, "grad_norm": 0.9320963252575724, "learning_rate": 1.1034041023255581e-06, "loss": 0.8029, "step": 9533 }, { "epoch": 0.8536795567743019, "grad_norm": 1.0485771996100557, "learning_rate": 1.102080134050918e-06, "loss": 0.8598, "step": 9534 }, { "epoch": 0.8537690973193799, "grad_norm": 1.3059548940731849, "learning_rate": 1.100756914240303e-06, "loss": 0.8331, "step": 9535 }, { "epoch": 0.853858637864458, "grad_norm": 0.9983426477509885, "learning_rate": 1.0994344430050163e-06, "loss": 0.7919, "step": 9536 }, { "epoch": 0.8539481784095361, "grad_norm": 1.0386778339035228, "learning_rate": 1.0981127204563001e-06, "loss": 0.7822, "step": 9537 }, { "epoch": 0.8540377189546141, "grad_norm": 1.2650346217168582, "learning_rate": 1.0967917467053336e-06, "loss": 0.7924, "step": 9538 }, { "epoch": 0.8541272594996923, "grad_norm": 1.0252441163245603, "learning_rate": 1.095471521863234e-06, "loss": 0.7736, "step": 9539 }, { "epoch": 0.8542168000447703, "grad_norm": 0.9862055767900356, "learning_rate": 1.0941520460410482e-06, "loss": 0.8657, "step": 9540 }, { "epoch": 0.8543063405898483, "grad_norm": 0.9664315080025508, "learning_rate": 1.0928333193497731e-06, "loss": 0.8062, "step": 9541 }, { "epoch": 0.8543958811349264, "grad_norm": 1.136712935526052, "learning_rate": 1.0915153419003343e-06, "loss": 0.7886, "step": 9542 }, { "epoch": 0.8544854216800045, "grad_norm": 0.9385308992038491, "learning_rate": 1.0901981138035933e-06, "loss": 0.8041, "step": 9543 }, { "epoch": 0.8545749622250826, "grad_norm": 0.9889571661520725, "learning_rate": 1.0888816351703557e-06, "loss": 0.7596, "step": 9544 }, { "epoch": 0.8546645027701606, "grad_norm": 0.9887101625764154, "learning_rate": 1.087565906111354e-06, "loss": 0.7376, "step": 9545 }, { "epoch": 0.8547540433152386, "grad_norm": 1.0800904903172717, "learning_rate": 1.0862509267372657e-06, "loss": 0.7742, "step": 9546 }, { "epoch": 0.8548435838603168, "grad_norm": 0.9587519000193999, "learning_rate": 1.0849366971586995e-06, "loss": 0.849, "step": 9547 }, { "epoch": 0.8549331244053948, "grad_norm": 0.9649091094034121, "learning_rate": 1.0836232174862083e-06, "loss": 0.8163, "step": 9548 }, { "epoch": 0.8550226649504729, "grad_norm": 1.0298647027366474, "learning_rate": 1.082310487830277e-06, "loss": 0.7788, "step": 9549 }, { "epoch": 0.855112205495551, "grad_norm": 0.9256010130599501, "learning_rate": 1.0809985083013296e-06, "loss": 0.7757, "step": 9550 }, { "epoch": 0.855201746040629, "grad_norm": 1.0097264214768489, "learning_rate": 1.0796872790097213e-06, "loss": 0.8805, "step": 9551 }, { "epoch": 0.8552912865857071, "grad_norm": 1.0212327979221512, "learning_rate": 1.078376800065749e-06, "loss": 0.8069, "step": 9552 }, { "epoch": 0.8553808271307851, "grad_norm": 1.097505244562918, "learning_rate": 1.0770670715796472e-06, "loss": 0.8045, "step": 9553 }, { "epoch": 0.8554703676758633, "grad_norm": 0.931354645599154, "learning_rate": 1.0757580936615874e-06, "loss": 0.8153, "step": 9554 }, { "epoch": 0.8555599082209413, "grad_norm": 1.0227330487480157, "learning_rate": 1.0744498664216774e-06, "loss": 0.7704, "step": 9555 }, { "epoch": 0.8556494487660193, "grad_norm": 0.9051430392192664, "learning_rate": 1.0731423899699568e-06, "loss": 0.7721, "step": 9556 }, { "epoch": 0.8557389893110975, "grad_norm": 1.013037974239861, "learning_rate": 1.0718356644164074e-06, "loss": 0.8074, "step": 9557 }, { "epoch": 0.8558285298561755, "grad_norm": 0.9854900190122775, "learning_rate": 1.0705296898709493e-06, "loss": 0.7728, "step": 9558 }, { "epoch": 0.8559180704012536, "grad_norm": 1.1038421401784437, "learning_rate": 1.0692244664434326e-06, "loss": 0.8356, "step": 9559 }, { "epoch": 0.8560076109463316, "grad_norm": 1.0371183556966863, "learning_rate": 1.0679199942436525e-06, "loss": 0.7895, "step": 9560 }, { "epoch": 0.8560971514914097, "grad_norm": 1.0460183665115081, "learning_rate": 1.066616273381338e-06, "loss": 0.7496, "step": 9561 }, { "epoch": 0.8561866920364878, "grad_norm": 0.9838422391759553, "learning_rate": 1.065313303966149e-06, "loss": 0.7957, "step": 9562 }, { "epoch": 0.8562762325815658, "grad_norm": 0.9666060392154043, "learning_rate": 1.0640110861076902e-06, "loss": 0.7607, "step": 9563 }, { "epoch": 0.8563657731266439, "grad_norm": 1.0046538942601368, "learning_rate": 1.0627096199154985e-06, "loss": 0.7853, "step": 9564 }, { "epoch": 0.856455313671722, "grad_norm": 0.8593984814100597, "learning_rate": 1.0614089054990474e-06, "loss": 0.7642, "step": 9565 }, { "epoch": 0.8565448542168, "grad_norm": 0.9333816836430744, "learning_rate": 1.0601089429677547e-06, "loss": 0.78, "step": 9566 }, { "epoch": 0.8566343947618781, "grad_norm": 0.9562053988629583, "learning_rate": 1.0588097324309643e-06, "loss": 0.7886, "step": 9567 }, { "epoch": 0.8567239353069562, "grad_norm": 0.9292858053625015, "learning_rate": 1.057511273997962e-06, "loss": 0.7325, "step": 9568 }, { "epoch": 0.8568134758520343, "grad_norm": 1.070072831312537, "learning_rate": 1.0562135677779694e-06, "loss": 0.7966, "step": 9569 }, { "epoch": 0.8569030163971123, "grad_norm": 0.9931404720942684, "learning_rate": 1.0549166138801482e-06, "loss": 0.7941, "step": 9570 }, { "epoch": 0.8569925569421903, "grad_norm": 0.9170716929023089, "learning_rate": 1.0536204124135885e-06, "loss": 0.7842, "step": 9571 }, { "epoch": 0.8570820974872685, "grad_norm": 0.942424492974866, "learning_rate": 1.0523249634873312e-06, "loss": 0.7738, "step": 9572 }, { "epoch": 0.8571716380323465, "grad_norm": 0.9496521084047362, "learning_rate": 1.051030267210338e-06, "loss": 0.8209, "step": 9573 }, { "epoch": 0.8572611785774246, "grad_norm": 1.0325117838445033, "learning_rate": 1.0497363236915158e-06, "loss": 0.7838, "step": 9574 }, { "epoch": 0.8573507191225027, "grad_norm": 0.9354377269349455, "learning_rate": 1.0484431330397083e-06, "loss": 0.7551, "step": 9575 }, { "epoch": 0.8574402596675808, "grad_norm": 0.9574097071653969, "learning_rate": 1.0471506953636944e-06, "loss": 0.7002, "step": 9576 }, { "epoch": 0.8575298002126588, "grad_norm": 0.9398283824855922, "learning_rate": 1.0458590107721889e-06, "loss": 0.776, "step": 9577 }, { "epoch": 0.8576193407577368, "grad_norm": 1.0496664290807773, "learning_rate": 1.0445680793738444e-06, "loss": 0.8196, "step": 9578 }, { "epoch": 0.857708881302815, "grad_norm": 0.9010200824606396, "learning_rate": 1.0432779012772498e-06, "loss": 0.7723, "step": 9579 }, { "epoch": 0.857798421847893, "grad_norm": 1.0881859468403354, "learning_rate": 1.0419884765909315e-06, "loss": 0.8137, "step": 9580 }, { "epoch": 0.857887962392971, "grad_norm": 0.9352122452571657, "learning_rate": 1.0406998054233507e-06, "loss": 0.7587, "step": 9581 }, { "epoch": 0.8579775029380491, "grad_norm": 0.9866732237796213, "learning_rate": 1.0394118878829063e-06, "loss": 0.8577, "step": 9582 }, { "epoch": 0.8580670434831272, "grad_norm": 0.9775160907078001, "learning_rate": 1.0381247240779346e-06, "loss": 0.8001, "step": 9583 }, { "epoch": 0.8581565840282053, "grad_norm": 0.9103113700857707, "learning_rate": 1.036838314116706e-06, "loss": 0.8213, "step": 9584 }, { "epoch": 0.8582461245732833, "grad_norm": 0.9479596697638377, "learning_rate": 1.0355526581074316e-06, "loss": 0.7652, "step": 9585 }, { "epoch": 0.8583356651183615, "grad_norm": 1.0986459290781252, "learning_rate": 1.0342677561582536e-06, "loss": 0.8159, "step": 9586 }, { "epoch": 0.8584252056634395, "grad_norm": 0.9790392434003694, "learning_rate": 1.032983608377256e-06, "loss": 0.8185, "step": 9587 }, { "epoch": 0.8585147462085175, "grad_norm": 0.9559111704954044, "learning_rate": 1.0317002148724564e-06, "loss": 0.8229, "step": 9588 }, { "epoch": 0.8586042867535956, "grad_norm": 1.0042402745486412, "learning_rate": 1.0304175757518097e-06, "loss": 0.7894, "step": 9589 }, { "epoch": 0.8586938272986737, "grad_norm": 0.9779955369643236, "learning_rate": 1.0291356911232075e-06, "loss": 0.7651, "step": 9590 }, { "epoch": 0.8587833678437518, "grad_norm": 0.9577369298652622, "learning_rate": 1.0278545610944778e-06, "loss": 0.786, "step": 9591 }, { "epoch": 0.8588729083888298, "grad_norm": 0.9522307660730169, "learning_rate": 1.0265741857733846e-06, "loss": 0.7475, "step": 9592 }, { "epoch": 0.8589624489339079, "grad_norm": 0.9623770285700268, "learning_rate": 1.0252945652676305e-06, "loss": 0.7936, "step": 9593 }, { "epoch": 0.859051989478986, "grad_norm": 1.0364636348843959, "learning_rate": 1.0240156996848483e-06, "loss": 0.8294, "step": 9594 }, { "epoch": 0.859141530024064, "grad_norm": 1.0200836930309878, "learning_rate": 1.0227375891326175e-06, "loss": 0.8226, "step": 9595 }, { "epoch": 0.859231070569142, "grad_norm": 0.9032347816067988, "learning_rate": 1.0214602337184465e-06, "loss": 0.7785, "step": 9596 }, { "epoch": 0.8593206111142202, "grad_norm": 1.1752470031322075, "learning_rate": 1.0201836335497816e-06, "loss": 0.8055, "step": 9597 }, { "epoch": 0.8594101516592982, "grad_norm": 0.9165920602098242, "learning_rate": 1.0189077887340072e-06, "loss": 0.7775, "step": 9598 }, { "epoch": 0.8594996922043763, "grad_norm": 0.9845016126791963, "learning_rate": 1.0176326993784447e-06, "loss": 0.7846, "step": 9599 }, { "epoch": 0.8595892327494543, "grad_norm": 0.9971936330820526, "learning_rate": 1.0163583655903464e-06, "loss": 0.8439, "step": 9600 }, { "epoch": 0.8596787732945325, "grad_norm": 0.9516604741793241, "learning_rate": 1.0150847874769065e-06, "loss": 0.7734, "step": 9601 }, { "epoch": 0.8597683138396105, "grad_norm": 0.9378683326908789, "learning_rate": 1.013811965145256e-06, "loss": 0.8166, "step": 9602 }, { "epoch": 0.8598578543846885, "grad_norm": 0.9640512891341534, "learning_rate": 1.0125398987024605e-06, "loss": 0.7766, "step": 9603 }, { "epoch": 0.8599473949297667, "grad_norm": 0.9562342692896773, "learning_rate": 1.0112685882555229e-06, "loss": 0.7839, "step": 9604 }, { "epoch": 0.8600369354748447, "grad_norm": 0.9354697769660194, "learning_rate": 1.009998033911378e-06, "loss": 0.7919, "step": 9605 }, { "epoch": 0.8601264760199228, "grad_norm": 1.0597545611253751, "learning_rate": 1.0087282357769024e-06, "loss": 0.8591, "step": 9606 }, { "epoch": 0.8602160165650008, "grad_norm": 0.9879164144454476, "learning_rate": 1.0074591939589063e-06, "loss": 0.8286, "step": 9607 }, { "epoch": 0.8603055571100789, "grad_norm": 0.9181539111383245, "learning_rate": 1.0061909085641397e-06, "loss": 0.7955, "step": 9608 }, { "epoch": 0.860395097655157, "grad_norm": 0.9095216392409001, "learning_rate": 1.0049233796992874e-06, "loss": 0.8216, "step": 9609 }, { "epoch": 0.860484638200235, "grad_norm": 0.9453638623526938, "learning_rate": 1.0036566074709686e-06, "loss": 0.7508, "step": 9610 }, { "epoch": 0.8605741787453132, "grad_norm": 0.9383002865255036, "learning_rate": 1.002390591985738e-06, "loss": 0.8029, "step": 9611 }, { "epoch": 0.8606637192903912, "grad_norm": 1.0401824576994183, "learning_rate": 1.0011253333500903e-06, "loss": 0.7303, "step": 9612 }, { "epoch": 0.8607532598354692, "grad_norm": 0.993142229029005, "learning_rate": 9.99860831670454e-07, "loss": 0.8285, "step": 9613 }, { "epoch": 0.8608428003805473, "grad_norm": 1.0858151847039264, "learning_rate": 9.985970870531968e-07, "loss": 0.7993, "step": 9614 }, { "epoch": 0.8609323409256254, "grad_norm": 0.9707399542360674, "learning_rate": 9.973340996046211e-07, "loss": 0.7869, "step": 9615 }, { "epoch": 0.8610218814707035, "grad_norm": 0.9637358422273686, "learning_rate": 9.960718694309623e-07, "loss": 0.8391, "step": 9616 }, { "epoch": 0.8611114220157815, "grad_norm": 0.9506965384047706, "learning_rate": 9.94810396638397e-07, "loss": 0.7457, "step": 9617 }, { "epoch": 0.8612009625608595, "grad_norm": 1.0064334990522585, "learning_rate": 9.935496813330358e-07, "loss": 0.8612, "step": 9618 }, { "epoch": 0.8612905031059377, "grad_norm": 1.04671898858899, "learning_rate": 9.92289723620924e-07, "loss": 0.7953, "step": 9619 }, { "epoch": 0.8613800436510157, "grad_norm": 0.966471857702878, "learning_rate": 9.910305236080498e-07, "loss": 0.7758, "step": 9620 }, { "epoch": 0.8614695841960938, "grad_norm": 0.9169349444127871, "learning_rate": 9.89772081400332e-07, "loss": 0.7814, "step": 9621 }, { "epoch": 0.8615591247411719, "grad_norm": 0.9684515908038033, "learning_rate": 9.885143971036226e-07, "loss": 0.8274, "step": 9622 }, { "epoch": 0.86164866528625, "grad_norm": 1.0023198485233595, "learning_rate": 9.87257470823717e-07, "loss": 0.8158, "step": 9623 }, { "epoch": 0.861738205831328, "grad_norm": 1.0060778787485087, "learning_rate": 9.860013026663428e-07, "loss": 0.7653, "step": 9624 }, { "epoch": 0.861827746376406, "grad_norm": 0.9039885261115476, "learning_rate": 9.847458927371623e-07, "loss": 0.7618, "step": 9625 }, { "epoch": 0.8619172869214842, "grad_norm": 1.0048151482667587, "learning_rate": 9.83491241141784e-07, "loss": 0.787, "step": 9626 }, { "epoch": 0.8620068274665622, "grad_norm": 0.913490046675313, "learning_rate": 9.822373479857383e-07, "loss": 0.8387, "step": 9627 }, { "epoch": 0.8620963680116402, "grad_norm": 0.9323123974510575, "learning_rate": 9.809842133745006e-07, "loss": 0.8247, "step": 9628 }, { "epoch": 0.8621859085567184, "grad_norm": 0.8961867270554208, "learning_rate": 9.797318374134811e-07, "loss": 0.7931, "step": 9629 }, { "epoch": 0.8622754491017964, "grad_norm": 1.0336249865529368, "learning_rate": 9.784802202080246e-07, "loss": 0.7809, "step": 9630 }, { "epoch": 0.8623649896468745, "grad_norm": 0.9762678791967343, "learning_rate": 9.772293618634131e-07, "loss": 0.8322, "step": 9631 }, { "epoch": 0.8624545301919525, "grad_norm": 1.0127184917535343, "learning_rate": 9.759792624848662e-07, "loss": 0.7968, "step": 9632 }, { "epoch": 0.8625440707370307, "grad_norm": 1.0072060920571084, "learning_rate": 9.747299221775363e-07, "loss": 0.8243, "step": 9633 }, { "epoch": 0.8626336112821087, "grad_norm": 0.97938694648066, "learning_rate": 9.734813410465149e-07, "loss": 0.7839, "step": 9634 }, { "epoch": 0.8627231518271867, "grad_norm": 1.0633339897191998, "learning_rate": 9.72233519196828e-07, "loss": 0.8267, "step": 9635 }, { "epoch": 0.8628126923722648, "grad_norm": 0.9832441241606009, "learning_rate": 9.709864567334394e-07, "loss": 0.8166, "step": 9636 }, { "epoch": 0.8629022329173429, "grad_norm": 0.9087356822438557, "learning_rate": 9.697401537612472e-07, "loss": 0.8019, "step": 9637 }, { "epoch": 0.862991773462421, "grad_norm": 1.0948780341908522, "learning_rate": 9.684946103850856e-07, "loss": 0.7553, "step": 9638 }, { "epoch": 0.863081314007499, "grad_norm": 0.9516049107709965, "learning_rate": 9.672498267097263e-07, "loss": 0.7899, "step": 9639 }, { "epoch": 0.8631708545525771, "grad_norm": 1.0802748806367994, "learning_rate": 9.660058028398766e-07, "loss": 0.8156, "step": 9640 }, { "epoch": 0.8632603950976552, "grad_norm": 1.5291317730117053, "learning_rate": 9.647625388801806e-07, "loss": 0.7498, "step": 9641 }, { "epoch": 0.8633499356427332, "grad_norm": 1.042043481654263, "learning_rate": 9.635200349352159e-07, "loss": 0.8149, "step": 9642 }, { "epoch": 0.8634394761878113, "grad_norm": 1.0495975206915507, "learning_rate": 9.622782911094985e-07, "loss": 0.8296, "step": 9643 }, { "epoch": 0.8635290167328894, "grad_norm": 0.9364997658852079, "learning_rate": 9.610373075074808e-07, "loss": 0.7393, "step": 9644 }, { "epoch": 0.8636185572779674, "grad_norm": 1.03187591297213, "learning_rate": 9.59797084233548e-07, "loss": 0.7791, "step": 9645 }, { "epoch": 0.8637080978230455, "grad_norm": 0.8710400080566761, "learning_rate": 9.585576213920267e-07, "loss": 0.777, "step": 9646 }, { "epoch": 0.8637976383681236, "grad_norm": 0.8767545858490539, "learning_rate": 9.573189190871735e-07, "loss": 0.7297, "step": 9647 }, { "epoch": 0.8638871789132017, "grad_norm": 0.9918131614146409, "learning_rate": 9.560809774231872e-07, "loss": 0.8091, "step": 9648 }, { "epoch": 0.8639767194582797, "grad_norm": 0.9586679312276287, "learning_rate": 9.548437965041957e-07, "loss": 0.8301, "step": 9649 }, { "epoch": 0.8640662600033577, "grad_norm": 1.2369426522401579, "learning_rate": 9.536073764342701e-07, "loss": 0.7837, "step": 9650 }, { "epoch": 0.8641558005484359, "grad_norm": 0.9589246993903835, "learning_rate": 9.523717173174118e-07, "loss": 0.7958, "step": 9651 }, { "epoch": 0.8642453410935139, "grad_norm": 0.9134175670755973, "learning_rate": 9.511368192575609e-07, "loss": 0.828, "step": 9652 }, { "epoch": 0.864334881638592, "grad_norm": 0.9424394933401217, "learning_rate": 9.499026823585955e-07, "loss": 0.8123, "step": 9653 }, { "epoch": 0.86442442218367, "grad_norm": 1.0897698967553955, "learning_rate": 9.486693067243225e-07, "loss": 0.7554, "step": 9654 }, { "epoch": 0.8645139627287481, "grad_norm": 0.9251022065648881, "learning_rate": 9.474366924584899e-07, "loss": 0.7939, "step": 9655 }, { "epoch": 0.8646035032738262, "grad_norm": 0.9426288892078449, "learning_rate": 9.46204839664786e-07, "loss": 0.8026, "step": 9656 }, { "epoch": 0.8646930438189042, "grad_norm": 0.9831841895862496, "learning_rate": 9.449737484468258e-07, "loss": 0.8024, "step": 9657 }, { "epoch": 0.8647825843639824, "grad_norm": 0.9674842315793059, "learning_rate": 9.437434189081674e-07, "loss": 0.8371, "step": 9658 }, { "epoch": 0.8648721249090604, "grad_norm": 1.0366789998960846, "learning_rate": 9.425138511523024e-07, "loss": 0.7827, "step": 9659 }, { "epoch": 0.8649616654541384, "grad_norm": 0.9181265540012836, "learning_rate": 9.41285045282655e-07, "loss": 0.7807, "step": 9660 }, { "epoch": 0.8650512059992165, "grad_norm": 1.0117389879512924, "learning_rate": 9.400570014025879e-07, "loss": 0.8397, "step": 9661 }, { "epoch": 0.8651407465442946, "grad_norm": 0.9929570364874514, "learning_rate": 9.388297196154039e-07, "loss": 0.7283, "step": 9662 }, { "epoch": 0.8652302870893727, "grad_norm": 1.114845567276967, "learning_rate": 9.376032000243362e-07, "loss": 0.7708, "step": 9663 }, { "epoch": 0.8653198276344507, "grad_norm": 2.2909094024800214, "learning_rate": 9.363774427325578e-07, "loss": 0.8074, "step": 9664 }, { "epoch": 0.8654093681795287, "grad_norm": 0.9759099478725024, "learning_rate": 9.351524478431717e-07, "loss": 0.822, "step": 9665 }, { "epoch": 0.8654989087246069, "grad_norm": 1.015316668681043, "learning_rate": 9.339282154592211e-07, "loss": 0.826, "step": 9666 }, { "epoch": 0.8655884492696849, "grad_norm": 0.8693365140495327, "learning_rate": 9.327047456836835e-07, "loss": 0.7325, "step": 9667 }, { "epoch": 0.865677989814763, "grad_norm": 0.9521801876254858, "learning_rate": 9.314820386194778e-07, "loss": 0.8801, "step": 9668 }, { "epoch": 0.8657675303598411, "grad_norm": 1.078156932076896, "learning_rate": 9.302600943694507e-07, "loss": 0.7804, "step": 9669 }, { "epoch": 0.8658570709049191, "grad_norm": 0.949543342533609, "learning_rate": 9.290389130363908e-07, "loss": 0.7521, "step": 9670 }, { "epoch": 0.8659466114499972, "grad_norm": 0.9925034754511334, "learning_rate": 9.278184947230162e-07, "loss": 0.8337, "step": 9671 }, { "epoch": 0.8660361519950752, "grad_norm": 0.9818205812626446, "learning_rate": 9.26598839531987e-07, "loss": 0.8091, "step": 9672 }, { "epoch": 0.8661256925401534, "grad_norm": 0.8863572868773191, "learning_rate": 9.253799475658931e-07, "loss": 0.8338, "step": 9673 }, { "epoch": 0.8662152330852314, "grad_norm": 0.9274481257642485, "learning_rate": 9.241618189272683e-07, "loss": 0.7757, "step": 9674 }, { "epoch": 0.8663047736303094, "grad_norm": 1.097303224698927, "learning_rate": 9.229444537185784e-07, "loss": 0.7591, "step": 9675 }, { "epoch": 0.8663943141753876, "grad_norm": 0.953171076171005, "learning_rate": 9.217278520422202e-07, "loss": 0.8434, "step": 9676 }, { "epoch": 0.8664838547204656, "grad_norm": 1.0202255044836142, "learning_rate": 9.205120140005309e-07, "loss": 0.7461, "step": 9677 }, { "epoch": 0.8665733952655437, "grad_norm": 0.9564295151387561, "learning_rate": 9.19296939695783e-07, "loss": 0.8209, "step": 9678 }, { "epoch": 0.8666629358106217, "grad_norm": 0.9536856667457038, "learning_rate": 9.180826292301837e-07, "loss": 0.8111, "step": 9679 }, { "epoch": 0.8667524763556999, "grad_norm": 1.0240962964789941, "learning_rate": 9.168690827058813e-07, "loss": 0.8263, "step": 9680 }, { "epoch": 0.8668420169007779, "grad_norm": 1.0158430463736017, "learning_rate": 9.15656300224953e-07, "loss": 0.7447, "step": 9681 }, { "epoch": 0.8669315574458559, "grad_norm": 0.9103247559027354, "learning_rate": 9.144442818894117e-07, "loss": 0.8271, "step": 9682 }, { "epoch": 0.867021097990934, "grad_norm": 0.9513142310063918, "learning_rate": 9.132330278012114e-07, "loss": 0.8023, "step": 9683 }, { "epoch": 0.8671106385360121, "grad_norm": 0.9495255104532422, "learning_rate": 9.120225380622372e-07, "loss": 0.8098, "step": 9684 }, { "epoch": 0.8672001790810902, "grad_norm": 1.550329171346597, "learning_rate": 9.10812812774311e-07, "loss": 0.8796, "step": 9685 }, { "epoch": 0.8672897196261682, "grad_norm": 1.0227010042693285, "learning_rate": 9.096038520391936e-07, "loss": 0.8072, "step": 9686 }, { "epoch": 0.8673792601712463, "grad_norm": 1.0316099731365584, "learning_rate": 9.08395655958576e-07, "loss": 0.7486, "step": 9687 }, { "epoch": 0.8674688007163244, "grad_norm": 1.0024889513637125, "learning_rate": 9.071882246340902e-07, "loss": 0.7708, "step": 9688 }, { "epoch": 0.8675583412614024, "grad_norm": 1.0198861443608758, "learning_rate": 9.059815581672993e-07, "loss": 0.8035, "step": 9689 }, { "epoch": 0.8676478818064804, "grad_norm": 1.0078690570358035, "learning_rate": 9.047756566597055e-07, "loss": 0.8069, "step": 9690 }, { "epoch": 0.8677374223515586, "grad_norm": 0.9510465944103557, "learning_rate": 9.035705202127443e-07, "loss": 0.7994, "step": 9691 }, { "epoch": 0.8678269628966366, "grad_norm": 1.0284646894086966, "learning_rate": 9.02366148927789e-07, "loss": 0.8354, "step": 9692 }, { "epoch": 0.8679165034417147, "grad_norm": 1.0977158614859097, "learning_rate": 9.011625429061455e-07, "loss": 0.7892, "step": 9693 }, { "epoch": 0.8680060439867928, "grad_norm": 0.9744013668506387, "learning_rate": 8.999597022490603e-07, "loss": 0.8258, "step": 9694 }, { "epoch": 0.8680955845318709, "grad_norm": 1.2154247075048843, "learning_rate": 8.987576270577092e-07, "loss": 0.7983, "step": 9695 }, { "epoch": 0.8681851250769489, "grad_norm": 1.0794590426208217, "learning_rate": 8.975563174332091e-07, "loss": 0.8397, "step": 9696 }, { "epoch": 0.8682746656220269, "grad_norm": 0.9593467168555687, "learning_rate": 8.963557734766082e-07, "loss": 0.8063, "step": 9697 }, { "epoch": 0.8683642061671051, "grad_norm": 1.0347340641383724, "learning_rate": 8.951559952888944e-07, "loss": 0.7489, "step": 9698 }, { "epoch": 0.8684537467121831, "grad_norm": 0.9038258764355297, "learning_rate": 8.939569829709882e-07, "loss": 0.7857, "step": 9699 }, { "epoch": 0.8685432872572612, "grad_norm": 1.000410913283481, "learning_rate": 8.927587366237467e-07, "loss": 0.788, "step": 9700 }, { "epoch": 0.8686328278023392, "grad_norm": 0.9804602115083662, "learning_rate": 8.915612563479625e-07, "loss": 0.7161, "step": 9701 }, { "epoch": 0.8687223683474173, "grad_norm": 1.066161848935821, "learning_rate": 8.903645422443641e-07, "loss": 0.7849, "step": 9702 }, { "epoch": 0.8688119088924954, "grad_norm": 1.0428819049132827, "learning_rate": 8.891685944136141e-07, "loss": 0.798, "step": 9703 }, { "epoch": 0.8689014494375734, "grad_norm": 0.9442231980705521, "learning_rate": 8.879734129563133e-07, "loss": 0.8337, "step": 9704 }, { "epoch": 0.8689909899826516, "grad_norm": 1.0241257729361417, "learning_rate": 8.867789979729947e-07, "loss": 0.7594, "step": 9705 }, { "epoch": 0.8690805305277296, "grad_norm": 0.8485619026589344, "learning_rate": 8.855853495641309e-07, "loss": 0.732, "step": 9706 }, { "epoch": 0.8691700710728076, "grad_norm": 0.9972226368388587, "learning_rate": 8.843924678301253e-07, "loss": 0.8112, "step": 9707 }, { "epoch": 0.8692596116178857, "grad_norm": 1.196430818881917, "learning_rate": 8.832003528713218e-07, "loss": 0.7397, "step": 9708 }, { "epoch": 0.8693491521629638, "grad_norm": 1.064788671067656, "learning_rate": 8.820090047879926e-07, "loss": 0.8284, "step": 9709 }, { "epoch": 0.8694386927080419, "grad_norm": 1.0057104727753952, "learning_rate": 8.80818423680354e-07, "loss": 0.8017, "step": 9710 }, { "epoch": 0.8695282332531199, "grad_norm": 0.9981387630261437, "learning_rate": 8.79628609648554e-07, "loss": 0.7768, "step": 9711 }, { "epoch": 0.869617773798198, "grad_norm": 0.9220572838316972, "learning_rate": 8.784395627926734e-07, "loss": 0.7741, "step": 9712 }, { "epoch": 0.8697073143432761, "grad_norm": 1.1155381554749546, "learning_rate": 8.772512832127355e-07, "loss": 0.8101, "step": 9713 }, { "epoch": 0.8697968548883541, "grad_norm": 1.2985708180177682, "learning_rate": 8.760637710086884e-07, "loss": 0.8478, "step": 9714 }, { "epoch": 0.8698863954334322, "grad_norm": 0.9726915265661399, "learning_rate": 8.74877026280424e-07, "loss": 0.824, "step": 9715 }, { "epoch": 0.8699759359785103, "grad_norm": 0.9993378812328921, "learning_rate": 8.736910491277694e-07, "loss": 0.8225, "step": 9716 }, { "epoch": 0.8700654765235883, "grad_norm": 0.922964843439431, "learning_rate": 8.725058396504837e-07, "loss": 0.7618, "step": 9717 }, { "epoch": 0.8701550170686664, "grad_norm": 0.8623185804931164, "learning_rate": 8.713213979482626e-07, "loss": 0.7768, "step": 9718 }, { "epoch": 0.8702445576137444, "grad_norm": 0.9977991938017136, "learning_rate": 8.701377241207409e-07, "loss": 0.7786, "step": 9719 }, { "epoch": 0.8703340981588226, "grad_norm": 1.0002034440085878, "learning_rate": 8.68954818267479e-07, "loss": 0.8229, "step": 9720 }, { "epoch": 0.8704236387039006, "grad_norm": 0.90750303976621, "learning_rate": 8.677726804879816e-07, "loss": 0.7458, "step": 9721 }, { "epoch": 0.8705131792489786, "grad_norm": 1.3617394982505355, "learning_rate": 8.665913108816882e-07, "loss": 0.8176, "step": 9722 }, { "epoch": 0.8706027197940568, "grad_norm": 0.961764787627757, "learning_rate": 8.654107095479713e-07, "loss": 0.8158, "step": 9723 }, { "epoch": 0.8706922603391348, "grad_norm": 0.9783296713225507, "learning_rate": 8.642308765861407e-07, "loss": 0.7965, "step": 9724 }, { "epoch": 0.8707818008842129, "grad_norm": 0.9744647398972225, "learning_rate": 8.630518120954357e-07, "loss": 0.7608, "step": 9725 }, { "epoch": 0.8708713414292909, "grad_norm": 1.0460643325578858, "learning_rate": 8.618735161750369e-07, "loss": 0.8148, "step": 9726 }, { "epoch": 0.870960881974369, "grad_norm": 1.2722133471352022, "learning_rate": 8.606959889240584e-07, "loss": 0.8515, "step": 9727 }, { "epoch": 0.8710504225194471, "grad_norm": 1.2595857308109049, "learning_rate": 8.595192304415534e-07, "loss": 0.8041, "step": 9728 }, { "epoch": 0.8711399630645251, "grad_norm": 1.0636581024202847, "learning_rate": 8.583432408265036e-07, "loss": 0.8384, "step": 9729 }, { "epoch": 0.8712295036096033, "grad_norm": 1.0691356028166792, "learning_rate": 8.571680201778321e-07, "loss": 0.7986, "step": 9730 }, { "epoch": 0.8713190441546813, "grad_norm": 1.136833385893806, "learning_rate": 8.559935685943921e-07, "loss": 0.8618, "step": 9731 }, { "epoch": 0.8714085846997593, "grad_norm": 1.0968095491316403, "learning_rate": 8.548198861749757e-07, "loss": 0.7985, "step": 9732 }, { "epoch": 0.8714981252448374, "grad_norm": 0.9193782074151037, "learning_rate": 8.536469730183061e-07, "loss": 0.8303, "step": 9733 }, { "epoch": 0.8715876657899155, "grad_norm": 1.031783788471677, "learning_rate": 8.5247482922305e-07, "loss": 0.7978, "step": 9734 }, { "epoch": 0.8716772063349936, "grad_norm": 0.9726529757745428, "learning_rate": 8.513034548878041e-07, "loss": 0.7301, "step": 9735 }, { "epoch": 0.8717667468800716, "grad_norm": 0.9697544033645193, "learning_rate": 8.501328501110972e-07, "loss": 0.823, "step": 9736 }, { "epoch": 0.8718562874251496, "grad_norm": 0.9303911750766886, "learning_rate": 8.489630149913985e-07, "loss": 0.7734, "step": 9737 }, { "epoch": 0.8719458279702278, "grad_norm": 1.0173442582118148, "learning_rate": 8.477939496271092e-07, "loss": 0.7582, "step": 9738 }, { "epoch": 0.8720353685153058, "grad_norm": 0.9876091508872713, "learning_rate": 8.466256541165696e-07, "loss": 0.8469, "step": 9739 }, { "epoch": 0.8721249090603839, "grad_norm": 0.9311107004376196, "learning_rate": 8.454581285580499e-07, "loss": 0.8082, "step": 9740 }, { "epoch": 0.872214449605462, "grad_norm": 1.0229410973244177, "learning_rate": 8.442913730497638e-07, "loss": 0.85, "step": 9741 }, { "epoch": 0.87230399015054, "grad_norm": 1.0575898092150893, "learning_rate": 8.431253876898504e-07, "loss": 0.8008, "step": 9742 }, { "epoch": 0.8723935306956181, "grad_norm": 0.9003071081156029, "learning_rate": 8.419601725763893e-07, "loss": 0.7734, "step": 9743 }, { "epoch": 0.8724830712406961, "grad_norm": 0.9588727888795702, "learning_rate": 8.407957278073952e-07, "loss": 0.7364, "step": 9744 }, { "epoch": 0.8725726117857743, "grad_norm": 1.0986648690827348, "learning_rate": 8.396320534808178e-07, "loss": 0.8344, "step": 9745 }, { "epoch": 0.8726621523308523, "grad_norm": 0.9095840509931644, "learning_rate": 8.384691496945408e-07, "loss": 0.8103, "step": 9746 }, { "epoch": 0.8727516928759304, "grad_norm": 1.0882825966551244, "learning_rate": 8.373070165463837e-07, "loss": 0.8066, "step": 9747 }, { "epoch": 0.8728412334210085, "grad_norm": 0.96475108066984, "learning_rate": 8.361456541341028e-07, "loss": 0.7983, "step": 9748 }, { "epoch": 0.8729307739660865, "grad_norm": 0.9007587428518711, "learning_rate": 8.349850625553868e-07, "loss": 0.8187, "step": 9749 }, { "epoch": 0.8730203145111646, "grad_norm": 0.9335155387730634, "learning_rate": 8.338252419078608e-07, "loss": 0.7526, "step": 9750 }, { "epoch": 0.8731098550562426, "grad_norm": 1.0438817038966874, "learning_rate": 8.326661922890855e-07, "loss": 0.7631, "step": 9751 }, { "epoch": 0.8731993956013208, "grad_norm": 0.9964501776378016, "learning_rate": 8.315079137965576e-07, "loss": 0.7937, "step": 9752 }, { "epoch": 0.8732889361463988, "grad_norm": 0.944569110424118, "learning_rate": 8.303504065277057e-07, "loss": 0.7671, "step": 9753 }, { "epoch": 0.8733784766914768, "grad_norm": 1.0045386260209057, "learning_rate": 8.291936705798964e-07, "loss": 0.7597, "step": 9754 }, { "epoch": 0.8734680172365549, "grad_norm": 0.9819509726408752, "learning_rate": 8.280377060504308e-07, "loss": 0.817, "step": 9755 }, { "epoch": 0.873557557781633, "grad_norm": 1.124808700625049, "learning_rate": 8.268825130365454e-07, "loss": 0.8299, "step": 9756 }, { "epoch": 0.8736470983267111, "grad_norm": 0.9346114983164763, "learning_rate": 8.257280916354093e-07, "loss": 0.835, "step": 9757 }, { "epoch": 0.8737366388717891, "grad_norm": 0.9537586993383416, "learning_rate": 8.245744419441304e-07, "loss": 0.8182, "step": 9758 }, { "epoch": 0.8738261794168672, "grad_norm": 1.2773304723099699, "learning_rate": 8.234215640597498e-07, "loss": 0.7948, "step": 9759 }, { "epoch": 0.8739157199619453, "grad_norm": 1.0250542574123196, "learning_rate": 8.222694580792434e-07, "loss": 0.8192, "step": 9760 }, { "epoch": 0.8740052605070233, "grad_norm": 0.9220182331191347, "learning_rate": 8.211181240995225e-07, "loss": 0.7724, "step": 9761 }, { "epoch": 0.8740948010521014, "grad_norm": 0.9358149522953858, "learning_rate": 8.199675622174342e-07, "loss": 0.837, "step": 9762 }, { "epoch": 0.8741843415971795, "grad_norm": 0.9053557277607059, "learning_rate": 8.188177725297585e-07, "loss": 0.7939, "step": 9763 }, { "epoch": 0.8742738821422575, "grad_norm": 0.972740016786196, "learning_rate": 8.176687551332141e-07, "loss": 0.8347, "step": 9764 }, { "epoch": 0.8743634226873356, "grad_norm": 1.0162046063925805, "learning_rate": 8.1652051012445e-07, "loss": 0.847, "step": 9765 }, { "epoch": 0.8744529632324137, "grad_norm": 0.9539888776710241, "learning_rate": 8.153730376000557e-07, "loss": 0.8359, "step": 9766 }, { "epoch": 0.8745425037774918, "grad_norm": 0.9561572907391523, "learning_rate": 8.142263376565518e-07, "loss": 0.8139, "step": 9767 }, { "epoch": 0.8746320443225698, "grad_norm": 1.0036655042734668, "learning_rate": 8.130804103903956e-07, "loss": 0.8099, "step": 9768 }, { "epoch": 0.8747215848676478, "grad_norm": 0.9705083777095447, "learning_rate": 8.119352558979742e-07, "loss": 0.8211, "step": 9769 }, { "epoch": 0.874811125412726, "grad_norm": 1.1687135551347951, "learning_rate": 8.107908742756198e-07, "loss": 0.742, "step": 9770 }, { "epoch": 0.874900665957804, "grad_norm": 0.9558009318580759, "learning_rate": 8.09647265619592e-07, "loss": 0.7538, "step": 9771 }, { "epoch": 0.8749902065028821, "grad_norm": 1.045613323224456, "learning_rate": 8.08504430026088e-07, "loss": 0.7566, "step": 9772 }, { "epoch": 0.8750797470479601, "grad_norm": 0.946625502604746, "learning_rate": 8.07362367591239e-07, "loss": 0.7717, "step": 9773 }, { "epoch": 0.8751692875930382, "grad_norm": 0.9702793710486658, "learning_rate": 8.062210784111135e-07, "loss": 0.8081, "step": 9774 }, { "epoch": 0.8752588281381163, "grad_norm": 0.9995915891444216, "learning_rate": 8.050805625817071e-07, "loss": 0.8157, "step": 9775 }, { "epoch": 0.8753483686831943, "grad_norm": 0.9062155125740023, "learning_rate": 8.039408201989618e-07, "loss": 0.8032, "step": 9776 }, { "epoch": 0.8754379092282725, "grad_norm": 0.9573408905890085, "learning_rate": 8.028018513587477e-07, "loss": 0.8153, "step": 9777 }, { "epoch": 0.8755274497733505, "grad_norm": 1.0417428335204069, "learning_rate": 8.016636561568713e-07, "loss": 0.8316, "step": 9778 }, { "epoch": 0.8756169903184285, "grad_norm": 0.8639736584756212, "learning_rate": 8.005262346890752e-07, "loss": 0.7465, "step": 9779 }, { "epoch": 0.8757065308635066, "grad_norm": 0.9593382063459616, "learning_rate": 7.993895870510316e-07, "loss": 0.8052, "step": 9780 }, { "epoch": 0.8757960714085847, "grad_norm": 0.9853090804400931, "learning_rate": 7.982537133383528e-07, "loss": 0.84, "step": 9781 }, { "epoch": 0.8758856119536628, "grad_norm": 0.9270240324666513, "learning_rate": 7.97118613646587e-07, "loss": 0.8244, "step": 9782 }, { "epoch": 0.8759751524987408, "grad_norm": 0.934849005931734, "learning_rate": 7.959842880712143e-07, "loss": 0.7859, "step": 9783 }, { "epoch": 0.876064693043819, "grad_norm": 1.0276032025232105, "learning_rate": 7.948507367076519e-07, "loss": 0.8031, "step": 9784 }, { "epoch": 0.876154233588897, "grad_norm": 0.941833516126144, "learning_rate": 7.937179596512467e-07, "loss": 0.7807, "step": 9785 }, { "epoch": 0.876243774133975, "grad_norm": 0.9086344039477847, "learning_rate": 7.92585956997286e-07, "loss": 0.7363, "step": 9786 }, { "epoch": 0.8763333146790531, "grad_norm": 0.9464328887920288, "learning_rate": 7.914547288409891e-07, "loss": 0.7601, "step": 9787 }, { "epoch": 0.8764228552241312, "grad_norm": 1.0067352267720557, "learning_rate": 7.903242752775142e-07, "loss": 0.8014, "step": 9788 }, { "epoch": 0.8765123957692093, "grad_norm": 0.985419110955858, "learning_rate": 7.891945964019488e-07, "loss": 0.8225, "step": 9789 }, { "epoch": 0.8766019363142873, "grad_norm": 1.0092430178200362, "learning_rate": 7.880656923093211e-07, "loss": 0.7776, "step": 9790 }, { "epoch": 0.8766914768593653, "grad_norm": 0.9485356945957446, "learning_rate": 7.869375630945875e-07, "loss": 0.7672, "step": 9791 }, { "epoch": 0.8767810174044435, "grad_norm": 0.996539467663567, "learning_rate": 7.85810208852642e-07, "loss": 0.7967, "step": 9792 }, { "epoch": 0.8768705579495215, "grad_norm": 1.0334554465199335, "learning_rate": 7.846836296783167e-07, "loss": 0.8255, "step": 9793 }, { "epoch": 0.8769600984945995, "grad_norm": 0.9088401012000215, "learning_rate": 7.835578256663712e-07, "loss": 0.8294, "step": 9794 }, { "epoch": 0.8770496390396777, "grad_norm": 1.078680478430899, "learning_rate": 7.824327969115119e-07, "loss": 0.7569, "step": 9795 }, { "epoch": 0.8771391795847557, "grad_norm": 1.028118944230959, "learning_rate": 7.813085435083678e-07, "loss": 0.8464, "step": 9796 }, { "epoch": 0.8772287201298338, "grad_norm": 1.0679999767384944, "learning_rate": 7.801850655515064e-07, "loss": 0.8322, "step": 9797 }, { "epoch": 0.8773182606749118, "grad_norm": 0.946355863168016, "learning_rate": 7.790623631354333e-07, "loss": 0.8051, "step": 9798 }, { "epoch": 0.87740780121999, "grad_norm": 1.0215508333014118, "learning_rate": 7.779404363545861e-07, "loss": 0.7976, "step": 9799 }, { "epoch": 0.877497341765068, "grad_norm": 1.0817862241527378, "learning_rate": 7.768192853033352e-07, "loss": 0.802, "step": 9800 }, { "epoch": 0.877586882310146, "grad_norm": 1.08636407754348, "learning_rate": 7.756989100759949e-07, "loss": 0.7839, "step": 9801 }, { "epoch": 0.8776764228552242, "grad_norm": 0.8711485410377877, "learning_rate": 7.745793107667998e-07, "loss": 0.7878, "step": 9802 }, { "epoch": 0.8777659634003022, "grad_norm": 1.000664383474099, "learning_rate": 7.734604874699315e-07, "loss": 0.772, "step": 9803 }, { "epoch": 0.8778555039453803, "grad_norm": 1.0185897915799873, "learning_rate": 7.723424402794999e-07, "loss": 0.8124, "step": 9804 }, { "epoch": 0.8779450444904583, "grad_norm": 0.9892352812041512, "learning_rate": 7.712251692895522e-07, "loss": 0.7649, "step": 9805 }, { "epoch": 0.8780345850355364, "grad_norm": 1.3611582057179115, "learning_rate": 7.70108674594069e-07, "loss": 0.7925, "step": 9806 }, { "epoch": 0.8781241255806145, "grad_norm": 0.9953033088493918, "learning_rate": 7.689929562869669e-07, "loss": 0.8122, "step": 9807 }, { "epoch": 0.8782136661256925, "grad_norm": 1.3580779417717428, "learning_rate": 7.678780144620956e-07, "loss": 0.842, "step": 9808 }, { "epoch": 0.8783032066707706, "grad_norm": 0.9028211830247261, "learning_rate": 7.667638492132423e-07, "loss": 0.7879, "step": 9809 }, { "epoch": 0.8783927472158487, "grad_norm": 1.0273831995755325, "learning_rate": 7.656504606341242e-07, "loss": 0.7853, "step": 9810 }, { "epoch": 0.8784822877609267, "grad_norm": 0.9292371083901101, "learning_rate": 7.645378488183986e-07, "loss": 0.7805, "step": 9811 }, { "epoch": 0.8785718283060048, "grad_norm": 0.9260584537783453, "learning_rate": 7.634260138596528e-07, "loss": 0.7869, "step": 9812 }, { "epoch": 0.8786613688510829, "grad_norm": 1.1267648806819965, "learning_rate": 7.623149558514109e-07, "loss": 0.8349, "step": 9813 }, { "epoch": 0.878750909396161, "grad_norm": 0.9557321799807886, "learning_rate": 7.612046748871327e-07, "loss": 0.7401, "step": 9814 }, { "epoch": 0.878840449941239, "grad_norm": 1.0005819877209745, "learning_rate": 7.600951710602111e-07, "loss": 0.7781, "step": 9815 }, { "epoch": 0.878929990486317, "grad_norm": 1.0509734204573693, "learning_rate": 7.589864444639727e-07, "loss": 0.7949, "step": 9816 }, { "epoch": 0.8790195310313952, "grad_norm": 0.9409700263899377, "learning_rate": 7.578784951916818e-07, "loss": 0.8425, "step": 9817 }, { "epoch": 0.8791090715764732, "grad_norm": 1.1381749428005803, "learning_rate": 7.567713233365337e-07, "loss": 0.859, "step": 9818 }, { "epoch": 0.8791986121215513, "grad_norm": 0.9905079487291544, "learning_rate": 7.556649289916618e-07, "loss": 0.8465, "step": 9819 }, { "epoch": 0.8792881526666294, "grad_norm": 1.1592414290959263, "learning_rate": 7.545593122501305e-07, "loss": 0.7977, "step": 9820 }, { "epoch": 0.8793776932117074, "grad_norm": 0.943338095280278, "learning_rate": 7.534544732049431e-07, "loss": 0.7853, "step": 9821 }, { "epoch": 0.8794672337567855, "grad_norm": 1.038089148182455, "learning_rate": 7.523504119490321e-07, "loss": 0.6971, "step": 9822 }, { "epoch": 0.8795567743018635, "grad_norm": 0.9668546617246336, "learning_rate": 7.512471285752698e-07, "loss": 0.7952, "step": 9823 }, { "epoch": 0.8796463148469417, "grad_norm": 1.0377805610079907, "learning_rate": 7.501446231764609e-07, "loss": 0.7672, "step": 9824 }, { "epoch": 0.8797358553920197, "grad_norm": 0.9550606872011534, "learning_rate": 7.490428958453422e-07, "loss": 0.8041, "step": 9825 }, { "epoch": 0.8798253959370977, "grad_norm": 0.9850703320592671, "learning_rate": 7.479419466745908e-07, "loss": 0.7886, "step": 9826 }, { "epoch": 0.8799149364821758, "grad_norm": 0.8629978378926485, "learning_rate": 7.468417757568114e-07, "loss": 0.7687, "step": 9827 }, { "epoch": 0.8800044770272539, "grad_norm": 1.076171519992695, "learning_rate": 7.457423831845511e-07, "loss": 0.761, "step": 9828 }, { "epoch": 0.880094017572332, "grad_norm": 0.9526392617395856, "learning_rate": 7.446437690502806e-07, "loss": 0.7808, "step": 9829 }, { "epoch": 0.88018355811741, "grad_norm": 0.9181673950704209, "learning_rate": 7.435459334464179e-07, "loss": 0.852, "step": 9830 }, { "epoch": 0.8802730986624882, "grad_norm": 0.9397464708923388, "learning_rate": 7.424488764653082e-07, "loss": 0.8225, "step": 9831 }, { "epoch": 0.8803626392075662, "grad_norm": 0.9388564347080101, "learning_rate": 7.413525981992298e-07, "loss": 0.7665, "step": 9832 }, { "epoch": 0.8804521797526442, "grad_norm": 0.9771259216936814, "learning_rate": 7.402570987404001e-07, "loss": 0.8221, "step": 9833 }, { "epoch": 0.8805417202977223, "grad_norm": 1.0184685156760551, "learning_rate": 7.391623781809709e-07, "loss": 0.829, "step": 9834 }, { "epoch": 0.8806312608428004, "grad_norm": 1.073933743661023, "learning_rate": 7.380684366130197e-07, "loss": 0.753, "step": 9835 }, { "epoch": 0.8807208013878784, "grad_norm": 1.1403977625911974, "learning_rate": 7.369752741285729e-07, "loss": 0.744, "step": 9836 }, { "epoch": 0.8808103419329565, "grad_norm": 1.2751822216343298, "learning_rate": 7.358828908195792e-07, "loss": 0.7813, "step": 9837 }, { "epoch": 0.8808998824780346, "grad_norm": 0.9586220881159375, "learning_rate": 7.347912867779283e-07, "loss": 0.8013, "step": 9838 }, { "epoch": 0.8809894230231127, "grad_norm": 1.04948302898256, "learning_rate": 7.337004620954435e-07, "loss": 0.7847, "step": 9839 }, { "epoch": 0.8810789635681907, "grad_norm": 0.9670661870410249, "learning_rate": 7.32610416863877e-07, "loss": 0.7888, "step": 9840 }, { "epoch": 0.8811685041132687, "grad_norm": 1.0298972520242087, "learning_rate": 7.315211511749242e-07, "loss": 0.8, "step": 9841 }, { "epoch": 0.8812580446583469, "grad_norm": 1.0175773605713476, "learning_rate": 7.304326651202065e-07, "loss": 0.8362, "step": 9842 }, { "epoch": 0.8813475852034249, "grad_norm": 0.9189549171001492, "learning_rate": 7.29344958791287e-07, "loss": 0.8128, "step": 9843 }, { "epoch": 0.881437125748503, "grad_norm": 0.9960933827101033, "learning_rate": 7.282580322796606e-07, "loss": 0.874, "step": 9844 }, { "epoch": 0.881526666293581, "grad_norm": 0.9732778571116106, "learning_rate": 7.271718856767562e-07, "loss": 0.8189, "step": 9845 }, { "epoch": 0.8816162068386592, "grad_norm": 0.9858362820808764, "learning_rate": 7.26086519073933e-07, "loss": 0.7978, "step": 9846 }, { "epoch": 0.8817057473837372, "grad_norm": 0.980870945883378, "learning_rate": 7.250019325624912e-07, "loss": 0.8054, "step": 9847 }, { "epoch": 0.8817952879288152, "grad_norm": 0.9807060169239823, "learning_rate": 7.239181262336604e-07, "loss": 0.789, "step": 9848 }, { "epoch": 0.8818848284738934, "grad_norm": 1.0194821064282118, "learning_rate": 7.228351001786116e-07, "loss": 0.789, "step": 9849 }, { "epoch": 0.8819743690189714, "grad_norm": 0.9679439639230165, "learning_rate": 7.217528544884433e-07, "loss": 0.7678, "step": 9850 }, { "epoch": 0.8820639095640495, "grad_norm": 1.0003269571387614, "learning_rate": 7.206713892541884e-07, "loss": 0.7844, "step": 9851 }, { "epoch": 0.8821534501091275, "grad_norm": 0.9280736978949874, "learning_rate": 7.195907045668171e-07, "loss": 0.8084, "step": 9852 }, { "epoch": 0.8822429906542056, "grad_norm": 0.9412840758958403, "learning_rate": 7.185108005172347e-07, "loss": 0.7822, "step": 9853 }, { "epoch": 0.8823325311992837, "grad_norm": 1.0889754939069687, "learning_rate": 7.174316771962752e-07, "loss": 0.7411, "step": 9854 }, { "epoch": 0.8824220717443617, "grad_norm": 1.0272199833259048, "learning_rate": 7.163533346947183e-07, "loss": 0.7876, "step": 9855 }, { "epoch": 0.8825116122894399, "grad_norm": 1.0403525550361983, "learning_rate": 7.152757731032645e-07, "loss": 0.8132, "step": 9856 }, { "epoch": 0.8826011528345179, "grad_norm": 1.0091771608241993, "learning_rate": 7.141989925125559e-07, "loss": 0.841, "step": 9857 }, { "epoch": 0.8826906933795959, "grad_norm": 0.9734762855463261, "learning_rate": 7.131229930131689e-07, "loss": 0.7676, "step": 9858 }, { "epoch": 0.882780233924674, "grad_norm": 1.2835988569437364, "learning_rate": 7.120477746956123e-07, "loss": 0.806, "step": 9859 }, { "epoch": 0.8828697744697521, "grad_norm": 0.9684613488185034, "learning_rate": 7.109733376503281e-07, "loss": 0.816, "step": 9860 }, { "epoch": 0.8829593150148302, "grad_norm": 1.214126291893131, "learning_rate": 7.098996819677006e-07, "loss": 0.7465, "step": 9861 }, { "epoch": 0.8830488555599082, "grad_norm": 0.9933197081487194, "learning_rate": 7.088268077380356e-07, "loss": 0.8165, "step": 9862 }, { "epoch": 0.8831383961049862, "grad_norm": 1.0433426419260803, "learning_rate": 7.07754715051584e-07, "loss": 0.8252, "step": 9863 }, { "epoch": 0.8832279366500644, "grad_norm": 0.9588611716236244, "learning_rate": 7.066834039985237e-07, "loss": 0.7915, "step": 9864 }, { "epoch": 0.8833174771951424, "grad_norm": 1.1333183014029315, "learning_rate": 7.056128746689717e-07, "loss": 0.8206, "step": 9865 }, { "epoch": 0.8834070177402205, "grad_norm": 0.8894077619600714, "learning_rate": 7.045431271529767e-07, "loss": 0.7704, "step": 9866 }, { "epoch": 0.8834965582852986, "grad_norm": 0.9732615935059117, "learning_rate": 7.034741615405227e-07, "loss": 0.8293, "step": 9867 }, { "epoch": 0.8835860988303766, "grad_norm": 1.0604689430851104, "learning_rate": 7.024059779215287e-07, "loss": 0.7936, "step": 9868 }, { "epoch": 0.8836756393754547, "grad_norm": 0.9990437921434144, "learning_rate": 7.013385763858449e-07, "loss": 0.7458, "step": 9869 }, { "epoch": 0.8837651799205327, "grad_norm": 0.9223506198710638, "learning_rate": 7.002719570232586e-07, "loss": 0.7701, "step": 9870 }, { "epoch": 0.8838547204656109, "grad_norm": 1.0233802711643287, "learning_rate": 6.9920611992349e-07, "loss": 0.8115, "step": 9871 }, { "epoch": 0.8839442610106889, "grad_norm": 0.9439578122268587, "learning_rate": 6.981410651761933e-07, "loss": 0.763, "step": 9872 }, { "epoch": 0.8840338015557669, "grad_norm": 0.9590002220547745, "learning_rate": 6.970767928709599e-07, "loss": 0.8136, "step": 9873 }, { "epoch": 0.8841233421008451, "grad_norm": 0.9805836162859698, "learning_rate": 6.960133030973104e-07, "loss": 0.8214, "step": 9874 }, { "epoch": 0.8842128826459231, "grad_norm": 0.9698652912697726, "learning_rate": 6.949505959447023e-07, "loss": 0.8057, "step": 9875 }, { "epoch": 0.8843024231910012, "grad_norm": 1.005258093280118, "learning_rate": 6.938886715025284e-07, "loss": 0.7435, "step": 9876 }, { "epoch": 0.8843919637360792, "grad_norm": 1.0260772790272097, "learning_rate": 6.92827529860114e-07, "loss": 0.7977, "step": 9877 }, { "epoch": 0.8844815042811573, "grad_norm": 0.9510253123237471, "learning_rate": 6.917671711067176e-07, "loss": 0.7737, "step": 9878 }, { "epoch": 0.8845710448262354, "grad_norm": 0.9687981500437558, "learning_rate": 6.907075953315346e-07, "loss": 0.7997, "step": 9879 }, { "epoch": 0.8846605853713134, "grad_norm": 0.9422514901604037, "learning_rate": 6.896488026236914e-07, "loss": 0.7626, "step": 9880 }, { "epoch": 0.8847501259163915, "grad_norm": 1.044599744388437, "learning_rate": 6.885907930722525e-07, "loss": 0.8226, "step": 9881 }, { "epoch": 0.8848396664614696, "grad_norm": 1.0535834807569582, "learning_rate": 6.87533566766212e-07, "loss": 0.824, "step": 9882 }, { "epoch": 0.8849292070065476, "grad_norm": 0.9547869762437075, "learning_rate": 6.864771237945022e-07, "loss": 0.8207, "step": 9883 }, { "epoch": 0.8850187475516257, "grad_norm": 1.023538380131524, "learning_rate": 6.854214642459855e-07, "loss": 0.7923, "step": 9884 }, { "epoch": 0.8851082880967038, "grad_norm": 1.0423316879756122, "learning_rate": 6.84366588209463e-07, "loss": 0.785, "step": 9885 }, { "epoch": 0.8851978286417819, "grad_norm": 1.2147324676930111, "learning_rate": 6.833124957736659e-07, "loss": 0.8118, "step": 9886 }, { "epoch": 0.8852873691868599, "grad_norm": 1.0001329990900338, "learning_rate": 6.82259187027261e-07, "loss": 0.806, "step": 9887 }, { "epoch": 0.8853769097319379, "grad_norm": 0.9687319433743499, "learning_rate": 6.81206662058852e-07, "loss": 0.7569, "step": 9888 }, { "epoch": 0.8854664502770161, "grad_norm": 1.5485829855799689, "learning_rate": 6.801549209569669e-07, "loss": 0.8073, "step": 9889 }, { "epoch": 0.8855559908220941, "grad_norm": 0.9575800121372752, "learning_rate": 6.791039638100816e-07, "loss": 0.8475, "step": 9890 }, { "epoch": 0.8856455313671722, "grad_norm": 1.009529690296095, "learning_rate": 6.780537907065965e-07, "loss": 0.8155, "step": 9891 }, { "epoch": 0.8857350719122503, "grad_norm": 0.9721110469169533, "learning_rate": 6.770044017348498e-07, "loss": 0.8417, "step": 9892 }, { "epoch": 0.8858246124573284, "grad_norm": 0.9481725185534631, "learning_rate": 6.759557969831109e-07, "loss": 0.826, "step": 9893 }, { "epoch": 0.8859141530024064, "grad_norm": 1.0774780509105069, "learning_rate": 6.749079765395883e-07, "loss": 0.843, "step": 9894 }, { "epoch": 0.8860036935474844, "grad_norm": 1.034737793132593, "learning_rate": 6.738609404924168e-07, "loss": 0.7561, "step": 9895 }, { "epoch": 0.8860932340925626, "grad_norm": 1.0186796952311126, "learning_rate": 6.728146889296716e-07, "loss": 0.7348, "step": 9896 }, { "epoch": 0.8861827746376406, "grad_norm": 1.0368944939513032, "learning_rate": 6.717692219393601e-07, "loss": 0.7841, "step": 9897 }, { "epoch": 0.8862723151827187, "grad_norm": 0.9077689129683076, "learning_rate": 6.707245396094253e-07, "loss": 0.8046, "step": 9898 }, { "epoch": 0.8863618557277967, "grad_norm": 0.9376919886076567, "learning_rate": 6.696806420277413e-07, "loss": 0.7791, "step": 9899 }, { "epoch": 0.8864513962728748, "grad_norm": 0.8802664037859377, "learning_rate": 6.686375292821157e-07, "loss": 0.7456, "step": 9900 }, { "epoch": 0.8865409368179529, "grad_norm": 1.1283498152495757, "learning_rate": 6.675952014602937e-07, "loss": 0.8131, "step": 9901 }, { "epoch": 0.8866304773630309, "grad_norm": 0.9863867866210473, "learning_rate": 6.665536586499488e-07, "loss": 0.8071, "step": 9902 }, { "epoch": 0.8867200179081091, "grad_norm": 0.9848282144043862, "learning_rate": 6.655129009386974e-07, "loss": 0.8451, "step": 9903 }, { "epoch": 0.8868095584531871, "grad_norm": 1.0818080777758607, "learning_rate": 6.644729284140828e-07, "loss": 0.8561, "step": 9904 }, { "epoch": 0.8868990989982651, "grad_norm": 0.8885447875057321, "learning_rate": 6.634337411635849e-07, "loss": 0.7736, "step": 9905 }, { "epoch": 0.8869886395433432, "grad_norm": 1.0342204171878764, "learning_rate": 6.623953392746152e-07, "loss": 0.7816, "step": 9906 }, { "epoch": 0.8870781800884213, "grad_norm": 1.0048154355805707, "learning_rate": 6.613577228345202e-07, "loss": 0.7731, "step": 9907 }, { "epoch": 0.8871677206334994, "grad_norm": 0.8818723378372741, "learning_rate": 6.603208919305792e-07, "loss": 0.7783, "step": 9908 }, { "epoch": 0.8872572611785774, "grad_norm": 0.9979421614145839, "learning_rate": 6.592848466500123e-07, "loss": 0.7797, "step": 9909 }, { "epoch": 0.8873468017236555, "grad_norm": 0.9889813314459065, "learning_rate": 6.582495870799666e-07, "loss": 0.7636, "step": 9910 }, { "epoch": 0.8874363422687336, "grad_norm": 1.2623720123065876, "learning_rate": 6.572151133075222e-07, "loss": 0.8132, "step": 9911 }, { "epoch": 0.8875258828138116, "grad_norm": 1.079529749992829, "learning_rate": 6.561814254196974e-07, "loss": 0.8563, "step": 9912 }, { "epoch": 0.8876154233588897, "grad_norm": 1.018669930889253, "learning_rate": 6.551485235034416e-07, "loss": 0.8079, "step": 9913 }, { "epoch": 0.8877049639039678, "grad_norm": 0.9563525638873442, "learning_rate": 6.541164076456385e-07, "loss": 0.8266, "step": 9914 }, { "epoch": 0.8877945044490458, "grad_norm": 1.1260987048704978, "learning_rate": 6.530850779331099e-07, "loss": 0.8233, "step": 9915 }, { "epoch": 0.8878840449941239, "grad_norm": 1.2221780742977932, "learning_rate": 6.520545344526063e-07, "loss": 0.7777, "step": 9916 }, { "epoch": 0.8879735855392019, "grad_norm": 1.0720258552291122, "learning_rate": 6.51024777290813e-07, "loss": 0.8265, "step": 9917 }, { "epoch": 0.8880631260842801, "grad_norm": 1.0025242255099671, "learning_rate": 6.499958065343492e-07, "loss": 0.7149, "step": 9918 }, { "epoch": 0.8881526666293581, "grad_norm": 0.9894722797972897, "learning_rate": 6.489676222697683e-07, "loss": 0.8018, "step": 9919 }, { "epoch": 0.8882422071744361, "grad_norm": 0.9433127041715978, "learning_rate": 6.479402245835587e-07, "loss": 0.7857, "step": 9920 }, { "epoch": 0.8883317477195143, "grad_norm": 0.9585168062099032, "learning_rate": 6.469136135621434e-07, "loss": 0.7875, "step": 9921 }, { "epoch": 0.8884212882645923, "grad_norm": 1.0448981301626772, "learning_rate": 6.458877892918758e-07, "loss": 0.7709, "step": 9922 }, { "epoch": 0.8885108288096704, "grad_norm": 0.9469908831668271, "learning_rate": 6.448627518590444e-07, "loss": 0.792, "step": 9923 }, { "epoch": 0.8886003693547484, "grad_norm": 0.872736211836871, "learning_rate": 6.438385013498727e-07, "loss": 0.7806, "step": 9924 }, { "epoch": 0.8886899098998265, "grad_norm": 1.0505593698527258, "learning_rate": 6.428150378505171e-07, "loss": 0.8295, "step": 9925 }, { "epoch": 0.8887794504449046, "grad_norm": 0.9855662574745117, "learning_rate": 6.417923614470689e-07, "loss": 0.717, "step": 9926 }, { "epoch": 0.8888689909899826, "grad_norm": 1.0226501088945537, "learning_rate": 6.407704722255514e-07, "loss": 0.8054, "step": 9927 }, { "epoch": 0.8889585315350608, "grad_norm": 1.013583703887514, "learning_rate": 6.397493702719226e-07, "loss": 0.8594, "step": 9928 }, { "epoch": 0.8890480720801388, "grad_norm": 0.9178398536134609, "learning_rate": 6.387290556720749e-07, "loss": 0.8154, "step": 9929 }, { "epoch": 0.8891376126252168, "grad_norm": 0.9994942006875942, "learning_rate": 6.377095285118329e-07, "loss": 0.8298, "step": 9930 }, { "epoch": 0.8892271531702949, "grad_norm": 0.9860537494489169, "learning_rate": 6.366907888769569e-07, "loss": 0.8098, "step": 9931 }, { "epoch": 0.889316693715373, "grad_norm": 0.9321200968247388, "learning_rate": 6.356728368531384e-07, "loss": 0.8199, "step": 9932 }, { "epoch": 0.8894062342604511, "grad_norm": 1.0091043074095212, "learning_rate": 6.346556725260067e-07, "loss": 0.817, "step": 9933 }, { "epoch": 0.8894957748055291, "grad_norm": 1.0961654503467206, "learning_rate": 6.336392959811199e-07, "loss": 0.7693, "step": 9934 }, { "epoch": 0.8895853153506071, "grad_norm": 0.9755442473176906, "learning_rate": 6.326237073039743e-07, "loss": 0.7426, "step": 9935 }, { "epoch": 0.8896748558956853, "grad_norm": 1.1460587620962055, "learning_rate": 6.316089065799958e-07, "loss": 0.7813, "step": 9936 }, { "epoch": 0.8897643964407633, "grad_norm": 1.0466513473659182, "learning_rate": 6.305948938945483e-07, "loss": 0.7729, "step": 9937 }, { "epoch": 0.8898539369858414, "grad_norm": 1.1298520487309693, "learning_rate": 6.29581669332926e-07, "loss": 0.8028, "step": 9938 }, { "epoch": 0.8899434775309195, "grad_norm": 0.9713403205632621, "learning_rate": 6.285692329803572e-07, "loss": 0.7501, "step": 9939 }, { "epoch": 0.8900330180759976, "grad_norm": 1.124085686499429, "learning_rate": 6.275575849220072e-07, "loss": 0.7881, "step": 9940 }, { "epoch": 0.8901225586210756, "grad_norm": 1.0860753523770987, "learning_rate": 6.265467252429702e-07, "loss": 0.8082, "step": 9941 }, { "epoch": 0.8902120991661536, "grad_norm": 0.9899562494913732, "learning_rate": 6.255366540282782e-07, "loss": 0.8314, "step": 9942 }, { "epoch": 0.8903016397112318, "grad_norm": 0.9524596789668761, "learning_rate": 6.245273713628941e-07, "loss": 0.74, "step": 9943 }, { "epoch": 0.8903911802563098, "grad_norm": 0.9755006968342398, "learning_rate": 6.235188773317146e-07, "loss": 0.7585, "step": 9944 }, { "epoch": 0.8904807208013878, "grad_norm": 1.0846689513127081, "learning_rate": 6.225111720195731e-07, "loss": 0.808, "step": 9945 }, { "epoch": 0.890570261346466, "grad_norm": 1.0099944870087119, "learning_rate": 6.215042555112327e-07, "loss": 0.7426, "step": 9946 }, { "epoch": 0.890659801891544, "grad_norm": 0.9521833797242738, "learning_rate": 6.204981278913936e-07, "loss": 0.8149, "step": 9947 }, { "epoch": 0.8907493424366221, "grad_norm": 0.9774130601149447, "learning_rate": 6.194927892446878e-07, "loss": 0.7971, "step": 9948 }, { "epoch": 0.8908388829817001, "grad_norm": 0.9646551441927631, "learning_rate": 6.184882396556779e-07, "loss": 0.8152, "step": 9949 }, { "epoch": 0.8909284235267783, "grad_norm": 0.9062045091522603, "learning_rate": 6.174844792088652e-07, "loss": 0.7824, "step": 9950 }, { "epoch": 0.8910179640718563, "grad_norm": 1.0978580092150452, "learning_rate": 6.164815079886844e-07, "loss": 0.8009, "step": 9951 }, { "epoch": 0.8911075046169343, "grad_norm": 0.9793458529125768, "learning_rate": 6.154793260795011e-07, "loss": 0.795, "step": 9952 }, { "epoch": 0.8911970451620124, "grad_norm": 1.0084524542284112, "learning_rate": 6.144779335656159e-07, "loss": 0.7663, "step": 9953 }, { "epoch": 0.8912865857070905, "grad_norm": 1.0838937083673923, "learning_rate": 6.134773305312636e-07, "loss": 0.8439, "step": 9954 }, { "epoch": 0.8913761262521686, "grad_norm": 1.0583795847350332, "learning_rate": 6.12477517060609e-07, "loss": 0.7878, "step": 9955 }, { "epoch": 0.8914656667972466, "grad_norm": 0.9437666283381655, "learning_rate": 6.114784932377526e-07, "loss": 0.814, "step": 9956 }, { "epoch": 0.8915552073423247, "grad_norm": 0.8876357868918994, "learning_rate": 6.104802591467329e-07, "loss": 0.785, "step": 9957 }, { "epoch": 0.8916447478874028, "grad_norm": 0.9453750217581149, "learning_rate": 6.09482814871516e-07, "loss": 0.8062, "step": 9958 }, { "epoch": 0.8917342884324808, "grad_norm": 1.0427685090786825, "learning_rate": 6.084861604960047e-07, "loss": 0.7681, "step": 9959 }, { "epoch": 0.8918238289775589, "grad_norm": 0.930499158074984, "learning_rate": 6.074902961040319e-07, "loss": 0.76, "step": 9960 }, { "epoch": 0.891913369522637, "grad_norm": 0.9588367291430306, "learning_rate": 6.064952217793685e-07, "loss": 0.8256, "step": 9961 }, { "epoch": 0.892002910067715, "grad_norm": 0.9988894418366342, "learning_rate": 6.055009376057152e-07, "loss": 0.8183, "step": 9962 }, { "epoch": 0.8920924506127931, "grad_norm": 1.0562786970788445, "learning_rate": 6.045074436667108e-07, "loss": 0.7772, "step": 9963 }, { "epoch": 0.8921819911578712, "grad_norm": 1.1765680904902591, "learning_rate": 6.035147400459218e-07, "loss": 0.8826, "step": 9964 }, { "epoch": 0.8922715317029493, "grad_norm": 1.027828479486301, "learning_rate": 6.025228268268557e-07, "loss": 0.8447, "step": 9965 }, { "epoch": 0.8923610722480273, "grad_norm": 0.9697866937651108, "learning_rate": 6.015317040929425e-07, "loss": 0.7632, "step": 9966 }, { "epoch": 0.8924506127931053, "grad_norm": 1.0517693589092671, "learning_rate": 6.005413719275566e-07, "loss": 0.7657, "step": 9967 }, { "epoch": 0.8925401533381835, "grad_norm": 1.1655550570603423, "learning_rate": 5.995518304139991e-07, "loss": 0.7837, "step": 9968 }, { "epoch": 0.8926296938832615, "grad_norm": 0.9466188600696482, "learning_rate": 5.985630796355091e-07, "loss": 0.7869, "step": 9969 }, { "epoch": 0.8927192344283396, "grad_norm": 0.9335773209850894, "learning_rate": 5.975751196752589e-07, "loss": 0.8414, "step": 9970 }, { "epoch": 0.8928087749734176, "grad_norm": 0.9782809613187378, "learning_rate": 5.965879506163475e-07, "loss": 0.7461, "step": 9971 }, { "epoch": 0.8928983155184957, "grad_norm": 1.0069936035871836, "learning_rate": 5.956015725418152e-07, "loss": 0.8177, "step": 9972 }, { "epoch": 0.8929878560635738, "grad_norm": 0.9305892452213185, "learning_rate": 5.946159855346323e-07, "loss": 0.7436, "step": 9973 }, { "epoch": 0.8930773966086518, "grad_norm": 1.0075484139974755, "learning_rate": 5.936311896777014e-07, "loss": 0.8406, "step": 9974 }, { "epoch": 0.89316693715373, "grad_norm": 0.9853121460991587, "learning_rate": 5.92647185053864e-07, "loss": 0.8117, "step": 9975 }, { "epoch": 0.893256477698808, "grad_norm": 1.0441078609605476, "learning_rate": 5.916639717458917e-07, "loss": 0.8082, "step": 9976 }, { "epoch": 0.893346018243886, "grad_norm": 1.181633530126853, "learning_rate": 5.906815498364848e-07, "loss": 0.8002, "step": 9977 }, { "epoch": 0.8934355587889641, "grad_norm": 1.0580123822669165, "learning_rate": 5.896999194082842e-07, "loss": 0.8372, "step": 9978 }, { "epoch": 0.8935250993340422, "grad_norm": 0.8959174586658117, "learning_rate": 5.887190805438614e-07, "loss": 0.7456, "step": 9979 }, { "epoch": 0.8936146398791203, "grad_norm": 0.9565687323618474, "learning_rate": 5.877390333257204e-07, "loss": 0.8184, "step": 9980 }, { "epoch": 0.8937041804241983, "grad_norm": 1.11615953951939, "learning_rate": 5.86759777836301e-07, "loss": 0.8719, "step": 9981 }, { "epoch": 0.8937937209692765, "grad_norm": 0.9379327677049312, "learning_rate": 5.857813141579726e-07, "loss": 0.8166, "step": 9982 }, { "epoch": 0.8938832615143545, "grad_norm": 0.9393516389690907, "learning_rate": 5.84803642373043e-07, "loss": 0.7903, "step": 9983 }, { "epoch": 0.8939728020594325, "grad_norm": 1.22291923194786, "learning_rate": 5.838267625637495e-07, "loss": 0.8486, "step": 9984 }, { "epoch": 0.8940623426045106, "grad_norm": 1.1976238436112334, "learning_rate": 5.828506748122642e-07, "loss": 0.7178, "step": 9985 }, { "epoch": 0.8941518831495887, "grad_norm": 0.9812165181039328, "learning_rate": 5.818753792006926e-07, "loss": 0.8028, "step": 9986 }, { "epoch": 0.8942414236946667, "grad_norm": 0.9854806076048201, "learning_rate": 5.809008758110724e-07, "loss": 0.782, "step": 9987 }, { "epoch": 0.8943309642397448, "grad_norm": 1.0270250325290047, "learning_rate": 5.799271647253768e-07, "loss": 0.79, "step": 9988 }, { "epoch": 0.8944205047848228, "grad_norm": 0.9379729769780246, "learning_rate": 5.789542460255115e-07, "loss": 0.753, "step": 9989 }, { "epoch": 0.894510045329901, "grad_norm": 1.0063728667137473, "learning_rate": 5.779821197933144e-07, "loss": 0.8373, "step": 9990 }, { "epoch": 0.894599585874979, "grad_norm": 0.9547802631041389, "learning_rate": 5.770107861105578e-07, "loss": 0.8641, "step": 9991 }, { "epoch": 0.894689126420057, "grad_norm": 1.2546355839015437, "learning_rate": 5.760402450589464e-07, "loss": 0.786, "step": 9992 }, { "epoch": 0.8947786669651352, "grad_norm": 0.9694806063263535, "learning_rate": 5.750704967201204e-07, "loss": 0.7878, "step": 9993 }, { "epoch": 0.8948682075102132, "grad_norm": 0.9465183345372448, "learning_rate": 5.741015411756513e-07, "loss": 0.8305, "step": 9994 }, { "epoch": 0.8949577480552913, "grad_norm": 0.8765383184654157, "learning_rate": 5.731333785070437e-07, "loss": 0.7951, "step": 9995 }, { "epoch": 0.8950472886003693, "grad_norm": 1.156313780653024, "learning_rate": 5.721660087957382e-07, "loss": 0.7603, "step": 9996 }, { "epoch": 0.8951368291454475, "grad_norm": 0.9460604138619739, "learning_rate": 5.71199432123104e-07, "loss": 0.7983, "step": 9997 }, { "epoch": 0.8952263696905255, "grad_norm": 0.9873503895829151, "learning_rate": 5.702336485704485e-07, "loss": 0.7793, "step": 9998 }, { "epoch": 0.8953159102356035, "grad_norm": 1.0067657581726523, "learning_rate": 5.692686582190099e-07, "loss": 0.81, "step": 9999 }, { "epoch": 0.8954054507806817, "grad_norm": 1.0171821290039484, "learning_rate": 5.6830446114996e-07, "loss": 0.8167, "step": 10000 }, { "epoch": 0.8954949913257597, "grad_norm": 1.1356278967575386, "learning_rate": 5.673410574444027e-07, "loss": 0.8391, "step": 10001 }, { "epoch": 0.8955845318708378, "grad_norm": 1.0570739890030072, "learning_rate": 5.663784471833777e-07, "loss": 0.7987, "step": 10002 }, { "epoch": 0.8956740724159158, "grad_norm": 0.9585881496514032, "learning_rate": 5.654166304478581e-07, "loss": 0.7719, "step": 10003 }, { "epoch": 0.8957636129609939, "grad_norm": 0.9710179853590815, "learning_rate": 5.644556073187446e-07, "loss": 0.83, "step": 10004 }, { "epoch": 0.895853153506072, "grad_norm": 0.9030289756216271, "learning_rate": 5.634953778768793e-07, "loss": 0.7851, "step": 10005 }, { "epoch": 0.89594269405115, "grad_norm": 0.9163993010636187, "learning_rate": 5.625359422030308e-07, "loss": 0.8146, "step": 10006 }, { "epoch": 0.896032234596228, "grad_norm": 0.9855617993120946, "learning_rate": 5.615773003779057e-07, "loss": 0.8588, "step": 10007 }, { "epoch": 0.8961217751413062, "grad_norm": 0.9556209325252594, "learning_rate": 5.606194524821429e-07, "loss": 0.7774, "step": 10008 }, { "epoch": 0.8962113156863842, "grad_norm": 0.9430905772706917, "learning_rate": 5.596623985963101e-07, "loss": 0.7826, "step": 10009 }, { "epoch": 0.8963008562314623, "grad_norm": 1.1351390436002864, "learning_rate": 5.587061388009107e-07, "loss": 0.8333, "step": 10010 }, { "epoch": 0.8963903967765404, "grad_norm": 0.9654083945280173, "learning_rate": 5.577506731763871e-07, "loss": 0.8238, "step": 10011 }, { "epoch": 0.8964799373216185, "grad_norm": 0.8906166103605484, "learning_rate": 5.56796001803106e-07, "loss": 0.7749, "step": 10012 }, { "epoch": 0.8965694778666965, "grad_norm": 0.9357488861191008, "learning_rate": 5.558421247613732e-07, "loss": 0.7675, "step": 10013 }, { "epoch": 0.8966590184117745, "grad_norm": 1.092934498926836, "learning_rate": 5.548890421314279e-07, "loss": 0.791, "step": 10014 }, { "epoch": 0.8967485589568527, "grad_norm": 1.0274872931425811, "learning_rate": 5.539367539934348e-07, "loss": 0.768, "step": 10015 }, { "epoch": 0.8968380995019307, "grad_norm": 0.9666219707935592, "learning_rate": 5.529852604274987e-07, "loss": 0.7263, "step": 10016 }, { "epoch": 0.8969276400470088, "grad_norm": 1.0058974570161976, "learning_rate": 5.520345615136591e-07, "loss": 0.7374, "step": 10017 }, { "epoch": 0.8970171805920869, "grad_norm": 1.0380446827022656, "learning_rate": 5.51084657331884e-07, "loss": 0.8489, "step": 10018 }, { "epoch": 0.8971067211371649, "grad_norm": 0.9199319773731917, "learning_rate": 5.501355479620774e-07, "loss": 0.7997, "step": 10019 }, { "epoch": 0.897196261682243, "grad_norm": 0.9749102380171594, "learning_rate": 5.491872334840731e-07, "loss": 0.8171, "step": 10020 }, { "epoch": 0.897285802227321, "grad_norm": 0.9896420546965614, "learning_rate": 5.482397139776419e-07, "loss": 0.7918, "step": 10021 }, { "epoch": 0.8973753427723992, "grad_norm": 1.0291700929950498, "learning_rate": 5.472929895224832e-07, "loss": 0.8009, "step": 10022 }, { "epoch": 0.8974648833174772, "grad_norm": 0.9003815295508676, "learning_rate": 5.463470601982357e-07, "loss": 0.797, "step": 10023 }, { "epoch": 0.8975544238625552, "grad_norm": 0.9684958679330604, "learning_rate": 5.454019260844678e-07, "loss": 0.8074, "step": 10024 }, { "epoch": 0.8976439644076333, "grad_norm": 1.0409909361803587, "learning_rate": 5.444575872606816e-07, "loss": 0.8185, "step": 10025 }, { "epoch": 0.8977335049527114, "grad_norm": 1.3769915433772986, "learning_rate": 5.43514043806308e-07, "loss": 0.7734, "step": 10026 }, { "epoch": 0.8978230454977895, "grad_norm": 0.9903127022847581, "learning_rate": 5.425712958007179e-07, "loss": 0.8282, "step": 10027 }, { "epoch": 0.8979125860428675, "grad_norm": 0.9343133209674779, "learning_rate": 5.4162934332321e-07, "loss": 0.7948, "step": 10028 }, { "epoch": 0.8980021265879456, "grad_norm": 0.944797588323456, "learning_rate": 5.406881864530212e-07, "loss": 0.7728, "step": 10029 }, { "epoch": 0.8980916671330237, "grad_norm": 1.0412404995015119, "learning_rate": 5.397478252693178e-07, "loss": 0.8245, "step": 10030 }, { "epoch": 0.8981812076781017, "grad_norm": 1.0598063934066146, "learning_rate": 5.38808259851199e-07, "loss": 0.8477, "step": 10031 }, { "epoch": 0.8982707482231798, "grad_norm": 0.9716796782053765, "learning_rate": 5.37869490277697e-07, "loss": 0.7857, "step": 10032 }, { "epoch": 0.8983602887682579, "grad_norm": 1.039570692534801, "learning_rate": 5.3693151662778e-07, "loss": 0.8099, "step": 10033 }, { "epoch": 0.898449829313336, "grad_norm": 1.1566494930140174, "learning_rate": 5.359943389803457e-07, "loss": 0.8102, "step": 10034 }, { "epoch": 0.898539369858414, "grad_norm": 0.961363097045426, "learning_rate": 5.350579574142256e-07, "loss": 0.8231, "step": 10035 }, { "epoch": 0.8986289104034921, "grad_norm": 0.9896523149800057, "learning_rate": 5.3412237200819e-07, "loss": 0.8019, "step": 10036 }, { "epoch": 0.8987184509485702, "grad_norm": 1.1049265003686415, "learning_rate": 5.331875828409327e-07, "loss": 0.7827, "step": 10037 }, { "epoch": 0.8988079914936482, "grad_norm": 1.01670629572103, "learning_rate": 5.322535899910863e-07, "loss": 0.786, "step": 10038 }, { "epoch": 0.8988975320387262, "grad_norm": 1.0511879150515668, "learning_rate": 5.313203935372158e-07, "loss": 0.7791, "step": 10039 }, { "epoch": 0.8989870725838044, "grad_norm": 1.0393754871319771, "learning_rate": 5.303879935578182e-07, "loss": 0.7647, "step": 10040 }, { "epoch": 0.8990766131288824, "grad_norm": 1.116070174573382, "learning_rate": 5.294563901313232e-07, "loss": 0.8441, "step": 10041 }, { "epoch": 0.8991661536739605, "grad_norm": 1.1020580966620328, "learning_rate": 5.285255833360947e-07, "loss": 0.7845, "step": 10042 }, { "epoch": 0.8992556942190385, "grad_norm": 1.0971457403664175, "learning_rate": 5.275955732504301e-07, "loss": 0.8115, "step": 10043 }, { "epoch": 0.8993452347641167, "grad_norm": 0.9251690386558937, "learning_rate": 5.266663599525579e-07, "loss": 0.7981, "step": 10044 }, { "epoch": 0.8994347753091947, "grad_norm": 0.9515745576765963, "learning_rate": 5.257379435206411e-07, "loss": 0.7693, "step": 10045 }, { "epoch": 0.8995243158542727, "grad_norm": 0.9206058224340953, "learning_rate": 5.248103240327739e-07, "loss": 0.8, "step": 10046 }, { "epoch": 0.8996138563993509, "grad_norm": 0.9695190083188647, "learning_rate": 5.238835015669863e-07, "loss": 0.809, "step": 10047 }, { "epoch": 0.8997033969444289, "grad_norm": 1.0747155323889144, "learning_rate": 5.229574762012379e-07, "loss": 0.8441, "step": 10048 }, { "epoch": 0.899792937489507, "grad_norm": 0.9480782021570312, "learning_rate": 5.220322480134243e-07, "loss": 0.8133, "step": 10049 }, { "epoch": 0.899882478034585, "grad_norm": 1.0084423027138234, "learning_rate": 5.21107817081371e-07, "loss": 0.8253, "step": 10050 }, { "epoch": 0.8999720185796631, "grad_norm": 1.1366616227214632, "learning_rate": 5.201841834828402e-07, "loss": 0.8468, "step": 10051 }, { "epoch": 0.9000615591247412, "grad_norm": 0.95141204694734, "learning_rate": 5.192613472955243e-07, "loss": 0.7937, "step": 10052 }, { "epoch": 0.9001510996698192, "grad_norm": 1.194573400406679, "learning_rate": 5.183393085970478e-07, "loss": 0.8464, "step": 10053 }, { "epoch": 0.9002406402148974, "grad_norm": 1.025638331724656, "learning_rate": 5.174180674649721e-07, "loss": 0.8315, "step": 10054 }, { "epoch": 0.9003301807599754, "grad_norm": 1.0656291840117451, "learning_rate": 5.164976239767872e-07, "loss": 0.7724, "step": 10055 }, { "epoch": 0.9004197213050534, "grad_norm": 1.2677672814740164, "learning_rate": 5.15577978209918e-07, "loss": 0.8444, "step": 10056 }, { "epoch": 0.9005092618501315, "grad_norm": 0.9602801985604429, "learning_rate": 5.146591302417236e-07, "loss": 0.7902, "step": 10057 }, { "epoch": 0.9005988023952096, "grad_norm": 0.9237174185980306, "learning_rate": 5.137410801494902e-07, "loss": 0.851, "step": 10058 }, { "epoch": 0.9006883429402877, "grad_norm": 0.9987198666443217, "learning_rate": 5.128238280104458e-07, "loss": 0.7455, "step": 10059 }, { "epoch": 0.9007778834853657, "grad_norm": 0.972489091235027, "learning_rate": 5.119073739017455e-07, "loss": 0.7527, "step": 10060 }, { "epoch": 0.9008674240304437, "grad_norm": 0.9771328558788107, "learning_rate": 5.109917179004775e-07, "loss": 0.8108, "step": 10061 }, { "epoch": 0.9009569645755219, "grad_norm": 1.0808820207200132, "learning_rate": 5.100768600836647e-07, "loss": 0.797, "step": 10062 }, { "epoch": 0.9010465051205999, "grad_norm": 1.2570395925133155, "learning_rate": 5.091628005282634e-07, "loss": 0.8235, "step": 10063 }, { "epoch": 0.901136045665678, "grad_norm": 0.9532771180583974, "learning_rate": 5.082495393111564e-07, "loss": 0.8272, "step": 10064 }, { "epoch": 0.9012255862107561, "grad_norm": 0.9492457797170617, "learning_rate": 5.073370765091678e-07, "loss": 0.8173, "step": 10065 }, { "epoch": 0.9013151267558341, "grad_norm": 1.0395294566610298, "learning_rate": 5.06425412199052e-07, "loss": 0.7871, "step": 10066 }, { "epoch": 0.9014046673009122, "grad_norm": 1.386683375318751, "learning_rate": 5.055145464574929e-07, "loss": 0.7876, "step": 10067 }, { "epoch": 0.9014942078459902, "grad_norm": 0.9790797269435996, "learning_rate": 5.046044793611126e-07, "loss": 0.8369, "step": 10068 }, { "epoch": 0.9015837483910684, "grad_norm": 1.056691953471654, "learning_rate": 5.036952109864579e-07, "loss": 0.7543, "step": 10069 }, { "epoch": 0.9016732889361464, "grad_norm": 1.1379718579382234, "learning_rate": 5.027867414100163e-07, "loss": 0.8071, "step": 10070 }, { "epoch": 0.9017628294812244, "grad_norm": 0.921164365577179, "learning_rate": 5.018790707082066e-07, "loss": 0.7555, "step": 10071 }, { "epoch": 0.9018523700263026, "grad_norm": 0.9301831413461399, "learning_rate": 5.009721989573779e-07, "loss": 0.776, "step": 10072 }, { "epoch": 0.9019419105713806, "grad_norm": 1.0118199467700413, "learning_rate": 5.000661262338135e-07, "loss": 0.7844, "step": 10073 }, { "epoch": 0.9020314511164587, "grad_norm": 0.8888780205303195, "learning_rate": 4.991608526137293e-07, "loss": 0.7208, "step": 10074 }, { "epoch": 0.9021209916615367, "grad_norm": 1.1829061196171597, "learning_rate": 4.982563781732741e-07, "loss": 0.7916, "step": 10075 }, { "epoch": 0.9022105322066148, "grad_norm": 0.9640349294479128, "learning_rate": 4.973527029885261e-07, "loss": 0.7989, "step": 10076 }, { "epoch": 0.9023000727516929, "grad_norm": 1.04957770290259, "learning_rate": 4.964498271355044e-07, "loss": 0.7915, "step": 10077 }, { "epoch": 0.9023896132967709, "grad_norm": 0.9269261850396617, "learning_rate": 4.95547750690154e-07, "loss": 0.7832, "step": 10078 }, { "epoch": 0.902479153841849, "grad_norm": 1.0523619531204833, "learning_rate": 4.946464737283562e-07, "loss": 0.7505, "step": 10079 }, { "epoch": 0.9025686943869271, "grad_norm": 0.905655591157747, "learning_rate": 4.937459963259206e-07, "loss": 0.7648, "step": 10080 }, { "epoch": 0.9026582349320051, "grad_norm": 1.142042422030418, "learning_rate": 4.92846318558593e-07, "loss": 0.7564, "step": 10081 }, { "epoch": 0.9027477754770832, "grad_norm": 0.9556214693921594, "learning_rate": 4.919474405020519e-07, "loss": 0.8269, "step": 10082 }, { "epoch": 0.9028373160221613, "grad_norm": 1.2390691981544155, "learning_rate": 4.910493622319079e-07, "loss": 0.789, "step": 10083 }, { "epoch": 0.9029268565672394, "grad_norm": 0.9645356460804934, "learning_rate": 4.901520838237062e-07, "loss": 0.8099, "step": 10084 }, { "epoch": 0.9030163971123174, "grad_norm": 0.9711658016216062, "learning_rate": 4.892556053529218e-07, "loss": 0.7426, "step": 10085 }, { "epoch": 0.9031059376573954, "grad_norm": 0.9766055890572426, "learning_rate": 4.883599268949624e-07, "loss": 0.7409, "step": 10086 }, { "epoch": 0.9031954782024736, "grad_norm": 0.9445889788747694, "learning_rate": 4.874650485251697e-07, "loss": 0.752, "step": 10087 }, { "epoch": 0.9032850187475516, "grad_norm": 0.9919993056573089, "learning_rate": 4.865709703188193e-07, "loss": 0.8041, "step": 10088 }, { "epoch": 0.9033745592926297, "grad_norm": 0.9849671556416439, "learning_rate": 4.856776923511164e-07, "loss": 0.8344, "step": 10089 }, { "epoch": 0.9034640998377078, "grad_norm": 1.1437429843570166, "learning_rate": 4.847852146972032e-07, "loss": 0.8529, "step": 10090 }, { "epoch": 0.9035536403827858, "grad_norm": 0.9991252872277344, "learning_rate": 4.838935374321496e-07, "loss": 0.7914, "step": 10091 }, { "epoch": 0.9036431809278639, "grad_norm": 0.9551529646731034, "learning_rate": 4.830026606309623e-07, "loss": 0.8113, "step": 10092 }, { "epoch": 0.9037327214729419, "grad_norm": 1.1020543772302855, "learning_rate": 4.82112584368577e-07, "loss": 0.7484, "step": 10093 }, { "epoch": 0.9038222620180201, "grad_norm": 1.069545370514513, "learning_rate": 4.812233087198659e-07, "loss": 0.845, "step": 10094 }, { "epoch": 0.9039118025630981, "grad_norm": 1.2093081507042924, "learning_rate": 4.803348337596292e-07, "loss": 0.7743, "step": 10095 }, { "epoch": 0.9040013431081761, "grad_norm": 0.9619773413538854, "learning_rate": 4.794471595626071e-07, "loss": 0.7626, "step": 10096 }, { "epoch": 0.9040908836532542, "grad_norm": 0.979422762207851, "learning_rate": 4.785602862034644e-07, "loss": 0.8242, "step": 10097 }, { "epoch": 0.9041804241983323, "grad_norm": 1.072534225404781, "learning_rate": 4.776742137568025e-07, "loss": 0.7809, "step": 10098 }, { "epoch": 0.9042699647434104, "grad_norm": 0.8997036291387532, "learning_rate": 4.767889422971561e-07, "loss": 0.7604, "step": 10099 }, { "epoch": 0.9043595052884884, "grad_norm": 0.9221515491070817, "learning_rate": 4.7590447189899025e-07, "loss": 0.7794, "step": 10100 }, { "epoch": 0.9044490458335666, "grad_norm": 1.0677456446305342, "learning_rate": 4.7502080263670315e-07, "loss": 0.7368, "step": 10101 }, { "epoch": 0.9045385863786446, "grad_norm": 0.9372921038753773, "learning_rate": 4.7413793458462755e-07, "loss": 0.8296, "step": 10102 }, { "epoch": 0.9046281269237226, "grad_norm": 0.9621980107243769, "learning_rate": 4.7325586781702737e-07, "loss": 0.7697, "step": 10103 }, { "epoch": 0.9047176674688007, "grad_norm": 0.9941961937849999, "learning_rate": 4.7237460240809884e-07, "loss": 0.8402, "step": 10104 }, { "epoch": 0.9048072080138788, "grad_norm": 0.9666086497788448, "learning_rate": 4.7149413843197047e-07, "loss": 0.7389, "step": 10105 }, { "epoch": 0.9048967485589569, "grad_norm": 1.0544866015025116, "learning_rate": 4.7061447596270407e-07, "loss": 0.7683, "step": 10106 }, { "epoch": 0.9049862891040349, "grad_norm": 1.1227543198350987, "learning_rate": 4.69735615074296e-07, "loss": 0.8433, "step": 10107 }, { "epoch": 0.905075829649113, "grad_norm": 0.9333528756997943, "learning_rate": 4.688575558406705e-07, "loss": 0.7538, "step": 10108 }, { "epoch": 0.9051653701941911, "grad_norm": 1.0740345105537787, "learning_rate": 4.679802983356885e-07, "loss": 0.8125, "step": 10109 }, { "epoch": 0.9052549107392691, "grad_norm": 0.9514236392400367, "learning_rate": 4.67103842633142e-07, "loss": 0.83, "step": 10110 }, { "epoch": 0.9053444512843472, "grad_norm": 1.0201465676417034, "learning_rate": 4.662281888067555e-07, "loss": 0.8018, "step": 10111 }, { "epoch": 0.9054339918294253, "grad_norm": 1.0368656668849545, "learning_rate": 4.653533369301855e-07, "loss": 0.8218, "step": 10112 }, { "epoch": 0.9055235323745033, "grad_norm": 0.9105575020161973, "learning_rate": 4.644792870770221e-07, "loss": 0.8092, "step": 10113 }, { "epoch": 0.9056130729195814, "grad_norm": 1.0608751803786725, "learning_rate": 4.636060393207886e-07, "loss": 0.808, "step": 10114 }, { "epoch": 0.9057026134646594, "grad_norm": 1.0439123481748924, "learning_rate": 4.6273359373493753e-07, "loss": 0.8156, "step": 10115 }, { "epoch": 0.9057921540097376, "grad_norm": 0.9011035952167434, "learning_rate": 4.61861950392859e-07, "loss": 0.7735, "step": 10116 }, { "epoch": 0.9058816945548156, "grad_norm": 1.0058359741762124, "learning_rate": 4.6099110936786985e-07, "loss": 0.8308, "step": 10117 }, { "epoch": 0.9059712350998936, "grad_norm": 0.9815458544954996, "learning_rate": 4.601210707332238e-07, "loss": 0.8557, "step": 10118 }, { "epoch": 0.9060607756449718, "grad_norm": 0.9465152881641007, "learning_rate": 4.5925183456210665e-07, "loss": 0.7849, "step": 10119 }, { "epoch": 0.9061503161900498, "grad_norm": 0.9082264006103713, "learning_rate": 4.5838340092763444e-07, "loss": 0.7726, "step": 10120 }, { "epoch": 0.9062398567351279, "grad_norm": 1.0123992821896415, "learning_rate": 4.5751576990285654e-07, "loss": 0.804, "step": 10121 }, { "epoch": 0.9063293972802059, "grad_norm": 0.9972070069634262, "learning_rate": 4.566489415607567e-07, "loss": 0.8332, "step": 10122 }, { "epoch": 0.906418937825284, "grad_norm": 1.0163971681306823, "learning_rate": 4.5578291597424995e-07, "loss": 0.7979, "step": 10123 }, { "epoch": 0.9065084783703621, "grad_norm": 1.0401426624712895, "learning_rate": 4.5491769321617916e-07, "loss": 0.8442, "step": 10124 }, { "epoch": 0.9065980189154401, "grad_norm": 0.9318571258489935, "learning_rate": 4.5405327335932946e-07, "loss": 0.8376, "step": 10125 }, { "epoch": 0.9066875594605183, "grad_norm": 1.0706044477015795, "learning_rate": 4.5318965647641153e-07, "loss": 0.8298, "step": 10126 }, { "epoch": 0.9067771000055963, "grad_norm": 1.0621141142874881, "learning_rate": 4.5232684264006845e-07, "loss": 0.7918, "step": 10127 }, { "epoch": 0.9068666405506743, "grad_norm": 1.0272380538193457, "learning_rate": 4.514648319228798e-07, "loss": 0.7986, "step": 10128 }, { "epoch": 0.9069561810957524, "grad_norm": 0.9323940423354363, "learning_rate": 4.5060362439735326e-07, "loss": 0.7786, "step": 10129 }, { "epoch": 0.9070457216408305, "grad_norm": 0.9685156727642298, "learning_rate": 4.4974322013592865e-07, "loss": 0.8252, "step": 10130 }, { "epoch": 0.9071352621859086, "grad_norm": 0.9884631665684678, "learning_rate": 4.4888361921098466e-07, "loss": 0.8122, "step": 10131 }, { "epoch": 0.9072248027309866, "grad_norm": 1.0370423356327139, "learning_rate": 4.4802482169482687e-07, "loss": 0.8216, "step": 10132 }, { "epoch": 0.9073143432760646, "grad_norm": 1.0412646465163535, "learning_rate": 4.471668276596941e-07, "loss": 0.8337, "step": 10133 }, { "epoch": 0.9074038838211428, "grad_norm": 1.1843716308736492, "learning_rate": 4.4630963717775864e-07, "loss": 0.804, "step": 10134 }, { "epoch": 0.9074934243662208, "grad_norm": 0.9016651865895908, "learning_rate": 4.4545325032112284e-07, "loss": 0.7711, "step": 10135 }, { "epoch": 0.9075829649112989, "grad_norm": 0.9886539876548629, "learning_rate": 4.445976671618224e-07, "loss": 0.766, "step": 10136 }, { "epoch": 0.907672505456377, "grad_norm": 1.0446380252464238, "learning_rate": 4.4374288777182973e-07, "loss": 0.8285, "step": 10137 }, { "epoch": 0.907762046001455, "grad_norm": 0.9307301307100393, "learning_rate": 4.42888912223044e-07, "loss": 0.8114, "step": 10138 }, { "epoch": 0.9078515865465331, "grad_norm": 1.0145652387066293, "learning_rate": 4.4203574058730105e-07, "loss": 0.7678, "step": 10139 }, { "epoch": 0.9079411270916111, "grad_norm": 0.9407210564805037, "learning_rate": 4.4118337293636346e-07, "loss": 0.7867, "step": 10140 }, { "epoch": 0.9080306676366893, "grad_norm": 1.0252578729277835, "learning_rate": 4.4033180934193065e-07, "loss": 0.7869, "step": 10141 }, { "epoch": 0.9081202081817673, "grad_norm": 1.0430196845598696, "learning_rate": 4.3948104987563414e-07, "loss": 0.797, "step": 10142 }, { "epoch": 0.9082097487268453, "grad_norm": 0.9958909556896179, "learning_rate": 4.3863109460903554e-07, "loss": 0.7784, "step": 10143 }, { "epoch": 0.9082992892719235, "grad_norm": 0.9211012379629752, "learning_rate": 4.3778194361363323e-07, "loss": 0.788, "step": 10144 }, { "epoch": 0.9083888298170015, "grad_norm": 1.0138301343921006, "learning_rate": 4.369335969608546e-07, "loss": 0.826, "step": 10145 }, { "epoch": 0.9084783703620796, "grad_norm": 0.9057143437302676, "learning_rate": 4.36086054722058e-07, "loss": 0.7801, "step": 10146 }, { "epoch": 0.9085679109071576, "grad_norm": 1.0678102513188796, "learning_rate": 4.352393169685354e-07, "loss": 0.7603, "step": 10147 }, { "epoch": 0.9086574514522358, "grad_norm": 1.3628057258257065, "learning_rate": 4.343933837715131e-07, "loss": 0.798, "step": 10148 }, { "epoch": 0.9087469919973138, "grad_norm": 0.9315675573957879, "learning_rate": 4.335482552021464e-07, "loss": 0.7851, "step": 10149 }, { "epoch": 0.9088365325423918, "grad_norm": 0.9553972345262876, "learning_rate": 4.3270393133152845e-07, "loss": 0.8499, "step": 10150 }, { "epoch": 0.9089260730874699, "grad_norm": 1.0400301828637, "learning_rate": 4.31860412230678e-07, "loss": 0.8176, "step": 10151 }, { "epoch": 0.909015613632548, "grad_norm": 0.9600373634190053, "learning_rate": 4.310176979705505e-07, "loss": 0.8263, "step": 10152 }, { "epoch": 0.909105154177626, "grad_norm": 0.9263208593672583, "learning_rate": 4.301757886220315e-07, "loss": 0.8405, "step": 10153 }, { "epoch": 0.9091946947227041, "grad_norm": 0.928051505557771, "learning_rate": 4.2933468425593984e-07, "loss": 0.8087, "step": 10154 }, { "epoch": 0.9092842352677822, "grad_norm": 1.024351940536971, "learning_rate": 4.284943849430245e-07, "loss": 0.8081, "step": 10155 }, { "epoch": 0.9093737758128603, "grad_norm": 0.9950679271675924, "learning_rate": 4.2765489075397457e-07, "loss": 0.8267, "step": 10156 }, { "epoch": 0.9094633163579383, "grad_norm": 0.9504087689479536, "learning_rate": 4.268162017594002e-07, "loss": 0.7857, "step": 10157 }, { "epoch": 0.9095528569030163, "grad_norm": 1.0887880979556708, "learning_rate": 4.2597831802985044e-07, "loss": 0.8338, "step": 10158 }, { "epoch": 0.9096423974480945, "grad_norm": 0.8642062703817855, "learning_rate": 4.2514123963580564e-07, "loss": 0.8047, "step": 10159 }, { "epoch": 0.9097319379931725, "grad_norm": 0.9896844748287289, "learning_rate": 4.243049666476784e-07, "loss": 0.798, "step": 10160 }, { "epoch": 0.9098214785382506, "grad_norm": 0.9746433301020835, "learning_rate": 4.2346949913581236e-07, "loss": 0.8103, "step": 10161 }, { "epoch": 0.9099110190833287, "grad_norm": 1.0602153839573412, "learning_rate": 4.226348371704858e-07, "loss": 0.7734, "step": 10162 }, { "epoch": 0.9100005596284068, "grad_norm": 1.1885229280885539, "learning_rate": 4.218009808219059e-07, "loss": 0.8571, "step": 10163 }, { "epoch": 0.9100901001734848, "grad_norm": 0.9180805827794428, "learning_rate": 4.2096793016021655e-07, "loss": 0.7572, "step": 10164 }, { "epoch": 0.9101796407185628, "grad_norm": 0.9973093324549029, "learning_rate": 4.201356852554883e-07, "loss": 0.7732, "step": 10165 }, { "epoch": 0.910269181263641, "grad_norm": 1.0270850759923835, "learning_rate": 4.193042461777286e-07, "loss": 0.7463, "step": 10166 }, { "epoch": 0.910358721808719, "grad_norm": 0.906425058716286, "learning_rate": 4.184736129968758e-07, "loss": 0.8019, "step": 10167 }, { "epoch": 0.910448262353797, "grad_norm": 0.9900453178096095, "learning_rate": 4.176437857827986e-07, "loss": 0.7878, "step": 10168 }, { "epoch": 0.9105378028988751, "grad_norm": 0.8977143647306958, "learning_rate": 4.168147646053e-07, "loss": 0.7941, "step": 10169 }, { "epoch": 0.9106273434439532, "grad_norm": 0.9872389206654621, "learning_rate": 4.1598654953411535e-07, "loss": 0.7914, "step": 10170 }, { "epoch": 0.9107168839890313, "grad_norm": 1.0319970666729261, "learning_rate": 4.1515914063890993e-07, "loss": 0.7406, "step": 10171 }, { "epoch": 0.9108064245341093, "grad_norm": 1.0257465439504205, "learning_rate": 4.1433253798928374e-07, "loss": 0.7766, "step": 10172 }, { "epoch": 0.9108959650791875, "grad_norm": 1.034188943002644, "learning_rate": 4.135067416547678e-07, "loss": 0.823, "step": 10173 }, { "epoch": 0.9109855056242655, "grad_norm": 0.9158666103136256, "learning_rate": 4.126817517048243e-07, "loss": 0.762, "step": 10174 }, { "epoch": 0.9110750461693435, "grad_norm": 0.9446223979817144, "learning_rate": 4.11857568208851e-07, "loss": 0.816, "step": 10175 }, { "epoch": 0.9111645867144216, "grad_norm": 0.9531483832959262, "learning_rate": 4.110341912361726e-07, "loss": 0.7757, "step": 10176 }, { "epoch": 0.9112541272594997, "grad_norm": 1.0616937631812924, "learning_rate": 4.102116208560514e-07, "loss": 0.7469, "step": 10177 }, { "epoch": 0.9113436678045778, "grad_norm": 1.0062312838250378, "learning_rate": 4.0938985713767864e-07, "loss": 0.7843, "step": 10178 }, { "epoch": 0.9114332083496558, "grad_norm": 1.0058391514229243, "learning_rate": 4.0856890015017803e-07, "loss": 0.8034, "step": 10179 }, { "epoch": 0.911522748894734, "grad_norm": 0.9376062084849668, "learning_rate": 4.077487499626054e-07, "loss": 0.7801, "step": 10180 }, { "epoch": 0.911612289439812, "grad_norm": 1.0009489326886032, "learning_rate": 4.0692940664395e-07, "loss": 0.8122, "step": 10181 }, { "epoch": 0.91170182998489, "grad_norm": 1.0748307119914122, "learning_rate": 4.061108702631311e-07, "loss": 0.7995, "step": 10182 }, { "epoch": 0.9117913705299681, "grad_norm": 1.0781528884024503, "learning_rate": 4.0529314088900487e-07, "loss": 0.8007, "step": 10183 }, { "epoch": 0.9118809110750462, "grad_norm": 0.990521916376194, "learning_rate": 4.044762185903495e-07, "loss": 0.8206, "step": 10184 }, { "epoch": 0.9119704516201242, "grad_norm": 1.123736018925702, "learning_rate": 4.036601034358878e-07, "loss": 0.7949, "step": 10185 }, { "epoch": 0.9120599921652023, "grad_norm": 0.8628727479393513, "learning_rate": 4.02844795494266e-07, "loss": 0.7432, "step": 10186 }, { "epoch": 0.9121495327102803, "grad_norm": 1.2678452501147484, "learning_rate": 4.0203029483406595e-07, "loss": 0.8428, "step": 10187 }, { "epoch": 0.9122390732553585, "grad_norm": 0.9471598808735879, "learning_rate": 4.0121660152380173e-07, "loss": 0.7749, "step": 10188 }, { "epoch": 0.9123286138004365, "grad_norm": 0.8510305240144038, "learning_rate": 4.0040371563191627e-07, "loss": 0.7768, "step": 10189 }, { "epoch": 0.9124181543455145, "grad_norm": 1.1740486285560436, "learning_rate": 3.995916372267872e-07, "loss": 0.756, "step": 10190 }, { "epoch": 0.9125076948905927, "grad_norm": 1.0382179407886767, "learning_rate": 3.9878036637672535e-07, "loss": 0.8139, "step": 10191 }, { "epoch": 0.9125972354356707, "grad_norm": 1.262502287741117, "learning_rate": 3.9796990314997176e-07, "loss": 0.7537, "step": 10192 }, { "epoch": 0.9126867759807488, "grad_norm": 1.0480601614789575, "learning_rate": 3.9716024761469963e-07, "loss": 0.8344, "step": 10193 }, { "epoch": 0.9127763165258268, "grad_norm": 1.0270592052910643, "learning_rate": 3.963513998390156e-07, "loss": 0.799, "step": 10194 }, { "epoch": 0.912865857070905, "grad_norm": 1.0804660201421288, "learning_rate": 3.955433598909553e-07, "loss": 0.7522, "step": 10195 }, { "epoch": 0.912955397615983, "grad_norm": 1.009607226771179, "learning_rate": 3.947361278384898e-07, "loss": 0.8016, "step": 10196 }, { "epoch": 0.913044938161061, "grad_norm": 0.9831734748289264, "learning_rate": 3.9392970374951935e-07, "loss": 0.8159, "step": 10197 }, { "epoch": 0.9131344787061392, "grad_norm": 0.9031422659253104, "learning_rate": 3.931240876918796e-07, "loss": 0.7399, "step": 10198 }, { "epoch": 0.9132240192512172, "grad_norm": 1.141100402046188, "learning_rate": 3.923192797333375e-07, "loss": 0.7608, "step": 10199 }, { "epoch": 0.9133135597962952, "grad_norm": 0.9437089718228813, "learning_rate": 3.915152799415867e-07, "loss": 0.7851, "step": 10200 }, { "epoch": 0.9134031003413733, "grad_norm": 1.0448268550414508, "learning_rate": 3.9071208838426077e-07, "loss": 0.8008, "step": 10201 }, { "epoch": 0.9134926408864514, "grad_norm": 1.3473923993908299, "learning_rate": 3.899097051289191e-07, "loss": 0.798, "step": 10202 }, { "epoch": 0.9135821814315295, "grad_norm": 0.8617714468052282, "learning_rate": 3.891081302430555e-07, "loss": 0.709, "step": 10203 }, { "epoch": 0.9136717219766075, "grad_norm": 0.9085286574816174, "learning_rate": 3.883073637940982e-07, "loss": 0.7427, "step": 10204 }, { "epoch": 0.9137612625216855, "grad_norm": 0.9788919857980458, "learning_rate": 3.875074058494055e-07, "loss": 0.8121, "step": 10205 }, { "epoch": 0.9138508030667637, "grad_norm": 0.9906544647316243, "learning_rate": 3.867082564762636e-07, "loss": 0.8012, "step": 10206 }, { "epoch": 0.9139403436118417, "grad_norm": 0.9351380561191347, "learning_rate": 3.859099157418966e-07, "loss": 0.7901, "step": 10207 }, { "epoch": 0.9140298841569198, "grad_norm": 1.0301687794620427, "learning_rate": 3.851123837134585e-07, "loss": 0.7747, "step": 10208 }, { "epoch": 0.9141194247019979, "grad_norm": 0.950858494470719, "learning_rate": 3.8431566045803335e-07, "loss": 0.8272, "step": 10209 }, { "epoch": 0.914208965247076, "grad_norm": 0.9586689899143684, "learning_rate": 3.835197460426421e-07, "loss": 0.8256, "step": 10210 }, { "epoch": 0.914298505792154, "grad_norm": 1.095436161544864, "learning_rate": 3.8272464053423106e-07, "loss": 0.8512, "step": 10211 }, { "epoch": 0.914388046337232, "grad_norm": 1.005235550853481, "learning_rate": 3.8193034399968465e-07, "loss": 0.7989, "step": 10212 }, { "epoch": 0.9144775868823102, "grad_norm": 0.974354920986707, "learning_rate": 3.811368565058138e-07, "loss": 0.7909, "step": 10213 }, { "epoch": 0.9145671274273882, "grad_norm": 1.0260295532449546, "learning_rate": 3.803441781193662e-07, "loss": 0.8174, "step": 10214 }, { "epoch": 0.9146566679724663, "grad_norm": 1.023667998690655, "learning_rate": 3.7955230890701743e-07, "loss": 0.8216, "step": 10215 }, { "epoch": 0.9147462085175444, "grad_norm": 0.9335557702771213, "learning_rate": 3.7876124893538093e-07, "loss": 0.8065, "step": 10216 }, { "epoch": 0.9148357490626224, "grad_norm": 1.0419615955520867, "learning_rate": 3.779709982709945e-07, "loss": 0.8072, "step": 10217 }, { "epoch": 0.9149252896077005, "grad_norm": 1.0283557518177207, "learning_rate": 3.7718155698033166e-07, "loss": 0.8066, "step": 10218 }, { "epoch": 0.9150148301527785, "grad_norm": 1.4801867096547554, "learning_rate": 3.763929251297982e-07, "loss": 0.8334, "step": 10219 }, { "epoch": 0.9151043706978567, "grad_norm": 0.9288379508981409, "learning_rate": 3.7560510278573215e-07, "loss": 0.7398, "step": 10220 }, { "epoch": 0.9151939112429347, "grad_norm": 0.90143203968346, "learning_rate": 3.748180900144016e-07, "loss": 0.7875, "step": 10221 }, { "epoch": 0.9152834517880127, "grad_norm": 1.0819278691546608, "learning_rate": 3.7403188688200697e-07, "loss": 0.8123, "step": 10222 }, { "epoch": 0.9153729923330908, "grad_norm": 0.961700817108955, "learning_rate": 3.73246493454682e-07, "loss": 0.8046, "step": 10223 }, { "epoch": 0.9154625328781689, "grad_norm": 0.9379860371131326, "learning_rate": 3.7246190979849164e-07, "loss": 0.7576, "step": 10224 }, { "epoch": 0.915552073423247, "grad_norm": 0.9357138820755269, "learning_rate": 3.7167813597943305e-07, "loss": 0.837, "step": 10225 }, { "epoch": 0.915641613968325, "grad_norm": 0.9985275547669544, "learning_rate": 3.708951720634324e-07, "loss": 0.7893, "step": 10226 }, { "epoch": 0.9157311545134031, "grad_norm": 0.9334485696251853, "learning_rate": 3.701130181163515e-07, "loss": 0.7995, "step": 10227 }, { "epoch": 0.9158206950584812, "grad_norm": 0.959824260452471, "learning_rate": 3.693316742039832e-07, "loss": 0.8478, "step": 10228 }, { "epoch": 0.9159102356035592, "grad_norm": 1.076958330532077, "learning_rate": 3.6855114039205164e-07, "loss": 0.7893, "step": 10229 }, { "epoch": 0.9159997761486373, "grad_norm": 0.9601203776817674, "learning_rate": 3.6777141674621095e-07, "loss": 0.7753, "step": 10230 }, { "epoch": 0.9160893166937154, "grad_norm": 0.9981316968393739, "learning_rate": 3.6699250333204984e-07, "loss": 0.7932, "step": 10231 }, { "epoch": 0.9161788572387934, "grad_norm": 1.1356403369756547, "learning_rate": 3.6621440021508916e-07, "loss": 0.8022, "step": 10232 }, { "epoch": 0.9162683977838715, "grad_norm": 0.9584375790747502, "learning_rate": 3.654371074607788e-07, "loss": 0.8468, "step": 10233 }, { "epoch": 0.9163579383289496, "grad_norm": 0.8981791017133977, "learning_rate": 3.646606251345031e-07, "loss": 0.7901, "step": 10234 }, { "epoch": 0.9164474788740277, "grad_norm": 1.0212996232050626, "learning_rate": 3.638849533015776e-07, "loss": 0.8039, "step": 10235 }, { "epoch": 0.9165370194191057, "grad_norm": 0.9762218321327993, "learning_rate": 3.631100920272479e-07, "loss": 0.8268, "step": 10236 }, { "epoch": 0.9166265599641837, "grad_norm": 0.9567346393948778, "learning_rate": 3.62336041376693e-07, "loss": 0.7728, "step": 10237 }, { "epoch": 0.9167161005092619, "grad_norm": 0.919520551745331, "learning_rate": 3.615628014150241e-07, "loss": 0.7774, "step": 10238 }, { "epoch": 0.9168056410543399, "grad_norm": 1.0762646118226438, "learning_rate": 3.6079037220728475e-07, "loss": 0.7836, "step": 10239 }, { "epoch": 0.916895181599418, "grad_norm": 1.0924485726285225, "learning_rate": 3.600187538184463e-07, "loss": 0.8134, "step": 10240 }, { "epoch": 0.916984722144496, "grad_norm": 1.0753690952790407, "learning_rate": 3.5924794631341797e-07, "loss": 0.8113, "step": 10241 }, { "epoch": 0.9170742626895741, "grad_norm": 0.9986894702516604, "learning_rate": 3.584779497570345e-07, "loss": 0.7979, "step": 10242 }, { "epoch": 0.9171638032346522, "grad_norm": 0.9876691529749223, "learning_rate": 3.5770876421406974e-07, "loss": 0.7847, "step": 10243 }, { "epoch": 0.9172533437797302, "grad_norm": 0.9212064134206783, "learning_rate": 3.5694038974921854e-07, "loss": 0.7636, "step": 10244 }, { "epoch": 0.9173428843248084, "grad_norm": 1.0202913456382599, "learning_rate": 3.5617282642712025e-07, "loss": 0.7814, "step": 10245 }, { "epoch": 0.9174324248698864, "grad_norm": 0.9392893567436356, "learning_rate": 3.5540607431233665e-07, "loss": 0.7895, "step": 10246 }, { "epoch": 0.9175219654149644, "grad_norm": 0.9483487718985076, "learning_rate": 3.546401334693661e-07, "loss": 0.8258, "step": 10247 }, { "epoch": 0.9176115059600425, "grad_norm": 0.9518491887397156, "learning_rate": 3.5387500396263486e-07, "loss": 0.7746, "step": 10248 }, { "epoch": 0.9177010465051206, "grad_norm": 0.962848039465841, "learning_rate": 3.5311068585650697e-07, "loss": 0.7786, "step": 10249 }, { "epoch": 0.9177905870501987, "grad_norm": 0.9081570365689157, "learning_rate": 3.5234717921526997e-07, "loss": 0.763, "step": 10250 }, { "epoch": 0.9178801275952767, "grad_norm": 1.1049134053822405, "learning_rate": 3.5158448410314796e-07, "loss": 0.8214, "step": 10251 }, { "epoch": 0.9179696681403549, "grad_norm": 0.9728374112404232, "learning_rate": 3.508226005842996e-07, "loss": 0.7774, "step": 10252 }, { "epoch": 0.9180592086854329, "grad_norm": 1.1548049432173089, "learning_rate": 3.500615287228093e-07, "loss": 0.8208, "step": 10253 }, { "epoch": 0.9181487492305109, "grad_norm": 1.054341147029551, "learning_rate": 3.493012685826991e-07, "loss": 0.8193, "step": 10254 }, { "epoch": 0.918238289775589, "grad_norm": 0.9251262072918132, "learning_rate": 3.485418202279156e-07, "loss": 0.829, "step": 10255 }, { "epoch": 0.9183278303206671, "grad_norm": 0.9895891884770561, "learning_rate": 3.477831837223433e-07, "loss": 0.7604, "step": 10256 }, { "epoch": 0.9184173708657452, "grad_norm": 1.0290115796664598, "learning_rate": 3.470253591297945e-07, "loss": 0.799, "step": 10257 }, { "epoch": 0.9185069114108232, "grad_norm": 1.0921393300558306, "learning_rate": 3.462683465140182e-07, "loss": 0.8361, "step": 10258 }, { "epoch": 0.9185964519559012, "grad_norm": 1.6036710637757527, "learning_rate": 3.455121459386901e-07, "loss": 0.8026, "step": 10259 }, { "epoch": 0.9186859925009794, "grad_norm": 0.9964055039713121, "learning_rate": 3.447567574674193e-07, "loss": 0.8651, "step": 10260 }, { "epoch": 0.9187755330460574, "grad_norm": 1.1435343128440822, "learning_rate": 3.4400218116374505e-07, "loss": 0.8444, "step": 10261 }, { "epoch": 0.9188650735911355, "grad_norm": 0.9001133359565945, "learning_rate": 3.432484170911421e-07, "loss": 0.8203, "step": 10262 }, { "epoch": 0.9189546141362136, "grad_norm": 0.9802295841943113, "learning_rate": 3.4249546531301194e-07, "loss": 0.7422, "step": 10263 }, { "epoch": 0.9190441546812916, "grad_norm": 1.2147861103605762, "learning_rate": 3.417433258926939e-07, "loss": 0.794, "step": 10264 }, { "epoch": 0.9191336952263697, "grad_norm": 1.6637536706191507, "learning_rate": 3.4099199889345515e-07, "loss": 0.7764, "step": 10265 }, { "epoch": 0.9192232357714477, "grad_norm": 1.0152571170785933, "learning_rate": 3.4024148437849293e-07, "loss": 0.816, "step": 10266 }, { "epoch": 0.9193127763165259, "grad_norm": 0.9804225187867743, "learning_rate": 3.394917824109378e-07, "loss": 0.7826, "step": 10267 }, { "epoch": 0.9194023168616039, "grad_norm": 0.8952562960633743, "learning_rate": 3.3874289305385387e-07, "loss": 0.7971, "step": 10268 }, { "epoch": 0.9194918574066819, "grad_norm": 1.0381131654830873, "learning_rate": 3.379948163702329e-07, "loss": 0.8116, "step": 10269 }, { "epoch": 0.9195813979517601, "grad_norm": 1.0244903444210625, "learning_rate": 3.3724755242300454e-07, "loss": 0.809, "step": 10270 }, { "epoch": 0.9196709384968381, "grad_norm": 0.9754442902640937, "learning_rate": 3.36501101275023e-07, "loss": 0.8143, "step": 10271 }, { "epoch": 0.9197604790419162, "grad_norm": 0.9811801563602721, "learning_rate": 3.3575546298907914e-07, "loss": 0.7601, "step": 10272 }, { "epoch": 0.9198500195869942, "grad_norm": 0.9697593878880849, "learning_rate": 3.3501063762789167e-07, "loss": 0.8295, "step": 10273 }, { "epoch": 0.9199395601320723, "grad_norm": 1.1223603139606662, "learning_rate": 3.342666252541149e-07, "loss": 0.8063, "step": 10274 }, { "epoch": 0.9200291006771504, "grad_norm": 0.9491278929281788, "learning_rate": 3.335234259303299e-07, "loss": 0.7802, "step": 10275 }, { "epoch": 0.9201186412222284, "grad_norm": 1.169816092425431, "learning_rate": 3.3278103971905787e-07, "loss": 0.8143, "step": 10276 }, { "epoch": 0.9202081817673065, "grad_norm": 1.160915738208693, "learning_rate": 3.320394666827398e-07, "loss": 0.8234, "step": 10277 }, { "epoch": 0.9202977223123846, "grad_norm": 0.9200495827881482, "learning_rate": 3.31298706883757e-07, "loss": 0.786, "step": 10278 }, { "epoch": 0.9203872628574626, "grad_norm": 1.0247845313753299, "learning_rate": 3.3055876038441957e-07, "loss": 0.7649, "step": 10279 }, { "epoch": 0.9204768034025407, "grad_norm": 0.9226900042092755, "learning_rate": 3.298196272469689e-07, "loss": 0.7964, "step": 10280 }, { "epoch": 0.9205663439476188, "grad_norm": 1.037153589774467, "learning_rate": 3.290813075335797e-07, "loss": 0.8249, "step": 10281 }, { "epoch": 0.9206558844926969, "grad_norm": 1.0114486902126962, "learning_rate": 3.283438013063567e-07, "loss": 0.7942, "step": 10282 }, { "epoch": 0.9207454250377749, "grad_norm": 0.9007216029574396, "learning_rate": 3.276071086273347e-07, "loss": 0.8047, "step": 10283 }, { "epoch": 0.9208349655828529, "grad_norm": 0.944909183193198, "learning_rate": 3.2687122955848416e-07, "loss": 0.8241, "step": 10284 }, { "epoch": 0.9209245061279311, "grad_norm": 0.9951093603143992, "learning_rate": 3.2613616416170334e-07, "loss": 0.761, "step": 10285 }, { "epoch": 0.9210140466730091, "grad_norm": 0.9956023000366161, "learning_rate": 3.25401912498825e-07, "loss": 0.7973, "step": 10286 }, { "epoch": 0.9211035872180872, "grad_norm": 1.1392744362956087, "learning_rate": 3.246684746316109e-07, "loss": 0.7902, "step": 10287 }, { "epoch": 0.9211931277631653, "grad_norm": 1.059350103027795, "learning_rate": 3.239358506217549e-07, "loss": 0.8146, "step": 10288 }, { "epoch": 0.9212826683082433, "grad_norm": 0.9903693958007836, "learning_rate": 3.232040405308845e-07, "loss": 0.7529, "step": 10289 }, { "epoch": 0.9213722088533214, "grad_norm": 0.9631068734171713, "learning_rate": 3.224730444205559e-07, "loss": 0.8133, "step": 10290 }, { "epoch": 0.9214617493983994, "grad_norm": 0.9861696943105579, "learning_rate": 3.2174286235225895e-07, "loss": 0.8438, "step": 10291 }, { "epoch": 0.9215512899434776, "grad_norm": 1.1105409002779312, "learning_rate": 3.2101349438741324e-07, "loss": 0.8627, "step": 10292 }, { "epoch": 0.9216408304885556, "grad_norm": 0.9394942590410027, "learning_rate": 3.20284940587372e-07, "loss": 0.8236, "step": 10293 }, { "epoch": 0.9217303710336336, "grad_norm": 1.0301808724016983, "learning_rate": 3.195572010134185e-07, "loss": 0.7757, "step": 10294 }, { "epoch": 0.9218199115787117, "grad_norm": 0.9860910615224818, "learning_rate": 3.1883027572676697e-07, "loss": 0.8276, "step": 10295 }, { "epoch": 0.9219094521237898, "grad_norm": 0.9416708530185449, "learning_rate": 3.181041647885641e-07, "loss": 0.7481, "step": 10296 }, { "epoch": 0.9219989926688679, "grad_norm": 0.8926805369350793, "learning_rate": 3.1737886825988995e-07, "loss": 0.7517, "step": 10297 }, { "epoch": 0.9220885332139459, "grad_norm": 0.8924847220579502, "learning_rate": 3.166543862017513e-07, "loss": 0.7453, "step": 10298 }, { "epoch": 0.922178073759024, "grad_norm": 1.0101931714195473, "learning_rate": 3.159307186750915e-07, "loss": 0.791, "step": 10299 }, { "epoch": 0.9222676143041021, "grad_norm": 1.030110910704043, "learning_rate": 3.152078657407809e-07, "loss": 0.7684, "step": 10300 }, { "epoch": 0.9223571548491801, "grad_norm": 0.944199747066465, "learning_rate": 3.144858274596263e-07, "loss": 0.8038, "step": 10301 }, { "epoch": 0.9224466953942582, "grad_norm": 0.9859854548067637, "learning_rate": 3.137646038923603e-07, "loss": 0.8124, "step": 10302 }, { "epoch": 0.9225362359393363, "grad_norm": 1.0820241874930916, "learning_rate": 3.1304419509965324e-07, "loss": 0.7333, "step": 10303 }, { "epoch": 0.9226257764844144, "grad_norm": 0.9875040169485647, "learning_rate": 3.1232460114209994e-07, "loss": 0.8067, "step": 10304 }, { "epoch": 0.9227153170294924, "grad_norm": 1.059337545399125, "learning_rate": 3.116058220802309e-07, "loss": 0.7892, "step": 10305 }, { "epoch": 0.9228048575745705, "grad_norm": 1.1345716548424762, "learning_rate": 3.1088785797451004e-07, "loss": 0.77, "step": 10306 }, { "epoch": 0.9228943981196486, "grad_norm": 0.9082387943931942, "learning_rate": 3.1017070888532895e-07, "loss": 0.7913, "step": 10307 }, { "epoch": 0.9229839386647266, "grad_norm": 1.0800572438110203, "learning_rate": 3.0945437487301054e-07, "loss": 0.8557, "step": 10308 }, { "epoch": 0.9230734792098046, "grad_norm": 0.9727558260436585, "learning_rate": 3.0873885599781326e-07, "loss": 0.8255, "step": 10309 }, { "epoch": 0.9231630197548828, "grad_norm": 1.122570728119037, "learning_rate": 3.080241523199212e-07, "loss": 0.7331, "step": 10310 }, { "epoch": 0.9232525602999608, "grad_norm": 0.9674791684600307, "learning_rate": 3.07310263899453e-07, "loss": 0.8036, "step": 10311 }, { "epoch": 0.9233421008450389, "grad_norm": 0.9632815091071751, "learning_rate": 3.0659719079646045e-07, "loss": 0.7544, "step": 10312 }, { "epoch": 0.9234316413901169, "grad_norm": 0.9610519144862579, "learning_rate": 3.058849330709246e-07, "loss": 0.8489, "step": 10313 }, { "epoch": 0.9235211819351951, "grad_norm": 0.9901522043304055, "learning_rate": 3.051734907827586e-07, "loss": 0.7595, "step": 10314 }, { "epoch": 0.9236107224802731, "grad_norm": 1.2710529545449645, "learning_rate": 3.0446286399180567e-07, "loss": 0.8001, "step": 10315 }, { "epoch": 0.9237002630253511, "grad_norm": 0.9065470831554353, "learning_rate": 3.037530527578414e-07, "loss": 0.7901, "step": 10316 }, { "epoch": 0.9237898035704293, "grad_norm": 0.9823717561159129, "learning_rate": 3.030440571405724e-07, "loss": 0.8213, "step": 10317 }, { "epoch": 0.9238793441155073, "grad_norm": 0.9493127558503596, "learning_rate": 3.0233587719963875e-07, "loss": 0.8097, "step": 10318 }, { "epoch": 0.9239688846605854, "grad_norm": 0.9483944054844584, "learning_rate": 3.0162851299460836e-07, "loss": 0.7701, "step": 10319 }, { "epoch": 0.9240584252056634, "grad_norm": 0.9532077109712128, "learning_rate": 3.009219645849859e-07, "loss": 0.8277, "step": 10320 }, { "epoch": 0.9241479657507415, "grad_norm": 1.1378269722144438, "learning_rate": 3.0021623203019933e-07, "loss": 0.7869, "step": 10321 }, { "epoch": 0.9242375062958196, "grad_norm": 0.9190880425942959, "learning_rate": 2.9951131538961453e-07, "loss": 0.7729, "step": 10322 }, { "epoch": 0.9243270468408976, "grad_norm": 0.9922502567049982, "learning_rate": 2.9880721472252627e-07, "loss": 0.8062, "step": 10323 }, { "epoch": 0.9244165873859758, "grad_norm": 0.9252633115390057, "learning_rate": 2.9810393008816275e-07, "loss": 0.7611, "step": 10324 }, { "epoch": 0.9245061279310538, "grad_norm": 0.9539961758438952, "learning_rate": 2.974014615456822e-07, "loss": 0.777, "step": 10325 }, { "epoch": 0.9245956684761318, "grad_norm": 0.9429084171187704, "learning_rate": 2.9669980915417175e-07, "loss": 0.7313, "step": 10326 }, { "epoch": 0.9246852090212099, "grad_norm": 1.0522027215298702, "learning_rate": 2.959989729726531e-07, "loss": 0.7939, "step": 10327 }, { "epoch": 0.924774749566288, "grad_norm": 1.0881795568192887, "learning_rate": 2.9529895306007805e-07, "loss": 0.8458, "step": 10328 }, { "epoch": 0.9248642901113661, "grad_norm": 0.9690530505580998, "learning_rate": 2.945997494753294e-07, "loss": 0.8775, "step": 10329 }, { "epoch": 0.9249538306564441, "grad_norm": 0.909305688869949, "learning_rate": 2.9390136227722464e-07, "loss": 0.7908, "step": 10330 }, { "epoch": 0.9250433712015221, "grad_norm": 1.0150985933068009, "learning_rate": 2.9320379152450783e-07, "loss": 0.8191, "step": 10331 }, { "epoch": 0.9251329117466003, "grad_norm": 0.9587437953238741, "learning_rate": 2.925070372758565e-07, "loss": 0.7941, "step": 10332 }, { "epoch": 0.9252224522916783, "grad_norm": 1.120583201473498, "learning_rate": 2.9181109958987817e-07, "loss": 0.7995, "step": 10333 }, { "epoch": 0.9253119928367564, "grad_norm": 0.8789668274939847, "learning_rate": 2.9111597852511495e-07, "loss": 0.7506, "step": 10334 }, { "epoch": 0.9254015333818345, "grad_norm": 1.0986920550165604, "learning_rate": 2.9042167414003674e-07, "loss": 0.7957, "step": 10335 }, { "epoch": 0.9254910739269125, "grad_norm": 1.1090794969060618, "learning_rate": 2.897281864930468e-07, "loss": 0.8128, "step": 10336 }, { "epoch": 0.9255806144719906, "grad_norm": 1.1587259788149673, "learning_rate": 2.8903551564247956e-07, "loss": 0.8103, "step": 10337 }, { "epoch": 0.9256701550170686, "grad_norm": 1.0380396401821168, "learning_rate": 2.883436616465984e-07, "loss": 0.7883, "step": 10338 }, { "epoch": 0.9257596955621468, "grad_norm": 0.9692384434054838, "learning_rate": 2.876526245636013e-07, "loss": 0.766, "step": 10339 }, { "epoch": 0.9258492361072248, "grad_norm": 1.0648099914291804, "learning_rate": 2.8696240445161617e-07, "loss": 0.8155, "step": 10340 }, { "epoch": 0.9259387766523028, "grad_norm": 0.9918486308179162, "learning_rate": 2.862730013687021e-07, "loss": 0.7431, "step": 10341 }, { "epoch": 0.926028317197381, "grad_norm": 1.059412567669058, "learning_rate": 2.855844153728482e-07, "loss": 0.8154, "step": 10342 }, { "epoch": 0.926117857742459, "grad_norm": 1.0525522720107576, "learning_rate": 2.8489664652197714e-07, "loss": 0.8666, "step": 10343 }, { "epoch": 0.9262073982875371, "grad_norm": 1.012495694735497, "learning_rate": 2.8420969487394147e-07, "loss": 0.7846, "step": 10344 }, { "epoch": 0.9262969388326151, "grad_norm": 0.976706633798738, "learning_rate": 2.835235604865261e-07, "loss": 0.7858, "step": 10345 }, { "epoch": 0.9263864793776933, "grad_norm": 0.9018973504000815, "learning_rate": 2.828382434174448e-07, "loss": 0.7854, "step": 10346 }, { "epoch": 0.9264760199227713, "grad_norm": 0.970085026579723, "learning_rate": 2.821537437243449e-07, "loss": 0.794, "step": 10347 }, { "epoch": 0.9265655604678493, "grad_norm": 1.0037054561559644, "learning_rate": 2.8147006146480584e-07, "loss": 0.8138, "step": 10348 }, { "epoch": 0.9266551010129274, "grad_norm": 0.8810066634005758, "learning_rate": 2.8078719669633383e-07, "loss": 0.7904, "step": 10349 }, { "epoch": 0.9267446415580055, "grad_norm": 1.0135292821936814, "learning_rate": 2.8010514947637177e-07, "loss": 0.8122, "step": 10350 }, { "epoch": 0.9268341821030835, "grad_norm": 0.9306043484605416, "learning_rate": 2.794239198622906e-07, "loss": 0.7808, "step": 10351 }, { "epoch": 0.9269237226481616, "grad_norm": 1.0871645898060174, "learning_rate": 2.7874350791139203e-07, "loss": 0.8072, "step": 10352 }, { "epoch": 0.9270132631932397, "grad_norm": 1.126376367334948, "learning_rate": 2.7806391368091046e-07, "loss": 0.7444, "step": 10353 }, { "epoch": 0.9271028037383178, "grad_norm": 1.058420830710669, "learning_rate": 2.773851372280123e-07, "loss": 0.7458, "step": 10354 }, { "epoch": 0.9271923442833958, "grad_norm": 1.0891769264821776, "learning_rate": 2.76707178609793e-07, "loss": 0.7713, "step": 10355 }, { "epoch": 0.9272818848284738, "grad_norm": 0.9484715847668866, "learning_rate": 2.760300378832803e-07, "loss": 0.7573, "step": 10356 }, { "epoch": 0.927371425373552, "grad_norm": 0.9158990676619223, "learning_rate": 2.753537151054342e-07, "loss": 0.8169, "step": 10357 }, { "epoch": 0.92746096591863, "grad_norm": 0.9800790731279037, "learning_rate": 2.7467821033314466e-07, "loss": 0.7589, "step": 10358 }, { "epoch": 0.9275505064637081, "grad_norm": 0.995090075901634, "learning_rate": 2.740035236232297e-07, "loss": 0.7251, "step": 10359 }, { "epoch": 0.9276400470087862, "grad_norm": 0.9738402382800051, "learning_rate": 2.733296550324449e-07, "loss": 0.7655, "step": 10360 }, { "epoch": 0.9277295875538643, "grad_norm": 1.1565707429976642, "learning_rate": 2.726566046174739e-07, "loss": 0.7754, "step": 10361 }, { "epoch": 0.9278191280989423, "grad_norm": 0.8982566934106699, "learning_rate": 2.7198437243493025e-07, "loss": 0.7621, "step": 10362 }, { "epoch": 0.9279086686440203, "grad_norm": 1.0640298414119185, "learning_rate": 2.71312958541361e-07, "loss": 0.8194, "step": 10363 }, { "epoch": 0.9279982091890985, "grad_norm": 0.9072130316880089, "learning_rate": 2.706423629932431e-07, "loss": 0.7721, "step": 10364 }, { "epoch": 0.9280877497341765, "grad_norm": 0.9423744965404699, "learning_rate": 2.699725858469826e-07, "loss": 0.8501, "step": 10365 }, { "epoch": 0.9281772902792546, "grad_norm": 1.1217944256901895, "learning_rate": 2.693036271589222e-07, "loss": 0.8126, "step": 10366 }, { "epoch": 0.9282668308243326, "grad_norm": 0.9436714953809203, "learning_rate": 2.686354869853303e-07, "loss": 0.7585, "step": 10367 }, { "epoch": 0.9283563713694107, "grad_norm": 1.039714796580645, "learning_rate": 2.6796816538241065e-07, "loss": 0.857, "step": 10368 }, { "epoch": 0.9284459119144888, "grad_norm": 1.014289499471726, "learning_rate": 2.673016624062963e-07, "loss": 0.8392, "step": 10369 }, { "epoch": 0.9285354524595668, "grad_norm": 0.9231815213737606, "learning_rate": 2.6663597811304897e-07, "loss": 0.7816, "step": 10370 }, { "epoch": 0.928624993004645, "grad_norm": 0.9471998627660329, "learning_rate": 2.659711125586628e-07, "loss": 0.7977, "step": 10371 }, { "epoch": 0.928714533549723, "grad_norm": 0.896761752933092, "learning_rate": 2.6530706579906863e-07, "loss": 0.7985, "step": 10372 }, { "epoch": 0.928804074094801, "grad_norm": 0.9602115457422237, "learning_rate": 2.646438378901217e-07, "loss": 0.7916, "step": 10373 }, { "epoch": 0.9288936146398791, "grad_norm": 0.9579676116823409, "learning_rate": 2.639814288876108e-07, "loss": 0.8112, "step": 10374 }, { "epoch": 0.9289831551849572, "grad_norm": 0.9668924661263862, "learning_rate": 2.6331983884725466e-07, "loss": 0.8005, "step": 10375 }, { "epoch": 0.9290726957300353, "grad_norm": 0.9538131929728025, "learning_rate": 2.626590678247043e-07, "loss": 0.8422, "step": 10376 }, { "epoch": 0.9291622362751133, "grad_norm": 1.0596792609344523, "learning_rate": 2.6199911587554197e-07, "loss": 0.7663, "step": 10377 }, { "epoch": 0.9292517768201914, "grad_norm": 0.9471124023156818, "learning_rate": 2.6133998305528094e-07, "loss": 0.7525, "step": 10378 }, { "epoch": 0.9293413173652695, "grad_norm": 0.9745855380654572, "learning_rate": 2.606816694193648e-07, "loss": 0.7865, "step": 10379 }, { "epoch": 0.9294308579103475, "grad_norm": 1.0311893063596922, "learning_rate": 2.6002417502317024e-07, "loss": 0.7812, "step": 10380 }, { "epoch": 0.9295203984554256, "grad_norm": 0.9572733969795126, "learning_rate": 2.5936749992200193e-07, "loss": 0.7657, "step": 10381 }, { "epoch": 0.9296099390005037, "grad_norm": 1.2680803777364593, "learning_rate": 2.5871164417109796e-07, "loss": 0.8401, "step": 10382 }, { "epoch": 0.9296994795455817, "grad_norm": 1.030995776714883, "learning_rate": 2.5805660782562524e-07, "loss": 0.7937, "step": 10383 }, { "epoch": 0.9297890200906598, "grad_norm": 0.9855559696009664, "learning_rate": 2.574023909406853e-07, "loss": 0.7523, "step": 10384 }, { "epoch": 0.9298785606357378, "grad_norm": 1.0349853667210318, "learning_rate": 2.5674899357130855e-07, "loss": 0.8233, "step": 10385 }, { "epoch": 0.929968101180816, "grad_norm": 0.9069962726098078, "learning_rate": 2.560964157724555e-07, "loss": 0.8279, "step": 10386 }, { "epoch": 0.930057641725894, "grad_norm": 0.9962518001381842, "learning_rate": 2.554446575990199e-07, "loss": 0.8293, "step": 10387 }, { "epoch": 0.930147182270972, "grad_norm": 1.045553853091121, "learning_rate": 2.547937191058247e-07, "loss": 0.7602, "step": 10388 }, { "epoch": 0.9302367228160502, "grad_norm": 1.0831403246692417, "learning_rate": 2.54143600347625e-07, "loss": 0.8274, "step": 10389 }, { "epoch": 0.9303262633611282, "grad_norm": 0.9766537188611281, "learning_rate": 2.534943013791069e-07, "loss": 0.8449, "step": 10390 }, { "epoch": 0.9304158039062063, "grad_norm": 0.9600251548628375, "learning_rate": 2.5284582225488685e-07, "loss": 0.7819, "step": 10391 }, { "epoch": 0.9305053444512843, "grad_norm": 1.007578000811047, "learning_rate": 2.521981630295134e-07, "loss": 0.8106, "step": 10392 }, { "epoch": 0.9305948849963624, "grad_norm": 1.0009351910819597, "learning_rate": 2.515513237574663e-07, "loss": 0.8332, "step": 10393 }, { "epoch": 0.9306844255414405, "grad_norm": 1.8080047228168352, "learning_rate": 2.509053044931531e-07, "loss": 0.7763, "step": 10394 }, { "epoch": 0.9307739660865185, "grad_norm": 0.9762181347922312, "learning_rate": 2.5026010529091704e-07, "loss": 0.8537, "step": 10395 }, { "epoch": 0.9308635066315967, "grad_norm": 0.9663398719882882, "learning_rate": 2.496157262050292e-07, "loss": 0.7595, "step": 10396 }, { "epoch": 0.9309530471766747, "grad_norm": 1.037134227478139, "learning_rate": 2.4897216728969274e-07, "loss": 0.8423, "step": 10397 }, { "epoch": 0.9310425877217527, "grad_norm": 0.9161485366988351, "learning_rate": 2.4832942859904117e-07, "loss": 0.7623, "step": 10398 }, { "epoch": 0.9311321282668308, "grad_norm": 1.0167785297189118, "learning_rate": 2.476875101871412e-07, "loss": 0.8529, "step": 10399 }, { "epoch": 0.9312216688119089, "grad_norm": 0.9052264599008792, "learning_rate": 2.4704641210798853e-07, "loss": 0.7476, "step": 10400 }, { "epoch": 0.931311209356987, "grad_norm": 1.050125075135331, "learning_rate": 2.464061344155089e-07, "loss": 0.8333, "step": 10401 }, { "epoch": 0.931400749902065, "grad_norm": 0.930114328107802, "learning_rate": 2.457666771635614e-07, "loss": 0.7778, "step": 10402 }, { "epoch": 0.931490290447143, "grad_norm": 1.0013715082926637, "learning_rate": 2.451280404059342e-07, "loss": 0.7943, "step": 10403 }, { "epoch": 0.9315798309922212, "grad_norm": 0.9509729367125315, "learning_rate": 2.444902241963487e-07, "loss": 0.8126, "step": 10404 }, { "epoch": 0.9316693715372992, "grad_norm": 0.9345979818784474, "learning_rate": 2.438532285884543e-07, "loss": 0.809, "step": 10405 }, { "epoch": 0.9317589120823773, "grad_norm": 0.9593798201420923, "learning_rate": 2.432170536358347e-07, "loss": 0.8004, "step": 10406 }, { "epoch": 0.9318484526274554, "grad_norm": 0.99697336642487, "learning_rate": 2.4258169939200273e-07, "loss": 0.8014, "step": 10407 }, { "epoch": 0.9319379931725335, "grad_norm": 0.9785393144245268, "learning_rate": 2.419471659104e-07, "loss": 0.8055, "step": 10408 }, { "epoch": 0.9320275337176115, "grad_norm": 1.028171431211335, "learning_rate": 2.41313453244405e-07, "loss": 0.786, "step": 10409 }, { "epoch": 0.9321170742626895, "grad_norm": 1.010397680456655, "learning_rate": 2.4068056144732067e-07, "loss": 0.8505, "step": 10410 }, { "epoch": 0.9322066148077677, "grad_norm": 0.9955135110873424, "learning_rate": 2.400484905723843e-07, "loss": 0.7747, "step": 10411 }, { "epoch": 0.9322961553528457, "grad_norm": 1.0221097185289496, "learning_rate": 2.394172406727657e-07, "loss": 0.8103, "step": 10412 }, { "epoch": 0.9323856958979237, "grad_norm": 1.0171951546289935, "learning_rate": 2.3878681180156015e-07, "loss": 0.7928, "step": 10413 }, { "epoch": 0.9324752364430019, "grad_norm": 1.0998398088031849, "learning_rate": 2.3815720401179965e-07, "loss": 0.7884, "step": 10414 }, { "epoch": 0.9325647769880799, "grad_norm": 0.8733095362659594, "learning_rate": 2.3752841735644405e-07, "loss": 0.7318, "step": 10415 }, { "epoch": 0.932654317533158, "grad_norm": 1.0088865572526513, "learning_rate": 2.369004518883855e-07, "loss": 0.8084, "step": 10416 }, { "epoch": 0.932743858078236, "grad_norm": 0.9864535188476026, "learning_rate": 2.362733076604451e-07, "loss": 0.7797, "step": 10417 }, { "epoch": 0.9328333986233142, "grad_norm": 0.9482907388659072, "learning_rate": 2.3564698472537838e-07, "loss": 0.8122, "step": 10418 }, { "epoch": 0.9329229391683922, "grad_norm": 1.0061781130828806, "learning_rate": 2.350214831358666e-07, "loss": 0.7581, "step": 10419 }, { "epoch": 0.9330124797134702, "grad_norm": 1.2253064826067683, "learning_rate": 2.343968029445276e-07, "loss": 0.7633, "step": 10420 }, { "epoch": 0.9331020202585483, "grad_norm": 0.9833587246362245, "learning_rate": 2.3377294420390494e-07, "loss": 0.7206, "step": 10421 }, { "epoch": 0.9331915608036264, "grad_norm": 0.9290378360302113, "learning_rate": 2.3314990696647888e-07, "loss": 0.7992, "step": 10422 }, { "epoch": 0.9332811013487045, "grad_norm": 0.8893130120937318, "learning_rate": 2.3252769128465524e-07, "loss": 0.7661, "step": 10423 }, { "epoch": 0.9333706418937825, "grad_norm": 1.0420498809111451, "learning_rate": 2.319062972107722e-07, "loss": 0.8284, "step": 10424 }, { "epoch": 0.9334601824388606, "grad_norm": 1.1484917327750357, "learning_rate": 2.3128572479709898e-07, "loss": 0.796, "step": 10425 }, { "epoch": 0.9335497229839387, "grad_norm": 1.0013306662768982, "learning_rate": 2.3066597409583836e-07, "loss": 0.8265, "step": 10426 }, { "epoch": 0.9336392635290167, "grad_norm": 0.9208649945924663, "learning_rate": 2.300470451591197e-07, "loss": 0.8429, "step": 10427 }, { "epoch": 0.9337288040740948, "grad_norm": 0.9834931805118609, "learning_rate": 2.29428938039008e-07, "loss": 0.7619, "step": 10428 }, { "epoch": 0.9338183446191729, "grad_norm": 1.0542958281207289, "learning_rate": 2.2881165278749395e-07, "loss": 0.8299, "step": 10429 }, { "epoch": 0.9339078851642509, "grad_norm": 0.9784948682955003, "learning_rate": 2.2819518945650265e-07, "loss": 0.7543, "step": 10430 }, { "epoch": 0.933997425709329, "grad_norm": 0.9222639783693087, "learning_rate": 2.2757954809788708e-07, "loss": 0.8038, "step": 10431 }, { "epoch": 0.9340869662544071, "grad_norm": 0.9693582647389698, "learning_rate": 2.2696472876343467e-07, "loss": 0.8179, "step": 10432 }, { "epoch": 0.9341765067994852, "grad_norm": 1.0020495750456926, "learning_rate": 2.2635073150486297e-07, "loss": 0.7546, "step": 10433 }, { "epoch": 0.9342660473445632, "grad_norm": 0.9935241366593474, "learning_rate": 2.2573755637381844e-07, "loss": 0.8453, "step": 10434 }, { "epoch": 0.9343555878896412, "grad_norm": 0.9632003375841415, "learning_rate": 2.251252034218787e-07, "loss": 0.8288, "step": 10435 }, { "epoch": 0.9344451284347194, "grad_norm": 1.0186367505331855, "learning_rate": 2.2451367270055368e-07, "loss": 0.7858, "step": 10436 }, { "epoch": 0.9345346689797974, "grad_norm": 1.0128222554324635, "learning_rate": 2.239029642612822e-07, "loss": 0.8095, "step": 10437 }, { "epoch": 0.9346242095248755, "grad_norm": 1.0641396031550858, "learning_rate": 2.2329307815543656e-07, "loss": 0.784, "step": 10438 }, { "epoch": 0.9347137500699535, "grad_norm": 1.0232975166969698, "learning_rate": 2.2268401443431787e-07, "loss": 0.8325, "step": 10439 }, { "epoch": 0.9348032906150316, "grad_norm": 0.9366500324068437, "learning_rate": 2.220757731491585e-07, "loss": 0.8368, "step": 10440 }, { "epoch": 0.9348928311601097, "grad_norm": 1.0107139582661775, "learning_rate": 2.2146835435112202e-07, "loss": 0.7973, "step": 10441 }, { "epoch": 0.9349823717051877, "grad_norm": 0.9500843459456265, "learning_rate": 2.2086175809130195e-07, "loss": 0.7387, "step": 10442 }, { "epoch": 0.9350719122502659, "grad_norm": 1.0255843173409078, "learning_rate": 2.2025598442072304e-07, "loss": 0.7849, "step": 10443 }, { "epoch": 0.9351614527953439, "grad_norm": 0.9332186133563786, "learning_rate": 2.196510333903412e-07, "loss": 0.8313, "step": 10444 }, { "epoch": 0.9352509933404219, "grad_norm": 1.0885072414475259, "learning_rate": 2.1904690505104466e-07, "loss": 0.7944, "step": 10445 }, { "epoch": 0.9353405338855, "grad_norm": 1.0428802716226617, "learning_rate": 2.1844359945364824e-07, "loss": 0.7758, "step": 10446 }, { "epoch": 0.9354300744305781, "grad_norm": 1.1410478269059265, "learning_rate": 2.178411166489014e-07, "loss": 0.8481, "step": 10447 }, { "epoch": 0.9355196149756562, "grad_norm": 0.9136794874718338, "learning_rate": 2.1723945668748248e-07, "loss": 0.7544, "step": 10448 }, { "epoch": 0.9356091555207342, "grad_norm": 0.9040025365359545, "learning_rate": 2.16638619620001e-07, "loss": 0.7958, "step": 10449 }, { "epoch": 0.9356986960658124, "grad_norm": 1.038759869576706, "learning_rate": 2.1603860549699763e-07, "loss": 0.7916, "step": 10450 }, { "epoch": 0.9357882366108904, "grad_norm": 1.036919434584511, "learning_rate": 2.1543941436894534e-07, "loss": 0.8351, "step": 10451 }, { "epoch": 0.9358777771559684, "grad_norm": 0.962887606867669, "learning_rate": 2.1484104628624268e-07, "loss": 0.7997, "step": 10452 }, { "epoch": 0.9359673177010465, "grad_norm": 0.9400624084996615, "learning_rate": 2.14243501299225e-07, "loss": 0.8222, "step": 10453 }, { "epoch": 0.9360568582461246, "grad_norm": 0.9504122725053432, "learning_rate": 2.1364677945815538e-07, "loss": 0.7356, "step": 10454 }, { "epoch": 0.9361463987912026, "grad_norm": 0.9413958258616596, "learning_rate": 2.13050880813227e-07, "loss": 0.7469, "step": 10455 }, { "epoch": 0.9362359393362807, "grad_norm": 1.0792866056927848, "learning_rate": 2.1245580541456645e-07, "loss": 0.8069, "step": 10456 }, { "epoch": 0.9363254798813587, "grad_norm": 1.02385093299912, "learning_rate": 2.1186155331222925e-07, "loss": 0.7427, "step": 10457 }, { "epoch": 0.9364150204264369, "grad_norm": 0.9970507994924301, "learning_rate": 2.1126812455620094e-07, "loss": 0.7801, "step": 10458 }, { "epoch": 0.9365045609715149, "grad_norm": 0.949199296158414, "learning_rate": 2.106755191963994e-07, "loss": 0.8132, "step": 10459 }, { "epoch": 0.936594101516593, "grad_norm": 0.9494750461259707, "learning_rate": 2.100837372826725e-07, "loss": 0.8062, "step": 10460 }, { "epoch": 0.9366836420616711, "grad_norm": 1.0188024357883734, "learning_rate": 2.0949277886479935e-07, "loss": 0.7645, "step": 10461 }, { "epoch": 0.9367731826067491, "grad_norm": 0.9718496989019507, "learning_rate": 2.0890264399248905e-07, "loss": 0.7946, "step": 10462 }, { "epoch": 0.9368627231518272, "grad_norm": 0.9347843431944246, "learning_rate": 2.083133327153819e-07, "loss": 0.7864, "step": 10463 }, { "epoch": 0.9369522636969052, "grad_norm": 1.1337913709690801, "learning_rate": 2.0772484508304937e-07, "loss": 0.7678, "step": 10464 }, { "epoch": 0.9370418042419834, "grad_norm": 0.9797317131732842, "learning_rate": 2.0713718114499293e-07, "loss": 0.8076, "step": 10465 }, { "epoch": 0.9371313447870614, "grad_norm": 0.917629630970039, "learning_rate": 2.065503409506453e-07, "loss": 0.8375, "step": 10466 }, { "epoch": 0.9372208853321394, "grad_norm": 0.9186325146478544, "learning_rate": 2.059643245493681e-07, "loss": 0.745, "step": 10467 }, { "epoch": 0.9373104258772176, "grad_norm": 1.0885336496063627, "learning_rate": 2.0537913199045633e-07, "loss": 0.8239, "step": 10468 }, { "epoch": 0.9373999664222956, "grad_norm": 0.9097677422967195, "learning_rate": 2.0479476332313398e-07, "loss": 0.8174, "step": 10469 }, { "epoch": 0.9374895069673737, "grad_norm": 0.9072592642426942, "learning_rate": 2.0421121859655723e-07, "loss": 0.8063, "step": 10470 }, { "epoch": 0.9375790475124517, "grad_norm": 1.0102842139427806, "learning_rate": 2.036284978598102e-07, "loss": 0.7682, "step": 10471 }, { "epoch": 0.9376685880575298, "grad_norm": 1.1211751436145219, "learning_rate": 2.0304660116191145e-07, "loss": 0.781, "step": 10472 }, { "epoch": 0.9377581286026079, "grad_norm": 1.0157958911437328, "learning_rate": 2.0246552855180625e-07, "loss": 0.8311, "step": 10473 }, { "epoch": 0.9378476691476859, "grad_norm": 0.9507851809664931, "learning_rate": 2.0188528007837328e-07, "loss": 0.834, "step": 10474 }, { "epoch": 0.937937209692764, "grad_norm": 1.0379071841054388, "learning_rate": 2.013058557904224e-07, "loss": 0.8404, "step": 10475 }, { "epoch": 0.9380267502378421, "grad_norm": 1.0170820987516789, "learning_rate": 2.0072725573669128e-07, "loss": 0.798, "step": 10476 }, { "epoch": 0.9381162907829201, "grad_norm": 1.075043281581752, "learning_rate": 2.00149479965851e-07, "loss": 0.8131, "step": 10477 }, { "epoch": 0.9382058313279982, "grad_norm": 1.0946551493812278, "learning_rate": 1.9957252852650267e-07, "loss": 0.7644, "step": 10478 }, { "epoch": 0.9382953718730763, "grad_norm": 1.0513174801381848, "learning_rate": 1.9899640146717413e-07, "loss": 0.7836, "step": 10479 }, { "epoch": 0.9383849124181544, "grad_norm": 0.9709426708148006, "learning_rate": 1.984210988363311e-07, "loss": 0.7197, "step": 10480 }, { "epoch": 0.9384744529632324, "grad_norm": 0.9800868760815739, "learning_rate": 1.9784662068236483e-07, "loss": 0.8066, "step": 10481 }, { "epoch": 0.9385639935083104, "grad_norm": 0.9780341942172683, "learning_rate": 1.9727296705359777e-07, "loss": 0.7665, "step": 10482 }, { "epoch": 0.9386535340533886, "grad_norm": 0.9085362740329684, "learning_rate": 1.967001379982858e-07, "loss": 0.8012, "step": 10483 }, { "epoch": 0.9387430745984666, "grad_norm": 0.9368651048329844, "learning_rate": 1.9612813356461146e-07, "loss": 0.7757, "step": 10484 }, { "epoch": 0.9388326151435447, "grad_norm": 1.1444718004476282, "learning_rate": 1.9555695380068963e-07, "loss": 0.7801, "step": 10485 }, { "epoch": 0.9389221556886228, "grad_norm": 0.9178771368671131, "learning_rate": 1.949865987545685e-07, "loss": 0.7575, "step": 10486 }, { "epoch": 0.9390116962337008, "grad_norm": 1.2017330091134577, "learning_rate": 1.9441706847422193e-07, "loss": 0.812, "step": 10487 }, { "epoch": 0.9391012367787789, "grad_norm": 0.9815427041940191, "learning_rate": 1.9384836300755937e-07, "loss": 0.7722, "step": 10488 }, { "epoch": 0.9391907773238569, "grad_norm": 0.9570668878069648, "learning_rate": 1.9328048240241704e-07, "loss": 0.7861, "step": 10489 }, { "epoch": 0.9392803178689351, "grad_norm": 1.0396498270520502, "learning_rate": 1.9271342670656336e-07, "loss": 0.7125, "step": 10490 }, { "epoch": 0.9393698584140131, "grad_norm": 0.9985456335543632, "learning_rate": 1.921471959676957e-07, "loss": 0.8076, "step": 10491 }, { "epoch": 0.9394593989590911, "grad_norm": 0.9907317496304813, "learning_rate": 1.9158179023344602e-07, "loss": 0.8234, "step": 10492 }, { "epoch": 0.9395489395041692, "grad_norm": 0.9654378895058732, "learning_rate": 1.9101720955137293e-07, "loss": 0.8299, "step": 10493 }, { "epoch": 0.9396384800492473, "grad_norm": 1.0074574281156012, "learning_rate": 1.9045345396896842e-07, "loss": 0.7905, "step": 10494 }, { "epoch": 0.9397280205943254, "grad_norm": 1.0285614162846088, "learning_rate": 1.8989052353365345e-07, "loss": 0.8124, "step": 10495 }, { "epoch": 0.9398175611394034, "grad_norm": 1.0362353772999737, "learning_rate": 1.8932841829277794e-07, "loss": 0.7841, "step": 10496 }, { "epoch": 0.9399071016844815, "grad_norm": 1.083685695554076, "learning_rate": 1.8876713829362626e-07, "loss": 0.8015, "step": 10497 }, { "epoch": 0.9399966422295596, "grad_norm": 1.0059396100346234, "learning_rate": 1.8820668358341065e-07, "loss": 0.7471, "step": 10498 }, { "epoch": 0.9400861827746376, "grad_norm": 0.985909761466985, "learning_rate": 1.8764705420927566e-07, "loss": 0.7722, "step": 10499 }, { "epoch": 0.9401757233197157, "grad_norm": 1.0856857488873042, "learning_rate": 1.8708825021829468e-07, "loss": 0.711, "step": 10500 }, { "epoch": 0.9402652638647938, "grad_norm": 1.054403568688193, "learning_rate": 1.865302716574735e-07, "loss": 0.7739, "step": 10501 }, { "epoch": 0.9403548044098718, "grad_norm": 1.0526527826025391, "learning_rate": 1.8597311857374568e-07, "loss": 0.8174, "step": 10502 }, { "epoch": 0.9404443449549499, "grad_norm": 1.092840794807833, "learning_rate": 1.8541679101397814e-07, "loss": 0.8139, "step": 10503 }, { "epoch": 0.940533885500028, "grad_norm": 0.9613845143083771, "learning_rate": 1.8486128902496682e-07, "loss": 0.8113, "step": 10504 }, { "epoch": 0.9406234260451061, "grad_norm": 0.9789444516415102, "learning_rate": 1.8430661265344095e-07, "loss": 0.8193, "step": 10505 }, { "epoch": 0.9407129665901841, "grad_norm": 1.0209292311231506, "learning_rate": 1.8375276194605552e-07, "loss": 0.7375, "step": 10506 }, { "epoch": 0.9408025071352621, "grad_norm": 1.0154420577946865, "learning_rate": 1.8319973694939986e-07, "loss": 0.8375, "step": 10507 }, { "epoch": 0.9408920476803403, "grad_norm": 0.9827808467265221, "learning_rate": 1.8264753770999233e-07, "loss": 0.8073, "step": 10508 }, { "epoch": 0.9409815882254183, "grad_norm": 1.0592262294798354, "learning_rate": 1.8209616427428134e-07, "loss": 0.8171, "step": 10509 }, { "epoch": 0.9410711287704964, "grad_norm": 1.0551366290391107, "learning_rate": 1.8154561668864645e-07, "loss": 0.8252, "step": 10510 }, { "epoch": 0.9411606693155744, "grad_norm": 0.9872608581185702, "learning_rate": 1.8099589499940062e-07, "loss": 0.815, "step": 10511 }, { "epoch": 0.9412502098606526, "grad_norm": 0.9978466270788319, "learning_rate": 1.8044699925278242e-07, "loss": 0.8025, "step": 10512 }, { "epoch": 0.9413397504057306, "grad_norm": 0.9176123158204633, "learning_rate": 1.798989294949638e-07, "loss": 0.762, "step": 10513 }, { "epoch": 0.9414292909508086, "grad_norm": 0.9655092882387789, "learning_rate": 1.7935168577204676e-07, "loss": 0.7515, "step": 10514 }, { "epoch": 0.9415188314958868, "grad_norm": 0.9617660578138079, "learning_rate": 1.7880526813006226e-07, "loss": 0.7529, "step": 10515 }, { "epoch": 0.9416083720409648, "grad_norm": 0.917844488395302, "learning_rate": 1.7825967661497466e-07, "loss": 0.755, "step": 10516 }, { "epoch": 0.9416979125860429, "grad_norm": 0.9137419306686104, "learning_rate": 1.7771491127267726e-07, "loss": 0.8, "step": 10517 }, { "epoch": 0.9417874531311209, "grad_norm": 0.891426292033392, "learning_rate": 1.7717097214899338e-07, "loss": 0.7431, "step": 10518 }, { "epoch": 0.941876993676199, "grad_norm": 1.0092366914009525, "learning_rate": 1.7662785928967752e-07, "loss": 0.7765, "step": 10519 }, { "epoch": 0.9419665342212771, "grad_norm": 0.9800397999646994, "learning_rate": 1.7608557274041428e-07, "loss": 0.7955, "step": 10520 }, { "epoch": 0.9420560747663551, "grad_norm": 0.9256999760073314, "learning_rate": 1.7554411254682047e-07, "loss": 0.8051, "step": 10521 }, { "epoch": 0.9421456153114333, "grad_norm": 0.9182907826140562, "learning_rate": 1.7500347875443968e-07, "loss": 0.8692, "step": 10522 }, { "epoch": 0.9422351558565113, "grad_norm": 1.1121737860956646, "learning_rate": 1.7446367140874998e-07, "loss": 0.778, "step": 10523 }, { "epoch": 0.9423246964015893, "grad_norm": 0.9280995317634912, "learning_rate": 1.7392469055515837e-07, "loss": 0.7778, "step": 10524 }, { "epoch": 0.9424142369466674, "grad_norm": 0.9869076547135597, "learning_rate": 1.733865362390008e-07, "loss": 0.7608, "step": 10525 }, { "epoch": 0.9425037774917455, "grad_norm": 1.0329562250546835, "learning_rate": 1.7284920850554664e-07, "loss": 0.8239, "step": 10526 }, { "epoch": 0.9425933180368236, "grad_norm": 1.0569717671041021, "learning_rate": 1.7231270739999195e-07, "loss": 0.8099, "step": 10527 }, { "epoch": 0.9426828585819016, "grad_norm": 0.9692978270052119, "learning_rate": 1.7177703296746838e-07, "loss": 0.8226, "step": 10528 }, { "epoch": 0.9427723991269796, "grad_norm": 0.9883768428363052, "learning_rate": 1.7124218525303217e-07, "loss": 0.8111, "step": 10529 }, { "epoch": 0.9428619396720578, "grad_norm": 1.003751295938683, "learning_rate": 1.7070816430167503e-07, "loss": 0.8548, "step": 10530 }, { "epoch": 0.9429514802171358, "grad_norm": 1.0120636166209647, "learning_rate": 1.701749701583155e-07, "loss": 0.7997, "step": 10531 }, { "epoch": 0.9430410207622139, "grad_norm": 1.1157242678924864, "learning_rate": 1.6964260286780666e-07, "loss": 0.7993, "step": 10532 }, { "epoch": 0.943130561307292, "grad_norm": 1.0290609021136858, "learning_rate": 1.6911106247492592e-07, "loss": 0.7867, "step": 10533 }, { "epoch": 0.94322010185237, "grad_norm": 0.9879479982881689, "learning_rate": 1.6858034902438757e-07, "loss": 0.7872, "step": 10534 }, { "epoch": 0.9433096423974481, "grad_norm": 1.1033960939862946, "learning_rate": 1.6805046256083257e-07, "loss": 0.769, "step": 10535 }, { "epoch": 0.9433991829425261, "grad_norm": 1.0116193686039905, "learning_rate": 1.6752140312883304e-07, "loss": 0.8357, "step": 10536 }, { "epoch": 0.9434887234876043, "grad_norm": 1.0233721592929788, "learning_rate": 1.6699317077289223e-07, "loss": 0.8229, "step": 10537 }, { "epoch": 0.9435782640326823, "grad_norm": 0.9630626997699553, "learning_rate": 1.6646576553744465e-07, "loss": 0.7961, "step": 10538 }, { "epoch": 0.9436678045777603, "grad_norm": 0.9743169789566054, "learning_rate": 1.6593918746684923e-07, "loss": 0.7356, "step": 10539 }, { "epoch": 0.9437573451228385, "grad_norm": 0.9595195360946134, "learning_rate": 1.65413436605405e-07, "loss": 0.7884, "step": 10540 }, { "epoch": 0.9438468856679165, "grad_norm": 0.9378856016892926, "learning_rate": 1.6488851299733322e-07, "loss": 0.756, "step": 10541 }, { "epoch": 0.9439364262129946, "grad_norm": 1.0150204949502133, "learning_rate": 1.643644166867908e-07, "loss": 0.7944, "step": 10542 }, { "epoch": 0.9440259667580726, "grad_norm": 1.0190050036785487, "learning_rate": 1.6384114771786254e-07, "loss": 0.807, "step": 10543 }, { "epoch": 0.9441155073031507, "grad_norm": 0.9289392608148778, "learning_rate": 1.6331870613456423e-07, "loss": 0.7648, "step": 10544 }, { "epoch": 0.9442050478482288, "grad_norm": 0.9457360813246329, "learning_rate": 1.6279709198084082e-07, "loss": 0.7796, "step": 10545 }, { "epoch": 0.9442945883933068, "grad_norm": 0.9560795275673031, "learning_rate": 1.6227630530056825e-07, "loss": 0.7979, "step": 10546 }, { "epoch": 0.9443841289383849, "grad_norm": 0.9466778412735403, "learning_rate": 1.6175634613755597e-07, "loss": 0.7764, "step": 10547 }, { "epoch": 0.944473669483463, "grad_norm": 1.0349731811488116, "learning_rate": 1.6123721453553897e-07, "loss": 0.7694, "step": 10548 }, { "epoch": 0.944563210028541, "grad_norm": 0.9824597761244583, "learning_rate": 1.607189105381879e-07, "loss": 0.823, "step": 10549 }, { "epoch": 0.9446527505736191, "grad_norm": 1.1755373311349047, "learning_rate": 1.6020143418909783e-07, "loss": 0.8099, "step": 10550 }, { "epoch": 0.9447422911186972, "grad_norm": 1.0214590778626063, "learning_rate": 1.5968478553179733e-07, "loss": 0.7795, "step": 10551 }, { "epoch": 0.9448318316637753, "grad_norm": 0.923884689396745, "learning_rate": 1.5916896460974608e-07, "loss": 0.7848, "step": 10552 }, { "epoch": 0.9449213722088533, "grad_norm": 1.0381815190055639, "learning_rate": 1.5865397146633265e-07, "loss": 0.7458, "step": 10553 }, { "epoch": 0.9450109127539313, "grad_norm": 0.9663942748447644, "learning_rate": 1.581398061448791e-07, "loss": 0.7625, "step": 10554 }, { "epoch": 0.9451004532990095, "grad_norm": 0.8617052443269132, "learning_rate": 1.5762646868863195e-07, "loss": 0.7375, "step": 10555 }, { "epoch": 0.9451899938440875, "grad_norm": 0.9519206086421542, "learning_rate": 1.5711395914077333e-07, "loss": 0.8045, "step": 10556 }, { "epoch": 0.9452795343891656, "grad_norm": 1.0685310697504005, "learning_rate": 1.5660227754441316e-07, "loss": 0.786, "step": 10557 }, { "epoch": 0.9453690749342437, "grad_norm": 1.1451355224528967, "learning_rate": 1.5609142394259257e-07, "loss": 0.8234, "step": 10558 }, { "epoch": 0.9454586154793218, "grad_norm": 1.002885238597944, "learning_rate": 1.5558139837828278e-07, "loss": 0.8074, "step": 10559 }, { "epoch": 0.9455481560243998, "grad_norm": 1.2939504662471963, "learning_rate": 1.5507220089438724e-07, "loss": 0.804, "step": 10560 }, { "epoch": 0.9456376965694778, "grad_norm": 0.9893762602582188, "learning_rate": 1.54563831533735e-07, "loss": 0.763, "step": 10561 }, { "epoch": 0.945727237114556, "grad_norm": 0.9627461590038338, "learning_rate": 1.5405629033909075e-07, "loss": 0.7811, "step": 10562 }, { "epoch": 0.945816777659634, "grad_norm": 1.010902898651545, "learning_rate": 1.535495773531459e-07, "loss": 0.8186, "step": 10563 }, { "epoch": 0.945906318204712, "grad_norm": 0.9812904854451682, "learning_rate": 1.5304369261852304e-07, "loss": 0.7711, "step": 10564 }, { "epoch": 0.9459958587497901, "grad_norm": 0.9615552488581134, "learning_rate": 1.525386361777781e-07, "loss": 0.7715, "step": 10565 }, { "epoch": 0.9460853992948682, "grad_norm": 0.9437096278595699, "learning_rate": 1.5203440807339265e-07, "loss": 0.8143, "step": 10566 }, { "epoch": 0.9461749398399463, "grad_norm": 0.9748468924276358, "learning_rate": 1.5153100834778057e-07, "loss": 0.8562, "step": 10567 }, { "epoch": 0.9462644803850243, "grad_norm": 0.9567760334628974, "learning_rate": 1.5102843704328684e-07, "loss": 0.7504, "step": 10568 }, { "epoch": 0.9463540209301025, "grad_norm": 0.9205170139724177, "learning_rate": 1.5052669420218656e-07, "loss": 0.7727, "step": 10569 }, { "epoch": 0.9464435614751805, "grad_norm": 1.0058719554159496, "learning_rate": 1.5002577986668376e-07, "loss": 0.7357, "step": 10570 }, { "epoch": 0.9465331020202585, "grad_norm": 1.0760287779525304, "learning_rate": 1.495256940789147e-07, "loss": 0.7586, "step": 10571 }, { "epoch": 0.9466226425653366, "grad_norm": 0.9049619192721745, "learning_rate": 1.4902643688094465e-07, "loss": 0.782, "step": 10572 }, { "epoch": 0.9467121831104147, "grad_norm": 1.0097372518213306, "learning_rate": 1.4852800831476887e-07, "loss": 0.7529, "step": 10573 }, { "epoch": 0.9468017236554928, "grad_norm": 0.9873071979192511, "learning_rate": 1.4803040842231385e-07, "loss": 0.8063, "step": 10574 }, { "epoch": 0.9468912642005708, "grad_norm": 1.1024028769655165, "learning_rate": 1.4753363724543723e-07, "loss": 0.8335, "step": 10575 }, { "epoch": 0.9469808047456489, "grad_norm": 0.9299046625198809, "learning_rate": 1.4703769482592335e-07, "loss": 0.7689, "step": 10576 }, { "epoch": 0.947070345290727, "grad_norm": 1.0078546292811292, "learning_rate": 1.4654258120549102e-07, "loss": 0.7946, "step": 10577 }, { "epoch": 0.947159885835805, "grad_norm": 1.018350369987424, "learning_rate": 1.460482964257881e-07, "loss": 0.7383, "step": 10578 }, { "epoch": 0.947249426380883, "grad_norm": 0.9372374689687759, "learning_rate": 1.455548405283913e-07, "loss": 0.8234, "step": 10579 }, { "epoch": 0.9473389669259612, "grad_norm": 0.9916344862173925, "learning_rate": 1.4506221355480744e-07, "loss": 0.7273, "step": 10580 }, { "epoch": 0.9474285074710392, "grad_norm": 1.0274132283379285, "learning_rate": 1.4457041554647667e-07, "loss": 0.7589, "step": 10581 }, { "epoch": 0.9475180480161173, "grad_norm": 0.9393736670457288, "learning_rate": 1.4407944654476702e-07, "loss": 0.7951, "step": 10582 }, { "epoch": 0.9476075885611953, "grad_norm": 1.0835777818320282, "learning_rate": 1.4358930659097658e-07, "loss": 0.7515, "step": 10583 }, { "epoch": 0.9476971291062735, "grad_norm": 0.8851659204513486, "learning_rate": 1.430999957263335e-07, "loss": 0.7468, "step": 10584 }, { "epoch": 0.9477866696513515, "grad_norm": 0.9695539500596535, "learning_rate": 1.4261151399199924e-07, "loss": 0.8184, "step": 10585 }, { "epoch": 0.9478762101964295, "grad_norm": 1.0622914114536484, "learning_rate": 1.4212386142906209e-07, "loss": 0.8493, "step": 10586 }, { "epoch": 0.9479657507415077, "grad_norm": 1.0075261186914495, "learning_rate": 1.4163703807854147e-07, "loss": 0.7961, "step": 10587 }, { "epoch": 0.9480552912865857, "grad_norm": 0.9989404182267662, "learning_rate": 1.411510439813868e-07, "loss": 0.8317, "step": 10588 }, { "epoch": 0.9481448318316638, "grad_norm": 0.9566753800221245, "learning_rate": 1.4066587917848097e-07, "loss": 0.8235, "step": 10589 }, { "epoch": 0.9482343723767418, "grad_norm": 1.4457440422862284, "learning_rate": 1.401815437106313e-07, "loss": 0.8025, "step": 10590 }, { "epoch": 0.94832391292182, "grad_norm": 0.9798218385394535, "learning_rate": 1.3969803761858082e-07, "loss": 0.7196, "step": 10591 }, { "epoch": 0.948413453466898, "grad_norm": 0.9430661497168401, "learning_rate": 1.3921536094299914e-07, "loss": 0.7587, "step": 10592 }, { "epoch": 0.948502994011976, "grad_norm": 1.4948437096893703, "learning_rate": 1.3873351372448828e-07, "loss": 0.7924, "step": 10593 }, { "epoch": 0.9485925345570542, "grad_norm": 0.9803300803573628, "learning_rate": 1.3825249600357915e-07, "loss": 0.76, "step": 10594 }, { "epoch": 0.9486820751021322, "grad_norm": 1.003604409507791, "learning_rate": 1.3777230782073382e-07, "loss": 0.822, "step": 10595 }, { "epoch": 0.9487716156472102, "grad_norm": 0.9901994852247739, "learning_rate": 1.372929492163433e-07, "loss": 0.7626, "step": 10596 }, { "epoch": 0.9488611561922883, "grad_norm": 1.0032595140646359, "learning_rate": 1.3681442023073089e-07, "loss": 0.818, "step": 10597 }, { "epoch": 0.9489506967373664, "grad_norm": 1.071070828266927, "learning_rate": 1.3633672090414775e-07, "loss": 0.7883, "step": 10598 }, { "epoch": 0.9490402372824445, "grad_norm": 0.9307142156168312, "learning_rate": 1.3585985127677724e-07, "loss": 0.8253, "step": 10599 }, { "epoch": 0.9491297778275225, "grad_norm": 0.951937796914392, "learning_rate": 1.353838113887307e-07, "loss": 0.8376, "step": 10600 }, { "epoch": 0.9492193183726005, "grad_norm": 0.93505506198809, "learning_rate": 1.3490860128005267e-07, "loss": 0.8021, "step": 10601 }, { "epoch": 0.9493088589176787, "grad_norm": 0.9815630071241641, "learning_rate": 1.344342209907168e-07, "loss": 0.7855, "step": 10602 }, { "epoch": 0.9493983994627567, "grad_norm": 0.9499900554125869, "learning_rate": 1.3396067056062444e-07, "loss": 0.7707, "step": 10603 }, { "epoch": 0.9494879400078348, "grad_norm": 1.0721293651676682, "learning_rate": 1.3348795002961046e-07, "loss": 0.8306, "step": 10604 }, { "epoch": 0.9495774805529129, "grad_norm": 0.8941313027061598, "learning_rate": 1.3301605943743744e-07, "loss": 0.8176, "step": 10605 }, { "epoch": 0.949667021097991, "grad_norm": 1.0424319293277788, "learning_rate": 1.3254499882379922e-07, "loss": 0.7843, "step": 10606 }, { "epoch": 0.949756561643069, "grad_norm": 0.9298724380798908, "learning_rate": 1.320747682283219e-07, "loss": 0.743, "step": 10607 }, { "epoch": 0.949846102188147, "grad_norm": 0.872563212442681, "learning_rate": 1.3160536769055708e-07, "loss": 0.778, "step": 10608 }, { "epoch": 0.9499356427332252, "grad_norm": 0.9999678593960863, "learning_rate": 1.311367972499922e-07, "loss": 0.8207, "step": 10609 }, { "epoch": 0.9500251832783032, "grad_norm": 0.9639668169959477, "learning_rate": 1.3066905694604004e-07, "loss": 0.8024, "step": 10610 }, { "epoch": 0.9501147238233812, "grad_norm": 0.9703339776921578, "learning_rate": 1.3020214681804477e-07, "loss": 0.8155, "step": 10611 }, { "epoch": 0.9502042643684594, "grad_norm": 1.1661844948212763, "learning_rate": 1.2973606690528162e-07, "loss": 0.8309, "step": 10612 }, { "epoch": 0.9502938049135374, "grad_norm": 1.0429892588027683, "learning_rate": 1.29270817246957e-07, "loss": 0.7525, "step": 10613 }, { "epoch": 0.9503833454586155, "grad_norm": 1.1153590224096566, "learning_rate": 1.288063978822063e-07, "loss": 0.8284, "step": 10614 }, { "epoch": 0.9504728860036935, "grad_norm": 1.0367178884872523, "learning_rate": 1.2834280885009375e-07, "loss": 0.8415, "step": 10615 }, { "epoch": 0.9505624265487717, "grad_norm": 1.2801265988405155, "learning_rate": 1.2788005018961492e-07, "loss": 0.7822, "step": 10616 }, { "epoch": 0.9506519670938497, "grad_norm": 1.2338212012812513, "learning_rate": 1.2741812193969528e-07, "loss": 0.844, "step": 10617 }, { "epoch": 0.9507415076389277, "grad_norm": 0.9407184995249785, "learning_rate": 1.2695702413919152e-07, "loss": 0.7949, "step": 10618 }, { "epoch": 0.9508310481840058, "grad_norm": 0.962092215626412, "learning_rate": 1.2649675682689044e-07, "loss": 0.7774, "step": 10619 }, { "epoch": 0.9509205887290839, "grad_norm": 0.9780304283020984, "learning_rate": 1.260373200415077e-07, "loss": 0.7737, "step": 10620 }, { "epoch": 0.951010129274162, "grad_norm": 0.9790475839881357, "learning_rate": 1.2557871382168795e-07, "loss": 0.8311, "step": 10621 }, { "epoch": 0.95109966981924, "grad_norm": 0.9963308247141596, "learning_rate": 1.2512093820600922e-07, "loss": 0.7662, "step": 10622 }, { "epoch": 0.9511892103643181, "grad_norm": 1.090963492528114, "learning_rate": 1.2466399323297851e-07, "loss": 0.7544, "step": 10623 }, { "epoch": 0.9512787509093962, "grad_norm": 1.0057320245272516, "learning_rate": 1.2420787894103058e-07, "loss": 0.7859, "step": 10624 }, { "epoch": 0.9513682914544742, "grad_norm": 1.065061393013988, "learning_rate": 1.237525953685359e-07, "loss": 0.7875, "step": 10625 }, { "epoch": 0.9514578319995523, "grad_norm": 0.8964377964519797, "learning_rate": 1.2329814255378713e-07, "loss": 0.7374, "step": 10626 }, { "epoch": 0.9515473725446304, "grad_norm": 1.0713444173270354, "learning_rate": 1.2284452053501483e-07, "loss": 0.8013, "step": 10627 }, { "epoch": 0.9516369130897084, "grad_norm": 0.9770274185596978, "learning_rate": 1.22391729350374e-07, "loss": 0.8114, "step": 10628 }, { "epoch": 0.9517264536347865, "grad_norm": 0.9606472233247016, "learning_rate": 1.2193976903795203e-07, "loss": 0.8134, "step": 10629 }, { "epoch": 0.9518159941798646, "grad_norm": 0.9490257693443686, "learning_rate": 1.214886396357684e-07, "loss": 0.7389, "step": 10630 }, { "epoch": 0.9519055347249427, "grad_norm": 1.0000073650405197, "learning_rate": 1.210383411817684e-07, "loss": 0.8299, "step": 10631 }, { "epoch": 0.9519950752700207, "grad_norm": 0.9518960583625684, "learning_rate": 1.2058887371383054e-07, "loss": 0.7163, "step": 10632 }, { "epoch": 0.9520846158150987, "grad_norm": 1.037474257880494, "learning_rate": 1.2014023726976242e-07, "loss": 0.7652, "step": 10633 }, { "epoch": 0.9521741563601769, "grad_norm": 1.055329942281204, "learning_rate": 1.1969243188730273e-07, "loss": 0.8264, "step": 10634 }, { "epoch": 0.9522636969052549, "grad_norm": 1.0482411176673405, "learning_rate": 1.1924545760411798e-07, "loss": 0.76, "step": 10635 }, { "epoch": 0.952353237450333, "grad_norm": 0.9400739283147899, "learning_rate": 1.1879931445780702e-07, "loss": 0.7875, "step": 10636 }, { "epoch": 0.952442777995411, "grad_norm": 1.0088195085030172, "learning_rate": 1.1835400248589756e-07, "loss": 0.8164, "step": 10637 }, { "epoch": 0.9525323185404891, "grad_norm": 1.0670088165795595, "learning_rate": 1.1790952172584858e-07, "loss": 0.843, "step": 10638 }, { "epoch": 0.9526218590855672, "grad_norm": 1.0309419294151398, "learning_rate": 1.174658722150479e-07, "loss": 0.8123, "step": 10639 }, { "epoch": 0.9527113996306452, "grad_norm": 0.9910358758513832, "learning_rate": 1.1702305399081349e-07, "loss": 0.7708, "step": 10640 }, { "epoch": 0.9528009401757234, "grad_norm": 0.9297155839896183, "learning_rate": 1.1658106709039441e-07, "loss": 0.7648, "step": 10641 }, { "epoch": 0.9528904807208014, "grad_norm": 0.957882872384158, "learning_rate": 1.161399115509676e-07, "loss": 0.7417, "step": 10642 }, { "epoch": 0.9529800212658794, "grad_norm": 0.8964124035575063, "learning_rate": 1.1569958740964449e-07, "loss": 0.7895, "step": 10643 }, { "epoch": 0.9530695618109575, "grad_norm": 0.9155916389922302, "learning_rate": 1.15260094703461e-07, "loss": 0.7755, "step": 10644 }, { "epoch": 0.9531591023560356, "grad_norm": 1.218252575293368, "learning_rate": 1.1482143346938757e-07, "loss": 0.8459, "step": 10645 }, { "epoch": 0.9532486429011137, "grad_norm": 0.9898203666733881, "learning_rate": 1.1438360374432134e-07, "loss": 0.8456, "step": 10646 }, { "epoch": 0.9533381834461917, "grad_norm": 1.103660416204914, "learning_rate": 1.1394660556509285e-07, "loss": 0.8236, "step": 10647 }, { "epoch": 0.9534277239912698, "grad_norm": 1.2989330019185708, "learning_rate": 1.1351043896846048e-07, "loss": 0.7945, "step": 10648 }, { "epoch": 0.9535172645363479, "grad_norm": 1.0603891802204617, "learning_rate": 1.1307510399111266e-07, "loss": 0.7717, "step": 10649 }, { "epoch": 0.9536068050814259, "grad_norm": 0.9182954179885463, "learning_rate": 1.1264060066966786e-07, "loss": 0.8176, "step": 10650 }, { "epoch": 0.953696345626504, "grad_norm": 1.0141724099238418, "learning_rate": 1.1220692904067687e-07, "loss": 0.8355, "step": 10651 }, { "epoch": 0.9537858861715821, "grad_norm": 1.015956073421443, "learning_rate": 1.1177408914061604e-07, "loss": 0.8083, "step": 10652 }, { "epoch": 0.9538754267166601, "grad_norm": 1.0649456999798674, "learning_rate": 1.113420810058985e-07, "loss": 0.794, "step": 10653 }, { "epoch": 0.9539649672617382, "grad_norm": 1.0051863678755355, "learning_rate": 1.1091090467285848e-07, "loss": 0.8097, "step": 10654 }, { "epoch": 0.9540545078068162, "grad_norm": 0.9796699311175886, "learning_rate": 1.1048056017776809e-07, "loss": 0.7538, "step": 10655 }, { "epoch": 0.9541440483518944, "grad_norm": 0.9254765725641129, "learning_rate": 1.1005104755682617e-07, "loss": 0.7804, "step": 10656 }, { "epoch": 0.9542335888969724, "grad_norm": 0.9882063342980724, "learning_rate": 1.0962236684616157e-07, "loss": 0.8011, "step": 10657 }, { "epoch": 0.9543231294420504, "grad_norm": 1.3708350502973423, "learning_rate": 1.0919451808183435e-07, "loss": 0.8598, "step": 10658 }, { "epoch": 0.9544126699871286, "grad_norm": 1.3766498483472975, "learning_rate": 1.0876750129983349e-07, "loss": 0.8307, "step": 10659 }, { "epoch": 0.9545022105322066, "grad_norm": 0.9390187108296227, "learning_rate": 1.0834131653607582e-07, "loss": 0.7812, "step": 10660 }, { "epoch": 0.9545917510772847, "grad_norm": 1.0079062932445724, "learning_rate": 1.0791596382641378e-07, "loss": 0.834, "step": 10661 }, { "epoch": 0.9546812916223627, "grad_norm": 0.96229420986048, "learning_rate": 1.0749144320662541e-07, "loss": 0.7531, "step": 10662 }, { "epoch": 0.9547708321674409, "grad_norm": 0.9035627002833151, "learning_rate": 1.0706775471242104e-07, "loss": 0.7252, "step": 10663 }, { "epoch": 0.9548603727125189, "grad_norm": 0.9622529907786203, "learning_rate": 1.066448983794377e-07, "loss": 0.8324, "step": 10664 }, { "epoch": 0.9549499132575969, "grad_norm": 0.9872810285582793, "learning_rate": 1.0622287424324696e-07, "loss": 0.8197, "step": 10665 }, { "epoch": 0.9550394538026751, "grad_norm": 1.0803384847328674, "learning_rate": 1.0580168233934596e-07, "loss": 0.7708, "step": 10666 }, { "epoch": 0.9551289943477531, "grad_norm": 0.9420107021016394, "learning_rate": 1.0538132270316526e-07, "loss": 0.7512, "step": 10667 }, { "epoch": 0.9552185348928312, "grad_norm": 0.9613135363148781, "learning_rate": 1.0496179537006435e-07, "loss": 0.7751, "step": 10668 }, { "epoch": 0.9553080754379092, "grad_norm": 1.0299769318328478, "learning_rate": 1.0454310037533388e-07, "loss": 0.7931, "step": 10669 }, { "epoch": 0.9553976159829873, "grad_norm": 0.9607330423012813, "learning_rate": 1.0412523775419014e-07, "loss": 0.8361, "step": 10670 }, { "epoch": 0.9554871565280654, "grad_norm": 0.9794103489741643, "learning_rate": 1.0370820754178279e-07, "loss": 0.8507, "step": 10671 }, { "epoch": 0.9555766970731434, "grad_norm": 0.914166971626092, "learning_rate": 1.0329200977319265e-07, "loss": 0.6974, "step": 10672 }, { "epoch": 0.9556662376182214, "grad_norm": 0.9744205034679365, "learning_rate": 1.0287664448342838e-07, "loss": 0.791, "step": 10673 }, { "epoch": 0.9557557781632996, "grad_norm": 0.9290085611373504, "learning_rate": 1.024621117074287e-07, "loss": 0.7745, "step": 10674 }, { "epoch": 0.9558453187083776, "grad_norm": 0.9410225610472153, "learning_rate": 1.020484114800635e-07, "loss": 0.762, "step": 10675 }, { "epoch": 0.9559348592534557, "grad_norm": 1.1130783841409277, "learning_rate": 1.0163554383613161e-07, "loss": 0.8089, "step": 10676 }, { "epoch": 0.9560243997985338, "grad_norm": 1.0917557655286239, "learning_rate": 1.0122350881036081e-07, "loss": 0.7641, "step": 10677 }, { "epoch": 0.9561139403436119, "grad_norm": 0.9306129744003709, "learning_rate": 1.0081230643741114e-07, "loss": 0.7618, "step": 10678 }, { "epoch": 0.9562034808886899, "grad_norm": 0.947991085683517, "learning_rate": 1.0040193675187271e-07, "loss": 0.8156, "step": 10679 }, { "epoch": 0.9562930214337679, "grad_norm": 0.8929510145425168, "learning_rate": 9.999239978826459e-08, "loss": 0.8067, "step": 10680 }, { "epoch": 0.9563825619788461, "grad_norm": 0.9904437917492505, "learning_rate": 9.958369558103254e-08, "loss": 0.8349, "step": 10681 }, { "epoch": 0.9564721025239241, "grad_norm": 1.0777655000349118, "learning_rate": 9.917582416455796e-08, "loss": 0.801, "step": 10682 }, { "epoch": 0.9565616430690022, "grad_norm": 0.9766062588971706, "learning_rate": 9.876878557315006e-08, "loss": 0.7447, "step": 10683 }, { "epoch": 0.9566511836140803, "grad_norm": 0.9938630049899553, "learning_rate": 9.836257984104591e-08, "loss": 0.8133, "step": 10684 }, { "epoch": 0.9567407241591583, "grad_norm": 1.0011471936749594, "learning_rate": 9.795720700241595e-08, "loss": 0.8146, "step": 10685 }, { "epoch": 0.9568302647042364, "grad_norm": 1.178253758047852, "learning_rate": 9.755266709135736e-08, "loss": 0.8414, "step": 10686 }, { "epoch": 0.9569198052493144, "grad_norm": 1.02579160710646, "learning_rate": 9.714896014189845e-08, "loss": 0.8141, "step": 10687 }, { "epoch": 0.9570093457943926, "grad_norm": 0.915020117213658, "learning_rate": 9.674608618799986e-08, "loss": 0.813, "step": 10688 }, { "epoch": 0.9570988863394706, "grad_norm": 1.0283961868037617, "learning_rate": 9.634404526354779e-08, "loss": 0.7973, "step": 10689 }, { "epoch": 0.9571884268845486, "grad_norm": 1.0842826273719446, "learning_rate": 9.594283740236187e-08, "loss": 0.8192, "step": 10690 }, { "epoch": 0.9572779674296267, "grad_norm": 1.0530430326430122, "learning_rate": 9.554246263819067e-08, "loss": 0.8001, "step": 10691 }, { "epoch": 0.9573675079747048, "grad_norm": 1.040726874036281, "learning_rate": 9.514292100471056e-08, "loss": 0.799, "step": 10692 }, { "epoch": 0.9574570485197829, "grad_norm": 0.996005404353338, "learning_rate": 9.474421253553135e-08, "loss": 0.8194, "step": 10693 }, { "epoch": 0.9575465890648609, "grad_norm": 1.048512735214228, "learning_rate": 9.434633726419173e-08, "loss": 0.8274, "step": 10694 }, { "epoch": 0.957636129609939, "grad_norm": 1.0184722854961523, "learning_rate": 9.394929522415719e-08, "loss": 0.8506, "step": 10695 }, { "epoch": 0.9577256701550171, "grad_norm": 0.9435227092023951, "learning_rate": 9.355308644882877e-08, "loss": 0.7942, "step": 10696 }, { "epoch": 0.9578152107000951, "grad_norm": 0.8663202384694958, "learning_rate": 9.315771097153092e-08, "loss": 0.8017, "step": 10697 }, { "epoch": 0.9579047512451732, "grad_norm": 0.936533555532817, "learning_rate": 9.276316882552372e-08, "loss": 0.7981, "step": 10698 }, { "epoch": 0.9579942917902513, "grad_norm": 1.033619364141493, "learning_rate": 9.236946004399394e-08, "loss": 0.8165, "step": 10699 }, { "epoch": 0.9580838323353293, "grad_norm": 0.9672780354091469, "learning_rate": 9.197658466005843e-08, "loss": 0.834, "step": 10700 }, { "epoch": 0.9581733728804074, "grad_norm": 1.1331011114076677, "learning_rate": 9.15845427067663e-08, "loss": 0.8162, "step": 10701 }, { "epoch": 0.9582629134254855, "grad_norm": 0.909364142416958, "learning_rate": 9.119333421709341e-08, "loss": 0.8013, "step": 10702 }, { "epoch": 0.9583524539705636, "grad_norm": 1.0545715700275688, "learning_rate": 9.080295922394788e-08, "loss": 0.8256, "step": 10703 }, { "epoch": 0.9584419945156416, "grad_norm": 1.120573983545074, "learning_rate": 9.041341776016565e-08, "loss": 0.7923, "step": 10704 }, { "epoch": 0.9585315350607196, "grad_norm": 1.0474368437261667, "learning_rate": 9.002470985851386e-08, "loss": 0.8394, "step": 10705 }, { "epoch": 0.9586210756057978, "grad_norm": 1.0291204732826473, "learning_rate": 8.963683555169078e-08, "loss": 0.8066, "step": 10706 }, { "epoch": 0.9587106161508758, "grad_norm": 1.0311381891936429, "learning_rate": 8.924979487232255e-08, "loss": 0.7868, "step": 10707 }, { "epoch": 0.9588001566959539, "grad_norm": 1.0105482206935334, "learning_rate": 8.886358785296423e-08, "loss": 0.8091, "step": 10708 }, { "epoch": 0.9588896972410319, "grad_norm": 1.0188566167049309, "learning_rate": 8.847821452610316e-08, "loss": 0.7826, "step": 10709 }, { "epoch": 0.95897923778611, "grad_norm": 1.01137759379712, "learning_rate": 8.809367492415677e-08, "loss": 0.8001, "step": 10710 }, { "epoch": 0.9590687783311881, "grad_norm": 1.023838869857259, "learning_rate": 8.770996907947027e-08, "loss": 0.8211, "step": 10711 }, { "epoch": 0.9591583188762661, "grad_norm": 0.9887333676523925, "learning_rate": 8.732709702432007e-08, "loss": 0.8212, "step": 10712 }, { "epoch": 0.9592478594213443, "grad_norm": 1.0600232572798522, "learning_rate": 8.694505879091263e-08, "loss": 0.7595, "step": 10713 }, { "epoch": 0.9593373999664223, "grad_norm": 1.0856134361660486, "learning_rate": 8.656385441138227e-08, "loss": 0.8203, "step": 10714 }, { "epoch": 0.9594269405115003, "grad_norm": 0.9167649652956195, "learning_rate": 8.618348391779551e-08, "loss": 0.8441, "step": 10715 }, { "epoch": 0.9595164810565784, "grad_norm": 1.0070769256538696, "learning_rate": 8.580394734214792e-08, "loss": 0.7378, "step": 10716 }, { "epoch": 0.9596060216016565, "grad_norm": 0.9868131879287869, "learning_rate": 8.542524471636504e-08, "loss": 0.7931, "step": 10717 }, { "epoch": 0.9596955621467346, "grad_norm": 0.9758084856321736, "learning_rate": 8.504737607230252e-08, "loss": 0.8303, "step": 10718 }, { "epoch": 0.9597851026918126, "grad_norm": 0.9630950799962955, "learning_rate": 8.467034144174491e-08, "loss": 0.7638, "step": 10719 }, { "epoch": 0.9598746432368908, "grad_norm": 1.1283929233516516, "learning_rate": 8.429414085640574e-08, "loss": 0.8005, "step": 10720 }, { "epoch": 0.9599641837819688, "grad_norm": 0.9379747067637351, "learning_rate": 8.391877434793194e-08, "loss": 0.8308, "step": 10721 }, { "epoch": 0.9600537243270468, "grad_norm": 1.0430526069037074, "learning_rate": 8.354424194789713e-08, "loss": 0.8318, "step": 10722 }, { "epoch": 0.9601432648721249, "grad_norm": 0.9721217077337018, "learning_rate": 8.317054368780497e-08, "loss": 0.8205, "step": 10723 }, { "epoch": 0.960232805417203, "grad_norm": 1.1012463343682937, "learning_rate": 8.279767959909257e-08, "loss": 0.7872, "step": 10724 }, { "epoch": 0.960322345962281, "grad_norm": 1.1032419540099363, "learning_rate": 8.24256497131204e-08, "loss": 0.8437, "step": 10725 }, { "epoch": 0.9604118865073591, "grad_norm": 1.006673084550879, "learning_rate": 8.20544540611845e-08, "loss": 0.7575, "step": 10726 }, { "epoch": 0.9605014270524371, "grad_norm": 0.9939876509058281, "learning_rate": 8.168409267450883e-08, "loss": 0.8057, "step": 10727 }, { "epoch": 0.9605909675975153, "grad_norm": 1.002189372674301, "learning_rate": 8.131456558424622e-08, "loss": 0.7992, "step": 10728 }, { "epoch": 0.9606805081425933, "grad_norm": 1.0878673719911007, "learning_rate": 8.09458728214807e-08, "loss": 0.7513, "step": 10729 }, { "epoch": 0.9607700486876714, "grad_norm": 0.9334721931125108, "learning_rate": 8.057801441722524e-08, "loss": 0.788, "step": 10730 }, { "epoch": 0.9608595892327495, "grad_norm": 1.0131028166238258, "learning_rate": 8.021099040242175e-08, "loss": 0.8117, "step": 10731 }, { "epoch": 0.9609491297778275, "grad_norm": 0.9530102521647551, "learning_rate": 7.984480080794443e-08, "loss": 0.793, "step": 10732 }, { "epoch": 0.9610386703229056, "grad_norm": 0.9016752457452283, "learning_rate": 7.947944566459532e-08, "loss": 0.7445, "step": 10733 }, { "epoch": 0.9611282108679836, "grad_norm": 0.972612932130089, "learning_rate": 7.911492500310758e-08, "loss": 0.7652, "step": 10734 }, { "epoch": 0.9612177514130618, "grad_norm": 1.0101671197354496, "learning_rate": 7.875123885414337e-08, "loss": 0.7746, "step": 10735 }, { "epoch": 0.9613072919581398, "grad_norm": 1.0569272783740187, "learning_rate": 7.83883872482949e-08, "loss": 0.8063, "step": 10736 }, { "epoch": 0.9613968325032178, "grad_norm": 0.887543913209278, "learning_rate": 7.802637021608329e-08, "loss": 0.8054, "step": 10737 }, { "epoch": 0.961486373048296, "grad_norm": 0.9589668848409285, "learning_rate": 7.766518778795972e-08, "loss": 0.7705, "step": 10738 }, { "epoch": 0.961575913593374, "grad_norm": 0.9177599437614837, "learning_rate": 7.730483999430661e-08, "loss": 0.811, "step": 10739 }, { "epoch": 0.9616654541384521, "grad_norm": 0.9873938517712042, "learning_rate": 7.694532686543632e-08, "loss": 0.8221, "step": 10740 }, { "epoch": 0.9617549946835301, "grad_norm": 1.1130843923784588, "learning_rate": 7.658664843158914e-08, "loss": 0.7601, "step": 10741 }, { "epoch": 0.9618445352286082, "grad_norm": 1.0127411071774635, "learning_rate": 7.622880472293537e-08, "loss": 0.7857, "step": 10742 }, { "epoch": 0.9619340757736863, "grad_norm": 0.9832283983248711, "learning_rate": 7.587179576957537e-08, "loss": 0.8982, "step": 10743 }, { "epoch": 0.9620236163187643, "grad_norm": 1.0562070421441245, "learning_rate": 7.551562160153957e-08, "loss": 0.8803, "step": 10744 }, { "epoch": 0.9621131568638424, "grad_norm": 0.9007844840785783, "learning_rate": 7.516028224878957e-08, "loss": 0.7833, "step": 10745 }, { "epoch": 0.9622026974089205, "grad_norm": 0.8696844899468402, "learning_rate": 7.480577774121478e-08, "loss": 0.7702, "step": 10746 }, { "epoch": 0.9622922379539985, "grad_norm": 0.9495309562581002, "learning_rate": 7.445210810863357e-08, "loss": 0.8133, "step": 10747 }, { "epoch": 0.9623817784990766, "grad_norm": 1.11055758773311, "learning_rate": 7.40992733807977e-08, "loss": 0.7542, "step": 10748 }, { "epoch": 0.9624713190441547, "grad_norm": 0.9524018960485578, "learning_rate": 7.374727358738454e-08, "loss": 0.8098, "step": 10749 }, { "epoch": 0.9625608595892328, "grad_norm": 1.0183370538093015, "learning_rate": 7.339610875800485e-08, "loss": 0.8367, "step": 10750 }, { "epoch": 0.9626504001343108, "grad_norm": 0.9102362230603674, "learning_rate": 7.30457789221961e-08, "loss": 0.7889, "step": 10751 }, { "epoch": 0.9627399406793888, "grad_norm": 0.9700603445062228, "learning_rate": 7.269628410942808e-08, "loss": 0.7416, "step": 10752 }, { "epoch": 0.962829481224467, "grad_norm": 1.021629753837941, "learning_rate": 7.234762434909725e-08, "loss": 0.8563, "step": 10753 }, { "epoch": 0.962919021769545, "grad_norm": 0.9728270123423797, "learning_rate": 7.199979967053461e-08, "loss": 0.7743, "step": 10754 }, { "epoch": 0.9630085623146231, "grad_norm": 1.0251613038647325, "learning_rate": 7.165281010299452e-08, "loss": 0.725, "step": 10755 }, { "epoch": 0.9630981028597012, "grad_norm": 1.0684647688296331, "learning_rate": 7.130665567566808e-08, "loss": 0.8202, "step": 10756 }, { "epoch": 0.9631876434047792, "grad_norm": 1.00897035927911, "learning_rate": 7.096133641767088e-08, "loss": 0.7555, "step": 10757 }, { "epoch": 0.9632771839498573, "grad_norm": 0.9735896219928971, "learning_rate": 7.061685235804972e-08, "loss": 0.7934, "step": 10758 }, { "epoch": 0.9633667244949353, "grad_norm": 0.9471928711425164, "learning_rate": 7.027320352578137e-08, "loss": 0.74, "step": 10759 }, { "epoch": 0.9634562650400135, "grad_norm": 0.9021493190171377, "learning_rate": 6.993038994977386e-08, "loss": 0.7347, "step": 10760 }, { "epoch": 0.9635458055850915, "grad_norm": 0.9787628717453836, "learning_rate": 6.958841165886299e-08, "loss": 0.7287, "step": 10761 }, { "epoch": 0.9636353461301695, "grad_norm": 0.9771052321076897, "learning_rate": 6.924726868181464e-08, "loss": 0.8171, "step": 10762 }, { "epoch": 0.9637248866752476, "grad_norm": 0.9818512616284012, "learning_rate": 6.890696104732475e-08, "loss": 0.7583, "step": 10763 }, { "epoch": 0.9638144272203257, "grad_norm": 0.9653019922177514, "learning_rate": 6.85674887840182e-08, "loss": 0.7692, "step": 10764 }, { "epoch": 0.9639039677654038, "grad_norm": 1.0539220449500888, "learning_rate": 6.822885192045215e-08, "loss": 0.7901, "step": 10765 }, { "epoch": 0.9639935083104818, "grad_norm": 0.984672347571423, "learning_rate": 6.789105048510935e-08, "loss": 0.7769, "step": 10766 }, { "epoch": 0.96408304885556, "grad_norm": 1.078862941483327, "learning_rate": 6.755408450640599e-08, "loss": 0.7894, "step": 10767 }, { "epoch": 0.964172589400638, "grad_norm": 1.14397365618563, "learning_rate": 6.721795401268493e-08, "loss": 0.8711, "step": 10768 }, { "epoch": 0.964262129945716, "grad_norm": 1.10631340995849, "learning_rate": 6.688265903222247e-08, "loss": 0.8284, "step": 10769 }, { "epoch": 0.9643516704907941, "grad_norm": 0.9255328016932481, "learning_rate": 6.654819959322268e-08, "loss": 0.7704, "step": 10770 }, { "epoch": 0.9644412110358722, "grad_norm": 1.0169822620107298, "learning_rate": 6.62145757238164e-08, "loss": 0.8035, "step": 10771 }, { "epoch": 0.9645307515809503, "grad_norm": 0.9470005735548787, "learning_rate": 6.588178745207008e-08, "loss": 0.7842, "step": 10772 }, { "epoch": 0.9646202921260283, "grad_norm": 0.9791359742993925, "learning_rate": 6.554983480597576e-08, "loss": 0.8205, "step": 10773 }, { "epoch": 0.9647098326711064, "grad_norm": 0.9686201893409735, "learning_rate": 6.521871781345446e-08, "loss": 0.7997, "step": 10774 }, { "epoch": 0.9647993732161845, "grad_norm": 1.1272040213586003, "learning_rate": 6.488843650236054e-08, "loss": 0.8078, "step": 10775 }, { "epoch": 0.9648889137612625, "grad_norm": 0.9855648531827726, "learning_rate": 6.455899090047623e-08, "loss": 0.8036, "step": 10776 }, { "epoch": 0.9649784543063405, "grad_norm": 0.9400190269580695, "learning_rate": 6.42303810355116e-08, "loss": 0.7537, "step": 10777 }, { "epoch": 0.9650679948514187, "grad_norm": 1.0167767901886637, "learning_rate": 6.390260693511119e-08, "loss": 0.7952, "step": 10778 }, { "epoch": 0.9651575353964967, "grad_norm": 1.0092617018081764, "learning_rate": 6.357566862684406e-08, "loss": 0.7748, "step": 10779 }, { "epoch": 0.9652470759415748, "grad_norm": 0.8930946494348282, "learning_rate": 6.324956613821153e-08, "loss": 0.7548, "step": 10780 }, { "epoch": 0.9653366164866528, "grad_norm": 1.06087981184842, "learning_rate": 6.292429949664502e-08, "loss": 0.7988, "step": 10781 }, { "epoch": 0.965426157031731, "grad_norm": 0.8781480498745912, "learning_rate": 6.259986872950485e-08, "loss": 0.7879, "step": 10782 }, { "epoch": 0.965515697576809, "grad_norm": 1.063333256649386, "learning_rate": 6.227627386408031e-08, "loss": 0.7479, "step": 10783 }, { "epoch": 0.965605238121887, "grad_norm": 1.0951482394660654, "learning_rate": 6.195351492759183e-08, "loss": 0.8082, "step": 10784 }, { "epoch": 0.9656947786669652, "grad_norm": 1.0381692512860752, "learning_rate": 6.16315919471877e-08, "loss": 0.7676, "step": 10785 }, { "epoch": 0.9657843192120432, "grad_norm": 0.9260969546095864, "learning_rate": 6.13105049499474e-08, "loss": 0.8167, "step": 10786 }, { "epoch": 0.9658738597571213, "grad_norm": 1.0479636750775732, "learning_rate": 6.09902539628815e-08, "loss": 0.7742, "step": 10787 }, { "epoch": 0.9659634003021993, "grad_norm": 1.2213792251467075, "learning_rate": 6.067083901292625e-08, "loss": 0.7547, "step": 10788 }, { "epoch": 0.9660529408472774, "grad_norm": 0.9671806654773634, "learning_rate": 6.035226012695239e-08, "loss": 0.8019, "step": 10789 }, { "epoch": 0.9661424813923555, "grad_norm": 1.0582673159394054, "learning_rate": 6.003451733175402e-08, "loss": 0.7623, "step": 10790 }, { "epoch": 0.9662320219374335, "grad_norm": 0.9099283361199353, "learning_rate": 5.971761065406201e-08, "loss": 0.8098, "step": 10791 }, { "epoch": 0.9663215624825117, "grad_norm": 0.9406632993251882, "learning_rate": 5.9401540120531674e-08, "loss": 0.776, "step": 10792 }, { "epoch": 0.9664111030275897, "grad_norm": 0.9518705821517812, "learning_rate": 5.908630575774954e-08, "loss": 0.7892, "step": 10793 }, { "epoch": 0.9665006435726677, "grad_norm": 0.9996671578251665, "learning_rate": 5.877190759223328e-08, "loss": 0.8284, "step": 10794 }, { "epoch": 0.9665901841177458, "grad_norm": 0.984071282205227, "learning_rate": 5.8458345650429513e-08, "loss": 0.7692, "step": 10795 }, { "epoch": 0.9666797246628239, "grad_norm": 0.9322092240780453, "learning_rate": 5.8145619958712704e-08, "loss": 0.8146, "step": 10796 }, { "epoch": 0.966769265207902, "grad_norm": 1.0359515777335124, "learning_rate": 5.783373054338848e-08, "loss": 0.8068, "step": 10797 }, { "epoch": 0.96685880575298, "grad_norm": 0.9987766199450244, "learning_rate": 5.7522677430691396e-08, "loss": 0.807, "step": 10798 }, { "epoch": 0.966948346298058, "grad_norm": 0.9553711570893652, "learning_rate": 5.7212460646788314e-08, "loss": 0.7175, "step": 10799 }, { "epoch": 0.9670378868431362, "grad_norm": 0.9886142039827353, "learning_rate": 5.69030802177728e-08, "loss": 0.7791, "step": 10800 }, { "epoch": 0.9671274273882142, "grad_norm": 0.9550426514146412, "learning_rate": 5.659453616966737e-08, "loss": 0.7917, "step": 10801 }, { "epoch": 0.9672169679332923, "grad_norm": 1.0574861568027605, "learning_rate": 5.628682852842793e-08, "loss": 0.8289, "step": 10802 }, { "epoch": 0.9673065084783704, "grad_norm": 1.0528018845226228, "learning_rate": 5.5979957319935996e-08, "loss": 0.7745, "step": 10803 }, { "epoch": 0.9673960490234484, "grad_norm": 0.9371964017601132, "learning_rate": 5.5673922570006475e-08, "loss": 0.7556, "step": 10804 }, { "epoch": 0.9674855895685265, "grad_norm": 0.970024857904332, "learning_rate": 5.5368724304379896e-08, "loss": 0.7752, "step": 10805 }, { "epoch": 0.9675751301136045, "grad_norm": 0.9354010024366369, "learning_rate": 5.506436254873016e-08, "loss": 0.7994, "step": 10806 }, { "epoch": 0.9676646706586827, "grad_norm": 1.114516516099089, "learning_rate": 5.4760837328659e-08, "loss": 0.7858, "step": 10807 }, { "epoch": 0.9677542112037607, "grad_norm": 0.9980220188459672, "learning_rate": 5.445814866969712e-08, "loss": 0.7997, "step": 10808 }, { "epoch": 0.9678437517488387, "grad_norm": 0.9904212513112618, "learning_rate": 5.4156296597306366e-08, "loss": 0.8071, "step": 10809 }, { "epoch": 0.9679332922939169, "grad_norm": 1.0247963569767937, "learning_rate": 5.385528113687755e-08, "loss": 0.7907, "step": 10810 }, { "epoch": 0.9680228328389949, "grad_norm": 0.8767082477989512, "learning_rate": 5.35551023137304e-08, "loss": 0.7996, "step": 10811 }, { "epoch": 0.968112373384073, "grad_norm": 1.116565585243958, "learning_rate": 5.325576015311584e-08, "loss": 0.8162, "step": 10812 }, { "epoch": 0.968201913929151, "grad_norm": 1.006974753004568, "learning_rate": 5.295725468021373e-08, "loss": 0.7997, "step": 10813 }, { "epoch": 0.9682914544742292, "grad_norm": 1.0026658797022288, "learning_rate": 5.2659585920131765e-08, "loss": 0.813, "step": 10814 }, { "epoch": 0.9683809950193072, "grad_norm": 0.903062581312795, "learning_rate": 5.2362753897911015e-08, "loss": 0.7584, "step": 10815 }, { "epoch": 0.9684705355643852, "grad_norm": 1.011843672505144, "learning_rate": 5.206675863851818e-08, "loss": 0.8096, "step": 10816 }, { "epoch": 0.9685600761094633, "grad_norm": 1.2486023911472193, "learning_rate": 5.177160016685334e-08, "loss": 0.7996, "step": 10817 }, { "epoch": 0.9686496166545414, "grad_norm": 0.9954910158165448, "learning_rate": 5.14772785077422e-08, "loss": 0.8332, "step": 10818 }, { "epoch": 0.9687391571996194, "grad_norm": 1.110040433154663, "learning_rate": 5.118379368594384e-08, "loss": 0.7948, "step": 10819 }, { "epoch": 0.9688286977446975, "grad_norm": 1.429613524166745, "learning_rate": 5.0891145726144066e-08, "loss": 0.8149, "step": 10820 }, { "epoch": 0.9689182382897756, "grad_norm": 0.9781741424716999, "learning_rate": 5.0599334652959854e-08, "loss": 0.8404, "step": 10821 }, { "epoch": 0.9690077788348537, "grad_norm": 1.2503484822645612, "learning_rate": 5.0308360490937125e-08, "loss": 0.7868, "step": 10822 }, { "epoch": 0.9690973193799317, "grad_norm": 1.277293310630612, "learning_rate": 5.0018223264552967e-08, "loss": 0.8009, "step": 10823 }, { "epoch": 0.9691868599250097, "grad_norm": 1.1525367241737872, "learning_rate": 4.972892299821119e-08, "loss": 0.7799, "step": 10824 }, { "epoch": 0.9692764004700879, "grad_norm": 0.9834685864305659, "learning_rate": 4.944045971624678e-08, "loss": 0.7469, "step": 10825 }, { "epoch": 0.9693659410151659, "grad_norm": 1.1050614090312476, "learning_rate": 4.9152833442925876e-08, "loss": 0.8154, "step": 10826 }, { "epoch": 0.969455481560244, "grad_norm": 1.0580024624141107, "learning_rate": 4.886604420244245e-08, "loss": 0.7871, "step": 10827 }, { "epoch": 0.9695450221053221, "grad_norm": 0.9532963638373693, "learning_rate": 4.8580092018918334e-08, "loss": 0.7736, "step": 10828 }, { "epoch": 0.9696345626504002, "grad_norm": 0.951326407840951, "learning_rate": 4.829497691640872e-08, "loss": 0.7833, "step": 10829 }, { "epoch": 0.9697241031954782, "grad_norm": 0.9375876146204878, "learning_rate": 4.8010698918895535e-08, "loss": 0.7633, "step": 10830 }, { "epoch": 0.9698136437405562, "grad_norm": 0.9351884901397687, "learning_rate": 4.772725805029188e-08, "loss": 0.8129, "step": 10831 }, { "epoch": 0.9699031842856344, "grad_norm": 0.8931821379813093, "learning_rate": 4.744465433443979e-08, "loss": 0.7591, "step": 10832 }, { "epoch": 0.9699927248307124, "grad_norm": 1.0224944716172502, "learning_rate": 4.7162887795111353e-08, "loss": 0.8174, "step": 10833 }, { "epoch": 0.9700822653757905, "grad_norm": 1.8390359022996827, "learning_rate": 4.688195845600763e-08, "loss": 0.7824, "step": 10834 }, { "epoch": 0.9701718059208685, "grad_norm": 1.0301385948422506, "learning_rate": 4.660186634075858e-08, "loss": 0.815, "step": 10835 }, { "epoch": 0.9702613464659466, "grad_norm": 1.0445084759442553, "learning_rate": 4.6322611472925383e-08, "loss": 0.852, "step": 10836 }, { "epoch": 0.9703508870110247, "grad_norm": 1.0145575960527322, "learning_rate": 4.604419387599812e-08, "loss": 0.7771, "step": 10837 }, { "epoch": 0.9704404275561027, "grad_norm": 1.012950325419236, "learning_rate": 4.5766613573396956e-08, "loss": 0.8043, "step": 10838 }, { "epoch": 0.9705299681011809, "grad_norm": 1.053777496774982, "learning_rate": 4.548987058846988e-08, "loss": 0.7762, "step": 10839 }, { "epoch": 0.9706195086462589, "grad_norm": 0.9808388785012142, "learning_rate": 4.521396494449604e-08, "loss": 0.8047, "step": 10840 }, { "epoch": 0.9707090491913369, "grad_norm": 0.9736352042078391, "learning_rate": 4.493889666468354e-08, "loss": 0.8456, "step": 10841 }, { "epoch": 0.970798589736415, "grad_norm": 1.0525845701416303, "learning_rate": 4.4664665772170547e-08, "loss": 0.8311, "step": 10842 }, { "epoch": 0.9708881302814931, "grad_norm": 1.0756805797050957, "learning_rate": 4.439127229002416e-08, "loss": 0.8258, "step": 10843 }, { "epoch": 0.9709776708265712, "grad_norm": 0.93351865202235, "learning_rate": 4.411871624124264e-08, "loss": 0.7949, "step": 10844 }, { "epoch": 0.9710672113716492, "grad_norm": 0.9888040629321048, "learning_rate": 4.3846997648751e-08, "loss": 0.7613, "step": 10845 }, { "epoch": 0.9711567519167273, "grad_norm": 1.0388408064091816, "learning_rate": 4.3576116535405387e-08, "loss": 0.8137, "step": 10846 }, { "epoch": 0.9712462924618054, "grad_norm": 1.1335329691142393, "learning_rate": 4.3306072923990914e-08, "loss": 0.8336, "step": 10847 }, { "epoch": 0.9713358330068834, "grad_norm": 1.0229264833433342, "learning_rate": 4.303686683722497e-08, "loss": 0.8153, "step": 10848 }, { "epoch": 0.9714253735519615, "grad_norm": 0.8987098264269682, "learning_rate": 4.276849829775165e-08, "loss": 0.8302, "step": 10849 }, { "epoch": 0.9715149140970396, "grad_norm": 1.0170094464792827, "learning_rate": 4.2500967328142904e-08, "loss": 0.782, "step": 10850 }, { "epoch": 0.9716044546421176, "grad_norm": 1.1266490167829915, "learning_rate": 4.223427395090518e-08, "loss": 0.7498, "step": 10851 }, { "epoch": 0.9716939951871957, "grad_norm": 0.958345580669655, "learning_rate": 4.1968418188470525e-08, "loss": 0.8101, "step": 10852 }, { "epoch": 0.9717835357322737, "grad_norm": 1.0599976204903112, "learning_rate": 4.170340006320217e-08, "loss": 0.8593, "step": 10853 }, { "epoch": 0.9718730762773519, "grad_norm": 0.8819897464838972, "learning_rate": 4.143921959739339e-08, "loss": 0.8116, "step": 10854 }, { "epoch": 0.9719626168224299, "grad_norm": 0.937699032394238, "learning_rate": 4.1175876813265295e-08, "loss": 0.7696, "step": 10855 }, { "epoch": 0.9720521573675079, "grad_norm": 0.9799334135639653, "learning_rate": 4.0913371732969055e-08, "loss": 0.8072, "step": 10856 }, { "epoch": 0.9721416979125861, "grad_norm": 0.9796239606188051, "learning_rate": 4.065170437858701e-08, "loss": 0.8262, "step": 10857 }, { "epoch": 0.9722312384576641, "grad_norm": 1.0430276111872807, "learning_rate": 4.0390874772128216e-08, "loss": 0.821, "step": 10858 }, { "epoch": 0.9723207790027422, "grad_norm": 1.0868197667135415, "learning_rate": 4.0130882935532914e-08, "loss": 0.7982, "step": 10859 }, { "epoch": 0.9724103195478202, "grad_norm": 0.9526364002645701, "learning_rate": 3.987172889067359e-08, "loss": 0.7877, "step": 10860 }, { "epoch": 0.9724998600928983, "grad_norm": 1.0347415758233793, "learning_rate": 3.9613412659346154e-08, "loss": 0.7985, "step": 10861 }, { "epoch": 0.9725894006379764, "grad_norm": 0.9344835938203859, "learning_rate": 3.935593426327988e-08, "loss": 0.7868, "step": 10862 }, { "epoch": 0.9726789411830544, "grad_norm": 1.110291548457916, "learning_rate": 3.909929372413413e-08, "loss": 0.8187, "step": 10863 }, { "epoch": 0.9727684817281326, "grad_norm": 1.0301198424078752, "learning_rate": 3.884349106349716e-08, "loss": 0.8085, "step": 10864 }, { "epoch": 0.9728580222732106, "grad_norm": 1.0098307129498267, "learning_rate": 3.858852630288401e-08, "loss": 0.8005, "step": 10865 }, { "epoch": 0.9729475628182886, "grad_norm": 0.9335698018717342, "learning_rate": 3.8334399463743063e-08, "loss": 0.7949, "step": 10866 }, { "epoch": 0.9730371033633667, "grad_norm": 1.0809478207144838, "learning_rate": 3.808111056745056e-08, "loss": 0.7716, "step": 10867 }, { "epoch": 0.9731266439084448, "grad_norm": 1.0465248748184246, "learning_rate": 3.7828659635311683e-08, "loss": 0.799, "step": 10868 }, { "epoch": 0.9732161844535229, "grad_norm": 0.8643911985209095, "learning_rate": 3.7577046688562765e-08, "loss": 0.7609, "step": 10869 }, { "epoch": 0.9733057249986009, "grad_norm": 0.9826457685941364, "learning_rate": 3.7326271748368005e-08, "loss": 0.8705, "step": 10870 }, { "epoch": 0.973395265543679, "grad_norm": 0.9156518439591341, "learning_rate": 3.707633483582163e-08, "loss": 0.7889, "step": 10871 }, { "epoch": 0.9734848060887571, "grad_norm": 0.9564442721887093, "learning_rate": 3.682723597194793e-08, "loss": 0.7797, "step": 10872 }, { "epoch": 0.9735743466338351, "grad_norm": 0.9847541797183128, "learning_rate": 3.657897517770015e-08, "loss": 0.8664, "step": 10873 }, { "epoch": 0.9736638871789132, "grad_norm": 1.092664361711944, "learning_rate": 3.6331552473960475e-08, "loss": 0.7621, "step": 10874 }, { "epoch": 0.9737534277239913, "grad_norm": 1.0380940451746594, "learning_rate": 3.6084967881542255e-08, "loss": 0.7994, "step": 10875 }, { "epoch": 0.9738429682690694, "grad_norm": 0.9540290425948933, "learning_rate": 3.5839221421187783e-08, "loss": 0.7876, "step": 10876 }, { "epoch": 0.9739325088141474, "grad_norm": 0.9305595497566058, "learning_rate": 3.5594313113567204e-08, "loss": 0.7615, "step": 10877 }, { "epoch": 0.9740220493592254, "grad_norm": 0.9612148170748019, "learning_rate": 3.53502429792818e-08, "loss": 0.7715, "step": 10878 }, { "epoch": 0.9741115899043036, "grad_norm": 1.060420905037074, "learning_rate": 3.510701103886183e-08, "loss": 0.8097, "step": 10879 }, { "epoch": 0.9742011304493816, "grad_norm": 0.9248796308503917, "learning_rate": 3.486461731276869e-08, "loss": 0.7496, "step": 10880 }, { "epoch": 0.9742906709944597, "grad_norm": 1.0252440774836882, "learning_rate": 3.4623061821389417e-08, "loss": 0.8469, "step": 10881 }, { "epoch": 0.9743802115395378, "grad_norm": 1.128621839803474, "learning_rate": 3.438234458504441e-08, "loss": 0.815, "step": 10882 }, { "epoch": 0.9744697520846158, "grad_norm": 0.931926578307456, "learning_rate": 3.4142465623980825e-08, "loss": 0.7584, "step": 10883 }, { "epoch": 0.9745592926296939, "grad_norm": 0.9220423625907225, "learning_rate": 3.390342495837806e-08, "loss": 0.7887, "step": 10884 }, { "epoch": 0.9746488331747719, "grad_norm": 0.8478271567160813, "learning_rate": 3.366522260834226e-08, "loss": 0.7425, "step": 10885 }, { "epoch": 0.9747383737198501, "grad_norm": 0.9860442092337542, "learning_rate": 3.342785859391073e-08, "loss": 0.788, "step": 10886 }, { "epoch": 0.9748279142649281, "grad_norm": 1.0907144225169243, "learning_rate": 3.3191332935050837e-08, "loss": 0.7783, "step": 10887 }, { "epoch": 0.9749174548100061, "grad_norm": 1.0742627107821257, "learning_rate": 3.2955645651655544e-08, "loss": 0.8355, "step": 10888 }, { "epoch": 0.9750069953550842, "grad_norm": 1.0046721141989614, "learning_rate": 3.272079676355233e-08, "loss": 0.8343, "step": 10889 }, { "epoch": 0.9750965359001623, "grad_norm": 1.020325117524468, "learning_rate": 3.2486786290494287e-08, "loss": 0.7647, "step": 10890 }, { "epoch": 0.9751860764452404, "grad_norm": 0.897369159065843, "learning_rate": 3.2253614252167884e-08, "loss": 0.797, "step": 10891 }, { "epoch": 0.9752756169903184, "grad_norm": 0.9703177332404401, "learning_rate": 3.202128066818522e-08, "loss": 0.7985, "step": 10892 }, { "epoch": 0.9753651575353965, "grad_norm": 1.0692641174684516, "learning_rate": 3.178978555808954e-08, "loss": 0.788, "step": 10893 }, { "epoch": 0.9754546980804746, "grad_norm": 1.0525563316038185, "learning_rate": 3.155912894135304e-08, "loss": 0.8774, "step": 10894 }, { "epoch": 0.9755442386255526, "grad_norm": 1.0532747830822367, "learning_rate": 3.132931083737911e-08, "loss": 0.8092, "step": 10895 }, { "epoch": 0.9756337791706307, "grad_norm": 1.0268324065995567, "learning_rate": 3.110033126549894e-08, "loss": 0.8093, "step": 10896 }, { "epoch": 0.9757233197157088, "grad_norm": 0.9689220458155984, "learning_rate": 3.0872190244972676e-08, "loss": 0.81, "step": 10897 }, { "epoch": 0.9758128602607868, "grad_norm": 0.8758630710981218, "learning_rate": 3.064488779499164e-08, "loss": 0.7614, "step": 10898 }, { "epoch": 0.9759024008058649, "grad_norm": 0.942146098434114, "learning_rate": 3.041842393467609e-08, "loss": 0.7905, "step": 10899 }, { "epoch": 0.975991941350943, "grad_norm": 0.8864566659068037, "learning_rate": 3.019279868307412e-08, "loss": 0.7726, "step": 10900 }, { "epoch": 0.9760814818960211, "grad_norm": 1.0591996455307302, "learning_rate": 2.9968012059163886e-08, "loss": 0.7742, "step": 10901 }, { "epoch": 0.9761710224410991, "grad_norm": 0.8732837357472804, "learning_rate": 2.974406408185693e-08, "loss": 0.7276, "step": 10902 }, { "epoch": 0.9762605629861771, "grad_norm": 0.9759263803764883, "learning_rate": 2.9520954769988176e-08, "loss": 0.8225, "step": 10903 }, { "epoch": 0.9763501035312553, "grad_norm": 1.0239633000084492, "learning_rate": 2.929868414232706e-08, "loss": 0.7708, "step": 10904 }, { "epoch": 0.9764396440763333, "grad_norm": 1.0095945091621976, "learning_rate": 2.907725221756863e-08, "loss": 0.7987, "step": 10905 }, { "epoch": 0.9765291846214114, "grad_norm": 1.0287527367881886, "learning_rate": 2.8856659014339095e-08, "loss": 0.8576, "step": 10906 }, { "epoch": 0.9766187251664894, "grad_norm": 1.0216037843003316, "learning_rate": 2.863690455119361e-08, "loss": 0.8174, "step": 10907 }, { "epoch": 0.9767082657115675, "grad_norm": 1.0253402093837434, "learning_rate": 2.8417988846619615e-08, "loss": 0.81, "step": 10908 }, { "epoch": 0.9767978062566456, "grad_norm": 1.1188792402825865, "learning_rate": 2.8199911919029043e-08, "loss": 0.8397, "step": 10909 }, { "epoch": 0.9768873468017236, "grad_norm": 0.8724473692947046, "learning_rate": 2.7982673786767223e-08, "loss": 0.7583, "step": 10910 }, { "epoch": 0.9769768873468018, "grad_norm": 0.9281583561583105, "learning_rate": 2.7766274468106204e-08, "loss": 0.8535, "step": 10911 }, { "epoch": 0.9770664278918798, "grad_norm": 0.9084828165092352, "learning_rate": 2.755071398125031e-08, "loss": 0.7961, "step": 10912 }, { "epoch": 0.9771559684369578, "grad_norm": 1.1263858022417552, "learning_rate": 2.7335992344330597e-08, "loss": 0.7653, "step": 10913 }, { "epoch": 0.9772455089820359, "grad_norm": 1.1099044673229395, "learning_rate": 2.7122109575410393e-08, "loss": 0.8113, "step": 10914 }, { "epoch": 0.977335049527114, "grad_norm": 1.0294592583817066, "learning_rate": 2.690906569247864e-08, "loss": 0.7801, "step": 10915 }, { "epoch": 0.9774245900721921, "grad_norm": 0.9644686802549949, "learning_rate": 2.6696860713457674e-08, "loss": 0.8051, "step": 10916 }, { "epoch": 0.9775141306172701, "grad_norm": 1.0222813314860248, "learning_rate": 2.6485494656195432e-08, "loss": 0.7773, "step": 10917 }, { "epoch": 0.9776036711623483, "grad_norm": 1.1636561975875346, "learning_rate": 2.6274967538473252e-08, "loss": 0.7945, "step": 10918 }, { "epoch": 0.9776932117074263, "grad_norm": 0.9736701299019339, "learning_rate": 2.6065279378000296e-08, "loss": 0.7824, "step": 10919 }, { "epoch": 0.9777827522525043, "grad_norm": 0.9747604071312382, "learning_rate": 2.5856430192413574e-08, "loss": 0.8019, "step": 10920 }, { "epoch": 0.9778722927975824, "grad_norm": 0.9109892000822356, "learning_rate": 2.564841999928014e-08, "loss": 0.816, "step": 10921 }, { "epoch": 0.9779618333426605, "grad_norm": 1.0705611436158202, "learning_rate": 2.544124881609933e-08, "loss": 0.7476, "step": 10922 }, { "epoch": 0.9780513738877386, "grad_norm": 1.020725733439833, "learning_rate": 2.5234916660296094e-08, "loss": 0.8691, "step": 10923 }, { "epoch": 0.9781409144328166, "grad_norm": 0.9705013988332137, "learning_rate": 2.502942354922655e-08, "loss": 0.7698, "step": 10924 }, { "epoch": 0.9782304549778946, "grad_norm": 0.9258290911143892, "learning_rate": 2.482476950017576e-08, "loss": 0.7358, "step": 10925 }, { "epoch": 0.9783199955229728, "grad_norm": 0.9699969233202095, "learning_rate": 2.4620954530361062e-08, "loss": 0.7877, "step": 10926 }, { "epoch": 0.9784095360680508, "grad_norm": 1.0396309005777478, "learning_rate": 2.44179786569243e-08, "loss": 0.8202, "step": 10927 }, { "epoch": 0.9784990766131288, "grad_norm": 1.0580992920994223, "learning_rate": 2.4215841896938486e-08, "loss": 0.7709, "step": 10928 }, { "epoch": 0.978588617158207, "grad_norm": 0.9894416669961281, "learning_rate": 2.40145442674089e-08, "loss": 0.829, "step": 10929 }, { "epoch": 0.978678157703285, "grad_norm": 1.3381440525317723, "learning_rate": 2.381408578526756e-08, "loss": 0.7512, "step": 10930 }, { "epoch": 0.9787676982483631, "grad_norm": 1.068536036442939, "learning_rate": 2.361446646737431e-08, "loss": 0.8112, "step": 10931 }, { "epoch": 0.9788572387934411, "grad_norm": 0.986426829083777, "learning_rate": 2.341568633052349e-08, "loss": 0.8148, "step": 10932 }, { "epoch": 0.9789467793385193, "grad_norm": 0.9552644367163482, "learning_rate": 2.3217745391433954e-08, "loss": 0.8852, "step": 10933 }, { "epoch": 0.9790363198835973, "grad_norm": 0.970275697693054, "learning_rate": 2.3020643666756824e-08, "loss": 0.7448, "step": 10934 }, { "epoch": 0.9791258604286753, "grad_norm": 0.9967750359043535, "learning_rate": 2.2824381173069953e-08, "loss": 0.7869, "step": 10935 }, { "epoch": 0.9792154009737535, "grad_norm": 0.9664024954562596, "learning_rate": 2.2628957926884576e-08, "loss": 0.8052, "step": 10936 }, { "epoch": 0.9793049415188315, "grad_norm": 1.1131865925126674, "learning_rate": 2.2434373944637544e-08, "loss": 0.8333, "step": 10937 }, { "epoch": 0.9793944820639096, "grad_norm": 1.0452815764423837, "learning_rate": 2.2240629242696878e-08, "loss": 0.7916, "step": 10938 }, { "epoch": 0.9794840226089876, "grad_norm": 1.0587639729220244, "learning_rate": 2.2047723837359538e-08, "loss": 0.8147, "step": 10939 }, { "epoch": 0.9795735631540657, "grad_norm": 1.035995143617918, "learning_rate": 2.1855657744853653e-08, "loss": 0.7835, "step": 10940 }, { "epoch": 0.9796631036991438, "grad_norm": 1.0640113398286637, "learning_rate": 2.1664430981332972e-08, "loss": 0.8035, "step": 10941 }, { "epoch": 0.9797526442442218, "grad_norm": 0.9788596806174482, "learning_rate": 2.147404356288463e-08, "loss": 0.8305, "step": 10942 }, { "epoch": 0.9798421847892999, "grad_norm": 1.1431515022476078, "learning_rate": 2.1284495505521362e-08, "loss": 0.8701, "step": 10943 }, { "epoch": 0.979931725334378, "grad_norm": 0.9199979162908835, "learning_rate": 2.1095786825190423e-08, "loss": 0.8364, "step": 10944 }, { "epoch": 0.980021265879456, "grad_norm": 1.0763467704641456, "learning_rate": 2.0907917537762446e-08, "loss": 0.7866, "step": 10945 }, { "epoch": 0.9801108064245341, "grad_norm": 1.0164574572704694, "learning_rate": 2.0720887659041457e-08, "loss": 0.8108, "step": 10946 }, { "epoch": 0.9802003469696122, "grad_norm": 0.9682160783684138, "learning_rate": 2.0534697204761534e-08, "loss": 0.8189, "step": 10947 }, { "epoch": 0.9802898875146903, "grad_norm": 1.0125320763172467, "learning_rate": 2.0349346190581265e-08, "loss": 0.7439, "step": 10948 }, { "epoch": 0.9803794280597683, "grad_norm": 0.9276760062447196, "learning_rate": 2.0164834632092622e-08, "loss": 0.8046, "step": 10949 }, { "epoch": 0.9804689686048463, "grad_norm": 1.1083794236571993, "learning_rate": 1.9981162544817634e-08, "loss": 0.7891, "step": 10950 }, { "epoch": 0.9805585091499245, "grad_norm": 0.9256740873449438, "learning_rate": 1.9798329944206164e-08, "loss": 0.773, "step": 10951 }, { "epoch": 0.9806480496950025, "grad_norm": 1.031328111929455, "learning_rate": 1.961633684563591e-08, "loss": 0.8109, "step": 10952 }, { "epoch": 0.9807375902400806, "grad_norm": 1.0534182157455327, "learning_rate": 1.9435183264415734e-08, "loss": 0.8458, "step": 10953 }, { "epoch": 0.9808271307851587, "grad_norm": 0.9323390261957298, "learning_rate": 1.9254869215785677e-08, "loss": 0.8113, "step": 10954 }, { "epoch": 0.9809166713302367, "grad_norm": 0.9485777404632014, "learning_rate": 1.9075394714910267e-08, "loss": 0.8016, "step": 10955 }, { "epoch": 0.9810062118753148, "grad_norm": 1.4360089363589001, "learning_rate": 1.889675977688854e-08, "loss": 0.7844, "step": 10956 }, { "epoch": 0.9810957524203928, "grad_norm": 0.9874012611705577, "learning_rate": 1.8718964416745146e-08, "loss": 0.8294, "step": 10957 }, { "epoch": 0.981185292965471, "grad_norm": 0.9785488442259475, "learning_rate": 1.8542008649437003e-08, "loss": 0.8114, "step": 10958 }, { "epoch": 0.981274833510549, "grad_norm": 0.9000012868210979, "learning_rate": 1.836589248984888e-08, "loss": 0.7877, "step": 10959 }, { "epoch": 0.981364374055627, "grad_norm": 1.1160896065277681, "learning_rate": 1.8190615952794477e-08, "loss": 0.822, "step": 10960 }, { "epoch": 0.9814539146007051, "grad_norm": 0.9216385427260639, "learning_rate": 1.8016179053016445e-08, "loss": 0.7525, "step": 10961 }, { "epoch": 0.9815434551457832, "grad_norm": 0.9361958883370017, "learning_rate": 1.784258180519083e-08, "loss": 0.7798, "step": 10962 }, { "epoch": 0.9816329956908613, "grad_norm": 1.017260121657136, "learning_rate": 1.7669824223917053e-08, "loss": 0.8268, "step": 10963 }, { "epoch": 0.9817225362359393, "grad_norm": 1.0557202173605085, "learning_rate": 1.7497906323729053e-08, "loss": 0.8136, "step": 10964 }, { "epoch": 0.9818120767810175, "grad_norm": 1.0288222428077731, "learning_rate": 1.732682811908748e-08, "loss": 0.8305, "step": 10965 }, { "epoch": 0.9819016173260955, "grad_norm": 1.1262011212845917, "learning_rate": 1.7156589624381937e-08, "loss": 0.769, "step": 10966 }, { "epoch": 0.9819911578711735, "grad_norm": 0.9815835051855141, "learning_rate": 1.698719085393208e-08, "loss": 0.7982, "step": 10967 }, { "epoch": 0.9820806984162516, "grad_norm": 1.0941168022296672, "learning_rate": 1.681863182198984e-08, "loss": 0.8283, "step": 10968 }, { "epoch": 0.9821702389613297, "grad_norm": 0.9535843934831546, "learning_rate": 1.6650912542730547e-08, "loss": 0.8017, "step": 10969 }, { "epoch": 0.9822597795064077, "grad_norm": 0.9622204618916594, "learning_rate": 1.6484033030265134e-08, "loss": 0.7836, "step": 10970 }, { "epoch": 0.9823493200514858, "grad_norm": 0.9507054811883398, "learning_rate": 1.6317993298627933e-08, "loss": 0.8457, "step": 10971 }, { "epoch": 0.9824388605965639, "grad_norm": 0.9272066635170336, "learning_rate": 1.6152793361788877e-08, "loss": 0.762, "step": 10972 }, { "epoch": 0.982528401141642, "grad_norm": 0.9770987698433586, "learning_rate": 1.59884332336413e-08, "loss": 0.813, "step": 10973 }, { "epoch": 0.98261794168672, "grad_norm": 1.1171921762524795, "learning_rate": 1.5824912928011914e-08, "loss": 0.7917, "step": 10974 }, { "epoch": 0.982707482231798, "grad_norm": 0.9544845724412198, "learning_rate": 1.566223245865528e-08, "loss": 0.8596, "step": 10975 }, { "epoch": 0.9827970227768762, "grad_norm": 1.3158842504284802, "learning_rate": 1.5500391839256002e-08, "loss": 0.812, "step": 10976 }, { "epoch": 0.9828865633219542, "grad_norm": 0.8741930793865039, "learning_rate": 1.5339391083427635e-08, "loss": 0.7926, "step": 10977 }, { "epoch": 0.9829761038670323, "grad_norm": 0.9067389175997302, "learning_rate": 1.517923020471268e-08, "loss": 0.7931, "step": 10978 }, { "epoch": 0.9830656444121103, "grad_norm": 0.9217560976770917, "learning_rate": 1.5019909216582585e-08, "loss": 0.8444, "step": 10979 }, { "epoch": 0.9831551849571885, "grad_norm": 1.0479441396390379, "learning_rate": 1.486142813243996e-08, "loss": 0.7898, "step": 10980 }, { "epoch": 0.9832447255022665, "grad_norm": 1.028276892511461, "learning_rate": 1.4703786965615252e-08, "loss": 0.7937, "step": 10981 }, { "epoch": 0.9833342660473445, "grad_norm": 0.985480121205418, "learning_rate": 1.4546985729368968e-08, "loss": 0.7807, "step": 10982 }, { "epoch": 0.9834238065924227, "grad_norm": 0.9822224671535137, "learning_rate": 1.4391024436890555e-08, "loss": 0.8421, "step": 10983 }, { "epoch": 0.9835133471375007, "grad_norm": 0.9526151762628563, "learning_rate": 1.423590310129841e-08, "loss": 0.7936, "step": 10984 }, { "epoch": 0.9836028876825788, "grad_norm": 0.9431625456922188, "learning_rate": 1.4081621735642093e-08, "loss": 0.8225, "step": 10985 }, { "epoch": 0.9836924282276568, "grad_norm": 1.111826916103104, "learning_rate": 1.3928180352899001e-08, "loss": 0.8651, "step": 10986 }, { "epoch": 0.9837819687727349, "grad_norm": 0.8751763648724818, "learning_rate": 1.3775578965975477e-08, "loss": 0.7704, "step": 10987 }, { "epoch": 0.983871509317813, "grad_norm": 0.9597224539181783, "learning_rate": 1.3623817587707922e-08, "loss": 0.7981, "step": 10988 }, { "epoch": 0.983961049862891, "grad_norm": 1.0562373323568417, "learning_rate": 1.3472896230861676e-08, "loss": 0.7977, "step": 10989 }, { "epoch": 0.9840505904079692, "grad_norm": 0.978583733833062, "learning_rate": 1.3322814908133252e-08, "loss": 0.7609, "step": 10990 }, { "epoch": 0.9841401309530472, "grad_norm": 0.9274077148424624, "learning_rate": 1.3173573632144775e-08, "loss": 0.7867, "step": 10991 }, { "epoch": 0.9842296714981252, "grad_norm": 0.9484605628557444, "learning_rate": 1.3025172415451758e-08, "loss": 0.8194, "step": 10992 }, { "epoch": 0.9843192120432033, "grad_norm": 1.0130882956670826, "learning_rate": 1.2877611270537549e-08, "loss": 0.8207, "step": 10993 }, { "epoch": 0.9844087525882814, "grad_norm": 0.995597020613136, "learning_rate": 1.273089020981222e-08, "loss": 0.7757, "step": 10994 }, { "epoch": 0.9844982931333595, "grad_norm": 0.906379058666672, "learning_rate": 1.2585009245620339e-08, "loss": 0.7423, "step": 10995 }, { "epoch": 0.9845878336784375, "grad_norm": 0.9850607950800196, "learning_rate": 1.2439968390229873e-08, "loss": 0.7973, "step": 10996 }, { "epoch": 0.9846773742235155, "grad_norm": 1.080642028153105, "learning_rate": 1.2295767655844393e-08, "loss": 0.7988, "step": 10997 }, { "epoch": 0.9847669147685937, "grad_norm": 0.9862044383105814, "learning_rate": 1.2152407054590864e-08, "loss": 0.8188, "step": 10998 }, { "epoch": 0.9848564553136717, "grad_norm": 1.0068202491803395, "learning_rate": 1.2009886598529642e-08, "loss": 0.7911, "step": 10999 }, { "epoch": 0.9849459958587498, "grad_norm": 1.122672060710015, "learning_rate": 1.186820629964891e-08, "loss": 0.8394, "step": 11000 }, { "epoch": 0.9850355364038279, "grad_norm": 1.045875661063741, "learning_rate": 1.1727366169865806e-08, "loss": 0.8245, "step": 11001 }, { "epoch": 0.9851250769489059, "grad_norm": 1.0684597219690246, "learning_rate": 1.158736622102863e-08, "loss": 0.8683, "step": 11002 }, { "epoch": 0.985214617493984, "grad_norm": 1.0515237779637685, "learning_rate": 1.1448206464912404e-08, "loss": 0.8061, "step": 11003 }, { "epoch": 0.985304158039062, "grad_norm": 1.0071626301247134, "learning_rate": 1.1309886913223323e-08, "loss": 0.7743, "step": 11004 }, { "epoch": 0.9853936985841402, "grad_norm": 0.9356441718381159, "learning_rate": 1.1172407577596523e-08, "loss": 0.8001, "step": 11005 }, { "epoch": 0.9854832391292182, "grad_norm": 1.0623259826415612, "learning_rate": 1.1035768469596086e-08, "loss": 0.8388, "step": 11006 }, { "epoch": 0.9855727796742962, "grad_norm": 0.9092533712444809, "learning_rate": 1.0899969600716153e-08, "loss": 0.7985, "step": 11007 }, { "epoch": 0.9856623202193744, "grad_norm": 0.9817629691734051, "learning_rate": 1.0765010982378698e-08, "loss": 0.7738, "step": 11008 }, { "epoch": 0.9857518607644524, "grad_norm": 0.9809402057490023, "learning_rate": 1.0630892625936862e-08, "loss": 0.7879, "step": 11009 }, { "epoch": 0.9858414013095305, "grad_norm": 0.9899907416098209, "learning_rate": 1.049761454267162e-08, "loss": 0.7767, "step": 11010 }, { "epoch": 0.9859309418546085, "grad_norm": 0.9907172123871733, "learning_rate": 1.0365176743795113e-08, "loss": 0.7578, "step": 11011 }, { "epoch": 0.9860204823996866, "grad_norm": 1.052098009341602, "learning_rate": 1.0233579240446213e-08, "loss": 0.7909, "step": 11012 }, { "epoch": 0.9861100229447647, "grad_norm": 1.05610311290053, "learning_rate": 1.0102822043694948e-08, "loss": 0.8376, "step": 11013 }, { "epoch": 0.9861995634898427, "grad_norm": 0.9242004720657334, "learning_rate": 9.972905164539193e-09, "loss": 0.7879, "step": 11014 }, { "epoch": 0.9862891040349208, "grad_norm": 1.0161138754084202, "learning_rate": 9.84382861390909e-09, "loss": 0.7602, "step": 11015 }, { "epoch": 0.9863786445799989, "grad_norm": 1.0374981083068733, "learning_rate": 9.715592402660401e-09, "loss": 0.7592, "step": 11016 }, { "epoch": 0.986468185125077, "grad_norm": 0.9025882924506432, "learning_rate": 9.588196541582273e-09, "loss": 0.7758, "step": 11017 }, { "epoch": 0.986557725670155, "grad_norm": 0.938375491987435, "learning_rate": 9.461641041388358e-09, "loss": 0.7818, "step": 11018 }, { "epoch": 0.9866472662152331, "grad_norm": 1.0676738386003928, "learning_rate": 9.335925912724587e-09, "loss": 0.8026, "step": 11019 }, { "epoch": 0.9867368067603112, "grad_norm": 1.028499195631394, "learning_rate": 9.21105116616583e-09, "loss": 0.766, "step": 11020 }, { "epoch": 0.9868263473053892, "grad_norm": 0.9882958018704673, "learning_rate": 9.08701681221702e-09, "loss": 0.8672, "step": 11021 }, { "epoch": 0.9869158878504672, "grad_norm": 1.0071212069310187, "learning_rate": 8.963822861310923e-09, "loss": 0.839, "step": 11022 }, { "epoch": 0.9870054283955454, "grad_norm": 0.8846753898922584, "learning_rate": 8.84146932381036e-09, "loss": 0.7407, "step": 11023 }, { "epoch": 0.9870949689406234, "grad_norm": 1.033192646202552, "learning_rate": 8.719956210007096e-09, "loss": 0.7839, "step": 11024 }, { "epoch": 0.9871845094857015, "grad_norm": 0.9907469946929138, "learning_rate": 8.599283530122959e-09, "loss": 0.7854, "step": 11025 }, { "epoch": 0.9872740500307796, "grad_norm": 0.9497709036072383, "learning_rate": 8.479451294307605e-09, "loss": 0.7756, "step": 11026 }, { "epoch": 0.9873635905758577, "grad_norm": 1.190187552531041, "learning_rate": 8.36045951264075e-09, "loss": 0.8271, "step": 11027 }, { "epoch": 0.9874531311209357, "grad_norm": 1.014434213549999, "learning_rate": 8.242308195133276e-09, "loss": 0.805, "step": 11028 }, { "epoch": 0.9875426716660137, "grad_norm": 0.9848531631454394, "learning_rate": 8.124997351721675e-09, "loss": 0.7877, "step": 11029 }, { "epoch": 0.9876322122110919, "grad_norm": 0.947036178356658, "learning_rate": 8.008526992275834e-09, "loss": 0.7691, "step": 11030 }, { "epoch": 0.9877217527561699, "grad_norm": 1.1101027349334025, "learning_rate": 7.892897126591248e-09, "loss": 0.8159, "step": 11031 }, { "epoch": 0.987811293301248, "grad_norm": 0.9590515781520789, "learning_rate": 7.778107764394583e-09, "loss": 0.8027, "step": 11032 }, { "epoch": 0.987900833846326, "grad_norm": 0.8690743994982413, "learning_rate": 7.664158915341447e-09, "loss": 0.7356, "step": 11033 }, { "epoch": 0.9879903743914041, "grad_norm": 1.0441380654827883, "learning_rate": 7.551050589018615e-09, "loss": 0.8266, "step": 11034 }, { "epoch": 0.9880799149364822, "grad_norm": 0.9789549264263988, "learning_rate": 7.438782794937372e-09, "loss": 0.8313, "step": 11035 }, { "epoch": 0.9881694554815602, "grad_norm": 0.9667267313438569, "learning_rate": 7.3273555425446005e-09, "loss": 0.8483, "step": 11036 }, { "epoch": 0.9882589960266384, "grad_norm": 1.0290506040242104, "learning_rate": 7.2167688412105866e-09, "loss": 0.7858, "step": 11037 }, { "epoch": 0.9883485365717164, "grad_norm": 0.9357993059290411, "learning_rate": 7.1070227002378866e-09, "loss": 0.8057, "step": 11038 }, { "epoch": 0.9884380771167944, "grad_norm": 1.103289364350727, "learning_rate": 6.9981171288591166e-09, "loss": 0.7493, "step": 11039 }, { "epoch": 0.9885276176618725, "grad_norm": 1.0283976445185, "learning_rate": 6.890052136234726e-09, "loss": 0.7768, "step": 11040 }, { "epoch": 0.9886171582069506, "grad_norm": 0.9804534903792114, "learning_rate": 6.782827731454111e-09, "loss": 0.7871, "step": 11041 }, { "epoch": 0.9887066987520287, "grad_norm": 1.0133677661855653, "learning_rate": 6.676443923537834e-09, "loss": 0.7603, "step": 11042 }, { "epoch": 0.9887962392971067, "grad_norm": 1.0599518658517826, "learning_rate": 6.570900721433182e-09, "loss": 0.8447, "step": 11043 }, { "epoch": 0.9888857798421848, "grad_norm": 1.2534355531964176, "learning_rate": 6.4661981340186084e-09, "loss": 0.7948, "step": 11044 }, { "epoch": 0.9889753203872629, "grad_norm": 0.9236856424709777, "learning_rate": 6.362336170101513e-09, "loss": 0.7322, "step": 11045 }, { "epoch": 0.9890648609323409, "grad_norm": 1.0136713322581734, "learning_rate": 6.25931483841935e-09, "loss": 0.8319, "step": 11046 }, { "epoch": 0.989154401477419, "grad_norm": 0.9263162870994055, "learning_rate": 6.1571341476363015e-09, "loss": 0.774, "step": 11047 }, { "epoch": 0.9892439420224971, "grad_norm": 0.9335953513451672, "learning_rate": 6.055794106347712e-09, "loss": 0.7666, "step": 11048 }, { "epoch": 0.9893334825675751, "grad_norm": 0.8718038630342524, "learning_rate": 5.955294723078986e-09, "loss": 0.8128, "step": 11049 }, { "epoch": 0.9894230231126532, "grad_norm": 0.9155980273481104, "learning_rate": 5.855636006283361e-09, "loss": 0.786, "step": 11050 }, { "epoch": 0.9895125636577312, "grad_norm": 0.9673132635230739, "learning_rate": 5.75681796434302e-09, "loss": 0.76, "step": 11051 }, { "epoch": 0.9896021042028094, "grad_norm": 0.99450892025568, "learning_rate": 5.658840605571314e-09, "loss": 0.8249, "step": 11052 }, { "epoch": 0.9896916447478874, "grad_norm": 1.0005564840208243, "learning_rate": 5.561703938209428e-09, "loss": 0.7443, "step": 11053 }, { "epoch": 0.9897811852929654, "grad_norm": 1.0537643935337768, "learning_rate": 5.465407970427494e-09, "loss": 0.7743, "step": 11054 }, { "epoch": 0.9898707258380436, "grad_norm": 1.017756464734135, "learning_rate": 5.369952710326809e-09, "loss": 0.8119, "step": 11055 }, { "epoch": 0.9899602663831216, "grad_norm": 1.0396235978246284, "learning_rate": 5.275338165935395e-09, "loss": 0.7512, "step": 11056 }, { "epoch": 0.9900498069281997, "grad_norm": 0.9727641961020701, "learning_rate": 5.181564345213552e-09, "loss": 0.7493, "step": 11057 }, { "epoch": 0.9901393474732777, "grad_norm": 1.1660452734640803, "learning_rate": 5.088631256048304e-09, "loss": 0.7364, "step": 11058 }, { "epoch": 0.9902288880183558, "grad_norm": 0.9565587855601493, "learning_rate": 4.9965389062567316e-09, "loss": 0.7848, "step": 11059 }, { "epoch": 0.9903184285634339, "grad_norm": 1.0564130697219387, "learning_rate": 4.905287303585971e-09, "loss": 0.8268, "step": 11060 }, { "epoch": 0.9904079691085119, "grad_norm": 0.9450602803807338, "learning_rate": 4.814876455710993e-09, "loss": 0.8434, "step": 11061 }, { "epoch": 0.9904975096535901, "grad_norm": 0.9203708858435595, "learning_rate": 4.725306370236827e-09, "loss": 0.8447, "step": 11062 }, { "epoch": 0.9905870501986681, "grad_norm": 0.9663626335468976, "learning_rate": 4.636577054698554e-09, "loss": 0.8117, "step": 11063 }, { "epoch": 0.9906765907437461, "grad_norm": 1.0744467261084973, "learning_rate": 4.548688516559097e-09, "loss": 0.7795, "step": 11064 }, { "epoch": 0.9907661312888242, "grad_norm": 0.9159920379576059, "learning_rate": 4.461640763212538e-09, "loss": 0.7417, "step": 11065 }, { "epoch": 0.9908556718339023, "grad_norm": 0.9918163530451493, "learning_rate": 4.375433801979689e-09, "loss": 0.83, "step": 11066 }, { "epoch": 0.9909452123789804, "grad_norm": 1.0045341603547748, "learning_rate": 4.290067640113637e-09, "loss": 0.8016, "step": 11067 }, { "epoch": 0.9910347529240584, "grad_norm": 1.1066627796260093, "learning_rate": 4.2055422847930846e-09, "loss": 0.788, "step": 11068 }, { "epoch": 0.9911242934691364, "grad_norm": 1.0199953037305114, "learning_rate": 4.12185774312901e-09, "loss": 0.7829, "step": 11069 }, { "epoch": 0.9912138340142146, "grad_norm": 1.018977449567371, "learning_rate": 4.039014022160226e-09, "loss": 0.815, "step": 11070 }, { "epoch": 0.9913033745592926, "grad_norm": 0.9095701610202261, "learning_rate": 3.957011128856714e-09, "loss": 0.841, "step": 11071 }, { "epoch": 0.9913929151043707, "grad_norm": 1.032361202197047, "learning_rate": 3.875849070115179e-09, "loss": 0.8595, "step": 11072 }, { "epoch": 0.9914824556494488, "grad_norm": 1.032437132546964, "learning_rate": 3.795527852762382e-09, "loss": 0.7874, "step": 11073 }, { "epoch": 0.9915719961945269, "grad_norm": 0.9958227435716922, "learning_rate": 3.716047483555141e-09, "loss": 0.689, "step": 11074 }, { "epoch": 0.9916615367396049, "grad_norm": 1.0660261111918978, "learning_rate": 3.6374079691792185e-09, "loss": 0.8149, "step": 11075 }, { "epoch": 0.9917510772846829, "grad_norm": 0.991064911975881, "learning_rate": 3.5596093162493238e-09, "loss": 0.7761, "step": 11076 }, { "epoch": 0.9918406178297611, "grad_norm": 1.0044793916797132, "learning_rate": 3.4826515313091115e-09, "loss": 0.7715, "step": 11077 }, { "epoch": 0.9919301583748391, "grad_norm": 0.9878163815550307, "learning_rate": 3.4065346208334016e-09, "loss": 0.7466, "step": 11078 }, { "epoch": 0.9920196989199171, "grad_norm": 0.9486239299193588, "learning_rate": 3.3312585912237406e-09, "loss": 0.8019, "step": 11079 }, { "epoch": 0.9921092394649953, "grad_norm": 0.9968803558888224, "learning_rate": 3.25682344881173e-09, "loss": 0.8484, "step": 11080 }, { "epoch": 0.9921987800100733, "grad_norm": 1.149436776108533, "learning_rate": 3.1832291998601384e-09, "loss": 0.772, "step": 11081 }, { "epoch": 0.9922883205551514, "grad_norm": 1.217837298843362, "learning_rate": 3.1104758505584587e-09, "loss": 0.7658, "step": 11082 }, { "epoch": 0.9923778611002294, "grad_norm": 1.099557874191986, "learning_rate": 3.0385634070262406e-09, "loss": 0.7746, "step": 11083 }, { "epoch": 0.9924674016453076, "grad_norm": 0.9995373851938801, "learning_rate": 2.967491875314199e-09, "loss": 0.8505, "step": 11084 }, { "epoch": 0.9925569421903856, "grad_norm": 0.9820065655438729, "learning_rate": 2.897261261397555e-09, "loss": 0.8262, "step": 11085 }, { "epoch": 0.9926464827354636, "grad_norm": 1.0654981981596758, "learning_rate": 2.827871571187135e-09, "loss": 0.7695, "step": 11086 }, { "epoch": 0.9927360232805417, "grad_norm": 0.9212408597043569, "learning_rate": 2.7593228105171623e-09, "loss": 0.8159, "step": 11087 }, { "epoch": 0.9928255638256198, "grad_norm": 1.0067241689655846, "learning_rate": 2.6916149851563542e-09, "loss": 0.808, "step": 11088 }, { "epoch": 0.9929151043706979, "grad_norm": 0.995624029648079, "learning_rate": 2.624748100797936e-09, "loss": 0.7835, "step": 11089 }, { "epoch": 0.9930046449157759, "grad_norm": 1.1848262152631013, "learning_rate": 2.5587221630674063e-09, "loss": 0.7844, "step": 11090 }, { "epoch": 0.993094185460854, "grad_norm": 0.9587342170871609, "learning_rate": 2.4935371775181015e-09, "loss": 0.7808, "step": 11091 }, { "epoch": 0.9931837260059321, "grad_norm": 1.0960445852989076, "learning_rate": 2.429193149633413e-09, "loss": 0.7945, "step": 11092 }, { "epoch": 0.9932732665510101, "grad_norm": 0.9586002943106384, "learning_rate": 2.365690084825678e-09, "loss": 0.7769, "step": 11093 }, { "epoch": 0.9933628070960882, "grad_norm": 1.0641587404932487, "learning_rate": 2.3030279884372896e-09, "loss": 0.8284, "step": 11094 }, { "epoch": 0.9934523476411663, "grad_norm": 0.9602061072976953, "learning_rate": 2.2412068657384766e-09, "loss": 0.7973, "step": 11095 }, { "epoch": 0.9935418881862443, "grad_norm": 0.9420386697733488, "learning_rate": 2.1802267219295236e-09, "loss": 0.8036, "step": 11096 }, { "epoch": 0.9936314287313224, "grad_norm": 0.9974359043957409, "learning_rate": 2.1200875621407713e-09, "loss": 0.7813, "step": 11097 }, { "epoch": 0.9937209692764005, "grad_norm": 1.0129217304170872, "learning_rate": 2.0607893914292852e-09, "loss": 0.768, "step": 11098 }, { "epoch": 0.9938105098214786, "grad_norm": 1.0509226284700077, "learning_rate": 2.002332214783298e-09, "loss": 0.7718, "step": 11099 }, { "epoch": 0.9939000503665566, "grad_norm": 1.0338762829000094, "learning_rate": 1.9447160371222072e-09, "loss": 0.8762, "step": 11100 }, { "epoch": 0.9939895909116346, "grad_norm": 0.9861582060266968, "learning_rate": 1.8879408632899166e-09, "loss": 0.7823, "step": 11101 }, { "epoch": 0.9940791314567128, "grad_norm": 0.9499161736587978, "learning_rate": 1.832006698062605e-09, "loss": 0.7902, "step": 11102 }, { "epoch": 0.9941686720017908, "grad_norm": 0.9549066382923261, "learning_rate": 1.776913546146508e-09, "loss": 0.7883, "step": 11103 }, { "epoch": 0.9942582125468689, "grad_norm": 1.0268234989776426, "learning_rate": 1.7226614121756968e-09, "loss": 0.83, "step": 11104 }, { "epoch": 0.9943477530919469, "grad_norm": 0.9476598376405527, "learning_rate": 1.6692503007131878e-09, "loss": 0.7663, "step": 11105 }, { "epoch": 0.994437293637025, "grad_norm": 0.9207876775314704, "learning_rate": 1.6166802162509432e-09, "loss": 0.8003, "step": 11106 }, { "epoch": 0.9945268341821031, "grad_norm": 0.8792071443721514, "learning_rate": 1.5649511632120917e-09, "loss": 0.7776, "step": 11107 }, { "epoch": 0.9946163747271811, "grad_norm": 0.9193413595643799, "learning_rate": 1.5140631459475973e-09, "loss": 0.7991, "step": 11108 }, { "epoch": 0.9947059152722593, "grad_norm": 0.9414064498691534, "learning_rate": 1.46401616873737e-09, "loss": 0.7457, "step": 11109 }, { "epoch": 0.9947954558173373, "grad_norm": 0.9567177882706259, "learning_rate": 1.4148102357924853e-09, "loss": 0.7999, "step": 11110 }, { "epoch": 0.9948849963624153, "grad_norm": 0.92570706442964, "learning_rate": 1.3664453512518549e-09, "loss": 0.7937, "step": 11111 }, { "epoch": 0.9949745369074934, "grad_norm": 1.142568490233922, "learning_rate": 1.3189215191822259e-09, "loss": 0.7767, "step": 11112 }, { "epoch": 0.9950640774525715, "grad_norm": 0.9870159645814577, "learning_rate": 1.272238743582621e-09, "loss": 0.7766, "step": 11113 }, { "epoch": 0.9951536179976496, "grad_norm": 1.0072415293964974, "learning_rate": 1.2263970283798998e-09, "loss": 0.7816, "step": 11114 }, { "epoch": 0.9952431585427276, "grad_norm": 1.1052734122972252, "learning_rate": 1.1813963774287563e-09, "loss": 0.7496, "step": 11115 }, { "epoch": 0.9953326990878058, "grad_norm": 0.889485336470331, "learning_rate": 1.1372367945161612e-09, "loss": 0.7714, "step": 11116 }, { "epoch": 0.9954222396328838, "grad_norm": 0.9992063631291115, "learning_rate": 1.0939182833558104e-09, "loss": 0.7983, "step": 11117 }, { "epoch": 0.9955117801779618, "grad_norm": 0.9423410990919285, "learning_rate": 1.0514408475914561e-09, "loss": 0.7898, "step": 11118 }, { "epoch": 0.9956013207230399, "grad_norm": 0.925779656684305, "learning_rate": 1.009804490795796e-09, "loss": 0.8263, "step": 11119 }, { "epoch": 0.995690861268118, "grad_norm": 0.9631395838148694, "learning_rate": 9.690092164715835e-10, "loss": 0.8519, "step": 11120 }, { "epoch": 0.995780401813196, "grad_norm": 0.9186420962002961, "learning_rate": 9.290550280505184e-10, "loss": 0.776, "step": 11121 }, { "epoch": 0.9958699423582741, "grad_norm": 1.0706150219470834, "learning_rate": 8.899419288943556e-10, "loss": 0.7873, "step": 11122 }, { "epoch": 0.9959594829033521, "grad_norm": 0.8799716788006113, "learning_rate": 8.516699222915759e-10, "loss": 0.8478, "step": 11123 }, { "epoch": 0.9960490234484303, "grad_norm": 0.9931369049101895, "learning_rate": 8.14239011461826e-10, "loss": 0.7947, "step": 11124 }, { "epoch": 0.9961385639935083, "grad_norm": 0.9411226261200677, "learning_rate": 7.776491995536984e-10, "loss": 0.7935, "step": 11125 }, { "epoch": 0.9962281045385863, "grad_norm": 1.033253272459167, "learning_rate": 7.419004896447313e-10, "loss": 0.771, "step": 11126 }, { "epoch": 0.9963176450836645, "grad_norm": 1.0749758378111818, "learning_rate": 7.069928847436291e-10, "loss": 0.7515, "step": 11127 }, { "epoch": 0.9964071856287425, "grad_norm": 0.9747974918239087, "learning_rate": 6.729263877847114e-10, "loss": 0.7823, "step": 11128 }, { "epoch": 0.9964967261738206, "grad_norm": 1.0810498783383968, "learning_rate": 6.397010016356842e-10, "loss": 0.8548, "step": 11129 }, { "epoch": 0.9965862667188986, "grad_norm": 0.8974041909212097, "learning_rate": 6.073167290887582e-10, "loss": 0.7561, "step": 11130 }, { "epoch": 0.9966758072639768, "grad_norm": 0.9855959745239473, "learning_rate": 5.757735728695313e-10, "loss": 0.8123, "step": 11131 }, { "epoch": 0.9967653478090548, "grad_norm": 1.0135532783865846, "learning_rate": 5.450715356314363e-10, "loss": 0.7392, "step": 11132 }, { "epoch": 0.9968548883541328, "grad_norm": 1.0591160342401547, "learning_rate": 5.152106199568519e-10, "loss": 0.8276, "step": 11133 }, { "epoch": 0.996944428899211, "grad_norm": 1.0233364566689012, "learning_rate": 4.861908283571026e-10, "loss": 0.8671, "step": 11134 }, { "epoch": 0.997033969444289, "grad_norm": 1.087326653001518, "learning_rate": 4.580121632724588e-10, "loss": 0.834, "step": 11135 }, { "epoch": 0.997123509989367, "grad_norm": 1.0844423175944673, "learning_rate": 4.3067462707546693e-10, "loss": 0.7852, "step": 11136 }, { "epoch": 0.9972130505344451, "grad_norm": 1.0376214459889592, "learning_rate": 4.041782220642887e-10, "loss": 0.7822, "step": 11137 }, { "epoch": 0.9973025910795232, "grad_norm": 0.8730031302991897, "learning_rate": 3.785229504682519e-10, "loss": 0.7918, "step": 11138 }, { "epoch": 0.9973921316246013, "grad_norm": 1.125151449284699, "learning_rate": 3.5370881444452e-10, "loss": 0.8199, "step": 11139 }, { "epoch": 0.9974816721696793, "grad_norm": 0.9624139988481608, "learning_rate": 3.2973581608142234e-10, "loss": 0.8703, "step": 11140 }, { "epoch": 0.9975712127147573, "grad_norm": 0.921695001158502, "learning_rate": 3.066039573940138e-10, "loss": 0.7775, "step": 11141 }, { "epoch": 0.9976607532598355, "grad_norm": 0.8950767283416653, "learning_rate": 2.843132403296256e-10, "loss": 0.7621, "step": 11142 }, { "epoch": 0.9977502938049135, "grad_norm": 0.9349637857208811, "learning_rate": 2.628636667634243e-10, "loss": 0.7665, "step": 11143 }, { "epoch": 0.9978398343499916, "grad_norm": 1.1041455084647787, "learning_rate": 2.4225523849841225e-10, "loss": 0.7699, "step": 11144 }, { "epoch": 0.9979293748950697, "grad_norm": 1.0564882772402155, "learning_rate": 2.224879572676475e-10, "loss": 0.7886, "step": 11145 }, { "epoch": 0.9980189154401478, "grad_norm": 1.037917291989691, "learning_rate": 2.0356182473646458e-10, "loss": 0.7898, "step": 11146 }, { "epoch": 0.9981084559852258, "grad_norm": 1.019778840356656, "learning_rate": 1.8547684249470288e-10, "loss": 0.7716, "step": 11147 }, { "epoch": 0.9981979965303038, "grad_norm": 0.9090738421813411, "learning_rate": 1.6823301206336796e-10, "loss": 0.7758, "step": 11148 }, { "epoch": 0.998287537075382, "grad_norm": 1.0000851338482686, "learning_rate": 1.518303348946315e-10, "loss": 0.8103, "step": 11149 }, { "epoch": 0.99837707762046, "grad_norm": 1.082956981556173, "learning_rate": 1.3626881236739053e-10, "loss": 0.825, "step": 11150 }, { "epoch": 0.9984666181655381, "grad_norm": 1.3432620600541343, "learning_rate": 1.21548445790598e-10, "loss": 0.785, "step": 11151 }, { "epoch": 0.9985561587106162, "grad_norm": 1.061937718697999, "learning_rate": 1.0766923640215254e-10, "loss": 0.7637, "step": 11152 }, { "epoch": 0.9986456992556942, "grad_norm": 0.9693415064531373, "learning_rate": 9.463118537000882e-11, "loss": 0.8187, "step": 11153 }, { "epoch": 0.9987352398007723, "grad_norm": 0.9258957249178724, "learning_rate": 8.243429379106716e-11, "loss": 0.7979, "step": 11154 }, { "epoch": 0.9988247803458503, "grad_norm": 0.9306948430918408, "learning_rate": 7.107856269006342e-11, "loss": 0.7536, "step": 11155 }, { "epoch": 0.9989143208909285, "grad_norm": 0.9897909968903863, "learning_rate": 6.056399302400984e-11, "loss": 0.7299, "step": 11156 }, { "epoch": 0.9990038614360065, "grad_norm": 0.9966890196757293, "learning_rate": 5.089058567664396e-11, "loss": 0.8125, "step": 11157 }, { "epoch": 0.9990934019810845, "grad_norm": 0.9710463795168044, "learning_rate": 4.205834146064902e-11, "loss": 0.7746, "step": 11158 }, { "epoch": 0.9991829425261626, "grad_norm": 1.0802563022335718, "learning_rate": 3.4067261120984684e-11, "loss": 0.795, "step": 11159 }, { "epoch": 0.9992724830712407, "grad_norm": 1.0067298497614499, "learning_rate": 2.6917345328225653e-11, "loss": 0.8149, "step": 11160 }, { "epoch": 0.9993620236163188, "grad_norm": 1.089581608273436, "learning_rate": 2.060859468300258e-11, "loss": 0.8233, "step": 11161 }, { "epoch": 0.9994515641613968, "grad_norm": 1.1516062666467761, "learning_rate": 1.514100971822252e-11, "loss": 0.7471, "step": 11162 }, { "epoch": 0.999541104706475, "grad_norm": 0.9880798501695719, "learning_rate": 1.0514590893517807e-11, "loss": 0.817, "step": 11163 }, { "epoch": 0.999630645251553, "grad_norm": 1.0438818650034132, "learning_rate": 6.729338596356272e-12, "loss": 0.7806, "step": 11164 }, { "epoch": 0.999720185796631, "grad_norm": 1.057060240229699, "learning_rate": 3.785253146482148e-12, "loss": 0.8326, "step": 11165 }, { "epoch": 0.9998097263417091, "grad_norm": 0.9140718394821942, "learning_rate": 1.6823347903649478e-12, "loss": 0.7493, "step": 11166 }, { "epoch": 0.9998992668867872, "grad_norm": 1.0844146151998872, "learning_rate": 4.2058370675057693e-13, "loss": 0.7957, "step": 11167 }, { "epoch": 0.9999888074318652, "grad_norm": 0.9303737122818287, "learning_rate": 0.0, "loss": 0.8216, "step": 11168 }, { "epoch": 0.9999888074318652, "step": 11168, "total_flos": 2.6264864681754624e+16, "train_loss": 0.2994103609800424, "train_runtime": 169468.3256, "train_samples_per_second": 8.435, "train_steps_per_second": 0.066 } ], "logging_steps": 1.0, "max_steps": 11168, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.6264864681754624e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }