{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.19853157538350275, "eval_steps": 1000, "global_step": 14000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.4180826813107339e-05, "grad_norm": 23.5, "learning_rate": 2.3584905660377358e-07, "loss": 2.2462, "step": 1 }, { "epoch": 2.8361653626214678e-05, "grad_norm": 5.40625, "learning_rate": 4.7169811320754717e-07, "loss": 2.0729, "step": 2 }, { "epoch": 5.6723307252429356e-05, "grad_norm": 83.5, "learning_rate": 9.433962264150943e-07, "loss": 2.1368, "step": 4 }, { "epoch": 8.508496087864403e-05, "grad_norm": 11.625, "learning_rate": 1.4150943396226415e-06, "loss": 2.0221, "step": 6 }, { "epoch": 0.00011344661450485871, "grad_norm": 13.375, "learning_rate": 1.8867924528301887e-06, "loss": 2.3639, "step": 8 }, { "epoch": 0.00014180826813107338, "grad_norm": 10.625, "learning_rate": 2.358490566037736e-06, "loss": 2.0759, "step": 10 }, { "epoch": 0.00017016992175728805, "grad_norm": 30.375, "learning_rate": 2.830188679245283e-06, "loss": 2.5909, "step": 12 }, { "epoch": 0.00019853157538350273, "grad_norm": 9.0625, "learning_rate": 3.30188679245283e-06, "loss": 2.3101, "step": 14 }, { "epoch": 0.00022689322900971742, "grad_norm": 9.875, "learning_rate": 3.7735849056603773e-06, "loss": 2.3223, "step": 16 }, { "epoch": 0.00025525488263593207, "grad_norm": 7.4375, "learning_rate": 4.245283018867925e-06, "loss": 2.3394, "step": 18 }, { "epoch": 0.00028361653626214677, "grad_norm": 17.125, "learning_rate": 4.716981132075472e-06, "loss": 2.3398, "step": 20 }, { "epoch": 0.00031197818988836146, "grad_norm": 10.1875, "learning_rate": 5.188679245283019e-06, "loss": 2.1757, "step": 22 }, { "epoch": 0.0003403398435145761, "grad_norm": 19.625, "learning_rate": 5.660377358490566e-06, "loss": 2.0609, "step": 24 }, { "epoch": 0.0003687014971407908, "grad_norm": 8.5625, "learning_rate": 6.132075471698113e-06, "loss": 2.0856, "step": 26 }, { "epoch": 0.00039706315076700545, "grad_norm": 8.125, "learning_rate": 6.60377358490566e-06, "loss": 1.9317, "step": 28 }, { "epoch": 0.00042542480439322015, "grad_norm": 8.625, "learning_rate": 7.0754716981132075e-06, "loss": 2.028, "step": 30 }, { "epoch": 0.00045378645801943485, "grad_norm": 10.75, "learning_rate": 7.547169811320755e-06, "loss": 2.0806, "step": 32 }, { "epoch": 0.0004821481116456495, "grad_norm": 8.375, "learning_rate": 8.018867924528302e-06, "loss": 2.1792, "step": 34 }, { "epoch": 0.0005105097652718641, "grad_norm": 7.46875, "learning_rate": 8.49056603773585e-06, "loss": 1.9408, "step": 36 }, { "epoch": 0.0005388714188980789, "grad_norm": 7.21875, "learning_rate": 8.962264150943396e-06, "loss": 1.8844, "step": 38 }, { "epoch": 0.0005672330725242935, "grad_norm": 10.875, "learning_rate": 9.433962264150944e-06, "loss": 2.0355, "step": 40 }, { "epoch": 0.0005955947261505082, "grad_norm": 8.5625, "learning_rate": 9.905660377358492e-06, "loss": 1.6167, "step": 42 }, { "epoch": 0.0006239563797767229, "grad_norm": 7.1875, "learning_rate": 1.0377358490566038e-05, "loss": 1.6281, "step": 44 }, { "epoch": 0.0006523180334029376, "grad_norm": 7.21875, "learning_rate": 1.0849056603773586e-05, "loss": 1.5859, "step": 46 }, { "epoch": 0.0006806796870291522, "grad_norm": 6.03125, "learning_rate": 1.1320754716981132e-05, "loss": 1.6459, "step": 48 }, { "epoch": 0.0007090413406553669, "grad_norm": 5.1875, "learning_rate": 1.179245283018868e-05, "loss": 1.4428, "step": 50 }, { "epoch": 0.0007374029942815816, "grad_norm": 7.0, "learning_rate": 1.2264150943396227e-05, "loss": 1.4423, "step": 52 }, { "epoch": 0.0007657646479077963, "grad_norm": 3.65625, "learning_rate": 1.2735849056603775e-05, "loss": 1.4288, "step": 54 }, { "epoch": 0.0007941263015340109, "grad_norm": 3.984375, "learning_rate": 1.320754716981132e-05, "loss": 1.3693, "step": 56 }, { "epoch": 0.0008224879551602257, "grad_norm": 3.8125, "learning_rate": 1.3679245283018869e-05, "loss": 1.4592, "step": 58 }, { "epoch": 0.0008508496087864403, "grad_norm": 2.90625, "learning_rate": 1.4150943396226415e-05, "loss": 1.4144, "step": 60 }, { "epoch": 0.0008792112624126549, "grad_norm": 2.59375, "learning_rate": 1.4622641509433963e-05, "loss": 1.4228, "step": 62 }, { "epoch": 0.0009075729160388697, "grad_norm": 2.9375, "learning_rate": 1.509433962264151e-05, "loss": 1.4243, "step": 64 }, { "epoch": 0.0009359345696650843, "grad_norm": 3.5625, "learning_rate": 1.5566037735849056e-05, "loss": 1.4232, "step": 66 }, { "epoch": 0.000964296223291299, "grad_norm": 4.0625, "learning_rate": 1.6037735849056604e-05, "loss": 1.5609, "step": 68 }, { "epoch": 0.0009926578769175137, "grad_norm": 2.796875, "learning_rate": 1.650943396226415e-05, "loss": 1.3382, "step": 70 }, { "epoch": 0.0010210195305437283, "grad_norm": 2.46875, "learning_rate": 1.69811320754717e-05, "loss": 1.2712, "step": 72 }, { "epoch": 0.001049381184169943, "grad_norm": 3.703125, "learning_rate": 1.7452830188679244e-05, "loss": 1.3288, "step": 74 }, { "epoch": 0.0010777428377961578, "grad_norm": 3.953125, "learning_rate": 1.7924528301886792e-05, "loss": 1.4028, "step": 76 }, { "epoch": 0.0011061044914223723, "grad_norm": 2.859375, "learning_rate": 1.839622641509434e-05, "loss": 1.3597, "step": 78 }, { "epoch": 0.001134466145048587, "grad_norm": 2.90625, "learning_rate": 1.8867924528301888e-05, "loss": 1.4342, "step": 80 }, { "epoch": 0.0011628277986748018, "grad_norm": 2.671875, "learning_rate": 1.9339622641509436e-05, "loss": 1.2457, "step": 82 }, { "epoch": 0.0011911894523010164, "grad_norm": 5.0625, "learning_rate": 1.9811320754716984e-05, "loss": 1.4336, "step": 84 }, { "epoch": 0.001219551105927231, "grad_norm": 2.875, "learning_rate": 2.0283018867924532e-05, "loss": 1.2609, "step": 86 }, { "epoch": 0.0012479127595534459, "grad_norm": 2.578125, "learning_rate": 2.0754716981132076e-05, "loss": 1.2695, "step": 88 }, { "epoch": 0.0012762744131796604, "grad_norm": 2.8125, "learning_rate": 2.1226415094339624e-05, "loss": 1.2783, "step": 90 }, { "epoch": 0.0013046360668058751, "grad_norm": 3.46875, "learning_rate": 2.1698113207547172e-05, "loss": 1.309, "step": 92 }, { "epoch": 0.00133299772043209, "grad_norm": 3.125, "learning_rate": 2.216981132075472e-05, "loss": 1.3988, "step": 94 }, { "epoch": 0.0013613593740583044, "grad_norm": 2.9375, "learning_rate": 2.2641509433962265e-05, "loss": 1.327, "step": 96 }, { "epoch": 0.0013897210276845192, "grad_norm": 4.25, "learning_rate": 2.3113207547169813e-05, "loss": 1.303, "step": 98 }, { "epoch": 0.0014180826813107337, "grad_norm": 3.828125, "learning_rate": 2.358490566037736e-05, "loss": 1.2823, "step": 100 }, { "epoch": 0.0014464443349369485, "grad_norm": 20.5, "learning_rate": 2.405660377358491e-05, "loss": 1.3277, "step": 102 }, { "epoch": 0.0014748059885631632, "grad_norm": 4.34375, "learning_rate": 2.4528301886792453e-05, "loss": 1.3433, "step": 104 }, { "epoch": 0.0015031676421893778, "grad_norm": 5.84375, "learning_rate": 2.5e-05, "loss": 1.2733, "step": 106 }, { "epoch": 0.0015315292958155925, "grad_norm": 3.15625, "learning_rate": 2.547169811320755e-05, "loss": 1.269, "step": 108 }, { "epoch": 0.0015598909494418073, "grad_norm": 5.15625, "learning_rate": 2.5943396226415094e-05, "loss": 1.2991, "step": 110 }, { "epoch": 0.0015882526030680218, "grad_norm": 3.296875, "learning_rate": 2.641509433962264e-05, "loss": 1.2476, "step": 112 }, { "epoch": 0.0016166142566942366, "grad_norm": 3.890625, "learning_rate": 2.688679245283019e-05, "loss": 1.283, "step": 114 }, { "epoch": 0.0016449759103204513, "grad_norm": 3.21875, "learning_rate": 2.7358490566037738e-05, "loss": 1.2708, "step": 116 }, { "epoch": 0.0016733375639466658, "grad_norm": 3.5, "learning_rate": 2.7830188679245282e-05, "loss": 1.2617, "step": 118 }, { "epoch": 0.0017016992175728806, "grad_norm": 3.234375, "learning_rate": 2.830188679245283e-05, "loss": 1.2324, "step": 120 }, { "epoch": 0.0017300608711990953, "grad_norm": 3.046875, "learning_rate": 2.8773584905660378e-05, "loss": 1.317, "step": 122 }, { "epoch": 0.0017584225248253099, "grad_norm": 4.875, "learning_rate": 2.9245283018867926e-05, "loss": 1.2038, "step": 124 }, { "epoch": 0.0017867841784515246, "grad_norm": 3.375, "learning_rate": 2.971698113207547e-05, "loss": 1.319, "step": 126 }, { "epoch": 0.0018151458320777394, "grad_norm": 3.8125, "learning_rate": 3.018867924528302e-05, "loss": 1.2748, "step": 128 }, { "epoch": 0.001843507485703954, "grad_norm": 3.59375, "learning_rate": 3.0660377358490567e-05, "loss": 1.3204, "step": 130 }, { "epoch": 0.0018718691393301687, "grad_norm": 3.125, "learning_rate": 3.113207547169811e-05, "loss": 1.3213, "step": 132 }, { "epoch": 0.0019002307929563834, "grad_norm": 3.09375, "learning_rate": 3.160377358490566e-05, "loss": 1.2092, "step": 134 }, { "epoch": 0.001928592446582598, "grad_norm": 3.09375, "learning_rate": 3.207547169811321e-05, "loss": 1.196, "step": 136 }, { "epoch": 0.0019569541002088125, "grad_norm": 3.625, "learning_rate": 3.254716981132075e-05, "loss": 1.1678, "step": 138 }, { "epoch": 0.0019853157538350275, "grad_norm": 3.390625, "learning_rate": 3.30188679245283e-05, "loss": 1.2782, "step": 140 }, { "epoch": 0.002013677407461242, "grad_norm": 4.3125, "learning_rate": 3.349056603773585e-05, "loss": 1.3041, "step": 142 }, { "epoch": 0.0020420390610874565, "grad_norm": 3.53125, "learning_rate": 3.39622641509434e-05, "loss": 1.1766, "step": 144 }, { "epoch": 0.0020704007147136715, "grad_norm": 4.34375, "learning_rate": 3.4433962264150943e-05, "loss": 1.2957, "step": 146 }, { "epoch": 0.002098762368339886, "grad_norm": 3.546875, "learning_rate": 3.490566037735849e-05, "loss": 1.2442, "step": 148 }, { "epoch": 0.0021271240219661006, "grad_norm": 3.765625, "learning_rate": 3.537735849056604e-05, "loss": 1.2219, "step": 150 }, { "epoch": 0.0021554856755923156, "grad_norm": 3.96875, "learning_rate": 3.5849056603773584e-05, "loss": 1.2118, "step": 152 }, { "epoch": 0.00218384732921853, "grad_norm": 3.265625, "learning_rate": 3.632075471698113e-05, "loss": 1.2072, "step": 154 }, { "epoch": 0.0022122089828447446, "grad_norm": 2.90625, "learning_rate": 3.679245283018868e-05, "loss": 1.2434, "step": 156 }, { "epoch": 0.0022405706364709596, "grad_norm": 3.90625, "learning_rate": 3.7264150943396224e-05, "loss": 1.242, "step": 158 }, { "epoch": 0.002268932290097174, "grad_norm": 3.9375, "learning_rate": 3.7735849056603776e-05, "loss": 1.2218, "step": 160 }, { "epoch": 0.0022972939437233887, "grad_norm": 4.875, "learning_rate": 3.820754716981133e-05, "loss": 1.2711, "step": 162 }, { "epoch": 0.0023256555973496036, "grad_norm": 3.453125, "learning_rate": 3.867924528301887e-05, "loss": 1.2997, "step": 164 }, { "epoch": 0.002354017250975818, "grad_norm": 4.0, "learning_rate": 3.9150943396226416e-05, "loss": 1.2355, "step": 166 }, { "epoch": 0.0023823789046020327, "grad_norm": 3.171875, "learning_rate": 3.962264150943397e-05, "loss": 1.2066, "step": 168 }, { "epoch": 0.0024107405582282477, "grad_norm": 4.03125, "learning_rate": 4.009433962264151e-05, "loss": 1.2166, "step": 170 }, { "epoch": 0.002439102211854462, "grad_norm": 3.640625, "learning_rate": 4.0566037735849064e-05, "loss": 1.2211, "step": 172 }, { "epoch": 0.0024674638654806767, "grad_norm": 3.921875, "learning_rate": 4.103773584905661e-05, "loss": 1.1586, "step": 174 }, { "epoch": 0.0024958255191068917, "grad_norm": 4.15625, "learning_rate": 4.150943396226415e-05, "loss": 1.1605, "step": 176 }, { "epoch": 0.0025241871727331062, "grad_norm": 3.46875, "learning_rate": 4.1981132075471704e-05, "loss": 1.1742, "step": 178 }, { "epoch": 0.002552548826359321, "grad_norm": 3.9375, "learning_rate": 4.245283018867925e-05, "loss": 1.2395, "step": 180 }, { "epoch": 0.0025809104799855358, "grad_norm": 4.53125, "learning_rate": 4.292452830188679e-05, "loss": 1.1823, "step": 182 }, { "epoch": 0.0026092721336117503, "grad_norm": 3.75, "learning_rate": 4.3396226415094345e-05, "loss": 1.1712, "step": 184 }, { "epoch": 0.002637633787237965, "grad_norm": 3.296875, "learning_rate": 4.386792452830189e-05, "loss": 1.2018, "step": 186 }, { "epoch": 0.00266599544086418, "grad_norm": 2.90625, "learning_rate": 4.433962264150944e-05, "loss": 1.1264, "step": 188 }, { "epoch": 0.0026943570944903943, "grad_norm": 3.578125, "learning_rate": 4.4811320754716985e-05, "loss": 1.2231, "step": 190 }, { "epoch": 0.002722718748116609, "grad_norm": 3.328125, "learning_rate": 4.528301886792453e-05, "loss": 1.1657, "step": 192 }, { "epoch": 0.0027510804017428234, "grad_norm": 3.890625, "learning_rate": 4.575471698113208e-05, "loss": 1.2448, "step": 194 }, { "epoch": 0.0027794420553690384, "grad_norm": 4.21875, "learning_rate": 4.6226415094339625e-05, "loss": 1.2995, "step": 196 }, { "epoch": 0.002807803708995253, "grad_norm": 3.90625, "learning_rate": 4.669811320754717e-05, "loss": 1.1806, "step": 198 }, { "epoch": 0.0028361653626214674, "grad_norm": 4.46875, "learning_rate": 4.716981132075472e-05, "loss": 1.1848, "step": 200 }, { "epoch": 0.0028645270162476824, "grad_norm": 4.78125, "learning_rate": 4.7641509433962266e-05, "loss": 1.179, "step": 202 }, { "epoch": 0.002892888669873897, "grad_norm": 4.21875, "learning_rate": 4.811320754716982e-05, "loss": 1.2387, "step": 204 }, { "epoch": 0.0029212503235001115, "grad_norm": 4.9375, "learning_rate": 4.858490566037736e-05, "loss": 1.1862, "step": 206 }, { "epoch": 0.0029496119771263265, "grad_norm": 4.15625, "learning_rate": 4.9056603773584906e-05, "loss": 1.1575, "step": 208 }, { "epoch": 0.002977973630752541, "grad_norm": 5.40625, "learning_rate": 4.952830188679246e-05, "loss": 1.2318, "step": 210 }, { "epoch": 0.0030063352843787555, "grad_norm": 4.25, "learning_rate": 5e-05, "loss": 1.2164, "step": 212 }, { "epoch": 0.0030346969380049705, "grad_norm": 3.90625, "learning_rate": 4.9999999900161665e-05, "loss": 1.1131, "step": 214 }, { "epoch": 0.003063058591631185, "grad_norm": 4.5625, "learning_rate": 4.999999960064667e-05, "loss": 1.1882, "step": 216 }, { "epoch": 0.0030914202452573996, "grad_norm": 3.703125, "learning_rate": 4.9999999101455004e-05, "loss": 1.2517, "step": 218 }, { "epoch": 0.0031197818988836145, "grad_norm": 3.71875, "learning_rate": 4.999999840258668e-05, "loss": 1.1909, "step": 220 }, { "epoch": 0.003148143552509829, "grad_norm": 3.484375, "learning_rate": 4.9999997504041694e-05, "loss": 1.1448, "step": 222 }, { "epoch": 0.0031765052061360436, "grad_norm": 4.0625, "learning_rate": 4.999999640582007e-05, "loss": 1.1793, "step": 224 }, { "epoch": 0.0032048668597622586, "grad_norm": 3.703125, "learning_rate": 4.9999995107921805e-05, "loss": 1.1572, "step": 226 }, { "epoch": 0.003233228513388473, "grad_norm": 3.203125, "learning_rate": 4.999999361034691e-05, "loss": 1.1737, "step": 228 }, { "epoch": 0.0032615901670146876, "grad_norm": 5.28125, "learning_rate": 4.99999919130954e-05, "loss": 1.1441, "step": 230 }, { "epoch": 0.0032899518206409026, "grad_norm": 3.703125, "learning_rate": 4.9999990016167286e-05, "loss": 1.1727, "step": 232 }, { "epoch": 0.003318313474267117, "grad_norm": 4.0, "learning_rate": 4.999998791956258e-05, "loss": 1.2494, "step": 234 }, { "epoch": 0.0033466751278933317, "grad_norm": 4.125, "learning_rate": 4.999998562328131e-05, "loss": 1.1364, "step": 236 }, { "epoch": 0.0033750367815195467, "grad_norm": 3.421875, "learning_rate": 4.999998312732349e-05, "loss": 1.1748, "step": 238 }, { "epoch": 0.003403398435145761, "grad_norm": 4.1875, "learning_rate": 4.999998043168912e-05, "loss": 1.1553, "step": 240 }, { "epoch": 0.0034317600887719757, "grad_norm": 3.703125, "learning_rate": 4.999997753637825e-05, "loss": 1.1333, "step": 242 }, { "epoch": 0.0034601217423981907, "grad_norm": 3.71875, "learning_rate": 4.999997444139089e-05, "loss": 1.067, "step": 244 }, { "epoch": 0.0034884833960244052, "grad_norm": 3.5, "learning_rate": 4.9999971146727075e-05, "loss": 1.1889, "step": 246 }, { "epoch": 0.0035168450496506198, "grad_norm": 3.609375, "learning_rate": 4.999996765238682e-05, "loss": 1.1171, "step": 248 }, { "epoch": 0.0035452067032768347, "grad_norm": 2.921875, "learning_rate": 4.999996395837016e-05, "loss": 1.0563, "step": 250 }, { "epoch": 0.0035735683569030493, "grad_norm": 3.765625, "learning_rate": 4.9999960064677104e-05, "loss": 1.1624, "step": 252 }, { "epoch": 0.003601930010529264, "grad_norm": 5.65625, "learning_rate": 4.999995597130771e-05, "loss": 1.1688, "step": 254 }, { "epoch": 0.0036302916641554788, "grad_norm": 3.28125, "learning_rate": 4.9999951678262e-05, "loss": 1.1031, "step": 256 }, { "epoch": 0.0036586533177816933, "grad_norm": 2.953125, "learning_rate": 4.999994718554001e-05, "loss": 1.0506, "step": 258 }, { "epoch": 0.003687014971407908, "grad_norm": 3.671875, "learning_rate": 4.9999942493141766e-05, "loss": 1.1787, "step": 260 }, { "epoch": 0.003715376625034123, "grad_norm": 3.046875, "learning_rate": 4.999993760106732e-05, "loss": 1.1705, "step": 262 }, { "epoch": 0.0037437382786603374, "grad_norm": 4.4375, "learning_rate": 4.9999932509316706e-05, "loss": 1.165, "step": 264 }, { "epoch": 0.003772099932286552, "grad_norm": 3.53125, "learning_rate": 4.999992721788996e-05, "loss": 1.198, "step": 266 }, { "epoch": 0.003800461585912767, "grad_norm": 3.484375, "learning_rate": 4.999992172678713e-05, "loss": 1.1073, "step": 268 }, { "epoch": 0.0038288232395389814, "grad_norm": 3.109375, "learning_rate": 4.999991603600826e-05, "loss": 1.1029, "step": 270 }, { "epoch": 0.003857184893165196, "grad_norm": 3.71875, "learning_rate": 4.9999910145553386e-05, "loss": 1.1733, "step": 272 }, { "epoch": 0.0038855465467914105, "grad_norm": 3.609375, "learning_rate": 4.9999904055422565e-05, "loss": 1.1625, "step": 274 }, { "epoch": 0.003913908200417625, "grad_norm": 4.65625, "learning_rate": 4.999989776561584e-05, "loss": 1.116, "step": 276 }, { "epoch": 0.00394226985404384, "grad_norm": 4.84375, "learning_rate": 4.9999891276133274e-05, "loss": 1.1644, "step": 278 }, { "epoch": 0.003970631507670055, "grad_norm": 3.21875, "learning_rate": 4.99998845869749e-05, "loss": 1.1055, "step": 280 }, { "epoch": 0.003998993161296269, "grad_norm": 4.125, "learning_rate": 4.9999877698140783e-05, "loss": 1.147, "step": 282 }, { "epoch": 0.004027354814922484, "grad_norm": 4.21875, "learning_rate": 4.999987060963098e-05, "loss": 1.1561, "step": 284 }, { "epoch": 0.004055716468548699, "grad_norm": 3.796875, "learning_rate": 4.999986332144554e-05, "loss": 1.104, "step": 286 }, { "epoch": 0.004084078122174913, "grad_norm": 3.421875, "learning_rate": 4.9999855833584516e-05, "loss": 1.0773, "step": 288 }, { "epoch": 0.004112439775801128, "grad_norm": 3.390625, "learning_rate": 4.999984814604799e-05, "loss": 1.0895, "step": 290 }, { "epoch": 0.004140801429427343, "grad_norm": 3.546875, "learning_rate": 4.9999840258835994e-05, "loss": 1.0812, "step": 292 }, { "epoch": 0.004169163083053557, "grad_norm": 3.328125, "learning_rate": 4.999983217194862e-05, "loss": 1.0921, "step": 294 }, { "epoch": 0.004197524736679772, "grad_norm": 3.578125, "learning_rate": 4.999982388538591e-05, "loss": 1.1024, "step": 296 }, { "epoch": 0.004225886390305987, "grad_norm": 3.421875, "learning_rate": 4.999981539914794e-05, "loss": 1.0863, "step": 298 }, { "epoch": 0.004254248043932201, "grad_norm": 4.6875, "learning_rate": 4.999980671323479e-05, "loss": 1.2086, "step": 300 }, { "epoch": 0.004282609697558416, "grad_norm": 3.8125, "learning_rate": 4.99997978276465e-05, "loss": 1.1149, "step": 302 }, { "epoch": 0.004310971351184631, "grad_norm": 3.90625, "learning_rate": 4.999978874238316e-05, "loss": 1.0967, "step": 304 }, { "epoch": 0.004339333004810845, "grad_norm": 3.9375, "learning_rate": 4.9999779457444844e-05, "loss": 1.0695, "step": 306 }, { "epoch": 0.00436769465843706, "grad_norm": 3.109375, "learning_rate": 4.999976997283162e-05, "loss": 1.1057, "step": 308 }, { "epoch": 0.004396056312063275, "grad_norm": 3.265625, "learning_rate": 4.999976028854357e-05, "loss": 1.1259, "step": 310 }, { "epoch": 0.004424417965689489, "grad_norm": 3.0, "learning_rate": 4.999975040458076e-05, "loss": 1.0606, "step": 312 }, { "epoch": 0.004452779619315704, "grad_norm": 3.359375, "learning_rate": 4.9999740320943275e-05, "loss": 1.1007, "step": 314 }, { "epoch": 0.004481141272941919, "grad_norm": 3.34375, "learning_rate": 4.999973003763121e-05, "loss": 1.1371, "step": 316 }, { "epoch": 0.004509502926568133, "grad_norm": 3.828125, "learning_rate": 4.999971955464462e-05, "loss": 1.145, "step": 318 }, { "epoch": 0.004537864580194348, "grad_norm": 3.4375, "learning_rate": 4.999970887198361e-05, "loss": 1.1202, "step": 320 }, { "epoch": 0.004566226233820563, "grad_norm": 3.46875, "learning_rate": 4.999969798964825e-05, "loss": 1.1516, "step": 322 }, { "epoch": 0.004594587887446777, "grad_norm": 3.6875, "learning_rate": 4.9999686907638646e-05, "loss": 1.1018, "step": 324 }, { "epoch": 0.004622949541072992, "grad_norm": 4.34375, "learning_rate": 4.999967562595487e-05, "loss": 1.1226, "step": 326 }, { "epoch": 0.004651311194699207, "grad_norm": 4.375, "learning_rate": 4.999966414459701e-05, "loss": 1.0955, "step": 328 }, { "epoch": 0.004679672848325421, "grad_norm": 2.96875, "learning_rate": 4.999965246356517e-05, "loss": 1.047, "step": 330 }, { "epoch": 0.004708034501951636, "grad_norm": 3.734375, "learning_rate": 4.999964058285944e-05, "loss": 1.1238, "step": 332 }, { "epoch": 0.004736396155577851, "grad_norm": 3.65625, "learning_rate": 4.999962850247991e-05, "loss": 1.0768, "step": 334 }, { "epoch": 0.004764757809204065, "grad_norm": 4.375, "learning_rate": 4.999961622242669e-05, "loss": 1.1649, "step": 336 }, { "epoch": 0.00479311946283028, "grad_norm": 3.90625, "learning_rate": 4.999960374269986e-05, "loss": 1.1531, "step": 338 }, { "epoch": 0.004821481116456495, "grad_norm": 3.828125, "learning_rate": 4.999959106329952e-05, "loss": 1.1492, "step": 340 }, { "epoch": 0.0048498427700827094, "grad_norm": 3.984375, "learning_rate": 4.999957818422579e-05, "loss": 1.0841, "step": 342 }, { "epoch": 0.004878204423708924, "grad_norm": 3.609375, "learning_rate": 4.9999565105478755e-05, "loss": 1.0165, "step": 344 }, { "epoch": 0.004906566077335139, "grad_norm": 4.40625, "learning_rate": 4.9999551827058536e-05, "loss": 1.1411, "step": 346 }, { "epoch": 0.0049349277309613535, "grad_norm": 3.421875, "learning_rate": 4.999953834896521e-05, "loss": 1.174, "step": 348 }, { "epoch": 0.0049632893845875685, "grad_norm": 4.75, "learning_rate": 4.999952467119892e-05, "loss": 1.1168, "step": 350 }, { "epoch": 0.004991651038213783, "grad_norm": 3.953125, "learning_rate": 4.999951079375976e-05, "loss": 1.1348, "step": 352 }, { "epoch": 0.0050200126918399975, "grad_norm": 4.3125, "learning_rate": 4.9999496716647834e-05, "loss": 1.1131, "step": 354 }, { "epoch": 0.0050483743454662125, "grad_norm": 4.0625, "learning_rate": 4.999948243986325e-05, "loss": 1.1325, "step": 356 }, { "epoch": 0.0050767359990924275, "grad_norm": 3.578125, "learning_rate": 4.999946796340615e-05, "loss": 1.1337, "step": 358 }, { "epoch": 0.005105097652718642, "grad_norm": 3.453125, "learning_rate": 4.9999453287276624e-05, "loss": 1.151, "step": 360 }, { "epoch": 0.0051334593063448565, "grad_norm": 3.875, "learning_rate": 4.9999438411474794e-05, "loss": 1.1385, "step": 362 }, { "epoch": 0.0051618209599710715, "grad_norm": 3.71875, "learning_rate": 4.999942333600079e-05, "loss": 1.1325, "step": 364 }, { "epoch": 0.005190182613597286, "grad_norm": 2.984375, "learning_rate": 4.999940806085472e-05, "loss": 1.0621, "step": 366 }, { "epoch": 0.005218544267223501, "grad_norm": 3.203125, "learning_rate": 4.999939258603671e-05, "loss": 1.06, "step": 368 }, { "epoch": 0.0052469059208497155, "grad_norm": 3.59375, "learning_rate": 4.999937691154688e-05, "loss": 1.1121, "step": 370 }, { "epoch": 0.00527526757447593, "grad_norm": 3.3125, "learning_rate": 4.9999361037385366e-05, "loss": 1.0988, "step": 372 }, { "epoch": 0.005303629228102145, "grad_norm": 4.21875, "learning_rate": 4.9999344963552285e-05, "loss": 1.0685, "step": 374 }, { "epoch": 0.00533199088172836, "grad_norm": 3.765625, "learning_rate": 4.999932869004777e-05, "loss": 1.1825, "step": 376 }, { "epoch": 0.005360352535354574, "grad_norm": 3.171875, "learning_rate": 4.9999312216871944e-05, "loss": 1.1556, "step": 378 }, { "epoch": 0.005388714188980789, "grad_norm": 3.71875, "learning_rate": 4.999929554402495e-05, "loss": 1.07, "step": 380 }, { "epoch": 0.005417075842607004, "grad_norm": 3.828125, "learning_rate": 4.999927867150691e-05, "loss": 1.0872, "step": 382 }, { "epoch": 0.005445437496233218, "grad_norm": 3.40625, "learning_rate": 4.999926159931797e-05, "loss": 1.1259, "step": 384 }, { "epoch": 0.005473799149859433, "grad_norm": 3.296875, "learning_rate": 4.9999244327458256e-05, "loss": 1.063, "step": 386 }, { "epoch": 0.005502160803485647, "grad_norm": 4.375, "learning_rate": 4.999922685592791e-05, "loss": 1.138, "step": 388 }, { "epoch": 0.005530522457111862, "grad_norm": 3.953125, "learning_rate": 4.9999209184727076e-05, "loss": 1.09, "step": 390 }, { "epoch": 0.005558884110738077, "grad_norm": 4.03125, "learning_rate": 4.9999191313855884e-05, "loss": 1.0996, "step": 392 }, { "epoch": 0.005587245764364291, "grad_norm": 3.796875, "learning_rate": 4.999917324331449e-05, "loss": 1.0604, "step": 394 }, { "epoch": 0.005615607417990506, "grad_norm": 4.46875, "learning_rate": 4.999915497310302e-05, "loss": 1.1628, "step": 396 }, { "epoch": 0.005643969071616721, "grad_norm": 3.75, "learning_rate": 4.999913650322164e-05, "loss": 1.0717, "step": 398 }, { "epoch": 0.005672330725242935, "grad_norm": 3.0, "learning_rate": 4.9999117833670495e-05, "loss": 1.0827, "step": 400 }, { "epoch": 0.00570069237886915, "grad_norm": 3.65625, "learning_rate": 4.999909896444972e-05, "loss": 1.0837, "step": 402 }, { "epoch": 0.005729054032495365, "grad_norm": 3.09375, "learning_rate": 4.999907989555948e-05, "loss": 1.1118, "step": 404 }, { "epoch": 0.005757415686121579, "grad_norm": 4.40625, "learning_rate": 4.9999060626999914e-05, "loss": 1.0355, "step": 406 }, { "epoch": 0.005785777339747794, "grad_norm": 3.75, "learning_rate": 4.9999041158771195e-05, "loss": 1.1074, "step": 408 }, { "epoch": 0.005814138993374009, "grad_norm": 3.65625, "learning_rate": 4.999902149087345e-05, "loss": 1.1869, "step": 410 }, { "epoch": 0.005842500647000223, "grad_norm": 3.40625, "learning_rate": 4.9999001623306876e-05, "loss": 1.1101, "step": 412 }, { "epoch": 0.005870862300626438, "grad_norm": 3.53125, "learning_rate": 4.999898155607159e-05, "loss": 1.0199, "step": 414 }, { "epoch": 0.005899223954252653, "grad_norm": 3.1875, "learning_rate": 4.999896128916778e-05, "loss": 1.0981, "step": 416 }, { "epoch": 0.005927585607878867, "grad_norm": 4.25, "learning_rate": 4.999894082259559e-05, "loss": 1.0801, "step": 418 }, { "epoch": 0.005955947261505082, "grad_norm": 3.375, "learning_rate": 4.99989201563552e-05, "loss": 1.0877, "step": 420 }, { "epoch": 0.005984308915131297, "grad_norm": 3.515625, "learning_rate": 4.999889929044676e-05, "loss": 1.1017, "step": 422 }, { "epoch": 0.006012670568757511, "grad_norm": 4.3125, "learning_rate": 4.999887822487045e-05, "loss": 1.107, "step": 424 }, { "epoch": 0.006041032222383726, "grad_norm": 3.625, "learning_rate": 4.999885695962643e-05, "loss": 1.1297, "step": 426 }, { "epoch": 0.006069393876009941, "grad_norm": 4.0625, "learning_rate": 4.999883549471487e-05, "loss": 1.0946, "step": 428 }, { "epoch": 0.006097755529636155, "grad_norm": 4.0625, "learning_rate": 4.999881383013595e-05, "loss": 1.0647, "step": 430 }, { "epoch": 0.00612611718326237, "grad_norm": 4.0, "learning_rate": 4.999879196588983e-05, "loss": 1.1169, "step": 432 }, { "epoch": 0.006154478836888585, "grad_norm": 3.265625, "learning_rate": 4.9998769901976696e-05, "loss": 1.055, "step": 434 }, { "epoch": 0.006182840490514799, "grad_norm": 3.734375, "learning_rate": 4.999874763839671e-05, "loss": 1.0391, "step": 436 }, { "epoch": 0.006211202144141014, "grad_norm": 3.515625, "learning_rate": 4.999872517515007e-05, "loss": 1.0616, "step": 438 }, { "epoch": 0.006239563797767229, "grad_norm": 3.84375, "learning_rate": 4.999870251223694e-05, "loss": 1.1106, "step": 440 }, { "epoch": 0.006267925451393443, "grad_norm": 3.25, "learning_rate": 4.999867964965751e-05, "loss": 1.0544, "step": 442 }, { "epoch": 0.006296287105019658, "grad_norm": 3.515625, "learning_rate": 4.9998656587411954e-05, "loss": 1.1045, "step": 444 }, { "epoch": 0.006324648758645873, "grad_norm": 2.859375, "learning_rate": 4.999863332550045e-05, "loss": 1.1112, "step": 446 }, { "epoch": 0.006353010412272087, "grad_norm": 3.09375, "learning_rate": 4.9998609863923215e-05, "loss": 1.0418, "step": 448 }, { "epoch": 0.006381372065898302, "grad_norm": 3.828125, "learning_rate": 4.99985862026804e-05, "loss": 1.0795, "step": 450 }, { "epoch": 0.006409733719524517, "grad_norm": 3.3125, "learning_rate": 4.999856234177221e-05, "loss": 1.0808, "step": 452 }, { "epoch": 0.006438095373150731, "grad_norm": 4.09375, "learning_rate": 4.999853828119885e-05, "loss": 1.0493, "step": 454 }, { "epoch": 0.006466457026776946, "grad_norm": 3.46875, "learning_rate": 4.9998514020960485e-05, "loss": 1.0342, "step": 456 }, { "epoch": 0.006494818680403161, "grad_norm": 3.546875, "learning_rate": 4.999848956105733e-05, "loss": 1.1309, "step": 458 }, { "epoch": 0.006523180334029375, "grad_norm": 3.90625, "learning_rate": 4.9998464901489563e-05, "loss": 1.0944, "step": 460 }, { "epoch": 0.00655154198765559, "grad_norm": 3.46875, "learning_rate": 4.999844004225739e-05, "loss": 1.0529, "step": 462 }, { "epoch": 0.006579903641281805, "grad_norm": 3.484375, "learning_rate": 4.999841498336101e-05, "loss": 1.0789, "step": 464 }, { "epoch": 0.006608265294908019, "grad_norm": 3.640625, "learning_rate": 4.999838972480062e-05, "loss": 1.0436, "step": 466 }, { "epoch": 0.006636626948534234, "grad_norm": 3.328125, "learning_rate": 4.999836426657643e-05, "loss": 1.0754, "step": 468 }, { "epoch": 0.006664988602160449, "grad_norm": 2.984375, "learning_rate": 4.999833860868863e-05, "loss": 1.1059, "step": 470 }, { "epoch": 0.006693350255786663, "grad_norm": 3.78125, "learning_rate": 4.999831275113744e-05, "loss": 1.093, "step": 472 }, { "epoch": 0.006721711909412878, "grad_norm": 4.0625, "learning_rate": 4.9998286693923055e-05, "loss": 1.1176, "step": 474 }, { "epoch": 0.006750073563039093, "grad_norm": 4.0625, "learning_rate": 4.9998260437045686e-05, "loss": 1.0963, "step": 476 }, { "epoch": 0.006778435216665307, "grad_norm": 3.515625, "learning_rate": 4.9998233980505535e-05, "loss": 1.0579, "step": 478 }, { "epoch": 0.006806796870291522, "grad_norm": 3.0, "learning_rate": 4.999820732430284e-05, "loss": 1.0708, "step": 480 }, { "epoch": 0.006835158523917737, "grad_norm": 3.203125, "learning_rate": 4.9998180468437786e-05, "loss": 1.0267, "step": 482 }, { "epoch": 0.0068635201775439515, "grad_norm": 3.71875, "learning_rate": 4.9998153412910594e-05, "loss": 1.1024, "step": 484 }, { "epoch": 0.006891881831170166, "grad_norm": 3.375, "learning_rate": 4.999812615772149e-05, "loss": 1.1203, "step": 486 }, { "epoch": 0.006920243484796381, "grad_norm": 3.046875, "learning_rate": 4.999809870287068e-05, "loss": 0.9936, "step": 488 }, { "epoch": 0.0069486051384225955, "grad_norm": 3.609375, "learning_rate": 4.9998071048358384e-05, "loss": 1.0788, "step": 490 }, { "epoch": 0.0069769667920488105, "grad_norm": 3.03125, "learning_rate": 4.999804319418484e-05, "loss": 1.1216, "step": 492 }, { "epoch": 0.007005328445675025, "grad_norm": 3.640625, "learning_rate": 4.9998015140350255e-05, "loss": 1.037, "step": 494 }, { "epoch": 0.0070336900993012395, "grad_norm": 3.59375, "learning_rate": 4.999798688685484e-05, "loss": 1.1288, "step": 496 }, { "epoch": 0.0070620517529274545, "grad_norm": 2.984375, "learning_rate": 4.9997958433698856e-05, "loss": 1.0899, "step": 498 }, { "epoch": 0.0070904134065536695, "grad_norm": 3.265625, "learning_rate": 4.9997929780882504e-05, "loss": 1.106, "step": 500 }, { "epoch": 0.007118775060179884, "grad_norm": 3.296875, "learning_rate": 4.999790092840602e-05, "loss": 1.0528, "step": 502 }, { "epoch": 0.0071471367138060985, "grad_norm": 3.5, "learning_rate": 4.999787187626962e-05, "loss": 1.01, "step": 504 }, { "epoch": 0.0071754983674323135, "grad_norm": 3.484375, "learning_rate": 4.999784262447357e-05, "loss": 1.0104, "step": 506 }, { "epoch": 0.007203860021058528, "grad_norm": 3.203125, "learning_rate": 4.9997813173018074e-05, "loss": 1.0916, "step": 508 }, { "epoch": 0.007232221674684743, "grad_norm": 4.03125, "learning_rate": 4.999778352190337e-05, "loss": 1.0464, "step": 510 }, { "epoch": 0.0072605833283109576, "grad_norm": 3.015625, "learning_rate": 4.999775367112972e-05, "loss": 1.1138, "step": 512 }, { "epoch": 0.007288944981937172, "grad_norm": 3.578125, "learning_rate": 4.9997723620697335e-05, "loss": 1.0889, "step": 514 }, { "epoch": 0.007317306635563387, "grad_norm": 3.265625, "learning_rate": 4.999769337060646e-05, "loss": 1.0505, "step": 516 }, { "epoch": 0.007345668289189602, "grad_norm": 3.5, "learning_rate": 4.9997662920857344e-05, "loss": 1.0892, "step": 518 }, { "epoch": 0.007374029942815816, "grad_norm": 3.3125, "learning_rate": 4.9997632271450225e-05, "loss": 1.0791, "step": 520 }, { "epoch": 0.007402391596442031, "grad_norm": 3.421875, "learning_rate": 4.999760142238535e-05, "loss": 1.1102, "step": 522 }, { "epoch": 0.007430753250068246, "grad_norm": 3.546875, "learning_rate": 4.999757037366297e-05, "loss": 1.087, "step": 524 }, { "epoch": 0.00745911490369446, "grad_norm": 3.296875, "learning_rate": 4.999753912528332e-05, "loss": 1.0717, "step": 526 }, { "epoch": 0.007487476557320675, "grad_norm": 3.078125, "learning_rate": 4.9997507677246666e-05, "loss": 1.0555, "step": 528 }, { "epoch": 0.00751583821094689, "grad_norm": 3.671875, "learning_rate": 4.999747602955325e-05, "loss": 1.0958, "step": 530 }, { "epoch": 0.007544199864573104, "grad_norm": 3.859375, "learning_rate": 4.9997444182203316e-05, "loss": 1.022, "step": 532 }, { "epoch": 0.007572561518199319, "grad_norm": 4.40625, "learning_rate": 4.9997412135197134e-05, "loss": 1.0834, "step": 534 }, { "epoch": 0.007600923171825534, "grad_norm": 4.25, "learning_rate": 4.999737988853496e-05, "loss": 1.0382, "step": 536 }, { "epoch": 0.007629284825451748, "grad_norm": 3.328125, "learning_rate": 4.9997347442217035e-05, "loss": 1.0283, "step": 538 }, { "epoch": 0.007657646479077963, "grad_norm": 3.84375, "learning_rate": 4.9997314796243634e-05, "loss": 1.1022, "step": 540 }, { "epoch": 0.007686008132704177, "grad_norm": 3.296875, "learning_rate": 4.999728195061502e-05, "loss": 1.0579, "step": 542 }, { "epoch": 0.007714369786330392, "grad_norm": 3.4375, "learning_rate": 4.999724890533143e-05, "loss": 1.0533, "step": 544 }, { "epoch": 0.007742731439956607, "grad_norm": 3.5, "learning_rate": 4.999721566039316e-05, "loss": 1.083, "step": 546 }, { "epoch": 0.007771093093582821, "grad_norm": 3.21875, "learning_rate": 4.9997182215800455e-05, "loss": 1.0488, "step": 548 }, { "epoch": 0.007799454747209036, "grad_norm": 2.78125, "learning_rate": 4.999714857155359e-05, "loss": 1.0939, "step": 550 }, { "epoch": 0.00782781640083525, "grad_norm": 3.5625, "learning_rate": 4.999711472765283e-05, "loss": 1.0696, "step": 552 }, { "epoch": 0.007856178054461465, "grad_norm": 3.9375, "learning_rate": 4.999708068409845e-05, "loss": 1.0529, "step": 554 }, { "epoch": 0.00788453970808768, "grad_norm": 3.140625, "learning_rate": 4.9997046440890714e-05, "loss": 1.0261, "step": 556 }, { "epoch": 0.007912901361713895, "grad_norm": 3.5625, "learning_rate": 4.9997011998029905e-05, "loss": 1.016, "step": 558 }, { "epoch": 0.00794126301534011, "grad_norm": 3.25, "learning_rate": 4.99969773555163e-05, "loss": 1.0712, "step": 560 }, { "epoch": 0.007969624668966325, "grad_norm": 3.515625, "learning_rate": 4.999694251335016e-05, "loss": 1.0652, "step": 562 }, { "epoch": 0.007997986322592538, "grad_norm": 3.09375, "learning_rate": 4.999690747153178e-05, "loss": 1.0161, "step": 564 }, { "epoch": 0.008026347976218753, "grad_norm": 3.625, "learning_rate": 4.999687223006143e-05, "loss": 1.0946, "step": 566 }, { "epoch": 0.008054709629844968, "grad_norm": 3.421875, "learning_rate": 4.9996836788939396e-05, "loss": 1.0748, "step": 568 }, { "epoch": 0.008083071283471183, "grad_norm": 4.375, "learning_rate": 4.999680114816595e-05, "loss": 1.044, "step": 570 }, { "epoch": 0.008111432937097398, "grad_norm": 3.484375, "learning_rate": 4.9996765307741394e-05, "loss": 0.9668, "step": 572 }, { "epoch": 0.008139794590723613, "grad_norm": 3.21875, "learning_rate": 4.999672926766601e-05, "loss": 1.0584, "step": 574 }, { "epoch": 0.008168156244349826, "grad_norm": 3.484375, "learning_rate": 4.9996693027940076e-05, "loss": 1.09, "step": 576 }, { "epoch": 0.008196517897976041, "grad_norm": 3.203125, "learning_rate": 4.999665658856388e-05, "loss": 1.0735, "step": 578 }, { "epoch": 0.008224879551602256, "grad_norm": 3.140625, "learning_rate": 4.999661994953773e-05, "loss": 1.0357, "step": 580 }, { "epoch": 0.008253241205228471, "grad_norm": 3.5625, "learning_rate": 4.999658311086191e-05, "loss": 1.1033, "step": 582 }, { "epoch": 0.008281602858854686, "grad_norm": 3.265625, "learning_rate": 4.999654607253671e-05, "loss": 1.0494, "step": 584 }, { "epoch": 0.008309964512480901, "grad_norm": 2.859375, "learning_rate": 4.999650883456243e-05, "loss": 1.0304, "step": 586 }, { "epoch": 0.008338326166107114, "grad_norm": 3.0, "learning_rate": 4.9996471396939374e-05, "loss": 1.0845, "step": 588 }, { "epoch": 0.00836668781973333, "grad_norm": 3.265625, "learning_rate": 4.9996433759667826e-05, "loss": 1.0447, "step": 590 }, { "epoch": 0.008395049473359544, "grad_norm": 3.40625, "learning_rate": 4.99963959227481e-05, "loss": 1.0451, "step": 592 }, { "epoch": 0.00842341112698576, "grad_norm": 3.4375, "learning_rate": 4.9996357886180476e-05, "loss": 1.0794, "step": 594 }, { "epoch": 0.008451772780611974, "grad_norm": 4.0625, "learning_rate": 4.999631964996529e-05, "loss": 1.0491, "step": 596 }, { "epoch": 0.008480134434238189, "grad_norm": 3.265625, "learning_rate": 4.9996281214102826e-05, "loss": 1.0311, "step": 598 }, { "epoch": 0.008508496087864402, "grad_norm": 3.515625, "learning_rate": 4.99962425785934e-05, "loss": 1.0697, "step": 600 }, { "epoch": 0.008536857741490617, "grad_norm": 3.265625, "learning_rate": 4.999620374343732e-05, "loss": 1.0606, "step": 602 }, { "epoch": 0.008565219395116832, "grad_norm": 3.453125, "learning_rate": 4.999616470863489e-05, "loss": 1.0309, "step": 604 }, { "epoch": 0.008593581048743047, "grad_norm": 4.15625, "learning_rate": 4.9996125474186414e-05, "loss": 1.0405, "step": 606 }, { "epoch": 0.008621942702369262, "grad_norm": 3.1875, "learning_rate": 4.999608604009223e-05, "loss": 1.0147, "step": 608 }, { "epoch": 0.008650304355995477, "grad_norm": 3.5, "learning_rate": 4.999604640635264e-05, "loss": 1.0767, "step": 610 }, { "epoch": 0.00867866600962169, "grad_norm": 3.328125, "learning_rate": 4.999600657296796e-05, "loss": 1.0442, "step": 612 }, { "epoch": 0.008707027663247905, "grad_norm": 3.40625, "learning_rate": 4.99959665399385e-05, "loss": 1.0137, "step": 614 }, { "epoch": 0.00873538931687412, "grad_norm": 4.375, "learning_rate": 4.9995926307264594e-05, "loss": 1.0142, "step": 616 }, { "epoch": 0.008763750970500335, "grad_norm": 3.546875, "learning_rate": 4.999588587494656e-05, "loss": 1.0484, "step": 618 }, { "epoch": 0.00879211262412655, "grad_norm": 3.578125, "learning_rate": 4.9995845242984706e-05, "loss": 1.0453, "step": 620 }, { "epoch": 0.008820474277752764, "grad_norm": 3.6875, "learning_rate": 4.999580441137938e-05, "loss": 1.027, "step": 622 }, { "epoch": 0.008848835931378978, "grad_norm": 3.890625, "learning_rate": 4.9995763380130896e-05, "loss": 1.0741, "step": 624 }, { "epoch": 0.008877197585005193, "grad_norm": 3.703125, "learning_rate": 4.9995722149239575e-05, "loss": 1.0291, "step": 626 }, { "epoch": 0.008905559238631408, "grad_norm": 3.0625, "learning_rate": 4.999568071870576e-05, "loss": 1.0586, "step": 628 }, { "epoch": 0.008933920892257623, "grad_norm": 3.375, "learning_rate": 4.9995639088529776e-05, "loss": 1.0845, "step": 630 }, { "epoch": 0.008962282545883838, "grad_norm": 3.78125, "learning_rate": 4.9995597258711954e-05, "loss": 1.0618, "step": 632 }, { "epoch": 0.008990644199510052, "grad_norm": 3.140625, "learning_rate": 4.9995555229252635e-05, "loss": 1.0145, "step": 634 }, { "epoch": 0.009019005853136267, "grad_norm": 3.515625, "learning_rate": 4.999551300015214e-05, "loss": 1.1262, "step": 636 }, { "epoch": 0.009047367506762482, "grad_norm": 3.453125, "learning_rate": 4.999547057141082e-05, "loss": 0.9884, "step": 638 }, { "epoch": 0.009075729160388697, "grad_norm": 3.484375, "learning_rate": 4.9995427943029014e-05, "loss": 1.0423, "step": 640 }, { "epoch": 0.009104090814014911, "grad_norm": 3.390625, "learning_rate": 4.9995385115007055e-05, "loss": 1.0374, "step": 642 }, { "epoch": 0.009132452467641126, "grad_norm": 3.578125, "learning_rate": 4.9995342087345286e-05, "loss": 1.0917, "step": 644 }, { "epoch": 0.00916081412126734, "grad_norm": 3.21875, "learning_rate": 4.999529886004405e-05, "loss": 1.07, "step": 646 }, { "epoch": 0.009189175774893555, "grad_norm": 3.875, "learning_rate": 4.9995255433103696e-05, "loss": 1.1243, "step": 648 }, { "epoch": 0.00921753742851977, "grad_norm": 3.46875, "learning_rate": 4.9995211806524574e-05, "loss": 1.0334, "step": 650 }, { "epoch": 0.009245899082145985, "grad_norm": 3.359375, "learning_rate": 4.9995167980307024e-05, "loss": 1.0902, "step": 652 }, { "epoch": 0.0092742607357722, "grad_norm": 3.484375, "learning_rate": 4.9995123954451404e-05, "loss": 1.1037, "step": 654 }, { "epoch": 0.009302622389398415, "grad_norm": 3.0625, "learning_rate": 4.999507972895805e-05, "loss": 1.003, "step": 656 }, { "epoch": 0.009330984043024628, "grad_norm": 3.21875, "learning_rate": 4.9995035303827344e-05, "loss": 0.9697, "step": 658 }, { "epoch": 0.009359345696650843, "grad_norm": 3.453125, "learning_rate": 4.9994990679059616e-05, "loss": 1.0206, "step": 660 }, { "epoch": 0.009387707350277058, "grad_norm": 3.75, "learning_rate": 4.999494585465523e-05, "loss": 1.0407, "step": 662 }, { "epoch": 0.009416069003903273, "grad_norm": 3.71875, "learning_rate": 4.9994900830614546e-05, "loss": 1.0539, "step": 664 }, { "epoch": 0.009444430657529488, "grad_norm": 2.875, "learning_rate": 4.9994855606937916e-05, "loss": 1.0032, "step": 666 }, { "epoch": 0.009472792311155703, "grad_norm": 3.4375, "learning_rate": 4.999481018362571e-05, "loss": 1.0443, "step": 668 }, { "epoch": 0.009501153964781916, "grad_norm": 3.171875, "learning_rate": 4.9994764560678293e-05, "loss": 1.0054, "step": 670 }, { "epoch": 0.00952951561840813, "grad_norm": 3.265625, "learning_rate": 4.999471873809602e-05, "loss": 1.0936, "step": 672 }, { "epoch": 0.009557877272034346, "grad_norm": 4.09375, "learning_rate": 4.999467271587927e-05, "loss": 1.1194, "step": 674 }, { "epoch": 0.00958623892566056, "grad_norm": 3.140625, "learning_rate": 4.9994626494028384e-05, "loss": 1.0168, "step": 676 }, { "epoch": 0.009614600579286776, "grad_norm": 3.953125, "learning_rate": 4.999458007254376e-05, "loss": 1.0976, "step": 678 }, { "epoch": 0.00964296223291299, "grad_norm": 3.71875, "learning_rate": 4.9994533451425756e-05, "loss": 1.1002, "step": 680 }, { "epoch": 0.009671323886539204, "grad_norm": 3.0625, "learning_rate": 4.9994486630674744e-05, "loss": 1.0512, "step": 682 }, { "epoch": 0.009699685540165419, "grad_norm": 3.1875, "learning_rate": 4.999443961029111e-05, "loss": 1.0842, "step": 684 }, { "epoch": 0.009728047193791634, "grad_norm": 3.75, "learning_rate": 4.9994392390275216e-05, "loss": 0.9851, "step": 686 }, { "epoch": 0.009756408847417849, "grad_norm": 3.109375, "learning_rate": 4.999434497062743e-05, "loss": 0.9994, "step": 688 }, { "epoch": 0.009784770501044064, "grad_norm": 4.15625, "learning_rate": 4.999429735134816e-05, "loss": 1.0518, "step": 690 }, { "epoch": 0.009813132154670279, "grad_norm": 3.5, "learning_rate": 4.999424953243776e-05, "loss": 1.0893, "step": 692 }, { "epoch": 0.009841493808296492, "grad_norm": 2.921875, "learning_rate": 4.999420151389662e-05, "loss": 1.0451, "step": 694 }, { "epoch": 0.009869855461922707, "grad_norm": 3.71875, "learning_rate": 4.999415329572513e-05, "loss": 1.0503, "step": 696 }, { "epoch": 0.009898217115548922, "grad_norm": 3.09375, "learning_rate": 4.999410487792368e-05, "loss": 1.0118, "step": 698 }, { "epoch": 0.009926578769175137, "grad_norm": 3.359375, "learning_rate": 4.999405626049263e-05, "loss": 1.0035, "step": 700 }, { "epoch": 0.009954940422801352, "grad_norm": 3.125, "learning_rate": 4.99940074434324e-05, "loss": 1.0273, "step": 702 }, { "epoch": 0.009983302076427567, "grad_norm": 3.515625, "learning_rate": 4.999395842674336e-05, "loss": 1.0375, "step": 704 }, { "epoch": 0.01001166373005378, "grad_norm": 3.515625, "learning_rate": 4.99939092104259e-05, "loss": 1.1057, "step": 706 }, { "epoch": 0.010040025383679995, "grad_norm": 3.3125, "learning_rate": 4.9993859794480425e-05, "loss": 1.0436, "step": 708 }, { "epoch": 0.01006838703730621, "grad_norm": 3.3125, "learning_rate": 4.999381017890733e-05, "loss": 1.0869, "step": 710 }, { "epoch": 0.010096748690932425, "grad_norm": 2.859375, "learning_rate": 4.9993760363706996e-05, "loss": 1.0425, "step": 712 }, { "epoch": 0.01012511034455864, "grad_norm": 3.125, "learning_rate": 4.999371034887984e-05, "loss": 1.0183, "step": 714 }, { "epoch": 0.010153471998184855, "grad_norm": 3.1875, "learning_rate": 4.999366013442624e-05, "loss": 1.0579, "step": 716 }, { "epoch": 0.010181833651811068, "grad_norm": 3.375, "learning_rate": 4.999360972034663e-05, "loss": 1.1013, "step": 718 }, { "epoch": 0.010210195305437283, "grad_norm": 3.078125, "learning_rate": 4.999355910664138e-05, "loss": 1.0237, "step": 720 }, { "epoch": 0.010238556959063498, "grad_norm": 3.265625, "learning_rate": 4.9993508293310905e-05, "loss": 1.0493, "step": 722 }, { "epoch": 0.010266918612689713, "grad_norm": 3.390625, "learning_rate": 4.999345728035562e-05, "loss": 1.0047, "step": 724 }, { "epoch": 0.010295280266315928, "grad_norm": 3.859375, "learning_rate": 4.999340606777591e-05, "loss": 1.0385, "step": 726 }, { "epoch": 0.010323641919942143, "grad_norm": 5.03125, "learning_rate": 4.999335465557221e-05, "loss": 1.0464, "step": 728 }, { "epoch": 0.010352003573568356, "grad_norm": 3.5625, "learning_rate": 4.999330304374491e-05, "loss": 0.9834, "step": 730 }, { "epoch": 0.010380365227194571, "grad_norm": 3.421875, "learning_rate": 4.999325123229444e-05, "loss": 1.0757, "step": 732 }, { "epoch": 0.010408726880820786, "grad_norm": 3.609375, "learning_rate": 4.9993199221221206e-05, "loss": 1.0112, "step": 734 }, { "epoch": 0.010437088534447001, "grad_norm": 3.953125, "learning_rate": 4.999314701052562e-05, "loss": 1.1014, "step": 736 }, { "epoch": 0.010465450188073216, "grad_norm": 3.203125, "learning_rate": 4.99930946002081e-05, "loss": 1.0492, "step": 738 }, { "epoch": 0.010493811841699431, "grad_norm": 3.0625, "learning_rate": 4.999304199026907e-05, "loss": 1.0031, "step": 740 }, { "epoch": 0.010522173495325644, "grad_norm": 2.953125, "learning_rate": 4.999298918070894e-05, "loss": 1.0141, "step": 742 }, { "epoch": 0.01055053514895186, "grad_norm": 3.09375, "learning_rate": 4.999293617152815e-05, "loss": 1.0233, "step": 744 }, { "epoch": 0.010578896802578074, "grad_norm": 3.625, "learning_rate": 4.999288296272711e-05, "loss": 1.0374, "step": 746 }, { "epoch": 0.01060725845620429, "grad_norm": 3.46875, "learning_rate": 4.9992829554306234e-05, "loss": 1.0846, "step": 748 }, { "epoch": 0.010635620109830504, "grad_norm": 2.984375, "learning_rate": 4.9992775946265966e-05, "loss": 1.043, "step": 750 }, { "epoch": 0.01066398176345672, "grad_norm": 3.34375, "learning_rate": 4.999272213860674e-05, "loss": 0.9907, "step": 752 }, { "epoch": 0.010692343417082932, "grad_norm": 4.3125, "learning_rate": 4.999266813132896e-05, "loss": 1.088, "step": 754 }, { "epoch": 0.010720705070709147, "grad_norm": 3.203125, "learning_rate": 4.9992613924433085e-05, "loss": 1.0518, "step": 756 }, { "epoch": 0.010749066724335362, "grad_norm": 3.28125, "learning_rate": 4.999255951791952e-05, "loss": 1.0805, "step": 758 }, { "epoch": 0.010777428377961577, "grad_norm": 3.171875, "learning_rate": 4.9992504911788735e-05, "loss": 1.049, "step": 760 }, { "epoch": 0.010805790031587792, "grad_norm": 3.046875, "learning_rate": 4.9992450106041135e-05, "loss": 1.0692, "step": 762 }, { "epoch": 0.010834151685214007, "grad_norm": 2.984375, "learning_rate": 4.999239510067717e-05, "loss": 1.0051, "step": 764 }, { "epoch": 0.01086251333884022, "grad_norm": 3.578125, "learning_rate": 4.999233989569728e-05, "loss": 1.0144, "step": 766 }, { "epoch": 0.010890874992466435, "grad_norm": 3.125, "learning_rate": 4.9992284491101905e-05, "loss": 0.962, "step": 768 }, { "epoch": 0.01091923664609265, "grad_norm": 3.6875, "learning_rate": 4.999222888689149e-05, "loss": 1.0756, "step": 770 }, { "epoch": 0.010947598299718865, "grad_norm": 3.390625, "learning_rate": 4.9992173083066466e-05, "loss": 1.0974, "step": 772 }, { "epoch": 0.01097595995334508, "grad_norm": 3.6875, "learning_rate": 4.9992117079627296e-05, "loss": 1.0391, "step": 774 }, { "epoch": 0.011004321606971294, "grad_norm": 3.4375, "learning_rate": 4.999206087657442e-05, "loss": 1.013, "step": 776 }, { "epoch": 0.011032683260597509, "grad_norm": 3.375, "learning_rate": 4.999200447390828e-05, "loss": 0.9818, "step": 778 }, { "epoch": 0.011061044914223724, "grad_norm": 3.34375, "learning_rate": 4.999194787162934e-05, "loss": 0.9791, "step": 780 }, { "epoch": 0.011089406567849939, "grad_norm": 3.390625, "learning_rate": 4.999189106973804e-05, "loss": 1.0711, "step": 782 }, { "epoch": 0.011117768221476153, "grad_norm": 3.265625, "learning_rate": 4.9991834068234844e-05, "loss": 1.0469, "step": 784 }, { "epoch": 0.011146129875102368, "grad_norm": 3.1875, "learning_rate": 4.99917768671202e-05, "loss": 0.9811, "step": 786 }, { "epoch": 0.011174491528728582, "grad_norm": 3.0625, "learning_rate": 4.999171946639457e-05, "loss": 0.9695, "step": 788 }, { "epoch": 0.011202853182354797, "grad_norm": 2.875, "learning_rate": 4.99916618660584e-05, "loss": 1.0348, "step": 790 }, { "epoch": 0.011231214835981012, "grad_norm": 2.859375, "learning_rate": 4.999160406611218e-05, "loss": 1.0236, "step": 792 }, { "epoch": 0.011259576489607227, "grad_norm": 2.890625, "learning_rate": 4.9991546066556335e-05, "loss": 1.039, "step": 794 }, { "epoch": 0.011287938143233442, "grad_norm": 3.140625, "learning_rate": 4.9991487867391345e-05, "loss": 1.0567, "step": 796 }, { "epoch": 0.011316299796859657, "grad_norm": 3.265625, "learning_rate": 4.999142946861768e-05, "loss": 1.0297, "step": 798 }, { "epoch": 0.01134466145048587, "grad_norm": 3.765625, "learning_rate": 4.99913708702358e-05, "loss": 0.9283, "step": 800 }, { "epoch": 0.011373023104112085, "grad_norm": 3.359375, "learning_rate": 4.999131207224617e-05, "loss": 1.0652, "step": 802 }, { "epoch": 0.0114013847577383, "grad_norm": 3.203125, "learning_rate": 4.9991253074649266e-05, "loss": 1.0936, "step": 804 }, { "epoch": 0.011429746411364515, "grad_norm": 3.109375, "learning_rate": 4.9991193877445554e-05, "loss": 1.0151, "step": 806 }, { "epoch": 0.01145810806499073, "grad_norm": 3.34375, "learning_rate": 4.999113448063551e-05, "loss": 0.9899, "step": 808 }, { "epoch": 0.011486469718616945, "grad_norm": 3.703125, "learning_rate": 4.999107488421961e-05, "loss": 0.9936, "step": 810 }, { "epoch": 0.011514831372243158, "grad_norm": 2.953125, "learning_rate": 4.999101508819833e-05, "loss": 0.9536, "step": 812 }, { "epoch": 0.011543193025869373, "grad_norm": 3.28125, "learning_rate": 4.999095509257214e-05, "loss": 1.0291, "step": 814 }, { "epoch": 0.011571554679495588, "grad_norm": 3.140625, "learning_rate": 4.9990894897341534e-05, "loss": 1.0869, "step": 816 }, { "epoch": 0.011599916333121803, "grad_norm": 3.40625, "learning_rate": 4.999083450250698e-05, "loss": 1.0776, "step": 818 }, { "epoch": 0.011628277986748018, "grad_norm": 3.0, "learning_rate": 4.999077390806896e-05, "loss": 1.0554, "step": 820 }, { "epoch": 0.011656639640374233, "grad_norm": 2.984375, "learning_rate": 4.9990713114027966e-05, "loss": 1.0387, "step": 822 }, { "epoch": 0.011685001294000446, "grad_norm": 3.3125, "learning_rate": 4.9990652120384474e-05, "loss": 1.1099, "step": 824 }, { "epoch": 0.011713362947626661, "grad_norm": 3.59375, "learning_rate": 4.999059092713898e-05, "loss": 1.0429, "step": 826 }, { "epoch": 0.011741724601252876, "grad_norm": 2.953125, "learning_rate": 4.999052953429197e-05, "loss": 1.0017, "step": 828 }, { "epoch": 0.01177008625487909, "grad_norm": 3.234375, "learning_rate": 4.999046794184393e-05, "loss": 1.0694, "step": 830 }, { "epoch": 0.011798447908505306, "grad_norm": 3.265625, "learning_rate": 4.999040614979536e-05, "loss": 1.0168, "step": 832 }, { "epoch": 0.01182680956213152, "grad_norm": 3.796875, "learning_rate": 4.999034415814675e-05, "loss": 1.0606, "step": 834 }, { "epoch": 0.011855171215757734, "grad_norm": 2.953125, "learning_rate": 4.9990281966898595e-05, "loss": 1.0336, "step": 836 }, { "epoch": 0.011883532869383949, "grad_norm": 3.046875, "learning_rate": 4.9990219576051386e-05, "loss": 0.9751, "step": 838 }, { "epoch": 0.011911894523010164, "grad_norm": 3.203125, "learning_rate": 4.999015698560563e-05, "loss": 0.9751, "step": 840 }, { "epoch": 0.011940256176636379, "grad_norm": 3.375, "learning_rate": 4.999009419556182e-05, "loss": 1.0226, "step": 842 }, { "epoch": 0.011968617830262594, "grad_norm": 3.1875, "learning_rate": 4.999003120592047e-05, "loss": 1.0217, "step": 844 }, { "epoch": 0.011996979483888809, "grad_norm": 3.140625, "learning_rate": 4.998996801668206e-05, "loss": 1.061, "step": 846 }, { "epoch": 0.012025341137515022, "grad_norm": 3.109375, "learning_rate": 4.998990462784712e-05, "loss": 1.0456, "step": 848 }, { "epoch": 0.012053702791141237, "grad_norm": 4.28125, "learning_rate": 4.998984103941614e-05, "loss": 1.0514, "step": 850 }, { "epoch": 0.012082064444767452, "grad_norm": 3.1875, "learning_rate": 4.998977725138964e-05, "loss": 1.0695, "step": 852 }, { "epoch": 0.012110426098393667, "grad_norm": 3.328125, "learning_rate": 4.9989713263768115e-05, "loss": 1.0497, "step": 854 }, { "epoch": 0.012138787752019882, "grad_norm": 2.796875, "learning_rate": 4.998964907655208e-05, "loss": 1.0239, "step": 856 }, { "epoch": 0.012167149405646097, "grad_norm": 3.0, "learning_rate": 4.9989584689742056e-05, "loss": 0.9841, "step": 858 }, { "epoch": 0.01219551105927231, "grad_norm": 3.25, "learning_rate": 4.998952010333855e-05, "loss": 0.9966, "step": 860 }, { "epoch": 0.012223872712898525, "grad_norm": 3.921875, "learning_rate": 4.9989455317342076e-05, "loss": 0.9948, "step": 862 }, { "epoch": 0.01225223436652474, "grad_norm": 3.65625, "learning_rate": 4.998939033175316e-05, "loss": 1.0605, "step": 864 }, { "epoch": 0.012280596020150955, "grad_norm": 3.703125, "learning_rate": 4.998932514657232e-05, "loss": 1.0642, "step": 866 }, { "epoch": 0.01230895767377717, "grad_norm": 3.15625, "learning_rate": 4.998925976180007e-05, "loss": 1.038, "step": 868 }, { "epoch": 0.012337319327403385, "grad_norm": 3.015625, "learning_rate": 4.998919417743694e-05, "loss": 1.0077, "step": 870 }, { "epoch": 0.012365680981029598, "grad_norm": 3.546875, "learning_rate": 4.9989128393483445e-05, "loss": 1.0422, "step": 872 }, { "epoch": 0.012394042634655813, "grad_norm": 2.96875, "learning_rate": 4.9989062409940116e-05, "loss": 0.9849, "step": 874 }, { "epoch": 0.012422404288282028, "grad_norm": 3.5, "learning_rate": 4.998899622680747e-05, "loss": 1.0115, "step": 876 }, { "epoch": 0.012450765941908243, "grad_norm": 3.109375, "learning_rate": 4.998892984408605e-05, "loss": 0.9811, "step": 878 }, { "epoch": 0.012479127595534458, "grad_norm": 3.3125, "learning_rate": 4.998886326177638e-05, "loss": 1.0521, "step": 880 }, { "epoch": 0.012507489249160673, "grad_norm": 3.265625, "learning_rate": 4.998879647987901e-05, "loss": 1.0624, "step": 882 }, { "epoch": 0.012535850902786886, "grad_norm": 3.4375, "learning_rate": 4.998872949839444e-05, "loss": 1.0684, "step": 884 }, { "epoch": 0.012564212556413101, "grad_norm": 3.5625, "learning_rate": 4.9988662317323215e-05, "loss": 1.0897, "step": 886 }, { "epoch": 0.012592574210039316, "grad_norm": 2.953125, "learning_rate": 4.998859493666588e-05, "loss": 0.9441, "step": 888 }, { "epoch": 0.012620935863665531, "grad_norm": 3.015625, "learning_rate": 4.998852735642298e-05, "loss": 1.0306, "step": 890 }, { "epoch": 0.012649297517291746, "grad_norm": 3.171875, "learning_rate": 4.998845957659504e-05, "loss": 1.0119, "step": 892 }, { "epoch": 0.012677659170917961, "grad_norm": 2.96875, "learning_rate": 4.99883915971826e-05, "loss": 1.0314, "step": 894 }, { "epoch": 0.012706020824544174, "grad_norm": 3.203125, "learning_rate": 4.9988323418186224e-05, "loss": 1.0402, "step": 896 }, { "epoch": 0.01273438247817039, "grad_norm": 2.921875, "learning_rate": 4.998825503960643e-05, "loss": 0.9315, "step": 898 }, { "epoch": 0.012762744131796604, "grad_norm": 3.125, "learning_rate": 4.998818646144378e-05, "loss": 1.0016, "step": 900 }, { "epoch": 0.01279110578542282, "grad_norm": 3.25, "learning_rate": 4.998811768369882e-05, "loss": 1.0555, "step": 902 }, { "epoch": 0.012819467439049034, "grad_norm": 4.6875, "learning_rate": 4.9988048706372096e-05, "loss": 0.997, "step": 904 }, { "epoch": 0.01284782909267525, "grad_norm": 3.4375, "learning_rate": 4.998797952946416e-05, "loss": 1.0924, "step": 906 }, { "epoch": 0.012876190746301463, "grad_norm": 3.484375, "learning_rate": 4.998791015297556e-05, "loss": 0.9572, "step": 908 }, { "epoch": 0.012904552399927677, "grad_norm": 2.859375, "learning_rate": 4.998784057690686e-05, "loss": 1.0163, "step": 910 }, { "epoch": 0.012932914053553892, "grad_norm": 3.046875, "learning_rate": 4.9987770801258617e-05, "loss": 1.0219, "step": 912 }, { "epoch": 0.012961275707180107, "grad_norm": 3.0, "learning_rate": 4.998770082603137e-05, "loss": 1.0196, "step": 914 }, { "epoch": 0.012989637360806322, "grad_norm": 3.328125, "learning_rate": 4.9987630651225694e-05, "loss": 1.0136, "step": 916 }, { "epoch": 0.013017999014432537, "grad_norm": 3.0, "learning_rate": 4.998756027684215e-05, "loss": 1.0315, "step": 918 }, { "epoch": 0.01304636066805875, "grad_norm": 3.0625, "learning_rate": 4.998748970288129e-05, "loss": 1.0534, "step": 920 }, { "epoch": 0.013074722321684966, "grad_norm": 3.6875, "learning_rate": 4.998741892934368e-05, "loss": 1.0377, "step": 922 }, { "epoch": 0.01310308397531118, "grad_norm": 3.40625, "learning_rate": 4.99873479562299e-05, "loss": 0.9951, "step": 924 }, { "epoch": 0.013131445628937395, "grad_norm": 3.28125, "learning_rate": 4.998727678354049e-05, "loss": 0.9822, "step": 926 }, { "epoch": 0.01315980728256361, "grad_norm": 3.484375, "learning_rate": 4.998720541127605e-05, "loss": 0.9618, "step": 928 }, { "epoch": 0.013188168936189824, "grad_norm": 2.734375, "learning_rate": 4.998713383943712e-05, "loss": 0.9923, "step": 930 }, { "epoch": 0.013216530589816039, "grad_norm": 3.5625, "learning_rate": 4.998706206802429e-05, "loss": 1.0026, "step": 932 }, { "epoch": 0.013244892243442254, "grad_norm": 3.625, "learning_rate": 4.9986990097038134e-05, "loss": 1.0192, "step": 934 }, { "epoch": 0.013273253897068469, "grad_norm": 3.25, "learning_rate": 4.998691792647922e-05, "loss": 1.0003, "step": 936 }, { "epoch": 0.013301615550694684, "grad_norm": 3.21875, "learning_rate": 4.9986845556348116e-05, "loss": 0.9926, "step": 938 }, { "epoch": 0.013329977204320899, "grad_norm": 3.375, "learning_rate": 4.998677298664542e-05, "loss": 0.987, "step": 940 }, { "epoch": 0.013358338857947112, "grad_norm": 2.84375, "learning_rate": 4.9986700217371694e-05, "loss": 1.0087, "step": 942 }, { "epoch": 0.013386700511573327, "grad_norm": 3.15625, "learning_rate": 4.998662724852753e-05, "loss": 1.018, "step": 944 }, { "epoch": 0.013415062165199542, "grad_norm": 3.234375, "learning_rate": 4.9986554080113516e-05, "loss": 1.0081, "step": 946 }, { "epoch": 0.013443423818825757, "grad_norm": 3.015625, "learning_rate": 4.998648071213021e-05, "loss": 0.9899, "step": 948 }, { "epoch": 0.013471785472451972, "grad_norm": 3.21875, "learning_rate": 4.998640714457823e-05, "loss": 1.0116, "step": 950 }, { "epoch": 0.013500147126078187, "grad_norm": 3.34375, "learning_rate": 4.998633337745815e-05, "loss": 0.9632, "step": 952 }, { "epoch": 0.0135285087797044, "grad_norm": 3.859375, "learning_rate": 4.998625941077055e-05, "loss": 1.0225, "step": 954 }, { "epoch": 0.013556870433330615, "grad_norm": 2.96875, "learning_rate": 4.9986185244516035e-05, "loss": 0.9978, "step": 956 }, { "epoch": 0.01358523208695683, "grad_norm": 3.03125, "learning_rate": 4.9986110878695194e-05, "loss": 1.009, "step": 958 }, { "epoch": 0.013613593740583045, "grad_norm": 3.109375, "learning_rate": 4.9986036313308614e-05, "loss": 0.9876, "step": 960 }, { "epoch": 0.01364195539420926, "grad_norm": 2.78125, "learning_rate": 4.99859615483569e-05, "loss": 1.0383, "step": 962 }, { "epoch": 0.013670317047835475, "grad_norm": 3.78125, "learning_rate": 4.9985886583840634e-05, "loss": 1.0225, "step": 964 }, { "epoch": 0.013698678701461688, "grad_norm": 3.375, "learning_rate": 4.998581141976044e-05, "loss": 0.9649, "step": 966 }, { "epoch": 0.013727040355087903, "grad_norm": 3.109375, "learning_rate": 4.998573605611689e-05, "loss": 1.0711, "step": 968 }, { "epoch": 0.013755402008714118, "grad_norm": 3.75, "learning_rate": 4.998566049291061e-05, "loss": 1.0343, "step": 970 }, { "epoch": 0.013783763662340333, "grad_norm": 3.265625, "learning_rate": 4.9985584730142185e-05, "loss": 1.0372, "step": 972 }, { "epoch": 0.013812125315966548, "grad_norm": 4.125, "learning_rate": 4.998550876781224e-05, "loss": 0.9944, "step": 974 }, { "epoch": 0.013840486969592763, "grad_norm": 3.984375, "learning_rate": 4.998543260592136e-05, "loss": 1.0489, "step": 976 }, { "epoch": 0.013868848623218976, "grad_norm": 3.625, "learning_rate": 4.9985356244470165e-05, "loss": 1.0604, "step": 978 }, { "epoch": 0.013897210276845191, "grad_norm": 3.265625, "learning_rate": 4.9985279683459264e-05, "loss": 1.0474, "step": 980 }, { "epoch": 0.013925571930471406, "grad_norm": 3.25, "learning_rate": 4.998520292288927e-05, "loss": 0.9992, "step": 982 }, { "epoch": 0.013953933584097621, "grad_norm": 5.125, "learning_rate": 4.9985125962760794e-05, "loss": 1.0326, "step": 984 }, { "epoch": 0.013982295237723836, "grad_norm": 3.1875, "learning_rate": 4.998504880307444e-05, "loss": 0.9677, "step": 986 }, { "epoch": 0.01401065689135005, "grad_norm": 3.5625, "learning_rate": 4.998497144383084e-05, "loss": 1.0452, "step": 988 }, { "epoch": 0.014039018544976264, "grad_norm": 3.375, "learning_rate": 4.998489388503061e-05, "loss": 1.0298, "step": 990 }, { "epoch": 0.014067380198602479, "grad_norm": 3.53125, "learning_rate": 4.998481612667437e-05, "loss": 1.0814, "step": 992 }, { "epoch": 0.014095741852228694, "grad_norm": 3.15625, "learning_rate": 4.998473816876273e-05, "loss": 1.0021, "step": 994 }, { "epoch": 0.014124103505854909, "grad_norm": 3.671875, "learning_rate": 4.998466001129632e-05, "loss": 1.0291, "step": 996 }, { "epoch": 0.014152465159481124, "grad_norm": 3.21875, "learning_rate": 4.9984581654275764e-05, "loss": 1.038, "step": 998 }, { "epoch": 0.014180826813107339, "grad_norm": 3.46875, "learning_rate": 4.99845030977017e-05, "loss": 1.0501, "step": 1000 }, { "epoch": 0.014209188466733552, "grad_norm": 3.25, "learning_rate": 4.9984424341574724e-05, "loss": 0.9588, "step": 1002 }, { "epoch": 0.014237550120359767, "grad_norm": 3.40625, "learning_rate": 4.998434538589549e-05, "loss": 1.0341, "step": 1004 }, { "epoch": 0.014265911773985982, "grad_norm": 3.84375, "learning_rate": 4.998426623066463e-05, "loss": 1.0583, "step": 1006 }, { "epoch": 0.014294273427612197, "grad_norm": 2.9375, "learning_rate": 4.9984186875882764e-05, "loss": 0.9681, "step": 1008 }, { "epoch": 0.014322635081238412, "grad_norm": 3.203125, "learning_rate": 4.998410732155053e-05, "loss": 1.0716, "step": 1010 }, { "epoch": 0.014350996734864627, "grad_norm": 3.078125, "learning_rate": 4.998402756766857e-05, "loss": 1.0494, "step": 1012 }, { "epoch": 0.01437935838849084, "grad_norm": 3.390625, "learning_rate": 4.998394761423751e-05, "loss": 0.9572, "step": 1014 }, { "epoch": 0.014407720042117055, "grad_norm": 3.734375, "learning_rate": 4.9983867461257996e-05, "loss": 1.0078, "step": 1016 }, { "epoch": 0.01443608169574327, "grad_norm": 3.46875, "learning_rate": 4.998378710873067e-05, "loss": 1.0536, "step": 1018 }, { "epoch": 0.014464443349369485, "grad_norm": 3.21875, "learning_rate": 4.9983706556656165e-05, "loss": 0.9553, "step": 1020 }, { "epoch": 0.0144928050029957, "grad_norm": 3.078125, "learning_rate": 4.998362580503513e-05, "loss": 1.0132, "step": 1022 }, { "epoch": 0.014521166656621915, "grad_norm": 3.765625, "learning_rate": 4.9983544853868205e-05, "loss": 1.0496, "step": 1024 }, { "epoch": 0.014549528310248128, "grad_norm": 3.125, "learning_rate": 4.9983463703156045e-05, "loss": 1.0338, "step": 1026 }, { "epoch": 0.014577889963874343, "grad_norm": 3.265625, "learning_rate": 4.99833823528993e-05, "loss": 1.0567, "step": 1028 }, { "epoch": 0.014606251617500558, "grad_norm": 3.3125, "learning_rate": 4.99833008030986e-05, "loss": 1.0348, "step": 1030 }, { "epoch": 0.014634613271126773, "grad_norm": 3.0625, "learning_rate": 4.9983219053754627e-05, "loss": 1.0154, "step": 1032 }, { "epoch": 0.014662974924752988, "grad_norm": 3.53125, "learning_rate": 4.9983137104868005e-05, "loss": 0.9694, "step": 1034 }, { "epoch": 0.014691336578379203, "grad_norm": 3.84375, "learning_rate": 4.99830549564394e-05, "loss": 1.0494, "step": 1036 }, { "epoch": 0.014719698232005416, "grad_norm": 3.375, "learning_rate": 4.9982972608469474e-05, "loss": 0.997, "step": 1038 }, { "epoch": 0.014748059885631631, "grad_norm": 3.484375, "learning_rate": 4.998289006095888e-05, "loss": 1.0016, "step": 1040 }, { "epoch": 0.014776421539257846, "grad_norm": 2.984375, "learning_rate": 4.9982807313908273e-05, "loss": 1.0062, "step": 1042 }, { "epoch": 0.014804783192884061, "grad_norm": 3.25, "learning_rate": 4.9982724367318315e-05, "loss": 1.0157, "step": 1044 }, { "epoch": 0.014833144846510276, "grad_norm": 3.28125, "learning_rate": 4.998264122118968e-05, "loss": 1.0084, "step": 1046 }, { "epoch": 0.014861506500136491, "grad_norm": 2.921875, "learning_rate": 4.9982557875523016e-05, "loss": 1.0217, "step": 1048 }, { "epoch": 0.014889868153762705, "grad_norm": 3.515625, "learning_rate": 4.9982474330319e-05, "loss": 0.9879, "step": 1050 }, { "epoch": 0.01491822980738892, "grad_norm": 3.796875, "learning_rate": 4.9982390585578295e-05, "loss": 1.0983, "step": 1052 }, { "epoch": 0.014946591461015134, "grad_norm": 2.6875, "learning_rate": 4.998230664130157e-05, "loss": 0.9426, "step": 1054 }, { "epoch": 0.01497495311464135, "grad_norm": 3.21875, "learning_rate": 4.99822224974895e-05, "loss": 1.0467, "step": 1056 }, { "epoch": 0.015003314768267564, "grad_norm": 3.21875, "learning_rate": 4.998213815414274e-05, "loss": 1.0172, "step": 1058 }, { "epoch": 0.01503167642189378, "grad_norm": 3.515625, "learning_rate": 4.998205361126198e-05, "loss": 1.003, "step": 1060 }, { "epoch": 0.015060038075519993, "grad_norm": 3.765625, "learning_rate": 4.99819688688479e-05, "loss": 1.0466, "step": 1062 }, { "epoch": 0.015088399729146208, "grad_norm": 3.359375, "learning_rate": 4.998188392690116e-05, "loss": 0.9869, "step": 1064 }, { "epoch": 0.015116761382772423, "grad_norm": 3.0, "learning_rate": 4.9981798785422445e-05, "loss": 0.9381, "step": 1066 }, { "epoch": 0.015145123036398637, "grad_norm": 3.390625, "learning_rate": 4.998171344441244e-05, "loss": 1.0238, "step": 1068 }, { "epoch": 0.015173484690024852, "grad_norm": 3.125, "learning_rate": 4.998162790387183e-05, "loss": 0.9599, "step": 1070 }, { "epoch": 0.015201846343651067, "grad_norm": 3.40625, "learning_rate": 4.998154216380129e-05, "loss": 1.0138, "step": 1072 }, { "epoch": 0.01523020799727728, "grad_norm": 3.28125, "learning_rate": 4.998145622420149e-05, "loss": 0.9546, "step": 1074 }, { "epoch": 0.015258569650903496, "grad_norm": 3.46875, "learning_rate": 4.998137008507316e-05, "loss": 1.0203, "step": 1076 }, { "epoch": 0.01528693130452971, "grad_norm": 3.09375, "learning_rate": 4.998128374641694e-05, "loss": 0.9999, "step": 1078 }, { "epoch": 0.015315292958155926, "grad_norm": 3.03125, "learning_rate": 4.998119720823354e-05, "loss": 0.9725, "step": 1080 }, { "epoch": 0.01534365461178214, "grad_norm": 3.25, "learning_rate": 4.998111047052366e-05, "loss": 0.9585, "step": 1082 }, { "epoch": 0.015372016265408354, "grad_norm": 3.625, "learning_rate": 4.9981023533287984e-05, "loss": 1.0276, "step": 1084 }, { "epoch": 0.015400377919034569, "grad_norm": 3.265625, "learning_rate": 4.998093639652721e-05, "loss": 1.0176, "step": 1086 }, { "epoch": 0.015428739572660784, "grad_norm": 3.140625, "learning_rate": 4.998084906024202e-05, "loss": 0.9741, "step": 1088 }, { "epoch": 0.015457101226286999, "grad_norm": 3.71875, "learning_rate": 4.998076152443313e-05, "loss": 0.9763, "step": 1090 }, { "epoch": 0.015485462879913214, "grad_norm": 2.921875, "learning_rate": 4.9980673789101234e-05, "loss": 0.9724, "step": 1092 }, { "epoch": 0.015513824533539429, "grad_norm": 3.796875, "learning_rate": 4.9980585854247025e-05, "loss": 1.0816, "step": 1094 }, { "epoch": 0.015542186187165642, "grad_norm": 3.5625, "learning_rate": 4.998049771987121e-05, "loss": 1.0342, "step": 1096 }, { "epoch": 0.015570547840791857, "grad_norm": 3.28125, "learning_rate": 4.99804093859745e-05, "loss": 1.0689, "step": 1098 }, { "epoch": 0.015598909494418072, "grad_norm": 3.453125, "learning_rate": 4.998032085255759e-05, "loss": 0.9524, "step": 1100 }, { "epoch": 0.015627271148044287, "grad_norm": 3.203125, "learning_rate": 4.998023211962119e-05, "loss": 1.0013, "step": 1102 }, { "epoch": 0.0156556328016705, "grad_norm": 2.875, "learning_rate": 4.998014318716601e-05, "loss": 0.9643, "step": 1104 }, { "epoch": 0.015683994455296717, "grad_norm": 3.8125, "learning_rate": 4.998005405519276e-05, "loss": 0.9961, "step": 1106 }, { "epoch": 0.01571235610892293, "grad_norm": 3.234375, "learning_rate": 4.997996472370215e-05, "loss": 0.9969, "step": 1108 }, { "epoch": 0.015740717762549147, "grad_norm": 3.171875, "learning_rate": 4.9979875192694904e-05, "loss": 1.0208, "step": 1110 }, { "epoch": 0.01576907941617536, "grad_norm": 2.984375, "learning_rate": 4.997978546217172e-05, "loss": 0.9547, "step": 1112 }, { "epoch": 0.015797441069801573, "grad_norm": 3.40625, "learning_rate": 4.997969553213333e-05, "loss": 1.0023, "step": 1114 }, { "epoch": 0.01582580272342779, "grad_norm": 2.75, "learning_rate": 4.997960540258044e-05, "loss": 1.0083, "step": 1116 }, { "epoch": 0.015854164377054003, "grad_norm": 3.15625, "learning_rate": 4.997951507351378e-05, "loss": 0.9814, "step": 1118 }, { "epoch": 0.01588252603068022, "grad_norm": 3.28125, "learning_rate": 4.997942454493406e-05, "loss": 1.0643, "step": 1120 }, { "epoch": 0.015910887684306433, "grad_norm": 3.5, "learning_rate": 4.997933381684202e-05, "loss": 0.9972, "step": 1122 }, { "epoch": 0.01593924933793265, "grad_norm": 3.609375, "learning_rate": 4.997924288923836e-05, "loss": 1.0059, "step": 1124 }, { "epoch": 0.015967610991558863, "grad_norm": 3.515625, "learning_rate": 4.9979151762123834e-05, "loss": 1.0025, "step": 1126 }, { "epoch": 0.015995972645185076, "grad_norm": 3.203125, "learning_rate": 4.997906043549915e-05, "loss": 1.0496, "step": 1128 }, { "epoch": 0.016024334298811293, "grad_norm": 3.140625, "learning_rate": 4.9978968909365054e-05, "loss": 1.0303, "step": 1130 }, { "epoch": 0.016052695952437506, "grad_norm": 3.234375, "learning_rate": 4.997887718372226e-05, "loss": 1.0127, "step": 1132 }, { "epoch": 0.016081057606063723, "grad_norm": 3.34375, "learning_rate": 4.99787852585715e-05, "loss": 0.9693, "step": 1134 }, { "epoch": 0.016109419259689936, "grad_norm": 3.3125, "learning_rate": 4.997869313391353e-05, "loss": 1.0744, "step": 1136 }, { "epoch": 0.01613778091331615, "grad_norm": 3.28125, "learning_rate": 4.997860080974907e-05, "loss": 1.008, "step": 1138 }, { "epoch": 0.016166142566942366, "grad_norm": 3.15625, "learning_rate": 4.997850828607885e-05, "loss": 0.9879, "step": 1140 }, { "epoch": 0.01619450422056858, "grad_norm": 3.484375, "learning_rate": 4.997841556290362e-05, "loss": 0.9935, "step": 1142 }, { "epoch": 0.016222865874194796, "grad_norm": 3.140625, "learning_rate": 4.997832264022413e-05, "loss": 0.9493, "step": 1144 }, { "epoch": 0.01625122752782101, "grad_norm": 3.828125, "learning_rate": 4.9978229518041096e-05, "loss": 1.0122, "step": 1146 }, { "epoch": 0.016279589181447226, "grad_norm": 3.1875, "learning_rate": 4.997813619635529e-05, "loss": 0.9721, "step": 1148 }, { "epoch": 0.01630795083507344, "grad_norm": 3.046875, "learning_rate": 4.997804267516744e-05, "loss": 0.9836, "step": 1150 }, { "epoch": 0.016336312488699652, "grad_norm": 3.546875, "learning_rate": 4.997794895447829e-05, "loss": 1.0038, "step": 1152 }, { "epoch": 0.01636467414232587, "grad_norm": 3.359375, "learning_rate": 4.9977855034288603e-05, "loss": 1.0617, "step": 1154 }, { "epoch": 0.016393035795952082, "grad_norm": 3.234375, "learning_rate": 4.997776091459912e-05, "loss": 1.0078, "step": 1156 }, { "epoch": 0.0164213974495783, "grad_norm": 3.265625, "learning_rate": 4.99776665954106e-05, "loss": 1.0223, "step": 1158 }, { "epoch": 0.016449759103204512, "grad_norm": 3.4375, "learning_rate": 4.997757207672378e-05, "loss": 0.9568, "step": 1160 }, { "epoch": 0.016478120756830725, "grad_norm": 3.4375, "learning_rate": 4.997747735853943e-05, "loss": 0.9982, "step": 1162 }, { "epoch": 0.016506482410456942, "grad_norm": 3.5, "learning_rate": 4.99773824408583e-05, "loss": 1.0043, "step": 1164 }, { "epoch": 0.016534844064083155, "grad_norm": 4.03125, "learning_rate": 4.997728732368116e-05, "loss": 1.0374, "step": 1166 }, { "epoch": 0.016563205717709372, "grad_norm": 2.84375, "learning_rate": 4.997719200700875e-05, "loss": 0.9627, "step": 1168 }, { "epoch": 0.016591567371335585, "grad_norm": 3.140625, "learning_rate": 4.9977096490841846e-05, "loss": 1.0374, "step": 1170 }, { "epoch": 0.016619929024961802, "grad_norm": 2.859375, "learning_rate": 4.99770007751812e-05, "loss": 0.9399, "step": 1172 }, { "epoch": 0.016648290678588015, "grad_norm": 4.09375, "learning_rate": 4.9976904860027595e-05, "loss": 1.0486, "step": 1174 }, { "epoch": 0.01667665233221423, "grad_norm": 3.53125, "learning_rate": 4.997680874538178e-05, "loss": 1.0026, "step": 1176 }, { "epoch": 0.016705013985840445, "grad_norm": 3.328125, "learning_rate": 4.997671243124452e-05, "loss": 1.0324, "step": 1178 }, { "epoch": 0.01673337563946666, "grad_norm": 3.21875, "learning_rate": 4.9976615917616595e-05, "loss": 1.0327, "step": 1180 }, { "epoch": 0.016761737293092875, "grad_norm": 2.9375, "learning_rate": 4.997651920449878e-05, "loss": 0.9776, "step": 1182 }, { "epoch": 0.01679009894671909, "grad_norm": 2.75, "learning_rate": 4.9976422291891834e-05, "loss": 0.929, "step": 1184 }, { "epoch": 0.0168184606003453, "grad_norm": 3.265625, "learning_rate": 4.997632517979654e-05, "loss": 0.9847, "step": 1186 }, { "epoch": 0.01684682225397152, "grad_norm": 3.21875, "learning_rate": 4.997622786821367e-05, "loss": 1.0331, "step": 1188 }, { "epoch": 0.01687518390759773, "grad_norm": 3.390625, "learning_rate": 4.9976130357144e-05, "loss": 1.0048, "step": 1190 }, { "epoch": 0.016903545561223948, "grad_norm": 3.1875, "learning_rate": 4.997603264658832e-05, "loss": 0.9351, "step": 1192 }, { "epoch": 0.01693190721485016, "grad_norm": 3.484375, "learning_rate": 4.997593473654739e-05, "loss": 0.987, "step": 1194 }, { "epoch": 0.016960268868476378, "grad_norm": 3.328125, "learning_rate": 4.997583662702201e-05, "loss": 1.0626, "step": 1196 }, { "epoch": 0.01698863052210259, "grad_norm": 3.21875, "learning_rate": 4.997573831801296e-05, "loss": 1.0148, "step": 1198 }, { "epoch": 0.017016992175728805, "grad_norm": 3.09375, "learning_rate": 4.9975639809521005e-05, "loss": 0.9608, "step": 1200 }, { "epoch": 0.01704535382935502, "grad_norm": 2.90625, "learning_rate": 4.997554110154696e-05, "loss": 0.9711, "step": 1202 }, { "epoch": 0.017073715482981235, "grad_norm": 3.265625, "learning_rate": 4.997544219409161e-05, "loss": 1.0109, "step": 1204 }, { "epoch": 0.01710207713660745, "grad_norm": 3.453125, "learning_rate": 4.9975343087155726e-05, "loss": 1.0121, "step": 1206 }, { "epoch": 0.017130438790233665, "grad_norm": 3.6875, "learning_rate": 4.997524378074012e-05, "loss": 0.9851, "step": 1208 }, { "epoch": 0.017158800443859878, "grad_norm": 3.515625, "learning_rate": 4.9975144274845565e-05, "loss": 0.9395, "step": 1210 }, { "epoch": 0.017187162097486094, "grad_norm": 3.171875, "learning_rate": 4.997504456947287e-05, "loss": 0.9037, "step": 1212 }, { "epoch": 0.017215523751112308, "grad_norm": 3.203125, "learning_rate": 4.997494466462284e-05, "loss": 1.0069, "step": 1214 }, { "epoch": 0.017243885404738524, "grad_norm": 4.0, "learning_rate": 4.997484456029625e-05, "loss": 0.9966, "step": 1216 }, { "epoch": 0.017272247058364738, "grad_norm": 3.109375, "learning_rate": 4.997474425649391e-05, "loss": 0.9974, "step": 1218 }, { "epoch": 0.017300608711990954, "grad_norm": 2.96875, "learning_rate": 4.997464375321663e-05, "loss": 1.0135, "step": 1220 }, { "epoch": 0.017328970365617168, "grad_norm": 3.140625, "learning_rate": 4.997454305046519e-05, "loss": 0.9954, "step": 1222 }, { "epoch": 0.01735733201924338, "grad_norm": 3.0625, "learning_rate": 4.997444214824042e-05, "loss": 0.978, "step": 1224 }, { "epoch": 0.017385693672869598, "grad_norm": 3.15625, "learning_rate": 4.997434104654312e-05, "loss": 1.0173, "step": 1226 }, { "epoch": 0.01741405532649581, "grad_norm": 3.546875, "learning_rate": 4.997423974537408e-05, "loss": 1.0004, "step": 1228 }, { "epoch": 0.017442416980122027, "grad_norm": 3.25, "learning_rate": 4.997413824473413e-05, "loss": 1.0244, "step": 1230 }, { "epoch": 0.01747077863374824, "grad_norm": 3.65625, "learning_rate": 4.9974036544624063e-05, "loss": 1.0044, "step": 1232 }, { "epoch": 0.017499140287374454, "grad_norm": 3.25, "learning_rate": 4.9973934645044706e-05, "loss": 0.9482, "step": 1234 }, { "epoch": 0.01752750194100067, "grad_norm": 3.296875, "learning_rate": 4.997383254599687e-05, "loss": 0.995, "step": 1236 }, { "epoch": 0.017555863594626884, "grad_norm": 3.09375, "learning_rate": 4.9973730247481365e-05, "loss": 0.9939, "step": 1238 }, { "epoch": 0.0175842252482531, "grad_norm": 3.140625, "learning_rate": 4.997362774949901e-05, "loss": 1.0545, "step": 1240 }, { "epoch": 0.017612586901879314, "grad_norm": 3.53125, "learning_rate": 4.997352505205062e-05, "loss": 0.9429, "step": 1242 }, { "epoch": 0.017640948555505527, "grad_norm": 3.015625, "learning_rate": 4.997342215513703e-05, "loss": 1.006, "step": 1244 }, { "epoch": 0.017669310209131744, "grad_norm": 3.015625, "learning_rate": 4.9973319058759045e-05, "loss": 1.0089, "step": 1246 }, { "epoch": 0.017697671862757957, "grad_norm": 2.75, "learning_rate": 4.99732157629175e-05, "loss": 0.9778, "step": 1248 }, { "epoch": 0.017726033516384174, "grad_norm": 3.28125, "learning_rate": 4.9973112267613206e-05, "loss": 0.9763, "step": 1250 }, { "epoch": 0.017754395170010387, "grad_norm": 3.515625, "learning_rate": 4.9973008572847e-05, "loss": 0.9798, "step": 1252 }, { "epoch": 0.017782756823636604, "grad_norm": 3.171875, "learning_rate": 4.997290467861971e-05, "loss": 0.955, "step": 1254 }, { "epoch": 0.017811118477262817, "grad_norm": 3.078125, "learning_rate": 4.997280058493217e-05, "loss": 0.9724, "step": 1256 }, { "epoch": 0.01783948013088903, "grad_norm": 3.203125, "learning_rate": 4.9972696291785204e-05, "loss": 0.9792, "step": 1258 }, { "epoch": 0.017867841784515247, "grad_norm": 2.859375, "learning_rate": 4.997259179917964e-05, "loss": 0.9505, "step": 1260 }, { "epoch": 0.01789620343814146, "grad_norm": 3.234375, "learning_rate": 4.9972487107116336e-05, "loss": 0.9388, "step": 1262 }, { "epoch": 0.017924565091767677, "grad_norm": 3.609375, "learning_rate": 4.99723822155961e-05, "loss": 1.0486, "step": 1264 }, { "epoch": 0.01795292674539389, "grad_norm": 3.34375, "learning_rate": 4.997227712461978e-05, "loss": 1.0313, "step": 1266 }, { "epoch": 0.017981288399020103, "grad_norm": 3.578125, "learning_rate": 4.997217183418822e-05, "loss": 1.0311, "step": 1268 }, { "epoch": 0.01800965005264632, "grad_norm": 3.828125, "learning_rate": 4.997206634430226e-05, "loss": 1.0077, "step": 1270 }, { "epoch": 0.018038011706272533, "grad_norm": 4.40625, "learning_rate": 4.997196065496274e-05, "loss": 1.0111, "step": 1272 }, { "epoch": 0.01806637335989875, "grad_norm": 3.15625, "learning_rate": 4.99718547661705e-05, "loss": 0.9822, "step": 1274 }, { "epoch": 0.018094735013524963, "grad_norm": 3.65625, "learning_rate": 4.9971748677926396e-05, "loss": 0.998, "step": 1276 }, { "epoch": 0.01812309666715118, "grad_norm": 2.8125, "learning_rate": 4.9971642390231266e-05, "loss": 0.9695, "step": 1278 }, { "epoch": 0.018151458320777393, "grad_norm": 3.078125, "learning_rate": 4.9971535903085966e-05, "loss": 1.0122, "step": 1280 }, { "epoch": 0.018179819974403606, "grad_norm": 3.390625, "learning_rate": 4.997142921649134e-05, "loss": 0.9692, "step": 1282 }, { "epoch": 0.018208181628029823, "grad_norm": 3.0625, "learning_rate": 4.997132233044825e-05, "loss": 0.9654, "step": 1284 }, { "epoch": 0.018236543281656036, "grad_norm": 3.53125, "learning_rate": 4.9971215244957525e-05, "loss": 1.0124, "step": 1286 }, { "epoch": 0.018264904935282253, "grad_norm": 3.5, "learning_rate": 4.9971107960020054e-05, "loss": 1.0268, "step": 1288 }, { "epoch": 0.018293266588908466, "grad_norm": 3.390625, "learning_rate": 4.9971000475636674e-05, "loss": 0.964, "step": 1290 }, { "epoch": 0.01832162824253468, "grad_norm": 3.0, "learning_rate": 4.997089279180825e-05, "loss": 0.9454, "step": 1292 }, { "epoch": 0.018349989896160896, "grad_norm": 3.546875, "learning_rate": 4.997078490853563e-05, "loss": 1.0075, "step": 1294 }, { "epoch": 0.01837835154978711, "grad_norm": 2.890625, "learning_rate": 4.99706768258197e-05, "loss": 0.9953, "step": 1296 }, { "epoch": 0.018406713203413326, "grad_norm": 3.140625, "learning_rate": 4.99705685436613e-05, "loss": 0.9705, "step": 1298 }, { "epoch": 0.01843507485703954, "grad_norm": 3.34375, "learning_rate": 4.99704600620613e-05, "loss": 0.977, "step": 1300 }, { "epoch": 0.018463436510665756, "grad_norm": 3.25, "learning_rate": 4.997035138102058e-05, "loss": 1.0046, "step": 1302 }, { "epoch": 0.01849179816429197, "grad_norm": 3.015625, "learning_rate": 4.9970242500539985e-05, "loss": 0.9558, "step": 1304 }, { "epoch": 0.018520159817918182, "grad_norm": 3.421875, "learning_rate": 4.9970133420620415e-05, "loss": 0.9802, "step": 1306 }, { "epoch": 0.0185485214715444, "grad_norm": 2.984375, "learning_rate": 4.997002414126271e-05, "loss": 1.0092, "step": 1308 }, { "epoch": 0.018576883125170612, "grad_norm": 3.671875, "learning_rate": 4.9969914662467764e-05, "loss": 1.0219, "step": 1310 }, { "epoch": 0.01860524477879683, "grad_norm": 3.328125, "learning_rate": 4.996980498423644e-05, "loss": 1.0479, "step": 1312 }, { "epoch": 0.018633606432423042, "grad_norm": 3.25, "learning_rate": 4.9969695106569616e-05, "loss": 1.0206, "step": 1314 }, { "epoch": 0.018661968086049256, "grad_norm": 3.328125, "learning_rate": 4.996958502946818e-05, "loss": 0.986, "step": 1316 }, { "epoch": 0.018690329739675472, "grad_norm": 3.03125, "learning_rate": 4.9969474752933e-05, "loss": 0.9434, "step": 1318 }, { "epoch": 0.018718691393301685, "grad_norm": 2.90625, "learning_rate": 4.996936427696495e-05, "loss": 1.0413, "step": 1320 }, { "epoch": 0.018747053046927902, "grad_norm": 3.109375, "learning_rate": 4.9969253601564935e-05, "loss": 1.0068, "step": 1322 }, { "epoch": 0.018775414700554115, "grad_norm": 3.5, "learning_rate": 4.996914272673382e-05, "loss": 0.9977, "step": 1324 }, { "epoch": 0.018803776354180332, "grad_norm": 3.359375, "learning_rate": 4.9969031652472495e-05, "loss": 1.0753, "step": 1326 }, { "epoch": 0.018832138007806545, "grad_norm": 2.96875, "learning_rate": 4.996892037878185e-05, "loss": 0.975, "step": 1328 }, { "epoch": 0.01886049966143276, "grad_norm": 3.59375, "learning_rate": 4.9968808905662776e-05, "loss": 1.0303, "step": 1330 }, { "epoch": 0.018888861315058975, "grad_norm": 3.546875, "learning_rate": 4.996869723311616e-05, "loss": 1.0193, "step": 1332 }, { "epoch": 0.01891722296868519, "grad_norm": 3.359375, "learning_rate": 4.9968585361142885e-05, "loss": 1.0402, "step": 1334 }, { "epoch": 0.018945584622311405, "grad_norm": 3.265625, "learning_rate": 4.996847328974386e-05, "loss": 0.9332, "step": 1336 }, { "epoch": 0.01897394627593762, "grad_norm": 3.4375, "learning_rate": 4.996836101891997e-05, "loss": 1.0211, "step": 1338 }, { "epoch": 0.01900230792956383, "grad_norm": 3.625, "learning_rate": 4.996824854867212e-05, "loss": 1.0175, "step": 1340 }, { "epoch": 0.01903066958319005, "grad_norm": 3.8125, "learning_rate": 4.99681358790012e-05, "loss": 0.9963, "step": 1342 }, { "epoch": 0.01905903123681626, "grad_norm": 3.140625, "learning_rate": 4.996802300990811e-05, "loss": 1.0322, "step": 1344 }, { "epoch": 0.01908739289044248, "grad_norm": 3.953125, "learning_rate": 4.9967909941393764e-05, "loss": 0.9775, "step": 1346 }, { "epoch": 0.01911575454406869, "grad_norm": 3.109375, "learning_rate": 4.996779667345905e-05, "loss": 0.9432, "step": 1348 }, { "epoch": 0.01914411619769491, "grad_norm": 3.203125, "learning_rate": 4.996768320610489e-05, "loss": 0.9846, "step": 1350 }, { "epoch": 0.01917247785132112, "grad_norm": 3.171875, "learning_rate": 4.996756953933216e-05, "loss": 1.0315, "step": 1352 }, { "epoch": 0.019200839504947335, "grad_norm": 2.9375, "learning_rate": 4.9967455673141795e-05, "loss": 0.9563, "step": 1354 }, { "epoch": 0.01922920115857355, "grad_norm": 3.046875, "learning_rate": 4.9967341607534704e-05, "loss": 0.9794, "step": 1356 }, { "epoch": 0.019257562812199765, "grad_norm": 3.46875, "learning_rate": 4.996722734251178e-05, "loss": 1.0363, "step": 1358 }, { "epoch": 0.01928592446582598, "grad_norm": 3.1875, "learning_rate": 4.9967112878073944e-05, "loss": 1.0626, "step": 1360 }, { "epoch": 0.019314286119452195, "grad_norm": 3.078125, "learning_rate": 4.996699821422212e-05, "loss": 0.97, "step": 1362 }, { "epoch": 0.019342647773078408, "grad_norm": 3.546875, "learning_rate": 4.996688335095722e-05, "loss": 0.9886, "step": 1364 }, { "epoch": 0.019371009426704625, "grad_norm": 3.890625, "learning_rate": 4.996676828828014e-05, "loss": 0.95, "step": 1366 }, { "epoch": 0.019399371080330838, "grad_norm": 3.390625, "learning_rate": 4.996665302619183e-05, "loss": 0.9793, "step": 1368 }, { "epoch": 0.019427732733957054, "grad_norm": 3.125, "learning_rate": 4.9966537564693194e-05, "loss": 1.0197, "step": 1370 }, { "epoch": 0.019456094387583268, "grad_norm": 3.03125, "learning_rate": 4.996642190378515e-05, "loss": 1.0323, "step": 1372 }, { "epoch": 0.019484456041209484, "grad_norm": 3.078125, "learning_rate": 4.996630604346864e-05, "loss": 0.9732, "step": 1374 }, { "epoch": 0.019512817694835698, "grad_norm": 3.265625, "learning_rate": 4.996618998374458e-05, "loss": 1.0177, "step": 1376 }, { "epoch": 0.01954117934846191, "grad_norm": 2.921875, "learning_rate": 4.996607372461388e-05, "loss": 1.0088, "step": 1378 }, { "epoch": 0.019569541002088128, "grad_norm": 3.265625, "learning_rate": 4.99659572660775e-05, "loss": 0.9963, "step": 1380 }, { "epoch": 0.01959790265571434, "grad_norm": 3.328125, "learning_rate": 4.996584060813635e-05, "loss": 1.0344, "step": 1382 }, { "epoch": 0.019626264309340558, "grad_norm": 3.359375, "learning_rate": 4.996572375079136e-05, "loss": 0.9972, "step": 1384 }, { "epoch": 0.01965462596296677, "grad_norm": 4.15625, "learning_rate": 4.9965606694043474e-05, "loss": 1.049, "step": 1386 }, { "epoch": 0.019682987616592984, "grad_norm": 3.390625, "learning_rate": 4.996548943789362e-05, "loss": 0.9603, "step": 1388 }, { "epoch": 0.0197113492702192, "grad_norm": 3.03125, "learning_rate": 4.9965371982342735e-05, "loss": 0.9737, "step": 1390 }, { "epoch": 0.019739710923845414, "grad_norm": 3.65625, "learning_rate": 4.996525432739176e-05, "loss": 1.0291, "step": 1392 }, { "epoch": 0.01976807257747163, "grad_norm": 3.453125, "learning_rate": 4.996513647304164e-05, "loss": 1.0009, "step": 1394 }, { "epoch": 0.019796434231097844, "grad_norm": 3.65625, "learning_rate": 4.9965018419293306e-05, "loss": 0.998, "step": 1396 }, { "epoch": 0.019824795884724057, "grad_norm": 3.3125, "learning_rate": 4.99649001661477e-05, "loss": 0.9731, "step": 1398 }, { "epoch": 0.019853157538350274, "grad_norm": 2.984375, "learning_rate": 4.996478171360577e-05, "loss": 0.9657, "step": 1400 }, { "epoch": 0.019881519191976487, "grad_norm": 3.03125, "learning_rate": 4.996466306166847e-05, "loss": 0.9758, "step": 1402 }, { "epoch": 0.019909880845602704, "grad_norm": 3.125, "learning_rate": 4.996454421033674e-05, "loss": 1.0365, "step": 1404 }, { "epoch": 0.019938242499228917, "grad_norm": 3.234375, "learning_rate": 4.996442515961153e-05, "loss": 1.0159, "step": 1406 }, { "epoch": 0.019966604152855134, "grad_norm": 2.90625, "learning_rate": 4.996430590949379e-05, "loss": 0.9615, "step": 1408 }, { "epoch": 0.019994965806481347, "grad_norm": 3.15625, "learning_rate": 4.9964186459984474e-05, "loss": 1.0178, "step": 1410 }, { "epoch": 0.02002332746010756, "grad_norm": 3.25, "learning_rate": 4.996406681108453e-05, "loss": 0.9805, "step": 1412 }, { "epoch": 0.020051689113733777, "grad_norm": 3.234375, "learning_rate": 4.9963946962794925e-05, "loss": 1.0389, "step": 1414 }, { "epoch": 0.02008005076735999, "grad_norm": 3.046875, "learning_rate": 4.996382691511661e-05, "loss": 0.9989, "step": 1416 }, { "epoch": 0.020108412420986207, "grad_norm": 3.265625, "learning_rate": 4.996370666805055e-05, "loss": 1.0183, "step": 1418 }, { "epoch": 0.02013677407461242, "grad_norm": 3.71875, "learning_rate": 4.996358622159769e-05, "loss": 1.0716, "step": 1420 }, { "epoch": 0.020165135728238633, "grad_norm": 3.609375, "learning_rate": 4.9963465575759006e-05, "loss": 1.0465, "step": 1422 }, { "epoch": 0.02019349738186485, "grad_norm": 3.25, "learning_rate": 4.9963344730535455e-05, "loss": 0.9909, "step": 1424 }, { "epoch": 0.020221859035491063, "grad_norm": 3.0, "learning_rate": 4.9963223685928005e-05, "loss": 0.9544, "step": 1426 }, { "epoch": 0.02025022068911728, "grad_norm": 3.515625, "learning_rate": 4.9963102441937626e-05, "loss": 0.9657, "step": 1428 }, { "epoch": 0.020278582342743493, "grad_norm": 3.203125, "learning_rate": 4.996298099856528e-05, "loss": 0.9926, "step": 1430 }, { "epoch": 0.02030694399636971, "grad_norm": 3.3125, "learning_rate": 4.996285935581194e-05, "loss": 1.0087, "step": 1432 }, { "epoch": 0.020335305649995923, "grad_norm": 3.109375, "learning_rate": 4.996273751367858e-05, "loss": 0.9596, "step": 1434 }, { "epoch": 0.020363667303622136, "grad_norm": 3.328125, "learning_rate": 4.9962615472166166e-05, "loss": 1.0273, "step": 1436 }, { "epoch": 0.020392028957248353, "grad_norm": 2.859375, "learning_rate": 4.996249323127568e-05, "loss": 0.9797, "step": 1438 }, { "epoch": 0.020420390610874566, "grad_norm": 3.3125, "learning_rate": 4.9962370791008093e-05, "loss": 0.9683, "step": 1440 }, { "epoch": 0.020448752264500783, "grad_norm": 3.203125, "learning_rate": 4.996224815136439e-05, "loss": 1.0476, "step": 1442 }, { "epoch": 0.020477113918126996, "grad_norm": 3.359375, "learning_rate": 4.996212531234554e-05, "loss": 1.0006, "step": 1444 }, { "epoch": 0.02050547557175321, "grad_norm": 3.171875, "learning_rate": 4.996200227395254e-05, "loss": 1.0025, "step": 1446 }, { "epoch": 0.020533837225379426, "grad_norm": 3.265625, "learning_rate": 4.9961879036186355e-05, "loss": 0.9417, "step": 1448 }, { "epoch": 0.02056219887900564, "grad_norm": 3.4375, "learning_rate": 4.9961755599047975e-05, "loss": 0.9288, "step": 1450 }, { "epoch": 0.020590560532631856, "grad_norm": 3.390625, "learning_rate": 4.996163196253839e-05, "loss": 1.0549, "step": 1452 }, { "epoch": 0.02061892218625807, "grad_norm": 3.375, "learning_rate": 4.996150812665859e-05, "loss": 0.9749, "step": 1454 }, { "epoch": 0.020647283839884286, "grad_norm": 3.078125, "learning_rate": 4.9961384091409555e-05, "loss": 1.011, "step": 1456 }, { "epoch": 0.0206756454935105, "grad_norm": 2.96875, "learning_rate": 4.9961259856792285e-05, "loss": 0.9912, "step": 1458 }, { "epoch": 0.020704007147136713, "grad_norm": 2.953125, "learning_rate": 4.9961135422807765e-05, "loss": 0.9577, "step": 1460 }, { "epoch": 0.02073236880076293, "grad_norm": 3.046875, "learning_rate": 4.996101078945699e-05, "loss": 0.9743, "step": 1462 }, { "epoch": 0.020760730454389142, "grad_norm": 3.25, "learning_rate": 4.9960885956740956e-05, "loss": 1.011, "step": 1464 }, { "epoch": 0.02078909210801536, "grad_norm": 3.328125, "learning_rate": 4.9960760924660666e-05, "loss": 0.9702, "step": 1466 }, { "epoch": 0.020817453761641572, "grad_norm": 3.375, "learning_rate": 4.9960635693217103e-05, "loss": 0.99, "step": 1468 }, { "epoch": 0.020845815415267786, "grad_norm": 3.359375, "learning_rate": 4.996051026241129e-05, "loss": 0.9512, "step": 1470 }, { "epoch": 0.020874177068894002, "grad_norm": 2.9375, "learning_rate": 4.9960384632244216e-05, "loss": 1.0469, "step": 1472 }, { "epoch": 0.020902538722520216, "grad_norm": 2.640625, "learning_rate": 4.996025880271688e-05, "loss": 0.9317, "step": 1474 }, { "epoch": 0.020930900376146432, "grad_norm": 3.8125, "learning_rate": 4.996013277383029e-05, "loss": 0.9458, "step": 1476 }, { "epoch": 0.020959262029772645, "grad_norm": 3.046875, "learning_rate": 4.996000654558546e-05, "loss": 1.0269, "step": 1478 }, { "epoch": 0.020987623683398862, "grad_norm": 3.125, "learning_rate": 4.995988011798339e-05, "loss": 1.0523, "step": 1480 }, { "epoch": 0.021015985337025075, "grad_norm": 3.4375, "learning_rate": 4.9959753491025095e-05, "loss": 0.9595, "step": 1482 }, { "epoch": 0.02104434699065129, "grad_norm": 3.234375, "learning_rate": 4.995962666471158e-05, "loss": 1.0159, "step": 1484 }, { "epoch": 0.021072708644277505, "grad_norm": 2.75, "learning_rate": 4.9959499639043864e-05, "loss": 1.0325, "step": 1486 }, { "epoch": 0.02110107029790372, "grad_norm": 3.15625, "learning_rate": 4.995937241402297e-05, "loss": 0.9696, "step": 1488 }, { "epoch": 0.021129431951529935, "grad_norm": 3.015625, "learning_rate": 4.995924498964989e-05, "loss": 0.9668, "step": 1490 }, { "epoch": 0.02115779360515615, "grad_norm": 3.453125, "learning_rate": 4.9959117365925654e-05, "loss": 0.9939, "step": 1492 }, { "epoch": 0.021186155258782362, "grad_norm": 3.25, "learning_rate": 4.9958989542851283e-05, "loss": 0.9625, "step": 1494 }, { "epoch": 0.02121451691240858, "grad_norm": 2.78125, "learning_rate": 4.995886152042781e-05, "loss": 0.9667, "step": 1496 }, { "epoch": 0.02124287856603479, "grad_norm": 3.03125, "learning_rate": 4.995873329865624e-05, "loss": 0.9705, "step": 1498 }, { "epoch": 0.02127124021966101, "grad_norm": 4.125, "learning_rate": 4.9958604877537596e-05, "loss": 1.0069, "step": 1500 }, { "epoch": 0.02129960187328722, "grad_norm": 4.0625, "learning_rate": 4.9958476257072914e-05, "loss": 1.0081, "step": 1502 }, { "epoch": 0.02132796352691344, "grad_norm": 3.265625, "learning_rate": 4.995834743726322e-05, "loss": 0.9986, "step": 1504 }, { "epoch": 0.02135632518053965, "grad_norm": 3.875, "learning_rate": 4.995821841810954e-05, "loss": 0.9558, "step": 1506 }, { "epoch": 0.021384686834165865, "grad_norm": 2.96875, "learning_rate": 4.99580891996129e-05, "loss": 0.9662, "step": 1508 }, { "epoch": 0.02141304848779208, "grad_norm": 3.125, "learning_rate": 4.9957959781774344e-05, "loss": 0.9746, "step": 1510 }, { "epoch": 0.021441410141418295, "grad_norm": 3.375, "learning_rate": 4.9957830164594893e-05, "loss": 1.0243, "step": 1512 }, { "epoch": 0.02146977179504451, "grad_norm": 2.921875, "learning_rate": 4.9957700348075584e-05, "loss": 1.0064, "step": 1514 }, { "epoch": 0.021498133448670725, "grad_norm": 3.015625, "learning_rate": 4.995757033221747e-05, "loss": 0.9553, "step": 1516 }, { "epoch": 0.021526495102296938, "grad_norm": 3.09375, "learning_rate": 4.995744011702157e-05, "loss": 0.9645, "step": 1518 }, { "epoch": 0.021554856755923155, "grad_norm": 3.125, "learning_rate": 4.995730970248893e-05, "loss": 0.9829, "step": 1520 }, { "epoch": 0.021583218409549368, "grad_norm": 2.84375, "learning_rate": 4.995717908862059e-05, "loss": 0.9936, "step": 1522 }, { "epoch": 0.021611580063175585, "grad_norm": 3.3125, "learning_rate": 4.99570482754176e-05, "loss": 1.02, "step": 1524 }, { "epoch": 0.021639941716801798, "grad_norm": 2.96875, "learning_rate": 4.9956917262881e-05, "loss": 0.9703, "step": 1526 }, { "epoch": 0.021668303370428015, "grad_norm": 3.40625, "learning_rate": 4.995678605101184e-05, "loss": 0.9509, "step": 1528 }, { "epoch": 0.021696665024054228, "grad_norm": 4.34375, "learning_rate": 4.9956654639811165e-05, "loss": 0.9992, "step": 1530 }, { "epoch": 0.02172502667768044, "grad_norm": 3.203125, "learning_rate": 4.995652302928002e-05, "loss": 1.0186, "step": 1532 }, { "epoch": 0.021753388331306658, "grad_norm": 3.359375, "learning_rate": 4.9956391219419464e-05, "loss": 0.9736, "step": 1534 }, { "epoch": 0.02178174998493287, "grad_norm": 3.296875, "learning_rate": 4.9956259210230544e-05, "loss": 0.9685, "step": 1536 }, { "epoch": 0.021810111638559088, "grad_norm": 3.34375, "learning_rate": 4.995612700171432e-05, "loss": 1.0086, "step": 1538 }, { "epoch": 0.0218384732921853, "grad_norm": 3.0, "learning_rate": 4.9955994593871846e-05, "loss": 0.9383, "step": 1540 }, { "epoch": 0.021866834945811514, "grad_norm": 2.96875, "learning_rate": 4.9955861986704175e-05, "loss": 1.0382, "step": 1542 }, { "epoch": 0.02189519659943773, "grad_norm": 3.59375, "learning_rate": 4.995572918021237e-05, "loss": 0.9803, "step": 1544 }, { "epoch": 0.021923558253063944, "grad_norm": 3.5625, "learning_rate": 4.995559617439749e-05, "loss": 0.9705, "step": 1546 }, { "epoch": 0.02195191990669016, "grad_norm": 3.15625, "learning_rate": 4.995546296926061e-05, "loss": 1.0152, "step": 1548 }, { "epoch": 0.021980281560316374, "grad_norm": 3.1875, "learning_rate": 4.9955329564802774e-05, "loss": 0.9243, "step": 1550 }, { "epoch": 0.022008643213942587, "grad_norm": 3.1875, "learning_rate": 4.995519596102506e-05, "loss": 0.9974, "step": 1552 }, { "epoch": 0.022037004867568804, "grad_norm": 3.203125, "learning_rate": 4.995506215792852e-05, "loss": 0.9629, "step": 1554 }, { "epoch": 0.022065366521195017, "grad_norm": 3.390625, "learning_rate": 4.995492815551425e-05, "loss": 1.0142, "step": 1556 }, { "epoch": 0.022093728174821234, "grad_norm": 3.28125, "learning_rate": 4.995479395378329e-05, "loss": 0.9821, "step": 1558 }, { "epoch": 0.022122089828447447, "grad_norm": 3.140625, "learning_rate": 4.9954659552736736e-05, "loss": 0.9923, "step": 1560 }, { "epoch": 0.022150451482073664, "grad_norm": 2.78125, "learning_rate": 4.9954524952375646e-05, "loss": 0.9458, "step": 1562 }, { "epoch": 0.022178813135699877, "grad_norm": 3.1875, "learning_rate": 4.9954390152701104e-05, "loss": 1.0019, "step": 1564 }, { "epoch": 0.02220717478932609, "grad_norm": 3.1875, "learning_rate": 4.995425515371418e-05, "loss": 1.0018, "step": 1566 }, { "epoch": 0.022235536442952307, "grad_norm": 3.578125, "learning_rate": 4.995411995541596e-05, "loss": 0.9671, "step": 1568 }, { "epoch": 0.02226389809657852, "grad_norm": 2.953125, "learning_rate": 4.995398455780752e-05, "loss": 0.9419, "step": 1570 }, { "epoch": 0.022292259750204737, "grad_norm": 3.65625, "learning_rate": 4.995384896088994e-05, "loss": 0.9976, "step": 1572 }, { "epoch": 0.02232062140383095, "grad_norm": 3.1875, "learning_rate": 4.9953713164664294e-05, "loss": 1.0145, "step": 1574 }, { "epoch": 0.022348983057457163, "grad_norm": 3.71875, "learning_rate": 4.9953577169131696e-05, "loss": 1.0217, "step": 1576 }, { "epoch": 0.02237734471108338, "grad_norm": 2.96875, "learning_rate": 4.9953440974293194e-05, "loss": 0.9768, "step": 1578 }, { "epoch": 0.022405706364709593, "grad_norm": 3.234375, "learning_rate": 4.995330458014991e-05, "loss": 0.9724, "step": 1580 }, { "epoch": 0.02243406801833581, "grad_norm": 2.90625, "learning_rate": 4.9953167986702905e-05, "loss": 1.0012, "step": 1582 }, { "epoch": 0.022462429671962023, "grad_norm": 3.375, "learning_rate": 4.9953031193953285e-05, "loss": 0.9853, "step": 1584 }, { "epoch": 0.02249079132558824, "grad_norm": 3.0, "learning_rate": 4.9952894201902146e-05, "loss": 0.9182, "step": 1586 }, { "epoch": 0.022519152979214453, "grad_norm": 3.234375, "learning_rate": 4.995275701055058e-05, "loss": 0.9459, "step": 1588 }, { "epoch": 0.022547514632840666, "grad_norm": 2.921875, "learning_rate": 4.9952619619899665e-05, "loss": 0.9647, "step": 1590 }, { "epoch": 0.022575876286466883, "grad_norm": 3.109375, "learning_rate": 4.995248202995052e-05, "loss": 0.9983, "step": 1592 }, { "epoch": 0.022604237940093096, "grad_norm": 3.375, "learning_rate": 4.995234424070424e-05, "loss": 1.0178, "step": 1594 }, { "epoch": 0.022632599593719313, "grad_norm": 3.46875, "learning_rate": 4.995220625216192e-05, "loss": 0.9892, "step": 1596 }, { "epoch": 0.022660961247345526, "grad_norm": 3.21875, "learning_rate": 4.995206806432466e-05, "loss": 1.0128, "step": 1598 }, { "epoch": 0.02268932290097174, "grad_norm": 3.296875, "learning_rate": 4.9951929677193576e-05, "loss": 0.9676, "step": 1600 }, { "epoch": 0.022717684554597956, "grad_norm": 2.828125, "learning_rate": 4.995179109076976e-05, "loss": 0.8941, "step": 1602 }, { "epoch": 0.02274604620822417, "grad_norm": 3.171875, "learning_rate": 4.995165230505433e-05, "loss": 0.9697, "step": 1604 }, { "epoch": 0.022774407861850386, "grad_norm": 3.484375, "learning_rate": 4.9951513320048384e-05, "loss": 0.9746, "step": 1606 }, { "epoch": 0.0228027695154766, "grad_norm": 3.40625, "learning_rate": 4.9951374135753035e-05, "loss": 0.9942, "step": 1608 }, { "epoch": 0.022831131169102816, "grad_norm": 3.234375, "learning_rate": 4.995123475216941e-05, "loss": 1.0235, "step": 1610 }, { "epoch": 0.02285949282272903, "grad_norm": 3.0, "learning_rate": 4.995109516929859e-05, "loss": 0.9384, "step": 1612 }, { "epoch": 0.022887854476355243, "grad_norm": 3.09375, "learning_rate": 4.9950955387141715e-05, "loss": 0.9656, "step": 1614 }, { "epoch": 0.02291621612998146, "grad_norm": 3.390625, "learning_rate": 4.9950815405699905e-05, "loss": 1.0278, "step": 1616 }, { "epoch": 0.022944577783607673, "grad_norm": 3.3125, "learning_rate": 4.9950675224974256e-05, "loss": 1.0221, "step": 1618 }, { "epoch": 0.02297293943723389, "grad_norm": 3.0625, "learning_rate": 4.9950534844965904e-05, "loss": 1.017, "step": 1620 }, { "epoch": 0.023001301090860102, "grad_norm": 2.953125, "learning_rate": 4.995039426567596e-05, "loss": 0.9876, "step": 1622 }, { "epoch": 0.023029662744486316, "grad_norm": 3.6875, "learning_rate": 4.995025348710556e-05, "loss": 0.9416, "step": 1624 }, { "epoch": 0.023058024398112532, "grad_norm": 3.609375, "learning_rate": 4.995011250925582e-05, "loss": 1.0165, "step": 1626 }, { "epoch": 0.023086386051738746, "grad_norm": 4.0625, "learning_rate": 4.994997133212787e-05, "loss": 0.9674, "step": 1628 }, { "epoch": 0.023114747705364962, "grad_norm": 3.015625, "learning_rate": 4.9949829955722826e-05, "loss": 0.9787, "step": 1630 }, { "epoch": 0.023143109358991176, "grad_norm": 2.984375, "learning_rate": 4.994968838004184e-05, "loss": 0.9699, "step": 1632 }, { "epoch": 0.023171471012617392, "grad_norm": 3.15625, "learning_rate": 4.9949546605086015e-05, "loss": 0.9735, "step": 1634 }, { "epoch": 0.023199832666243606, "grad_norm": 3.59375, "learning_rate": 4.99494046308565e-05, "loss": 1.0388, "step": 1636 }, { "epoch": 0.02322819431986982, "grad_norm": 2.9375, "learning_rate": 4.9949262457354426e-05, "loss": 1.0158, "step": 1638 }, { "epoch": 0.023256555973496035, "grad_norm": 2.828125, "learning_rate": 4.9949120084580936e-05, "loss": 0.9289, "step": 1640 }, { "epoch": 0.02328491762712225, "grad_norm": 2.765625, "learning_rate": 4.994897751253715e-05, "loss": 0.9808, "step": 1642 }, { "epoch": 0.023313279280748465, "grad_norm": 3.171875, "learning_rate": 4.9948834741224226e-05, "loss": 0.9562, "step": 1644 }, { "epoch": 0.02334164093437468, "grad_norm": 3.09375, "learning_rate": 4.994869177064329e-05, "loss": 1.04, "step": 1646 }, { "epoch": 0.023370002588000892, "grad_norm": 3.015625, "learning_rate": 4.9948548600795485e-05, "loss": 0.9566, "step": 1648 }, { "epoch": 0.02339836424162711, "grad_norm": 3.140625, "learning_rate": 4.994840523168197e-05, "loss": 0.9769, "step": 1650 }, { "epoch": 0.023426725895253322, "grad_norm": 3.328125, "learning_rate": 4.994826166330386e-05, "loss": 0.9202, "step": 1652 }, { "epoch": 0.02345508754887954, "grad_norm": 3.5, "learning_rate": 4.994811789566234e-05, "loss": 0.9775, "step": 1654 }, { "epoch": 0.023483449202505752, "grad_norm": 3.296875, "learning_rate": 4.994797392875853e-05, "loss": 0.9515, "step": 1656 }, { "epoch": 0.02351181085613197, "grad_norm": 3.625, "learning_rate": 4.994782976259358e-05, "loss": 1.013, "step": 1658 }, { "epoch": 0.02354017250975818, "grad_norm": 3.515625, "learning_rate": 4.994768539716866e-05, "loss": 0.9947, "step": 1660 }, { "epoch": 0.023568534163384395, "grad_norm": 3.078125, "learning_rate": 4.9947540832484904e-05, "loss": 0.9654, "step": 1662 }, { "epoch": 0.02359689581701061, "grad_norm": 3.296875, "learning_rate": 4.994739606854348e-05, "loss": 0.9394, "step": 1664 }, { "epoch": 0.023625257470636825, "grad_norm": 2.96875, "learning_rate": 4.994725110534555e-05, "loss": 1.0203, "step": 1666 }, { "epoch": 0.02365361912426304, "grad_norm": 3.515625, "learning_rate": 4.9947105942892244e-05, "loss": 0.9882, "step": 1668 }, { "epoch": 0.023681980777889255, "grad_norm": 3.1875, "learning_rate": 4.994696058118476e-05, "loss": 0.9478, "step": 1670 }, { "epoch": 0.023710342431515468, "grad_norm": 2.9375, "learning_rate": 4.9946815020224215e-05, "loss": 0.9161, "step": 1672 }, { "epoch": 0.023738704085141685, "grad_norm": 3.1875, "learning_rate": 4.99466692600118e-05, "loss": 0.9529, "step": 1674 }, { "epoch": 0.023767065738767898, "grad_norm": 3.046875, "learning_rate": 4.994652330054868e-05, "loss": 0.9861, "step": 1676 }, { "epoch": 0.023795427392394115, "grad_norm": 2.953125, "learning_rate": 4.994637714183601e-05, "loss": 0.9934, "step": 1678 }, { "epoch": 0.023823789046020328, "grad_norm": 3.234375, "learning_rate": 4.994623078387497e-05, "loss": 0.9735, "step": 1680 }, { "epoch": 0.023852150699646545, "grad_norm": 3.3125, "learning_rate": 4.994608422666671e-05, "loss": 1.0267, "step": 1682 }, { "epoch": 0.023880512353272758, "grad_norm": 3.796875, "learning_rate": 4.994593747021242e-05, "loss": 0.9541, "step": 1684 }, { "epoch": 0.02390887400689897, "grad_norm": 3.046875, "learning_rate": 4.9945790514513255e-05, "loss": 1.0137, "step": 1686 }, { "epoch": 0.023937235660525188, "grad_norm": 3.09375, "learning_rate": 4.99456433595704e-05, "loss": 0.9788, "step": 1688 }, { "epoch": 0.0239655973141514, "grad_norm": 3.0, "learning_rate": 4.9945496005385036e-05, "loss": 0.9362, "step": 1690 }, { "epoch": 0.023993958967777618, "grad_norm": 3.25, "learning_rate": 4.994534845195832e-05, "loss": 0.9626, "step": 1692 }, { "epoch": 0.02402232062140383, "grad_norm": 3.578125, "learning_rate": 4.994520069929145e-05, "loss": 0.9866, "step": 1694 }, { "epoch": 0.024050682275030044, "grad_norm": 3.21875, "learning_rate": 4.994505274738559e-05, "loss": 0.9931, "step": 1696 }, { "epoch": 0.02407904392865626, "grad_norm": 3.078125, "learning_rate": 4.9944904596241936e-05, "loss": 1.0025, "step": 1698 }, { "epoch": 0.024107405582282474, "grad_norm": 3.125, "learning_rate": 4.994475624586167e-05, "loss": 0.9802, "step": 1700 }, { "epoch": 0.02413576723590869, "grad_norm": 3.171875, "learning_rate": 4.994460769624596e-05, "loss": 1.0029, "step": 1702 }, { "epoch": 0.024164128889534904, "grad_norm": 3.3125, "learning_rate": 4.994445894739601e-05, "loss": 0.9358, "step": 1704 }, { "epoch": 0.024192490543161117, "grad_norm": 3.34375, "learning_rate": 4.994430999931301e-05, "loss": 0.9599, "step": 1706 }, { "epoch": 0.024220852196787334, "grad_norm": 3.25, "learning_rate": 4.994416085199813e-05, "loss": 1.0063, "step": 1708 }, { "epoch": 0.024249213850413547, "grad_norm": 3.03125, "learning_rate": 4.9944011505452585e-05, "loss": 0.982, "step": 1710 }, { "epoch": 0.024277575504039764, "grad_norm": 3.484375, "learning_rate": 4.994386195967754e-05, "loss": 0.9737, "step": 1712 }, { "epoch": 0.024305937157665977, "grad_norm": 3.59375, "learning_rate": 4.994371221467422e-05, "loss": 0.9453, "step": 1714 }, { "epoch": 0.024334298811292194, "grad_norm": 3.140625, "learning_rate": 4.994356227044381e-05, "loss": 0.9314, "step": 1716 }, { "epoch": 0.024362660464918407, "grad_norm": 3.6875, "learning_rate": 4.994341212698749e-05, "loss": 1.0388, "step": 1718 }, { "epoch": 0.02439102211854462, "grad_norm": 3.265625, "learning_rate": 4.9943261784306485e-05, "loss": 0.998, "step": 1720 }, { "epoch": 0.024419383772170837, "grad_norm": 3.390625, "learning_rate": 4.9943111242401974e-05, "loss": 1.0044, "step": 1722 }, { "epoch": 0.02444774542579705, "grad_norm": 3.421875, "learning_rate": 4.994296050127518e-05, "loss": 0.9648, "step": 1724 }, { "epoch": 0.024476107079423267, "grad_norm": 3.34375, "learning_rate": 4.99428095609273e-05, "loss": 0.9665, "step": 1726 }, { "epoch": 0.02450446873304948, "grad_norm": 3.03125, "learning_rate": 4.9942658421359525e-05, "loss": 0.9839, "step": 1728 }, { "epoch": 0.024532830386675693, "grad_norm": 2.90625, "learning_rate": 4.994250708257308e-05, "loss": 0.9904, "step": 1730 }, { "epoch": 0.02456119204030191, "grad_norm": 3.515625, "learning_rate": 4.994235554456916e-05, "loss": 0.9641, "step": 1732 }, { "epoch": 0.024589553693928123, "grad_norm": 3.3125, "learning_rate": 4.9942203807348985e-05, "loss": 0.9384, "step": 1734 }, { "epoch": 0.02461791534755434, "grad_norm": 3.40625, "learning_rate": 4.994205187091377e-05, "loss": 1.0115, "step": 1736 }, { "epoch": 0.024646277001180553, "grad_norm": 3.625, "learning_rate": 4.994189973526472e-05, "loss": 0.9677, "step": 1738 }, { "epoch": 0.02467463865480677, "grad_norm": 2.890625, "learning_rate": 4.994174740040305e-05, "loss": 0.9269, "step": 1740 }, { "epoch": 0.024703000308432983, "grad_norm": 2.859375, "learning_rate": 4.994159486632999e-05, "loss": 0.9646, "step": 1742 }, { "epoch": 0.024731361962059197, "grad_norm": 3.359375, "learning_rate": 4.9941442133046735e-05, "loss": 0.9975, "step": 1744 }, { "epoch": 0.024759723615685413, "grad_norm": 3.09375, "learning_rate": 4.9941289200554524e-05, "loss": 0.9819, "step": 1746 }, { "epoch": 0.024788085269311626, "grad_norm": 3.671875, "learning_rate": 4.994113606885458e-05, "loss": 1.0289, "step": 1748 }, { "epoch": 0.024816446922937843, "grad_norm": 3.71875, "learning_rate": 4.9940982737948106e-05, "loss": 0.994, "step": 1750 }, { "epoch": 0.024844808576564056, "grad_norm": 2.9375, "learning_rate": 4.9940829207836346e-05, "loss": 0.8944, "step": 1752 }, { "epoch": 0.02487317023019027, "grad_norm": 3.1875, "learning_rate": 4.9940675478520516e-05, "loss": 0.9585, "step": 1754 }, { "epoch": 0.024901531883816486, "grad_norm": 2.75, "learning_rate": 4.994052155000185e-05, "loss": 0.9787, "step": 1756 }, { "epoch": 0.0249298935374427, "grad_norm": 3.140625, "learning_rate": 4.994036742228158e-05, "loss": 0.9633, "step": 1758 }, { "epoch": 0.024958255191068916, "grad_norm": 3.390625, "learning_rate": 4.994021309536092e-05, "loss": 1.0009, "step": 1760 }, { "epoch": 0.02498661684469513, "grad_norm": 3.0, "learning_rate": 4.9940058569241125e-05, "loss": 0.9903, "step": 1762 }, { "epoch": 0.025014978498321346, "grad_norm": 3.234375, "learning_rate": 4.993990384392341e-05, "loss": 0.9569, "step": 1764 }, { "epoch": 0.02504334015194756, "grad_norm": 3.15625, "learning_rate": 4.993974891940902e-05, "loss": 0.9532, "step": 1766 }, { "epoch": 0.025071701805573773, "grad_norm": 3.4375, "learning_rate": 4.99395937956992e-05, "loss": 0.9846, "step": 1768 }, { "epoch": 0.02510006345919999, "grad_norm": 3.03125, "learning_rate": 4.993943847279517e-05, "loss": 0.9399, "step": 1770 }, { "epoch": 0.025128425112826203, "grad_norm": 3.546875, "learning_rate": 4.9939282950698195e-05, "loss": 0.98, "step": 1772 }, { "epoch": 0.02515678676645242, "grad_norm": 3.078125, "learning_rate": 4.99391272294095e-05, "loss": 0.9429, "step": 1774 }, { "epoch": 0.025185148420078633, "grad_norm": 3.15625, "learning_rate": 4.993897130893033e-05, "loss": 1.0073, "step": 1776 }, { "epoch": 0.025213510073704846, "grad_norm": 3.25, "learning_rate": 4.9938815189261934e-05, "loss": 0.9578, "step": 1778 }, { "epoch": 0.025241871727331062, "grad_norm": 2.921875, "learning_rate": 4.993865887040555e-05, "loss": 0.926, "step": 1780 }, { "epoch": 0.025270233380957276, "grad_norm": 3.328125, "learning_rate": 4.9938502352362435e-05, "loss": 0.9475, "step": 1782 }, { "epoch": 0.025298595034583492, "grad_norm": 3.125, "learning_rate": 4.993834563513385e-05, "loss": 0.9597, "step": 1784 }, { "epoch": 0.025326956688209706, "grad_norm": 2.890625, "learning_rate": 4.9938188718721025e-05, "loss": 0.9446, "step": 1786 }, { "epoch": 0.025355318341835922, "grad_norm": 3.25, "learning_rate": 4.993803160312523e-05, "loss": 0.9787, "step": 1788 }, { "epoch": 0.025383679995462136, "grad_norm": 3.171875, "learning_rate": 4.9937874288347705e-05, "loss": 0.9527, "step": 1790 }, { "epoch": 0.02541204164908835, "grad_norm": 3.125, "learning_rate": 4.993771677438972e-05, "loss": 0.9587, "step": 1792 }, { "epoch": 0.025440403302714566, "grad_norm": 3.078125, "learning_rate": 4.993755906125253e-05, "loss": 0.9844, "step": 1794 }, { "epoch": 0.02546876495634078, "grad_norm": 3.40625, "learning_rate": 4.993740114893739e-05, "loss": 1.0143, "step": 1796 }, { "epoch": 0.025497126609966995, "grad_norm": 2.8125, "learning_rate": 4.993724303744556e-05, "loss": 0.9416, "step": 1798 }, { "epoch": 0.02552548826359321, "grad_norm": 3.09375, "learning_rate": 4.9937084726778314e-05, "loss": 0.9145, "step": 1800 }, { "epoch": 0.025553849917219422, "grad_norm": 3.171875, "learning_rate": 4.9936926216936905e-05, "loss": 1.0309, "step": 1802 }, { "epoch": 0.02558221157084564, "grad_norm": 3.359375, "learning_rate": 4.99367675079226e-05, "loss": 0.9573, "step": 1804 }, { "epoch": 0.025610573224471852, "grad_norm": 3.03125, "learning_rate": 4.993660859973667e-05, "loss": 0.942, "step": 1806 }, { "epoch": 0.02563893487809807, "grad_norm": 3.296875, "learning_rate": 4.993644949238039e-05, "loss": 0.9686, "step": 1808 }, { "epoch": 0.025667296531724282, "grad_norm": 3.078125, "learning_rate": 4.993629018585502e-05, "loss": 0.9753, "step": 1810 }, { "epoch": 0.0256956581853505, "grad_norm": 3.046875, "learning_rate": 4.993613068016184e-05, "loss": 0.9705, "step": 1812 }, { "epoch": 0.025724019838976712, "grad_norm": 2.765625, "learning_rate": 4.993597097530212e-05, "loss": 0.9948, "step": 1814 }, { "epoch": 0.025752381492602925, "grad_norm": 2.984375, "learning_rate": 4.9935811071277125e-05, "loss": 0.9672, "step": 1816 }, { "epoch": 0.02578074314622914, "grad_norm": 3.078125, "learning_rate": 4.993565096808816e-05, "loss": 0.9269, "step": 1818 }, { "epoch": 0.025809104799855355, "grad_norm": 3.0, "learning_rate": 4.9935490665736484e-05, "loss": 1.027, "step": 1820 }, { "epoch": 0.02583746645348157, "grad_norm": 3.078125, "learning_rate": 4.9935330164223376e-05, "loss": 0.9689, "step": 1822 }, { "epoch": 0.025865828107107785, "grad_norm": 3.5, "learning_rate": 4.993516946355012e-05, "loss": 0.9762, "step": 1824 }, { "epoch": 0.025894189760733998, "grad_norm": 3.21875, "learning_rate": 4.993500856371801e-05, "loss": 0.9226, "step": 1826 }, { "epoch": 0.025922551414360215, "grad_norm": 2.921875, "learning_rate": 4.993484746472832e-05, "loss": 0.9227, "step": 1828 }, { "epoch": 0.025950913067986428, "grad_norm": 3.03125, "learning_rate": 4.993468616658235e-05, "loss": 0.9408, "step": 1830 }, { "epoch": 0.025979274721612645, "grad_norm": 3.140625, "learning_rate": 4.993452466928137e-05, "loss": 1.005, "step": 1832 }, { "epoch": 0.026007636375238858, "grad_norm": 3.1875, "learning_rate": 4.9934362972826674e-05, "loss": 0.9385, "step": 1834 }, { "epoch": 0.026035998028865075, "grad_norm": 3.28125, "learning_rate": 4.993420107721956e-05, "loss": 1.0031, "step": 1836 }, { "epoch": 0.026064359682491288, "grad_norm": 3.5625, "learning_rate": 4.9934038982461315e-05, "loss": 0.9925, "step": 1838 }, { "epoch": 0.0260927213361175, "grad_norm": 3.03125, "learning_rate": 4.9933876688553246e-05, "loss": 0.9775, "step": 1840 }, { "epoch": 0.026121082989743718, "grad_norm": 3.015625, "learning_rate": 4.993371419549664e-05, "loss": 0.9831, "step": 1842 }, { "epoch": 0.02614944464336993, "grad_norm": 3.15625, "learning_rate": 4.99335515032928e-05, "loss": 0.9297, "step": 1844 }, { "epoch": 0.026177806296996148, "grad_norm": 3.03125, "learning_rate": 4.993338861194301e-05, "loss": 0.9388, "step": 1846 }, { "epoch": 0.02620616795062236, "grad_norm": 2.921875, "learning_rate": 4.9933225521448587e-05, "loss": 0.9543, "step": 1848 }, { "epoch": 0.026234529604248574, "grad_norm": 3.046875, "learning_rate": 4.993306223181082e-05, "loss": 0.9766, "step": 1850 }, { "epoch": 0.02626289125787479, "grad_norm": 3.125, "learning_rate": 4.993289874303103e-05, "loss": 0.9726, "step": 1852 }, { "epoch": 0.026291252911501004, "grad_norm": 3.109375, "learning_rate": 4.993273505511052e-05, "loss": 1.0022, "step": 1854 }, { "epoch": 0.02631961456512722, "grad_norm": 3.03125, "learning_rate": 4.993257116805059e-05, "loss": 1.0047, "step": 1856 }, { "epoch": 0.026347976218753434, "grad_norm": 3.0, "learning_rate": 4.993240708185254e-05, "loss": 0.9683, "step": 1858 }, { "epoch": 0.026376337872379647, "grad_norm": 3.671875, "learning_rate": 4.99322427965177e-05, "loss": 1.0202, "step": 1860 }, { "epoch": 0.026404699526005864, "grad_norm": 3.046875, "learning_rate": 4.993207831204738e-05, "loss": 0.9677, "step": 1862 }, { "epoch": 0.026433061179632077, "grad_norm": 3.546875, "learning_rate": 4.993191362844288e-05, "loss": 0.9698, "step": 1864 }, { "epoch": 0.026461422833258294, "grad_norm": 2.828125, "learning_rate": 4.993174874570552e-05, "loss": 0.9895, "step": 1866 }, { "epoch": 0.026489784486884507, "grad_norm": 3.046875, "learning_rate": 4.993158366383662e-05, "loss": 0.9978, "step": 1868 }, { "epoch": 0.026518146140510724, "grad_norm": 3.453125, "learning_rate": 4.993141838283751e-05, "loss": 1.0044, "step": 1870 }, { "epoch": 0.026546507794136937, "grad_norm": 3.484375, "learning_rate": 4.993125290270949e-05, "loss": 1.0062, "step": 1872 }, { "epoch": 0.02657486944776315, "grad_norm": 3.40625, "learning_rate": 4.993108722345389e-05, "loss": 0.9589, "step": 1874 }, { "epoch": 0.026603231101389367, "grad_norm": 3.0, "learning_rate": 4.993092134507203e-05, "loss": 1.0257, "step": 1876 }, { "epoch": 0.02663159275501558, "grad_norm": 3.015625, "learning_rate": 4.993075526756524e-05, "loss": 0.9612, "step": 1878 }, { "epoch": 0.026659954408641797, "grad_norm": 2.90625, "learning_rate": 4.993058899093485e-05, "loss": 0.9944, "step": 1880 }, { "epoch": 0.02668831606226801, "grad_norm": 2.96875, "learning_rate": 4.993042251518218e-05, "loss": 0.953, "step": 1882 }, { "epoch": 0.026716677715894224, "grad_norm": 2.90625, "learning_rate": 4.9930255840308565e-05, "loss": 0.9688, "step": 1884 }, { "epoch": 0.02674503936952044, "grad_norm": 3.078125, "learning_rate": 4.993008896631533e-05, "loss": 0.9505, "step": 1886 }, { "epoch": 0.026773401023146653, "grad_norm": 2.96875, "learning_rate": 4.9929921893203814e-05, "loss": 0.9508, "step": 1888 }, { "epoch": 0.02680176267677287, "grad_norm": 2.890625, "learning_rate": 4.992975462097534e-05, "loss": 0.9602, "step": 1890 }, { "epoch": 0.026830124330399083, "grad_norm": 3.265625, "learning_rate": 4.9929587149631265e-05, "loss": 0.9812, "step": 1892 }, { "epoch": 0.0268584859840253, "grad_norm": 3.140625, "learning_rate": 4.992941947917291e-05, "loss": 0.956, "step": 1894 }, { "epoch": 0.026886847637651513, "grad_norm": 3.3125, "learning_rate": 4.9929251609601625e-05, "loss": 0.9646, "step": 1896 }, { "epoch": 0.026915209291277727, "grad_norm": 3.375, "learning_rate": 4.992908354091874e-05, "loss": 1.0219, "step": 1898 }, { "epoch": 0.026943570944903943, "grad_norm": 3.15625, "learning_rate": 4.99289152731256e-05, "loss": 0.9343, "step": 1900 }, { "epoch": 0.026971932598530157, "grad_norm": 3.34375, "learning_rate": 4.9928746806223545e-05, "loss": 0.9743, "step": 1902 }, { "epoch": 0.027000294252156373, "grad_norm": 3.140625, "learning_rate": 4.992857814021393e-05, "loss": 0.9736, "step": 1904 }, { "epoch": 0.027028655905782586, "grad_norm": 2.9375, "learning_rate": 4.9928409275098106e-05, "loss": 0.9568, "step": 1906 }, { "epoch": 0.0270570175594088, "grad_norm": 3.109375, "learning_rate": 4.992824021087741e-05, "loss": 0.9995, "step": 1908 }, { "epoch": 0.027085379213035016, "grad_norm": 2.96875, "learning_rate": 4.99280709475532e-05, "loss": 0.9632, "step": 1910 }, { "epoch": 0.02711374086666123, "grad_norm": 2.984375, "learning_rate": 4.992790148512682e-05, "loss": 0.8786, "step": 1912 }, { "epoch": 0.027142102520287446, "grad_norm": 3.140625, "learning_rate": 4.992773182359963e-05, "loss": 1.0195, "step": 1914 }, { "epoch": 0.02717046417391366, "grad_norm": 3.015625, "learning_rate": 4.992756196297298e-05, "loss": 0.9875, "step": 1916 }, { "epoch": 0.027198825827539876, "grad_norm": 3.390625, "learning_rate": 4.9927391903248235e-05, "loss": 0.9795, "step": 1918 }, { "epoch": 0.02722718748116609, "grad_norm": 3.125, "learning_rate": 4.992722164442675e-05, "loss": 0.9745, "step": 1920 }, { "epoch": 0.027255549134792303, "grad_norm": 3.4375, "learning_rate": 4.9927051186509876e-05, "loss": 0.9001, "step": 1922 }, { "epoch": 0.02728391078841852, "grad_norm": 3.046875, "learning_rate": 4.992688052949899e-05, "loss": 0.9549, "step": 1924 }, { "epoch": 0.027312272442044733, "grad_norm": 3.171875, "learning_rate": 4.992670967339543e-05, "loss": 1.0138, "step": 1926 }, { "epoch": 0.02734063409567095, "grad_norm": 3.015625, "learning_rate": 4.9926538618200594e-05, "loss": 0.9425, "step": 1928 }, { "epoch": 0.027368995749297163, "grad_norm": 3.359375, "learning_rate": 4.992636736391583e-05, "loss": 0.928, "step": 1930 }, { "epoch": 0.027397357402923376, "grad_norm": 3.9375, "learning_rate": 4.99261959105425e-05, "loss": 0.9705, "step": 1932 }, { "epoch": 0.027425719056549593, "grad_norm": 3.203125, "learning_rate": 4.992602425808198e-05, "loss": 0.9711, "step": 1934 }, { "epoch": 0.027454080710175806, "grad_norm": 3.203125, "learning_rate": 4.992585240653564e-05, "loss": 0.9817, "step": 1936 }, { "epoch": 0.027482442363802023, "grad_norm": 3.203125, "learning_rate": 4.992568035590487e-05, "loss": 0.9607, "step": 1938 }, { "epoch": 0.027510804017428236, "grad_norm": 3.234375, "learning_rate": 4.992550810619101e-05, "loss": 0.9138, "step": 1940 }, { "epoch": 0.027539165671054452, "grad_norm": 3.3125, "learning_rate": 4.992533565739547e-05, "loss": 0.9808, "step": 1942 }, { "epoch": 0.027567527324680666, "grad_norm": 3.265625, "learning_rate": 4.9925163009519607e-05, "loss": 0.9404, "step": 1944 }, { "epoch": 0.02759588897830688, "grad_norm": 2.953125, "learning_rate": 4.992499016256479e-05, "loss": 0.9314, "step": 1946 }, { "epoch": 0.027624250631933096, "grad_norm": 3.25, "learning_rate": 4.992481711653243e-05, "loss": 0.9184, "step": 1948 }, { "epoch": 0.02765261228555931, "grad_norm": 2.921875, "learning_rate": 4.9924643871423895e-05, "loss": 0.9711, "step": 1950 }, { "epoch": 0.027680973939185526, "grad_norm": 3.109375, "learning_rate": 4.9924470427240556e-05, "loss": 0.9914, "step": 1952 }, { "epoch": 0.02770933559281174, "grad_norm": 3.125, "learning_rate": 4.9924296783983814e-05, "loss": 1.041, "step": 1954 }, { "epoch": 0.027737697246437952, "grad_norm": 2.734375, "learning_rate": 4.992412294165505e-05, "loss": 0.9332, "step": 1956 }, { "epoch": 0.02776605890006417, "grad_norm": 3.046875, "learning_rate": 4.9923948900255656e-05, "loss": 1.0057, "step": 1958 }, { "epoch": 0.027794420553690382, "grad_norm": 3.125, "learning_rate": 4.992377465978702e-05, "loss": 0.9565, "step": 1960 }, { "epoch": 0.0278227822073166, "grad_norm": 3.296875, "learning_rate": 4.9923600220250526e-05, "loss": 0.9139, "step": 1962 }, { "epoch": 0.027851143860942812, "grad_norm": 3.671875, "learning_rate": 4.992342558164758e-05, "loss": 0.9591, "step": 1964 }, { "epoch": 0.02787950551456903, "grad_norm": 3.8125, "learning_rate": 4.992325074397957e-05, "loss": 0.957, "step": 1966 }, { "epoch": 0.027907867168195242, "grad_norm": 3.09375, "learning_rate": 4.99230757072479e-05, "loss": 0.9193, "step": 1968 }, { "epoch": 0.027936228821821455, "grad_norm": 3.109375, "learning_rate": 4.992290047145396e-05, "loss": 0.9929, "step": 1970 }, { "epoch": 0.027964590475447672, "grad_norm": 2.90625, "learning_rate": 4.9922725036599146e-05, "loss": 0.9628, "step": 1972 }, { "epoch": 0.027992952129073885, "grad_norm": 3.296875, "learning_rate": 4.992254940268487e-05, "loss": 0.9564, "step": 1974 }, { "epoch": 0.0280213137827001, "grad_norm": 3.140625, "learning_rate": 4.992237356971252e-05, "loss": 0.9706, "step": 1976 }, { "epoch": 0.028049675436326315, "grad_norm": 3.078125, "learning_rate": 4.9922197537683524e-05, "loss": 0.9517, "step": 1978 }, { "epoch": 0.028078037089952528, "grad_norm": 3.015625, "learning_rate": 4.992202130659927e-05, "loss": 0.9207, "step": 1980 }, { "epoch": 0.028106398743578745, "grad_norm": 3.03125, "learning_rate": 4.992184487646116e-05, "loss": 0.9569, "step": 1982 }, { "epoch": 0.028134760397204958, "grad_norm": 3.65625, "learning_rate": 4.992166824727062e-05, "loss": 0.9525, "step": 1984 }, { "epoch": 0.028163122050831175, "grad_norm": 3.53125, "learning_rate": 4.992149141902905e-05, "loss": 0.9863, "step": 1986 }, { "epoch": 0.028191483704457388, "grad_norm": 3.4375, "learning_rate": 4.9921314391737875e-05, "loss": 0.9697, "step": 1988 }, { "epoch": 0.028219845358083605, "grad_norm": 3.25, "learning_rate": 4.992113716539849e-05, "loss": 0.9605, "step": 1990 }, { "epoch": 0.028248207011709818, "grad_norm": 3.40625, "learning_rate": 4.9920959740012326e-05, "loss": 0.9802, "step": 1992 }, { "epoch": 0.02827656866533603, "grad_norm": 3.03125, "learning_rate": 4.9920782115580785e-05, "loss": 0.9701, "step": 1994 }, { "epoch": 0.028304930318962248, "grad_norm": 2.921875, "learning_rate": 4.9920604292105304e-05, "loss": 0.9683, "step": 1996 }, { "epoch": 0.02833329197258846, "grad_norm": 2.796875, "learning_rate": 4.992042626958729e-05, "loss": 0.9577, "step": 1998 }, { "epoch": 0.028361653626214678, "grad_norm": 3.78125, "learning_rate": 4.992024804802817e-05, "loss": 0.9432, "step": 2000 }, { "epoch": 0.02839001527984089, "grad_norm": 3.3125, "learning_rate": 4.9920069627429375e-05, "loss": 0.9747, "step": 2002 }, { "epoch": 0.028418376933467104, "grad_norm": 3.34375, "learning_rate": 4.9919891007792315e-05, "loss": 0.9456, "step": 2004 }, { "epoch": 0.02844673858709332, "grad_norm": 3.1875, "learning_rate": 4.9919712189118424e-05, "loss": 1.0218, "step": 2006 }, { "epoch": 0.028475100240719534, "grad_norm": 3.609375, "learning_rate": 4.991953317140913e-05, "loss": 1.0013, "step": 2008 }, { "epoch": 0.02850346189434575, "grad_norm": 3.640625, "learning_rate": 4.991935395466586e-05, "loss": 0.9592, "step": 2010 }, { "epoch": 0.028531823547971964, "grad_norm": 3.34375, "learning_rate": 4.991917453889004e-05, "loss": 0.9389, "step": 2012 }, { "epoch": 0.028560185201598177, "grad_norm": 3.3125, "learning_rate": 4.991899492408313e-05, "loss": 1.0012, "step": 2014 }, { "epoch": 0.028588546855224394, "grad_norm": 3.390625, "learning_rate": 4.991881511024653e-05, "loss": 0.9487, "step": 2016 }, { "epoch": 0.028616908508850607, "grad_norm": 3.28125, "learning_rate": 4.991863509738169e-05, "loss": 1.0159, "step": 2018 }, { "epoch": 0.028645270162476824, "grad_norm": 2.96875, "learning_rate": 4.991845488549006e-05, "loss": 0.9811, "step": 2020 }, { "epoch": 0.028673631816103037, "grad_norm": 3.375, "learning_rate": 4.991827447457307e-05, "loss": 0.9509, "step": 2022 }, { "epoch": 0.028701993469729254, "grad_norm": 2.828125, "learning_rate": 4.991809386463216e-05, "loss": 0.9578, "step": 2024 }, { "epoch": 0.028730355123355467, "grad_norm": 3.203125, "learning_rate": 4.991791305566876e-05, "loss": 0.9471, "step": 2026 }, { "epoch": 0.02875871677698168, "grad_norm": 3.109375, "learning_rate": 4.991773204768433e-05, "loss": 0.9923, "step": 2028 }, { "epoch": 0.028787078430607897, "grad_norm": 2.984375, "learning_rate": 4.9917550840680317e-05, "loss": 0.9028, "step": 2030 }, { "epoch": 0.02881544008423411, "grad_norm": 3.09375, "learning_rate": 4.991736943465816e-05, "loss": 1.0057, "step": 2032 }, { "epoch": 0.028843801737860327, "grad_norm": 3.453125, "learning_rate": 4.9917187829619313e-05, "loss": 1.0544, "step": 2034 }, { "epoch": 0.02887216339148654, "grad_norm": 3.5, "learning_rate": 4.991700602556523e-05, "loss": 0.9443, "step": 2036 }, { "epoch": 0.028900525045112754, "grad_norm": 3.4375, "learning_rate": 4.9916824022497346e-05, "loss": 0.9566, "step": 2038 }, { "epoch": 0.02892888669873897, "grad_norm": 3.3125, "learning_rate": 4.991664182041714e-05, "loss": 1.0628, "step": 2040 }, { "epoch": 0.028957248352365184, "grad_norm": 3.53125, "learning_rate": 4.991645941932604e-05, "loss": 0.9259, "step": 2042 }, { "epoch": 0.0289856100059914, "grad_norm": 3.390625, "learning_rate": 4.991627681922553e-05, "loss": 0.9033, "step": 2044 }, { "epoch": 0.029013971659617614, "grad_norm": 3.453125, "learning_rate": 4.9916094020117044e-05, "loss": 0.9603, "step": 2046 }, { "epoch": 0.02904233331324383, "grad_norm": 3.0625, "learning_rate": 4.991591102200206e-05, "loss": 0.9132, "step": 2048 }, { "epoch": 0.029070694966870043, "grad_norm": 3.03125, "learning_rate": 4.991572782488203e-05, "loss": 0.9205, "step": 2050 }, { "epoch": 0.029099056620496257, "grad_norm": 2.921875, "learning_rate": 4.991554442875842e-05, "loss": 0.9435, "step": 2052 }, { "epoch": 0.029127418274122473, "grad_norm": 3.078125, "learning_rate": 4.991536083363269e-05, "loss": 0.9144, "step": 2054 }, { "epoch": 0.029155779927748687, "grad_norm": 3.296875, "learning_rate": 4.9915177039506325e-05, "loss": 0.9861, "step": 2056 }, { "epoch": 0.029184141581374903, "grad_norm": 3.875, "learning_rate": 4.991499304638077e-05, "loss": 1.0043, "step": 2058 }, { "epoch": 0.029212503235001117, "grad_norm": 3.359375, "learning_rate": 4.991480885425751e-05, "loss": 0.9888, "step": 2060 }, { "epoch": 0.02924086488862733, "grad_norm": 3.4375, "learning_rate": 4.9914624463138e-05, "loss": 0.9761, "step": 2062 }, { "epoch": 0.029269226542253547, "grad_norm": 3.25, "learning_rate": 4.9914439873023724e-05, "loss": 0.968, "step": 2064 }, { "epoch": 0.02929758819587976, "grad_norm": 3.046875, "learning_rate": 4.991425508391616e-05, "loss": 0.9443, "step": 2066 }, { "epoch": 0.029325949849505976, "grad_norm": 2.5625, "learning_rate": 4.9914070095816775e-05, "loss": 0.9272, "step": 2068 }, { "epoch": 0.02935431150313219, "grad_norm": 3.140625, "learning_rate": 4.991388490872705e-05, "loss": 0.9669, "step": 2070 }, { "epoch": 0.029382673156758406, "grad_norm": 3.703125, "learning_rate": 4.991369952264847e-05, "loss": 1.0146, "step": 2072 }, { "epoch": 0.02941103481038462, "grad_norm": 2.859375, "learning_rate": 4.991351393758251e-05, "loss": 0.8965, "step": 2074 }, { "epoch": 0.029439396464010833, "grad_norm": 3.234375, "learning_rate": 4.991332815353065e-05, "loss": 0.9345, "step": 2076 }, { "epoch": 0.02946775811763705, "grad_norm": 2.65625, "learning_rate": 4.991314217049437e-05, "loss": 0.9065, "step": 2078 }, { "epoch": 0.029496119771263263, "grad_norm": 3.125, "learning_rate": 4.991295598847517e-05, "loss": 0.9222, "step": 2080 }, { "epoch": 0.02952448142488948, "grad_norm": 3.109375, "learning_rate": 4.991276960747452e-05, "loss": 0.9114, "step": 2082 }, { "epoch": 0.029552843078515693, "grad_norm": 3.359375, "learning_rate": 4.991258302749393e-05, "loss": 0.9781, "step": 2084 }, { "epoch": 0.029581204732141906, "grad_norm": 2.8125, "learning_rate": 4.991239624853487e-05, "loss": 0.9602, "step": 2086 }, { "epoch": 0.029609566385768123, "grad_norm": 2.828125, "learning_rate": 4.991220927059884e-05, "loss": 0.9674, "step": 2088 }, { "epoch": 0.029637928039394336, "grad_norm": 2.9375, "learning_rate": 4.991202209368733e-05, "loss": 0.9406, "step": 2090 }, { "epoch": 0.029666289693020553, "grad_norm": 3.15625, "learning_rate": 4.991183471780184e-05, "loss": 0.9482, "step": 2092 }, { "epoch": 0.029694651346646766, "grad_norm": 3.546875, "learning_rate": 4.991164714294387e-05, "loss": 0.9941, "step": 2094 }, { "epoch": 0.029723013000272983, "grad_norm": 3.28125, "learning_rate": 4.991145936911491e-05, "loss": 0.9457, "step": 2096 }, { "epoch": 0.029751374653899196, "grad_norm": 3.390625, "learning_rate": 4.991127139631646e-05, "loss": 0.9496, "step": 2098 }, { "epoch": 0.02977973630752541, "grad_norm": 3.046875, "learning_rate": 4.9911083224550024e-05, "loss": 0.9594, "step": 2100 }, { "epoch": 0.029808097961151626, "grad_norm": 3.375, "learning_rate": 4.9910894853817106e-05, "loss": 0.9344, "step": 2102 }, { "epoch": 0.02983645961477784, "grad_norm": 3.359375, "learning_rate": 4.991070628411921e-05, "loss": 0.9193, "step": 2104 }, { "epoch": 0.029864821268404056, "grad_norm": 3.078125, "learning_rate": 4.991051751545784e-05, "loss": 0.9022, "step": 2106 }, { "epoch": 0.02989318292203027, "grad_norm": 2.78125, "learning_rate": 4.991032854783451e-05, "loss": 0.928, "step": 2108 }, { "epoch": 0.029921544575656482, "grad_norm": 3.359375, "learning_rate": 4.991013938125072e-05, "loss": 0.9092, "step": 2110 }, { "epoch": 0.0299499062292827, "grad_norm": 3.546875, "learning_rate": 4.990995001570798e-05, "loss": 0.989, "step": 2112 }, { "epoch": 0.029978267882908912, "grad_norm": 3.296875, "learning_rate": 4.990976045120781e-05, "loss": 0.9392, "step": 2114 }, { "epoch": 0.03000662953653513, "grad_norm": 2.984375, "learning_rate": 4.990957068775173e-05, "loss": 0.9926, "step": 2116 }, { "epoch": 0.030034991190161342, "grad_norm": 3.296875, "learning_rate": 4.990938072534124e-05, "loss": 0.9788, "step": 2118 }, { "epoch": 0.03006335284378756, "grad_norm": 3.078125, "learning_rate": 4.990919056397786e-05, "loss": 0.9279, "step": 2120 }, { "epoch": 0.030091714497413772, "grad_norm": 2.84375, "learning_rate": 4.990900020366313e-05, "loss": 0.9286, "step": 2122 }, { "epoch": 0.030120076151039985, "grad_norm": 2.875, "learning_rate": 4.990880964439853e-05, "loss": 0.8859, "step": 2124 }, { "epoch": 0.030148437804666202, "grad_norm": 3.3125, "learning_rate": 4.990861888618562e-05, "loss": 0.9428, "step": 2126 }, { "epoch": 0.030176799458292415, "grad_norm": 2.96875, "learning_rate": 4.990842792902591e-05, "loss": 1.0067, "step": 2128 }, { "epoch": 0.030205161111918632, "grad_norm": 2.71875, "learning_rate": 4.990823677292092e-05, "loss": 0.9069, "step": 2130 }, { "epoch": 0.030233522765544845, "grad_norm": 3.265625, "learning_rate": 4.99080454178722e-05, "loss": 0.9975, "step": 2132 }, { "epoch": 0.03026188441917106, "grad_norm": 3.34375, "learning_rate": 4.990785386388124e-05, "loss": 0.9642, "step": 2134 }, { "epoch": 0.030290246072797275, "grad_norm": 3.65625, "learning_rate": 4.990766211094959e-05, "loss": 1.0293, "step": 2136 }, { "epoch": 0.030318607726423488, "grad_norm": 3.34375, "learning_rate": 4.990747015907879e-05, "loss": 0.9714, "step": 2138 }, { "epoch": 0.030346969380049705, "grad_norm": 2.890625, "learning_rate": 4.9907278008270364e-05, "loss": 0.9857, "step": 2140 }, { "epoch": 0.030375331033675918, "grad_norm": 3.796875, "learning_rate": 4.990708565852584e-05, "loss": 0.9854, "step": 2142 }, { "epoch": 0.030403692687302135, "grad_norm": 3.21875, "learning_rate": 4.9906893109846765e-05, "loss": 0.9813, "step": 2144 }, { "epoch": 0.030432054340928348, "grad_norm": 3.328125, "learning_rate": 4.990670036223468e-05, "loss": 0.9661, "step": 2146 }, { "epoch": 0.03046041599455456, "grad_norm": 3.109375, "learning_rate": 4.990650741569111e-05, "loss": 0.9802, "step": 2148 }, { "epoch": 0.030488777648180778, "grad_norm": 3.109375, "learning_rate": 4.99063142702176e-05, "loss": 0.9553, "step": 2150 }, { "epoch": 0.03051713930180699, "grad_norm": 3.15625, "learning_rate": 4.9906120925815706e-05, "loss": 0.9449, "step": 2152 }, { "epoch": 0.030545500955433208, "grad_norm": 3.03125, "learning_rate": 4.990592738248696e-05, "loss": 0.9758, "step": 2154 }, { "epoch": 0.03057386260905942, "grad_norm": 2.9375, "learning_rate": 4.990573364023291e-05, "loss": 1.0034, "step": 2156 }, { "epoch": 0.030602224262685634, "grad_norm": 3.0, "learning_rate": 4.9905539699055104e-05, "loss": 0.954, "step": 2158 }, { "epoch": 0.03063058591631185, "grad_norm": 3.859375, "learning_rate": 4.99053455589551e-05, "loss": 0.982, "step": 2160 }, { "epoch": 0.030658947569938064, "grad_norm": 3.03125, "learning_rate": 4.990515121993442e-05, "loss": 0.9485, "step": 2162 }, { "epoch": 0.03068730922356428, "grad_norm": 2.796875, "learning_rate": 4.990495668199465e-05, "loss": 0.9123, "step": 2164 }, { "epoch": 0.030715670877190494, "grad_norm": 3.0625, "learning_rate": 4.990476194513733e-05, "loss": 0.94, "step": 2166 }, { "epoch": 0.030744032530816708, "grad_norm": 2.921875, "learning_rate": 4.990456700936401e-05, "loss": 0.9483, "step": 2168 }, { "epoch": 0.030772394184442924, "grad_norm": 3.3125, "learning_rate": 4.990437187467626e-05, "loss": 0.9642, "step": 2170 }, { "epoch": 0.030800755838069137, "grad_norm": 3.53125, "learning_rate": 4.990417654107562e-05, "loss": 0.9336, "step": 2172 }, { "epoch": 0.030829117491695354, "grad_norm": 3.34375, "learning_rate": 4.990398100856367e-05, "loss": 0.9772, "step": 2174 }, { "epoch": 0.030857479145321567, "grad_norm": 3.21875, "learning_rate": 4.990378527714196e-05, "loss": 0.9597, "step": 2176 }, { "epoch": 0.030885840798947784, "grad_norm": 3.0, "learning_rate": 4.9903589346812054e-05, "loss": 0.9722, "step": 2178 }, { "epoch": 0.030914202452573997, "grad_norm": 2.78125, "learning_rate": 4.990339321757552e-05, "loss": 0.9192, "step": 2180 }, { "epoch": 0.03094256410620021, "grad_norm": 3.078125, "learning_rate": 4.990319688943392e-05, "loss": 0.9073, "step": 2182 }, { "epoch": 0.030970925759826427, "grad_norm": 2.78125, "learning_rate": 4.990300036238883e-05, "loss": 0.9574, "step": 2184 }, { "epoch": 0.03099928741345264, "grad_norm": 3.046875, "learning_rate": 4.990280363644181e-05, "loss": 0.9759, "step": 2186 }, { "epoch": 0.031027649067078857, "grad_norm": 3.0625, "learning_rate": 4.990260671159444e-05, "loss": 0.9791, "step": 2188 }, { "epoch": 0.03105601072070507, "grad_norm": 2.953125, "learning_rate": 4.990240958784829e-05, "loss": 0.944, "step": 2190 }, { "epoch": 0.031084372374331284, "grad_norm": 2.796875, "learning_rate": 4.990221226520493e-05, "loss": 0.9255, "step": 2192 }, { "epoch": 0.0311127340279575, "grad_norm": 2.921875, "learning_rate": 4.990201474366594e-05, "loss": 0.9418, "step": 2194 }, { "epoch": 0.031141095681583714, "grad_norm": 3.234375, "learning_rate": 4.99018170232329e-05, "loss": 0.9401, "step": 2196 }, { "epoch": 0.03116945733520993, "grad_norm": 3.0625, "learning_rate": 4.990161910390739e-05, "loss": 0.9653, "step": 2198 }, { "epoch": 0.031197818988836144, "grad_norm": 3.34375, "learning_rate": 4.9901420985690985e-05, "loss": 0.9955, "step": 2200 }, { "epoch": 0.03122618064246236, "grad_norm": 3.484375, "learning_rate": 4.9901222668585266e-05, "loss": 0.9829, "step": 2202 }, { "epoch": 0.031254542296088574, "grad_norm": 3.1875, "learning_rate": 4.990102415259182e-05, "loss": 0.8901, "step": 2204 }, { "epoch": 0.03128290394971479, "grad_norm": 3.546875, "learning_rate": 4.990082543771224e-05, "loss": 1.0051, "step": 2206 }, { "epoch": 0.031311265603341, "grad_norm": 3.140625, "learning_rate": 4.99006265239481e-05, "loss": 0.9163, "step": 2208 }, { "epoch": 0.03133962725696722, "grad_norm": 2.84375, "learning_rate": 4.990042741130099e-05, "loss": 0.8886, "step": 2210 }, { "epoch": 0.03136798891059343, "grad_norm": 3.5, "learning_rate": 4.9900228099772516e-05, "loss": 0.9822, "step": 2212 }, { "epoch": 0.03139635056421965, "grad_norm": 3.03125, "learning_rate": 4.9900028589364254e-05, "loss": 0.9296, "step": 2214 }, { "epoch": 0.03142471221784586, "grad_norm": 3.15625, "learning_rate": 4.9899828880077806e-05, "loss": 0.9333, "step": 2216 }, { "epoch": 0.03145307387147207, "grad_norm": 3.34375, "learning_rate": 4.9899628971914764e-05, "loss": 0.9307, "step": 2218 }, { "epoch": 0.03148143552509829, "grad_norm": 3.40625, "learning_rate": 4.989942886487672e-05, "loss": 0.9732, "step": 2220 }, { "epoch": 0.031509797178724507, "grad_norm": 2.765625, "learning_rate": 4.989922855896528e-05, "loss": 0.953, "step": 2222 }, { "epoch": 0.03153815883235072, "grad_norm": 3.21875, "learning_rate": 4.989902805418204e-05, "loss": 0.9886, "step": 2224 }, { "epoch": 0.03156652048597693, "grad_norm": 3.640625, "learning_rate": 4.9898827350528605e-05, "loss": 0.9232, "step": 2226 }, { "epoch": 0.031594882139603146, "grad_norm": 3.140625, "learning_rate": 4.989862644800657e-05, "loss": 0.9158, "step": 2228 }, { "epoch": 0.031623243793229366, "grad_norm": 3.46875, "learning_rate": 4.989842534661755e-05, "loss": 0.9896, "step": 2230 }, { "epoch": 0.03165160544685558, "grad_norm": 3.40625, "learning_rate": 4.989822404636314e-05, "loss": 0.9294, "step": 2232 }, { "epoch": 0.03167996710048179, "grad_norm": 3.046875, "learning_rate": 4.989802254724496e-05, "loss": 0.9501, "step": 2234 }, { "epoch": 0.031708328754108006, "grad_norm": 3.203125, "learning_rate": 4.989782084926461e-05, "loss": 0.9218, "step": 2236 }, { "epoch": 0.031736690407734226, "grad_norm": 3.03125, "learning_rate": 4.989761895242371e-05, "loss": 0.9732, "step": 2238 }, { "epoch": 0.03176505206136044, "grad_norm": 3.4375, "learning_rate": 4.9897416856723856e-05, "loss": 0.984, "step": 2240 }, { "epoch": 0.03179341371498665, "grad_norm": 3.203125, "learning_rate": 4.989721456216668e-05, "loss": 0.901, "step": 2242 }, { "epoch": 0.031821775368612866, "grad_norm": 3.25, "learning_rate": 4.989701206875379e-05, "loss": 0.8876, "step": 2244 }, { "epoch": 0.03185013702223908, "grad_norm": 3.171875, "learning_rate": 4.9896809376486796e-05, "loss": 0.9965, "step": 2246 }, { "epoch": 0.0318784986758653, "grad_norm": 3.484375, "learning_rate": 4.989660648536734e-05, "loss": 1.0027, "step": 2248 }, { "epoch": 0.03190686032949151, "grad_norm": 2.984375, "learning_rate": 4.989640339539702e-05, "loss": 0.9797, "step": 2250 }, { "epoch": 0.031935221983117726, "grad_norm": 3.0625, "learning_rate": 4.9896200106577465e-05, "loss": 0.9285, "step": 2252 }, { "epoch": 0.03196358363674394, "grad_norm": 2.953125, "learning_rate": 4.989599661891029e-05, "loss": 0.9713, "step": 2254 }, { "epoch": 0.03199194529037015, "grad_norm": 2.78125, "learning_rate": 4.989579293239715e-05, "loss": 0.947, "step": 2256 }, { "epoch": 0.03202030694399637, "grad_norm": 2.84375, "learning_rate": 4.989558904703963e-05, "loss": 0.9117, "step": 2258 }, { "epoch": 0.032048668597622586, "grad_norm": 3.34375, "learning_rate": 4.989538496283939e-05, "loss": 0.9845, "step": 2260 }, { "epoch": 0.0320770302512488, "grad_norm": 3.40625, "learning_rate": 4.989518067979805e-05, "loss": 0.9599, "step": 2262 }, { "epoch": 0.03210539190487501, "grad_norm": 3.125, "learning_rate": 4.989497619791724e-05, "loss": 0.9601, "step": 2264 }, { "epoch": 0.032133753558501225, "grad_norm": 2.9375, "learning_rate": 4.98947715171986e-05, "loss": 0.9082, "step": 2266 }, { "epoch": 0.032162115212127446, "grad_norm": 3.3125, "learning_rate": 4.989456663764375e-05, "loss": 0.9261, "step": 2268 }, { "epoch": 0.03219047686575366, "grad_norm": 3.546875, "learning_rate": 4.9894361559254345e-05, "loss": 0.925, "step": 2270 }, { "epoch": 0.03221883851937987, "grad_norm": 3.703125, "learning_rate": 4.9894156282032e-05, "loss": 1.0176, "step": 2272 }, { "epoch": 0.032247200173006085, "grad_norm": 3.078125, "learning_rate": 4.989395080597838e-05, "loss": 0.9862, "step": 2274 }, { "epoch": 0.0322755618266323, "grad_norm": 3.609375, "learning_rate": 4.9893745131095116e-05, "loss": 0.9733, "step": 2276 }, { "epoch": 0.03230392348025852, "grad_norm": 3.0, "learning_rate": 4.989353925738384e-05, "loss": 0.9576, "step": 2278 }, { "epoch": 0.03233228513388473, "grad_norm": 2.984375, "learning_rate": 4.9893333184846214e-05, "loss": 0.9234, "step": 2280 }, { "epoch": 0.032360646787510945, "grad_norm": 3.390625, "learning_rate": 4.989312691348387e-05, "loss": 0.9416, "step": 2282 }, { "epoch": 0.03238900844113716, "grad_norm": 3.140625, "learning_rate": 4.989292044329846e-05, "loss": 0.9948, "step": 2284 }, { "epoch": 0.03241737009476338, "grad_norm": 3.140625, "learning_rate": 4.989271377429164e-05, "loss": 0.9116, "step": 2286 }, { "epoch": 0.03244573174838959, "grad_norm": 3.34375, "learning_rate": 4.9892506906465055e-05, "loss": 0.9318, "step": 2288 }, { "epoch": 0.032474093402015805, "grad_norm": 3.09375, "learning_rate": 4.989229983982035e-05, "loss": 1.0087, "step": 2290 }, { "epoch": 0.03250245505564202, "grad_norm": 3.421875, "learning_rate": 4.989209257435919e-05, "loss": 0.9494, "step": 2292 }, { "epoch": 0.03253081670926823, "grad_norm": 3.109375, "learning_rate": 4.989188511008322e-05, "loss": 0.9293, "step": 2294 }, { "epoch": 0.03255917836289445, "grad_norm": 2.96875, "learning_rate": 4.989167744699411e-05, "loss": 0.9368, "step": 2296 }, { "epoch": 0.032587540016520665, "grad_norm": 3.359375, "learning_rate": 4.9891469585093514e-05, "loss": 1.0246, "step": 2298 }, { "epoch": 0.03261590167014688, "grad_norm": 3.40625, "learning_rate": 4.989126152438309e-05, "loss": 0.9942, "step": 2300 }, { "epoch": 0.03264426332377309, "grad_norm": 2.984375, "learning_rate": 4.98910532648645e-05, "loss": 0.9162, "step": 2302 }, { "epoch": 0.032672624977399305, "grad_norm": 3.546875, "learning_rate": 4.98908448065394e-05, "loss": 0.9538, "step": 2304 }, { "epoch": 0.032700986631025525, "grad_norm": 3.0625, "learning_rate": 4.989063614940946e-05, "loss": 0.8911, "step": 2306 }, { "epoch": 0.03272934828465174, "grad_norm": 3.015625, "learning_rate": 4.9890427293476364e-05, "loss": 0.9499, "step": 2308 }, { "epoch": 0.03275770993827795, "grad_norm": 3.453125, "learning_rate": 4.9890218238741753e-05, "loss": 0.9706, "step": 2310 }, { "epoch": 0.032786071591904165, "grad_norm": 3.75, "learning_rate": 4.989000898520732e-05, "loss": 0.9541, "step": 2312 }, { "epoch": 0.03281443324553038, "grad_norm": 3.046875, "learning_rate": 4.988979953287472e-05, "loss": 0.9172, "step": 2314 }, { "epoch": 0.0328427948991566, "grad_norm": 3.40625, "learning_rate": 4.9889589881745625e-05, "loss": 0.9544, "step": 2316 }, { "epoch": 0.03287115655278281, "grad_norm": 3.625, "learning_rate": 4.9889380031821716e-05, "loss": 0.9865, "step": 2318 }, { "epoch": 0.032899518206409024, "grad_norm": 3.125, "learning_rate": 4.988916998310467e-05, "loss": 0.9481, "step": 2320 }, { "epoch": 0.03292787986003524, "grad_norm": 3.1875, "learning_rate": 4.9888959735596165e-05, "loss": 0.9383, "step": 2322 }, { "epoch": 0.03295624151366145, "grad_norm": 3.796875, "learning_rate": 4.988874928929788e-05, "loss": 0.9702, "step": 2324 }, { "epoch": 0.03298460316728767, "grad_norm": 3.203125, "learning_rate": 4.988853864421149e-05, "loss": 0.9261, "step": 2326 }, { "epoch": 0.033012964820913884, "grad_norm": 3.609375, "learning_rate": 4.9888327800338686e-05, "loss": 0.9288, "step": 2328 }, { "epoch": 0.0330413264745401, "grad_norm": 3.296875, "learning_rate": 4.988811675768115e-05, "loss": 0.984, "step": 2330 }, { "epoch": 0.03306968812816631, "grad_norm": 3.03125, "learning_rate": 4.9887905516240555e-05, "loss": 0.9621, "step": 2332 }, { "epoch": 0.033098049781792524, "grad_norm": 3.125, "learning_rate": 4.98876940760186e-05, "loss": 0.9327, "step": 2334 }, { "epoch": 0.033126411435418744, "grad_norm": 3.1875, "learning_rate": 4.988748243701698e-05, "loss": 0.9889, "step": 2336 }, { "epoch": 0.03315477308904496, "grad_norm": 3.0625, "learning_rate": 4.988727059923738e-05, "loss": 0.9402, "step": 2338 }, { "epoch": 0.03318313474267117, "grad_norm": 2.921875, "learning_rate": 4.9887058562681484e-05, "loss": 1.0266, "step": 2340 }, { "epoch": 0.033211496396297384, "grad_norm": 2.921875, "learning_rate": 4.988684632735099e-05, "loss": 0.9287, "step": 2342 }, { "epoch": 0.033239858049923604, "grad_norm": 3.171875, "learning_rate": 4.9886633893247595e-05, "loss": 0.989, "step": 2344 }, { "epoch": 0.03326821970354982, "grad_norm": 3.1875, "learning_rate": 4.9886421260372996e-05, "loss": 0.9689, "step": 2346 }, { "epoch": 0.03329658135717603, "grad_norm": 3.328125, "learning_rate": 4.9886208428728896e-05, "loss": 0.9496, "step": 2348 }, { "epoch": 0.033324943010802244, "grad_norm": 2.84375, "learning_rate": 4.988599539831698e-05, "loss": 0.9549, "step": 2350 }, { "epoch": 0.03335330466442846, "grad_norm": 3.140625, "learning_rate": 4.988578216913896e-05, "loss": 1.0257, "step": 2352 }, { "epoch": 0.03338166631805468, "grad_norm": 3.0, "learning_rate": 4.988556874119655e-05, "loss": 0.9331, "step": 2354 }, { "epoch": 0.03341002797168089, "grad_norm": 3.265625, "learning_rate": 4.988535511449143e-05, "loss": 1.0137, "step": 2356 }, { "epoch": 0.033438389625307104, "grad_norm": 3.0625, "learning_rate": 4.988514128902532e-05, "loss": 0.9388, "step": 2358 }, { "epoch": 0.03346675127893332, "grad_norm": 2.875, "learning_rate": 4.988492726479993e-05, "loss": 0.9213, "step": 2360 }, { "epoch": 0.03349511293255953, "grad_norm": 3.578125, "learning_rate": 4.988471304181697e-05, "loss": 0.9582, "step": 2362 }, { "epoch": 0.03352347458618575, "grad_norm": 2.703125, "learning_rate": 4.988449862007814e-05, "loss": 0.9151, "step": 2364 }, { "epoch": 0.033551836239811964, "grad_norm": 3.171875, "learning_rate": 4.9884283999585166e-05, "loss": 0.9248, "step": 2366 }, { "epoch": 0.03358019789343818, "grad_norm": 2.90625, "learning_rate": 4.9884069180339746e-05, "loss": 0.9681, "step": 2368 }, { "epoch": 0.03360855954706439, "grad_norm": 3.015625, "learning_rate": 4.9883854162343615e-05, "loss": 0.93, "step": 2370 }, { "epoch": 0.0336369212006906, "grad_norm": 3.234375, "learning_rate": 4.988363894559847e-05, "loss": 0.9593, "step": 2372 }, { "epoch": 0.03366528285431682, "grad_norm": 3.078125, "learning_rate": 4.9883423530106057e-05, "loss": 0.9575, "step": 2374 }, { "epoch": 0.03369364450794304, "grad_norm": 3.21875, "learning_rate": 4.988320791586807e-05, "loss": 0.9991, "step": 2376 }, { "epoch": 0.03372200616156925, "grad_norm": 3.40625, "learning_rate": 4.988299210288625e-05, "loss": 0.8904, "step": 2378 }, { "epoch": 0.03375036781519546, "grad_norm": 3.078125, "learning_rate": 4.9882776091162304e-05, "loss": 0.9536, "step": 2380 }, { "epoch": 0.033778729468821676, "grad_norm": 3.09375, "learning_rate": 4.9882559880697964e-05, "loss": 0.966, "step": 2382 }, { "epoch": 0.033807091122447896, "grad_norm": 3.421875, "learning_rate": 4.988234347149496e-05, "loss": 0.9086, "step": 2384 }, { "epoch": 0.03383545277607411, "grad_norm": 3.09375, "learning_rate": 4.988212686355502e-05, "loss": 0.9716, "step": 2386 }, { "epoch": 0.03386381442970032, "grad_norm": 2.828125, "learning_rate": 4.988191005687988e-05, "loss": 0.9497, "step": 2388 }, { "epoch": 0.033892176083326536, "grad_norm": 3.328125, "learning_rate": 4.988169305147126e-05, "loss": 0.912, "step": 2390 }, { "epoch": 0.033920537736952756, "grad_norm": 2.984375, "learning_rate": 4.988147584733089e-05, "loss": 0.9219, "step": 2392 }, { "epoch": 0.03394889939057897, "grad_norm": 3.84375, "learning_rate": 4.988125844446053e-05, "loss": 0.8951, "step": 2394 }, { "epoch": 0.03397726104420518, "grad_norm": 3.53125, "learning_rate": 4.988104084286189e-05, "loss": 1.0228, "step": 2396 }, { "epoch": 0.034005622697831396, "grad_norm": 3.328125, "learning_rate": 4.9880823042536715e-05, "loss": 0.9689, "step": 2398 }, { "epoch": 0.03403398435145761, "grad_norm": 3.203125, "learning_rate": 4.988060504348675e-05, "loss": 0.9503, "step": 2400 }, { "epoch": 0.03406234600508383, "grad_norm": 3.03125, "learning_rate": 4.988038684571373e-05, "loss": 1.0261, "step": 2402 }, { "epoch": 0.03409070765871004, "grad_norm": 2.890625, "learning_rate": 4.98801684492194e-05, "loss": 0.9457, "step": 2404 }, { "epoch": 0.034119069312336256, "grad_norm": 2.8125, "learning_rate": 4.987994985400551e-05, "loss": 0.9852, "step": 2406 }, { "epoch": 0.03414743096596247, "grad_norm": 3.171875, "learning_rate": 4.98797310600738e-05, "loss": 0.9465, "step": 2408 }, { "epoch": 0.03417579261958868, "grad_norm": 3.125, "learning_rate": 4.987951206742601e-05, "loss": 0.979, "step": 2410 }, { "epoch": 0.0342041542732149, "grad_norm": 3.125, "learning_rate": 4.987929287606391e-05, "loss": 0.9343, "step": 2412 }, { "epoch": 0.034232515926841116, "grad_norm": 3.171875, "learning_rate": 4.9879073485989234e-05, "loss": 0.9189, "step": 2414 }, { "epoch": 0.03426087758046733, "grad_norm": 3.140625, "learning_rate": 4.987885389720374e-05, "loss": 0.937, "step": 2416 }, { "epoch": 0.03428923923409354, "grad_norm": 3.015625, "learning_rate": 4.987863410970917e-05, "loss": 0.9374, "step": 2418 }, { "epoch": 0.034317600887719756, "grad_norm": 3.203125, "learning_rate": 4.98784141235073e-05, "loss": 0.9331, "step": 2420 }, { "epoch": 0.034345962541345976, "grad_norm": 2.96875, "learning_rate": 4.987819393859987e-05, "loss": 0.9507, "step": 2422 }, { "epoch": 0.03437432419497219, "grad_norm": 3.015625, "learning_rate": 4.987797355498866e-05, "loss": 0.957, "step": 2424 }, { "epoch": 0.0344026858485984, "grad_norm": 3.25, "learning_rate": 4.98777529726754e-05, "loss": 0.9512, "step": 2426 }, { "epoch": 0.034431047502224615, "grad_norm": 3.34375, "learning_rate": 4.987753219166188e-05, "loss": 0.9439, "step": 2428 }, { "epoch": 0.03445940915585083, "grad_norm": 3.25, "learning_rate": 4.987731121194984e-05, "loss": 0.9095, "step": 2430 }, { "epoch": 0.03448777080947705, "grad_norm": 3.15625, "learning_rate": 4.9877090033541065e-05, "loss": 0.9493, "step": 2432 }, { "epoch": 0.03451613246310326, "grad_norm": 3.078125, "learning_rate": 4.98768686564373e-05, "loss": 0.9257, "step": 2434 }, { "epoch": 0.034544494116729475, "grad_norm": 3.203125, "learning_rate": 4.987664708064034e-05, "loss": 0.9929, "step": 2436 }, { "epoch": 0.03457285577035569, "grad_norm": 2.953125, "learning_rate": 4.987642530615193e-05, "loss": 0.9026, "step": 2438 }, { "epoch": 0.03460121742398191, "grad_norm": 3.375, "learning_rate": 4.987620333297386e-05, "loss": 0.9847, "step": 2440 }, { "epoch": 0.03462957907760812, "grad_norm": 3.078125, "learning_rate": 4.9875981161107885e-05, "loss": 1.0044, "step": 2442 }, { "epoch": 0.034657940731234335, "grad_norm": 3.03125, "learning_rate": 4.9875758790555805e-05, "loss": 0.9583, "step": 2444 }, { "epoch": 0.03468630238486055, "grad_norm": 3.015625, "learning_rate": 4.987553622131936e-05, "loss": 0.9715, "step": 2446 }, { "epoch": 0.03471466403848676, "grad_norm": 3.140625, "learning_rate": 4.987531345340036e-05, "loss": 0.9161, "step": 2448 }, { "epoch": 0.03474302569211298, "grad_norm": 3.375, "learning_rate": 4.987509048680058e-05, "loss": 0.9912, "step": 2450 }, { "epoch": 0.034771387345739195, "grad_norm": 2.921875, "learning_rate": 4.9874867321521776e-05, "loss": 0.9411, "step": 2452 }, { "epoch": 0.03479974899936541, "grad_norm": 3.328125, "learning_rate": 4.9874643957565756e-05, "loss": 0.9868, "step": 2454 }, { "epoch": 0.03482811065299162, "grad_norm": 3.28125, "learning_rate": 4.9874420394934295e-05, "loss": 0.9404, "step": 2456 }, { "epoch": 0.034856472306617835, "grad_norm": 3.0, "learning_rate": 4.987419663362918e-05, "loss": 0.9341, "step": 2458 }, { "epoch": 0.034884833960244055, "grad_norm": 2.953125, "learning_rate": 4.98739726736522e-05, "loss": 0.9619, "step": 2460 }, { "epoch": 0.03491319561387027, "grad_norm": 3.46875, "learning_rate": 4.9873748515005134e-05, "loss": 0.9452, "step": 2462 }, { "epoch": 0.03494155726749648, "grad_norm": 3.21875, "learning_rate": 4.987352415768979e-05, "loss": 0.9458, "step": 2464 }, { "epoch": 0.034969918921122695, "grad_norm": 3.453125, "learning_rate": 4.987329960170794e-05, "loss": 0.9616, "step": 2466 }, { "epoch": 0.03499828057474891, "grad_norm": 3.140625, "learning_rate": 4.987307484706139e-05, "loss": 0.9194, "step": 2468 }, { "epoch": 0.03502664222837513, "grad_norm": 3.03125, "learning_rate": 4.987284989375193e-05, "loss": 0.9696, "step": 2470 }, { "epoch": 0.03505500388200134, "grad_norm": 3.125, "learning_rate": 4.987262474178136e-05, "loss": 0.9721, "step": 2472 }, { "epoch": 0.035083365535627554, "grad_norm": 3.125, "learning_rate": 4.987239939115148e-05, "loss": 1.0022, "step": 2474 }, { "epoch": 0.03511172718925377, "grad_norm": 3.0625, "learning_rate": 4.987217384186409e-05, "loss": 0.9376, "step": 2476 }, { "epoch": 0.03514008884287998, "grad_norm": 2.9375, "learning_rate": 4.987194809392098e-05, "loss": 0.962, "step": 2478 }, { "epoch": 0.0351684504965062, "grad_norm": 3.015625, "learning_rate": 4.9871722147323965e-05, "loss": 0.9192, "step": 2480 }, { "epoch": 0.035196812150132414, "grad_norm": 3.515625, "learning_rate": 4.9871496002074845e-05, "loss": 0.9698, "step": 2482 }, { "epoch": 0.03522517380375863, "grad_norm": 3.0, "learning_rate": 4.987126965817543e-05, "loss": 0.9654, "step": 2484 }, { "epoch": 0.03525353545738484, "grad_norm": 2.828125, "learning_rate": 4.987104311562753e-05, "loss": 0.9292, "step": 2486 }, { "epoch": 0.035281897111011054, "grad_norm": 3.078125, "learning_rate": 4.987081637443294e-05, "loss": 0.9167, "step": 2488 }, { "epoch": 0.035310258764637274, "grad_norm": 3.265625, "learning_rate": 4.987058943459348e-05, "loss": 0.9359, "step": 2490 }, { "epoch": 0.03533862041826349, "grad_norm": 3.484375, "learning_rate": 4.9870362296110974e-05, "loss": 0.9838, "step": 2492 }, { "epoch": 0.0353669820718897, "grad_norm": 3.1875, "learning_rate": 4.987013495898721e-05, "loss": 0.9883, "step": 2494 }, { "epoch": 0.035395343725515914, "grad_norm": 3.28125, "learning_rate": 4.986990742322404e-05, "loss": 0.9159, "step": 2496 }, { "epoch": 0.035423705379142134, "grad_norm": 2.640625, "learning_rate": 4.986967968882324e-05, "loss": 0.957, "step": 2498 }, { "epoch": 0.03545206703276835, "grad_norm": 2.984375, "learning_rate": 4.986945175578667e-05, "loss": 0.9826, "step": 2500 }, { "epoch": 0.03548042868639456, "grad_norm": 3.1875, "learning_rate": 4.986922362411611e-05, "loss": 0.9155, "step": 2502 }, { "epoch": 0.035508790340020774, "grad_norm": 2.953125, "learning_rate": 4.9868995293813415e-05, "loss": 0.9382, "step": 2504 }, { "epoch": 0.03553715199364699, "grad_norm": 2.609375, "learning_rate": 4.9868766764880394e-05, "loss": 0.8946, "step": 2506 }, { "epoch": 0.03556551364727321, "grad_norm": 3.0, "learning_rate": 4.9868538037318865e-05, "loss": 0.9481, "step": 2508 }, { "epoch": 0.03559387530089942, "grad_norm": 3.265625, "learning_rate": 4.986830911113067e-05, "loss": 0.9466, "step": 2510 }, { "epoch": 0.035622236954525634, "grad_norm": 3.375, "learning_rate": 4.986807998631764e-05, "loss": 0.9481, "step": 2512 }, { "epoch": 0.03565059860815185, "grad_norm": 2.90625, "learning_rate": 4.986785066288159e-05, "loss": 0.9291, "step": 2514 }, { "epoch": 0.03567896026177806, "grad_norm": 3.15625, "learning_rate": 4.986762114082435e-05, "loss": 0.9184, "step": 2516 }, { "epoch": 0.03570732191540428, "grad_norm": 3.296875, "learning_rate": 4.986739142014777e-05, "loss": 0.9388, "step": 2518 }, { "epoch": 0.035735683569030494, "grad_norm": 2.875, "learning_rate": 4.9867161500853674e-05, "loss": 0.937, "step": 2520 }, { "epoch": 0.03576404522265671, "grad_norm": 3.0, "learning_rate": 4.98669313829439e-05, "loss": 0.9204, "step": 2522 }, { "epoch": 0.03579240687628292, "grad_norm": 3.03125, "learning_rate": 4.986670106642028e-05, "loss": 0.9359, "step": 2524 }, { "epoch": 0.03582076852990913, "grad_norm": 3.40625, "learning_rate": 4.986647055128467e-05, "loss": 0.9309, "step": 2526 }, { "epoch": 0.03584913018353535, "grad_norm": 3.21875, "learning_rate": 4.9866239837538894e-05, "loss": 0.9727, "step": 2528 }, { "epoch": 0.03587749183716157, "grad_norm": 3.265625, "learning_rate": 4.986600892518482e-05, "loss": 1.0178, "step": 2530 }, { "epoch": 0.03590585349078778, "grad_norm": 3.453125, "learning_rate": 4.9865777814224254e-05, "loss": 0.9966, "step": 2532 }, { "epoch": 0.03593421514441399, "grad_norm": 3.34375, "learning_rate": 4.9865546504659063e-05, "loss": 0.9467, "step": 2534 }, { "epoch": 0.035962576798040206, "grad_norm": 3.328125, "learning_rate": 4.98653149964911e-05, "loss": 1.0089, "step": 2536 }, { "epoch": 0.03599093845166643, "grad_norm": 3.375, "learning_rate": 4.986508328972221e-05, "loss": 0.9364, "step": 2538 }, { "epoch": 0.03601930010529264, "grad_norm": 3.359375, "learning_rate": 4.986485138435424e-05, "loss": 0.9394, "step": 2540 }, { "epoch": 0.03604766175891885, "grad_norm": 2.765625, "learning_rate": 4.9864619280389036e-05, "loss": 0.9471, "step": 2542 }, { "epoch": 0.036076023412545066, "grad_norm": 3.65625, "learning_rate": 4.9864386977828466e-05, "loss": 0.9119, "step": 2544 }, { "epoch": 0.036104385066171286, "grad_norm": 3.015625, "learning_rate": 4.9864154476674374e-05, "loss": 0.8974, "step": 2546 }, { "epoch": 0.0361327467197975, "grad_norm": 2.90625, "learning_rate": 4.9863921776928624e-05, "loss": 0.9453, "step": 2548 }, { "epoch": 0.03616110837342371, "grad_norm": 3.28125, "learning_rate": 4.986368887859307e-05, "loss": 0.9797, "step": 2550 }, { "epoch": 0.036189470027049926, "grad_norm": 3.203125, "learning_rate": 4.986345578166958e-05, "loss": 0.9929, "step": 2552 }, { "epoch": 0.03621783168067614, "grad_norm": 3.3125, "learning_rate": 4.986322248616001e-05, "loss": 0.9148, "step": 2554 }, { "epoch": 0.03624619333430236, "grad_norm": 3.109375, "learning_rate": 4.986298899206622e-05, "loss": 0.9413, "step": 2556 }, { "epoch": 0.03627455498792857, "grad_norm": 3.046875, "learning_rate": 4.986275529939008e-05, "loss": 0.9648, "step": 2558 }, { "epoch": 0.036302916641554786, "grad_norm": 2.984375, "learning_rate": 4.986252140813346e-05, "loss": 0.9752, "step": 2560 }, { "epoch": 0.036331278295181, "grad_norm": 3.234375, "learning_rate": 4.9862287318298213e-05, "loss": 0.9226, "step": 2562 }, { "epoch": 0.03635963994880721, "grad_norm": 3.21875, "learning_rate": 4.9862053029886226e-05, "loss": 0.9403, "step": 2564 }, { "epoch": 0.03638800160243343, "grad_norm": 3.15625, "learning_rate": 4.986181854289936e-05, "loss": 0.994, "step": 2566 }, { "epoch": 0.036416363256059646, "grad_norm": 3.1875, "learning_rate": 4.9861583857339486e-05, "loss": 0.9743, "step": 2568 }, { "epoch": 0.03644472490968586, "grad_norm": 2.546875, "learning_rate": 4.9861348973208494e-05, "loss": 0.9341, "step": 2570 }, { "epoch": 0.03647308656331207, "grad_norm": 2.984375, "learning_rate": 4.9861113890508246e-05, "loss": 0.9457, "step": 2572 }, { "epoch": 0.036501448216938286, "grad_norm": 3.625, "learning_rate": 4.9860878609240626e-05, "loss": 1.0079, "step": 2574 }, { "epoch": 0.036529809870564506, "grad_norm": 3.765625, "learning_rate": 4.9860643129407504e-05, "loss": 0.9638, "step": 2576 }, { "epoch": 0.03655817152419072, "grad_norm": 3.015625, "learning_rate": 4.986040745101077e-05, "loss": 0.9799, "step": 2578 }, { "epoch": 0.03658653317781693, "grad_norm": 2.875, "learning_rate": 4.98601715740523e-05, "loss": 0.9228, "step": 2580 }, { "epoch": 0.036614894831443145, "grad_norm": 2.90625, "learning_rate": 4.985993549853398e-05, "loss": 0.9615, "step": 2582 }, { "epoch": 0.03664325648506936, "grad_norm": 3.234375, "learning_rate": 4.9859699224457704e-05, "loss": 0.9349, "step": 2584 }, { "epoch": 0.03667161813869558, "grad_norm": 3.21875, "learning_rate": 4.985946275182535e-05, "loss": 0.9544, "step": 2586 }, { "epoch": 0.03669997979232179, "grad_norm": 3.046875, "learning_rate": 4.985922608063881e-05, "loss": 0.9708, "step": 2588 }, { "epoch": 0.036728341445948005, "grad_norm": 3.296875, "learning_rate": 4.985898921089997e-05, "loss": 0.9114, "step": 2590 }, { "epoch": 0.03675670309957422, "grad_norm": 3.625, "learning_rate": 4.985875214261073e-05, "loss": 0.9517, "step": 2592 }, { "epoch": 0.03678506475320044, "grad_norm": 3.171875, "learning_rate": 4.9858514875772974e-05, "loss": 0.9283, "step": 2594 }, { "epoch": 0.03681342640682665, "grad_norm": 3.109375, "learning_rate": 4.98582774103886e-05, "loss": 0.9312, "step": 2596 }, { "epoch": 0.036841788060452865, "grad_norm": 3.078125, "learning_rate": 4.985803974645951e-05, "loss": 0.8272, "step": 2598 }, { "epoch": 0.03687014971407908, "grad_norm": 2.96875, "learning_rate": 4.98578018839876e-05, "loss": 0.8745, "step": 2600 }, { "epoch": 0.03689851136770529, "grad_norm": 2.921875, "learning_rate": 4.985756382297476e-05, "loss": 0.9505, "step": 2602 }, { "epoch": 0.03692687302133151, "grad_norm": 3.765625, "learning_rate": 4.985732556342291e-05, "loss": 0.9357, "step": 2604 }, { "epoch": 0.036955234674957725, "grad_norm": 3.28125, "learning_rate": 4.985708710533394e-05, "loss": 0.9069, "step": 2606 }, { "epoch": 0.03698359632858394, "grad_norm": 3.15625, "learning_rate": 4.985684844870975e-05, "loss": 0.9857, "step": 2608 }, { "epoch": 0.03701195798221015, "grad_norm": 3.203125, "learning_rate": 4.9856609593552264e-05, "loss": 0.9657, "step": 2610 }, { "epoch": 0.037040319635836365, "grad_norm": 3.140625, "learning_rate": 4.985637053986337e-05, "loss": 0.9611, "step": 2612 }, { "epoch": 0.037068681289462585, "grad_norm": 3.25, "learning_rate": 4.985613128764499e-05, "loss": 0.9181, "step": 2614 }, { "epoch": 0.0370970429430888, "grad_norm": 3.25, "learning_rate": 4.985589183689904e-05, "loss": 0.9769, "step": 2616 }, { "epoch": 0.03712540459671501, "grad_norm": 2.796875, "learning_rate": 4.985565218762741e-05, "loss": 0.9369, "step": 2618 }, { "epoch": 0.037153766250341225, "grad_norm": 3.203125, "learning_rate": 4.9855412339832034e-05, "loss": 1.0013, "step": 2620 }, { "epoch": 0.03718212790396744, "grad_norm": 3.234375, "learning_rate": 4.9855172293514826e-05, "loss": 0.9634, "step": 2622 }, { "epoch": 0.03721048955759366, "grad_norm": 2.9375, "learning_rate": 4.985493204867769e-05, "loss": 0.8902, "step": 2624 }, { "epoch": 0.03723885121121987, "grad_norm": 3.5, "learning_rate": 4.9854691605322566e-05, "loss": 0.9198, "step": 2626 }, { "epoch": 0.037267212864846085, "grad_norm": 3.078125, "learning_rate": 4.985445096345135e-05, "loss": 0.8597, "step": 2628 }, { "epoch": 0.0372955745184723, "grad_norm": 2.96875, "learning_rate": 4.985421012306598e-05, "loss": 0.8926, "step": 2630 }, { "epoch": 0.03732393617209851, "grad_norm": 3.109375, "learning_rate": 4.9853969084168386e-05, "loss": 0.8964, "step": 2632 }, { "epoch": 0.03735229782572473, "grad_norm": 2.9375, "learning_rate": 4.985372784676047e-05, "loss": 0.9236, "step": 2634 }, { "epoch": 0.037380659479350944, "grad_norm": 3.453125, "learning_rate": 4.985348641084418e-05, "loss": 0.9352, "step": 2636 }, { "epoch": 0.03740902113297716, "grad_norm": 3.171875, "learning_rate": 4.9853244776421434e-05, "loss": 0.9789, "step": 2638 }, { "epoch": 0.03743738278660337, "grad_norm": 3.0625, "learning_rate": 4.985300294349417e-05, "loss": 0.961, "step": 2640 }, { "epoch": 0.037465744440229584, "grad_norm": 2.796875, "learning_rate": 4.98527609120643e-05, "loss": 0.9507, "step": 2642 }, { "epoch": 0.037494106093855804, "grad_norm": 3.421875, "learning_rate": 4.985251868213377e-05, "loss": 0.9313, "step": 2644 }, { "epoch": 0.03752246774748202, "grad_norm": 3.1875, "learning_rate": 4.985227625370453e-05, "loss": 0.9657, "step": 2646 }, { "epoch": 0.03755082940110823, "grad_norm": 2.796875, "learning_rate": 4.9852033626778496e-05, "loss": 0.926, "step": 2648 }, { "epoch": 0.037579191054734444, "grad_norm": 3.15625, "learning_rate": 4.985179080135761e-05, "loss": 0.9057, "step": 2650 }, { "epoch": 0.037607552708360664, "grad_norm": 3.34375, "learning_rate": 4.985154777744382e-05, "loss": 0.9325, "step": 2652 }, { "epoch": 0.03763591436198688, "grad_norm": 3.0625, "learning_rate": 4.9851304555039046e-05, "loss": 0.9544, "step": 2654 }, { "epoch": 0.03766427601561309, "grad_norm": 3.21875, "learning_rate": 4.985106113414526e-05, "loss": 0.9114, "step": 2656 }, { "epoch": 0.037692637669239304, "grad_norm": 4.21875, "learning_rate": 4.985081751476438e-05, "loss": 0.9855, "step": 2658 }, { "epoch": 0.03772099932286552, "grad_norm": 3.6875, "learning_rate": 4.985057369689837e-05, "loss": 0.9693, "step": 2660 }, { "epoch": 0.03774936097649174, "grad_norm": 2.90625, "learning_rate": 4.985032968054917e-05, "loss": 0.9405, "step": 2662 }, { "epoch": 0.03777772263011795, "grad_norm": 4.0625, "learning_rate": 4.985008546571873e-05, "loss": 0.9643, "step": 2664 }, { "epoch": 0.037806084283744164, "grad_norm": 2.796875, "learning_rate": 4.9849841052408995e-05, "loss": 0.9336, "step": 2666 }, { "epoch": 0.03783444593737038, "grad_norm": 3.015625, "learning_rate": 4.9849596440621925e-05, "loss": 0.9628, "step": 2668 }, { "epoch": 0.03786280759099659, "grad_norm": 3.109375, "learning_rate": 4.984935163035946e-05, "loss": 1.0044, "step": 2670 }, { "epoch": 0.03789116924462281, "grad_norm": 3.3125, "learning_rate": 4.984910662162359e-05, "loss": 0.9322, "step": 2672 }, { "epoch": 0.037919530898249024, "grad_norm": 3.015625, "learning_rate": 4.9848861414416226e-05, "loss": 0.9411, "step": 2674 }, { "epoch": 0.03794789255187524, "grad_norm": 3.34375, "learning_rate": 4.984861600873936e-05, "loss": 0.9837, "step": 2676 }, { "epoch": 0.03797625420550145, "grad_norm": 2.90625, "learning_rate": 4.9848370404594937e-05, "loss": 0.9709, "step": 2678 }, { "epoch": 0.03800461585912766, "grad_norm": 2.953125, "learning_rate": 4.984812460198493e-05, "loss": 0.9074, "step": 2680 }, { "epoch": 0.038032977512753884, "grad_norm": 3.109375, "learning_rate": 4.984787860091128e-05, "loss": 0.9626, "step": 2682 }, { "epoch": 0.0380613391663801, "grad_norm": 3.203125, "learning_rate": 4.984763240137597e-05, "loss": 0.9058, "step": 2684 }, { "epoch": 0.03808970082000631, "grad_norm": 3.640625, "learning_rate": 4.984738600338097e-05, "loss": 0.9681, "step": 2686 }, { "epoch": 0.03811806247363252, "grad_norm": 3.15625, "learning_rate": 4.9847139406928234e-05, "loss": 0.9472, "step": 2688 }, { "epoch": 0.038146424127258736, "grad_norm": 3.171875, "learning_rate": 4.984689261201974e-05, "loss": 0.9497, "step": 2690 }, { "epoch": 0.03817478578088496, "grad_norm": 3.15625, "learning_rate": 4.9846645618657463e-05, "loss": 0.8817, "step": 2692 }, { "epoch": 0.03820314743451117, "grad_norm": 3.078125, "learning_rate": 4.984639842684336e-05, "loss": 0.9618, "step": 2694 }, { "epoch": 0.03823150908813738, "grad_norm": 3.078125, "learning_rate": 4.984615103657942e-05, "loss": 0.9814, "step": 2696 }, { "epoch": 0.038259870741763596, "grad_norm": 3.40625, "learning_rate": 4.984590344786761e-05, "loss": 0.9685, "step": 2698 }, { "epoch": 0.03828823239538982, "grad_norm": 3.25, "learning_rate": 4.9845655660709924e-05, "loss": 0.8749, "step": 2700 }, { "epoch": 0.03831659404901603, "grad_norm": 3.296875, "learning_rate": 4.984540767510833e-05, "loss": 0.9677, "step": 2702 }, { "epoch": 0.03834495570264224, "grad_norm": 2.90625, "learning_rate": 4.9845159491064786e-05, "loss": 0.8953, "step": 2704 }, { "epoch": 0.038373317356268456, "grad_norm": 3.1875, "learning_rate": 4.9844911108581314e-05, "loss": 0.9207, "step": 2706 }, { "epoch": 0.03840167900989467, "grad_norm": 2.71875, "learning_rate": 4.9844662527659876e-05, "loss": 0.8936, "step": 2708 }, { "epoch": 0.03843004066352089, "grad_norm": 3.078125, "learning_rate": 4.984441374830246e-05, "loss": 0.9655, "step": 2710 }, { "epoch": 0.0384584023171471, "grad_norm": 3.265625, "learning_rate": 4.984416477051106e-05, "loss": 0.9322, "step": 2712 }, { "epoch": 0.038486763970773316, "grad_norm": 2.828125, "learning_rate": 4.984391559428765e-05, "loss": 0.9132, "step": 2714 }, { "epoch": 0.03851512562439953, "grad_norm": 3.53125, "learning_rate": 4.984366621963423e-05, "loss": 0.9495, "step": 2716 }, { "epoch": 0.03854348727802574, "grad_norm": 3.40625, "learning_rate": 4.98434166465528e-05, "loss": 0.9573, "step": 2718 }, { "epoch": 0.03857184893165196, "grad_norm": 3.015625, "learning_rate": 4.9843166875045334e-05, "loss": 0.9406, "step": 2720 }, { "epoch": 0.038600210585278176, "grad_norm": 2.9375, "learning_rate": 4.984291690511384e-05, "loss": 0.9136, "step": 2722 }, { "epoch": 0.03862857223890439, "grad_norm": 3.546875, "learning_rate": 4.984266673676031e-05, "loss": 0.9374, "step": 2724 }, { "epoch": 0.0386569338925306, "grad_norm": 3.140625, "learning_rate": 4.984241636998674e-05, "loss": 0.9429, "step": 2726 }, { "epoch": 0.038685295546156816, "grad_norm": 3.328125, "learning_rate": 4.984216580479515e-05, "loss": 0.9108, "step": 2728 }, { "epoch": 0.038713657199783036, "grad_norm": 3.359375, "learning_rate": 4.9841915041187515e-05, "loss": 0.9416, "step": 2730 }, { "epoch": 0.03874201885340925, "grad_norm": 3.09375, "learning_rate": 4.984166407916584e-05, "loss": 0.9223, "step": 2732 }, { "epoch": 0.03877038050703546, "grad_norm": 2.640625, "learning_rate": 4.9841412918732145e-05, "loss": 0.9658, "step": 2734 }, { "epoch": 0.038798742160661676, "grad_norm": 3.453125, "learning_rate": 4.9841161559888426e-05, "loss": 1.0167, "step": 2736 }, { "epoch": 0.03882710381428789, "grad_norm": 3.328125, "learning_rate": 4.98409100026367e-05, "loss": 0.9754, "step": 2738 }, { "epoch": 0.03885546546791411, "grad_norm": 2.890625, "learning_rate": 4.984065824697896e-05, "loss": 0.886, "step": 2740 }, { "epoch": 0.03888382712154032, "grad_norm": 3.1875, "learning_rate": 4.984040629291723e-05, "loss": 0.9702, "step": 2742 }, { "epoch": 0.038912188775166535, "grad_norm": 2.859375, "learning_rate": 4.984015414045352e-05, "loss": 0.8762, "step": 2744 }, { "epoch": 0.03894055042879275, "grad_norm": 3.53125, "learning_rate": 4.9839901789589836e-05, "loss": 0.9153, "step": 2746 }, { "epoch": 0.03896891208241897, "grad_norm": 2.96875, "learning_rate": 4.98396492403282e-05, "loss": 0.895, "step": 2748 }, { "epoch": 0.03899727373604518, "grad_norm": 3.28125, "learning_rate": 4.9839396492670634e-05, "loss": 0.9373, "step": 2750 }, { "epoch": 0.039025635389671395, "grad_norm": 3.515625, "learning_rate": 4.9839143546619146e-05, "loss": 0.9833, "step": 2752 }, { "epoch": 0.03905399704329761, "grad_norm": 3.109375, "learning_rate": 4.9838890402175764e-05, "loss": 0.9074, "step": 2754 }, { "epoch": 0.03908235869692382, "grad_norm": 3.03125, "learning_rate": 4.983863705934251e-05, "loss": 0.9346, "step": 2756 }, { "epoch": 0.03911072035055004, "grad_norm": 2.9375, "learning_rate": 4.98383835181214e-05, "loss": 0.933, "step": 2758 }, { "epoch": 0.039139082004176255, "grad_norm": 3.09375, "learning_rate": 4.9838129778514466e-05, "loss": 0.9368, "step": 2760 }, { "epoch": 0.03916744365780247, "grad_norm": 2.984375, "learning_rate": 4.9837875840523734e-05, "loss": 0.9744, "step": 2762 }, { "epoch": 0.03919580531142868, "grad_norm": 3.203125, "learning_rate": 4.9837621704151224e-05, "loss": 0.9635, "step": 2764 }, { "epoch": 0.039224166965054895, "grad_norm": 3.140625, "learning_rate": 4.9837367369398984e-05, "loss": 1.0056, "step": 2766 }, { "epoch": 0.039252528618681115, "grad_norm": 2.921875, "learning_rate": 4.9837112836269026e-05, "loss": 0.9289, "step": 2768 }, { "epoch": 0.03928089027230733, "grad_norm": 3.046875, "learning_rate": 4.9836858104763395e-05, "loss": 0.9339, "step": 2770 }, { "epoch": 0.03930925192593354, "grad_norm": 2.84375, "learning_rate": 4.983660317488412e-05, "loss": 0.9718, "step": 2772 }, { "epoch": 0.039337613579559755, "grad_norm": 2.6875, "learning_rate": 4.9836348046633244e-05, "loss": 0.9478, "step": 2774 }, { "epoch": 0.03936597523318597, "grad_norm": 3.25, "learning_rate": 4.9836092720012795e-05, "loss": 0.9915, "step": 2776 }, { "epoch": 0.03939433688681219, "grad_norm": 3.125, "learning_rate": 4.983583719502482e-05, "loss": 0.8977, "step": 2778 }, { "epoch": 0.0394226985404384, "grad_norm": 3.265625, "learning_rate": 4.9835581471671356e-05, "loss": 0.979, "step": 2780 }, { "epoch": 0.039451060194064615, "grad_norm": 2.8125, "learning_rate": 4.9835325549954446e-05, "loss": 0.9441, "step": 2782 }, { "epoch": 0.03947942184769083, "grad_norm": 2.9375, "learning_rate": 4.983506942987613e-05, "loss": 0.9377, "step": 2784 }, { "epoch": 0.03950778350131704, "grad_norm": 3.375, "learning_rate": 4.983481311143846e-05, "loss": 0.9055, "step": 2786 }, { "epoch": 0.03953614515494326, "grad_norm": 4.125, "learning_rate": 4.983455659464348e-05, "loss": 0.9925, "step": 2788 }, { "epoch": 0.039564506808569475, "grad_norm": 3.03125, "learning_rate": 4.983429987949324e-05, "loss": 0.9112, "step": 2790 }, { "epoch": 0.03959286846219569, "grad_norm": 2.765625, "learning_rate": 4.983404296598979e-05, "loss": 0.9294, "step": 2792 }, { "epoch": 0.0396212301158219, "grad_norm": 3.234375, "learning_rate": 4.983378585413519e-05, "loss": 0.9394, "step": 2794 }, { "epoch": 0.039649591769448114, "grad_norm": 3.234375, "learning_rate": 4.983352854393148e-05, "loss": 0.9037, "step": 2796 }, { "epoch": 0.039677953423074334, "grad_norm": 3.0, "learning_rate": 4.9833271035380726e-05, "loss": 0.9349, "step": 2798 }, { "epoch": 0.03970631507670055, "grad_norm": 3.5, "learning_rate": 4.9833013328484965e-05, "loss": 0.9215, "step": 2800 }, { "epoch": 0.03973467673032676, "grad_norm": 3.140625, "learning_rate": 4.983275542324629e-05, "loss": 0.9314, "step": 2802 }, { "epoch": 0.039763038383952974, "grad_norm": 2.90625, "learning_rate": 4.9832497319666724e-05, "loss": 0.9567, "step": 2804 }, { "epoch": 0.039791400037579194, "grad_norm": 2.96875, "learning_rate": 4.983223901774836e-05, "loss": 0.9873, "step": 2806 }, { "epoch": 0.03981976169120541, "grad_norm": 3.1875, "learning_rate": 4.983198051749324e-05, "loss": 0.8804, "step": 2808 }, { "epoch": 0.03984812334483162, "grad_norm": 2.921875, "learning_rate": 4.9831721818903434e-05, "loss": 0.9592, "step": 2810 }, { "epoch": 0.039876484998457834, "grad_norm": 4.09375, "learning_rate": 4.983146292198101e-05, "loss": 0.9304, "step": 2812 }, { "epoch": 0.03990484665208405, "grad_norm": 3.4375, "learning_rate": 4.9831203826728036e-05, "loss": 0.9416, "step": 2814 }, { "epoch": 0.03993320830571027, "grad_norm": 3.21875, "learning_rate": 4.9830944533146584e-05, "loss": 1.0525, "step": 2816 }, { "epoch": 0.03996156995933648, "grad_norm": 2.828125, "learning_rate": 4.983068504123871e-05, "loss": 0.9605, "step": 2818 }, { "epoch": 0.039989931612962694, "grad_norm": 3.515625, "learning_rate": 4.9830425351006515e-05, "loss": 0.9575, "step": 2820 }, { "epoch": 0.04001829326658891, "grad_norm": 2.84375, "learning_rate": 4.983016546245205e-05, "loss": 0.9281, "step": 2822 }, { "epoch": 0.04004665492021512, "grad_norm": 3.03125, "learning_rate": 4.982990537557739e-05, "loss": 0.9023, "step": 2824 }, { "epoch": 0.04007501657384134, "grad_norm": 3.546875, "learning_rate": 4.982964509038463e-05, "loss": 0.9411, "step": 2826 }, { "epoch": 0.040103378227467554, "grad_norm": 3.359375, "learning_rate": 4.982938460687583e-05, "loss": 0.8976, "step": 2828 }, { "epoch": 0.04013173988109377, "grad_norm": 3.296875, "learning_rate": 4.982912392505308e-05, "loss": 0.9631, "step": 2830 }, { "epoch": 0.04016010153471998, "grad_norm": 3.015625, "learning_rate": 4.982886304491847e-05, "loss": 0.9476, "step": 2832 }, { "epoch": 0.04018846318834619, "grad_norm": 2.953125, "learning_rate": 4.982860196647406e-05, "loss": 0.9361, "step": 2834 }, { "epoch": 0.040216824841972414, "grad_norm": 2.859375, "learning_rate": 4.982834068972196e-05, "loss": 0.9101, "step": 2836 }, { "epoch": 0.04024518649559863, "grad_norm": 2.953125, "learning_rate": 4.9828079214664245e-05, "loss": 0.9587, "step": 2838 }, { "epoch": 0.04027354814922484, "grad_norm": 3.28125, "learning_rate": 4.982781754130301e-05, "loss": 0.896, "step": 2840 }, { "epoch": 0.04030190980285105, "grad_norm": 3.71875, "learning_rate": 4.9827555669640335e-05, "loss": 0.9158, "step": 2842 }, { "epoch": 0.04033027145647727, "grad_norm": 3.375, "learning_rate": 4.982729359967832e-05, "loss": 0.9883, "step": 2844 }, { "epoch": 0.04035863311010349, "grad_norm": 3.328125, "learning_rate": 4.9827031331419056e-05, "loss": 0.9299, "step": 2846 }, { "epoch": 0.0403869947637297, "grad_norm": 3.203125, "learning_rate": 4.982676886486463e-05, "loss": 0.9702, "step": 2848 }, { "epoch": 0.04041535641735591, "grad_norm": 2.828125, "learning_rate": 4.982650620001715e-05, "loss": 0.9169, "step": 2850 }, { "epoch": 0.040443718070982126, "grad_norm": 2.75, "learning_rate": 4.982624333687871e-05, "loss": 0.9169, "step": 2852 }, { "epoch": 0.04047207972460835, "grad_norm": 2.90625, "learning_rate": 4.982598027545141e-05, "loss": 0.9319, "step": 2854 }, { "epoch": 0.04050044137823456, "grad_norm": 3.078125, "learning_rate": 4.9825717015737336e-05, "loss": 0.9809, "step": 2856 }, { "epoch": 0.04052880303186077, "grad_norm": 3.015625, "learning_rate": 4.982545355773862e-05, "loss": 0.938, "step": 2858 }, { "epoch": 0.040557164685486986, "grad_norm": 3.890625, "learning_rate": 4.982518990145735e-05, "loss": 0.8894, "step": 2860 }, { "epoch": 0.0405855263391132, "grad_norm": 3.25, "learning_rate": 4.982492604689562e-05, "loss": 0.931, "step": 2862 }, { "epoch": 0.04061388799273942, "grad_norm": 2.953125, "learning_rate": 4.9824661994055564e-05, "loss": 0.9195, "step": 2864 }, { "epoch": 0.04064224964636563, "grad_norm": 3.109375, "learning_rate": 4.982439774293927e-05, "loss": 0.9341, "step": 2866 }, { "epoch": 0.040670611299991846, "grad_norm": 3.484375, "learning_rate": 4.982413329354885e-05, "loss": 0.9446, "step": 2868 }, { "epoch": 0.04069897295361806, "grad_norm": 3.421875, "learning_rate": 4.982386864588643e-05, "loss": 0.9015, "step": 2870 }, { "epoch": 0.04072733460724427, "grad_norm": 3.125, "learning_rate": 4.982360379995411e-05, "loss": 0.9191, "step": 2872 }, { "epoch": 0.04075569626087049, "grad_norm": 2.953125, "learning_rate": 4.982333875575401e-05, "loss": 0.9032, "step": 2874 }, { "epoch": 0.040784057914496706, "grad_norm": 3.265625, "learning_rate": 4.982307351328825e-05, "loss": 0.9839, "step": 2876 }, { "epoch": 0.04081241956812292, "grad_norm": 3.5, "learning_rate": 4.982280807255895e-05, "loss": 0.9874, "step": 2878 }, { "epoch": 0.04084078122174913, "grad_norm": 3.1875, "learning_rate": 4.982254243356823e-05, "loss": 0.9909, "step": 2880 }, { "epoch": 0.040869142875375346, "grad_norm": 3.34375, "learning_rate": 4.9822276596318195e-05, "loss": 0.9119, "step": 2882 }, { "epoch": 0.040897504529001566, "grad_norm": 2.96875, "learning_rate": 4.9822010560810986e-05, "loss": 0.8848, "step": 2884 }, { "epoch": 0.04092586618262778, "grad_norm": 3.03125, "learning_rate": 4.982174432704872e-05, "loss": 0.9456, "step": 2886 }, { "epoch": 0.04095422783625399, "grad_norm": 3.421875, "learning_rate": 4.982147789503353e-05, "loss": 0.9198, "step": 2888 }, { "epoch": 0.040982589489880206, "grad_norm": 3.34375, "learning_rate": 4.982121126476755e-05, "loss": 0.9682, "step": 2890 }, { "epoch": 0.04101095114350642, "grad_norm": 3.5625, "learning_rate": 4.982094443625289e-05, "loss": 1.0083, "step": 2892 }, { "epoch": 0.04103931279713264, "grad_norm": 2.625, "learning_rate": 4.982067740949169e-05, "loss": 0.9441, "step": 2894 }, { "epoch": 0.04106767445075885, "grad_norm": 3.25, "learning_rate": 4.982041018448609e-05, "loss": 0.9469, "step": 2896 }, { "epoch": 0.041096036104385066, "grad_norm": 3.125, "learning_rate": 4.982014276123821e-05, "loss": 0.9347, "step": 2898 }, { "epoch": 0.04112439775801128, "grad_norm": 3.109375, "learning_rate": 4.98198751397502e-05, "loss": 0.9864, "step": 2900 }, { "epoch": 0.0411527594116375, "grad_norm": 2.96875, "learning_rate": 4.981960732002419e-05, "loss": 0.9041, "step": 2902 }, { "epoch": 0.04118112106526371, "grad_norm": 3.015625, "learning_rate": 4.981933930206232e-05, "loss": 0.912, "step": 2904 }, { "epoch": 0.041209482718889925, "grad_norm": 2.984375, "learning_rate": 4.981907108586674e-05, "loss": 0.8821, "step": 2906 }, { "epoch": 0.04123784437251614, "grad_norm": 3.46875, "learning_rate": 4.981880267143957e-05, "loss": 0.9636, "step": 2908 }, { "epoch": 0.04126620602614235, "grad_norm": 2.828125, "learning_rate": 4.981853405878298e-05, "loss": 0.9409, "step": 2910 }, { "epoch": 0.04129456767976857, "grad_norm": 3.0, "learning_rate": 4.98182652478991e-05, "loss": 0.9295, "step": 2912 }, { "epoch": 0.041322929333394785, "grad_norm": 3.0625, "learning_rate": 4.981799623879008e-05, "loss": 0.9139, "step": 2914 }, { "epoch": 0.041351290987021, "grad_norm": 3.390625, "learning_rate": 4.981772703145806e-05, "loss": 0.8897, "step": 2916 }, { "epoch": 0.04137965264064721, "grad_norm": 3.125, "learning_rate": 4.981745762590521e-05, "loss": 0.9803, "step": 2918 }, { "epoch": 0.041408014294273425, "grad_norm": 3.140625, "learning_rate": 4.981718802213366e-05, "loss": 0.9084, "step": 2920 }, { "epoch": 0.041436375947899645, "grad_norm": 2.875, "learning_rate": 4.981691822014558e-05, "loss": 0.8994, "step": 2922 }, { "epoch": 0.04146473760152586, "grad_norm": 2.984375, "learning_rate": 4.981664821994312e-05, "loss": 0.9365, "step": 2924 }, { "epoch": 0.04149309925515207, "grad_norm": 3.90625, "learning_rate": 4.981637802152843e-05, "loss": 0.9405, "step": 2926 }, { "epoch": 0.041521460908778285, "grad_norm": 3.1875, "learning_rate": 4.981610762490368e-05, "loss": 0.9418, "step": 2928 }, { "epoch": 0.0415498225624045, "grad_norm": 3.3125, "learning_rate": 4.9815837030071024e-05, "loss": 0.9672, "step": 2930 }, { "epoch": 0.04157818421603072, "grad_norm": 3.234375, "learning_rate": 4.981556623703262e-05, "loss": 0.9216, "step": 2932 }, { "epoch": 0.04160654586965693, "grad_norm": 3.46875, "learning_rate": 4.981529524579063e-05, "loss": 0.9303, "step": 2934 }, { "epoch": 0.041634907523283145, "grad_norm": 3.34375, "learning_rate": 4.981502405634722e-05, "loss": 0.9041, "step": 2936 }, { "epoch": 0.04166326917690936, "grad_norm": 3.109375, "learning_rate": 4.9814752668704565e-05, "loss": 0.9281, "step": 2938 }, { "epoch": 0.04169163083053557, "grad_norm": 3.015625, "learning_rate": 4.981448108286482e-05, "loss": 0.9555, "step": 2940 }, { "epoch": 0.04171999248416179, "grad_norm": 3.1875, "learning_rate": 4.981420929883016e-05, "loss": 0.9433, "step": 2942 }, { "epoch": 0.041748354137788005, "grad_norm": 2.859375, "learning_rate": 4.981393731660276e-05, "loss": 0.9179, "step": 2944 }, { "epoch": 0.04177671579141422, "grad_norm": 3.609375, "learning_rate": 4.981366513618478e-05, "loss": 0.9292, "step": 2946 }, { "epoch": 0.04180507744504043, "grad_norm": 3.1875, "learning_rate": 4.9813392757578405e-05, "loss": 0.9108, "step": 2948 }, { "epoch": 0.041833439098666644, "grad_norm": 3.21875, "learning_rate": 4.9813120180785814e-05, "loss": 1.0379, "step": 2950 }, { "epoch": 0.041861800752292865, "grad_norm": 3.234375, "learning_rate": 4.981284740580916e-05, "loss": 0.9573, "step": 2952 }, { "epoch": 0.04189016240591908, "grad_norm": 3.09375, "learning_rate": 4.9812574432650654e-05, "loss": 0.9472, "step": 2954 }, { "epoch": 0.04191852405954529, "grad_norm": 3.328125, "learning_rate": 4.9812301261312456e-05, "loss": 0.9576, "step": 2956 }, { "epoch": 0.041946885713171504, "grad_norm": 3.078125, "learning_rate": 4.981202789179675e-05, "loss": 0.8965, "step": 2958 }, { "epoch": 0.041975247366797724, "grad_norm": 2.84375, "learning_rate": 4.981175432410573e-05, "loss": 0.8836, "step": 2960 }, { "epoch": 0.04200360902042394, "grad_norm": 3.375, "learning_rate": 4.981148055824157e-05, "loss": 0.8897, "step": 2962 }, { "epoch": 0.04203197067405015, "grad_norm": 3.375, "learning_rate": 4.981120659420646e-05, "loss": 0.9476, "step": 2964 }, { "epoch": 0.042060332327676364, "grad_norm": 3.421875, "learning_rate": 4.981093243200258e-05, "loss": 0.9885, "step": 2966 }, { "epoch": 0.04208869398130258, "grad_norm": 3.015625, "learning_rate": 4.981065807163214e-05, "loss": 0.9165, "step": 2968 }, { "epoch": 0.0421170556349288, "grad_norm": 2.984375, "learning_rate": 4.9810383513097316e-05, "loss": 0.9329, "step": 2970 }, { "epoch": 0.04214541728855501, "grad_norm": 3.28125, "learning_rate": 4.9810108756400294e-05, "loss": 0.9247, "step": 2972 }, { "epoch": 0.042173778942181224, "grad_norm": 2.921875, "learning_rate": 4.980983380154329e-05, "loss": 0.8931, "step": 2974 }, { "epoch": 0.04220214059580744, "grad_norm": 3.25, "learning_rate": 4.980955864852848e-05, "loss": 0.9464, "step": 2976 }, { "epoch": 0.04223050224943365, "grad_norm": 3.15625, "learning_rate": 4.9809283297358076e-05, "loss": 0.9112, "step": 2978 }, { "epoch": 0.04225886390305987, "grad_norm": 3.1875, "learning_rate": 4.980900774803427e-05, "loss": 0.9306, "step": 2980 }, { "epoch": 0.042287225556686084, "grad_norm": 3.90625, "learning_rate": 4.980873200055927e-05, "loss": 0.9246, "step": 2982 }, { "epoch": 0.0423155872103123, "grad_norm": 3.078125, "learning_rate": 4.980845605493526e-05, "loss": 0.9789, "step": 2984 }, { "epoch": 0.04234394886393851, "grad_norm": 3.21875, "learning_rate": 4.9808179911164466e-05, "loss": 0.9263, "step": 2986 }, { "epoch": 0.042372310517564724, "grad_norm": 3.53125, "learning_rate": 4.9807903569249085e-05, "loss": 0.9466, "step": 2988 }, { "epoch": 0.042400672171190944, "grad_norm": 3.296875, "learning_rate": 4.980762702919132e-05, "loss": 0.9118, "step": 2990 }, { "epoch": 0.04242903382481716, "grad_norm": 2.921875, "learning_rate": 4.980735029099338e-05, "loss": 0.9017, "step": 2992 }, { "epoch": 0.04245739547844337, "grad_norm": 3.328125, "learning_rate": 4.9807073354657485e-05, "loss": 0.9818, "step": 2994 }, { "epoch": 0.04248575713206958, "grad_norm": 3.828125, "learning_rate": 4.9806796220185836e-05, "loss": 0.9899, "step": 2996 }, { "epoch": 0.0425141187856958, "grad_norm": 2.953125, "learning_rate": 4.980651888758065e-05, "loss": 0.967, "step": 2998 }, { "epoch": 0.04254248043932202, "grad_norm": 3.09375, "learning_rate": 4.9806241356844155e-05, "loss": 0.9032, "step": 3000 }, { "epoch": 0.04257084209294823, "grad_norm": 2.703125, "learning_rate": 4.980596362797855e-05, "loss": 0.9328, "step": 3002 }, { "epoch": 0.04259920374657444, "grad_norm": 3.0625, "learning_rate": 4.980568570098605e-05, "loss": 0.9167, "step": 3004 }, { "epoch": 0.04262756540020066, "grad_norm": 3.03125, "learning_rate": 4.980540757586889e-05, "loss": 0.9344, "step": 3006 }, { "epoch": 0.04265592705382688, "grad_norm": 3.25, "learning_rate": 4.980512925262928e-05, "loss": 0.9981, "step": 3008 }, { "epoch": 0.04268428870745309, "grad_norm": 3.515625, "learning_rate": 4.980485073126946e-05, "loss": 0.9373, "step": 3010 }, { "epoch": 0.0427126503610793, "grad_norm": 3.046875, "learning_rate": 4.980457201179163e-05, "loss": 0.9236, "step": 3012 }, { "epoch": 0.042741012014705516, "grad_norm": 2.9375, "learning_rate": 4.9804293094198034e-05, "loss": 0.8755, "step": 3014 }, { "epoch": 0.04276937366833173, "grad_norm": 3.0, "learning_rate": 4.980401397849089e-05, "loss": 0.9276, "step": 3016 }, { "epoch": 0.04279773532195795, "grad_norm": 3.15625, "learning_rate": 4.9803734664672444e-05, "loss": 0.9319, "step": 3018 }, { "epoch": 0.04282609697558416, "grad_norm": 3.4375, "learning_rate": 4.9803455152744905e-05, "loss": 0.9213, "step": 3020 }, { "epoch": 0.042854458629210376, "grad_norm": 3.5, "learning_rate": 4.980317544271052e-05, "loss": 0.961, "step": 3022 }, { "epoch": 0.04288282028283659, "grad_norm": 3.328125, "learning_rate": 4.9802895534571516e-05, "loss": 0.9421, "step": 3024 }, { "epoch": 0.0429111819364628, "grad_norm": 3.34375, "learning_rate": 4.980261542833013e-05, "loss": 0.963, "step": 3026 }, { "epoch": 0.04293954359008902, "grad_norm": 2.703125, "learning_rate": 4.98023351239886e-05, "loss": 0.9187, "step": 3028 }, { "epoch": 0.042967905243715236, "grad_norm": 2.984375, "learning_rate": 4.980205462154917e-05, "loss": 0.9213, "step": 3030 }, { "epoch": 0.04299626689734145, "grad_norm": 3.125, "learning_rate": 4.980177392101407e-05, "loss": 0.915, "step": 3032 }, { "epoch": 0.04302462855096766, "grad_norm": 3.140625, "learning_rate": 4.980149302238555e-05, "loss": 0.9377, "step": 3034 }, { "epoch": 0.043052990204593876, "grad_norm": 2.765625, "learning_rate": 4.980121192566585e-05, "loss": 0.945, "step": 3036 }, { "epoch": 0.043081351858220096, "grad_norm": 3.03125, "learning_rate": 4.980093063085722e-05, "loss": 0.9242, "step": 3038 }, { "epoch": 0.04310971351184631, "grad_norm": 2.828125, "learning_rate": 4.9800649137961894e-05, "loss": 0.9368, "step": 3040 }, { "epoch": 0.04313807516547252, "grad_norm": 2.984375, "learning_rate": 4.980036744698214e-05, "loss": 0.9094, "step": 3042 }, { "epoch": 0.043166436819098736, "grad_norm": 3.5625, "learning_rate": 4.980008555792019e-05, "loss": 0.9681, "step": 3044 }, { "epoch": 0.04319479847272495, "grad_norm": 2.84375, "learning_rate": 4.979980347077831e-05, "loss": 0.8958, "step": 3046 }, { "epoch": 0.04322316012635117, "grad_norm": 3.109375, "learning_rate": 4.979952118555873e-05, "loss": 0.9428, "step": 3048 }, { "epoch": 0.04325152177997738, "grad_norm": 3.3125, "learning_rate": 4.979923870226373e-05, "loss": 0.8998, "step": 3050 }, { "epoch": 0.043279883433603596, "grad_norm": 3.046875, "learning_rate": 4.979895602089556e-05, "loss": 0.9338, "step": 3052 }, { "epoch": 0.04330824508722981, "grad_norm": 2.90625, "learning_rate": 4.979867314145646e-05, "loss": 0.908, "step": 3054 }, { "epoch": 0.04333660674085603, "grad_norm": 2.953125, "learning_rate": 4.979839006394872e-05, "loss": 0.9123, "step": 3056 }, { "epoch": 0.04336496839448224, "grad_norm": 3.015625, "learning_rate": 4.979810678837458e-05, "loss": 0.9776, "step": 3058 }, { "epoch": 0.043393330048108456, "grad_norm": 3.046875, "learning_rate": 4.9797823314736304e-05, "loss": 0.8977, "step": 3060 }, { "epoch": 0.04342169170173467, "grad_norm": 2.96875, "learning_rate": 4.979753964303616e-05, "loss": 1.0023, "step": 3062 }, { "epoch": 0.04345005335536088, "grad_norm": 3.078125, "learning_rate": 4.979725577327641e-05, "loss": 0.9269, "step": 3064 }, { "epoch": 0.0434784150089871, "grad_norm": 2.921875, "learning_rate": 4.979697170545933e-05, "loss": 0.9094, "step": 3066 }, { "epoch": 0.043506776662613315, "grad_norm": 3.28125, "learning_rate": 4.9796687439587184e-05, "loss": 0.97, "step": 3068 }, { "epoch": 0.04353513831623953, "grad_norm": 2.984375, "learning_rate": 4.9796402975662234e-05, "loss": 0.9724, "step": 3070 }, { "epoch": 0.04356349996986574, "grad_norm": 3.078125, "learning_rate": 4.9796118313686765e-05, "loss": 0.917, "step": 3072 }, { "epoch": 0.043591861623491955, "grad_norm": 3.421875, "learning_rate": 4.9795833453663046e-05, "loss": 0.9411, "step": 3074 }, { "epoch": 0.043620223277118175, "grad_norm": 3.421875, "learning_rate": 4.979554839559335e-05, "loss": 0.8897, "step": 3076 }, { "epoch": 0.04364858493074439, "grad_norm": 3.140625, "learning_rate": 4.9795263139479946e-05, "loss": 0.9741, "step": 3078 }, { "epoch": 0.0436769465843706, "grad_norm": 3.21875, "learning_rate": 4.9794977685325126e-05, "loss": 0.9598, "step": 3080 }, { "epoch": 0.043705308237996815, "grad_norm": 3.15625, "learning_rate": 4.9794692033131176e-05, "loss": 0.8977, "step": 3082 }, { "epoch": 0.04373366989162303, "grad_norm": 3.09375, "learning_rate": 4.9794406182900355e-05, "loss": 0.8904, "step": 3084 }, { "epoch": 0.04376203154524925, "grad_norm": 2.734375, "learning_rate": 4.9794120134634965e-05, "loss": 0.9249, "step": 3086 }, { "epoch": 0.04379039319887546, "grad_norm": 3.59375, "learning_rate": 4.9793833888337284e-05, "loss": 0.9548, "step": 3088 }, { "epoch": 0.043818754852501675, "grad_norm": 3.078125, "learning_rate": 4.9793547444009594e-05, "loss": 0.9262, "step": 3090 }, { "epoch": 0.04384711650612789, "grad_norm": 3.03125, "learning_rate": 4.979326080165419e-05, "loss": 0.9289, "step": 3092 }, { "epoch": 0.0438754781597541, "grad_norm": 2.9375, "learning_rate": 4.979297396127335e-05, "loss": 0.9124, "step": 3094 }, { "epoch": 0.04390383981338032, "grad_norm": 3.328125, "learning_rate": 4.979268692286938e-05, "loss": 0.905, "step": 3096 }, { "epoch": 0.043932201467006535, "grad_norm": 3.453125, "learning_rate": 4.9792399686444566e-05, "loss": 0.949, "step": 3098 }, { "epoch": 0.04396056312063275, "grad_norm": 3.390625, "learning_rate": 4.97921122520012e-05, "loss": 0.9305, "step": 3100 }, { "epoch": 0.04398892477425896, "grad_norm": 2.984375, "learning_rate": 4.979182461954158e-05, "loss": 0.9357, "step": 3102 }, { "epoch": 0.044017286427885174, "grad_norm": 2.953125, "learning_rate": 4.979153678906801e-05, "loss": 0.9737, "step": 3104 }, { "epoch": 0.044045648081511395, "grad_norm": 3.09375, "learning_rate": 4.9791248760582776e-05, "loss": 0.9552, "step": 3106 }, { "epoch": 0.04407400973513761, "grad_norm": 3.234375, "learning_rate": 4.9790960534088184e-05, "loss": 0.8867, "step": 3108 }, { "epoch": 0.04410237138876382, "grad_norm": 3.0625, "learning_rate": 4.9790672109586535e-05, "loss": 0.9771, "step": 3110 }, { "epoch": 0.044130733042390034, "grad_norm": 3.40625, "learning_rate": 4.979038348708014e-05, "loss": 0.961, "step": 3112 }, { "epoch": 0.044159094696016254, "grad_norm": 3.0, "learning_rate": 4.97900946665713e-05, "loss": 0.9418, "step": 3114 }, { "epoch": 0.04418745634964247, "grad_norm": 4.03125, "learning_rate": 4.978980564806231e-05, "loss": 0.9424, "step": 3116 }, { "epoch": 0.04421581800326868, "grad_norm": 3.078125, "learning_rate": 4.978951643155549e-05, "loss": 0.9673, "step": 3118 }, { "epoch": 0.044244179656894894, "grad_norm": 3.078125, "learning_rate": 4.978922701705316e-05, "loss": 0.9116, "step": 3120 }, { "epoch": 0.04427254131052111, "grad_norm": 3.15625, "learning_rate": 4.9788937404557615e-05, "loss": 0.9493, "step": 3122 }, { "epoch": 0.04430090296414733, "grad_norm": 2.96875, "learning_rate": 4.978864759407117e-05, "loss": 0.9975, "step": 3124 }, { "epoch": 0.04432926461777354, "grad_norm": 2.921875, "learning_rate": 4.978835758559615e-05, "loss": 0.9271, "step": 3126 }, { "epoch": 0.044357626271399754, "grad_norm": 3.171875, "learning_rate": 4.978806737913485e-05, "loss": 0.9636, "step": 3128 }, { "epoch": 0.04438598792502597, "grad_norm": 3.09375, "learning_rate": 4.9787776974689614e-05, "loss": 0.907, "step": 3130 }, { "epoch": 0.04441434957865218, "grad_norm": 3.140625, "learning_rate": 4.9787486372262746e-05, "loss": 0.9014, "step": 3132 }, { "epoch": 0.0444427112322784, "grad_norm": 2.859375, "learning_rate": 4.9787195571856584e-05, "loss": 0.9399, "step": 3134 }, { "epoch": 0.044471072885904614, "grad_norm": 3.28125, "learning_rate": 4.9786904573473425e-05, "loss": 0.907, "step": 3136 }, { "epoch": 0.04449943453953083, "grad_norm": 3.09375, "learning_rate": 4.9786613377115606e-05, "loss": 0.909, "step": 3138 }, { "epoch": 0.04452779619315704, "grad_norm": 3.359375, "learning_rate": 4.978632198278545e-05, "loss": 0.9499, "step": 3140 }, { "epoch": 0.044556157846783254, "grad_norm": 3.078125, "learning_rate": 4.9786030390485295e-05, "loss": 0.9809, "step": 3142 }, { "epoch": 0.044584519500409474, "grad_norm": 3.421875, "learning_rate": 4.978573860021746e-05, "loss": 0.9246, "step": 3144 }, { "epoch": 0.04461288115403569, "grad_norm": 3.140625, "learning_rate": 4.978544661198428e-05, "loss": 0.9493, "step": 3146 }, { "epoch": 0.0446412428076619, "grad_norm": 2.828125, "learning_rate": 4.9785154425788074e-05, "loss": 0.849, "step": 3148 }, { "epoch": 0.044669604461288114, "grad_norm": 3.09375, "learning_rate": 4.9784862041631196e-05, "loss": 0.893, "step": 3150 }, { "epoch": 0.04469796611491433, "grad_norm": 3.15625, "learning_rate": 4.978456945951597e-05, "loss": 0.9135, "step": 3152 }, { "epoch": 0.04472632776854055, "grad_norm": 3.421875, "learning_rate": 4.978427667944473e-05, "loss": 0.9909, "step": 3154 }, { "epoch": 0.04475468942216676, "grad_norm": 2.859375, "learning_rate": 4.978398370141982e-05, "loss": 0.9353, "step": 3156 }, { "epoch": 0.04478305107579297, "grad_norm": 2.96875, "learning_rate": 4.978369052544358e-05, "loss": 0.9553, "step": 3158 }, { "epoch": 0.04481141272941919, "grad_norm": 2.828125, "learning_rate": 4.9783397151518365e-05, "loss": 0.9367, "step": 3160 }, { "epoch": 0.04483977438304541, "grad_norm": 3.4375, "learning_rate": 4.978310357964648e-05, "loss": 0.93, "step": 3162 }, { "epoch": 0.04486813603667162, "grad_norm": 3.859375, "learning_rate": 4.978280980983031e-05, "loss": 0.9286, "step": 3164 }, { "epoch": 0.04489649769029783, "grad_norm": 3.875, "learning_rate": 4.978251584207218e-05, "loss": 0.919, "step": 3166 }, { "epoch": 0.044924859343924047, "grad_norm": 3.140625, "learning_rate": 4.978222167637444e-05, "loss": 0.9463, "step": 3168 }, { "epoch": 0.04495322099755026, "grad_norm": 3.140625, "learning_rate": 4.978192731273945e-05, "loss": 0.9604, "step": 3170 }, { "epoch": 0.04498158265117648, "grad_norm": 3.96875, "learning_rate": 4.978163275116955e-05, "loss": 1.0044, "step": 3172 }, { "epoch": 0.04500994430480269, "grad_norm": 3.25, "learning_rate": 4.9781337991667094e-05, "loss": 0.9188, "step": 3174 }, { "epoch": 0.045038305958428906, "grad_norm": 3.25, "learning_rate": 4.9781043034234443e-05, "loss": 0.9387, "step": 3176 }, { "epoch": 0.04506666761205512, "grad_norm": 3.4375, "learning_rate": 4.9780747878873944e-05, "loss": 0.8993, "step": 3178 }, { "epoch": 0.04509502926568133, "grad_norm": 2.765625, "learning_rate": 4.978045252558796e-05, "loss": 0.9147, "step": 3180 }, { "epoch": 0.04512339091930755, "grad_norm": 3.09375, "learning_rate": 4.978015697437884e-05, "loss": 0.9594, "step": 3182 }, { "epoch": 0.045151752572933766, "grad_norm": 3.53125, "learning_rate": 4.977986122524897e-05, "loss": 0.9976, "step": 3184 }, { "epoch": 0.04518011422655998, "grad_norm": 2.890625, "learning_rate": 4.977956527820069e-05, "loss": 0.913, "step": 3186 }, { "epoch": 0.04520847588018619, "grad_norm": 3.25, "learning_rate": 4.977926913323636e-05, "loss": 0.9059, "step": 3188 }, { "epoch": 0.045236837533812406, "grad_norm": 2.875, "learning_rate": 4.977897279035837e-05, "loss": 0.8823, "step": 3190 }, { "epoch": 0.045265199187438626, "grad_norm": 3.296875, "learning_rate": 4.9778676249569055e-05, "loss": 0.9317, "step": 3192 }, { "epoch": 0.04529356084106484, "grad_norm": 3.140625, "learning_rate": 4.977837951087081e-05, "loss": 0.9011, "step": 3194 }, { "epoch": 0.04532192249469105, "grad_norm": 3.328125, "learning_rate": 4.977808257426599e-05, "loss": 0.9612, "step": 3196 }, { "epoch": 0.045350284148317266, "grad_norm": 2.859375, "learning_rate": 4.977778543975698e-05, "loss": 0.9206, "step": 3198 }, { "epoch": 0.04537864580194348, "grad_norm": 3.5, "learning_rate": 4.9777488107346135e-05, "loss": 0.9314, "step": 3200 }, { "epoch": 0.0454070074555697, "grad_norm": 2.765625, "learning_rate": 4.977719057703585e-05, "loss": 0.9239, "step": 3202 }, { "epoch": 0.04543536910919591, "grad_norm": 3.25, "learning_rate": 4.977689284882849e-05, "loss": 0.931, "step": 3204 }, { "epoch": 0.045463730762822126, "grad_norm": 3.078125, "learning_rate": 4.9776594922726424e-05, "loss": 0.9285, "step": 3206 }, { "epoch": 0.04549209241644834, "grad_norm": 2.96875, "learning_rate": 4.9776296798732054e-05, "loss": 0.9376, "step": 3208 }, { "epoch": 0.04552045407007456, "grad_norm": 3.5625, "learning_rate": 4.977599847684774e-05, "loss": 0.9655, "step": 3210 }, { "epoch": 0.04554881572370077, "grad_norm": 3.296875, "learning_rate": 4.977569995707588e-05, "loss": 0.9456, "step": 3212 }, { "epoch": 0.045577177377326986, "grad_norm": 3.125, "learning_rate": 4.9775401239418844e-05, "loss": 0.8899, "step": 3214 }, { "epoch": 0.0456055390309532, "grad_norm": 3.125, "learning_rate": 4.977510232387903e-05, "loss": 0.9059, "step": 3216 }, { "epoch": 0.04563390068457941, "grad_norm": 3.484375, "learning_rate": 4.977480321045882e-05, "loss": 0.9738, "step": 3218 }, { "epoch": 0.04566226233820563, "grad_norm": 3.296875, "learning_rate": 4.9774503899160606e-05, "loss": 0.9015, "step": 3220 }, { "epoch": 0.045690623991831845, "grad_norm": 2.71875, "learning_rate": 4.9774204389986776e-05, "loss": 0.9299, "step": 3222 }, { "epoch": 0.04571898564545806, "grad_norm": 3.03125, "learning_rate": 4.977390468293973e-05, "loss": 0.9277, "step": 3224 }, { "epoch": 0.04574734729908427, "grad_norm": 3.28125, "learning_rate": 4.9773604778021844e-05, "loss": 0.9769, "step": 3226 }, { "epoch": 0.045775708952710485, "grad_norm": 3.25, "learning_rate": 4.9773304675235534e-05, "loss": 0.891, "step": 3228 }, { "epoch": 0.045804070606336705, "grad_norm": 3.234375, "learning_rate": 4.9773004374583174e-05, "loss": 0.9185, "step": 3230 }, { "epoch": 0.04583243225996292, "grad_norm": 3.1875, "learning_rate": 4.977270387606719e-05, "loss": 0.9706, "step": 3232 }, { "epoch": 0.04586079391358913, "grad_norm": 3.125, "learning_rate": 4.977240317968996e-05, "loss": 0.9086, "step": 3234 }, { "epoch": 0.045889155567215345, "grad_norm": 3.65625, "learning_rate": 4.9772102285453894e-05, "loss": 0.9956, "step": 3236 }, { "epoch": 0.04591751722084156, "grad_norm": 2.890625, "learning_rate": 4.97718011933614e-05, "loss": 0.9056, "step": 3238 }, { "epoch": 0.04594587887446778, "grad_norm": 3.1875, "learning_rate": 4.977149990341487e-05, "loss": 0.8912, "step": 3240 }, { "epoch": 0.04597424052809399, "grad_norm": 2.96875, "learning_rate": 4.977119841561672e-05, "loss": 0.9426, "step": 3242 }, { "epoch": 0.046002602181720205, "grad_norm": 2.75, "learning_rate": 4.9770896729969366e-05, "loss": 0.9448, "step": 3244 }, { "epoch": 0.04603096383534642, "grad_norm": 3.109375, "learning_rate": 4.977059484647519e-05, "loss": 0.9121, "step": 3246 }, { "epoch": 0.04605932548897263, "grad_norm": 3.09375, "learning_rate": 4.977029276513663e-05, "loss": 0.9288, "step": 3248 }, { "epoch": 0.04608768714259885, "grad_norm": 3.03125, "learning_rate": 4.976999048595609e-05, "loss": 0.9436, "step": 3250 }, { "epoch": 0.046116048796225065, "grad_norm": 3.21875, "learning_rate": 4.976968800893598e-05, "loss": 0.9197, "step": 3252 }, { "epoch": 0.04614441044985128, "grad_norm": 3.0625, "learning_rate": 4.976938533407873e-05, "loss": 0.9303, "step": 3254 }, { "epoch": 0.04617277210347749, "grad_norm": 3.0, "learning_rate": 4.9769082461386736e-05, "loss": 0.9191, "step": 3256 }, { "epoch": 0.046201133757103705, "grad_norm": 3.21875, "learning_rate": 4.976877939086243e-05, "loss": 0.976, "step": 3258 }, { "epoch": 0.046229495410729925, "grad_norm": 3.046875, "learning_rate": 4.9768476122508236e-05, "loss": 0.9114, "step": 3260 }, { "epoch": 0.04625785706435614, "grad_norm": 3.265625, "learning_rate": 4.976817265632657e-05, "loss": 0.907, "step": 3262 }, { "epoch": 0.04628621871798235, "grad_norm": 2.875, "learning_rate": 4.9767868992319854e-05, "loss": 0.9185, "step": 3264 }, { "epoch": 0.046314580371608564, "grad_norm": 3.015625, "learning_rate": 4.976756513049051e-05, "loss": 0.8994, "step": 3266 }, { "epoch": 0.046342942025234785, "grad_norm": 3.109375, "learning_rate": 4.9767261070840984e-05, "loss": 0.9589, "step": 3268 }, { "epoch": 0.046371303678861, "grad_norm": 3.09375, "learning_rate": 4.976695681337369e-05, "loss": 0.8996, "step": 3270 }, { "epoch": 0.04639966533248721, "grad_norm": 3.09375, "learning_rate": 4.9766652358091054e-05, "loss": 0.9569, "step": 3272 }, { "epoch": 0.046428026986113424, "grad_norm": 3.1875, "learning_rate": 4.976634770499552e-05, "loss": 0.9386, "step": 3274 }, { "epoch": 0.04645638863973964, "grad_norm": 3.0625, "learning_rate": 4.97660428540895e-05, "loss": 0.926, "step": 3276 }, { "epoch": 0.04648475029336586, "grad_norm": 3.09375, "learning_rate": 4.9765737805375465e-05, "loss": 0.9133, "step": 3278 }, { "epoch": 0.04651311194699207, "grad_norm": 3.4375, "learning_rate": 4.9765432558855815e-05, "loss": 0.9125, "step": 3280 }, { "epoch": 0.046541473600618284, "grad_norm": 3.046875, "learning_rate": 4.976512711453301e-05, "loss": 0.9468, "step": 3282 }, { "epoch": 0.0465698352542445, "grad_norm": 2.90625, "learning_rate": 4.9764821472409484e-05, "loss": 0.8747, "step": 3284 }, { "epoch": 0.04659819690787071, "grad_norm": 3.140625, "learning_rate": 4.9764515632487676e-05, "loss": 0.9263, "step": 3286 }, { "epoch": 0.04662655856149693, "grad_norm": 3.09375, "learning_rate": 4.976420959477002e-05, "loss": 0.9107, "step": 3288 }, { "epoch": 0.046654920215123144, "grad_norm": 3.09375, "learning_rate": 4.976390335925898e-05, "loss": 0.961, "step": 3290 }, { "epoch": 0.04668328186874936, "grad_norm": 2.984375, "learning_rate": 4.976359692595699e-05, "loss": 0.8902, "step": 3292 }, { "epoch": 0.04671164352237557, "grad_norm": 3.375, "learning_rate": 4.9763290294866505e-05, "loss": 0.9315, "step": 3294 }, { "epoch": 0.046740005176001784, "grad_norm": 3.34375, "learning_rate": 4.976298346598996e-05, "loss": 0.9785, "step": 3296 }, { "epoch": 0.046768366829628004, "grad_norm": 3.046875, "learning_rate": 4.976267643932982e-05, "loss": 0.9046, "step": 3298 }, { "epoch": 0.04679672848325422, "grad_norm": 3.296875, "learning_rate": 4.976236921488853e-05, "loss": 0.967, "step": 3300 }, { "epoch": 0.04682509013688043, "grad_norm": 3.375, "learning_rate": 4.9762061792668546e-05, "loss": 0.8963, "step": 3302 }, { "epoch": 0.046853451790506644, "grad_norm": 2.921875, "learning_rate": 4.976175417267232e-05, "loss": 0.9218, "step": 3304 }, { "epoch": 0.04688181344413286, "grad_norm": 3.078125, "learning_rate": 4.976144635490231e-05, "loss": 0.96, "step": 3306 }, { "epoch": 0.04691017509775908, "grad_norm": 2.890625, "learning_rate": 4.976113833936098e-05, "loss": 0.9446, "step": 3308 }, { "epoch": 0.04693853675138529, "grad_norm": 3.078125, "learning_rate": 4.9760830126050796e-05, "loss": 0.892, "step": 3310 }, { "epoch": 0.046966898405011503, "grad_norm": 3.046875, "learning_rate": 4.97605217149742e-05, "loss": 0.9317, "step": 3312 }, { "epoch": 0.04699526005863772, "grad_norm": 3.21875, "learning_rate": 4.976021310613366e-05, "loss": 0.918, "step": 3314 }, { "epoch": 0.04702362171226394, "grad_norm": 3.046875, "learning_rate": 4.9759904299531655e-05, "loss": 0.889, "step": 3316 }, { "epoch": 0.04705198336589015, "grad_norm": 3.40625, "learning_rate": 4.975959529517064e-05, "loss": 0.9074, "step": 3318 }, { "epoch": 0.04708034501951636, "grad_norm": 2.984375, "learning_rate": 4.9759286093053086e-05, "loss": 0.91, "step": 3320 }, { "epoch": 0.04710870667314258, "grad_norm": 2.96875, "learning_rate": 4.9758976693181464e-05, "loss": 0.9077, "step": 3322 }, { "epoch": 0.04713706832676879, "grad_norm": 3.140625, "learning_rate": 4.9758667095558244e-05, "loss": 0.9751, "step": 3324 }, { "epoch": 0.04716542998039501, "grad_norm": 3.15625, "learning_rate": 4.97583573001859e-05, "loss": 0.9422, "step": 3326 }, { "epoch": 0.04719379163402122, "grad_norm": 3.09375, "learning_rate": 4.97580473070669e-05, "loss": 0.9031, "step": 3328 }, { "epoch": 0.047222153287647436, "grad_norm": 2.890625, "learning_rate": 4.9757737116203726e-05, "loss": 0.9305, "step": 3330 }, { "epoch": 0.04725051494127365, "grad_norm": 3.390625, "learning_rate": 4.975742672759885e-05, "loss": 0.9085, "step": 3332 }, { "epoch": 0.04727887659489986, "grad_norm": 3.0, "learning_rate": 4.975711614125477e-05, "loss": 0.9191, "step": 3334 }, { "epoch": 0.04730723824852608, "grad_norm": 3.015625, "learning_rate": 4.9756805357173944e-05, "loss": 0.8889, "step": 3336 }, { "epoch": 0.047335599902152296, "grad_norm": 3.109375, "learning_rate": 4.975649437535885e-05, "loss": 0.9321, "step": 3338 }, { "epoch": 0.04736396155577851, "grad_norm": 2.96875, "learning_rate": 4.9756183195812e-05, "loss": 0.884, "step": 3340 }, { "epoch": 0.04739232320940472, "grad_norm": 3.078125, "learning_rate": 4.975587181853586e-05, "loss": 0.9119, "step": 3342 }, { "epoch": 0.047420684863030936, "grad_norm": 3.390625, "learning_rate": 4.9755560243532915e-05, "loss": 0.9104, "step": 3344 }, { "epoch": 0.047449046516657156, "grad_norm": 2.765625, "learning_rate": 4.9755248470805665e-05, "loss": 0.9129, "step": 3346 }, { "epoch": 0.04747740817028337, "grad_norm": 2.734375, "learning_rate": 4.97549365003566e-05, "loss": 0.9714, "step": 3348 }, { "epoch": 0.04750576982390958, "grad_norm": 3.390625, "learning_rate": 4.9754624332188194e-05, "loss": 0.905, "step": 3350 }, { "epoch": 0.047534131477535796, "grad_norm": 3.0, "learning_rate": 4.975431196630296e-05, "loss": 0.8572, "step": 3352 }, { "epoch": 0.04756249313116201, "grad_norm": 3.046875, "learning_rate": 4.975399940270338e-05, "loss": 0.9366, "step": 3354 }, { "epoch": 0.04759085478478823, "grad_norm": 3.171875, "learning_rate": 4.9753686641391955e-05, "loss": 0.9513, "step": 3356 }, { "epoch": 0.04761921643841444, "grad_norm": 2.921875, "learning_rate": 4.975337368237118e-05, "loss": 0.9167, "step": 3358 }, { "epoch": 0.047647578092040656, "grad_norm": 2.828125, "learning_rate": 4.975306052564357e-05, "loss": 0.8736, "step": 3360 }, { "epoch": 0.04767593974566687, "grad_norm": 3.203125, "learning_rate": 4.9752747171211614e-05, "loss": 0.9614, "step": 3362 }, { "epoch": 0.04770430139929309, "grad_norm": 2.625, "learning_rate": 4.975243361907781e-05, "loss": 0.9364, "step": 3364 }, { "epoch": 0.0477326630529193, "grad_norm": 2.765625, "learning_rate": 4.9752119869244665e-05, "loss": 0.8855, "step": 3366 }, { "epoch": 0.047761024706545516, "grad_norm": 3.21875, "learning_rate": 4.975180592171469e-05, "loss": 0.9412, "step": 3368 }, { "epoch": 0.04778938636017173, "grad_norm": 3.96875, "learning_rate": 4.975149177649039e-05, "loss": 0.9166, "step": 3370 }, { "epoch": 0.04781774801379794, "grad_norm": 2.984375, "learning_rate": 4.975117743357428e-05, "loss": 0.9002, "step": 3372 }, { "epoch": 0.04784610966742416, "grad_norm": 3.203125, "learning_rate": 4.975086289296886e-05, "loss": 0.9112, "step": 3374 }, { "epoch": 0.047874471321050376, "grad_norm": 3.21875, "learning_rate": 4.975054815467665e-05, "loss": 0.9567, "step": 3376 }, { "epoch": 0.04790283297467659, "grad_norm": 3.3125, "learning_rate": 4.975023321870015e-05, "loss": 0.9662, "step": 3378 }, { "epoch": 0.0479311946283028, "grad_norm": 2.84375, "learning_rate": 4.97499180850419e-05, "loss": 0.9169, "step": 3380 }, { "epoch": 0.047959556281929015, "grad_norm": 3.28125, "learning_rate": 4.974960275370439e-05, "loss": 0.9122, "step": 3382 }, { "epoch": 0.047987917935555235, "grad_norm": 3.359375, "learning_rate": 4.9749287224690166e-05, "loss": 0.9505, "step": 3384 }, { "epoch": 0.04801627958918145, "grad_norm": 3.328125, "learning_rate": 4.974897149800173e-05, "loss": 0.9609, "step": 3386 }, { "epoch": 0.04804464124280766, "grad_norm": 2.875, "learning_rate": 4.974865557364161e-05, "loss": 0.909, "step": 3388 }, { "epoch": 0.048073002896433875, "grad_norm": 2.734375, "learning_rate": 4.974833945161232e-05, "loss": 0.875, "step": 3390 }, { "epoch": 0.04810136455006009, "grad_norm": 3.09375, "learning_rate": 4.97480231319164e-05, "loss": 0.9045, "step": 3392 }, { "epoch": 0.04812972620368631, "grad_norm": 3.40625, "learning_rate": 4.974770661455636e-05, "loss": 0.9215, "step": 3394 }, { "epoch": 0.04815808785731252, "grad_norm": 3.75, "learning_rate": 4.974738989953474e-05, "loss": 0.9425, "step": 3396 }, { "epoch": 0.048186449510938735, "grad_norm": 2.8125, "learning_rate": 4.974707298685407e-05, "loss": 0.9353, "step": 3398 }, { "epoch": 0.04821481116456495, "grad_norm": 3.109375, "learning_rate": 4.974675587651687e-05, "loss": 0.9566, "step": 3400 }, { "epoch": 0.04824317281819116, "grad_norm": 3.4375, "learning_rate": 4.974643856852569e-05, "loss": 0.9053, "step": 3402 }, { "epoch": 0.04827153447181738, "grad_norm": 4.15625, "learning_rate": 4.974612106288305e-05, "loss": 0.9251, "step": 3404 }, { "epoch": 0.048299896125443595, "grad_norm": 3.75, "learning_rate": 4.974580335959148e-05, "loss": 0.8812, "step": 3406 }, { "epoch": 0.04832825777906981, "grad_norm": 2.9375, "learning_rate": 4.974548545865354e-05, "loss": 0.8967, "step": 3408 }, { "epoch": 0.04835661943269602, "grad_norm": 3.15625, "learning_rate": 4.974516736007176e-05, "loss": 0.8755, "step": 3410 }, { "epoch": 0.048384981086322235, "grad_norm": 3.15625, "learning_rate": 4.974484906384867e-05, "loss": 0.908, "step": 3412 }, { "epoch": 0.048413342739948455, "grad_norm": 3.21875, "learning_rate": 4.974453056998683e-05, "loss": 0.9206, "step": 3414 }, { "epoch": 0.04844170439357467, "grad_norm": 3.765625, "learning_rate": 4.9744211878488756e-05, "loss": 0.9799, "step": 3416 }, { "epoch": 0.04847006604720088, "grad_norm": 3.390625, "learning_rate": 4.974389298935702e-05, "loss": 0.9143, "step": 3418 }, { "epoch": 0.048498427700827094, "grad_norm": 2.859375, "learning_rate": 4.9743573902594164e-05, "loss": 0.886, "step": 3420 }, { "epoch": 0.048526789354453315, "grad_norm": 3.515625, "learning_rate": 4.974325461820273e-05, "loss": 0.9103, "step": 3422 }, { "epoch": 0.04855515100807953, "grad_norm": 3.34375, "learning_rate": 4.9742935136185276e-05, "loss": 0.9356, "step": 3424 }, { "epoch": 0.04858351266170574, "grad_norm": 3.140625, "learning_rate": 4.9742615456544336e-05, "loss": 0.9706, "step": 3426 }, { "epoch": 0.048611874315331954, "grad_norm": 3.171875, "learning_rate": 4.974229557928249e-05, "loss": 0.9055, "step": 3428 }, { "epoch": 0.04864023596895817, "grad_norm": 3.1875, "learning_rate": 4.974197550440227e-05, "loss": 0.9383, "step": 3430 }, { "epoch": 0.04866859762258439, "grad_norm": 3.1875, "learning_rate": 4.9741655231906246e-05, "loss": 0.9055, "step": 3432 }, { "epoch": 0.0486969592762106, "grad_norm": 3.234375, "learning_rate": 4.9741334761796974e-05, "loss": 0.8819, "step": 3434 }, { "epoch": 0.048725320929836814, "grad_norm": 3.125, "learning_rate": 4.9741014094077004e-05, "loss": 0.9568, "step": 3436 }, { "epoch": 0.04875368258346303, "grad_norm": 3.375, "learning_rate": 4.974069322874891e-05, "loss": 0.9157, "step": 3438 }, { "epoch": 0.04878204423708924, "grad_norm": 3.078125, "learning_rate": 4.974037216581524e-05, "loss": 0.9767, "step": 3440 }, { "epoch": 0.04881040589071546, "grad_norm": 3.265625, "learning_rate": 4.9740050905278577e-05, "loss": 0.9535, "step": 3442 }, { "epoch": 0.048838767544341674, "grad_norm": 2.96875, "learning_rate": 4.9739729447141476e-05, "loss": 0.8733, "step": 3444 }, { "epoch": 0.04886712919796789, "grad_norm": 3.109375, "learning_rate": 4.97394077914065e-05, "loss": 0.935, "step": 3446 }, { "epoch": 0.0488954908515941, "grad_norm": 3.171875, "learning_rate": 4.973908593807623e-05, "loss": 0.9171, "step": 3448 }, { "epoch": 0.048923852505220314, "grad_norm": 3.421875, "learning_rate": 4.973876388715323e-05, "loss": 0.9399, "step": 3450 }, { "epoch": 0.048952214158846534, "grad_norm": 3.171875, "learning_rate": 4.9738441638640064e-05, "loss": 0.9318, "step": 3452 }, { "epoch": 0.04898057581247275, "grad_norm": 3.421875, "learning_rate": 4.9738119192539326e-05, "loss": 0.8993, "step": 3454 }, { "epoch": 0.04900893746609896, "grad_norm": 2.859375, "learning_rate": 4.973779654885358e-05, "loss": 0.9413, "step": 3456 }, { "epoch": 0.049037299119725174, "grad_norm": 3.21875, "learning_rate": 4.9737473707585394e-05, "loss": 0.9562, "step": 3458 }, { "epoch": 0.04906566077335139, "grad_norm": 3.328125, "learning_rate": 4.9737150668737356e-05, "loss": 0.8997, "step": 3460 }, { "epoch": 0.04909402242697761, "grad_norm": 3.390625, "learning_rate": 4.973682743231205e-05, "loss": 0.9577, "step": 3462 }, { "epoch": 0.04912238408060382, "grad_norm": 3.203125, "learning_rate": 4.973650399831206e-05, "loss": 0.9161, "step": 3464 }, { "epoch": 0.049150745734230034, "grad_norm": 3.0, "learning_rate": 4.973618036673995e-05, "loss": 0.9951, "step": 3466 }, { "epoch": 0.04917910738785625, "grad_norm": 2.90625, "learning_rate": 4.973585653759832e-05, "loss": 0.9502, "step": 3468 }, { "epoch": 0.04920746904148247, "grad_norm": 3.03125, "learning_rate": 4.973553251088976e-05, "loss": 0.8939, "step": 3470 }, { "epoch": 0.04923583069510868, "grad_norm": 3.21875, "learning_rate": 4.973520828661684e-05, "loss": 0.9495, "step": 3472 }, { "epoch": 0.04926419234873489, "grad_norm": 2.953125, "learning_rate": 4.973488386478218e-05, "loss": 0.9154, "step": 3474 }, { "epoch": 0.04929255400236111, "grad_norm": 3.171875, "learning_rate": 4.9734559245388336e-05, "loss": 0.8997, "step": 3476 }, { "epoch": 0.04932091565598732, "grad_norm": 2.921875, "learning_rate": 4.973423442843793e-05, "loss": 0.8961, "step": 3478 }, { "epoch": 0.04934927730961354, "grad_norm": 2.828125, "learning_rate": 4.9733909413933535e-05, "loss": 0.9454, "step": 3480 }, { "epoch": 0.04937763896323975, "grad_norm": 3.453125, "learning_rate": 4.973358420187776e-05, "loss": 0.9108, "step": 3482 }, { "epoch": 0.04940600061686597, "grad_norm": 3.1875, "learning_rate": 4.9733258792273194e-05, "loss": 0.9783, "step": 3484 }, { "epoch": 0.04943436227049218, "grad_norm": 2.90625, "learning_rate": 4.973293318512244e-05, "loss": 0.9166, "step": 3486 }, { "epoch": 0.04946272392411839, "grad_norm": 3.125, "learning_rate": 4.973260738042811e-05, "loss": 0.9123, "step": 3488 }, { "epoch": 0.04949108557774461, "grad_norm": 3.25, "learning_rate": 4.973228137819278e-05, "loss": 0.9085, "step": 3490 }, { "epoch": 0.049519447231370826, "grad_norm": 2.890625, "learning_rate": 4.9731955178419075e-05, "loss": 0.9062, "step": 3492 }, { "epoch": 0.04954780888499704, "grad_norm": 3.28125, "learning_rate": 4.97316287811096e-05, "loss": 0.9447, "step": 3494 }, { "epoch": 0.04957617053862325, "grad_norm": 3.125, "learning_rate": 4.973130218626695e-05, "loss": 0.9568, "step": 3496 }, { "epoch": 0.049604532192249466, "grad_norm": 3.5, "learning_rate": 4.973097539389374e-05, "loss": 0.933, "step": 3498 }, { "epoch": 0.049632893845875686, "grad_norm": 3.0, "learning_rate": 4.973064840399259e-05, "loss": 0.9869, "step": 3500 }, { "epoch": 0.0496612554995019, "grad_norm": 2.875, "learning_rate": 4.973032121656609e-05, "loss": 0.9109, "step": 3502 }, { "epoch": 0.04968961715312811, "grad_norm": 3.296875, "learning_rate": 4.9729993831616874e-05, "loss": 0.9435, "step": 3504 }, { "epoch": 0.049717978806754326, "grad_norm": 3.25, "learning_rate": 4.972966624914753e-05, "loss": 0.9058, "step": 3506 }, { "epoch": 0.04974634046038054, "grad_norm": 3.296875, "learning_rate": 4.972933846916072e-05, "loss": 0.9087, "step": 3508 }, { "epoch": 0.04977470211400676, "grad_norm": 3.28125, "learning_rate": 4.972901049165901e-05, "loss": 0.9131, "step": 3510 }, { "epoch": 0.04980306376763297, "grad_norm": 3.09375, "learning_rate": 4.972868231664505e-05, "loss": 0.9046, "step": 3512 }, { "epoch": 0.049831425421259186, "grad_norm": 3.15625, "learning_rate": 4.972835394412146e-05, "loss": 0.9063, "step": 3514 }, { "epoch": 0.0498597870748854, "grad_norm": 2.984375, "learning_rate": 4.972802537409085e-05, "loss": 0.939, "step": 3516 }, { "epoch": 0.04988814872851162, "grad_norm": 3.484375, "learning_rate": 4.972769660655586e-05, "loss": 0.9446, "step": 3518 }, { "epoch": 0.04991651038213783, "grad_norm": 3.0, "learning_rate": 4.972736764151911e-05, "loss": 0.9604, "step": 3520 }, { "epoch": 0.049944872035764046, "grad_norm": 2.921875, "learning_rate": 4.972703847898321e-05, "loss": 0.913, "step": 3522 }, { "epoch": 0.04997323368939026, "grad_norm": 3.078125, "learning_rate": 4.972670911895082e-05, "loss": 0.9591, "step": 3524 }, { "epoch": 0.05000159534301647, "grad_norm": 3.171875, "learning_rate": 4.972637956142455e-05, "loss": 0.9043, "step": 3526 }, { "epoch": 0.05002995699664269, "grad_norm": 3.234375, "learning_rate": 4.972604980640704e-05, "loss": 0.8965, "step": 3528 }, { "epoch": 0.050058318650268906, "grad_norm": 3.109375, "learning_rate": 4.972571985390091e-05, "loss": 0.9392, "step": 3530 }, { "epoch": 0.05008668030389512, "grad_norm": 2.875, "learning_rate": 4.972538970390882e-05, "loss": 0.912, "step": 3532 }, { "epoch": 0.05011504195752133, "grad_norm": 3.390625, "learning_rate": 4.9725059356433386e-05, "loss": 0.9663, "step": 3534 }, { "epoch": 0.050143403611147545, "grad_norm": 2.78125, "learning_rate": 4.972472881147725e-05, "loss": 0.8937, "step": 3536 }, { "epoch": 0.050171765264773766, "grad_norm": 3.421875, "learning_rate": 4.972439806904307e-05, "loss": 0.9355, "step": 3538 }, { "epoch": 0.05020012691839998, "grad_norm": 3.671875, "learning_rate": 4.9724067129133456e-05, "loss": 0.8814, "step": 3540 }, { "epoch": 0.05022848857202619, "grad_norm": 3.234375, "learning_rate": 4.9723735991751076e-05, "loss": 0.9808, "step": 3542 }, { "epoch": 0.050256850225652405, "grad_norm": 3.34375, "learning_rate": 4.972340465689857e-05, "loss": 0.9412, "step": 3544 }, { "epoch": 0.05028521187927862, "grad_norm": 3.109375, "learning_rate": 4.972307312457858e-05, "loss": 0.9444, "step": 3546 }, { "epoch": 0.05031357353290484, "grad_norm": 3.296875, "learning_rate": 4.972274139479376e-05, "loss": 0.9428, "step": 3548 }, { "epoch": 0.05034193518653105, "grad_norm": 3.0625, "learning_rate": 4.972240946754675e-05, "loss": 0.9166, "step": 3550 }, { "epoch": 0.050370296840157265, "grad_norm": 3.203125, "learning_rate": 4.9722077342840204e-05, "loss": 0.9303, "step": 3552 }, { "epoch": 0.05039865849378348, "grad_norm": 3.234375, "learning_rate": 4.9721745020676784e-05, "loss": 0.9084, "step": 3554 }, { "epoch": 0.05042702014740969, "grad_norm": 2.828125, "learning_rate": 4.972141250105914e-05, "loss": 0.9475, "step": 3556 }, { "epoch": 0.05045538180103591, "grad_norm": 3.140625, "learning_rate": 4.9721079783989915e-05, "loss": 0.957, "step": 3558 }, { "epoch": 0.050483743454662125, "grad_norm": 2.90625, "learning_rate": 4.9720746869471784e-05, "loss": 0.9195, "step": 3560 }, { "epoch": 0.05051210510828834, "grad_norm": 3.03125, "learning_rate": 4.97204137575074e-05, "loss": 0.8872, "step": 3562 }, { "epoch": 0.05054046676191455, "grad_norm": 3.453125, "learning_rate": 4.972008044809941e-05, "loss": 0.8812, "step": 3564 }, { "epoch": 0.050568828415540765, "grad_norm": 3.53125, "learning_rate": 4.9719746941250506e-05, "loss": 0.9426, "step": 3566 }, { "epoch": 0.050597190069166985, "grad_norm": 3.359375, "learning_rate": 4.971941323696333e-05, "loss": 0.95, "step": 3568 }, { "epoch": 0.0506255517227932, "grad_norm": 3.453125, "learning_rate": 4.971907933524055e-05, "loss": 0.9044, "step": 3570 }, { "epoch": 0.05065391337641941, "grad_norm": 3.140625, "learning_rate": 4.971874523608483e-05, "loss": 0.9305, "step": 3572 }, { "epoch": 0.050682275030045625, "grad_norm": 3.125, "learning_rate": 4.971841093949885e-05, "loss": 0.8895, "step": 3574 }, { "epoch": 0.050710636683671845, "grad_norm": 3.015625, "learning_rate": 4.9718076445485264e-05, "loss": 0.9302, "step": 3576 }, { "epoch": 0.05073899833729806, "grad_norm": 3.03125, "learning_rate": 4.971774175404675e-05, "loss": 0.9504, "step": 3578 }, { "epoch": 0.05076735999092427, "grad_norm": 3.171875, "learning_rate": 4.9717406865186e-05, "loss": 0.9453, "step": 3580 }, { "epoch": 0.050795721644550484, "grad_norm": 3.0, "learning_rate": 4.9717071778905667e-05, "loss": 0.8936, "step": 3582 }, { "epoch": 0.0508240832981767, "grad_norm": 3.609375, "learning_rate": 4.9716736495208424e-05, "loss": 0.9045, "step": 3584 }, { "epoch": 0.05085244495180292, "grad_norm": 2.953125, "learning_rate": 4.9716401014096966e-05, "loss": 0.9282, "step": 3586 }, { "epoch": 0.05088080660542913, "grad_norm": 3.515625, "learning_rate": 4.971606533557396e-05, "loss": 0.9448, "step": 3588 }, { "epoch": 0.050909168259055344, "grad_norm": 2.8125, "learning_rate": 4.97157294596421e-05, "loss": 0.8974, "step": 3590 }, { "epoch": 0.05093752991268156, "grad_norm": 3.109375, "learning_rate": 4.9715393386304056e-05, "loss": 0.9332, "step": 3592 }, { "epoch": 0.05096589156630777, "grad_norm": 3.109375, "learning_rate": 4.971505711556251e-05, "loss": 0.8801, "step": 3594 }, { "epoch": 0.05099425321993399, "grad_norm": 3.25, "learning_rate": 4.971472064742016e-05, "loss": 0.9357, "step": 3596 }, { "epoch": 0.051022614873560204, "grad_norm": 3.078125, "learning_rate": 4.9714383981879684e-05, "loss": 0.9387, "step": 3598 }, { "epoch": 0.05105097652718642, "grad_norm": 2.859375, "learning_rate": 4.971404711894378e-05, "loss": 0.9277, "step": 3600 }, { "epoch": 0.05107933818081263, "grad_norm": 3.4375, "learning_rate": 4.971371005861514e-05, "loss": 0.901, "step": 3602 }, { "epoch": 0.051107699834438844, "grad_norm": 2.96875, "learning_rate": 4.971337280089643e-05, "loss": 0.9528, "step": 3604 }, { "epoch": 0.051136061488065064, "grad_norm": 3.171875, "learning_rate": 4.971303534579038e-05, "loss": 0.9334, "step": 3606 }, { "epoch": 0.05116442314169128, "grad_norm": 2.859375, "learning_rate": 4.971269769329966e-05, "loss": 0.8398, "step": 3608 }, { "epoch": 0.05119278479531749, "grad_norm": 3.046875, "learning_rate": 4.9712359843426975e-05, "loss": 0.9769, "step": 3610 }, { "epoch": 0.051221146448943704, "grad_norm": 2.953125, "learning_rate": 4.971202179617502e-05, "loss": 0.9377, "step": 3612 }, { "epoch": 0.05124950810256992, "grad_norm": 3.359375, "learning_rate": 4.9711683551546514e-05, "loss": 0.985, "step": 3614 }, { "epoch": 0.05127786975619614, "grad_norm": 3.046875, "learning_rate": 4.9711345109544124e-05, "loss": 0.9326, "step": 3616 }, { "epoch": 0.05130623140982235, "grad_norm": 3.3125, "learning_rate": 4.971100647017058e-05, "loss": 0.8943, "step": 3618 }, { "epoch": 0.051334593063448564, "grad_norm": 3.40625, "learning_rate": 4.9710667633428587e-05, "loss": 0.9193, "step": 3620 }, { "epoch": 0.05136295471707478, "grad_norm": 2.90625, "learning_rate": 4.9710328599320846e-05, "loss": 0.9574, "step": 3622 }, { "epoch": 0.051391316370701, "grad_norm": 3.3125, "learning_rate": 4.9709989367850044e-05, "loss": 0.9447, "step": 3624 }, { "epoch": 0.05141967802432721, "grad_norm": 3.25, "learning_rate": 4.970964993901892e-05, "loss": 0.8921, "step": 3626 }, { "epoch": 0.051448039677953424, "grad_norm": 3.125, "learning_rate": 4.970931031283017e-05, "loss": 0.9269, "step": 3628 }, { "epoch": 0.05147640133157964, "grad_norm": 3.453125, "learning_rate": 4.9708970489286514e-05, "loss": 0.9078, "step": 3630 }, { "epoch": 0.05150476298520585, "grad_norm": 3.15625, "learning_rate": 4.970863046839066e-05, "loss": 0.8863, "step": 3632 }, { "epoch": 0.05153312463883207, "grad_norm": 3.140625, "learning_rate": 4.970829025014533e-05, "loss": 0.9152, "step": 3634 }, { "epoch": 0.05156148629245828, "grad_norm": 3.8125, "learning_rate": 4.970794983455324e-05, "loss": 0.9653, "step": 3636 }, { "epoch": 0.0515898479460845, "grad_norm": 3.375, "learning_rate": 4.97076092216171e-05, "loss": 0.8976, "step": 3638 }, { "epoch": 0.05161820959971071, "grad_norm": 2.796875, "learning_rate": 4.970726841133963e-05, "loss": 0.8729, "step": 3640 }, { "epoch": 0.05164657125333692, "grad_norm": 3.1875, "learning_rate": 4.9706927403723574e-05, "loss": 0.98, "step": 3642 }, { "epoch": 0.05167493290696314, "grad_norm": 3.140625, "learning_rate": 4.9706586198771634e-05, "loss": 0.9131, "step": 3644 }, { "epoch": 0.051703294560589357, "grad_norm": 2.859375, "learning_rate": 4.970624479648654e-05, "loss": 0.8685, "step": 3646 }, { "epoch": 0.05173165621421557, "grad_norm": 2.984375, "learning_rate": 4.9705903196871026e-05, "loss": 0.8985, "step": 3648 }, { "epoch": 0.05176001786784178, "grad_norm": 2.828125, "learning_rate": 4.970556139992781e-05, "loss": 0.9154, "step": 3650 }, { "epoch": 0.051788379521467996, "grad_norm": 3.125, "learning_rate": 4.9705219405659635e-05, "loss": 0.9176, "step": 3652 }, { "epoch": 0.051816741175094216, "grad_norm": 3.4375, "learning_rate": 4.9704877214069223e-05, "loss": 0.9383, "step": 3654 }, { "epoch": 0.05184510282872043, "grad_norm": 3.078125, "learning_rate": 4.97045348251593e-05, "loss": 0.922, "step": 3656 }, { "epoch": 0.05187346448234664, "grad_norm": 3.078125, "learning_rate": 4.970419223893262e-05, "loss": 0.8998, "step": 3658 }, { "epoch": 0.051901826135972856, "grad_norm": 3.125, "learning_rate": 4.9703849455391906e-05, "loss": 0.9507, "step": 3660 }, { "epoch": 0.05193018778959907, "grad_norm": 2.828125, "learning_rate": 4.9703506474539894e-05, "loss": 0.8821, "step": 3662 }, { "epoch": 0.05195854944322529, "grad_norm": 3.640625, "learning_rate": 4.970316329637933e-05, "loss": 0.9471, "step": 3664 }, { "epoch": 0.0519869110968515, "grad_norm": 3.328125, "learning_rate": 4.9702819920912956e-05, "loss": 0.9287, "step": 3666 }, { "epoch": 0.052015272750477716, "grad_norm": 3.1875, "learning_rate": 4.970247634814351e-05, "loss": 0.9355, "step": 3668 }, { "epoch": 0.05204363440410393, "grad_norm": 3.125, "learning_rate": 4.970213257807374e-05, "loss": 0.9212, "step": 3670 }, { "epoch": 0.05207199605773015, "grad_norm": 3.140625, "learning_rate": 4.9701788610706384e-05, "loss": 0.9113, "step": 3672 }, { "epoch": 0.05210035771135636, "grad_norm": 3.625, "learning_rate": 4.97014444460442e-05, "loss": 0.94, "step": 3674 }, { "epoch": 0.052128719364982576, "grad_norm": 2.84375, "learning_rate": 4.970110008408993e-05, "loss": 0.8996, "step": 3676 }, { "epoch": 0.05215708101860879, "grad_norm": 3.3125, "learning_rate": 4.970075552484632e-05, "loss": 0.95, "step": 3678 }, { "epoch": 0.052185442672235, "grad_norm": 3.109375, "learning_rate": 4.970041076831614e-05, "loss": 0.9065, "step": 3680 }, { "epoch": 0.05221380432586122, "grad_norm": 2.9375, "learning_rate": 4.9700065814502125e-05, "loss": 0.8558, "step": 3682 }, { "epoch": 0.052242165979487436, "grad_norm": 2.90625, "learning_rate": 4.9699720663407035e-05, "loss": 0.9405, "step": 3684 }, { "epoch": 0.05227052763311365, "grad_norm": 3.140625, "learning_rate": 4.969937531503363e-05, "loss": 0.8957, "step": 3686 }, { "epoch": 0.05229888928673986, "grad_norm": 2.90625, "learning_rate": 4.9699029769384663e-05, "loss": 0.9366, "step": 3688 }, { "epoch": 0.052327250940366075, "grad_norm": 3.046875, "learning_rate": 4.9698684026462904e-05, "loss": 0.9426, "step": 3690 }, { "epoch": 0.052355612593992296, "grad_norm": 3.09375, "learning_rate": 4.9698338086271114e-05, "loss": 0.9271, "step": 3692 }, { "epoch": 0.05238397424761851, "grad_norm": 3.421875, "learning_rate": 4.969799194881204e-05, "loss": 0.9229, "step": 3694 }, { "epoch": 0.05241233590124472, "grad_norm": 3.078125, "learning_rate": 4.969764561408846e-05, "loss": 0.9287, "step": 3696 }, { "epoch": 0.052440697554870935, "grad_norm": 3.1875, "learning_rate": 4.9697299082103134e-05, "loss": 0.8985, "step": 3698 }, { "epoch": 0.05246905920849715, "grad_norm": 3.15625, "learning_rate": 4.969695235285884e-05, "loss": 0.8843, "step": 3700 }, { "epoch": 0.05249742086212337, "grad_norm": 2.828125, "learning_rate": 4.969660542635833e-05, "loss": 0.9025, "step": 3702 }, { "epoch": 0.05252578251574958, "grad_norm": 2.984375, "learning_rate": 4.969625830260439e-05, "loss": 0.9403, "step": 3704 }, { "epoch": 0.052554144169375795, "grad_norm": 3.234375, "learning_rate": 4.969591098159979e-05, "loss": 0.9139, "step": 3706 }, { "epoch": 0.05258250582300201, "grad_norm": 2.859375, "learning_rate": 4.969556346334731e-05, "loss": 0.8745, "step": 3708 }, { "epoch": 0.05261086747662822, "grad_norm": 3.421875, "learning_rate": 4.96952157478497e-05, "loss": 0.9437, "step": 3710 }, { "epoch": 0.05263922913025444, "grad_norm": 3.234375, "learning_rate": 4.969486783510976e-05, "loss": 0.924, "step": 3712 }, { "epoch": 0.052667590783880655, "grad_norm": 2.828125, "learning_rate": 4.9694519725130265e-05, "loss": 0.8821, "step": 3714 }, { "epoch": 0.05269595243750687, "grad_norm": 3.203125, "learning_rate": 4.9694171417914e-05, "loss": 0.8983, "step": 3716 }, { "epoch": 0.05272431409113308, "grad_norm": 2.984375, "learning_rate": 4.969382291346373e-05, "loss": 0.8759, "step": 3718 }, { "epoch": 0.052752675744759295, "grad_norm": 3.453125, "learning_rate": 4.969347421178226e-05, "loss": 0.9228, "step": 3720 }, { "epoch": 0.052781037398385515, "grad_norm": 3.375, "learning_rate": 4.9693125312872356e-05, "loss": 0.9237, "step": 3722 }, { "epoch": 0.05280939905201173, "grad_norm": 3.59375, "learning_rate": 4.969277621673682e-05, "loss": 0.9282, "step": 3724 }, { "epoch": 0.05283776070563794, "grad_norm": 2.984375, "learning_rate": 4.9692426923378436e-05, "loss": 0.9286, "step": 3726 }, { "epoch": 0.052866122359264155, "grad_norm": 3.109375, "learning_rate": 4.969207743279998e-05, "loss": 0.8906, "step": 3728 }, { "epoch": 0.052894484012890375, "grad_norm": 3.328125, "learning_rate": 4.9691727745004255e-05, "loss": 0.9362, "step": 3730 }, { "epoch": 0.05292284566651659, "grad_norm": 3.0, "learning_rate": 4.9691377859994056e-05, "loss": 0.921, "step": 3732 }, { "epoch": 0.0529512073201428, "grad_norm": 3.046875, "learning_rate": 4.9691027777772184e-05, "loss": 0.901, "step": 3734 }, { "epoch": 0.052979568973769015, "grad_norm": 3.125, "learning_rate": 4.969067749834142e-05, "loss": 0.9084, "step": 3736 }, { "epoch": 0.05300793062739523, "grad_norm": 3.015625, "learning_rate": 4.9690327021704563e-05, "loss": 0.9764, "step": 3738 }, { "epoch": 0.05303629228102145, "grad_norm": 3.328125, "learning_rate": 4.968997634786442e-05, "loss": 0.9496, "step": 3740 }, { "epoch": 0.05306465393464766, "grad_norm": 3.546875, "learning_rate": 4.9689625476823795e-05, "loss": 0.8638, "step": 3742 }, { "epoch": 0.053093015588273874, "grad_norm": 2.8125, "learning_rate": 4.968927440858548e-05, "loss": 0.9089, "step": 3744 }, { "epoch": 0.05312137724190009, "grad_norm": 3.859375, "learning_rate": 4.968892314315228e-05, "loss": 0.9581, "step": 3746 }, { "epoch": 0.0531497388955263, "grad_norm": 3.6875, "learning_rate": 4.968857168052701e-05, "loss": 0.9375, "step": 3748 }, { "epoch": 0.05317810054915252, "grad_norm": 3.015625, "learning_rate": 4.968822002071247e-05, "loss": 0.9311, "step": 3750 }, { "epoch": 0.053206462202778734, "grad_norm": 3.3125, "learning_rate": 4.9687868163711474e-05, "loss": 0.9389, "step": 3752 }, { "epoch": 0.05323482385640495, "grad_norm": 2.921875, "learning_rate": 4.9687516109526814e-05, "loss": 0.9258, "step": 3754 }, { "epoch": 0.05326318551003116, "grad_norm": 2.671875, "learning_rate": 4.968716385816133e-05, "loss": 0.9011, "step": 3756 }, { "epoch": 0.053291547163657374, "grad_norm": 3.171875, "learning_rate": 4.968681140961782e-05, "loss": 0.908, "step": 3758 }, { "epoch": 0.053319908817283594, "grad_norm": 3.21875, "learning_rate": 4.9686458763899094e-05, "loss": 0.9229, "step": 3760 }, { "epoch": 0.05334827047090981, "grad_norm": 3.40625, "learning_rate": 4.968610592100797e-05, "loss": 0.913, "step": 3762 }, { "epoch": 0.05337663212453602, "grad_norm": 3.171875, "learning_rate": 4.968575288094729e-05, "loss": 0.9435, "step": 3764 }, { "epoch": 0.053404993778162234, "grad_norm": 3.09375, "learning_rate": 4.968539964371984e-05, "loss": 0.9707, "step": 3766 }, { "epoch": 0.05343335543178845, "grad_norm": 2.875, "learning_rate": 4.968504620932847e-05, "loss": 0.9046, "step": 3768 }, { "epoch": 0.05346171708541467, "grad_norm": 2.84375, "learning_rate": 4.968469257777598e-05, "loss": 0.9455, "step": 3770 }, { "epoch": 0.05349007873904088, "grad_norm": 2.765625, "learning_rate": 4.96843387490652e-05, "loss": 0.9176, "step": 3772 }, { "epoch": 0.053518440392667094, "grad_norm": 2.9375, "learning_rate": 4.968398472319897e-05, "loss": 0.8523, "step": 3774 }, { "epoch": 0.05354680204629331, "grad_norm": 2.890625, "learning_rate": 4.96836305001801e-05, "loss": 0.8885, "step": 3776 }, { "epoch": 0.05357516369991953, "grad_norm": 3.171875, "learning_rate": 4.968327608001143e-05, "loss": 0.9748, "step": 3778 }, { "epoch": 0.05360352535354574, "grad_norm": 3.3125, "learning_rate": 4.968292146269579e-05, "loss": 0.9641, "step": 3780 }, { "epoch": 0.053631887007171954, "grad_norm": 3.375, "learning_rate": 4.9682566648236007e-05, "loss": 0.9717, "step": 3782 }, { "epoch": 0.05366024866079817, "grad_norm": 3.15625, "learning_rate": 4.968221163663491e-05, "loss": 0.9269, "step": 3784 }, { "epoch": 0.05368861031442438, "grad_norm": 3.578125, "learning_rate": 4.968185642789536e-05, "loss": 0.8923, "step": 3786 }, { "epoch": 0.0537169719680506, "grad_norm": 2.875, "learning_rate": 4.968150102202016e-05, "loss": 0.9235, "step": 3788 }, { "epoch": 0.053745333621676813, "grad_norm": 2.96875, "learning_rate": 4.9681145419012173e-05, "loss": 0.9232, "step": 3790 }, { "epoch": 0.05377369527530303, "grad_norm": 3.03125, "learning_rate": 4.968078961887423e-05, "loss": 0.9261, "step": 3792 }, { "epoch": 0.05380205692892924, "grad_norm": 3.40625, "learning_rate": 4.968043362160917e-05, "loss": 0.9291, "step": 3794 }, { "epoch": 0.05383041858255545, "grad_norm": 3.171875, "learning_rate": 4.9680077427219853e-05, "loss": 0.916, "step": 3796 }, { "epoch": 0.05385878023618167, "grad_norm": 3.390625, "learning_rate": 4.9679721035709095e-05, "loss": 0.9227, "step": 3798 }, { "epoch": 0.05388714188980789, "grad_norm": 3.65625, "learning_rate": 4.967936444707977e-05, "loss": 0.9122, "step": 3800 }, { "epoch": 0.0539155035434341, "grad_norm": 3.21875, "learning_rate": 4.967900766133471e-05, "loss": 0.9252, "step": 3802 }, { "epoch": 0.05394386519706031, "grad_norm": 2.921875, "learning_rate": 4.967865067847677e-05, "loss": 0.9387, "step": 3804 }, { "epoch": 0.053972226850686526, "grad_norm": 3.046875, "learning_rate": 4.967829349850881e-05, "loss": 0.9127, "step": 3806 }, { "epoch": 0.054000588504312746, "grad_norm": 3.453125, "learning_rate": 4.9677936121433665e-05, "loss": 0.8928, "step": 3808 }, { "epoch": 0.05402895015793896, "grad_norm": 2.890625, "learning_rate": 4.96775785472542e-05, "loss": 0.9075, "step": 3810 }, { "epoch": 0.05405731181156517, "grad_norm": 2.703125, "learning_rate": 4.967722077597327e-05, "loss": 0.8678, "step": 3812 }, { "epoch": 0.054085673465191386, "grad_norm": 2.78125, "learning_rate": 4.9676862807593726e-05, "loss": 0.9472, "step": 3814 }, { "epoch": 0.0541140351188176, "grad_norm": 2.921875, "learning_rate": 4.967650464211844e-05, "loss": 0.9258, "step": 3816 }, { "epoch": 0.05414239677244382, "grad_norm": 3.046875, "learning_rate": 4.9676146279550264e-05, "loss": 0.9811, "step": 3818 }, { "epoch": 0.05417075842607003, "grad_norm": 2.828125, "learning_rate": 4.9675787719892056e-05, "loss": 0.8911, "step": 3820 }, { "epoch": 0.054199120079696246, "grad_norm": 3.09375, "learning_rate": 4.967542896314669e-05, "loss": 0.876, "step": 3822 }, { "epoch": 0.05422748173332246, "grad_norm": 2.953125, "learning_rate": 4.967507000931702e-05, "loss": 0.8905, "step": 3824 }, { "epoch": 0.05425584338694868, "grad_norm": 3.296875, "learning_rate": 4.967471085840594e-05, "loss": 0.9459, "step": 3826 }, { "epoch": 0.05428420504057489, "grad_norm": 2.875, "learning_rate": 4.967435151041629e-05, "loss": 0.9338, "step": 3828 }, { "epoch": 0.054312566694201106, "grad_norm": 3.078125, "learning_rate": 4.9673991965350935e-05, "loss": 0.8941, "step": 3830 }, { "epoch": 0.05434092834782732, "grad_norm": 2.765625, "learning_rate": 4.967363222321277e-05, "loss": 0.8853, "step": 3832 }, { "epoch": 0.05436929000145353, "grad_norm": 3.03125, "learning_rate": 4.967327228400466e-05, "loss": 0.9733, "step": 3834 }, { "epoch": 0.05439765165507975, "grad_norm": 2.859375, "learning_rate": 4.967291214772948e-05, "loss": 0.9177, "step": 3836 }, { "epoch": 0.054426013308705966, "grad_norm": 2.96875, "learning_rate": 4.96725518143901e-05, "loss": 0.9368, "step": 3838 }, { "epoch": 0.05445437496233218, "grad_norm": 2.9375, "learning_rate": 4.96721912839894e-05, "loss": 0.9043, "step": 3840 }, { "epoch": 0.05448273661595839, "grad_norm": 2.671875, "learning_rate": 4.967183055653027e-05, "loss": 0.9427, "step": 3842 }, { "epoch": 0.054511098269584606, "grad_norm": 3.40625, "learning_rate": 4.9671469632015574e-05, "loss": 0.9332, "step": 3844 }, { "epoch": 0.054539459923210826, "grad_norm": 3.234375, "learning_rate": 4.967110851044822e-05, "loss": 0.9451, "step": 3846 }, { "epoch": 0.05456782157683704, "grad_norm": 3.609375, "learning_rate": 4.967074719183107e-05, "loss": 0.8989, "step": 3848 }, { "epoch": 0.05459618323046325, "grad_norm": 3.5, "learning_rate": 4.9670385676167005e-05, "loss": 0.9446, "step": 3850 }, { "epoch": 0.054624544884089465, "grad_norm": 2.796875, "learning_rate": 4.967002396345894e-05, "loss": 0.831, "step": 3852 }, { "epoch": 0.05465290653771568, "grad_norm": 2.984375, "learning_rate": 4.966966205370974e-05, "loss": 0.9183, "step": 3854 }, { "epoch": 0.0546812681913419, "grad_norm": 3.125, "learning_rate": 4.966929994692231e-05, "loss": 0.9089, "step": 3856 }, { "epoch": 0.05470962984496811, "grad_norm": 4.03125, "learning_rate": 4.966893764309953e-05, "loss": 0.9306, "step": 3858 }, { "epoch": 0.054737991498594325, "grad_norm": 3.203125, "learning_rate": 4.96685751422443e-05, "loss": 0.9314, "step": 3860 }, { "epoch": 0.05476635315222054, "grad_norm": 3.203125, "learning_rate": 4.966821244435952e-05, "loss": 0.8726, "step": 3862 }, { "epoch": 0.05479471480584675, "grad_norm": 3.0625, "learning_rate": 4.9667849549448074e-05, "loss": 0.9141, "step": 3864 }, { "epoch": 0.05482307645947297, "grad_norm": 3.75, "learning_rate": 4.966748645751287e-05, "loss": 0.9364, "step": 3866 }, { "epoch": 0.054851438113099185, "grad_norm": 3.28125, "learning_rate": 4.966712316855681e-05, "loss": 0.8801, "step": 3868 }, { "epoch": 0.0548797997667254, "grad_norm": 3.75, "learning_rate": 4.966675968258279e-05, "loss": 0.9289, "step": 3870 }, { "epoch": 0.05490816142035161, "grad_norm": 3.3125, "learning_rate": 4.966639599959372e-05, "loss": 0.9783, "step": 3872 }, { "epoch": 0.054936523073977825, "grad_norm": 3.40625, "learning_rate": 4.966603211959249e-05, "loss": 0.9019, "step": 3874 }, { "epoch": 0.054964884727604045, "grad_norm": 3.296875, "learning_rate": 4.966566804258203e-05, "loss": 0.9049, "step": 3876 }, { "epoch": 0.05499324638123026, "grad_norm": 2.875, "learning_rate": 4.966530376856522e-05, "loss": 0.9076, "step": 3878 }, { "epoch": 0.05502160803485647, "grad_norm": 3.03125, "learning_rate": 4.966493929754499e-05, "loss": 0.9655, "step": 3880 }, { "epoch": 0.055049969688482685, "grad_norm": 2.875, "learning_rate": 4.966457462952424e-05, "loss": 0.8643, "step": 3882 }, { "epoch": 0.055078331342108905, "grad_norm": 2.875, "learning_rate": 4.96642097645059e-05, "loss": 0.8833, "step": 3884 }, { "epoch": 0.05510669299573512, "grad_norm": 2.875, "learning_rate": 4.966384470249286e-05, "loss": 0.8916, "step": 3886 }, { "epoch": 0.05513505464936133, "grad_norm": 3.109375, "learning_rate": 4.9663479443488046e-05, "loss": 0.9021, "step": 3888 }, { "epoch": 0.055163416302987545, "grad_norm": 2.875, "learning_rate": 4.966311398749438e-05, "loss": 0.8903, "step": 3890 }, { "epoch": 0.05519177795661376, "grad_norm": 3.140625, "learning_rate": 4.9662748334514784e-05, "loss": 0.908, "step": 3892 }, { "epoch": 0.05522013961023998, "grad_norm": 3.703125, "learning_rate": 4.966238248455216e-05, "loss": 0.9403, "step": 3894 }, { "epoch": 0.05524850126386619, "grad_norm": 3.125, "learning_rate": 4.966201643760945e-05, "loss": 0.9352, "step": 3896 }, { "epoch": 0.055276862917492404, "grad_norm": 2.84375, "learning_rate": 4.966165019368957e-05, "loss": 0.9373, "step": 3898 }, { "epoch": 0.05530522457111862, "grad_norm": 3.5625, "learning_rate": 4.966128375279545e-05, "loss": 0.9486, "step": 3900 }, { "epoch": 0.05533358622474483, "grad_norm": 3.015625, "learning_rate": 4.966091711493e-05, "loss": 0.8962, "step": 3902 }, { "epoch": 0.05536194787837105, "grad_norm": 3.359375, "learning_rate": 4.9660550280096164e-05, "loss": 0.9285, "step": 3904 }, { "epoch": 0.055390309531997264, "grad_norm": 3.5625, "learning_rate": 4.966018324829687e-05, "loss": 0.8523, "step": 3906 }, { "epoch": 0.05541867118562348, "grad_norm": 2.984375, "learning_rate": 4.9659816019535045e-05, "loss": 0.9025, "step": 3908 }, { "epoch": 0.05544703283924969, "grad_norm": 3.15625, "learning_rate": 4.9659448593813626e-05, "loss": 0.9078, "step": 3910 }, { "epoch": 0.055475394492875904, "grad_norm": 3.34375, "learning_rate": 4.965908097113555e-05, "loss": 0.8764, "step": 3912 }, { "epoch": 0.055503756146502124, "grad_norm": 3.296875, "learning_rate": 4.9658713151503743e-05, "loss": 0.8835, "step": 3914 }, { "epoch": 0.05553211780012834, "grad_norm": 2.953125, "learning_rate": 4.9658345134921156e-05, "loss": 0.9239, "step": 3916 }, { "epoch": 0.05556047945375455, "grad_norm": 3.125, "learning_rate": 4.9657976921390716e-05, "loss": 0.9468, "step": 3918 }, { "epoch": 0.055588841107380764, "grad_norm": 3.546875, "learning_rate": 4.965760851091538e-05, "loss": 0.886, "step": 3920 }, { "epoch": 0.05561720276100698, "grad_norm": 3.09375, "learning_rate": 4.9657239903498064e-05, "loss": 0.9122, "step": 3922 }, { "epoch": 0.0556455644146332, "grad_norm": 3.5625, "learning_rate": 4.965687109914173e-05, "loss": 0.9522, "step": 3924 }, { "epoch": 0.05567392606825941, "grad_norm": 3.046875, "learning_rate": 4.9656502097849334e-05, "loss": 0.9198, "step": 3926 }, { "epoch": 0.055702287721885624, "grad_norm": 3.515625, "learning_rate": 4.965613289962381e-05, "loss": 0.9477, "step": 3928 }, { "epoch": 0.05573064937551184, "grad_norm": 2.890625, "learning_rate": 4.96557635044681e-05, "loss": 0.8735, "step": 3930 }, { "epoch": 0.05575901102913806, "grad_norm": 3.125, "learning_rate": 4.9655393912385164e-05, "loss": 0.9669, "step": 3932 }, { "epoch": 0.05578737268276427, "grad_norm": 3.140625, "learning_rate": 4.9655024123377956e-05, "loss": 0.9301, "step": 3934 }, { "epoch": 0.055815734336390484, "grad_norm": 3.03125, "learning_rate": 4.965465413744942e-05, "loss": 0.8876, "step": 3936 }, { "epoch": 0.0558440959900167, "grad_norm": 3.125, "learning_rate": 4.965428395460252e-05, "loss": 0.931, "step": 3938 }, { "epoch": 0.05587245764364291, "grad_norm": 3.140625, "learning_rate": 4.965391357484021e-05, "loss": 0.9693, "step": 3940 }, { "epoch": 0.05590081929726913, "grad_norm": 2.921875, "learning_rate": 4.965354299816545e-05, "loss": 0.9041, "step": 3942 }, { "epoch": 0.055929180950895344, "grad_norm": 3.296875, "learning_rate": 4.9653172224581195e-05, "loss": 0.9071, "step": 3944 }, { "epoch": 0.05595754260452156, "grad_norm": 3.46875, "learning_rate": 4.9652801254090405e-05, "loss": 0.9234, "step": 3946 }, { "epoch": 0.05598590425814777, "grad_norm": 2.984375, "learning_rate": 4.965243008669605e-05, "loss": 0.9222, "step": 3948 }, { "epoch": 0.05601426591177398, "grad_norm": 3.328125, "learning_rate": 4.96520587224011e-05, "loss": 0.9363, "step": 3950 }, { "epoch": 0.0560426275654002, "grad_norm": 3.484375, "learning_rate": 4.9651687161208505e-05, "loss": 0.9262, "step": 3952 }, { "epoch": 0.05607098921902642, "grad_norm": 3.421875, "learning_rate": 4.9651315403121246e-05, "loss": 0.9375, "step": 3954 }, { "epoch": 0.05609935087265263, "grad_norm": 2.796875, "learning_rate": 4.9650943448142276e-05, "loss": 0.882, "step": 3956 }, { "epoch": 0.05612771252627884, "grad_norm": 2.6875, "learning_rate": 4.9650571296274584e-05, "loss": 0.9223, "step": 3958 }, { "epoch": 0.056156074179905056, "grad_norm": 3.25, "learning_rate": 4.965019894752113e-05, "loss": 0.9141, "step": 3960 }, { "epoch": 0.05618443583353128, "grad_norm": 3.328125, "learning_rate": 4.9649826401884904e-05, "loss": 0.9411, "step": 3962 }, { "epoch": 0.05621279748715749, "grad_norm": 3.34375, "learning_rate": 4.964945365936886e-05, "loss": 0.9714, "step": 3964 }, { "epoch": 0.0562411591407837, "grad_norm": 3.046875, "learning_rate": 4.964908071997598e-05, "loss": 0.9085, "step": 3966 }, { "epoch": 0.056269520794409916, "grad_norm": 3.46875, "learning_rate": 4.9648707583709266e-05, "loss": 0.9553, "step": 3968 }, { "epoch": 0.05629788244803613, "grad_norm": 2.734375, "learning_rate": 4.964833425057167e-05, "loss": 0.9194, "step": 3970 }, { "epoch": 0.05632624410166235, "grad_norm": 2.796875, "learning_rate": 4.964796072056618e-05, "loss": 0.9152, "step": 3972 }, { "epoch": 0.05635460575528856, "grad_norm": 3.28125, "learning_rate": 4.964758699369579e-05, "loss": 0.9586, "step": 3974 }, { "epoch": 0.056382967408914776, "grad_norm": 2.78125, "learning_rate": 4.9647213069963475e-05, "loss": 0.9352, "step": 3976 }, { "epoch": 0.05641132906254099, "grad_norm": 2.921875, "learning_rate": 4.964683894937223e-05, "loss": 0.9609, "step": 3978 }, { "epoch": 0.05643969071616721, "grad_norm": 3.375, "learning_rate": 4.9646464631925035e-05, "loss": 0.8815, "step": 3980 }, { "epoch": 0.05646805236979342, "grad_norm": 2.890625, "learning_rate": 4.964609011762488e-05, "loss": 0.888, "step": 3982 }, { "epoch": 0.056496414023419636, "grad_norm": 2.90625, "learning_rate": 4.9645715406474766e-05, "loss": 0.9233, "step": 3984 }, { "epoch": 0.05652477567704585, "grad_norm": 2.90625, "learning_rate": 4.9645340498477676e-05, "loss": 0.9163, "step": 3986 }, { "epoch": 0.05655313733067206, "grad_norm": 2.859375, "learning_rate": 4.964496539363661e-05, "loss": 0.9275, "step": 3988 }, { "epoch": 0.05658149898429828, "grad_norm": 2.515625, "learning_rate": 4.9644590091954556e-05, "loss": 0.8584, "step": 3990 }, { "epoch": 0.056609860637924496, "grad_norm": 2.828125, "learning_rate": 4.964421459343452e-05, "loss": 0.8798, "step": 3992 }, { "epoch": 0.05663822229155071, "grad_norm": 3.15625, "learning_rate": 4.96438388980795e-05, "loss": 0.9352, "step": 3994 }, { "epoch": 0.05666658394517692, "grad_norm": 3.078125, "learning_rate": 4.9643463005892495e-05, "loss": 0.9005, "step": 3996 }, { "epoch": 0.056694945598803136, "grad_norm": 3.46875, "learning_rate": 4.96430869168765e-05, "loss": 0.9313, "step": 3998 }, { "epoch": 0.056723307252429356, "grad_norm": 3.1875, "learning_rate": 4.964271063103454e-05, "loss": 0.9333, "step": 4000 }, { "epoch": 0.05675166890605557, "grad_norm": 3.5, "learning_rate": 4.9642334148369595e-05, "loss": 0.898, "step": 4002 }, { "epoch": 0.05678003055968178, "grad_norm": 3.421875, "learning_rate": 4.964195746888469e-05, "loss": 0.9046, "step": 4004 }, { "epoch": 0.056808392213307995, "grad_norm": 2.953125, "learning_rate": 4.964158059258282e-05, "loss": 0.9282, "step": 4006 }, { "epoch": 0.05683675386693421, "grad_norm": 3.25, "learning_rate": 4.964120351946701e-05, "loss": 0.9305, "step": 4008 }, { "epoch": 0.05686511552056043, "grad_norm": 3.34375, "learning_rate": 4.964082624954026e-05, "loss": 0.9904, "step": 4010 }, { "epoch": 0.05689347717418664, "grad_norm": 2.78125, "learning_rate": 4.964044878280558e-05, "loss": 0.8854, "step": 4012 }, { "epoch": 0.056921838827812855, "grad_norm": 3.140625, "learning_rate": 4.9640071119266007e-05, "loss": 0.9607, "step": 4014 }, { "epoch": 0.05695020048143907, "grad_norm": 3.0625, "learning_rate": 4.9639693258924534e-05, "loss": 0.9621, "step": 4016 }, { "epoch": 0.05697856213506528, "grad_norm": 3.734375, "learning_rate": 4.9639315201784185e-05, "loss": 0.9136, "step": 4018 }, { "epoch": 0.0570069237886915, "grad_norm": 2.921875, "learning_rate": 4.963893694784799e-05, "loss": 0.8716, "step": 4020 }, { "epoch": 0.057035285442317715, "grad_norm": 2.875, "learning_rate": 4.9638558497118956e-05, "loss": 0.8394, "step": 4022 }, { "epoch": 0.05706364709594393, "grad_norm": 3.171875, "learning_rate": 4.9638179849600105e-05, "loss": 0.9747, "step": 4024 }, { "epoch": 0.05709200874957014, "grad_norm": 3.671875, "learning_rate": 4.963780100529448e-05, "loss": 0.9131, "step": 4026 }, { "epoch": 0.057120370403196355, "grad_norm": 3.203125, "learning_rate": 4.963742196420509e-05, "loss": 0.882, "step": 4028 }, { "epoch": 0.057148732056822575, "grad_norm": 3.25, "learning_rate": 4.9637042726334975e-05, "loss": 0.9566, "step": 4030 }, { "epoch": 0.05717709371044879, "grad_norm": 3.296875, "learning_rate": 4.963666329168715e-05, "loss": 0.9162, "step": 4032 }, { "epoch": 0.057205455364075, "grad_norm": 3.21875, "learning_rate": 4.963628366026465e-05, "loss": 0.962, "step": 4034 }, { "epoch": 0.057233817017701215, "grad_norm": 3.21875, "learning_rate": 4.9635903832070515e-05, "loss": 0.9595, "step": 4036 }, { "epoch": 0.057262178671327435, "grad_norm": 3.03125, "learning_rate": 4.9635523807107764e-05, "loss": 0.9642, "step": 4038 }, { "epoch": 0.05729054032495365, "grad_norm": 2.578125, "learning_rate": 4.963514358537945e-05, "loss": 0.8164, "step": 4040 }, { "epoch": 0.05731890197857986, "grad_norm": 3.453125, "learning_rate": 4.96347631668886e-05, "loss": 0.9387, "step": 4042 }, { "epoch": 0.057347263632206075, "grad_norm": 3.125, "learning_rate": 4.963438255163825e-05, "loss": 0.9071, "step": 4044 }, { "epoch": 0.05737562528583229, "grad_norm": 3.265625, "learning_rate": 4.963400173963144e-05, "loss": 0.8781, "step": 4046 }, { "epoch": 0.05740398693945851, "grad_norm": 2.875, "learning_rate": 4.963362073087122e-05, "loss": 0.9219, "step": 4048 }, { "epoch": 0.05743234859308472, "grad_norm": 3.1875, "learning_rate": 4.963323952536063e-05, "loss": 0.9128, "step": 4050 }, { "epoch": 0.057460710246710935, "grad_norm": 3.140625, "learning_rate": 4.963285812310271e-05, "loss": 0.9502, "step": 4052 }, { "epoch": 0.05748907190033715, "grad_norm": 2.96875, "learning_rate": 4.9632476524100515e-05, "loss": 0.9305, "step": 4054 }, { "epoch": 0.05751743355396336, "grad_norm": 3.015625, "learning_rate": 4.9632094728357084e-05, "loss": 0.9069, "step": 4056 }, { "epoch": 0.05754579520758958, "grad_norm": 3.421875, "learning_rate": 4.963171273587546e-05, "loss": 0.9272, "step": 4058 }, { "epoch": 0.057574156861215794, "grad_norm": 3.390625, "learning_rate": 4.963133054665871e-05, "loss": 0.9138, "step": 4060 }, { "epoch": 0.05760251851484201, "grad_norm": 3.203125, "learning_rate": 4.963094816070988e-05, "loss": 0.966, "step": 4062 }, { "epoch": 0.05763088016846822, "grad_norm": 3.140625, "learning_rate": 4.963056557803203e-05, "loss": 0.9608, "step": 4064 }, { "epoch": 0.057659241822094434, "grad_norm": 3.03125, "learning_rate": 4.96301827986282e-05, "loss": 0.9095, "step": 4066 }, { "epoch": 0.057687603475720654, "grad_norm": 3.125, "learning_rate": 4.9629799822501465e-05, "loss": 0.9448, "step": 4068 }, { "epoch": 0.05771596512934687, "grad_norm": 3.046875, "learning_rate": 4.962941664965487e-05, "loss": 0.9514, "step": 4070 }, { "epoch": 0.05774432678297308, "grad_norm": 3.109375, "learning_rate": 4.962903328009149e-05, "loss": 0.9531, "step": 4072 }, { "epoch": 0.057772688436599294, "grad_norm": 3.421875, "learning_rate": 4.9628649713814365e-05, "loss": 0.878, "step": 4074 }, { "epoch": 0.05780105009022551, "grad_norm": 2.96875, "learning_rate": 4.962826595082658e-05, "loss": 0.899, "step": 4076 }, { "epoch": 0.05782941174385173, "grad_norm": 3.59375, "learning_rate": 4.962788199113119e-05, "loss": 0.884, "step": 4078 }, { "epoch": 0.05785777339747794, "grad_norm": 2.828125, "learning_rate": 4.962749783473127e-05, "loss": 0.902, "step": 4080 }, { "epoch": 0.057886135051104154, "grad_norm": 3.203125, "learning_rate": 4.962711348162987e-05, "loss": 0.9249, "step": 4082 }, { "epoch": 0.05791449670473037, "grad_norm": 3.078125, "learning_rate": 4.962672893183009e-05, "loss": 0.9337, "step": 4084 }, { "epoch": 0.05794285835835659, "grad_norm": 3.296875, "learning_rate": 4.962634418533497e-05, "loss": 0.8881, "step": 4086 }, { "epoch": 0.0579712200119828, "grad_norm": 3.328125, "learning_rate": 4.96259592421476e-05, "loss": 0.888, "step": 4088 }, { "epoch": 0.057999581665609014, "grad_norm": 2.953125, "learning_rate": 4.9625574102271046e-05, "loss": 0.8787, "step": 4090 }, { "epoch": 0.05802794331923523, "grad_norm": 2.828125, "learning_rate": 4.96251887657084e-05, "loss": 0.8999, "step": 4092 }, { "epoch": 0.05805630497286144, "grad_norm": 2.984375, "learning_rate": 4.962480323246272e-05, "loss": 0.9003, "step": 4094 }, { "epoch": 0.05808466662648766, "grad_norm": 2.796875, "learning_rate": 4.962441750253709e-05, "loss": 0.8583, "step": 4096 }, { "epoch": 0.058113028280113874, "grad_norm": 3.046875, "learning_rate": 4.962403157593461e-05, "loss": 0.9175, "step": 4098 }, { "epoch": 0.05814138993374009, "grad_norm": 3.25, "learning_rate": 4.962364545265834e-05, "loss": 0.9352, "step": 4100 }, { "epoch": 0.0581697515873663, "grad_norm": 2.84375, "learning_rate": 4.9623259132711365e-05, "loss": 0.8515, "step": 4102 }, { "epoch": 0.05819811324099251, "grad_norm": 2.84375, "learning_rate": 4.9622872616096786e-05, "loss": 0.872, "step": 4104 }, { "epoch": 0.058226474894618734, "grad_norm": 2.921875, "learning_rate": 4.962248590281767e-05, "loss": 0.8794, "step": 4106 }, { "epoch": 0.05825483654824495, "grad_norm": 3.109375, "learning_rate": 4.962209899287713e-05, "loss": 0.8974, "step": 4108 }, { "epoch": 0.05828319820187116, "grad_norm": 3.296875, "learning_rate": 4.962171188627823e-05, "loss": 0.9136, "step": 4110 }, { "epoch": 0.05831155985549737, "grad_norm": 3.015625, "learning_rate": 4.9621324583024085e-05, "loss": 0.9142, "step": 4112 }, { "epoch": 0.058339921509123586, "grad_norm": 3.203125, "learning_rate": 4.962093708311778e-05, "loss": 0.8969, "step": 4114 }, { "epoch": 0.05836828316274981, "grad_norm": 3.1875, "learning_rate": 4.9620549386562396e-05, "loss": 0.9435, "step": 4116 }, { "epoch": 0.05839664481637602, "grad_norm": 3.015625, "learning_rate": 4.962016149336105e-05, "loss": 0.922, "step": 4118 }, { "epoch": 0.05842500647000223, "grad_norm": 3.0625, "learning_rate": 4.9619773403516835e-05, "loss": 0.8736, "step": 4120 }, { "epoch": 0.058453368123628446, "grad_norm": 3.046875, "learning_rate": 4.961938511703284e-05, "loss": 0.9218, "step": 4122 }, { "epoch": 0.05848172977725466, "grad_norm": 3.203125, "learning_rate": 4.9618996633912176e-05, "loss": 0.907, "step": 4124 }, { "epoch": 0.05851009143088088, "grad_norm": 3.140625, "learning_rate": 4.961860795415795e-05, "loss": 0.9015, "step": 4126 }, { "epoch": 0.05853845308450709, "grad_norm": 2.96875, "learning_rate": 4.9618219077773255e-05, "loss": 0.935, "step": 4128 }, { "epoch": 0.058566814738133306, "grad_norm": 2.984375, "learning_rate": 4.96178300047612e-05, "loss": 0.9105, "step": 4130 }, { "epoch": 0.05859517639175952, "grad_norm": 3.140625, "learning_rate": 4.96174407351249e-05, "loss": 0.9218, "step": 4132 }, { "epoch": 0.05862353804538574, "grad_norm": 3.25, "learning_rate": 4.961705126886746e-05, "loss": 0.9189, "step": 4134 }, { "epoch": 0.05865189969901195, "grad_norm": 3.078125, "learning_rate": 4.961666160599198e-05, "loss": 0.8609, "step": 4136 }, { "epoch": 0.058680261352638166, "grad_norm": 3.09375, "learning_rate": 4.9616271746501584e-05, "loss": 0.9286, "step": 4138 }, { "epoch": 0.05870862300626438, "grad_norm": 2.921875, "learning_rate": 4.961588169039939e-05, "loss": 0.9292, "step": 4140 }, { "epoch": 0.05873698465989059, "grad_norm": 3.015625, "learning_rate": 4.96154914376885e-05, "loss": 0.9492, "step": 4142 }, { "epoch": 0.05876534631351681, "grad_norm": 3.03125, "learning_rate": 4.961510098837205e-05, "loss": 0.9309, "step": 4144 }, { "epoch": 0.058793707967143026, "grad_norm": 3.140625, "learning_rate": 4.961471034245314e-05, "loss": 0.9205, "step": 4146 }, { "epoch": 0.05882206962076924, "grad_norm": 3.125, "learning_rate": 4.9614319499934894e-05, "loss": 0.9451, "step": 4148 }, { "epoch": 0.05885043127439545, "grad_norm": 3.703125, "learning_rate": 4.9613928460820434e-05, "loss": 0.9254, "step": 4150 }, { "epoch": 0.058878792928021666, "grad_norm": 3.328125, "learning_rate": 4.9613537225112893e-05, "loss": 0.9407, "step": 4152 }, { "epoch": 0.058907154581647886, "grad_norm": 3.125, "learning_rate": 4.961314579281538e-05, "loss": 0.8709, "step": 4154 }, { "epoch": 0.0589355162352741, "grad_norm": 3.171875, "learning_rate": 4.9612754163931044e-05, "loss": 0.8752, "step": 4156 }, { "epoch": 0.05896387788890031, "grad_norm": 3.015625, "learning_rate": 4.9612362338462984e-05, "loss": 0.9443, "step": 4158 }, { "epoch": 0.058992239542526526, "grad_norm": 3.25, "learning_rate": 4.961197031641435e-05, "loss": 0.9955, "step": 4160 }, { "epoch": 0.05902060119615274, "grad_norm": 3.15625, "learning_rate": 4.961157809778827e-05, "loss": 0.9343, "step": 4162 }, { "epoch": 0.05904896284977896, "grad_norm": 3.078125, "learning_rate": 4.9611185682587866e-05, "loss": 0.9154, "step": 4164 }, { "epoch": 0.05907732450340517, "grad_norm": 3.53125, "learning_rate": 4.9610793070816295e-05, "loss": 0.8669, "step": 4166 }, { "epoch": 0.059105686157031385, "grad_norm": 3.453125, "learning_rate": 4.961040026247666e-05, "loss": 0.9062, "step": 4168 }, { "epoch": 0.0591340478106576, "grad_norm": 3.015625, "learning_rate": 4.9610007257572135e-05, "loss": 0.9228, "step": 4170 }, { "epoch": 0.05916240946428381, "grad_norm": 2.6875, "learning_rate": 4.960961405610582e-05, "loss": 0.8419, "step": 4172 }, { "epoch": 0.05919077111791003, "grad_norm": 3.390625, "learning_rate": 4.960922065808089e-05, "loss": 0.8924, "step": 4174 }, { "epoch": 0.059219132771536245, "grad_norm": 3.296875, "learning_rate": 4.960882706350047e-05, "loss": 0.9452, "step": 4176 }, { "epoch": 0.05924749442516246, "grad_norm": 3.1875, "learning_rate": 4.9608433272367714e-05, "loss": 0.8585, "step": 4178 }, { "epoch": 0.05927585607878867, "grad_norm": 3.125, "learning_rate": 4.960803928468575e-05, "loss": 0.8603, "step": 4180 }, { "epoch": 0.059304217732414885, "grad_norm": 2.890625, "learning_rate": 4.960764510045774e-05, "loss": 0.898, "step": 4182 }, { "epoch": 0.059332579386041105, "grad_norm": 3.03125, "learning_rate": 4.960725071968683e-05, "loss": 0.8364, "step": 4184 }, { "epoch": 0.05936094103966732, "grad_norm": 3.1875, "learning_rate": 4.9606856142376156e-05, "loss": 0.921, "step": 4186 }, { "epoch": 0.05938930269329353, "grad_norm": 3.03125, "learning_rate": 4.960646136852889e-05, "loss": 0.9831, "step": 4188 }, { "epoch": 0.059417664346919745, "grad_norm": 2.890625, "learning_rate": 4.960606639814817e-05, "loss": 0.8861, "step": 4190 }, { "epoch": 0.059446026000545965, "grad_norm": 3.1875, "learning_rate": 4.960567123123716e-05, "loss": 0.9198, "step": 4192 }, { "epoch": 0.05947438765417218, "grad_norm": 3.25, "learning_rate": 4.960527586779902e-05, "loss": 0.9375, "step": 4194 }, { "epoch": 0.05950274930779839, "grad_norm": 2.984375, "learning_rate": 4.960488030783689e-05, "loss": 0.9685, "step": 4196 }, { "epoch": 0.059531110961424605, "grad_norm": 2.890625, "learning_rate": 4.960448455135394e-05, "loss": 0.9174, "step": 4198 }, { "epoch": 0.05955947261505082, "grad_norm": 3.125, "learning_rate": 4.960408859835333e-05, "loss": 0.9629, "step": 4200 }, { "epoch": 0.05958783426867704, "grad_norm": 3.375, "learning_rate": 4.960369244883823e-05, "loss": 0.9274, "step": 4202 }, { "epoch": 0.05961619592230325, "grad_norm": 2.828125, "learning_rate": 4.960329610281179e-05, "loss": 0.8886, "step": 4204 }, { "epoch": 0.059644557575929465, "grad_norm": 3.078125, "learning_rate": 4.9602899560277197e-05, "loss": 0.8974, "step": 4206 }, { "epoch": 0.05967291922955568, "grad_norm": 2.984375, "learning_rate": 4.9602502821237586e-05, "loss": 0.9225, "step": 4208 }, { "epoch": 0.05970128088318189, "grad_norm": 3.203125, "learning_rate": 4.960210588569616e-05, "loss": 0.9008, "step": 4210 }, { "epoch": 0.05972964253680811, "grad_norm": 2.796875, "learning_rate": 4.960170875365606e-05, "loss": 0.8873, "step": 4212 }, { "epoch": 0.059758004190434325, "grad_norm": 2.9375, "learning_rate": 4.9601311425120475e-05, "loss": 0.9013, "step": 4214 }, { "epoch": 0.05978636584406054, "grad_norm": 3.4375, "learning_rate": 4.960091390009258e-05, "loss": 0.9161, "step": 4216 }, { "epoch": 0.05981472749768675, "grad_norm": 3.0, "learning_rate": 4.960051617857554e-05, "loss": 0.8981, "step": 4218 }, { "epoch": 0.059843089151312964, "grad_norm": 2.828125, "learning_rate": 4.960011826057254e-05, "loss": 0.8686, "step": 4220 }, { "epoch": 0.059871450804939184, "grad_norm": 3.265625, "learning_rate": 4.959972014608675e-05, "loss": 0.8942, "step": 4222 }, { "epoch": 0.0598998124585654, "grad_norm": 3.46875, "learning_rate": 4.9599321835121355e-05, "loss": 0.8953, "step": 4224 }, { "epoch": 0.05992817411219161, "grad_norm": 3.828125, "learning_rate": 4.9598923327679534e-05, "loss": 0.8942, "step": 4226 }, { "epoch": 0.059956535765817824, "grad_norm": 2.765625, "learning_rate": 4.9598524623764476e-05, "loss": 0.9078, "step": 4228 }, { "epoch": 0.05998489741944404, "grad_norm": 2.84375, "learning_rate": 4.959812572337936e-05, "loss": 0.916, "step": 4230 }, { "epoch": 0.06001325907307026, "grad_norm": 3.0, "learning_rate": 4.959772662652737e-05, "loss": 0.9056, "step": 4232 }, { "epoch": 0.06004162072669647, "grad_norm": 3.140625, "learning_rate": 4.9597327333211703e-05, "loss": 0.9418, "step": 4234 }, { "epoch": 0.060069982380322684, "grad_norm": 2.84375, "learning_rate": 4.959692784343554e-05, "loss": 0.9352, "step": 4236 }, { "epoch": 0.0600983440339489, "grad_norm": 3.390625, "learning_rate": 4.959652815720207e-05, "loss": 0.9008, "step": 4238 }, { "epoch": 0.06012670568757512, "grad_norm": 3.328125, "learning_rate": 4.959612827451449e-05, "loss": 0.906, "step": 4240 }, { "epoch": 0.06015506734120133, "grad_norm": 2.953125, "learning_rate": 4.9595728195375996e-05, "loss": 0.9324, "step": 4242 }, { "epoch": 0.060183428994827544, "grad_norm": 3.265625, "learning_rate": 4.959532791978978e-05, "loss": 0.9592, "step": 4244 }, { "epoch": 0.06021179064845376, "grad_norm": 3.0625, "learning_rate": 4.959492744775904e-05, "loss": 0.9036, "step": 4246 }, { "epoch": 0.06024015230207997, "grad_norm": 3.359375, "learning_rate": 4.9594526779286966e-05, "loss": 0.9199, "step": 4248 }, { "epoch": 0.06026851395570619, "grad_norm": 3.375, "learning_rate": 4.959412591437678e-05, "loss": 0.9158, "step": 4250 }, { "epoch": 0.060296875609332404, "grad_norm": 3.328125, "learning_rate": 4.959372485303165e-05, "loss": 0.9309, "step": 4252 }, { "epoch": 0.06032523726295862, "grad_norm": 3.34375, "learning_rate": 4.959332359525482e-05, "loss": 0.9032, "step": 4254 }, { "epoch": 0.06035359891658483, "grad_norm": 3.109375, "learning_rate": 4.959292214104946e-05, "loss": 0.9427, "step": 4256 }, { "epoch": 0.06038196057021104, "grad_norm": 3.21875, "learning_rate": 4.9592520490418806e-05, "loss": 0.9504, "step": 4258 }, { "epoch": 0.060410322223837264, "grad_norm": 2.9375, "learning_rate": 4.9592118643366035e-05, "loss": 0.8932, "step": 4260 }, { "epoch": 0.06043868387746348, "grad_norm": 2.984375, "learning_rate": 4.959171659989438e-05, "loss": 0.9152, "step": 4262 }, { "epoch": 0.06046704553108969, "grad_norm": 2.71875, "learning_rate": 4.959131436000703e-05, "loss": 0.9027, "step": 4264 }, { "epoch": 0.0604954071847159, "grad_norm": 2.890625, "learning_rate": 4.959091192370723e-05, "loss": 0.8978, "step": 4266 }, { "epoch": 0.06052376883834212, "grad_norm": 2.84375, "learning_rate": 4.959050929099817e-05, "loss": 0.8886, "step": 4268 }, { "epoch": 0.06055213049196834, "grad_norm": 3.484375, "learning_rate": 4.9590106461883076e-05, "loss": 0.9047, "step": 4270 }, { "epoch": 0.06058049214559455, "grad_norm": 3.03125, "learning_rate": 4.9589703436365156e-05, "loss": 0.8836, "step": 4272 }, { "epoch": 0.06060885379922076, "grad_norm": 3.515625, "learning_rate": 4.958930021444763e-05, "loss": 0.9398, "step": 4274 }, { "epoch": 0.060637215452846976, "grad_norm": 3.640625, "learning_rate": 4.958889679613373e-05, "loss": 0.9166, "step": 4276 }, { "epoch": 0.06066557710647319, "grad_norm": 3.53125, "learning_rate": 4.9588493181426674e-05, "loss": 0.9233, "step": 4278 }, { "epoch": 0.06069393876009941, "grad_norm": 3.078125, "learning_rate": 4.958808937032968e-05, "loss": 0.8999, "step": 4280 }, { "epoch": 0.06072230041372562, "grad_norm": 3.046875, "learning_rate": 4.958768536284597e-05, "loss": 0.8856, "step": 4282 }, { "epoch": 0.060750662067351836, "grad_norm": 3.0, "learning_rate": 4.958728115897878e-05, "loss": 0.842, "step": 4284 }, { "epoch": 0.06077902372097805, "grad_norm": 3.328125, "learning_rate": 4.958687675873134e-05, "loss": 0.924, "step": 4286 }, { "epoch": 0.06080738537460427, "grad_norm": 2.625, "learning_rate": 4.958647216210688e-05, "loss": 0.8743, "step": 4288 }, { "epoch": 0.06083574702823048, "grad_norm": 3.265625, "learning_rate": 4.958606736910861e-05, "loss": 0.9053, "step": 4290 }, { "epoch": 0.060864108681856696, "grad_norm": 3.015625, "learning_rate": 4.9585662379739796e-05, "loss": 0.9297, "step": 4292 }, { "epoch": 0.06089247033548291, "grad_norm": 3.078125, "learning_rate": 4.958525719400365e-05, "loss": 0.9139, "step": 4294 }, { "epoch": 0.06092083198910912, "grad_norm": 2.859375, "learning_rate": 4.9584851811903414e-05, "loss": 0.8685, "step": 4296 }, { "epoch": 0.06094919364273534, "grad_norm": 3.015625, "learning_rate": 4.9584446233442326e-05, "loss": 0.9324, "step": 4298 }, { "epoch": 0.060977555296361556, "grad_norm": 3.15625, "learning_rate": 4.958404045862363e-05, "loss": 0.899, "step": 4300 }, { "epoch": 0.06100591694998777, "grad_norm": 2.9375, "learning_rate": 4.9583634487450565e-05, "loss": 0.9426, "step": 4302 }, { "epoch": 0.06103427860361398, "grad_norm": 3.0625, "learning_rate": 4.958322831992637e-05, "loss": 0.9031, "step": 4304 }, { "epoch": 0.061062640257240196, "grad_norm": 3.375, "learning_rate": 4.958282195605428e-05, "loss": 0.9681, "step": 4306 }, { "epoch": 0.061091001910866416, "grad_norm": 3.03125, "learning_rate": 4.958241539583757e-05, "loss": 0.9221, "step": 4308 }, { "epoch": 0.06111936356449263, "grad_norm": 3.046875, "learning_rate": 4.958200863927945e-05, "loss": 0.8804, "step": 4310 }, { "epoch": 0.06114772521811884, "grad_norm": 3.0, "learning_rate": 4.95816016863832e-05, "loss": 0.9227, "step": 4312 }, { "epoch": 0.061176086871745056, "grad_norm": 2.890625, "learning_rate": 4.958119453715205e-05, "loss": 0.9391, "step": 4314 }, { "epoch": 0.06120444852537127, "grad_norm": 4.0625, "learning_rate": 4.958078719158927e-05, "loss": 0.9791, "step": 4316 }, { "epoch": 0.06123281017899749, "grad_norm": 3.0, "learning_rate": 4.9580379649698096e-05, "loss": 0.9057, "step": 4318 }, { "epoch": 0.0612611718326237, "grad_norm": 2.859375, "learning_rate": 4.957997191148179e-05, "loss": 0.9394, "step": 4320 }, { "epoch": 0.061289533486249916, "grad_norm": 2.796875, "learning_rate": 4.957956397694361e-05, "loss": 0.8939, "step": 4322 }, { "epoch": 0.06131789513987613, "grad_norm": 3.234375, "learning_rate": 4.957915584608681e-05, "loss": 0.9503, "step": 4324 }, { "epoch": 0.06134625679350234, "grad_norm": 3.15625, "learning_rate": 4.957874751891466e-05, "loss": 0.8942, "step": 4326 }, { "epoch": 0.06137461844712856, "grad_norm": 3.265625, "learning_rate": 4.9578338995430406e-05, "loss": 0.9176, "step": 4328 }, { "epoch": 0.061402980100754775, "grad_norm": 3.859375, "learning_rate": 4.957793027563732e-05, "loss": 0.8954, "step": 4330 }, { "epoch": 0.06143134175438099, "grad_norm": 3.25, "learning_rate": 4.957752135953867e-05, "loss": 0.8904, "step": 4332 }, { "epoch": 0.0614597034080072, "grad_norm": 3.03125, "learning_rate": 4.9577112247137715e-05, "loss": 0.8943, "step": 4334 }, { "epoch": 0.061488065061633415, "grad_norm": 3.71875, "learning_rate": 4.9576702938437724e-05, "loss": 0.96, "step": 4336 }, { "epoch": 0.061516426715259635, "grad_norm": 3.46875, "learning_rate": 4.9576293433441974e-05, "loss": 0.9093, "step": 4338 }, { "epoch": 0.06154478836888585, "grad_norm": 3.4375, "learning_rate": 4.9575883732153725e-05, "loss": 0.937, "step": 4340 }, { "epoch": 0.06157315002251206, "grad_norm": 3.234375, "learning_rate": 4.957547383457625e-05, "loss": 0.9304, "step": 4342 }, { "epoch": 0.061601511676138275, "grad_norm": 3.1875, "learning_rate": 4.957506374071283e-05, "loss": 0.917, "step": 4344 }, { "epoch": 0.061629873329764495, "grad_norm": 3.296875, "learning_rate": 4.957465345056673e-05, "loss": 0.9286, "step": 4346 }, { "epoch": 0.06165823498339071, "grad_norm": 2.90625, "learning_rate": 4.957424296414125e-05, "loss": 0.9119, "step": 4348 }, { "epoch": 0.06168659663701692, "grad_norm": 3.296875, "learning_rate": 4.9573832281439636e-05, "loss": 0.9454, "step": 4350 }, { "epoch": 0.061714958290643135, "grad_norm": 2.890625, "learning_rate": 4.957342140246519e-05, "loss": 0.9039, "step": 4352 }, { "epoch": 0.06174331994426935, "grad_norm": 3.03125, "learning_rate": 4.957301032722119e-05, "loss": 0.8884, "step": 4354 }, { "epoch": 0.06177168159789557, "grad_norm": 2.953125, "learning_rate": 4.957259905571092e-05, "loss": 0.8839, "step": 4356 }, { "epoch": 0.06180004325152178, "grad_norm": 3.28125, "learning_rate": 4.9572187587937655e-05, "loss": 0.913, "step": 4358 }, { "epoch": 0.061828404905147995, "grad_norm": 3.265625, "learning_rate": 4.9571775923904694e-05, "loss": 0.899, "step": 4360 }, { "epoch": 0.06185676655877421, "grad_norm": 2.796875, "learning_rate": 4.957136406361532e-05, "loss": 0.9114, "step": 4362 }, { "epoch": 0.06188512821240042, "grad_norm": 2.921875, "learning_rate": 4.957095200707281e-05, "loss": 0.8548, "step": 4364 }, { "epoch": 0.06191348986602664, "grad_norm": 3.0, "learning_rate": 4.9570539754280484e-05, "loss": 0.9058, "step": 4366 }, { "epoch": 0.061941851519652855, "grad_norm": 2.921875, "learning_rate": 4.9570127305241606e-05, "loss": 0.8778, "step": 4368 }, { "epoch": 0.06197021317327907, "grad_norm": 3.46875, "learning_rate": 4.956971465995949e-05, "loss": 0.9196, "step": 4370 }, { "epoch": 0.06199857482690528, "grad_norm": 3.390625, "learning_rate": 4.956930181843742e-05, "loss": 0.8792, "step": 4372 }, { "epoch": 0.062026936480531494, "grad_norm": 2.96875, "learning_rate": 4.95688887806787e-05, "loss": 0.8845, "step": 4374 }, { "epoch": 0.062055298134157715, "grad_norm": 2.859375, "learning_rate": 4.9568475546686634e-05, "loss": 0.8971, "step": 4376 }, { "epoch": 0.06208365978778393, "grad_norm": 3.125, "learning_rate": 4.956806211646451e-05, "loss": 0.9152, "step": 4378 }, { "epoch": 0.06211202144141014, "grad_norm": 2.96875, "learning_rate": 4.956764849001563e-05, "loss": 0.8933, "step": 4380 }, { "epoch": 0.062140383095036354, "grad_norm": 3.390625, "learning_rate": 4.9567234667343305e-05, "loss": 0.9274, "step": 4382 }, { "epoch": 0.06216874474866257, "grad_norm": 3.3125, "learning_rate": 4.956682064845084e-05, "loss": 0.9549, "step": 4384 }, { "epoch": 0.06219710640228879, "grad_norm": 3.046875, "learning_rate": 4.956640643334154e-05, "loss": 0.8492, "step": 4386 }, { "epoch": 0.062225468055915, "grad_norm": 3.21875, "learning_rate": 4.95659920220187e-05, "loss": 0.9271, "step": 4388 }, { "epoch": 0.062253829709541214, "grad_norm": 2.8125, "learning_rate": 4.956557741448566e-05, "loss": 0.8903, "step": 4390 }, { "epoch": 0.06228219136316743, "grad_norm": 2.90625, "learning_rate": 4.956516261074571e-05, "loss": 0.8773, "step": 4392 }, { "epoch": 0.06231055301679365, "grad_norm": 3.046875, "learning_rate": 4.9564747610802165e-05, "loss": 0.9306, "step": 4394 }, { "epoch": 0.06233891467041986, "grad_norm": 2.984375, "learning_rate": 4.956433241465835e-05, "loss": 0.8718, "step": 4396 }, { "epoch": 0.062367276324046074, "grad_norm": 2.6875, "learning_rate": 4.9563917022317565e-05, "loss": 0.9326, "step": 4398 }, { "epoch": 0.06239563797767229, "grad_norm": 2.96875, "learning_rate": 4.956350143378315e-05, "loss": 0.8871, "step": 4400 }, { "epoch": 0.0624239996312985, "grad_norm": 2.984375, "learning_rate": 4.9563085649058395e-05, "loss": 0.8899, "step": 4402 }, { "epoch": 0.06245236128492472, "grad_norm": 3.109375, "learning_rate": 4.956266966814663e-05, "loss": 0.8899, "step": 4404 }, { "epoch": 0.062480722938550934, "grad_norm": 3.015625, "learning_rate": 4.95622534910512e-05, "loss": 0.8481, "step": 4406 }, { "epoch": 0.06250908459217715, "grad_norm": 3.15625, "learning_rate": 4.956183711777541e-05, "loss": 0.8748, "step": 4408 }, { "epoch": 0.06253744624580336, "grad_norm": 3.09375, "learning_rate": 4.956142054832259e-05, "loss": 0.8775, "step": 4410 }, { "epoch": 0.06256580789942957, "grad_norm": 3.28125, "learning_rate": 4.9561003782696055e-05, "loss": 0.8933, "step": 4412 }, { "epoch": 0.06259416955305579, "grad_norm": 3.125, "learning_rate": 4.956058682089916e-05, "loss": 0.9504, "step": 4414 }, { "epoch": 0.062622531206682, "grad_norm": 4.1875, "learning_rate": 4.956016966293521e-05, "loss": 0.9071, "step": 4416 }, { "epoch": 0.06265089286030823, "grad_norm": 3.125, "learning_rate": 4.955975230880755e-05, "loss": 0.8703, "step": 4418 }, { "epoch": 0.06267925451393444, "grad_norm": 3.078125, "learning_rate": 4.955933475851951e-05, "loss": 0.9153, "step": 4420 }, { "epoch": 0.06270761616756065, "grad_norm": 3.078125, "learning_rate": 4.9558917012074425e-05, "loss": 0.8569, "step": 4422 }, { "epoch": 0.06273597782118687, "grad_norm": 3.515625, "learning_rate": 4.955849906947563e-05, "loss": 0.8946, "step": 4424 }, { "epoch": 0.06276433947481308, "grad_norm": 3.3125, "learning_rate": 4.955808093072647e-05, "loss": 0.9502, "step": 4426 }, { "epoch": 0.0627927011284393, "grad_norm": 2.984375, "learning_rate": 4.9557662595830274e-05, "loss": 0.8673, "step": 4428 }, { "epoch": 0.0628210627820655, "grad_norm": 3.234375, "learning_rate": 4.9557244064790384e-05, "loss": 0.9455, "step": 4430 }, { "epoch": 0.06284942443569172, "grad_norm": 3.0625, "learning_rate": 4.9556825337610156e-05, "loss": 0.8943, "step": 4432 }, { "epoch": 0.06287778608931793, "grad_norm": 3.671875, "learning_rate": 4.955640641429293e-05, "loss": 0.8992, "step": 4434 }, { "epoch": 0.06290614774294415, "grad_norm": 3.0, "learning_rate": 4.955598729484204e-05, "loss": 0.9285, "step": 4436 }, { "epoch": 0.06293450939657037, "grad_norm": 3.046875, "learning_rate": 4.955556797926084e-05, "loss": 0.9537, "step": 4438 }, { "epoch": 0.06296287105019659, "grad_norm": 2.953125, "learning_rate": 4.955514846755269e-05, "loss": 0.9474, "step": 4440 }, { "epoch": 0.0629912327038228, "grad_norm": 3.25, "learning_rate": 4.9554728759720925e-05, "loss": 0.8976, "step": 4442 }, { "epoch": 0.06301959435744901, "grad_norm": 3.296875, "learning_rate": 4.95543088557689e-05, "loss": 0.899, "step": 4444 }, { "epoch": 0.06304795601107523, "grad_norm": 3.375, "learning_rate": 4.955388875569998e-05, "loss": 0.9235, "step": 4446 }, { "epoch": 0.06307631766470144, "grad_norm": 3.8125, "learning_rate": 4.9553468459517513e-05, "loss": 0.9637, "step": 4448 }, { "epoch": 0.06310467931832765, "grad_norm": 3.40625, "learning_rate": 4.9553047967224856e-05, "loss": 0.9181, "step": 4450 }, { "epoch": 0.06313304097195387, "grad_norm": 3.109375, "learning_rate": 4.955262727882536e-05, "loss": 0.8548, "step": 4452 }, { "epoch": 0.06316140262558008, "grad_norm": 3.109375, "learning_rate": 4.955220639432239e-05, "loss": 0.9494, "step": 4454 }, { "epoch": 0.06318976427920629, "grad_norm": 2.953125, "learning_rate": 4.955178531371932e-05, "loss": 0.8628, "step": 4456 }, { "epoch": 0.06321812593283252, "grad_norm": 3.265625, "learning_rate": 4.9551364037019496e-05, "loss": 0.8835, "step": 4458 }, { "epoch": 0.06324648758645873, "grad_norm": 2.9375, "learning_rate": 4.955094256422629e-05, "loss": 0.939, "step": 4460 }, { "epoch": 0.06327484924008495, "grad_norm": 3.515625, "learning_rate": 4.955052089534308e-05, "loss": 0.9424, "step": 4462 }, { "epoch": 0.06330321089371116, "grad_norm": 3.25, "learning_rate": 4.955009903037321e-05, "loss": 0.8945, "step": 4464 }, { "epoch": 0.06333157254733737, "grad_norm": 3.25, "learning_rate": 4.9549676969320055e-05, "loss": 0.9261, "step": 4466 }, { "epoch": 0.06335993420096359, "grad_norm": 2.875, "learning_rate": 4.954925471218701e-05, "loss": 0.9343, "step": 4468 }, { "epoch": 0.0633882958545898, "grad_norm": 3.296875, "learning_rate": 4.954883225897742e-05, "loss": 0.9182, "step": 4470 }, { "epoch": 0.06341665750821601, "grad_norm": 3.359375, "learning_rate": 4.954840960969467e-05, "loss": 0.8781, "step": 4472 }, { "epoch": 0.06344501916184223, "grad_norm": 3.59375, "learning_rate": 4.954798676434214e-05, "loss": 0.9237, "step": 4474 }, { "epoch": 0.06347338081546845, "grad_norm": 3.265625, "learning_rate": 4.95475637229232e-05, "loss": 0.8915, "step": 4476 }, { "epoch": 0.06350174246909467, "grad_norm": 3.234375, "learning_rate": 4.9547140485441224e-05, "loss": 0.9183, "step": 4478 }, { "epoch": 0.06353010412272088, "grad_norm": 3.296875, "learning_rate": 4.9546717051899604e-05, "loss": 0.9093, "step": 4480 }, { "epoch": 0.06355846577634709, "grad_norm": 3.375, "learning_rate": 4.9546293422301724e-05, "loss": 0.8853, "step": 4482 }, { "epoch": 0.0635868274299733, "grad_norm": 2.90625, "learning_rate": 4.9545869596650955e-05, "loss": 0.8812, "step": 4484 }, { "epoch": 0.06361518908359952, "grad_norm": 3.109375, "learning_rate": 4.954544557495069e-05, "loss": 0.869, "step": 4486 }, { "epoch": 0.06364355073722573, "grad_norm": 2.90625, "learning_rate": 4.9545021357204316e-05, "loss": 0.9197, "step": 4488 }, { "epoch": 0.06367191239085195, "grad_norm": 2.59375, "learning_rate": 4.954459694341521e-05, "loss": 0.852, "step": 4490 }, { "epoch": 0.06370027404447816, "grad_norm": 3.203125, "learning_rate": 4.954417233358678e-05, "loss": 0.8924, "step": 4492 }, { "epoch": 0.06372863569810437, "grad_norm": 3.03125, "learning_rate": 4.9543747527722404e-05, "loss": 0.8952, "step": 4494 }, { "epoch": 0.0637569973517306, "grad_norm": 3.0, "learning_rate": 4.954332252582549e-05, "loss": 0.8673, "step": 4496 }, { "epoch": 0.06378535900535681, "grad_norm": 3.125, "learning_rate": 4.954289732789941e-05, "loss": 0.9252, "step": 4498 }, { "epoch": 0.06381372065898303, "grad_norm": 2.953125, "learning_rate": 4.954247193394758e-05, "loss": 0.8773, "step": 4500 }, { "epoch": 0.06384208231260924, "grad_norm": 3.1875, "learning_rate": 4.954204634397338e-05, "loss": 0.9478, "step": 4502 }, { "epoch": 0.06387044396623545, "grad_norm": 3.109375, "learning_rate": 4.954162055798023e-05, "loss": 0.8976, "step": 4504 }, { "epoch": 0.06389880561986166, "grad_norm": 3.03125, "learning_rate": 4.954119457597151e-05, "loss": 0.9254, "step": 4506 }, { "epoch": 0.06392716727348788, "grad_norm": 2.609375, "learning_rate": 4.954076839795064e-05, "loss": 0.8699, "step": 4508 }, { "epoch": 0.06395552892711409, "grad_norm": 2.890625, "learning_rate": 4.954034202392101e-05, "loss": 0.9503, "step": 4510 }, { "epoch": 0.0639838905807403, "grad_norm": 2.953125, "learning_rate": 4.953991545388603e-05, "loss": 0.9343, "step": 4512 }, { "epoch": 0.06401225223436652, "grad_norm": 3.5, "learning_rate": 4.953948868784911e-05, "loss": 0.9285, "step": 4514 }, { "epoch": 0.06404061388799275, "grad_norm": 2.90625, "learning_rate": 4.9539061725813664e-05, "loss": 0.9171, "step": 4516 }, { "epoch": 0.06406897554161896, "grad_norm": 3.265625, "learning_rate": 4.953863456778309e-05, "loss": 0.8988, "step": 4518 }, { "epoch": 0.06409733719524517, "grad_norm": 3.640625, "learning_rate": 4.9538207213760804e-05, "loss": 0.9067, "step": 4520 }, { "epoch": 0.06412569884887138, "grad_norm": 3.15625, "learning_rate": 4.9537779663750225e-05, "loss": 0.8961, "step": 4522 }, { "epoch": 0.0641540605024976, "grad_norm": 2.625, "learning_rate": 4.953735191775476e-05, "loss": 0.9187, "step": 4524 }, { "epoch": 0.06418242215612381, "grad_norm": 2.890625, "learning_rate": 4.953692397577783e-05, "loss": 0.8512, "step": 4526 }, { "epoch": 0.06421078380975002, "grad_norm": 3.0, "learning_rate": 4.953649583782284e-05, "loss": 0.8694, "step": 4528 }, { "epoch": 0.06423914546337624, "grad_norm": 3.21875, "learning_rate": 4.953606750389324e-05, "loss": 0.8567, "step": 4530 }, { "epoch": 0.06426750711700245, "grad_norm": 3.109375, "learning_rate": 4.9535638973992416e-05, "loss": 0.8811, "step": 4532 }, { "epoch": 0.06429586877062868, "grad_norm": 3.15625, "learning_rate": 4.953521024812382e-05, "loss": 0.9173, "step": 4534 }, { "epoch": 0.06432423042425489, "grad_norm": 2.984375, "learning_rate": 4.9534781326290854e-05, "loss": 0.8845, "step": 4536 }, { "epoch": 0.0643525920778811, "grad_norm": 3.265625, "learning_rate": 4.953435220849696e-05, "loss": 0.8952, "step": 4538 }, { "epoch": 0.06438095373150732, "grad_norm": 3.578125, "learning_rate": 4.953392289474555e-05, "loss": 0.9113, "step": 4540 }, { "epoch": 0.06440931538513353, "grad_norm": 3.0, "learning_rate": 4.9533493385040067e-05, "loss": 0.921, "step": 4542 }, { "epoch": 0.06443767703875974, "grad_norm": 3.140625, "learning_rate": 4.953306367938393e-05, "loss": 0.9076, "step": 4544 }, { "epoch": 0.06446603869238596, "grad_norm": 3.140625, "learning_rate": 4.953263377778059e-05, "loss": 0.9131, "step": 4546 }, { "epoch": 0.06449440034601217, "grad_norm": 2.921875, "learning_rate": 4.953220368023346e-05, "loss": 0.9501, "step": 4548 }, { "epoch": 0.06452276199963838, "grad_norm": 3.3125, "learning_rate": 4.9531773386745986e-05, "loss": 0.9396, "step": 4550 }, { "epoch": 0.0645511236532646, "grad_norm": 3.203125, "learning_rate": 4.953134289732159e-05, "loss": 0.9094, "step": 4552 }, { "epoch": 0.06457948530689082, "grad_norm": 3.171875, "learning_rate": 4.953091221196373e-05, "loss": 0.8433, "step": 4554 }, { "epoch": 0.06460784696051704, "grad_norm": 2.84375, "learning_rate": 4.953048133067584e-05, "loss": 0.901, "step": 4556 }, { "epoch": 0.06463620861414325, "grad_norm": 3.359375, "learning_rate": 4.953005025346136e-05, "loss": 0.9377, "step": 4558 }, { "epoch": 0.06466457026776946, "grad_norm": 3.234375, "learning_rate": 4.952961898032373e-05, "loss": 0.8444, "step": 4560 }, { "epoch": 0.06469293192139568, "grad_norm": 3.15625, "learning_rate": 4.9529187511266395e-05, "loss": 0.9473, "step": 4562 }, { "epoch": 0.06472129357502189, "grad_norm": 2.921875, "learning_rate": 4.9528755846292806e-05, "loss": 0.8601, "step": 4564 }, { "epoch": 0.0647496552286481, "grad_norm": 3.09375, "learning_rate": 4.952832398540641e-05, "loss": 0.9027, "step": 4566 }, { "epoch": 0.06477801688227432, "grad_norm": 3.03125, "learning_rate": 4.952789192861065e-05, "loss": 0.8995, "step": 4568 }, { "epoch": 0.06480637853590053, "grad_norm": 3.15625, "learning_rate": 4.9527459675908986e-05, "loss": 0.8699, "step": 4570 }, { "epoch": 0.06483474018952676, "grad_norm": 3.1875, "learning_rate": 4.952702722730486e-05, "loss": 0.9362, "step": 4572 }, { "epoch": 0.06486310184315297, "grad_norm": 3.1875, "learning_rate": 4.9526594582801736e-05, "loss": 0.9343, "step": 4574 }, { "epoch": 0.06489146349677918, "grad_norm": 3.0, "learning_rate": 4.952616174240306e-05, "loss": 0.862, "step": 4576 }, { "epoch": 0.0649198251504054, "grad_norm": 3.1875, "learning_rate": 4.9525728706112295e-05, "loss": 0.8915, "step": 4578 }, { "epoch": 0.06494818680403161, "grad_norm": 2.78125, "learning_rate": 4.95252954739329e-05, "loss": 0.8781, "step": 4580 }, { "epoch": 0.06497654845765782, "grad_norm": 3.53125, "learning_rate": 4.952486204586834e-05, "loss": 0.8758, "step": 4582 }, { "epoch": 0.06500491011128404, "grad_norm": 3.171875, "learning_rate": 4.9524428421922066e-05, "loss": 0.9401, "step": 4584 }, { "epoch": 0.06503327176491025, "grad_norm": 3.5, "learning_rate": 4.952399460209755e-05, "loss": 0.8709, "step": 4586 }, { "epoch": 0.06506163341853646, "grad_norm": 2.859375, "learning_rate": 4.952356058639825e-05, "loss": 0.8392, "step": 4588 }, { "epoch": 0.06508999507216268, "grad_norm": 3.15625, "learning_rate": 4.9523126374827635e-05, "loss": 0.8782, "step": 4590 }, { "epoch": 0.0651183567257889, "grad_norm": 3.0, "learning_rate": 4.9522691967389175e-05, "loss": 0.9035, "step": 4592 }, { "epoch": 0.06514671837941512, "grad_norm": 3.09375, "learning_rate": 4.952225736408634e-05, "loss": 0.8941, "step": 4594 }, { "epoch": 0.06517508003304133, "grad_norm": 2.671875, "learning_rate": 4.952182256492259e-05, "loss": 0.8849, "step": 4596 }, { "epoch": 0.06520344168666754, "grad_norm": 3.359375, "learning_rate": 4.952138756990142e-05, "loss": 0.9264, "step": 4598 }, { "epoch": 0.06523180334029376, "grad_norm": 3.59375, "learning_rate": 4.952095237902629e-05, "loss": 0.9522, "step": 4600 }, { "epoch": 0.06526016499391997, "grad_norm": 3.4375, "learning_rate": 4.9520516992300675e-05, "loss": 0.8927, "step": 4602 }, { "epoch": 0.06528852664754618, "grad_norm": 3.046875, "learning_rate": 4.952008140972806e-05, "loss": 0.9275, "step": 4604 }, { "epoch": 0.0653168883011724, "grad_norm": 3.171875, "learning_rate": 4.951964563131191e-05, "loss": 0.8411, "step": 4606 }, { "epoch": 0.06534524995479861, "grad_norm": 3.09375, "learning_rate": 4.951920965705572e-05, "loss": 0.9227, "step": 4608 }, { "epoch": 0.06537361160842482, "grad_norm": 3.53125, "learning_rate": 4.9518773486962965e-05, "loss": 0.9514, "step": 4610 }, { "epoch": 0.06540197326205105, "grad_norm": 3.25, "learning_rate": 4.951833712103714e-05, "loss": 0.8834, "step": 4612 }, { "epoch": 0.06543033491567726, "grad_norm": 3.21875, "learning_rate": 4.951790055928171e-05, "loss": 0.9361, "step": 4614 }, { "epoch": 0.06545869656930348, "grad_norm": 3.515625, "learning_rate": 4.9517463801700185e-05, "loss": 0.8768, "step": 4616 }, { "epoch": 0.06548705822292969, "grad_norm": 3.140625, "learning_rate": 4.951702684829602e-05, "loss": 0.8992, "step": 4618 }, { "epoch": 0.0655154198765559, "grad_norm": 3.3125, "learning_rate": 4.951658969907275e-05, "loss": 0.9516, "step": 4620 }, { "epoch": 0.06554378153018212, "grad_norm": 2.90625, "learning_rate": 4.9516152354033826e-05, "loss": 0.8974, "step": 4622 }, { "epoch": 0.06557214318380833, "grad_norm": 3.078125, "learning_rate": 4.951571481318277e-05, "loss": 0.8681, "step": 4624 }, { "epoch": 0.06560050483743454, "grad_norm": 3.15625, "learning_rate": 4.951527707652305e-05, "loss": 0.8831, "step": 4626 }, { "epoch": 0.06562886649106076, "grad_norm": 2.984375, "learning_rate": 4.951483914405819e-05, "loss": 0.8918, "step": 4628 }, { "epoch": 0.06565722814468698, "grad_norm": 3.0, "learning_rate": 4.9514401015791666e-05, "loss": 0.9445, "step": 4630 }, { "epoch": 0.0656855897983132, "grad_norm": 3.28125, "learning_rate": 4.9513962691726986e-05, "loss": 0.9211, "step": 4632 }, { "epoch": 0.06571395145193941, "grad_norm": 3.515625, "learning_rate": 4.9513524171867666e-05, "loss": 0.965, "step": 4634 }, { "epoch": 0.06574231310556562, "grad_norm": 3.375, "learning_rate": 4.951308545621718e-05, "loss": 0.9273, "step": 4636 }, { "epoch": 0.06577067475919184, "grad_norm": 3.328125, "learning_rate": 4.951264654477905e-05, "loss": 0.8892, "step": 4638 }, { "epoch": 0.06579903641281805, "grad_norm": 3.09375, "learning_rate": 4.9512207437556766e-05, "loss": 0.8915, "step": 4640 }, { "epoch": 0.06582739806644426, "grad_norm": 3.140625, "learning_rate": 4.951176813455386e-05, "loss": 0.9026, "step": 4642 }, { "epoch": 0.06585575972007048, "grad_norm": 3.25, "learning_rate": 4.951132863577382e-05, "loss": 0.9051, "step": 4644 }, { "epoch": 0.06588412137369669, "grad_norm": 2.671875, "learning_rate": 4.9510888941220166e-05, "loss": 0.8719, "step": 4646 }, { "epoch": 0.0659124830273229, "grad_norm": 3.265625, "learning_rate": 4.951044905089641e-05, "loss": 0.8735, "step": 4648 }, { "epoch": 0.06594084468094913, "grad_norm": 3.53125, "learning_rate": 4.9510008964806055e-05, "loss": 0.9887, "step": 4650 }, { "epoch": 0.06596920633457534, "grad_norm": 2.90625, "learning_rate": 4.9509568682952627e-05, "loss": 0.8866, "step": 4652 }, { "epoch": 0.06599756798820156, "grad_norm": 2.9375, "learning_rate": 4.950912820533964e-05, "loss": 0.9101, "step": 4654 }, { "epoch": 0.06602592964182777, "grad_norm": 2.90625, "learning_rate": 4.950868753197061e-05, "loss": 0.9354, "step": 4656 }, { "epoch": 0.06605429129545398, "grad_norm": 3.828125, "learning_rate": 4.9508246662849055e-05, "loss": 0.9042, "step": 4658 }, { "epoch": 0.0660826529490802, "grad_norm": 3.21875, "learning_rate": 4.950780559797851e-05, "loss": 0.8734, "step": 4660 }, { "epoch": 0.06611101460270641, "grad_norm": 3.46875, "learning_rate": 4.950736433736248e-05, "loss": 0.9635, "step": 4662 }, { "epoch": 0.06613937625633262, "grad_norm": 2.71875, "learning_rate": 4.950692288100449e-05, "loss": 0.8971, "step": 4664 }, { "epoch": 0.06616773790995883, "grad_norm": 3.296875, "learning_rate": 4.9506481228908085e-05, "loss": 0.9066, "step": 4666 }, { "epoch": 0.06619609956358505, "grad_norm": 3.09375, "learning_rate": 4.950603938107677e-05, "loss": 0.9691, "step": 4668 }, { "epoch": 0.06622446121721128, "grad_norm": 3.296875, "learning_rate": 4.950559733751409e-05, "loss": 0.8895, "step": 4670 }, { "epoch": 0.06625282287083749, "grad_norm": 3.21875, "learning_rate": 4.9505155098223565e-05, "loss": 0.8744, "step": 4672 }, { "epoch": 0.0662811845244637, "grad_norm": 3.796875, "learning_rate": 4.950471266320873e-05, "loss": 0.931, "step": 4674 }, { "epoch": 0.06630954617808991, "grad_norm": 3.4375, "learning_rate": 4.9504270032473127e-05, "loss": 0.9097, "step": 4676 }, { "epoch": 0.06633790783171613, "grad_norm": 3.46875, "learning_rate": 4.950382720602028e-05, "loss": 0.9375, "step": 4678 }, { "epoch": 0.06636626948534234, "grad_norm": 2.78125, "learning_rate": 4.950338418385373e-05, "loss": 0.8558, "step": 4680 }, { "epoch": 0.06639463113896855, "grad_norm": 2.96875, "learning_rate": 4.9502940965977026e-05, "loss": 0.8984, "step": 4682 }, { "epoch": 0.06642299279259477, "grad_norm": 3.015625, "learning_rate": 4.950249755239369e-05, "loss": 0.8891, "step": 4684 }, { "epoch": 0.06645135444622098, "grad_norm": 3.453125, "learning_rate": 4.950205394310727e-05, "loss": 0.9366, "step": 4686 }, { "epoch": 0.06647971609984721, "grad_norm": 3.234375, "learning_rate": 4.9501610138121314e-05, "loss": 0.8813, "step": 4688 }, { "epoch": 0.06650807775347342, "grad_norm": 2.96875, "learning_rate": 4.950116613743936e-05, "loss": 0.9062, "step": 4690 }, { "epoch": 0.06653643940709963, "grad_norm": 2.875, "learning_rate": 4.9500721941064964e-05, "loss": 0.862, "step": 4692 }, { "epoch": 0.06656480106072585, "grad_norm": 3.109375, "learning_rate": 4.950027754900166e-05, "loss": 0.9169, "step": 4694 }, { "epoch": 0.06659316271435206, "grad_norm": 3.109375, "learning_rate": 4.9499832961253013e-05, "loss": 0.9028, "step": 4696 }, { "epoch": 0.06662152436797827, "grad_norm": 3.21875, "learning_rate": 4.949938817782256e-05, "loss": 0.8751, "step": 4698 }, { "epoch": 0.06664988602160449, "grad_norm": 3.234375, "learning_rate": 4.949894319871387e-05, "loss": 0.8997, "step": 4700 }, { "epoch": 0.0666782476752307, "grad_norm": 3.46875, "learning_rate": 4.949849802393047e-05, "loss": 0.909, "step": 4702 }, { "epoch": 0.06670660932885691, "grad_norm": 2.953125, "learning_rate": 4.9498052653475946e-05, "loss": 0.8715, "step": 4704 }, { "epoch": 0.06673497098248313, "grad_norm": 3.125, "learning_rate": 4.9497607087353835e-05, "loss": 0.9047, "step": 4706 }, { "epoch": 0.06676333263610935, "grad_norm": 3.125, "learning_rate": 4.94971613255677e-05, "loss": 0.9103, "step": 4708 }, { "epoch": 0.06679169428973557, "grad_norm": 3.21875, "learning_rate": 4.949671536812111e-05, "loss": 0.9101, "step": 4710 }, { "epoch": 0.06682005594336178, "grad_norm": 3.125, "learning_rate": 4.9496269215017624e-05, "loss": 0.8696, "step": 4712 }, { "epoch": 0.066848417596988, "grad_norm": 2.921875, "learning_rate": 4.9495822866260786e-05, "loss": 0.9196, "step": 4714 }, { "epoch": 0.06687677925061421, "grad_norm": 3.296875, "learning_rate": 4.949537632185419e-05, "loss": 0.9438, "step": 4716 }, { "epoch": 0.06690514090424042, "grad_norm": 3.4375, "learning_rate": 4.9494929581801384e-05, "loss": 0.9332, "step": 4718 }, { "epoch": 0.06693350255786663, "grad_norm": 2.859375, "learning_rate": 4.949448264610595e-05, "loss": 0.86, "step": 4720 }, { "epoch": 0.06696186421149285, "grad_norm": 2.96875, "learning_rate": 4.949403551477144e-05, "loss": 0.9432, "step": 4722 }, { "epoch": 0.06699022586511906, "grad_norm": 3.5, "learning_rate": 4.949358818780143e-05, "loss": 0.8539, "step": 4724 }, { "epoch": 0.06701858751874529, "grad_norm": 3.1875, "learning_rate": 4.949314066519951e-05, "loss": 0.8974, "step": 4726 }, { "epoch": 0.0670469491723715, "grad_norm": 3.3125, "learning_rate": 4.9492692946969234e-05, "loss": 0.9282, "step": 4728 }, { "epoch": 0.06707531082599771, "grad_norm": 3.203125, "learning_rate": 4.949224503311419e-05, "loss": 0.9234, "step": 4730 }, { "epoch": 0.06710367247962393, "grad_norm": 3.015625, "learning_rate": 4.9491796923637945e-05, "loss": 0.923, "step": 4732 }, { "epoch": 0.06713203413325014, "grad_norm": 3.21875, "learning_rate": 4.9491348618544085e-05, "loss": 0.9557, "step": 4734 }, { "epoch": 0.06716039578687635, "grad_norm": 3.1875, "learning_rate": 4.94909001178362e-05, "loss": 0.8622, "step": 4736 }, { "epoch": 0.06718875744050257, "grad_norm": 3.09375, "learning_rate": 4.949045142151785e-05, "loss": 0.8679, "step": 4738 }, { "epoch": 0.06721711909412878, "grad_norm": 3.140625, "learning_rate": 4.949000252959264e-05, "loss": 0.9219, "step": 4740 }, { "epoch": 0.067245480747755, "grad_norm": 3.03125, "learning_rate": 4.948955344206414e-05, "loss": 0.9315, "step": 4742 }, { "epoch": 0.0672738424013812, "grad_norm": 3.15625, "learning_rate": 4.948910415893595e-05, "loss": 0.9132, "step": 4744 }, { "epoch": 0.06730220405500743, "grad_norm": 3.34375, "learning_rate": 4.9488654680211645e-05, "loss": 0.9293, "step": 4746 }, { "epoch": 0.06733056570863365, "grad_norm": 3.140625, "learning_rate": 4.9488205005894826e-05, "loss": 0.9094, "step": 4748 }, { "epoch": 0.06735892736225986, "grad_norm": 3.078125, "learning_rate": 4.948775513598908e-05, "loss": 0.8796, "step": 4750 }, { "epoch": 0.06738728901588607, "grad_norm": 3.046875, "learning_rate": 4.9487305070498e-05, "loss": 0.8686, "step": 4752 }, { "epoch": 0.06741565066951229, "grad_norm": 3.03125, "learning_rate": 4.9486854809425175e-05, "loss": 0.8891, "step": 4754 }, { "epoch": 0.0674440123231385, "grad_norm": 3.078125, "learning_rate": 4.948640435277422e-05, "loss": 0.9095, "step": 4756 }, { "epoch": 0.06747237397676471, "grad_norm": 3.234375, "learning_rate": 4.948595370054871e-05, "loss": 0.9015, "step": 4758 }, { "epoch": 0.06750073563039093, "grad_norm": 3.109375, "learning_rate": 4.9485502852752254e-05, "loss": 0.8796, "step": 4760 }, { "epoch": 0.06752909728401714, "grad_norm": 3.359375, "learning_rate": 4.948505180938846e-05, "loss": 0.8553, "step": 4762 }, { "epoch": 0.06755745893764335, "grad_norm": 3.09375, "learning_rate": 4.948460057046092e-05, "loss": 0.9223, "step": 4764 }, { "epoch": 0.06758582059126958, "grad_norm": 3.078125, "learning_rate": 4.948414913597325e-05, "loss": 0.9168, "step": 4766 }, { "epoch": 0.06761418224489579, "grad_norm": 3.25, "learning_rate": 4.948369750592903e-05, "loss": 0.9121, "step": 4768 }, { "epoch": 0.067642543898522, "grad_norm": 3.109375, "learning_rate": 4.94832456803319e-05, "loss": 0.9086, "step": 4770 }, { "epoch": 0.06767090555214822, "grad_norm": 3.109375, "learning_rate": 4.948279365918544e-05, "loss": 0.9111, "step": 4772 }, { "epoch": 0.06769926720577443, "grad_norm": 3.296875, "learning_rate": 4.948234144249329e-05, "loss": 0.873, "step": 4774 }, { "epoch": 0.06772762885940065, "grad_norm": 3.421875, "learning_rate": 4.948188903025904e-05, "loss": 0.8919, "step": 4776 }, { "epoch": 0.06775599051302686, "grad_norm": 3.234375, "learning_rate": 4.948143642248631e-05, "loss": 0.8923, "step": 4778 }, { "epoch": 0.06778435216665307, "grad_norm": 3.5625, "learning_rate": 4.948098361917871e-05, "loss": 0.9039, "step": 4780 }, { "epoch": 0.06781271382027929, "grad_norm": 2.984375, "learning_rate": 4.948053062033986e-05, "loss": 0.9143, "step": 4782 }, { "epoch": 0.06784107547390551, "grad_norm": 3.15625, "learning_rate": 4.9480077425973396e-05, "loss": 0.9118, "step": 4784 }, { "epoch": 0.06786943712753173, "grad_norm": 3.484375, "learning_rate": 4.94796240360829e-05, "loss": 0.8754, "step": 4786 }, { "epoch": 0.06789779878115794, "grad_norm": 2.796875, "learning_rate": 4.947917045067202e-05, "loss": 0.89, "step": 4788 }, { "epoch": 0.06792616043478415, "grad_norm": 3.09375, "learning_rate": 4.947871666974437e-05, "loss": 0.9133, "step": 4790 }, { "epoch": 0.06795452208841037, "grad_norm": 3.375, "learning_rate": 4.947826269330359e-05, "loss": 0.8694, "step": 4792 }, { "epoch": 0.06798288374203658, "grad_norm": 3.140625, "learning_rate": 4.947780852135328e-05, "loss": 0.9042, "step": 4794 }, { "epoch": 0.06801124539566279, "grad_norm": 3.359375, "learning_rate": 4.9477354153897093e-05, "loss": 0.9627, "step": 4796 }, { "epoch": 0.068039607049289, "grad_norm": 3.078125, "learning_rate": 4.947689959093864e-05, "loss": 0.9197, "step": 4798 }, { "epoch": 0.06806796870291522, "grad_norm": 2.828125, "learning_rate": 4.947644483248155e-05, "loss": 0.8959, "step": 4800 }, { "epoch": 0.06809633035654143, "grad_norm": 3.4375, "learning_rate": 4.947598987852947e-05, "loss": 0.899, "step": 4802 }, { "epoch": 0.06812469201016766, "grad_norm": 3.359375, "learning_rate": 4.9475534729086026e-05, "loss": 0.9141, "step": 4804 }, { "epoch": 0.06815305366379387, "grad_norm": 2.953125, "learning_rate": 4.947507938415485e-05, "loss": 0.8935, "step": 4806 }, { "epoch": 0.06818141531742009, "grad_norm": 3.1875, "learning_rate": 4.947462384373959e-05, "loss": 0.8559, "step": 4808 }, { "epoch": 0.0682097769710463, "grad_norm": 3.140625, "learning_rate": 4.947416810784386e-05, "loss": 0.9231, "step": 4810 }, { "epoch": 0.06823813862467251, "grad_norm": 3.078125, "learning_rate": 4.947371217647133e-05, "loss": 0.9026, "step": 4812 }, { "epoch": 0.06826650027829873, "grad_norm": 3.328125, "learning_rate": 4.9473256049625627e-05, "loss": 0.8978, "step": 4814 }, { "epoch": 0.06829486193192494, "grad_norm": 3.125, "learning_rate": 4.94727997273104e-05, "loss": 0.9079, "step": 4816 }, { "epoch": 0.06832322358555115, "grad_norm": 3.09375, "learning_rate": 4.947234320952928e-05, "loss": 0.9053, "step": 4818 }, { "epoch": 0.06835158523917736, "grad_norm": 2.8125, "learning_rate": 4.947188649628593e-05, "loss": 0.9019, "step": 4820 }, { "epoch": 0.06837994689280358, "grad_norm": 3.015625, "learning_rate": 4.9471429587583985e-05, "loss": 0.9115, "step": 4822 }, { "epoch": 0.0684083085464298, "grad_norm": 3.578125, "learning_rate": 4.9470972483427105e-05, "loss": 0.8846, "step": 4824 }, { "epoch": 0.06843667020005602, "grad_norm": 2.9375, "learning_rate": 4.947051518381893e-05, "loss": 0.8998, "step": 4826 }, { "epoch": 0.06846503185368223, "grad_norm": 2.96875, "learning_rate": 4.947005768876312e-05, "loss": 0.8873, "step": 4828 }, { "epoch": 0.06849339350730844, "grad_norm": 3.1875, "learning_rate": 4.946959999826333e-05, "loss": 0.9077, "step": 4830 }, { "epoch": 0.06852175516093466, "grad_norm": 2.890625, "learning_rate": 4.946914211232321e-05, "loss": 0.8898, "step": 4832 }, { "epoch": 0.06855011681456087, "grad_norm": 2.96875, "learning_rate": 4.946868403094642e-05, "loss": 0.8703, "step": 4834 }, { "epoch": 0.06857847846818708, "grad_norm": 4.125, "learning_rate": 4.946822575413662e-05, "loss": 0.9202, "step": 4836 }, { "epoch": 0.0686068401218133, "grad_norm": 3.046875, "learning_rate": 4.946776728189746e-05, "loss": 0.9301, "step": 4838 }, { "epoch": 0.06863520177543951, "grad_norm": 3.21875, "learning_rate": 4.946730861423262e-05, "loss": 0.9239, "step": 4840 }, { "epoch": 0.06866356342906574, "grad_norm": 3.0, "learning_rate": 4.9466849751145754e-05, "loss": 0.8734, "step": 4842 }, { "epoch": 0.06869192508269195, "grad_norm": 3.734375, "learning_rate": 4.946639069264053e-05, "loss": 0.9039, "step": 4844 }, { "epoch": 0.06872028673631816, "grad_norm": 3.390625, "learning_rate": 4.94659314387206e-05, "loss": 0.9275, "step": 4846 }, { "epoch": 0.06874864838994438, "grad_norm": 2.828125, "learning_rate": 4.946547198938966e-05, "loss": 0.9069, "step": 4848 }, { "epoch": 0.06877701004357059, "grad_norm": 3.46875, "learning_rate": 4.946501234465135e-05, "loss": 0.8998, "step": 4850 }, { "epoch": 0.0688053716971968, "grad_norm": 3.265625, "learning_rate": 4.9464552504509353e-05, "loss": 0.8747, "step": 4852 }, { "epoch": 0.06883373335082302, "grad_norm": 3.5, "learning_rate": 4.946409246896735e-05, "loss": 0.9337, "step": 4854 }, { "epoch": 0.06886209500444923, "grad_norm": 3.296875, "learning_rate": 4.946363223802901e-05, "loss": 0.8646, "step": 4856 }, { "epoch": 0.06889045665807544, "grad_norm": 3.1875, "learning_rate": 4.9463171811698004e-05, "loss": 0.9242, "step": 4858 }, { "epoch": 0.06891881831170166, "grad_norm": 3.640625, "learning_rate": 4.946271118997802e-05, "loss": 0.9542, "step": 4860 }, { "epoch": 0.06894717996532788, "grad_norm": 3.328125, "learning_rate": 4.946225037287272e-05, "loss": 0.8801, "step": 4862 }, { "epoch": 0.0689755416189541, "grad_norm": 3.171875, "learning_rate": 4.9461789360385805e-05, "loss": 0.9439, "step": 4864 }, { "epoch": 0.06900390327258031, "grad_norm": 3.328125, "learning_rate": 4.946132815252094e-05, "loss": 0.9156, "step": 4866 }, { "epoch": 0.06903226492620652, "grad_norm": 3.484375, "learning_rate": 4.946086674928181e-05, "loss": 0.9218, "step": 4868 }, { "epoch": 0.06906062657983274, "grad_norm": 3.171875, "learning_rate": 4.946040515067211e-05, "loss": 0.8954, "step": 4870 }, { "epoch": 0.06908898823345895, "grad_norm": 3.515625, "learning_rate": 4.945994335669552e-05, "loss": 0.8171, "step": 4872 }, { "epoch": 0.06911734988708516, "grad_norm": 3.234375, "learning_rate": 4.945948136735574e-05, "loss": 0.9116, "step": 4874 }, { "epoch": 0.06914571154071138, "grad_norm": 3.34375, "learning_rate": 4.945901918265645e-05, "loss": 0.9184, "step": 4876 }, { "epoch": 0.06917407319433759, "grad_norm": 3.140625, "learning_rate": 4.945855680260133e-05, "loss": 0.8684, "step": 4878 }, { "epoch": 0.06920243484796382, "grad_norm": 2.578125, "learning_rate": 4.94580942271941e-05, "loss": 0.791, "step": 4880 }, { "epoch": 0.06923079650159003, "grad_norm": 3.140625, "learning_rate": 4.945763145643844e-05, "loss": 0.8931, "step": 4882 }, { "epoch": 0.06925915815521624, "grad_norm": 2.828125, "learning_rate": 4.945716849033803e-05, "loss": 0.9043, "step": 4884 }, { "epoch": 0.06928751980884246, "grad_norm": 3.3125, "learning_rate": 4.94567053288966e-05, "loss": 0.8478, "step": 4886 }, { "epoch": 0.06931588146246867, "grad_norm": 2.859375, "learning_rate": 4.945624197211782e-05, "loss": 0.8988, "step": 4888 }, { "epoch": 0.06934424311609488, "grad_norm": 2.984375, "learning_rate": 4.9455778420005415e-05, "loss": 0.8681, "step": 4890 }, { "epoch": 0.0693726047697211, "grad_norm": 3.0625, "learning_rate": 4.945531467256307e-05, "loss": 0.9221, "step": 4892 }, { "epoch": 0.06940096642334731, "grad_norm": 3.078125, "learning_rate": 4.9454850729794503e-05, "loss": 0.9234, "step": 4894 }, { "epoch": 0.06942932807697352, "grad_norm": 3.28125, "learning_rate": 4.9454386591703404e-05, "loss": 0.8899, "step": 4896 }, { "epoch": 0.06945768973059974, "grad_norm": 3.125, "learning_rate": 4.945392225829349e-05, "loss": 0.9384, "step": 4898 }, { "epoch": 0.06948605138422596, "grad_norm": 3.03125, "learning_rate": 4.945345772956848e-05, "loss": 0.912, "step": 4900 }, { "epoch": 0.06951441303785218, "grad_norm": 2.828125, "learning_rate": 4.945299300553206e-05, "loss": 0.9086, "step": 4902 }, { "epoch": 0.06954277469147839, "grad_norm": 2.65625, "learning_rate": 4.945252808618795e-05, "loss": 0.8864, "step": 4904 }, { "epoch": 0.0695711363451046, "grad_norm": 3.140625, "learning_rate": 4.9452062971539875e-05, "loss": 0.9153, "step": 4906 }, { "epoch": 0.06959949799873082, "grad_norm": 3.0625, "learning_rate": 4.945159766159154e-05, "loss": 0.8683, "step": 4908 }, { "epoch": 0.06962785965235703, "grad_norm": 2.90625, "learning_rate": 4.945113215634666e-05, "loss": 0.8259, "step": 4910 }, { "epoch": 0.06965622130598324, "grad_norm": 3.328125, "learning_rate": 4.9450666455808965e-05, "loss": 0.919, "step": 4912 }, { "epoch": 0.06968458295960946, "grad_norm": 3.0625, "learning_rate": 4.9450200559982165e-05, "loss": 0.8744, "step": 4914 }, { "epoch": 0.06971294461323567, "grad_norm": 3.328125, "learning_rate": 4.9449734468869973e-05, "loss": 0.8782, "step": 4916 }, { "epoch": 0.06974130626686188, "grad_norm": 2.90625, "learning_rate": 4.9449268182476125e-05, "loss": 0.9095, "step": 4918 }, { "epoch": 0.06976966792048811, "grad_norm": 3.28125, "learning_rate": 4.9448801700804346e-05, "loss": 0.884, "step": 4920 }, { "epoch": 0.06979802957411432, "grad_norm": 2.890625, "learning_rate": 4.944833502385835e-05, "loss": 0.8555, "step": 4922 }, { "epoch": 0.06982639122774054, "grad_norm": 3.140625, "learning_rate": 4.944786815164187e-05, "loss": 0.8914, "step": 4924 }, { "epoch": 0.06985475288136675, "grad_norm": 3.296875, "learning_rate": 4.944740108415865e-05, "loss": 0.8677, "step": 4926 }, { "epoch": 0.06988311453499296, "grad_norm": 3.421875, "learning_rate": 4.94469338214124e-05, "loss": 0.9808, "step": 4928 }, { "epoch": 0.06991147618861918, "grad_norm": 3.6875, "learning_rate": 4.944646636340685e-05, "loss": 0.924, "step": 4930 }, { "epoch": 0.06993983784224539, "grad_norm": 3.1875, "learning_rate": 4.944599871014576e-05, "loss": 0.9308, "step": 4932 }, { "epoch": 0.0699681994958716, "grad_norm": 2.953125, "learning_rate": 4.9445530861632836e-05, "loss": 0.9063, "step": 4934 }, { "epoch": 0.06999656114949782, "grad_norm": 2.75, "learning_rate": 4.944506281787182e-05, "loss": 0.8603, "step": 4936 }, { "epoch": 0.07002492280312404, "grad_norm": 2.765625, "learning_rate": 4.9444594578866464e-05, "loss": 0.8926, "step": 4938 }, { "epoch": 0.07005328445675026, "grad_norm": 3.09375, "learning_rate": 4.944412614462051e-05, "loss": 0.9773, "step": 4940 }, { "epoch": 0.07008164611037647, "grad_norm": 2.96875, "learning_rate": 4.9443657515137674e-05, "loss": 0.8809, "step": 4942 }, { "epoch": 0.07011000776400268, "grad_norm": 3.125, "learning_rate": 4.944318869042173e-05, "loss": 0.8797, "step": 4944 }, { "epoch": 0.0701383694176289, "grad_norm": 3.03125, "learning_rate": 4.9442719670476396e-05, "loss": 0.851, "step": 4946 }, { "epoch": 0.07016673107125511, "grad_norm": 3.015625, "learning_rate": 4.944225045530543e-05, "loss": 0.9138, "step": 4948 }, { "epoch": 0.07019509272488132, "grad_norm": 3.171875, "learning_rate": 4.944178104491258e-05, "loss": 0.8986, "step": 4950 }, { "epoch": 0.07022345437850754, "grad_norm": 3.140625, "learning_rate": 4.944131143930161e-05, "loss": 0.9003, "step": 4952 }, { "epoch": 0.07025181603213375, "grad_norm": 3.0, "learning_rate": 4.944084163847624e-05, "loss": 0.9021, "step": 4954 }, { "epoch": 0.07028017768575996, "grad_norm": 3.09375, "learning_rate": 4.944037164244024e-05, "loss": 0.9215, "step": 4956 }, { "epoch": 0.07030853933938619, "grad_norm": 3.0625, "learning_rate": 4.943990145119736e-05, "loss": 0.9127, "step": 4958 }, { "epoch": 0.0703369009930124, "grad_norm": 3.109375, "learning_rate": 4.943943106475136e-05, "loss": 0.8826, "step": 4960 }, { "epoch": 0.07036526264663862, "grad_norm": 3.3125, "learning_rate": 4.943896048310599e-05, "loss": 0.8451, "step": 4962 }, { "epoch": 0.07039362430026483, "grad_norm": 3.015625, "learning_rate": 4.943848970626502e-05, "loss": 0.9363, "step": 4964 }, { "epoch": 0.07042198595389104, "grad_norm": 3.140625, "learning_rate": 4.943801873423219e-05, "loss": 0.9009, "step": 4966 }, { "epoch": 0.07045034760751726, "grad_norm": 3.21875, "learning_rate": 4.9437547567011286e-05, "loss": 0.9169, "step": 4968 }, { "epoch": 0.07047870926114347, "grad_norm": 3.015625, "learning_rate": 4.9437076204606056e-05, "loss": 0.8814, "step": 4970 }, { "epoch": 0.07050707091476968, "grad_norm": 3.296875, "learning_rate": 4.943660464702027e-05, "loss": 0.8545, "step": 4972 }, { "epoch": 0.0705354325683959, "grad_norm": 3.59375, "learning_rate": 4.943613289425769e-05, "loss": 0.9296, "step": 4974 }, { "epoch": 0.07056379422202211, "grad_norm": 3.40625, "learning_rate": 4.943566094632209e-05, "loss": 0.8731, "step": 4976 }, { "epoch": 0.07059215587564834, "grad_norm": 3.15625, "learning_rate": 4.943518880321723e-05, "loss": 0.9074, "step": 4978 }, { "epoch": 0.07062051752927455, "grad_norm": 3.6875, "learning_rate": 4.9434716464946895e-05, "loss": 0.8793, "step": 4980 }, { "epoch": 0.07064887918290076, "grad_norm": 3.125, "learning_rate": 4.943424393151485e-05, "loss": 0.9093, "step": 4982 }, { "epoch": 0.07067724083652697, "grad_norm": 3.40625, "learning_rate": 4.9433771202924864e-05, "loss": 0.9451, "step": 4984 }, { "epoch": 0.07070560249015319, "grad_norm": 3.1875, "learning_rate": 4.943329827918071e-05, "loss": 0.9089, "step": 4986 }, { "epoch": 0.0707339641437794, "grad_norm": 3.15625, "learning_rate": 4.9432825160286186e-05, "loss": 0.8978, "step": 4988 }, { "epoch": 0.07076232579740561, "grad_norm": 3.171875, "learning_rate": 4.943235184624505e-05, "loss": 0.9008, "step": 4990 }, { "epoch": 0.07079068745103183, "grad_norm": 2.921875, "learning_rate": 4.943187833706109e-05, "loss": 0.8975, "step": 4992 }, { "epoch": 0.07081904910465804, "grad_norm": 3.21875, "learning_rate": 4.94314046327381e-05, "loss": 0.9021, "step": 4994 }, { "epoch": 0.07084741075828427, "grad_norm": 3.21875, "learning_rate": 4.943093073327984e-05, "loss": 0.95, "step": 4996 }, { "epoch": 0.07087577241191048, "grad_norm": 3.1875, "learning_rate": 4.943045663869011e-05, "loss": 0.9109, "step": 4998 }, { "epoch": 0.0709041340655367, "grad_norm": 2.84375, "learning_rate": 4.942998234897269e-05, "loss": 0.8498, "step": 5000 }, { "epoch": 0.07093249571916291, "grad_norm": 3.265625, "learning_rate": 4.9429507864131375e-05, "loss": 0.9046, "step": 5002 }, { "epoch": 0.07096085737278912, "grad_norm": 3.34375, "learning_rate": 4.942903318416995e-05, "loss": 0.9088, "step": 5004 }, { "epoch": 0.07098921902641533, "grad_norm": 3.203125, "learning_rate": 4.942855830909221e-05, "loss": 0.841, "step": 5006 }, { "epoch": 0.07101758068004155, "grad_norm": 3.359375, "learning_rate": 4.9428083238901945e-05, "loss": 0.9115, "step": 5008 }, { "epoch": 0.07104594233366776, "grad_norm": 2.96875, "learning_rate": 4.9427607973602946e-05, "loss": 0.8759, "step": 5010 }, { "epoch": 0.07107430398729397, "grad_norm": 3.359375, "learning_rate": 4.9427132513199015e-05, "loss": 0.923, "step": 5012 }, { "epoch": 0.07110266564092019, "grad_norm": 3.578125, "learning_rate": 4.9426656857693946e-05, "loss": 0.8979, "step": 5014 }, { "epoch": 0.07113102729454641, "grad_norm": 3.359375, "learning_rate": 4.942618100709154e-05, "loss": 0.9151, "step": 5016 }, { "epoch": 0.07115938894817263, "grad_norm": 3.359375, "learning_rate": 4.94257049613956e-05, "loss": 0.9702, "step": 5018 }, { "epoch": 0.07118775060179884, "grad_norm": 3.015625, "learning_rate": 4.942522872060993e-05, "loss": 0.8755, "step": 5020 }, { "epoch": 0.07121611225542505, "grad_norm": 3.015625, "learning_rate": 4.942475228473832e-05, "loss": 0.9218, "step": 5022 }, { "epoch": 0.07124447390905127, "grad_norm": 3.359375, "learning_rate": 4.942427565378458e-05, "loss": 0.8962, "step": 5024 }, { "epoch": 0.07127283556267748, "grad_norm": 3.3125, "learning_rate": 4.942379882775253e-05, "loss": 0.9124, "step": 5026 }, { "epoch": 0.0713011972163037, "grad_norm": 2.71875, "learning_rate": 4.942332180664597e-05, "loss": 0.8774, "step": 5028 }, { "epoch": 0.07132955886992991, "grad_norm": 2.84375, "learning_rate": 4.942284459046871e-05, "loss": 0.8982, "step": 5030 }, { "epoch": 0.07135792052355612, "grad_norm": 3.40625, "learning_rate": 4.9422367179224555e-05, "loss": 0.9087, "step": 5032 }, { "epoch": 0.07138628217718235, "grad_norm": 3.640625, "learning_rate": 4.942188957291732e-05, "loss": 0.9145, "step": 5034 }, { "epoch": 0.07141464383080856, "grad_norm": 3.625, "learning_rate": 4.9421411771550844e-05, "loss": 0.9258, "step": 5036 }, { "epoch": 0.07144300548443477, "grad_norm": 2.8125, "learning_rate": 4.9420933775128907e-05, "loss": 0.9102, "step": 5038 }, { "epoch": 0.07147136713806099, "grad_norm": 3.1875, "learning_rate": 4.9420455583655346e-05, "loss": 0.8685, "step": 5040 }, { "epoch": 0.0714997287916872, "grad_norm": 3.203125, "learning_rate": 4.9419977197133984e-05, "loss": 0.9087, "step": 5042 }, { "epoch": 0.07152809044531341, "grad_norm": 3.03125, "learning_rate": 4.941949861556863e-05, "loss": 0.9242, "step": 5044 }, { "epoch": 0.07155645209893963, "grad_norm": 2.953125, "learning_rate": 4.94190198389631e-05, "loss": 0.8829, "step": 5046 }, { "epoch": 0.07158481375256584, "grad_norm": 3.15625, "learning_rate": 4.941854086732125e-05, "loss": 0.8801, "step": 5048 }, { "epoch": 0.07161317540619205, "grad_norm": 3.34375, "learning_rate": 4.941806170064687e-05, "loss": 0.9401, "step": 5050 }, { "epoch": 0.07164153705981827, "grad_norm": 3.203125, "learning_rate": 4.9417582338943815e-05, "loss": 0.921, "step": 5052 }, { "epoch": 0.0716698987134445, "grad_norm": 2.875, "learning_rate": 4.9417102782215896e-05, "loss": 0.927, "step": 5054 }, { "epoch": 0.0716982603670707, "grad_norm": 3.078125, "learning_rate": 4.941662303046695e-05, "loss": 0.9085, "step": 5056 }, { "epoch": 0.07172662202069692, "grad_norm": 2.828125, "learning_rate": 4.94161430837008e-05, "loss": 0.8364, "step": 5058 }, { "epoch": 0.07175498367432313, "grad_norm": 3.703125, "learning_rate": 4.941566294192129e-05, "loss": 0.9031, "step": 5060 }, { "epoch": 0.07178334532794935, "grad_norm": 3.734375, "learning_rate": 4.9415182605132255e-05, "loss": 0.9796, "step": 5062 }, { "epoch": 0.07181170698157556, "grad_norm": 3.015625, "learning_rate": 4.941470207333753e-05, "loss": 0.8777, "step": 5064 }, { "epoch": 0.07184006863520177, "grad_norm": 3.328125, "learning_rate": 4.941422134654095e-05, "loss": 0.9068, "step": 5066 }, { "epoch": 0.07186843028882799, "grad_norm": 2.953125, "learning_rate": 4.9413740424746345e-05, "loss": 0.8382, "step": 5068 }, { "epoch": 0.0718967919424542, "grad_norm": 2.96875, "learning_rate": 4.9413259307957576e-05, "loss": 0.8776, "step": 5070 }, { "epoch": 0.07192515359608041, "grad_norm": 2.875, "learning_rate": 4.9412777996178474e-05, "loss": 0.909, "step": 5072 }, { "epoch": 0.07195351524970664, "grad_norm": 2.953125, "learning_rate": 4.941229648941289e-05, "loss": 0.9247, "step": 5074 }, { "epoch": 0.07198187690333285, "grad_norm": 2.78125, "learning_rate": 4.9411814787664664e-05, "loss": 0.9028, "step": 5076 }, { "epoch": 0.07201023855695907, "grad_norm": 3.390625, "learning_rate": 4.9411332890937646e-05, "loss": 0.9285, "step": 5078 }, { "epoch": 0.07203860021058528, "grad_norm": 2.953125, "learning_rate": 4.941085079923567e-05, "loss": 0.9397, "step": 5080 }, { "epoch": 0.07206696186421149, "grad_norm": 3.25, "learning_rate": 4.9410368512562624e-05, "loss": 0.8836, "step": 5082 }, { "epoch": 0.0720953235178377, "grad_norm": 3.015625, "learning_rate": 4.940988603092232e-05, "loss": 0.9514, "step": 5084 }, { "epoch": 0.07212368517146392, "grad_norm": 3.234375, "learning_rate": 4.940940335431863e-05, "loss": 0.8837, "step": 5086 }, { "epoch": 0.07215204682509013, "grad_norm": 3.265625, "learning_rate": 4.940892048275542e-05, "loss": 0.8616, "step": 5088 }, { "epoch": 0.07218040847871635, "grad_norm": 3.3125, "learning_rate": 4.940843741623652e-05, "loss": 0.8934, "step": 5090 }, { "epoch": 0.07220877013234257, "grad_norm": 3.265625, "learning_rate": 4.94079541547658e-05, "loss": 0.95, "step": 5092 }, { "epoch": 0.07223713178596879, "grad_norm": 2.875, "learning_rate": 4.940747069834713e-05, "loss": 0.8857, "step": 5094 }, { "epoch": 0.072265493439595, "grad_norm": 3.0, "learning_rate": 4.940698704698436e-05, "loss": 0.8629, "step": 5096 }, { "epoch": 0.07229385509322121, "grad_norm": 3.765625, "learning_rate": 4.940650320068137e-05, "loss": 0.8864, "step": 5098 }, { "epoch": 0.07232221674684743, "grad_norm": 3.15625, "learning_rate": 4.940601915944199e-05, "loss": 0.9356, "step": 5100 }, { "epoch": 0.07235057840047364, "grad_norm": 3.265625, "learning_rate": 4.940553492327012e-05, "loss": 0.9046, "step": 5102 }, { "epoch": 0.07237894005409985, "grad_norm": 3.3125, "learning_rate": 4.9405050492169614e-05, "loss": 0.9554, "step": 5104 }, { "epoch": 0.07240730170772607, "grad_norm": 3.15625, "learning_rate": 4.940456586614434e-05, "loss": 0.8741, "step": 5106 }, { "epoch": 0.07243566336135228, "grad_norm": 3.0625, "learning_rate": 4.9404081045198164e-05, "loss": 0.9147, "step": 5108 }, { "epoch": 0.07246402501497849, "grad_norm": 2.859375, "learning_rate": 4.940359602933498e-05, "loss": 0.8676, "step": 5110 }, { "epoch": 0.07249238666860472, "grad_norm": 2.9375, "learning_rate": 4.940311081855863e-05, "loss": 0.8993, "step": 5112 }, { "epoch": 0.07252074832223093, "grad_norm": 3.15625, "learning_rate": 4.940262541287302e-05, "loss": 0.9169, "step": 5114 }, { "epoch": 0.07254910997585715, "grad_norm": 3.3125, "learning_rate": 4.9402139812282e-05, "loss": 0.9215, "step": 5116 }, { "epoch": 0.07257747162948336, "grad_norm": 3.484375, "learning_rate": 4.9401654016789466e-05, "loss": 0.8859, "step": 5118 }, { "epoch": 0.07260583328310957, "grad_norm": 3.296875, "learning_rate": 4.94011680263993e-05, "loss": 0.9126, "step": 5120 }, { "epoch": 0.07263419493673579, "grad_norm": 3.21875, "learning_rate": 4.940068184111537e-05, "loss": 0.8635, "step": 5122 }, { "epoch": 0.072662556590362, "grad_norm": 3.328125, "learning_rate": 4.9400195460941576e-05, "loss": 0.8939, "step": 5124 }, { "epoch": 0.07269091824398821, "grad_norm": 3.375, "learning_rate": 4.9399708885881784e-05, "loss": 0.9239, "step": 5126 }, { "epoch": 0.07271927989761443, "grad_norm": 3.171875, "learning_rate": 4.939922211593989e-05, "loss": 0.9314, "step": 5128 }, { "epoch": 0.07274764155124064, "grad_norm": 2.953125, "learning_rate": 4.939873515111979e-05, "loss": 0.88, "step": 5130 }, { "epoch": 0.07277600320486687, "grad_norm": 3.390625, "learning_rate": 4.939824799142536e-05, "loss": 0.9403, "step": 5132 }, { "epoch": 0.07280436485849308, "grad_norm": 3.234375, "learning_rate": 4.939776063686049e-05, "loss": 0.866, "step": 5134 }, { "epoch": 0.07283272651211929, "grad_norm": 3.015625, "learning_rate": 4.939727308742908e-05, "loss": 0.9141, "step": 5136 }, { "epoch": 0.0728610881657455, "grad_norm": 3.015625, "learning_rate": 4.939678534313503e-05, "loss": 0.8756, "step": 5138 }, { "epoch": 0.07288944981937172, "grad_norm": 3.59375, "learning_rate": 4.9396297403982225e-05, "loss": 0.8975, "step": 5140 }, { "epoch": 0.07291781147299793, "grad_norm": 3.46875, "learning_rate": 4.939580926997457e-05, "loss": 0.9002, "step": 5142 }, { "epoch": 0.07294617312662414, "grad_norm": 3.75, "learning_rate": 4.939532094111595e-05, "loss": 0.9568, "step": 5144 }, { "epoch": 0.07297453478025036, "grad_norm": 3.53125, "learning_rate": 4.9394832417410285e-05, "loss": 0.9316, "step": 5146 }, { "epoch": 0.07300289643387657, "grad_norm": 3.359375, "learning_rate": 4.9394343698861453e-05, "loss": 0.9398, "step": 5148 }, { "epoch": 0.0730312580875028, "grad_norm": 2.890625, "learning_rate": 4.939385478547338e-05, "loss": 0.9241, "step": 5150 }, { "epoch": 0.07305961974112901, "grad_norm": 3.0625, "learning_rate": 4.939336567724996e-05, "loss": 0.883, "step": 5152 }, { "epoch": 0.07308798139475522, "grad_norm": 3.203125, "learning_rate": 4.939287637419511e-05, "loss": 0.9274, "step": 5154 }, { "epoch": 0.07311634304838144, "grad_norm": 3.1875, "learning_rate": 4.9392386876312715e-05, "loss": 0.8555, "step": 5156 }, { "epoch": 0.07314470470200765, "grad_norm": 2.984375, "learning_rate": 4.9391897183606706e-05, "loss": 0.8862, "step": 5158 }, { "epoch": 0.07317306635563386, "grad_norm": 3.03125, "learning_rate": 4.9391407296080985e-05, "loss": 0.9889, "step": 5160 }, { "epoch": 0.07320142800926008, "grad_norm": 3.125, "learning_rate": 4.939091721373946e-05, "loss": 0.9273, "step": 5162 }, { "epoch": 0.07322978966288629, "grad_norm": 2.96875, "learning_rate": 4.9390426936586066e-05, "loss": 0.8659, "step": 5164 }, { "epoch": 0.0732581513165125, "grad_norm": 3.5625, "learning_rate": 4.9389936464624695e-05, "loss": 0.9335, "step": 5166 }, { "epoch": 0.07328651297013872, "grad_norm": 3.578125, "learning_rate": 4.938944579785928e-05, "loss": 0.8202, "step": 5168 }, { "epoch": 0.07331487462376494, "grad_norm": 2.984375, "learning_rate": 4.938895493629374e-05, "loss": 0.8634, "step": 5170 }, { "epoch": 0.07334323627739116, "grad_norm": 2.984375, "learning_rate": 4.938846387993198e-05, "loss": 0.899, "step": 5172 }, { "epoch": 0.07337159793101737, "grad_norm": 3.21875, "learning_rate": 4.938797262877793e-05, "loss": 0.927, "step": 5174 }, { "epoch": 0.07339995958464358, "grad_norm": 3.5625, "learning_rate": 4.938748118283553e-05, "loss": 0.8834, "step": 5176 }, { "epoch": 0.0734283212382698, "grad_norm": 2.8125, "learning_rate": 4.938698954210868e-05, "loss": 0.8675, "step": 5178 }, { "epoch": 0.07345668289189601, "grad_norm": 2.71875, "learning_rate": 4.938649770660132e-05, "loss": 0.8624, "step": 5180 }, { "epoch": 0.07348504454552222, "grad_norm": 3.0625, "learning_rate": 4.9386005676317385e-05, "loss": 0.8336, "step": 5182 }, { "epoch": 0.07351340619914844, "grad_norm": 2.984375, "learning_rate": 4.938551345126078e-05, "loss": 0.9037, "step": 5184 }, { "epoch": 0.07354176785277465, "grad_norm": 2.9375, "learning_rate": 4.9385021031435466e-05, "loss": 0.8866, "step": 5186 }, { "epoch": 0.07357012950640088, "grad_norm": 3.34375, "learning_rate": 4.9384528416845355e-05, "loss": 0.9415, "step": 5188 }, { "epoch": 0.07359849116002709, "grad_norm": 3.15625, "learning_rate": 4.938403560749439e-05, "loss": 0.9414, "step": 5190 }, { "epoch": 0.0736268528136533, "grad_norm": 3.359375, "learning_rate": 4.938354260338651e-05, "loss": 0.9028, "step": 5192 }, { "epoch": 0.07365521446727952, "grad_norm": 2.671875, "learning_rate": 4.938304940452564e-05, "loss": 0.8778, "step": 5194 }, { "epoch": 0.07368357612090573, "grad_norm": 3.0625, "learning_rate": 4.938255601091574e-05, "loss": 0.8695, "step": 5196 }, { "epoch": 0.07371193777453194, "grad_norm": 3.375, "learning_rate": 4.9382062422560735e-05, "loss": 0.9239, "step": 5198 }, { "epoch": 0.07374029942815816, "grad_norm": 3.046875, "learning_rate": 4.938156863946456e-05, "loss": 0.8914, "step": 5200 }, { "epoch": 0.07376866108178437, "grad_norm": 2.71875, "learning_rate": 4.938107466163119e-05, "loss": 0.9011, "step": 5202 }, { "epoch": 0.07379702273541058, "grad_norm": 3.65625, "learning_rate": 4.938058048906453e-05, "loss": 0.8847, "step": 5204 }, { "epoch": 0.0738253843890368, "grad_norm": 2.875, "learning_rate": 4.938008612176856e-05, "loss": 0.882, "step": 5206 }, { "epoch": 0.07385374604266302, "grad_norm": 2.90625, "learning_rate": 4.9379591559747215e-05, "loss": 0.8947, "step": 5208 }, { "epoch": 0.07388210769628924, "grad_norm": 3.234375, "learning_rate": 4.9379096803004444e-05, "loss": 0.8932, "step": 5210 }, { "epoch": 0.07391046934991545, "grad_norm": 3.5625, "learning_rate": 4.93786018515442e-05, "loss": 0.9078, "step": 5212 }, { "epoch": 0.07393883100354166, "grad_norm": 2.921875, "learning_rate": 4.9378106705370445e-05, "loss": 0.8919, "step": 5214 }, { "epoch": 0.07396719265716788, "grad_norm": 2.953125, "learning_rate": 4.937761136448711e-05, "loss": 0.9291, "step": 5216 }, { "epoch": 0.07399555431079409, "grad_norm": 3.46875, "learning_rate": 4.937711582889818e-05, "loss": 0.9132, "step": 5218 }, { "epoch": 0.0740239159644203, "grad_norm": 3.296875, "learning_rate": 4.93766200986076e-05, "loss": 0.9557, "step": 5220 }, { "epoch": 0.07405227761804652, "grad_norm": 2.953125, "learning_rate": 4.937612417361932e-05, "loss": 0.9311, "step": 5222 }, { "epoch": 0.07408063927167273, "grad_norm": 3.09375, "learning_rate": 4.937562805393731e-05, "loss": 0.8904, "step": 5224 }, { "epoch": 0.07410900092529894, "grad_norm": 3.09375, "learning_rate": 4.937513173956554e-05, "loss": 0.8949, "step": 5226 }, { "epoch": 0.07413736257892517, "grad_norm": 3.171875, "learning_rate": 4.9374635230507964e-05, "loss": 0.8684, "step": 5228 }, { "epoch": 0.07416572423255138, "grad_norm": 2.96875, "learning_rate": 4.937413852676855e-05, "loss": 0.9395, "step": 5230 }, { "epoch": 0.0741940858861776, "grad_norm": 3.03125, "learning_rate": 4.937364162835127e-05, "loss": 0.8616, "step": 5232 }, { "epoch": 0.07422244753980381, "grad_norm": 3.109375, "learning_rate": 4.9373144535260086e-05, "loss": 0.875, "step": 5234 }, { "epoch": 0.07425080919343002, "grad_norm": 2.546875, "learning_rate": 4.937264724749897e-05, "loss": 0.89, "step": 5236 }, { "epoch": 0.07427917084705624, "grad_norm": 3.234375, "learning_rate": 4.937214976507189e-05, "loss": 0.9526, "step": 5238 }, { "epoch": 0.07430753250068245, "grad_norm": 3.171875, "learning_rate": 4.937165208798283e-05, "loss": 0.8587, "step": 5240 }, { "epoch": 0.07433589415430866, "grad_norm": 3.109375, "learning_rate": 4.937115421623577e-05, "loss": 0.8753, "step": 5242 }, { "epoch": 0.07436425580793488, "grad_norm": 2.71875, "learning_rate": 4.937065614983466e-05, "loss": 0.8933, "step": 5244 }, { "epoch": 0.0743926174615611, "grad_norm": 2.984375, "learning_rate": 4.93701578887835e-05, "loss": 0.898, "step": 5246 }, { "epoch": 0.07442097911518732, "grad_norm": 3.375, "learning_rate": 4.9369659433086256e-05, "loss": 0.8563, "step": 5248 }, { "epoch": 0.07444934076881353, "grad_norm": 3.015625, "learning_rate": 4.9369160782746927e-05, "loss": 0.8778, "step": 5250 }, { "epoch": 0.07447770242243974, "grad_norm": 2.96875, "learning_rate": 4.9368661937769475e-05, "loss": 0.8942, "step": 5252 }, { "epoch": 0.07450606407606596, "grad_norm": 2.890625, "learning_rate": 4.936816289815791e-05, "loss": 0.874, "step": 5254 }, { "epoch": 0.07453442572969217, "grad_norm": 3.25, "learning_rate": 4.936766366391619e-05, "loss": 0.9194, "step": 5256 }, { "epoch": 0.07456278738331838, "grad_norm": 3.0625, "learning_rate": 4.936716423504832e-05, "loss": 0.8969, "step": 5258 }, { "epoch": 0.0745911490369446, "grad_norm": 3.140625, "learning_rate": 4.936666461155828e-05, "loss": 0.9198, "step": 5260 }, { "epoch": 0.07461951069057081, "grad_norm": 3.265625, "learning_rate": 4.9366164793450066e-05, "loss": 0.9038, "step": 5262 }, { "epoch": 0.07464787234419702, "grad_norm": 3.1875, "learning_rate": 4.936566478072767e-05, "loss": 0.8808, "step": 5264 }, { "epoch": 0.07467623399782325, "grad_norm": 3.046875, "learning_rate": 4.9365164573395086e-05, "loss": 0.872, "step": 5266 }, { "epoch": 0.07470459565144946, "grad_norm": 2.875, "learning_rate": 4.93646641714563e-05, "loss": 0.8713, "step": 5268 }, { "epoch": 0.07473295730507568, "grad_norm": 3.140625, "learning_rate": 4.936416357491532e-05, "loss": 0.8961, "step": 5270 }, { "epoch": 0.07476131895870189, "grad_norm": 3.265625, "learning_rate": 4.9363662783776146e-05, "loss": 0.9058, "step": 5272 }, { "epoch": 0.0747896806123281, "grad_norm": 3.65625, "learning_rate": 4.9363161798042766e-05, "loss": 0.8826, "step": 5274 }, { "epoch": 0.07481804226595432, "grad_norm": 3.09375, "learning_rate": 4.936266061771919e-05, "loss": 0.8865, "step": 5276 }, { "epoch": 0.07484640391958053, "grad_norm": 2.890625, "learning_rate": 4.9362159242809415e-05, "loss": 0.8901, "step": 5278 }, { "epoch": 0.07487476557320674, "grad_norm": 2.703125, "learning_rate": 4.9361657673317455e-05, "loss": 0.9303, "step": 5280 }, { "epoch": 0.07490312722683296, "grad_norm": 2.9375, "learning_rate": 4.93611559092473e-05, "loss": 0.8695, "step": 5282 }, { "epoch": 0.07493148888045917, "grad_norm": 3.328125, "learning_rate": 4.936065395060298e-05, "loss": 0.9216, "step": 5284 }, { "epoch": 0.0749598505340854, "grad_norm": 2.8125, "learning_rate": 4.936015179738849e-05, "loss": 0.88, "step": 5286 }, { "epoch": 0.07498821218771161, "grad_norm": 3.0, "learning_rate": 4.9359649449607834e-05, "loss": 0.9641, "step": 5288 }, { "epoch": 0.07501657384133782, "grad_norm": 3.140625, "learning_rate": 4.935914690726504e-05, "loss": 0.8961, "step": 5290 }, { "epoch": 0.07504493549496404, "grad_norm": 2.796875, "learning_rate": 4.935864417036412e-05, "loss": 0.8868, "step": 5292 }, { "epoch": 0.07507329714859025, "grad_norm": 3.296875, "learning_rate": 4.9358141238909075e-05, "loss": 0.8834, "step": 5294 }, { "epoch": 0.07510165880221646, "grad_norm": 2.640625, "learning_rate": 4.9357638112903936e-05, "loss": 0.8888, "step": 5296 }, { "epoch": 0.07513002045584267, "grad_norm": 3.296875, "learning_rate": 4.935713479235271e-05, "loss": 0.9013, "step": 5298 }, { "epoch": 0.07515838210946889, "grad_norm": 2.859375, "learning_rate": 4.9356631277259436e-05, "loss": 0.8759, "step": 5300 }, { "epoch": 0.0751867437630951, "grad_norm": 2.96875, "learning_rate": 4.935612756762811e-05, "loss": 0.9136, "step": 5302 }, { "epoch": 0.07521510541672133, "grad_norm": 2.859375, "learning_rate": 4.935562366346278e-05, "loss": 0.9295, "step": 5304 }, { "epoch": 0.07524346707034754, "grad_norm": 3.5625, "learning_rate": 4.935511956476745e-05, "loss": 0.8987, "step": 5306 }, { "epoch": 0.07527182872397375, "grad_norm": 3.046875, "learning_rate": 4.9354615271546164e-05, "loss": 0.859, "step": 5308 }, { "epoch": 0.07530019037759997, "grad_norm": 3.09375, "learning_rate": 4.935411078380293e-05, "loss": 0.9349, "step": 5310 }, { "epoch": 0.07532855203122618, "grad_norm": 3.078125, "learning_rate": 4.93536061015418e-05, "loss": 0.9032, "step": 5312 }, { "epoch": 0.0753569136848524, "grad_norm": 3.015625, "learning_rate": 4.935310122476679e-05, "loss": 0.8784, "step": 5314 }, { "epoch": 0.07538527533847861, "grad_norm": 3.1875, "learning_rate": 4.935259615348193e-05, "loss": 0.8633, "step": 5316 }, { "epoch": 0.07541363699210482, "grad_norm": 3.09375, "learning_rate": 4.935209088769127e-05, "loss": 0.8513, "step": 5318 }, { "epoch": 0.07544199864573103, "grad_norm": 3.0625, "learning_rate": 4.9351585427398835e-05, "loss": 0.907, "step": 5320 }, { "epoch": 0.07547036029935725, "grad_norm": 3.328125, "learning_rate": 4.935107977260865e-05, "loss": 0.8551, "step": 5322 }, { "epoch": 0.07549872195298347, "grad_norm": 2.984375, "learning_rate": 4.9350573923324786e-05, "loss": 0.89, "step": 5324 }, { "epoch": 0.07552708360660969, "grad_norm": 3.15625, "learning_rate": 4.935006787955126e-05, "loss": 0.8992, "step": 5326 }, { "epoch": 0.0755554452602359, "grad_norm": 3.140625, "learning_rate": 4.93495616412921e-05, "loss": 0.9355, "step": 5328 }, { "epoch": 0.07558380691386211, "grad_norm": 3.0, "learning_rate": 4.934905520855138e-05, "loss": 0.8623, "step": 5330 }, { "epoch": 0.07561216856748833, "grad_norm": 3.1875, "learning_rate": 4.934854858133313e-05, "loss": 0.9072, "step": 5332 }, { "epoch": 0.07564053022111454, "grad_norm": 3.140625, "learning_rate": 4.93480417596414e-05, "loss": 0.8938, "step": 5334 }, { "epoch": 0.07566889187474075, "grad_norm": 3.03125, "learning_rate": 4.9347534743480236e-05, "loss": 0.9031, "step": 5336 }, { "epoch": 0.07569725352836697, "grad_norm": 3.09375, "learning_rate": 4.934702753285368e-05, "loss": 0.962, "step": 5338 }, { "epoch": 0.07572561518199318, "grad_norm": 2.671875, "learning_rate": 4.934652012776581e-05, "loss": 0.8733, "step": 5340 }, { "epoch": 0.07575397683561941, "grad_norm": 3.21875, "learning_rate": 4.934601252822064e-05, "loss": 0.9047, "step": 5342 }, { "epoch": 0.07578233848924562, "grad_norm": 3.375, "learning_rate": 4.934550473422226e-05, "loss": 0.9506, "step": 5344 }, { "epoch": 0.07581070014287183, "grad_norm": 3.171875, "learning_rate": 4.93449967457747e-05, "loss": 0.8966, "step": 5346 }, { "epoch": 0.07583906179649805, "grad_norm": 3.078125, "learning_rate": 4.934448856288203e-05, "loss": 0.8488, "step": 5348 }, { "epoch": 0.07586742345012426, "grad_norm": 2.953125, "learning_rate": 4.93439801855483e-05, "loss": 0.8768, "step": 5350 }, { "epoch": 0.07589578510375047, "grad_norm": 3.109375, "learning_rate": 4.9343471613777584e-05, "loss": 0.8928, "step": 5352 }, { "epoch": 0.07592414675737669, "grad_norm": 2.90625, "learning_rate": 4.934296284757394e-05, "loss": 0.8584, "step": 5354 }, { "epoch": 0.0759525084110029, "grad_norm": 3.3125, "learning_rate": 4.934245388694142e-05, "loss": 0.9175, "step": 5356 }, { "epoch": 0.07598087006462911, "grad_norm": 2.9375, "learning_rate": 4.934194473188409e-05, "loss": 0.8755, "step": 5358 }, { "epoch": 0.07600923171825533, "grad_norm": 3.171875, "learning_rate": 4.9341435382406045e-05, "loss": 0.859, "step": 5360 }, { "epoch": 0.07603759337188155, "grad_norm": 2.765625, "learning_rate": 4.934092583851132e-05, "loss": 0.8689, "step": 5362 }, { "epoch": 0.07606595502550777, "grad_norm": 2.71875, "learning_rate": 4.934041610020399e-05, "loss": 0.866, "step": 5364 }, { "epoch": 0.07609431667913398, "grad_norm": 3.296875, "learning_rate": 4.933990616748814e-05, "loss": 0.832, "step": 5366 }, { "epoch": 0.0761226783327602, "grad_norm": 3.03125, "learning_rate": 4.9339396040367835e-05, "loss": 0.9287, "step": 5368 }, { "epoch": 0.0761510399863864, "grad_norm": 3.359375, "learning_rate": 4.933888571884716e-05, "loss": 0.8963, "step": 5370 }, { "epoch": 0.07617940164001262, "grad_norm": 2.953125, "learning_rate": 4.933837520293017e-05, "loss": 0.8709, "step": 5372 }, { "epoch": 0.07620776329363883, "grad_norm": 3.625, "learning_rate": 4.9337864492620954e-05, "loss": 0.9416, "step": 5374 }, { "epoch": 0.07623612494726505, "grad_norm": 3.09375, "learning_rate": 4.9337353587923594e-05, "loss": 0.8525, "step": 5376 }, { "epoch": 0.07626448660089126, "grad_norm": 3.421875, "learning_rate": 4.9336842488842175e-05, "loss": 0.8782, "step": 5378 }, { "epoch": 0.07629284825451747, "grad_norm": 3.671875, "learning_rate": 4.9336331195380755e-05, "loss": 0.8989, "step": 5380 }, { "epoch": 0.0763212099081437, "grad_norm": 3.125, "learning_rate": 4.933581970754345e-05, "loss": 0.8819, "step": 5382 }, { "epoch": 0.07634957156176991, "grad_norm": 3.484375, "learning_rate": 4.933530802533432e-05, "loss": 0.9443, "step": 5384 }, { "epoch": 0.07637793321539613, "grad_norm": 3.21875, "learning_rate": 4.933479614875747e-05, "loss": 0.8655, "step": 5386 }, { "epoch": 0.07640629486902234, "grad_norm": 3.1875, "learning_rate": 4.9334284077816974e-05, "loss": 0.8881, "step": 5388 }, { "epoch": 0.07643465652264855, "grad_norm": 3.015625, "learning_rate": 4.933377181251693e-05, "loss": 0.8895, "step": 5390 }, { "epoch": 0.07646301817627477, "grad_norm": 3.40625, "learning_rate": 4.933325935286142e-05, "loss": 0.9179, "step": 5392 }, { "epoch": 0.07649137982990098, "grad_norm": 3.1875, "learning_rate": 4.933274669885456e-05, "loss": 0.8599, "step": 5394 }, { "epoch": 0.07651974148352719, "grad_norm": 3.0625, "learning_rate": 4.933223385050041e-05, "loss": 0.9354, "step": 5396 }, { "epoch": 0.0765481031371534, "grad_norm": 3.5625, "learning_rate": 4.933172080780309e-05, "loss": 0.92, "step": 5398 }, { "epoch": 0.07657646479077963, "grad_norm": 3.296875, "learning_rate": 4.9331207570766706e-05, "loss": 0.8878, "step": 5400 }, { "epoch": 0.07660482644440585, "grad_norm": 3.078125, "learning_rate": 4.933069413939534e-05, "loss": 0.8723, "step": 5402 }, { "epoch": 0.07663318809803206, "grad_norm": 2.953125, "learning_rate": 4.9330180513693095e-05, "loss": 0.8998, "step": 5404 }, { "epoch": 0.07666154975165827, "grad_norm": 3.265625, "learning_rate": 4.932966669366407e-05, "loss": 0.893, "step": 5406 }, { "epoch": 0.07668991140528449, "grad_norm": 2.890625, "learning_rate": 4.932915267931237e-05, "loss": 0.8777, "step": 5408 }, { "epoch": 0.0767182730589107, "grad_norm": 3.109375, "learning_rate": 4.932863847064212e-05, "loss": 0.9067, "step": 5410 }, { "epoch": 0.07674663471253691, "grad_norm": 3.265625, "learning_rate": 4.9328124067657406e-05, "loss": 0.8636, "step": 5412 }, { "epoch": 0.07677499636616313, "grad_norm": 2.984375, "learning_rate": 4.932760947036234e-05, "loss": 0.9323, "step": 5414 }, { "epoch": 0.07680335801978934, "grad_norm": 2.984375, "learning_rate": 4.932709467876104e-05, "loss": 0.9058, "step": 5416 }, { "epoch": 0.07683171967341555, "grad_norm": 2.953125, "learning_rate": 4.932657969285761e-05, "loss": 0.878, "step": 5418 }, { "epoch": 0.07686008132704178, "grad_norm": 2.796875, "learning_rate": 4.932606451265616e-05, "loss": 0.9243, "step": 5420 }, { "epoch": 0.07688844298066799, "grad_norm": 2.96875, "learning_rate": 4.932554913816081e-05, "loss": 0.8622, "step": 5422 }, { "epoch": 0.0769168046342942, "grad_norm": 2.921875, "learning_rate": 4.932503356937568e-05, "loss": 0.8482, "step": 5424 }, { "epoch": 0.07694516628792042, "grad_norm": 3.3125, "learning_rate": 4.9324517806304884e-05, "loss": 0.9288, "step": 5426 }, { "epoch": 0.07697352794154663, "grad_norm": 3.015625, "learning_rate": 4.932400184895254e-05, "loss": 0.9244, "step": 5428 }, { "epoch": 0.07700188959517285, "grad_norm": 2.84375, "learning_rate": 4.932348569732278e-05, "loss": 0.8565, "step": 5430 }, { "epoch": 0.07703025124879906, "grad_norm": 3.34375, "learning_rate": 4.932296935141971e-05, "loss": 0.8736, "step": 5432 }, { "epoch": 0.07705861290242527, "grad_norm": 2.96875, "learning_rate": 4.9322452811247455e-05, "loss": 0.8823, "step": 5434 }, { "epoch": 0.07708697455605149, "grad_norm": 2.9375, "learning_rate": 4.932193607681016e-05, "loss": 0.8742, "step": 5436 }, { "epoch": 0.0771153362096777, "grad_norm": 3.015625, "learning_rate": 4.932141914811193e-05, "loss": 0.8538, "step": 5438 }, { "epoch": 0.07714369786330393, "grad_norm": 3.1875, "learning_rate": 4.932090202515691e-05, "loss": 0.911, "step": 5440 }, { "epoch": 0.07717205951693014, "grad_norm": 2.890625, "learning_rate": 4.932038470794922e-05, "loss": 0.8644, "step": 5442 }, { "epoch": 0.07720042117055635, "grad_norm": 2.890625, "learning_rate": 4.931986719649299e-05, "loss": 0.8782, "step": 5444 }, { "epoch": 0.07722878282418257, "grad_norm": 3.1875, "learning_rate": 4.9319349490792365e-05, "loss": 0.8774, "step": 5446 }, { "epoch": 0.07725714447780878, "grad_norm": 3.140625, "learning_rate": 4.931883159085147e-05, "loss": 0.9193, "step": 5448 }, { "epoch": 0.07728550613143499, "grad_norm": 3.109375, "learning_rate": 4.9318313496674446e-05, "loss": 0.8686, "step": 5450 }, { "epoch": 0.0773138677850612, "grad_norm": 2.75, "learning_rate": 4.931779520826543e-05, "loss": 0.9091, "step": 5452 }, { "epoch": 0.07734222943868742, "grad_norm": 3.5, "learning_rate": 4.9317276725628566e-05, "loss": 0.8835, "step": 5454 }, { "epoch": 0.07737059109231363, "grad_norm": 3.078125, "learning_rate": 4.931675804876798e-05, "loss": 0.9199, "step": 5456 }, { "epoch": 0.07739895274593986, "grad_norm": 3.515625, "learning_rate": 4.931623917768784e-05, "loss": 0.9874, "step": 5458 }, { "epoch": 0.07742731439956607, "grad_norm": 3.015625, "learning_rate": 4.931572011239227e-05, "loss": 0.8394, "step": 5460 }, { "epoch": 0.07745567605319228, "grad_norm": 2.875, "learning_rate": 4.9315200852885415e-05, "loss": 0.8789, "step": 5462 }, { "epoch": 0.0774840377068185, "grad_norm": 3.40625, "learning_rate": 4.931468139917144e-05, "loss": 0.9248, "step": 5464 }, { "epoch": 0.07751239936044471, "grad_norm": 3.125, "learning_rate": 4.9314161751254476e-05, "loss": 0.9036, "step": 5466 }, { "epoch": 0.07754076101407092, "grad_norm": 2.765625, "learning_rate": 4.931364190913868e-05, "loss": 0.9053, "step": 5468 }, { "epoch": 0.07756912266769714, "grad_norm": 3.109375, "learning_rate": 4.93131218728282e-05, "loss": 0.8805, "step": 5470 }, { "epoch": 0.07759748432132335, "grad_norm": 3.3125, "learning_rate": 4.93126016423272e-05, "loss": 0.9209, "step": 5472 }, { "epoch": 0.07762584597494956, "grad_norm": 3.25, "learning_rate": 4.931208121763983e-05, "loss": 0.8672, "step": 5474 }, { "epoch": 0.07765420762857578, "grad_norm": 2.828125, "learning_rate": 4.931156059877025e-05, "loss": 0.8389, "step": 5476 }, { "epoch": 0.077682569282202, "grad_norm": 3.046875, "learning_rate": 4.9311039785722605e-05, "loss": 0.9165, "step": 5478 }, { "epoch": 0.07771093093582822, "grad_norm": 3.15625, "learning_rate": 4.931051877850106e-05, "loss": 0.9001, "step": 5480 }, { "epoch": 0.07773929258945443, "grad_norm": 3.578125, "learning_rate": 4.930999757710979e-05, "loss": 0.916, "step": 5482 }, { "epoch": 0.07776765424308064, "grad_norm": 3.21875, "learning_rate": 4.930947618155294e-05, "loss": 0.9157, "step": 5484 }, { "epoch": 0.07779601589670686, "grad_norm": 3.03125, "learning_rate": 4.9308954591834686e-05, "loss": 0.9036, "step": 5486 }, { "epoch": 0.07782437755033307, "grad_norm": 3.015625, "learning_rate": 4.930843280795919e-05, "loss": 0.9086, "step": 5488 }, { "epoch": 0.07785273920395928, "grad_norm": 3.03125, "learning_rate": 4.9307910829930624e-05, "loss": 0.9022, "step": 5490 }, { "epoch": 0.0778811008575855, "grad_norm": 3.046875, "learning_rate": 4.930738865775315e-05, "loss": 0.9016, "step": 5492 }, { "epoch": 0.07790946251121171, "grad_norm": 3.53125, "learning_rate": 4.930686629143094e-05, "loss": 0.9501, "step": 5494 }, { "epoch": 0.07793782416483794, "grad_norm": 2.953125, "learning_rate": 4.930634373096817e-05, "loss": 0.8992, "step": 5496 }, { "epoch": 0.07796618581846415, "grad_norm": 2.953125, "learning_rate": 4.930582097636901e-05, "loss": 0.8981, "step": 5498 }, { "epoch": 0.07799454747209036, "grad_norm": 3.3125, "learning_rate": 4.930529802763764e-05, "loss": 0.8843, "step": 5500 }, { "epoch": 0.07802290912571658, "grad_norm": 3.09375, "learning_rate": 4.9304774884778224e-05, "loss": 0.9155, "step": 5502 }, { "epoch": 0.07805127077934279, "grad_norm": 3.234375, "learning_rate": 4.930425154779496e-05, "loss": 0.9095, "step": 5504 }, { "epoch": 0.078079632432969, "grad_norm": 2.921875, "learning_rate": 4.930372801669202e-05, "loss": 0.8991, "step": 5506 }, { "epoch": 0.07810799408659522, "grad_norm": 2.640625, "learning_rate": 4.930320429147357e-05, "loss": 0.8325, "step": 5508 }, { "epoch": 0.07813635574022143, "grad_norm": 3.359375, "learning_rate": 4.9302680372143814e-05, "loss": 0.9053, "step": 5510 }, { "epoch": 0.07816471739384764, "grad_norm": 3.390625, "learning_rate": 4.930215625870693e-05, "loss": 0.8721, "step": 5512 }, { "epoch": 0.07819307904747386, "grad_norm": 3.203125, "learning_rate": 4.9301631951167104e-05, "loss": 0.8696, "step": 5514 }, { "epoch": 0.07822144070110008, "grad_norm": 2.890625, "learning_rate": 4.930110744952852e-05, "loss": 0.8564, "step": 5516 }, { "epoch": 0.0782498023547263, "grad_norm": 3.03125, "learning_rate": 4.9300582753795366e-05, "loss": 0.8688, "step": 5518 }, { "epoch": 0.07827816400835251, "grad_norm": 3.25, "learning_rate": 4.9300057863971846e-05, "loss": 0.9093, "step": 5520 }, { "epoch": 0.07830652566197872, "grad_norm": 3.203125, "learning_rate": 4.929953278006213e-05, "loss": 0.893, "step": 5522 }, { "epoch": 0.07833488731560494, "grad_norm": 3.078125, "learning_rate": 4.929900750207044e-05, "loss": 0.8534, "step": 5524 }, { "epoch": 0.07836324896923115, "grad_norm": 3.359375, "learning_rate": 4.9298482030000945e-05, "loss": 0.9451, "step": 5526 }, { "epoch": 0.07839161062285736, "grad_norm": 3.203125, "learning_rate": 4.929795636385786e-05, "loss": 0.8788, "step": 5528 }, { "epoch": 0.07841997227648358, "grad_norm": 3.265625, "learning_rate": 4.929743050364537e-05, "loss": 0.9086, "step": 5530 }, { "epoch": 0.07844833393010979, "grad_norm": 2.734375, "learning_rate": 4.9296904449367685e-05, "loss": 0.8937, "step": 5532 }, { "epoch": 0.078476695583736, "grad_norm": 3.46875, "learning_rate": 4.9296378201029004e-05, "loss": 0.8557, "step": 5534 }, { "epoch": 0.07850505723736223, "grad_norm": 3.21875, "learning_rate": 4.9295851758633536e-05, "loss": 0.8467, "step": 5536 }, { "epoch": 0.07853341889098844, "grad_norm": 2.96875, "learning_rate": 4.929532512218547e-05, "loss": 0.8489, "step": 5538 }, { "epoch": 0.07856178054461466, "grad_norm": 3.65625, "learning_rate": 4.9294798291689025e-05, "loss": 0.8713, "step": 5540 }, { "epoch": 0.07859014219824087, "grad_norm": 3.0625, "learning_rate": 4.9294271267148405e-05, "loss": 0.9089, "step": 5542 }, { "epoch": 0.07861850385186708, "grad_norm": 3.359375, "learning_rate": 4.9293744048567825e-05, "loss": 0.9075, "step": 5544 }, { "epoch": 0.0786468655054933, "grad_norm": 3.578125, "learning_rate": 4.9293216635951484e-05, "loss": 0.9247, "step": 5546 }, { "epoch": 0.07867522715911951, "grad_norm": 2.78125, "learning_rate": 4.9292689029303606e-05, "loss": 0.86, "step": 5548 }, { "epoch": 0.07870358881274572, "grad_norm": 3.03125, "learning_rate": 4.92921612286284e-05, "loss": 0.9092, "step": 5550 }, { "epoch": 0.07873195046637194, "grad_norm": 3.0, "learning_rate": 4.929163323393008e-05, "loss": 0.9006, "step": 5552 }, { "epoch": 0.07876031211999816, "grad_norm": 3.0, "learning_rate": 4.929110504521286e-05, "loss": 0.863, "step": 5554 }, { "epoch": 0.07878867377362438, "grad_norm": 3.109375, "learning_rate": 4.929057666248097e-05, "loss": 0.89, "step": 5556 }, { "epoch": 0.07881703542725059, "grad_norm": 3.125, "learning_rate": 4.929004808573863e-05, "loss": 0.8854, "step": 5558 }, { "epoch": 0.0788453970808768, "grad_norm": 3.28125, "learning_rate": 4.928951931499004e-05, "loss": 0.908, "step": 5560 }, { "epoch": 0.07887375873450302, "grad_norm": 2.984375, "learning_rate": 4.928899035023945e-05, "loss": 0.935, "step": 5562 }, { "epoch": 0.07890212038812923, "grad_norm": 3.15625, "learning_rate": 4.928846119149108e-05, "loss": 0.9024, "step": 5564 }, { "epoch": 0.07893048204175544, "grad_norm": 3.015625, "learning_rate": 4.928793183874913e-05, "loss": 0.8569, "step": 5566 }, { "epoch": 0.07895884369538166, "grad_norm": 3.21875, "learning_rate": 4.928740229201787e-05, "loss": 0.8635, "step": 5568 }, { "epoch": 0.07898720534900787, "grad_norm": 2.875, "learning_rate": 4.92868725513015e-05, "loss": 0.8701, "step": 5570 }, { "epoch": 0.07901556700263408, "grad_norm": 3.015625, "learning_rate": 4.928634261660425e-05, "loss": 0.8604, "step": 5572 }, { "epoch": 0.07904392865626031, "grad_norm": 3.0, "learning_rate": 4.928581248793037e-05, "loss": 0.8806, "step": 5574 }, { "epoch": 0.07907229030988652, "grad_norm": 3.4375, "learning_rate": 4.9285282165284085e-05, "loss": 0.9289, "step": 5576 }, { "epoch": 0.07910065196351274, "grad_norm": 2.828125, "learning_rate": 4.9284751648669634e-05, "loss": 0.9016, "step": 5578 }, { "epoch": 0.07912901361713895, "grad_norm": 3.1875, "learning_rate": 4.928422093809125e-05, "loss": 0.8545, "step": 5580 }, { "epoch": 0.07915737527076516, "grad_norm": 3.078125, "learning_rate": 4.9283690033553174e-05, "loss": 0.8787, "step": 5582 }, { "epoch": 0.07918573692439138, "grad_norm": 2.890625, "learning_rate": 4.928315893505965e-05, "loss": 0.8473, "step": 5584 }, { "epoch": 0.07921409857801759, "grad_norm": 3.0, "learning_rate": 4.928262764261491e-05, "loss": 0.8591, "step": 5586 }, { "epoch": 0.0792424602316438, "grad_norm": 3.140625, "learning_rate": 4.92820961562232e-05, "loss": 0.8527, "step": 5588 }, { "epoch": 0.07927082188527002, "grad_norm": 3.375, "learning_rate": 4.928156447588877e-05, "loss": 0.8503, "step": 5590 }, { "epoch": 0.07929918353889623, "grad_norm": 3.359375, "learning_rate": 4.928103260161587e-05, "loss": 0.8904, "step": 5592 }, { "epoch": 0.07932754519252246, "grad_norm": 2.90625, "learning_rate": 4.928050053340874e-05, "loss": 0.9232, "step": 5594 }, { "epoch": 0.07935590684614867, "grad_norm": 3.078125, "learning_rate": 4.9279968271271634e-05, "loss": 0.8704, "step": 5596 }, { "epoch": 0.07938426849977488, "grad_norm": 3.078125, "learning_rate": 4.9279435815208805e-05, "loss": 0.9102, "step": 5598 }, { "epoch": 0.0794126301534011, "grad_norm": 2.96875, "learning_rate": 4.927890316522449e-05, "loss": 0.8681, "step": 5600 }, { "epoch": 0.07944099180702731, "grad_norm": 2.984375, "learning_rate": 4.927837032132297e-05, "loss": 0.888, "step": 5602 }, { "epoch": 0.07946935346065352, "grad_norm": 3.140625, "learning_rate": 4.927783728350848e-05, "loss": 0.9215, "step": 5604 }, { "epoch": 0.07949771511427974, "grad_norm": 3.125, "learning_rate": 4.927730405178529e-05, "loss": 0.8644, "step": 5606 }, { "epoch": 0.07952607676790595, "grad_norm": 3.28125, "learning_rate": 4.927677062615765e-05, "loss": 0.9114, "step": 5608 }, { "epoch": 0.07955443842153216, "grad_norm": 2.671875, "learning_rate": 4.9276237006629824e-05, "loss": 0.8623, "step": 5610 }, { "epoch": 0.07958280007515839, "grad_norm": 2.890625, "learning_rate": 4.927570319320607e-05, "loss": 0.879, "step": 5612 }, { "epoch": 0.0796111617287846, "grad_norm": 2.75, "learning_rate": 4.927516918589066e-05, "loss": 0.8618, "step": 5614 }, { "epoch": 0.07963952338241082, "grad_norm": 3.25, "learning_rate": 4.927463498468785e-05, "loss": 0.9158, "step": 5616 }, { "epoch": 0.07966788503603703, "grad_norm": 3.46875, "learning_rate": 4.927410058960191e-05, "loss": 0.8627, "step": 5618 }, { "epoch": 0.07969624668966324, "grad_norm": 3.609375, "learning_rate": 4.927356600063712e-05, "loss": 0.9368, "step": 5620 }, { "epoch": 0.07972460834328945, "grad_norm": 2.9375, "learning_rate": 4.927303121779773e-05, "loss": 0.9188, "step": 5622 }, { "epoch": 0.07975296999691567, "grad_norm": 2.90625, "learning_rate": 4.927249624108802e-05, "loss": 0.9131, "step": 5624 }, { "epoch": 0.07978133165054188, "grad_norm": 2.75, "learning_rate": 4.927196107051227e-05, "loss": 0.8504, "step": 5626 }, { "epoch": 0.0798096933041681, "grad_norm": 3.40625, "learning_rate": 4.9271425706074744e-05, "loss": 0.9109, "step": 5628 }, { "epoch": 0.07983805495779431, "grad_norm": 3.109375, "learning_rate": 4.9270890147779726e-05, "loss": 0.8672, "step": 5630 }, { "epoch": 0.07986641661142053, "grad_norm": 3.34375, "learning_rate": 4.927035439563149e-05, "loss": 0.8931, "step": 5632 }, { "epoch": 0.07989477826504675, "grad_norm": 3.21875, "learning_rate": 4.926981844963431e-05, "loss": 0.8786, "step": 5634 }, { "epoch": 0.07992313991867296, "grad_norm": 3.0, "learning_rate": 4.9269282309792465e-05, "loss": 0.8745, "step": 5636 }, { "epoch": 0.07995150157229917, "grad_norm": 3.03125, "learning_rate": 4.926874597611025e-05, "loss": 0.8981, "step": 5638 }, { "epoch": 0.07997986322592539, "grad_norm": 2.703125, "learning_rate": 4.9268209448591944e-05, "loss": 0.8774, "step": 5640 }, { "epoch": 0.0800082248795516, "grad_norm": 2.734375, "learning_rate": 4.9267672727241834e-05, "loss": 0.8884, "step": 5642 }, { "epoch": 0.08003658653317781, "grad_norm": 2.84375, "learning_rate": 4.92671358120642e-05, "loss": 0.8539, "step": 5644 }, { "epoch": 0.08006494818680403, "grad_norm": 3.125, "learning_rate": 4.926659870306333e-05, "loss": 0.8138, "step": 5646 }, { "epoch": 0.08009330984043024, "grad_norm": 3.53125, "learning_rate": 4.926606140024352e-05, "loss": 0.9178, "step": 5648 }, { "epoch": 0.08012167149405647, "grad_norm": 3.078125, "learning_rate": 4.926552390360906e-05, "loss": 0.8908, "step": 5650 }, { "epoch": 0.08015003314768268, "grad_norm": 2.71875, "learning_rate": 4.9264986213164235e-05, "loss": 0.883, "step": 5652 }, { "epoch": 0.0801783948013089, "grad_norm": 3.421875, "learning_rate": 4.9264448328913356e-05, "loss": 0.8965, "step": 5654 }, { "epoch": 0.08020675645493511, "grad_norm": 2.96875, "learning_rate": 4.92639102508607e-05, "loss": 0.9029, "step": 5656 }, { "epoch": 0.08023511810856132, "grad_norm": 3.09375, "learning_rate": 4.926337197901059e-05, "loss": 0.9045, "step": 5658 }, { "epoch": 0.08026347976218753, "grad_norm": 3.890625, "learning_rate": 4.92628335133673e-05, "loss": 0.9001, "step": 5660 }, { "epoch": 0.08029184141581375, "grad_norm": 2.890625, "learning_rate": 4.926229485393513e-05, "loss": 0.8524, "step": 5662 }, { "epoch": 0.08032020306943996, "grad_norm": 3.03125, "learning_rate": 4.926175600071841e-05, "loss": 0.9297, "step": 5664 }, { "epoch": 0.08034856472306617, "grad_norm": 2.90625, "learning_rate": 4.9261216953721415e-05, "loss": 0.8889, "step": 5666 }, { "epoch": 0.08037692637669239, "grad_norm": 2.984375, "learning_rate": 4.926067771294846e-05, "loss": 0.8874, "step": 5668 }, { "epoch": 0.08040528803031861, "grad_norm": 3.5625, "learning_rate": 4.926013827840387e-05, "loss": 0.9031, "step": 5670 }, { "epoch": 0.08043364968394483, "grad_norm": 3.546875, "learning_rate": 4.925959865009193e-05, "loss": 0.9212, "step": 5672 }, { "epoch": 0.08046201133757104, "grad_norm": 3.234375, "learning_rate": 4.925905882801696e-05, "loss": 0.8823, "step": 5674 }, { "epoch": 0.08049037299119725, "grad_norm": 3.296875, "learning_rate": 4.9258518812183266e-05, "loss": 0.9034, "step": 5676 }, { "epoch": 0.08051873464482347, "grad_norm": 3.0, "learning_rate": 4.925797860259516e-05, "loss": 0.9158, "step": 5678 }, { "epoch": 0.08054709629844968, "grad_norm": 2.9375, "learning_rate": 4.9257438199256956e-05, "loss": 0.946, "step": 5680 }, { "epoch": 0.0805754579520759, "grad_norm": 3.3125, "learning_rate": 4.9256897602172986e-05, "loss": 0.8964, "step": 5682 }, { "epoch": 0.0806038196057021, "grad_norm": 3.296875, "learning_rate": 4.9256356811347555e-05, "loss": 0.9, "step": 5684 }, { "epoch": 0.08063218125932832, "grad_norm": 3.171875, "learning_rate": 4.925581582678498e-05, "loss": 0.9061, "step": 5686 }, { "epoch": 0.08066054291295453, "grad_norm": 3.46875, "learning_rate": 4.925527464848959e-05, "loss": 0.9439, "step": 5688 }, { "epoch": 0.08068890456658076, "grad_norm": 2.796875, "learning_rate": 4.9254733276465704e-05, "loss": 0.8705, "step": 5690 }, { "epoch": 0.08071726622020697, "grad_norm": 3.28125, "learning_rate": 4.925419171071765e-05, "loss": 0.859, "step": 5692 }, { "epoch": 0.08074562787383319, "grad_norm": 3.25, "learning_rate": 4.925364995124974e-05, "loss": 0.9201, "step": 5694 }, { "epoch": 0.0807739895274594, "grad_norm": 3.5625, "learning_rate": 4.9253107998066305e-05, "loss": 0.9282, "step": 5696 }, { "epoch": 0.08080235118108561, "grad_norm": 3.03125, "learning_rate": 4.9252565851171694e-05, "loss": 0.9211, "step": 5698 }, { "epoch": 0.08083071283471183, "grad_norm": 3.078125, "learning_rate": 4.925202351057021e-05, "loss": 0.928, "step": 5700 }, { "epoch": 0.08085907448833804, "grad_norm": 3.046875, "learning_rate": 4.925148097626621e-05, "loss": 0.8961, "step": 5702 }, { "epoch": 0.08088743614196425, "grad_norm": 2.875, "learning_rate": 4.9250938248264e-05, "loss": 0.898, "step": 5704 }, { "epoch": 0.08091579779559047, "grad_norm": 3.109375, "learning_rate": 4.9250395326567934e-05, "loss": 0.9004, "step": 5706 }, { "epoch": 0.0809441594492167, "grad_norm": 3.21875, "learning_rate": 4.924985221118234e-05, "loss": 0.7934, "step": 5708 }, { "epoch": 0.0809725211028429, "grad_norm": 3.09375, "learning_rate": 4.924930890211156e-05, "loss": 0.9151, "step": 5710 }, { "epoch": 0.08100088275646912, "grad_norm": 3.734375, "learning_rate": 4.9248765399359934e-05, "loss": 0.9909, "step": 5712 }, { "epoch": 0.08102924441009533, "grad_norm": 3.28125, "learning_rate": 4.9248221702931804e-05, "loss": 0.916, "step": 5714 }, { "epoch": 0.08105760606372155, "grad_norm": 3.359375, "learning_rate": 4.924767781283151e-05, "loss": 0.9348, "step": 5716 }, { "epoch": 0.08108596771734776, "grad_norm": 3.203125, "learning_rate": 4.924713372906339e-05, "loss": 0.8824, "step": 5718 }, { "epoch": 0.08111432937097397, "grad_norm": 3.1875, "learning_rate": 4.9246589451631795e-05, "loss": 0.8308, "step": 5720 }, { "epoch": 0.08114269102460019, "grad_norm": 3.0625, "learning_rate": 4.9246044980541084e-05, "loss": 0.8767, "step": 5722 }, { "epoch": 0.0811710526782264, "grad_norm": 2.734375, "learning_rate": 4.924550031579558e-05, "loss": 0.9068, "step": 5724 }, { "epoch": 0.08119941433185261, "grad_norm": 3.53125, "learning_rate": 4.924495545739966e-05, "loss": 0.8897, "step": 5726 }, { "epoch": 0.08122777598547884, "grad_norm": 3.171875, "learning_rate": 4.924441040535765e-05, "loss": 0.8774, "step": 5728 }, { "epoch": 0.08125613763910505, "grad_norm": 3.015625, "learning_rate": 4.924386515967393e-05, "loss": 0.916, "step": 5730 }, { "epoch": 0.08128449929273127, "grad_norm": 2.765625, "learning_rate": 4.924331972035284e-05, "loss": 0.8465, "step": 5732 }, { "epoch": 0.08131286094635748, "grad_norm": 3.015625, "learning_rate": 4.9242774087398734e-05, "loss": 0.8574, "step": 5734 }, { "epoch": 0.08134122259998369, "grad_norm": 3.21875, "learning_rate": 4.9242228260815984e-05, "loss": 0.9077, "step": 5736 }, { "epoch": 0.0813695842536099, "grad_norm": 3.171875, "learning_rate": 4.924168224060893e-05, "loss": 0.8847, "step": 5738 }, { "epoch": 0.08139794590723612, "grad_norm": 2.90625, "learning_rate": 4.9241136026781945e-05, "loss": 0.8297, "step": 5740 }, { "epoch": 0.08142630756086233, "grad_norm": 3.3125, "learning_rate": 4.924058961933939e-05, "loss": 0.8788, "step": 5742 }, { "epoch": 0.08145466921448855, "grad_norm": 3.078125, "learning_rate": 4.924004301828563e-05, "loss": 0.9296, "step": 5744 }, { "epoch": 0.08148303086811476, "grad_norm": 3.09375, "learning_rate": 4.9239496223625024e-05, "loss": 0.8882, "step": 5746 }, { "epoch": 0.08151139252174099, "grad_norm": 3.453125, "learning_rate": 4.9238949235361956e-05, "loss": 0.8954, "step": 5748 }, { "epoch": 0.0815397541753672, "grad_norm": 3.015625, "learning_rate": 4.923840205350077e-05, "loss": 0.8805, "step": 5750 }, { "epoch": 0.08156811582899341, "grad_norm": 2.640625, "learning_rate": 4.9237854678045855e-05, "loss": 0.8748, "step": 5752 }, { "epoch": 0.08159647748261963, "grad_norm": 2.8125, "learning_rate": 4.923730710900158e-05, "loss": 0.8876, "step": 5754 }, { "epoch": 0.08162483913624584, "grad_norm": 2.96875, "learning_rate": 4.923675934637232e-05, "loss": 0.8558, "step": 5756 }, { "epoch": 0.08165320078987205, "grad_norm": 3.28125, "learning_rate": 4.923621139016245e-05, "loss": 0.8878, "step": 5758 }, { "epoch": 0.08168156244349827, "grad_norm": 4.4375, "learning_rate": 4.923566324037633e-05, "loss": 0.9007, "step": 5760 }, { "epoch": 0.08170992409712448, "grad_norm": 3.71875, "learning_rate": 4.923511489701835e-05, "loss": 0.8933, "step": 5762 }, { "epoch": 0.08173828575075069, "grad_norm": 3.15625, "learning_rate": 4.92345663600929e-05, "loss": 0.8637, "step": 5764 }, { "epoch": 0.08176664740437692, "grad_norm": 3.265625, "learning_rate": 4.9234017629604357e-05, "loss": 0.8815, "step": 5766 }, { "epoch": 0.08179500905800313, "grad_norm": 3.015625, "learning_rate": 4.923346870555709e-05, "loss": 0.9399, "step": 5768 }, { "epoch": 0.08182337071162935, "grad_norm": 3.21875, "learning_rate": 4.923291958795549e-05, "loss": 0.9007, "step": 5770 }, { "epoch": 0.08185173236525556, "grad_norm": 2.984375, "learning_rate": 4.9232370276803955e-05, "loss": 0.8199, "step": 5772 }, { "epoch": 0.08188009401888177, "grad_norm": 3.046875, "learning_rate": 4.923182077210685e-05, "loss": 0.9075, "step": 5774 }, { "epoch": 0.08190845567250798, "grad_norm": 3.421875, "learning_rate": 4.923127107386858e-05, "loss": 0.8536, "step": 5776 }, { "epoch": 0.0819368173261342, "grad_norm": 3.125, "learning_rate": 4.923072118209354e-05, "loss": 0.9445, "step": 5778 }, { "epoch": 0.08196517897976041, "grad_norm": 3.21875, "learning_rate": 4.923017109678611e-05, "loss": 0.8769, "step": 5780 }, { "epoch": 0.08199354063338662, "grad_norm": 3.65625, "learning_rate": 4.922962081795068e-05, "loss": 0.9123, "step": 5782 }, { "epoch": 0.08202190228701284, "grad_norm": 3.5625, "learning_rate": 4.9229070345591656e-05, "loss": 0.8722, "step": 5784 }, { "epoch": 0.08205026394063906, "grad_norm": 3.109375, "learning_rate": 4.922851967971344e-05, "loss": 0.8751, "step": 5786 }, { "epoch": 0.08207862559426528, "grad_norm": 3.0, "learning_rate": 4.9227968820320404e-05, "loss": 0.9033, "step": 5788 }, { "epoch": 0.08210698724789149, "grad_norm": 2.90625, "learning_rate": 4.9227417767416975e-05, "loss": 0.9412, "step": 5790 }, { "epoch": 0.0821353489015177, "grad_norm": 3.1875, "learning_rate": 4.922686652100754e-05, "loss": 0.8889, "step": 5792 }, { "epoch": 0.08216371055514392, "grad_norm": 2.796875, "learning_rate": 4.922631508109652e-05, "loss": 0.92, "step": 5794 }, { "epoch": 0.08219207220877013, "grad_norm": 3.1875, "learning_rate": 4.922576344768829e-05, "loss": 0.903, "step": 5796 }, { "epoch": 0.08222043386239634, "grad_norm": 3.375, "learning_rate": 4.9225211620787284e-05, "loss": 0.9077, "step": 5798 }, { "epoch": 0.08224879551602256, "grad_norm": 2.984375, "learning_rate": 4.922465960039788e-05, "loss": 0.8957, "step": 5800 }, { "epoch": 0.08227715716964877, "grad_norm": 3.015625, "learning_rate": 4.922410738652452e-05, "loss": 0.8679, "step": 5802 }, { "epoch": 0.082305518823275, "grad_norm": 2.828125, "learning_rate": 4.922355497917159e-05, "loss": 0.8547, "step": 5804 }, { "epoch": 0.08233388047690121, "grad_norm": 3.21875, "learning_rate": 4.9223002378343514e-05, "loss": 0.9045, "step": 5806 }, { "epoch": 0.08236224213052742, "grad_norm": 3.1875, "learning_rate": 4.9222449584044707e-05, "loss": 0.9052, "step": 5808 }, { "epoch": 0.08239060378415364, "grad_norm": 3.0625, "learning_rate": 4.922189659627957e-05, "loss": 0.8865, "step": 5810 }, { "epoch": 0.08241896543777985, "grad_norm": 3.34375, "learning_rate": 4.9221343415052534e-05, "loss": 0.8903, "step": 5812 }, { "epoch": 0.08244732709140606, "grad_norm": 2.90625, "learning_rate": 4.922079004036801e-05, "loss": 0.8591, "step": 5814 }, { "epoch": 0.08247568874503228, "grad_norm": 3.078125, "learning_rate": 4.922023647223042e-05, "loss": 0.8843, "step": 5816 }, { "epoch": 0.08250405039865849, "grad_norm": 2.859375, "learning_rate": 4.9219682710644186e-05, "loss": 0.9006, "step": 5818 }, { "epoch": 0.0825324120522847, "grad_norm": 3.5, "learning_rate": 4.921912875561373e-05, "loss": 0.8692, "step": 5820 }, { "epoch": 0.08256077370591092, "grad_norm": 3.1875, "learning_rate": 4.9218574607143485e-05, "loss": 0.8704, "step": 5822 }, { "epoch": 0.08258913535953714, "grad_norm": 3.59375, "learning_rate": 4.921802026523786e-05, "loss": 0.8662, "step": 5824 }, { "epoch": 0.08261749701316336, "grad_norm": 2.828125, "learning_rate": 4.92174657299013e-05, "loss": 0.8701, "step": 5826 }, { "epoch": 0.08264585866678957, "grad_norm": 3.046875, "learning_rate": 4.921691100113822e-05, "loss": 0.8527, "step": 5828 }, { "epoch": 0.08267422032041578, "grad_norm": 3.46875, "learning_rate": 4.9216356078953064e-05, "loss": 0.9265, "step": 5830 }, { "epoch": 0.082702581974042, "grad_norm": 3.03125, "learning_rate": 4.921580096335025e-05, "loss": 0.8574, "step": 5832 }, { "epoch": 0.08273094362766821, "grad_norm": 3.453125, "learning_rate": 4.921524565433422e-05, "loss": 0.9335, "step": 5834 }, { "epoch": 0.08275930528129442, "grad_norm": 3.078125, "learning_rate": 4.9214690151909404e-05, "loss": 0.8836, "step": 5836 }, { "epoch": 0.08278766693492064, "grad_norm": 3.171875, "learning_rate": 4.921413445608025e-05, "loss": 0.9238, "step": 5838 }, { "epoch": 0.08281602858854685, "grad_norm": 3.09375, "learning_rate": 4.9213578566851185e-05, "loss": 0.8502, "step": 5840 }, { "epoch": 0.08284439024217306, "grad_norm": 3.03125, "learning_rate": 4.921302248422665e-05, "loss": 0.9062, "step": 5842 }, { "epoch": 0.08287275189579929, "grad_norm": 3.09375, "learning_rate": 4.92124662082111e-05, "loss": 0.8693, "step": 5844 }, { "epoch": 0.0829011135494255, "grad_norm": 3.0625, "learning_rate": 4.921190973880895e-05, "loss": 0.9055, "step": 5846 }, { "epoch": 0.08292947520305172, "grad_norm": 2.90625, "learning_rate": 4.9211353076024676e-05, "loss": 0.8249, "step": 5848 }, { "epoch": 0.08295783685667793, "grad_norm": 2.984375, "learning_rate": 4.9210796219862705e-05, "loss": 0.9075, "step": 5850 }, { "epoch": 0.08298619851030414, "grad_norm": 2.875, "learning_rate": 4.921023917032749e-05, "loss": 0.8871, "step": 5852 }, { "epoch": 0.08301456016393036, "grad_norm": 2.9375, "learning_rate": 4.920968192742348e-05, "loss": 0.8636, "step": 5854 }, { "epoch": 0.08304292181755657, "grad_norm": 2.984375, "learning_rate": 4.9209124491155126e-05, "loss": 0.8656, "step": 5856 }, { "epoch": 0.08307128347118278, "grad_norm": 3.140625, "learning_rate": 4.920856686152688e-05, "loss": 0.8762, "step": 5858 }, { "epoch": 0.083099645124809, "grad_norm": 2.9375, "learning_rate": 4.9208009038543196e-05, "loss": 0.863, "step": 5860 }, { "epoch": 0.08312800677843522, "grad_norm": 2.875, "learning_rate": 4.9207451022208525e-05, "loss": 0.8807, "step": 5862 }, { "epoch": 0.08315636843206144, "grad_norm": 2.765625, "learning_rate": 4.920689281252734e-05, "loss": 0.8575, "step": 5864 }, { "epoch": 0.08318473008568765, "grad_norm": 2.734375, "learning_rate": 4.920633440950407e-05, "loss": 0.8613, "step": 5866 }, { "epoch": 0.08321309173931386, "grad_norm": 2.84375, "learning_rate": 4.920577581314321e-05, "loss": 0.8734, "step": 5868 }, { "epoch": 0.08324145339294008, "grad_norm": 3.625, "learning_rate": 4.920521702344919e-05, "loss": 0.8866, "step": 5870 }, { "epoch": 0.08326981504656629, "grad_norm": 3.390625, "learning_rate": 4.9204658040426496e-05, "loss": 0.9465, "step": 5872 }, { "epoch": 0.0832981767001925, "grad_norm": 2.828125, "learning_rate": 4.920409886407959e-05, "loss": 0.879, "step": 5874 }, { "epoch": 0.08332653835381872, "grad_norm": 3.671875, "learning_rate": 4.920353949441291e-05, "loss": 0.8683, "step": 5876 }, { "epoch": 0.08335490000744493, "grad_norm": 3.09375, "learning_rate": 4.920297993143096e-05, "loss": 0.888, "step": 5878 }, { "epoch": 0.08338326166107114, "grad_norm": 2.84375, "learning_rate": 4.92024201751382e-05, "loss": 0.8611, "step": 5880 }, { "epoch": 0.08341162331469737, "grad_norm": 2.828125, "learning_rate": 4.920186022553909e-05, "loss": 0.8931, "step": 5882 }, { "epoch": 0.08343998496832358, "grad_norm": 3.25, "learning_rate": 4.92013000826381e-05, "loss": 0.8709, "step": 5884 }, { "epoch": 0.0834683466219498, "grad_norm": 3.28125, "learning_rate": 4.920073974643972e-05, "loss": 0.9245, "step": 5886 }, { "epoch": 0.08349670827557601, "grad_norm": 3.1875, "learning_rate": 4.9200179216948414e-05, "loss": 0.9039, "step": 5888 }, { "epoch": 0.08352506992920222, "grad_norm": 2.921875, "learning_rate": 4.919961849416867e-05, "loss": 0.8616, "step": 5890 }, { "epoch": 0.08355343158282844, "grad_norm": 3.046875, "learning_rate": 4.919905757810496e-05, "loss": 0.8229, "step": 5892 }, { "epoch": 0.08358179323645465, "grad_norm": 3.640625, "learning_rate": 4.919849646876175e-05, "loss": 0.8999, "step": 5894 }, { "epoch": 0.08361015489008086, "grad_norm": 3.59375, "learning_rate": 4.919793516614354e-05, "loss": 0.9154, "step": 5896 }, { "epoch": 0.08363851654370708, "grad_norm": 2.859375, "learning_rate": 4.919737367025481e-05, "loss": 0.8571, "step": 5898 }, { "epoch": 0.08366687819733329, "grad_norm": 3.3125, "learning_rate": 4.919681198110004e-05, "loss": 0.8856, "step": 5900 }, { "epoch": 0.08369523985095952, "grad_norm": 3.453125, "learning_rate": 4.919625009868373e-05, "loss": 0.933, "step": 5902 }, { "epoch": 0.08372360150458573, "grad_norm": 2.6875, "learning_rate": 4.919568802301034e-05, "loss": 0.816, "step": 5904 }, { "epoch": 0.08375196315821194, "grad_norm": 2.921875, "learning_rate": 4.9195125754084384e-05, "loss": 0.8829, "step": 5906 }, { "epoch": 0.08378032481183816, "grad_norm": 3.53125, "learning_rate": 4.9194563291910346e-05, "loss": 0.9458, "step": 5908 }, { "epoch": 0.08380868646546437, "grad_norm": 3.671875, "learning_rate": 4.9194000636492714e-05, "loss": 0.9059, "step": 5910 }, { "epoch": 0.08383704811909058, "grad_norm": 3.34375, "learning_rate": 4.9193437787835987e-05, "loss": 0.934, "step": 5912 }, { "epoch": 0.0838654097727168, "grad_norm": 3.390625, "learning_rate": 4.9192874745944664e-05, "loss": 0.9071, "step": 5914 }, { "epoch": 0.08389377142634301, "grad_norm": 3.03125, "learning_rate": 4.9192311510823226e-05, "loss": 0.8792, "step": 5916 }, { "epoch": 0.08392213307996922, "grad_norm": 3.0625, "learning_rate": 4.9191748082476184e-05, "loss": 0.8679, "step": 5918 }, { "epoch": 0.08395049473359545, "grad_norm": 2.984375, "learning_rate": 4.919118446090804e-05, "loss": 0.8767, "step": 5920 }, { "epoch": 0.08397885638722166, "grad_norm": 2.890625, "learning_rate": 4.919062064612329e-05, "loss": 0.9052, "step": 5922 }, { "epoch": 0.08400721804084788, "grad_norm": 3.265625, "learning_rate": 4.919005663812645e-05, "loss": 0.9091, "step": 5924 }, { "epoch": 0.08403557969447409, "grad_norm": 3.03125, "learning_rate": 4.9189492436922e-05, "loss": 0.8364, "step": 5926 }, { "epoch": 0.0840639413481003, "grad_norm": 2.71875, "learning_rate": 4.918892804251446e-05, "loss": 0.8281, "step": 5928 }, { "epoch": 0.08409230300172651, "grad_norm": 3.15625, "learning_rate": 4.9188363454908345e-05, "loss": 0.8739, "step": 5930 }, { "epoch": 0.08412066465535273, "grad_norm": 2.875, "learning_rate": 4.918779867410817e-05, "loss": 0.9088, "step": 5932 }, { "epoch": 0.08414902630897894, "grad_norm": 3.109375, "learning_rate": 4.9187233700118416e-05, "loss": 0.875, "step": 5934 }, { "epoch": 0.08417738796260515, "grad_norm": 3.09375, "learning_rate": 4.918666853294362e-05, "loss": 0.8548, "step": 5936 }, { "epoch": 0.08420574961623137, "grad_norm": 3.078125, "learning_rate": 4.9186103172588284e-05, "loss": 0.8903, "step": 5938 }, { "epoch": 0.0842341112698576, "grad_norm": 3.734375, "learning_rate": 4.918553761905692e-05, "loss": 0.9125, "step": 5940 }, { "epoch": 0.08426247292348381, "grad_norm": 2.9375, "learning_rate": 4.918497187235407e-05, "loss": 0.8749, "step": 5942 }, { "epoch": 0.08429083457711002, "grad_norm": 2.953125, "learning_rate": 4.9184405932484236e-05, "loss": 0.9243, "step": 5944 }, { "epoch": 0.08431919623073623, "grad_norm": 3.140625, "learning_rate": 4.918383979945194e-05, "loss": 0.9738, "step": 5946 }, { "epoch": 0.08434755788436245, "grad_norm": 3.296875, "learning_rate": 4.9183273473261695e-05, "loss": 0.9077, "step": 5948 }, { "epoch": 0.08437591953798866, "grad_norm": 2.90625, "learning_rate": 4.918270695391803e-05, "loss": 0.8804, "step": 5950 }, { "epoch": 0.08440428119161487, "grad_norm": 2.90625, "learning_rate": 4.918214024142547e-05, "loss": 0.9062, "step": 5952 }, { "epoch": 0.08443264284524109, "grad_norm": 3.125, "learning_rate": 4.918157333578855e-05, "loss": 0.9333, "step": 5954 }, { "epoch": 0.0844610044988673, "grad_norm": 3.28125, "learning_rate": 4.918100623701179e-05, "loss": 0.9088, "step": 5956 }, { "epoch": 0.08448936615249353, "grad_norm": 3.28125, "learning_rate": 4.9180438945099716e-05, "loss": 0.9038, "step": 5958 }, { "epoch": 0.08451772780611974, "grad_norm": 3.15625, "learning_rate": 4.917987146005687e-05, "loss": 0.8887, "step": 5960 }, { "epoch": 0.08454608945974595, "grad_norm": 3.328125, "learning_rate": 4.917930378188778e-05, "loss": 0.9105, "step": 5962 }, { "epoch": 0.08457445111337217, "grad_norm": 3.046875, "learning_rate": 4.917873591059696e-05, "loss": 0.8854, "step": 5964 }, { "epoch": 0.08460281276699838, "grad_norm": 3.046875, "learning_rate": 4.917816784618898e-05, "loss": 0.9006, "step": 5966 }, { "epoch": 0.0846311744206246, "grad_norm": 2.703125, "learning_rate": 4.9177599588668354e-05, "loss": 0.8863, "step": 5968 }, { "epoch": 0.08465953607425081, "grad_norm": 2.71875, "learning_rate": 4.917703113803963e-05, "loss": 0.8521, "step": 5970 }, { "epoch": 0.08468789772787702, "grad_norm": 3.4375, "learning_rate": 4.917646249430735e-05, "loss": 0.9113, "step": 5972 }, { "epoch": 0.08471625938150323, "grad_norm": 3.140625, "learning_rate": 4.9175893657476046e-05, "loss": 0.9184, "step": 5974 }, { "epoch": 0.08474462103512945, "grad_norm": 2.953125, "learning_rate": 4.917532462755027e-05, "loss": 0.8483, "step": 5976 }, { "epoch": 0.08477298268875567, "grad_norm": 3.109375, "learning_rate": 4.917475540453456e-05, "loss": 0.9417, "step": 5978 }, { "epoch": 0.08480134434238189, "grad_norm": 2.984375, "learning_rate": 4.917418598843347e-05, "loss": 0.9145, "step": 5980 }, { "epoch": 0.0848297059960081, "grad_norm": 3.09375, "learning_rate": 4.917361637925154e-05, "loss": 0.865, "step": 5982 }, { "epoch": 0.08485806764963431, "grad_norm": 2.828125, "learning_rate": 4.9173046576993325e-05, "loss": 0.8642, "step": 5984 }, { "epoch": 0.08488642930326053, "grad_norm": 3.03125, "learning_rate": 4.917247658166337e-05, "loss": 0.9066, "step": 5986 }, { "epoch": 0.08491479095688674, "grad_norm": 2.96875, "learning_rate": 4.917190639326624e-05, "loss": 0.9135, "step": 5988 }, { "epoch": 0.08494315261051295, "grad_norm": 3.28125, "learning_rate": 4.917133601180648e-05, "loss": 0.8589, "step": 5990 }, { "epoch": 0.08497151426413917, "grad_norm": 2.96875, "learning_rate": 4.9170765437288644e-05, "loss": 0.8566, "step": 5992 }, { "epoch": 0.08499987591776538, "grad_norm": 3.328125, "learning_rate": 4.9170194669717295e-05, "loss": 0.8724, "step": 5994 }, { "epoch": 0.0850282375713916, "grad_norm": 3.28125, "learning_rate": 4.9169623709096993e-05, "loss": 0.89, "step": 5996 }, { "epoch": 0.08505659922501782, "grad_norm": 3.421875, "learning_rate": 4.9169052555432286e-05, "loss": 0.9137, "step": 5998 }, { "epoch": 0.08508496087864403, "grad_norm": 3.28125, "learning_rate": 4.9168481208727746e-05, "loss": 0.8921, "step": 6000 }, { "epoch": 0.08511332253227025, "grad_norm": 2.921875, "learning_rate": 4.9167909668987935e-05, "loss": 0.8855, "step": 6002 }, { "epoch": 0.08514168418589646, "grad_norm": 3.421875, "learning_rate": 4.916733793621742e-05, "loss": 0.9018, "step": 6004 }, { "epoch": 0.08517004583952267, "grad_norm": 2.953125, "learning_rate": 4.916676601042076e-05, "loss": 0.8416, "step": 6006 }, { "epoch": 0.08519840749314889, "grad_norm": 3.34375, "learning_rate": 4.916619389160254e-05, "loss": 0.8922, "step": 6008 }, { "epoch": 0.0852267691467751, "grad_norm": 3.15625, "learning_rate": 4.9165621579767306e-05, "loss": 0.9117, "step": 6010 }, { "epoch": 0.08525513080040131, "grad_norm": 2.984375, "learning_rate": 4.9165049074919646e-05, "loss": 0.8551, "step": 6012 }, { "epoch": 0.08528349245402753, "grad_norm": 3.375, "learning_rate": 4.916447637706412e-05, "loss": 0.9438, "step": 6014 }, { "epoch": 0.08531185410765375, "grad_norm": 2.796875, "learning_rate": 4.916390348620531e-05, "loss": 0.8533, "step": 6016 }, { "epoch": 0.08534021576127997, "grad_norm": 3.046875, "learning_rate": 4.91633304023478e-05, "loss": 0.8668, "step": 6018 }, { "epoch": 0.08536857741490618, "grad_norm": 3.375, "learning_rate": 4.916275712549615e-05, "loss": 0.8782, "step": 6020 }, { "epoch": 0.0853969390685324, "grad_norm": 2.828125, "learning_rate": 4.916218365565495e-05, "loss": 0.9206, "step": 6022 }, { "epoch": 0.0854253007221586, "grad_norm": 2.84375, "learning_rate": 4.916160999282878e-05, "loss": 0.9033, "step": 6024 }, { "epoch": 0.08545366237578482, "grad_norm": 3.40625, "learning_rate": 4.916103613702222e-05, "loss": 0.8546, "step": 6026 }, { "epoch": 0.08548202402941103, "grad_norm": 3.03125, "learning_rate": 4.916046208823985e-05, "loss": 0.8447, "step": 6028 }, { "epoch": 0.08551038568303725, "grad_norm": 2.921875, "learning_rate": 4.915988784648625e-05, "loss": 0.8674, "step": 6030 }, { "epoch": 0.08553874733666346, "grad_norm": 3.125, "learning_rate": 4.915931341176603e-05, "loss": 0.9199, "step": 6032 }, { "epoch": 0.08556710899028967, "grad_norm": 3.09375, "learning_rate": 4.9158738784083754e-05, "loss": 0.8927, "step": 6034 }, { "epoch": 0.0855954706439159, "grad_norm": 3.0, "learning_rate": 4.915816396344402e-05, "loss": 0.8932, "step": 6036 }, { "epoch": 0.08562383229754211, "grad_norm": 3.171875, "learning_rate": 4.9157588949851416e-05, "loss": 0.8885, "step": 6038 }, { "epoch": 0.08565219395116833, "grad_norm": 3.25, "learning_rate": 4.9157013743310543e-05, "loss": 0.8951, "step": 6040 }, { "epoch": 0.08568055560479454, "grad_norm": 2.875, "learning_rate": 4.915643834382599e-05, "loss": 0.8843, "step": 6042 }, { "epoch": 0.08570891725842075, "grad_norm": 3.015625, "learning_rate": 4.915586275140235e-05, "loss": 0.8564, "step": 6044 }, { "epoch": 0.08573727891204697, "grad_norm": 2.921875, "learning_rate": 4.915528696604422e-05, "loss": 0.892, "step": 6046 }, { "epoch": 0.08576564056567318, "grad_norm": 3.0625, "learning_rate": 4.915471098775621e-05, "loss": 0.8733, "step": 6048 }, { "epoch": 0.08579400221929939, "grad_norm": 2.9375, "learning_rate": 4.915413481654291e-05, "loss": 0.874, "step": 6050 }, { "epoch": 0.0858223638729256, "grad_norm": 3.1875, "learning_rate": 4.915355845240892e-05, "loss": 0.8983, "step": 6052 }, { "epoch": 0.08585072552655182, "grad_norm": 3.0625, "learning_rate": 4.915298189535885e-05, "loss": 0.8738, "step": 6054 }, { "epoch": 0.08587908718017805, "grad_norm": 2.90625, "learning_rate": 4.91524051453973e-05, "loss": 0.8658, "step": 6056 }, { "epoch": 0.08590744883380426, "grad_norm": 3.0625, "learning_rate": 4.915182820252888e-05, "loss": 0.9114, "step": 6058 }, { "epoch": 0.08593581048743047, "grad_norm": 2.96875, "learning_rate": 4.91512510667582e-05, "loss": 0.8587, "step": 6060 }, { "epoch": 0.08596417214105669, "grad_norm": 3.015625, "learning_rate": 4.915067373808987e-05, "loss": 0.8865, "step": 6062 }, { "epoch": 0.0859925337946829, "grad_norm": 3.015625, "learning_rate": 4.9150096216528494e-05, "loss": 0.9233, "step": 6064 }, { "epoch": 0.08602089544830911, "grad_norm": 3.375, "learning_rate": 4.914951850207868e-05, "loss": 0.9379, "step": 6066 }, { "epoch": 0.08604925710193533, "grad_norm": 3.1875, "learning_rate": 4.914894059474506e-05, "loss": 0.9065, "step": 6068 }, { "epoch": 0.08607761875556154, "grad_norm": 3.390625, "learning_rate": 4.914836249453224e-05, "loss": 0.8954, "step": 6070 }, { "epoch": 0.08610598040918775, "grad_norm": 3.25, "learning_rate": 4.914778420144484e-05, "loss": 0.9241, "step": 6072 }, { "epoch": 0.08613434206281398, "grad_norm": 2.84375, "learning_rate": 4.914720571548747e-05, "loss": 0.8352, "step": 6074 }, { "epoch": 0.08616270371644019, "grad_norm": 2.96875, "learning_rate": 4.914662703666477e-05, "loss": 0.883, "step": 6076 }, { "epoch": 0.0861910653700664, "grad_norm": 2.9375, "learning_rate": 4.914604816498134e-05, "loss": 0.8734, "step": 6078 }, { "epoch": 0.08621942702369262, "grad_norm": 3.234375, "learning_rate": 4.914546910044181e-05, "loss": 0.9095, "step": 6080 }, { "epoch": 0.08624778867731883, "grad_norm": 3.46875, "learning_rate": 4.914488984305081e-05, "loss": 0.874, "step": 6082 }, { "epoch": 0.08627615033094505, "grad_norm": 2.984375, "learning_rate": 4.9144310392812976e-05, "loss": 0.871, "step": 6084 }, { "epoch": 0.08630451198457126, "grad_norm": 2.984375, "learning_rate": 4.914373074973291e-05, "loss": 0.8627, "step": 6086 }, { "epoch": 0.08633287363819747, "grad_norm": 2.890625, "learning_rate": 4.914315091381526e-05, "loss": 0.8534, "step": 6088 }, { "epoch": 0.08636123529182368, "grad_norm": 2.796875, "learning_rate": 4.9142570885064656e-05, "loss": 0.8676, "step": 6090 }, { "epoch": 0.0863895969454499, "grad_norm": 3.03125, "learning_rate": 4.914199066348573e-05, "loss": 0.9228, "step": 6092 }, { "epoch": 0.08641795859907613, "grad_norm": 3.078125, "learning_rate": 4.914141024908311e-05, "loss": 0.8517, "step": 6094 }, { "epoch": 0.08644632025270234, "grad_norm": 3.203125, "learning_rate": 4.914082964186143e-05, "loss": 0.9163, "step": 6096 }, { "epoch": 0.08647468190632855, "grad_norm": 3.046875, "learning_rate": 4.914024884182534e-05, "loss": 0.9037, "step": 6098 }, { "epoch": 0.08650304355995476, "grad_norm": 2.734375, "learning_rate": 4.913966784897948e-05, "loss": 0.9012, "step": 6100 }, { "epoch": 0.08653140521358098, "grad_norm": 2.84375, "learning_rate": 4.913908666332847e-05, "loss": 0.8996, "step": 6102 }, { "epoch": 0.08655976686720719, "grad_norm": 3.078125, "learning_rate": 4.913850528487697e-05, "loss": 0.8699, "step": 6104 }, { "epoch": 0.0865881285208334, "grad_norm": 3.015625, "learning_rate": 4.913792371362962e-05, "loss": 0.8741, "step": 6106 }, { "epoch": 0.08661649017445962, "grad_norm": 2.921875, "learning_rate": 4.913734194959105e-05, "loss": 0.8788, "step": 6108 }, { "epoch": 0.08664485182808583, "grad_norm": 2.96875, "learning_rate": 4.9136759992765936e-05, "loss": 0.905, "step": 6110 }, { "epoch": 0.08667321348171206, "grad_norm": 3.125, "learning_rate": 4.9136177843158906e-05, "loss": 0.9122, "step": 6112 }, { "epoch": 0.08670157513533827, "grad_norm": 3.0, "learning_rate": 4.913559550077461e-05, "loss": 0.8507, "step": 6114 }, { "epoch": 0.08672993678896448, "grad_norm": 3.640625, "learning_rate": 4.913501296561771e-05, "loss": 0.8961, "step": 6116 }, { "epoch": 0.0867582984425907, "grad_norm": 3.421875, "learning_rate": 4.9134430237692845e-05, "loss": 0.8672, "step": 6118 }, { "epoch": 0.08678666009621691, "grad_norm": 3.0625, "learning_rate": 4.913384731700468e-05, "loss": 0.8761, "step": 6120 }, { "epoch": 0.08681502174984312, "grad_norm": 3.046875, "learning_rate": 4.913326420355787e-05, "loss": 0.8454, "step": 6122 }, { "epoch": 0.08684338340346934, "grad_norm": 3.0625, "learning_rate": 4.913268089735706e-05, "loss": 0.9223, "step": 6124 }, { "epoch": 0.08687174505709555, "grad_norm": 3.0, "learning_rate": 4.9132097398406916e-05, "loss": 0.851, "step": 6126 }, { "epoch": 0.08690010671072176, "grad_norm": 3.15625, "learning_rate": 4.913151370671211e-05, "loss": 0.8899, "step": 6128 }, { "epoch": 0.08692846836434798, "grad_norm": 3.03125, "learning_rate": 4.913092982227729e-05, "loss": 0.8859, "step": 6130 }, { "epoch": 0.0869568300179742, "grad_norm": 3.140625, "learning_rate": 4.913034574510713e-05, "loss": 0.8453, "step": 6132 }, { "epoch": 0.08698519167160042, "grad_norm": 3.359375, "learning_rate": 4.9129761475206285e-05, "loss": 0.8815, "step": 6134 }, { "epoch": 0.08701355332522663, "grad_norm": 3.359375, "learning_rate": 4.9129177012579416e-05, "loss": 0.9054, "step": 6136 }, { "epoch": 0.08704191497885284, "grad_norm": 3.0625, "learning_rate": 4.912859235723122e-05, "loss": 0.8965, "step": 6138 }, { "epoch": 0.08707027663247906, "grad_norm": 3.171875, "learning_rate": 4.912800750916633e-05, "loss": 0.8775, "step": 6140 }, { "epoch": 0.08709863828610527, "grad_norm": 3.015625, "learning_rate": 4.912742246838944e-05, "loss": 0.8736, "step": 6142 }, { "epoch": 0.08712699993973148, "grad_norm": 3.125, "learning_rate": 4.912683723490522e-05, "loss": 0.918, "step": 6144 }, { "epoch": 0.0871553615933577, "grad_norm": 2.9375, "learning_rate": 4.912625180871834e-05, "loss": 0.8984, "step": 6146 }, { "epoch": 0.08718372324698391, "grad_norm": 2.84375, "learning_rate": 4.9125666189833483e-05, "loss": 0.9164, "step": 6148 }, { "epoch": 0.08721208490061012, "grad_norm": 2.953125, "learning_rate": 4.912508037825531e-05, "loss": 0.9069, "step": 6150 }, { "epoch": 0.08724044655423635, "grad_norm": 3.421875, "learning_rate": 4.9124494373988516e-05, "loss": 0.8824, "step": 6152 }, { "epoch": 0.08726880820786256, "grad_norm": 3.359375, "learning_rate": 4.912390817703778e-05, "loss": 0.8687, "step": 6154 }, { "epoch": 0.08729716986148878, "grad_norm": 3.453125, "learning_rate": 4.912332178740778e-05, "loss": 0.9036, "step": 6156 }, { "epoch": 0.08732553151511499, "grad_norm": 3.34375, "learning_rate": 4.91227352051032e-05, "loss": 0.8007, "step": 6158 }, { "epoch": 0.0873538931687412, "grad_norm": 3.015625, "learning_rate": 4.912214843012872e-05, "loss": 0.8963, "step": 6160 }, { "epoch": 0.08738225482236742, "grad_norm": 3.28125, "learning_rate": 4.912156146248903e-05, "loss": 0.8479, "step": 6162 }, { "epoch": 0.08741061647599363, "grad_norm": 3.03125, "learning_rate": 4.9120974302188824e-05, "loss": 0.9364, "step": 6164 }, { "epoch": 0.08743897812961984, "grad_norm": 3.109375, "learning_rate": 4.912038694923279e-05, "loss": 0.8637, "step": 6166 }, { "epoch": 0.08746733978324606, "grad_norm": 3.265625, "learning_rate": 4.911979940362561e-05, "loss": 0.9153, "step": 6168 }, { "epoch": 0.08749570143687228, "grad_norm": 3.078125, "learning_rate": 4.911921166537199e-05, "loss": 0.8486, "step": 6170 }, { "epoch": 0.0875240630904985, "grad_norm": 3.078125, "learning_rate": 4.911862373447661e-05, "loss": 0.8495, "step": 6172 }, { "epoch": 0.08755242474412471, "grad_norm": 3.078125, "learning_rate": 4.911803561094418e-05, "loss": 0.8741, "step": 6174 }, { "epoch": 0.08758078639775092, "grad_norm": 2.953125, "learning_rate": 4.911744729477939e-05, "loss": 0.8693, "step": 6176 }, { "epoch": 0.08760914805137714, "grad_norm": 2.984375, "learning_rate": 4.911685878598694e-05, "loss": 0.8658, "step": 6178 }, { "epoch": 0.08763750970500335, "grad_norm": 3.140625, "learning_rate": 4.9116270084571524e-05, "loss": 0.9404, "step": 6180 }, { "epoch": 0.08766587135862956, "grad_norm": 3.3125, "learning_rate": 4.911568119053786e-05, "loss": 0.9266, "step": 6182 }, { "epoch": 0.08769423301225578, "grad_norm": 3.828125, "learning_rate": 4.9115092103890636e-05, "loss": 0.9067, "step": 6184 }, { "epoch": 0.08772259466588199, "grad_norm": 2.96875, "learning_rate": 4.911450282463457e-05, "loss": 0.8358, "step": 6186 }, { "epoch": 0.0877509563195082, "grad_norm": 2.890625, "learning_rate": 4.911391335277435e-05, "loss": 0.891, "step": 6188 }, { "epoch": 0.08777931797313443, "grad_norm": 2.734375, "learning_rate": 4.91133236883147e-05, "loss": 0.8321, "step": 6190 }, { "epoch": 0.08780767962676064, "grad_norm": 3.234375, "learning_rate": 4.9112733831260325e-05, "loss": 0.9442, "step": 6192 }, { "epoch": 0.08783604128038686, "grad_norm": 2.734375, "learning_rate": 4.911214378161594e-05, "loss": 0.9014, "step": 6194 }, { "epoch": 0.08786440293401307, "grad_norm": 3.359375, "learning_rate": 4.911155353938625e-05, "loss": 0.8835, "step": 6196 }, { "epoch": 0.08789276458763928, "grad_norm": 3.1875, "learning_rate": 4.911096310457598e-05, "loss": 0.8834, "step": 6198 }, { "epoch": 0.0879211262412655, "grad_norm": 3.15625, "learning_rate": 4.9110372477189834e-05, "loss": 0.8237, "step": 6200 }, { "epoch": 0.08794948789489171, "grad_norm": 3.21875, "learning_rate": 4.910978165723253e-05, "loss": 0.9121, "step": 6202 }, { "epoch": 0.08797784954851792, "grad_norm": 3.125, "learning_rate": 4.91091906447088e-05, "loss": 0.8913, "step": 6204 }, { "epoch": 0.08800621120214414, "grad_norm": 3.078125, "learning_rate": 4.910859943962336e-05, "loss": 0.9398, "step": 6206 }, { "epoch": 0.08803457285577035, "grad_norm": 2.9375, "learning_rate": 4.910800804198092e-05, "loss": 0.8788, "step": 6208 }, { "epoch": 0.08806293450939658, "grad_norm": 3.1875, "learning_rate": 4.910741645178621e-05, "loss": 0.9172, "step": 6210 }, { "epoch": 0.08809129616302279, "grad_norm": 2.9375, "learning_rate": 4.9106824669043964e-05, "loss": 0.8654, "step": 6212 }, { "epoch": 0.088119657816649, "grad_norm": 2.984375, "learning_rate": 4.91062326937589e-05, "loss": 0.8587, "step": 6214 }, { "epoch": 0.08814801947027522, "grad_norm": 3.5, "learning_rate": 4.910564052593575e-05, "loss": 0.904, "step": 6216 }, { "epoch": 0.08817638112390143, "grad_norm": 2.859375, "learning_rate": 4.910504816557923e-05, "loss": 0.8675, "step": 6218 }, { "epoch": 0.08820474277752764, "grad_norm": 3.15625, "learning_rate": 4.910445561269409e-05, "loss": 0.8958, "step": 6220 }, { "epoch": 0.08823310443115386, "grad_norm": 3.359375, "learning_rate": 4.9103862867285056e-05, "loss": 0.8552, "step": 6222 }, { "epoch": 0.08826146608478007, "grad_norm": 2.90625, "learning_rate": 4.910326992935686e-05, "loss": 0.9202, "step": 6224 }, { "epoch": 0.08828982773840628, "grad_norm": 2.9375, "learning_rate": 4.910267679891424e-05, "loss": 0.8459, "step": 6226 }, { "epoch": 0.08831818939203251, "grad_norm": 3.109375, "learning_rate": 4.9102083475961934e-05, "loss": 0.8979, "step": 6228 }, { "epoch": 0.08834655104565872, "grad_norm": 2.96875, "learning_rate": 4.910148996050467e-05, "loss": 0.9162, "step": 6230 }, { "epoch": 0.08837491269928494, "grad_norm": 3.21875, "learning_rate": 4.9100896252547215e-05, "loss": 0.8816, "step": 6232 }, { "epoch": 0.08840327435291115, "grad_norm": 3.484375, "learning_rate": 4.910030235209428e-05, "loss": 0.8194, "step": 6234 }, { "epoch": 0.08843163600653736, "grad_norm": 3.21875, "learning_rate": 4.9099708259150634e-05, "loss": 0.9006, "step": 6236 }, { "epoch": 0.08845999766016358, "grad_norm": 2.84375, "learning_rate": 4.9099113973721e-05, "loss": 0.8937, "step": 6238 }, { "epoch": 0.08848835931378979, "grad_norm": 3.171875, "learning_rate": 4.909851949581015e-05, "loss": 0.8965, "step": 6240 }, { "epoch": 0.088516720967416, "grad_norm": 3.015625, "learning_rate": 4.90979248254228e-05, "loss": 0.8777, "step": 6242 }, { "epoch": 0.08854508262104221, "grad_norm": 3.015625, "learning_rate": 4.909732996256373e-05, "loss": 0.8811, "step": 6244 }, { "epoch": 0.08857344427466843, "grad_norm": 3.0, "learning_rate": 4.909673490723767e-05, "loss": 0.914, "step": 6246 }, { "epoch": 0.08860180592829466, "grad_norm": 2.75, "learning_rate": 4.909613965944938e-05, "loss": 0.873, "step": 6248 }, { "epoch": 0.08863016758192087, "grad_norm": 3.078125, "learning_rate": 4.909554421920362e-05, "loss": 0.8939, "step": 6250 }, { "epoch": 0.08865852923554708, "grad_norm": 3.328125, "learning_rate": 4.909494858650514e-05, "loss": 0.9186, "step": 6252 }, { "epoch": 0.0886868908891733, "grad_norm": 3.328125, "learning_rate": 4.90943527613587e-05, "loss": 0.8537, "step": 6254 }, { "epoch": 0.08871525254279951, "grad_norm": 3.21875, "learning_rate": 4.909375674376906e-05, "loss": 0.8592, "step": 6256 }, { "epoch": 0.08874361419642572, "grad_norm": 3.0, "learning_rate": 4.909316053374097e-05, "loss": 0.8785, "step": 6258 }, { "epoch": 0.08877197585005193, "grad_norm": 2.875, "learning_rate": 4.9092564131279204e-05, "loss": 0.8761, "step": 6260 }, { "epoch": 0.08880033750367815, "grad_norm": 3.1875, "learning_rate": 4.909196753638852e-05, "loss": 0.8668, "step": 6262 }, { "epoch": 0.08882869915730436, "grad_norm": 3.015625, "learning_rate": 4.909137074907368e-05, "loss": 0.884, "step": 6264 }, { "epoch": 0.08885706081093059, "grad_norm": 3.484375, "learning_rate": 4.909077376933947e-05, "loss": 0.9353, "step": 6266 }, { "epoch": 0.0888854224645568, "grad_norm": 2.953125, "learning_rate": 4.9090176597190626e-05, "loss": 0.9077, "step": 6268 }, { "epoch": 0.08891378411818301, "grad_norm": 2.875, "learning_rate": 4.908957923263194e-05, "loss": 0.8874, "step": 6270 }, { "epoch": 0.08894214577180923, "grad_norm": 2.75, "learning_rate": 4.9088981675668185e-05, "loss": 0.9148, "step": 6272 }, { "epoch": 0.08897050742543544, "grad_norm": 3.1875, "learning_rate": 4.908838392630412e-05, "loss": 0.8923, "step": 6274 }, { "epoch": 0.08899886907906165, "grad_norm": 3.203125, "learning_rate": 4.908778598454453e-05, "loss": 0.904, "step": 6276 }, { "epoch": 0.08902723073268787, "grad_norm": 2.78125, "learning_rate": 4.908718785039418e-05, "loss": 0.863, "step": 6278 }, { "epoch": 0.08905559238631408, "grad_norm": 3.296875, "learning_rate": 4.908658952385786e-05, "loss": 0.8702, "step": 6280 }, { "epoch": 0.0890839540399403, "grad_norm": 3.6875, "learning_rate": 4.908599100494034e-05, "loss": 0.874, "step": 6282 }, { "epoch": 0.08911231569356651, "grad_norm": 3.21875, "learning_rate": 4.90853922936464e-05, "loss": 0.8578, "step": 6284 }, { "epoch": 0.08914067734719273, "grad_norm": 2.78125, "learning_rate": 4.908479338998083e-05, "loss": 0.8572, "step": 6286 }, { "epoch": 0.08916903900081895, "grad_norm": 2.859375, "learning_rate": 4.9084194293948416e-05, "loss": 0.8532, "step": 6288 }, { "epoch": 0.08919740065444516, "grad_norm": 3.109375, "learning_rate": 4.9083595005553926e-05, "loss": 0.9366, "step": 6290 }, { "epoch": 0.08922576230807137, "grad_norm": 3.15625, "learning_rate": 4.9082995524802157e-05, "loss": 0.9382, "step": 6292 }, { "epoch": 0.08925412396169759, "grad_norm": 3.71875, "learning_rate": 4.908239585169789e-05, "loss": 0.9359, "step": 6294 }, { "epoch": 0.0892824856153238, "grad_norm": 3.078125, "learning_rate": 4.908179598624594e-05, "loss": 0.925, "step": 6296 }, { "epoch": 0.08931084726895001, "grad_norm": 3.015625, "learning_rate": 4.908119592845106e-05, "loss": 0.8669, "step": 6298 }, { "epoch": 0.08933920892257623, "grad_norm": 3.3125, "learning_rate": 4.908059567831808e-05, "loss": 0.8603, "step": 6300 }, { "epoch": 0.08936757057620244, "grad_norm": 3.609375, "learning_rate": 4.907999523585176e-05, "loss": 0.8601, "step": 6302 }, { "epoch": 0.08939593222982865, "grad_norm": 2.734375, "learning_rate": 4.907939460105692e-05, "loss": 0.8594, "step": 6304 }, { "epoch": 0.08942429388345488, "grad_norm": 3.203125, "learning_rate": 4.907879377393835e-05, "loss": 0.8719, "step": 6306 }, { "epoch": 0.0894526555370811, "grad_norm": 2.78125, "learning_rate": 4.907819275450084e-05, "loss": 0.9258, "step": 6308 }, { "epoch": 0.08948101719070731, "grad_norm": 3.390625, "learning_rate": 4.90775915427492e-05, "loss": 0.8811, "step": 6310 }, { "epoch": 0.08950937884433352, "grad_norm": 3.3125, "learning_rate": 4.907699013868824e-05, "loss": 0.9276, "step": 6312 }, { "epoch": 0.08953774049795973, "grad_norm": 3.578125, "learning_rate": 4.907638854232274e-05, "loss": 0.8658, "step": 6314 }, { "epoch": 0.08956610215158595, "grad_norm": 3.078125, "learning_rate": 4.907578675365753e-05, "loss": 0.8448, "step": 6316 }, { "epoch": 0.08959446380521216, "grad_norm": 2.9375, "learning_rate": 4.90751847726974e-05, "loss": 0.8545, "step": 6318 }, { "epoch": 0.08962282545883837, "grad_norm": 3.0, "learning_rate": 4.9074582599447165e-05, "loss": 0.8594, "step": 6320 }, { "epoch": 0.08965118711246459, "grad_norm": 3.140625, "learning_rate": 4.9073980233911635e-05, "loss": 0.864, "step": 6322 }, { "epoch": 0.08967954876609081, "grad_norm": 3.453125, "learning_rate": 4.9073377676095614e-05, "loss": 0.9123, "step": 6324 }, { "epoch": 0.08970791041971703, "grad_norm": 3.328125, "learning_rate": 4.907277492600393e-05, "loss": 0.8668, "step": 6326 }, { "epoch": 0.08973627207334324, "grad_norm": 2.984375, "learning_rate": 4.907217198364138e-05, "loss": 0.8822, "step": 6328 }, { "epoch": 0.08976463372696945, "grad_norm": 3.390625, "learning_rate": 4.907156884901278e-05, "loss": 0.829, "step": 6330 }, { "epoch": 0.08979299538059567, "grad_norm": 2.875, "learning_rate": 4.907096552212296e-05, "loss": 0.8687, "step": 6332 }, { "epoch": 0.08982135703422188, "grad_norm": 3.6875, "learning_rate": 4.907036200297673e-05, "loss": 0.8842, "step": 6334 }, { "epoch": 0.08984971868784809, "grad_norm": 3.25, "learning_rate": 4.906975829157893e-05, "loss": 0.9013, "step": 6336 }, { "epoch": 0.0898780803414743, "grad_norm": 3.296875, "learning_rate": 4.906915438793435e-05, "loss": 0.8717, "step": 6338 }, { "epoch": 0.08990644199510052, "grad_norm": 3.25, "learning_rate": 4.906855029204782e-05, "loss": 0.8495, "step": 6340 }, { "epoch": 0.08993480364872673, "grad_norm": 3.4375, "learning_rate": 4.906794600392419e-05, "loss": 0.8963, "step": 6342 }, { "epoch": 0.08996316530235296, "grad_norm": 3.25, "learning_rate": 4.906734152356826e-05, "loss": 0.9054, "step": 6344 }, { "epoch": 0.08999152695597917, "grad_norm": 3.0625, "learning_rate": 4.906673685098487e-05, "loss": 0.8951, "step": 6346 }, { "epoch": 0.09001988860960539, "grad_norm": 3.015625, "learning_rate": 4.906613198617886e-05, "loss": 0.8859, "step": 6348 }, { "epoch": 0.0900482502632316, "grad_norm": 3.015625, "learning_rate": 4.906552692915503e-05, "loss": 0.8411, "step": 6350 }, { "epoch": 0.09007661191685781, "grad_norm": 2.953125, "learning_rate": 4.906492167991824e-05, "loss": 0.8694, "step": 6352 }, { "epoch": 0.09010497357048403, "grad_norm": 2.96875, "learning_rate": 4.906431623847332e-05, "loss": 0.8775, "step": 6354 }, { "epoch": 0.09013333522411024, "grad_norm": 2.796875, "learning_rate": 4.9063710604825085e-05, "loss": 0.8967, "step": 6356 }, { "epoch": 0.09016169687773645, "grad_norm": 2.953125, "learning_rate": 4.90631047789784e-05, "loss": 0.8717, "step": 6358 }, { "epoch": 0.09019005853136267, "grad_norm": 2.84375, "learning_rate": 4.9062498760938094e-05, "loss": 0.925, "step": 6360 }, { "epoch": 0.09021842018498888, "grad_norm": 3.296875, "learning_rate": 4.9061892550709e-05, "loss": 0.9312, "step": 6362 }, { "epoch": 0.0902467818386151, "grad_norm": 2.890625, "learning_rate": 4.906128614829597e-05, "loss": 0.8783, "step": 6364 }, { "epoch": 0.09027514349224132, "grad_norm": 3.609375, "learning_rate": 4.906067955370384e-05, "loss": 0.8773, "step": 6366 }, { "epoch": 0.09030350514586753, "grad_norm": 3.078125, "learning_rate": 4.906007276693746e-05, "loss": 0.8603, "step": 6368 }, { "epoch": 0.09033186679949375, "grad_norm": 3.03125, "learning_rate": 4.905946578800167e-05, "loss": 0.8712, "step": 6370 }, { "epoch": 0.09036022845311996, "grad_norm": 2.953125, "learning_rate": 4.905885861690133e-05, "loss": 0.8405, "step": 6372 }, { "epoch": 0.09038859010674617, "grad_norm": 3.015625, "learning_rate": 4.905825125364127e-05, "loss": 0.8595, "step": 6374 }, { "epoch": 0.09041695176037239, "grad_norm": 3.1875, "learning_rate": 4.9057643698226355e-05, "loss": 0.8645, "step": 6376 }, { "epoch": 0.0904453134139986, "grad_norm": 2.84375, "learning_rate": 4.9057035950661435e-05, "loss": 0.8645, "step": 6378 }, { "epoch": 0.09047367506762481, "grad_norm": 3.15625, "learning_rate": 4.9056428010951374e-05, "loss": 0.9341, "step": 6380 }, { "epoch": 0.09050203672125104, "grad_norm": 3.234375, "learning_rate": 4.905581987910101e-05, "loss": 0.8564, "step": 6382 }, { "epoch": 0.09053039837487725, "grad_norm": 3.40625, "learning_rate": 4.9055211555115206e-05, "loss": 0.8833, "step": 6384 }, { "epoch": 0.09055876002850347, "grad_norm": 3.25, "learning_rate": 4.9054603038998824e-05, "loss": 0.8759, "step": 6386 }, { "epoch": 0.09058712168212968, "grad_norm": 3.015625, "learning_rate": 4.905399433075672e-05, "loss": 0.8734, "step": 6388 }, { "epoch": 0.09061548333575589, "grad_norm": 3.046875, "learning_rate": 4.905338543039377e-05, "loss": 0.9035, "step": 6390 }, { "epoch": 0.0906438449893821, "grad_norm": 3.015625, "learning_rate": 4.9052776337914816e-05, "loss": 0.9069, "step": 6392 }, { "epoch": 0.09067220664300832, "grad_norm": 3.4375, "learning_rate": 4.9052167053324736e-05, "loss": 0.8845, "step": 6394 }, { "epoch": 0.09070056829663453, "grad_norm": 3.765625, "learning_rate": 4.90515575766284e-05, "loss": 0.8457, "step": 6396 }, { "epoch": 0.09072892995026074, "grad_norm": 3.046875, "learning_rate": 4.9050947907830665e-05, "loss": 0.8829, "step": 6398 }, { "epoch": 0.09075729160388696, "grad_norm": 3.390625, "learning_rate": 4.9050338046936404e-05, "loss": 0.8961, "step": 6400 }, { "epoch": 0.09078565325751319, "grad_norm": 3.421875, "learning_rate": 4.9049727993950486e-05, "loss": 0.9306, "step": 6402 }, { "epoch": 0.0908140149111394, "grad_norm": 3.078125, "learning_rate": 4.904911774887779e-05, "loss": 0.8468, "step": 6404 }, { "epoch": 0.09084237656476561, "grad_norm": 2.984375, "learning_rate": 4.9048507311723184e-05, "loss": 0.9169, "step": 6406 }, { "epoch": 0.09087073821839182, "grad_norm": 3.53125, "learning_rate": 4.904789668249154e-05, "loss": 0.8817, "step": 6408 }, { "epoch": 0.09089909987201804, "grad_norm": 3.015625, "learning_rate": 4.9047285861187755e-05, "loss": 0.8795, "step": 6410 }, { "epoch": 0.09092746152564425, "grad_norm": 3.125, "learning_rate": 4.9046674847816685e-05, "loss": 0.889, "step": 6412 }, { "epoch": 0.09095582317927046, "grad_norm": 3.0625, "learning_rate": 4.9046063642383224e-05, "loss": 0.8819, "step": 6414 }, { "epoch": 0.09098418483289668, "grad_norm": 2.953125, "learning_rate": 4.904545224489225e-05, "loss": 0.8743, "step": 6416 }, { "epoch": 0.09101254648652289, "grad_norm": 2.8125, "learning_rate": 4.904484065534864e-05, "loss": 0.8897, "step": 6418 }, { "epoch": 0.09104090814014912, "grad_norm": 2.765625, "learning_rate": 4.904422887375728e-05, "loss": 0.8486, "step": 6420 }, { "epoch": 0.09106926979377533, "grad_norm": 2.6875, "learning_rate": 4.904361690012307e-05, "loss": 0.8739, "step": 6422 }, { "epoch": 0.09109763144740154, "grad_norm": 3.0, "learning_rate": 4.904300473445088e-05, "loss": 0.8787, "step": 6424 }, { "epoch": 0.09112599310102776, "grad_norm": 3.671875, "learning_rate": 4.904239237674561e-05, "loss": 0.9144, "step": 6426 }, { "epoch": 0.09115435475465397, "grad_norm": 2.96875, "learning_rate": 4.9041779827012156e-05, "loss": 0.9224, "step": 6428 }, { "epoch": 0.09118271640828018, "grad_norm": 3.0, "learning_rate": 4.904116708525539e-05, "loss": 0.8553, "step": 6430 }, { "epoch": 0.0912110780619064, "grad_norm": 2.515625, "learning_rate": 4.9040554151480235e-05, "loss": 0.8409, "step": 6432 }, { "epoch": 0.09123943971553261, "grad_norm": 2.921875, "learning_rate": 4.9039941025691556e-05, "loss": 0.938, "step": 6434 }, { "epoch": 0.09126780136915882, "grad_norm": 3.09375, "learning_rate": 4.9039327707894264e-05, "loss": 0.8864, "step": 6436 }, { "epoch": 0.09129616302278504, "grad_norm": 3.1875, "learning_rate": 4.903871419809327e-05, "loss": 0.8494, "step": 6438 }, { "epoch": 0.09132452467641126, "grad_norm": 3.0625, "learning_rate": 4.903810049629346e-05, "loss": 0.9219, "step": 6440 }, { "epoch": 0.09135288633003748, "grad_norm": 3.015625, "learning_rate": 4.903748660249973e-05, "loss": 0.9118, "step": 6442 }, { "epoch": 0.09138124798366369, "grad_norm": 3.171875, "learning_rate": 4.9036872516716994e-05, "loss": 0.9096, "step": 6444 }, { "epoch": 0.0914096096372899, "grad_norm": 2.921875, "learning_rate": 4.903625823895015e-05, "loss": 0.8269, "step": 6446 }, { "epoch": 0.09143797129091612, "grad_norm": 3.125, "learning_rate": 4.903564376920412e-05, "loss": 0.8904, "step": 6448 }, { "epoch": 0.09146633294454233, "grad_norm": 2.875, "learning_rate": 4.9035029107483785e-05, "loss": 0.8602, "step": 6450 }, { "epoch": 0.09149469459816854, "grad_norm": 3.140625, "learning_rate": 4.903441425379408e-05, "loss": 0.9092, "step": 6452 }, { "epoch": 0.09152305625179476, "grad_norm": 3.0, "learning_rate": 4.903379920813991e-05, "loss": 0.937, "step": 6454 }, { "epoch": 0.09155141790542097, "grad_norm": 2.796875, "learning_rate": 4.9033183970526174e-05, "loss": 0.8677, "step": 6456 }, { "epoch": 0.09157977955904718, "grad_norm": 2.828125, "learning_rate": 4.903256854095779e-05, "loss": 0.8993, "step": 6458 }, { "epoch": 0.09160814121267341, "grad_norm": 3.171875, "learning_rate": 4.903195291943969e-05, "loss": 0.8779, "step": 6460 }, { "epoch": 0.09163650286629962, "grad_norm": 2.78125, "learning_rate": 4.903133710597677e-05, "loss": 0.896, "step": 6462 }, { "epoch": 0.09166486451992584, "grad_norm": 3.765625, "learning_rate": 4.9030721100573963e-05, "loss": 0.9207, "step": 6464 }, { "epoch": 0.09169322617355205, "grad_norm": 3.59375, "learning_rate": 4.903010490323618e-05, "loss": 0.9265, "step": 6466 }, { "epoch": 0.09172158782717826, "grad_norm": 3.171875, "learning_rate": 4.9029488513968346e-05, "loss": 0.8462, "step": 6468 }, { "epoch": 0.09174994948080448, "grad_norm": 3.15625, "learning_rate": 4.902887193277539e-05, "loss": 0.8527, "step": 6470 }, { "epoch": 0.09177831113443069, "grad_norm": 3.21875, "learning_rate": 4.902825515966223e-05, "loss": 0.9096, "step": 6472 }, { "epoch": 0.0918066727880569, "grad_norm": 2.828125, "learning_rate": 4.902763819463379e-05, "loss": 0.8442, "step": 6474 }, { "epoch": 0.09183503444168312, "grad_norm": 3.125, "learning_rate": 4.9027021037695e-05, "loss": 0.899, "step": 6476 }, { "epoch": 0.09186339609530934, "grad_norm": 3.125, "learning_rate": 4.9026403688850795e-05, "loss": 0.8729, "step": 6478 }, { "epoch": 0.09189175774893556, "grad_norm": 3.34375, "learning_rate": 4.902578614810609e-05, "loss": 0.9065, "step": 6480 }, { "epoch": 0.09192011940256177, "grad_norm": 3.234375, "learning_rate": 4.902516841546584e-05, "loss": 0.8838, "step": 6482 }, { "epoch": 0.09194848105618798, "grad_norm": 3.265625, "learning_rate": 4.9024550490934965e-05, "loss": 0.8958, "step": 6484 }, { "epoch": 0.0919768427098142, "grad_norm": 2.984375, "learning_rate": 4.90239323745184e-05, "loss": 0.8858, "step": 6486 }, { "epoch": 0.09200520436344041, "grad_norm": 2.921875, "learning_rate": 4.902331406622109e-05, "loss": 0.828, "step": 6488 }, { "epoch": 0.09203356601706662, "grad_norm": 2.859375, "learning_rate": 4.902269556604796e-05, "loss": 0.8768, "step": 6490 }, { "epoch": 0.09206192767069284, "grad_norm": 3.25, "learning_rate": 4.902207687400396e-05, "loss": 0.8645, "step": 6492 }, { "epoch": 0.09209028932431905, "grad_norm": 3.171875, "learning_rate": 4.902145799009404e-05, "loss": 0.8542, "step": 6494 }, { "epoch": 0.09211865097794526, "grad_norm": 3.0, "learning_rate": 4.902083891432312e-05, "loss": 0.9246, "step": 6496 }, { "epoch": 0.09214701263157149, "grad_norm": 3.484375, "learning_rate": 4.902021964669617e-05, "loss": 0.8607, "step": 6498 }, { "epoch": 0.0921753742851977, "grad_norm": 3.015625, "learning_rate": 4.901960018721812e-05, "loss": 0.8936, "step": 6500 }, { "epoch": 0.09220373593882392, "grad_norm": 3.46875, "learning_rate": 4.901898053589391e-05, "loss": 0.8963, "step": 6502 }, { "epoch": 0.09223209759245013, "grad_norm": 3.03125, "learning_rate": 4.901836069272851e-05, "loss": 0.8849, "step": 6504 }, { "epoch": 0.09226045924607634, "grad_norm": 3.5, "learning_rate": 4.901774065772686e-05, "loss": 0.9654, "step": 6506 }, { "epoch": 0.09228882089970256, "grad_norm": 2.890625, "learning_rate": 4.901712043089392e-05, "loss": 0.924, "step": 6508 }, { "epoch": 0.09231718255332877, "grad_norm": 3.296875, "learning_rate": 4.9016500012234626e-05, "loss": 0.8835, "step": 6510 }, { "epoch": 0.09234554420695498, "grad_norm": 3.265625, "learning_rate": 4.901587940175394e-05, "loss": 0.8731, "step": 6512 }, { "epoch": 0.0923739058605812, "grad_norm": 3.109375, "learning_rate": 4.9015258599456835e-05, "loss": 0.8667, "step": 6514 }, { "epoch": 0.09240226751420741, "grad_norm": 2.90625, "learning_rate": 4.9014637605348255e-05, "loss": 0.8724, "step": 6516 }, { "epoch": 0.09243062916783364, "grad_norm": 3.109375, "learning_rate": 4.9014016419433164e-05, "loss": 0.894, "step": 6518 }, { "epoch": 0.09245899082145985, "grad_norm": 2.9375, "learning_rate": 4.9013395041716515e-05, "loss": 0.8424, "step": 6520 }, { "epoch": 0.09248735247508606, "grad_norm": 2.828125, "learning_rate": 4.901277347220329e-05, "loss": 0.8635, "step": 6522 }, { "epoch": 0.09251571412871228, "grad_norm": 2.984375, "learning_rate": 4.901215171089843e-05, "loss": 0.8755, "step": 6524 }, { "epoch": 0.09254407578233849, "grad_norm": 3.28125, "learning_rate": 4.901152975780692e-05, "loss": 0.8763, "step": 6526 }, { "epoch": 0.0925724374359647, "grad_norm": 3.59375, "learning_rate": 4.9010907612933716e-05, "loss": 0.9336, "step": 6528 }, { "epoch": 0.09260079908959092, "grad_norm": 3.25, "learning_rate": 4.90102852762838e-05, "loss": 0.8731, "step": 6530 }, { "epoch": 0.09262916074321713, "grad_norm": 3.140625, "learning_rate": 4.9009662747862115e-05, "loss": 0.863, "step": 6532 }, { "epoch": 0.09265752239684334, "grad_norm": 3.40625, "learning_rate": 4.900904002767367e-05, "loss": 0.8919, "step": 6534 }, { "epoch": 0.09268588405046957, "grad_norm": 3.796875, "learning_rate": 4.900841711572341e-05, "loss": 0.8807, "step": 6536 }, { "epoch": 0.09271424570409578, "grad_norm": 2.953125, "learning_rate": 4.900779401201633e-05, "loss": 0.8986, "step": 6538 }, { "epoch": 0.092742607357722, "grad_norm": 2.6875, "learning_rate": 4.900717071655739e-05, "loss": 0.8573, "step": 6540 }, { "epoch": 0.09277096901134821, "grad_norm": 3.03125, "learning_rate": 4.9006547229351587e-05, "loss": 0.8298, "step": 6542 }, { "epoch": 0.09279933066497442, "grad_norm": 2.890625, "learning_rate": 4.900592355040388e-05, "loss": 0.865, "step": 6544 }, { "epoch": 0.09282769231860064, "grad_norm": 2.953125, "learning_rate": 4.900529967971926e-05, "loss": 0.9298, "step": 6546 }, { "epoch": 0.09285605397222685, "grad_norm": 3.09375, "learning_rate": 4.900467561730272e-05, "loss": 0.8454, "step": 6548 }, { "epoch": 0.09288441562585306, "grad_norm": 3.109375, "learning_rate": 4.900405136315923e-05, "loss": 0.9067, "step": 6550 }, { "epoch": 0.09291277727947928, "grad_norm": 2.796875, "learning_rate": 4.900342691729378e-05, "loss": 0.8655, "step": 6552 }, { "epoch": 0.09294113893310549, "grad_norm": 3.234375, "learning_rate": 4.900280227971136e-05, "loss": 0.8891, "step": 6554 }, { "epoch": 0.09296950058673172, "grad_norm": 3.046875, "learning_rate": 4.9002177450416946e-05, "loss": 0.8318, "step": 6556 }, { "epoch": 0.09299786224035793, "grad_norm": 3.09375, "learning_rate": 4.9001552429415554e-05, "loss": 0.867, "step": 6558 }, { "epoch": 0.09302622389398414, "grad_norm": 3.71875, "learning_rate": 4.9000927216712154e-05, "loss": 0.945, "step": 6560 }, { "epoch": 0.09305458554761036, "grad_norm": 3.09375, "learning_rate": 4.900030181231175e-05, "loss": 0.8675, "step": 6562 }, { "epoch": 0.09308294720123657, "grad_norm": 3.234375, "learning_rate": 4.8999676216219335e-05, "loss": 0.8982, "step": 6564 }, { "epoch": 0.09311130885486278, "grad_norm": 3.109375, "learning_rate": 4.89990504284399e-05, "loss": 0.8988, "step": 6566 }, { "epoch": 0.093139670508489, "grad_norm": 3.078125, "learning_rate": 4.8998424448978454e-05, "loss": 0.9393, "step": 6568 }, { "epoch": 0.09316803216211521, "grad_norm": 3.390625, "learning_rate": 4.899779827783999e-05, "loss": 0.9388, "step": 6570 }, { "epoch": 0.09319639381574142, "grad_norm": 3.046875, "learning_rate": 4.899717191502951e-05, "loss": 0.8086, "step": 6572 }, { "epoch": 0.09322475546936765, "grad_norm": 3.171875, "learning_rate": 4.899654536055202e-05, "loss": 0.89, "step": 6574 }, { "epoch": 0.09325311712299386, "grad_norm": 3.109375, "learning_rate": 4.899591861441252e-05, "loss": 0.8898, "step": 6576 }, { "epoch": 0.09328147877662007, "grad_norm": 2.734375, "learning_rate": 4.899529167661602e-05, "loss": 0.8941, "step": 6578 }, { "epoch": 0.09330984043024629, "grad_norm": 2.9375, "learning_rate": 4.8994664547167524e-05, "loss": 0.8683, "step": 6580 }, { "epoch": 0.0933382020838725, "grad_norm": 2.875, "learning_rate": 4.8994037226072037e-05, "loss": 0.7914, "step": 6582 }, { "epoch": 0.09336656373749871, "grad_norm": 2.84375, "learning_rate": 4.899340971333458e-05, "loss": 0.8767, "step": 6584 }, { "epoch": 0.09339492539112493, "grad_norm": 3.0625, "learning_rate": 4.899278200896015e-05, "loss": 0.8491, "step": 6586 }, { "epoch": 0.09342328704475114, "grad_norm": 3.03125, "learning_rate": 4.8992154112953784e-05, "loss": 0.9286, "step": 6588 }, { "epoch": 0.09345164869837735, "grad_norm": 3.125, "learning_rate": 4.899152602532047e-05, "loss": 0.8441, "step": 6590 }, { "epoch": 0.09348001035200357, "grad_norm": 3.03125, "learning_rate": 4.899089774606525e-05, "loss": 0.8873, "step": 6592 }, { "epoch": 0.0935083720056298, "grad_norm": 3.4375, "learning_rate": 4.899026927519311e-05, "loss": 0.8617, "step": 6594 }, { "epoch": 0.09353673365925601, "grad_norm": 2.859375, "learning_rate": 4.89896406127091e-05, "loss": 0.8339, "step": 6596 }, { "epoch": 0.09356509531288222, "grad_norm": 3.09375, "learning_rate": 4.898901175861824e-05, "loss": 0.8763, "step": 6598 }, { "epoch": 0.09359345696650843, "grad_norm": 2.9375, "learning_rate": 4.898838271292553e-05, "loss": 0.8788, "step": 6600 }, { "epoch": 0.09362181862013465, "grad_norm": 3.203125, "learning_rate": 4.8987753475636014e-05, "loss": 0.9189, "step": 6602 }, { "epoch": 0.09365018027376086, "grad_norm": 3.390625, "learning_rate": 4.8987124046754704e-05, "loss": 0.8549, "step": 6604 }, { "epoch": 0.09367854192738707, "grad_norm": 3.0, "learning_rate": 4.8986494426286644e-05, "loss": 0.8558, "step": 6606 }, { "epoch": 0.09370690358101329, "grad_norm": 2.84375, "learning_rate": 4.898586461423684e-05, "loss": 0.838, "step": 6608 }, { "epoch": 0.0937352652346395, "grad_norm": 3.0, "learning_rate": 4.898523461061035e-05, "loss": 0.8473, "step": 6610 }, { "epoch": 0.09376362688826571, "grad_norm": 2.96875, "learning_rate": 4.898460441541218e-05, "loss": 0.8665, "step": 6612 }, { "epoch": 0.09379198854189194, "grad_norm": 3.296875, "learning_rate": 4.8983974028647386e-05, "loss": 0.9024, "step": 6614 }, { "epoch": 0.09382035019551815, "grad_norm": 3.15625, "learning_rate": 4.898334345032099e-05, "loss": 0.9029, "step": 6616 }, { "epoch": 0.09384871184914437, "grad_norm": 3.265625, "learning_rate": 4.898271268043801e-05, "loss": 0.9052, "step": 6618 }, { "epoch": 0.09387707350277058, "grad_norm": 3.109375, "learning_rate": 4.8982081719003525e-05, "loss": 0.8748, "step": 6620 }, { "epoch": 0.0939054351563968, "grad_norm": 3.140625, "learning_rate": 4.8981450566022545e-05, "loss": 0.8654, "step": 6622 }, { "epoch": 0.09393379681002301, "grad_norm": 3.09375, "learning_rate": 4.898081922150013e-05, "loss": 0.8366, "step": 6624 }, { "epoch": 0.09396215846364922, "grad_norm": 3.359375, "learning_rate": 4.89801876854413e-05, "loss": 0.913, "step": 6626 }, { "epoch": 0.09399052011727543, "grad_norm": 3.0, "learning_rate": 4.897955595785112e-05, "loss": 0.8899, "step": 6628 }, { "epoch": 0.09401888177090165, "grad_norm": 3.25, "learning_rate": 4.897892403873462e-05, "loss": 0.9034, "step": 6630 }, { "epoch": 0.09404724342452787, "grad_norm": 2.546875, "learning_rate": 4.897829192809686e-05, "loss": 0.8241, "step": 6632 }, { "epoch": 0.09407560507815409, "grad_norm": 3.234375, "learning_rate": 4.8977659625942886e-05, "loss": 0.8724, "step": 6634 }, { "epoch": 0.0941039667317803, "grad_norm": 3.21875, "learning_rate": 4.8977027132277736e-05, "loss": 0.8877, "step": 6636 }, { "epoch": 0.09413232838540651, "grad_norm": 2.90625, "learning_rate": 4.8976394447106474e-05, "loss": 0.8993, "step": 6638 }, { "epoch": 0.09416069003903273, "grad_norm": 2.984375, "learning_rate": 4.8975761570434155e-05, "loss": 0.8916, "step": 6640 }, { "epoch": 0.09418905169265894, "grad_norm": 2.953125, "learning_rate": 4.897512850226582e-05, "loss": 0.8337, "step": 6642 }, { "epoch": 0.09421741334628515, "grad_norm": 3.4375, "learning_rate": 4.897449524260654e-05, "loss": 0.879, "step": 6644 }, { "epoch": 0.09424577499991137, "grad_norm": 3.484375, "learning_rate": 4.8973861791461374e-05, "loss": 0.8903, "step": 6646 }, { "epoch": 0.09427413665353758, "grad_norm": 3.09375, "learning_rate": 4.897322814883537e-05, "loss": 0.8776, "step": 6648 }, { "epoch": 0.09430249830716379, "grad_norm": 3.5625, "learning_rate": 4.897259431473359e-05, "loss": 0.9234, "step": 6650 }, { "epoch": 0.09433085996079002, "grad_norm": 3.0625, "learning_rate": 4.89719602891611e-05, "loss": 0.9039, "step": 6652 }, { "epoch": 0.09435922161441623, "grad_norm": 2.671875, "learning_rate": 4.897132607212297e-05, "loss": 0.8646, "step": 6654 }, { "epoch": 0.09438758326804245, "grad_norm": 2.984375, "learning_rate": 4.897069166362425e-05, "loss": 0.8429, "step": 6656 }, { "epoch": 0.09441594492166866, "grad_norm": 3.171875, "learning_rate": 4.8970057063670026e-05, "loss": 0.877, "step": 6658 }, { "epoch": 0.09444430657529487, "grad_norm": 3.234375, "learning_rate": 4.896942227226535e-05, "loss": 0.8628, "step": 6660 }, { "epoch": 0.09447266822892109, "grad_norm": 3.140625, "learning_rate": 4.896878728941531e-05, "loss": 0.8393, "step": 6662 }, { "epoch": 0.0945010298825473, "grad_norm": 3.140625, "learning_rate": 4.896815211512496e-05, "loss": 0.8779, "step": 6664 }, { "epoch": 0.09452939153617351, "grad_norm": 2.9375, "learning_rate": 4.8967516749399375e-05, "loss": 0.8602, "step": 6666 }, { "epoch": 0.09455775318979973, "grad_norm": 3.015625, "learning_rate": 4.896688119224365e-05, "loss": 0.8643, "step": 6668 }, { "epoch": 0.09458611484342594, "grad_norm": 3.40625, "learning_rate": 4.896624544366284e-05, "loss": 0.9394, "step": 6670 }, { "epoch": 0.09461447649705217, "grad_norm": 2.890625, "learning_rate": 4.896560950366202e-05, "loss": 0.8657, "step": 6672 }, { "epoch": 0.09464283815067838, "grad_norm": 3.125, "learning_rate": 4.896497337224629e-05, "loss": 0.886, "step": 6674 }, { "epoch": 0.09467119980430459, "grad_norm": 2.8125, "learning_rate": 4.8964337049420716e-05, "loss": 0.8796, "step": 6676 }, { "epoch": 0.0946995614579308, "grad_norm": 3.46875, "learning_rate": 4.896370053519038e-05, "loss": 0.8908, "step": 6678 }, { "epoch": 0.09472792311155702, "grad_norm": 3.21875, "learning_rate": 4.8963063829560376e-05, "loss": 0.8701, "step": 6680 }, { "epoch": 0.09475628476518323, "grad_norm": 3.125, "learning_rate": 4.8962426932535775e-05, "loss": 0.8794, "step": 6682 }, { "epoch": 0.09478464641880945, "grad_norm": 3.34375, "learning_rate": 4.8961789844121674e-05, "loss": 0.8332, "step": 6684 }, { "epoch": 0.09481300807243566, "grad_norm": 3.015625, "learning_rate": 4.896115256432316e-05, "loss": 0.9035, "step": 6686 }, { "epoch": 0.09484136972606187, "grad_norm": 3.1875, "learning_rate": 4.896051509314532e-05, "loss": 0.9023, "step": 6688 }, { "epoch": 0.0948697313796881, "grad_norm": 2.78125, "learning_rate": 4.895987743059325e-05, "loss": 0.8909, "step": 6690 }, { "epoch": 0.09489809303331431, "grad_norm": 3.5625, "learning_rate": 4.895923957667204e-05, "loss": 0.8785, "step": 6692 }, { "epoch": 0.09492645468694053, "grad_norm": 2.859375, "learning_rate": 4.8958601531386796e-05, "loss": 0.8752, "step": 6694 }, { "epoch": 0.09495481634056674, "grad_norm": 2.984375, "learning_rate": 4.895796329474259e-05, "loss": 0.882, "step": 6696 }, { "epoch": 0.09498317799419295, "grad_norm": 3.046875, "learning_rate": 4.8957324866744534e-05, "loss": 0.8596, "step": 6698 }, { "epoch": 0.09501153964781917, "grad_norm": 3.40625, "learning_rate": 4.895668624739773e-05, "loss": 0.8922, "step": 6700 }, { "epoch": 0.09503990130144538, "grad_norm": 3.203125, "learning_rate": 4.8956047436707276e-05, "loss": 0.8653, "step": 6702 }, { "epoch": 0.09506826295507159, "grad_norm": 2.984375, "learning_rate": 4.895540843467827e-05, "loss": 0.9129, "step": 6704 }, { "epoch": 0.0950966246086978, "grad_norm": 3.265625, "learning_rate": 4.895476924131582e-05, "loss": 0.9321, "step": 6706 }, { "epoch": 0.09512498626232402, "grad_norm": 3.09375, "learning_rate": 4.895412985662504e-05, "loss": 0.8858, "step": 6708 }, { "epoch": 0.09515334791595025, "grad_norm": 3.28125, "learning_rate": 4.8953490280611015e-05, "loss": 0.8923, "step": 6710 }, { "epoch": 0.09518170956957646, "grad_norm": 3.546875, "learning_rate": 4.895285051327887e-05, "loss": 0.89, "step": 6712 }, { "epoch": 0.09521007122320267, "grad_norm": 3.140625, "learning_rate": 4.895221055463371e-05, "loss": 0.8784, "step": 6714 }, { "epoch": 0.09523843287682889, "grad_norm": 3.125, "learning_rate": 4.895157040468065e-05, "loss": 0.8888, "step": 6716 }, { "epoch": 0.0952667945304551, "grad_norm": 3.0, "learning_rate": 4.895093006342479e-05, "loss": 0.8994, "step": 6718 }, { "epoch": 0.09529515618408131, "grad_norm": 2.9375, "learning_rate": 4.895028953087126e-05, "loss": 0.8219, "step": 6720 }, { "epoch": 0.09532351783770752, "grad_norm": 2.96875, "learning_rate": 4.894964880702517e-05, "loss": 0.8751, "step": 6722 }, { "epoch": 0.09535187949133374, "grad_norm": 2.8125, "learning_rate": 4.894900789189164e-05, "loss": 0.8539, "step": 6724 }, { "epoch": 0.09538024114495995, "grad_norm": 3.234375, "learning_rate": 4.894836678547578e-05, "loss": 0.8832, "step": 6726 }, { "epoch": 0.09540860279858618, "grad_norm": 3.03125, "learning_rate": 4.894772548778272e-05, "loss": 0.8679, "step": 6728 }, { "epoch": 0.09543696445221239, "grad_norm": 2.953125, "learning_rate": 4.8947083998817576e-05, "loss": 0.8917, "step": 6730 }, { "epoch": 0.0954653261058386, "grad_norm": 3.25, "learning_rate": 4.894644231858548e-05, "loss": 0.8467, "step": 6732 }, { "epoch": 0.09549368775946482, "grad_norm": 3.0625, "learning_rate": 4.894580044709155e-05, "loss": 0.8977, "step": 6734 }, { "epoch": 0.09552204941309103, "grad_norm": 3.4375, "learning_rate": 4.8945158384340914e-05, "loss": 0.9222, "step": 6736 }, { "epoch": 0.09555041106671724, "grad_norm": 3.40625, "learning_rate": 4.89445161303387e-05, "loss": 0.8785, "step": 6738 }, { "epoch": 0.09557877272034346, "grad_norm": 3.109375, "learning_rate": 4.894387368509005e-05, "loss": 0.8621, "step": 6740 }, { "epoch": 0.09560713437396967, "grad_norm": 3.15625, "learning_rate": 4.894323104860007e-05, "loss": 0.8665, "step": 6742 }, { "epoch": 0.09563549602759588, "grad_norm": 3.078125, "learning_rate": 4.894258822087391e-05, "loss": 0.8774, "step": 6744 }, { "epoch": 0.0956638576812221, "grad_norm": 3.359375, "learning_rate": 4.8941945201916706e-05, "loss": 0.848, "step": 6746 }, { "epoch": 0.09569221933484832, "grad_norm": 3.171875, "learning_rate": 4.894130199173358e-05, "loss": 0.9119, "step": 6748 }, { "epoch": 0.09572058098847454, "grad_norm": 2.6875, "learning_rate": 4.894065859032969e-05, "loss": 0.8922, "step": 6750 }, { "epoch": 0.09574894264210075, "grad_norm": 3.796875, "learning_rate": 4.894001499771015e-05, "loss": 0.8957, "step": 6752 }, { "epoch": 0.09577730429572696, "grad_norm": 2.890625, "learning_rate": 4.893937121388012e-05, "loss": 0.8455, "step": 6754 }, { "epoch": 0.09580566594935318, "grad_norm": 3.0625, "learning_rate": 4.893872723884473e-05, "loss": 0.8426, "step": 6756 }, { "epoch": 0.09583402760297939, "grad_norm": 3.53125, "learning_rate": 4.893808307260914e-05, "loss": 0.8797, "step": 6758 }, { "epoch": 0.0958623892566056, "grad_norm": 3.0625, "learning_rate": 4.893743871517847e-05, "loss": 0.8905, "step": 6760 }, { "epoch": 0.09589075091023182, "grad_norm": 3.0, "learning_rate": 4.8936794166557895e-05, "loss": 0.8852, "step": 6762 }, { "epoch": 0.09591911256385803, "grad_norm": 3.265625, "learning_rate": 4.893614942675254e-05, "loss": 0.8762, "step": 6764 }, { "epoch": 0.09594747421748424, "grad_norm": 2.984375, "learning_rate": 4.893550449576756e-05, "loss": 0.8425, "step": 6766 }, { "epoch": 0.09597583587111047, "grad_norm": 3.078125, "learning_rate": 4.893485937360812e-05, "loss": 0.8618, "step": 6768 }, { "epoch": 0.09600419752473668, "grad_norm": 3.28125, "learning_rate": 4.893421406027935e-05, "loss": 0.9292, "step": 6770 }, { "epoch": 0.0960325591783629, "grad_norm": 3.359375, "learning_rate": 4.8933568555786416e-05, "loss": 0.8818, "step": 6772 }, { "epoch": 0.09606092083198911, "grad_norm": 2.921875, "learning_rate": 4.893292286013448e-05, "loss": 0.8698, "step": 6774 }, { "epoch": 0.09608928248561532, "grad_norm": 3.21875, "learning_rate": 4.89322769733287e-05, "loss": 0.9213, "step": 6776 }, { "epoch": 0.09611764413924154, "grad_norm": 2.921875, "learning_rate": 4.8931630895374215e-05, "loss": 0.8597, "step": 6778 }, { "epoch": 0.09614600579286775, "grad_norm": 3.125, "learning_rate": 4.89309846262762e-05, "loss": 0.8499, "step": 6780 }, { "epoch": 0.09617436744649396, "grad_norm": 3.609375, "learning_rate": 4.8930338166039815e-05, "loss": 0.8768, "step": 6782 }, { "epoch": 0.09620272910012018, "grad_norm": 2.921875, "learning_rate": 4.892969151467022e-05, "loss": 0.8947, "step": 6784 }, { "epoch": 0.0962310907537464, "grad_norm": 2.65625, "learning_rate": 4.892904467217258e-05, "loss": 0.8554, "step": 6786 }, { "epoch": 0.09625945240737262, "grad_norm": 3.375, "learning_rate": 4.892839763855208e-05, "loss": 0.8592, "step": 6788 }, { "epoch": 0.09628781406099883, "grad_norm": 3.078125, "learning_rate": 4.892775041381386e-05, "loss": 0.8741, "step": 6790 }, { "epoch": 0.09631617571462504, "grad_norm": 2.984375, "learning_rate": 4.8927102997963105e-05, "loss": 0.8321, "step": 6792 }, { "epoch": 0.09634453736825126, "grad_norm": 3.046875, "learning_rate": 4.8926455391004974e-05, "loss": 0.8295, "step": 6794 }, { "epoch": 0.09637289902187747, "grad_norm": 3.359375, "learning_rate": 4.892580759294466e-05, "loss": 0.9103, "step": 6796 }, { "epoch": 0.09640126067550368, "grad_norm": 3.296875, "learning_rate": 4.8925159603787316e-05, "loss": 0.8493, "step": 6798 }, { "epoch": 0.0964296223291299, "grad_norm": 3.3125, "learning_rate": 4.8924511423538136e-05, "loss": 0.8576, "step": 6800 }, { "epoch": 0.09645798398275611, "grad_norm": 2.71875, "learning_rate": 4.892386305220228e-05, "loss": 0.9026, "step": 6802 }, { "epoch": 0.09648634563638232, "grad_norm": 3.109375, "learning_rate": 4.892321448978494e-05, "loss": 0.8667, "step": 6804 }, { "epoch": 0.09651470729000855, "grad_norm": 3.078125, "learning_rate": 4.892256573629128e-05, "loss": 0.8576, "step": 6806 }, { "epoch": 0.09654306894363476, "grad_norm": 3.234375, "learning_rate": 4.89219167917265e-05, "loss": 0.8883, "step": 6808 }, { "epoch": 0.09657143059726098, "grad_norm": 2.921875, "learning_rate": 4.8921267656095775e-05, "loss": 0.8876, "step": 6810 }, { "epoch": 0.09659979225088719, "grad_norm": 3.3125, "learning_rate": 4.8920618329404286e-05, "loss": 0.9091, "step": 6812 }, { "epoch": 0.0966281539045134, "grad_norm": 3.1875, "learning_rate": 4.8919968811657224e-05, "loss": 0.893, "step": 6814 }, { "epoch": 0.09665651555813962, "grad_norm": 2.921875, "learning_rate": 4.891931910285978e-05, "loss": 0.8439, "step": 6816 }, { "epoch": 0.09668487721176583, "grad_norm": 2.984375, "learning_rate": 4.891866920301713e-05, "loss": 0.929, "step": 6818 }, { "epoch": 0.09671323886539204, "grad_norm": 2.625, "learning_rate": 4.8918019112134484e-05, "loss": 0.8322, "step": 6820 }, { "epoch": 0.09674160051901826, "grad_norm": 3.359375, "learning_rate": 4.891736883021701e-05, "loss": 0.8926, "step": 6822 }, { "epoch": 0.09676996217264447, "grad_norm": 2.9375, "learning_rate": 4.891671835726993e-05, "loss": 0.8846, "step": 6824 }, { "epoch": 0.0967983238262707, "grad_norm": 2.953125, "learning_rate": 4.8916067693298426e-05, "loss": 0.8818, "step": 6826 }, { "epoch": 0.09682668547989691, "grad_norm": 3.5, "learning_rate": 4.891541683830768e-05, "loss": 0.8972, "step": 6828 }, { "epoch": 0.09685504713352312, "grad_norm": 2.8125, "learning_rate": 4.891476579230292e-05, "loss": 0.8713, "step": 6830 }, { "epoch": 0.09688340878714934, "grad_norm": 3.484375, "learning_rate": 4.891411455528932e-05, "loss": 0.8244, "step": 6832 }, { "epoch": 0.09691177044077555, "grad_norm": 3.0, "learning_rate": 4.89134631272721e-05, "loss": 0.8848, "step": 6834 }, { "epoch": 0.09694013209440176, "grad_norm": 3.46875, "learning_rate": 4.891281150825644e-05, "loss": 0.871, "step": 6836 }, { "epoch": 0.09696849374802798, "grad_norm": 3.21875, "learning_rate": 4.8912159698247575e-05, "loss": 0.9193, "step": 6838 }, { "epoch": 0.09699685540165419, "grad_norm": 3.125, "learning_rate": 4.891150769725069e-05, "loss": 0.8347, "step": 6840 }, { "epoch": 0.0970252170552804, "grad_norm": 3.421875, "learning_rate": 4.8910855505271e-05, "loss": 0.8634, "step": 6842 }, { "epoch": 0.09705357870890663, "grad_norm": 2.84375, "learning_rate": 4.8910203122313705e-05, "loss": 0.8941, "step": 6844 }, { "epoch": 0.09708194036253284, "grad_norm": 3.109375, "learning_rate": 4.890955054838403e-05, "loss": 0.9007, "step": 6846 }, { "epoch": 0.09711030201615906, "grad_norm": 2.640625, "learning_rate": 4.890889778348718e-05, "loss": 0.8651, "step": 6848 }, { "epoch": 0.09713866366978527, "grad_norm": 3.078125, "learning_rate": 4.8908244827628365e-05, "loss": 0.7985, "step": 6850 }, { "epoch": 0.09716702532341148, "grad_norm": 2.875, "learning_rate": 4.89075916808128e-05, "loss": 0.8701, "step": 6852 }, { "epoch": 0.0971953869770377, "grad_norm": 3.046875, "learning_rate": 4.8906938343045716e-05, "loss": 0.859, "step": 6854 }, { "epoch": 0.09722374863066391, "grad_norm": 3.0625, "learning_rate": 4.890628481433232e-05, "loss": 0.8579, "step": 6856 }, { "epoch": 0.09725211028429012, "grad_norm": 3.421875, "learning_rate": 4.8905631094677826e-05, "loss": 0.8686, "step": 6858 }, { "epoch": 0.09728047193791634, "grad_norm": 3.21875, "learning_rate": 4.890497718408746e-05, "loss": 0.8887, "step": 6860 }, { "epoch": 0.09730883359154255, "grad_norm": 3.125, "learning_rate": 4.8904323082566456e-05, "loss": 0.8644, "step": 6862 }, { "epoch": 0.09733719524516878, "grad_norm": 3.296875, "learning_rate": 4.890366879012002e-05, "loss": 0.888, "step": 6864 }, { "epoch": 0.09736555689879499, "grad_norm": 3.453125, "learning_rate": 4.890301430675339e-05, "loss": 0.9147, "step": 6866 }, { "epoch": 0.0973939185524212, "grad_norm": 2.953125, "learning_rate": 4.890235963247179e-05, "loss": 0.8721, "step": 6868 }, { "epoch": 0.09742228020604742, "grad_norm": 2.859375, "learning_rate": 4.8901704767280456e-05, "loss": 0.8306, "step": 6870 }, { "epoch": 0.09745064185967363, "grad_norm": 3.4375, "learning_rate": 4.890104971118461e-05, "loss": 0.8914, "step": 6872 }, { "epoch": 0.09747900351329984, "grad_norm": 3.21875, "learning_rate": 4.890039446418948e-05, "loss": 0.9188, "step": 6874 }, { "epoch": 0.09750736516692605, "grad_norm": 3.09375, "learning_rate": 4.889973902630031e-05, "loss": 0.8805, "step": 6876 }, { "epoch": 0.09753572682055227, "grad_norm": 2.859375, "learning_rate": 4.889908339752233e-05, "loss": 0.8722, "step": 6878 }, { "epoch": 0.09756408847417848, "grad_norm": 2.921875, "learning_rate": 4.889842757786077e-05, "loss": 0.8739, "step": 6880 }, { "epoch": 0.09759245012780471, "grad_norm": 3.4375, "learning_rate": 4.889777156732088e-05, "loss": 0.8725, "step": 6882 }, { "epoch": 0.09762081178143092, "grad_norm": 3.1875, "learning_rate": 4.88971153659079e-05, "loss": 0.8994, "step": 6884 }, { "epoch": 0.09764917343505713, "grad_norm": 2.765625, "learning_rate": 4.8896458973627056e-05, "loss": 0.8805, "step": 6886 }, { "epoch": 0.09767753508868335, "grad_norm": 3.3125, "learning_rate": 4.8895802390483604e-05, "loss": 0.9145, "step": 6888 }, { "epoch": 0.09770589674230956, "grad_norm": 2.84375, "learning_rate": 4.8895145616482785e-05, "loss": 0.857, "step": 6890 }, { "epoch": 0.09773425839593577, "grad_norm": 3.046875, "learning_rate": 4.8894488651629844e-05, "loss": 0.8997, "step": 6892 }, { "epoch": 0.09776262004956199, "grad_norm": 3.328125, "learning_rate": 4.889383149593003e-05, "loss": 0.9074, "step": 6894 }, { "epoch": 0.0977909817031882, "grad_norm": 2.828125, "learning_rate": 4.889317414938859e-05, "loss": 0.8476, "step": 6896 }, { "epoch": 0.09781934335681441, "grad_norm": 2.890625, "learning_rate": 4.889251661201077e-05, "loss": 0.8836, "step": 6898 }, { "epoch": 0.09784770501044063, "grad_norm": 3.125, "learning_rate": 4.8891858883801825e-05, "loss": 0.8658, "step": 6900 }, { "epoch": 0.09787606666406685, "grad_norm": 3.125, "learning_rate": 4.8891200964767014e-05, "loss": 0.838, "step": 6902 }, { "epoch": 0.09790442831769307, "grad_norm": 3.34375, "learning_rate": 4.889054285491159e-05, "loss": 0.869, "step": 6904 }, { "epoch": 0.09793278997131928, "grad_norm": 3.390625, "learning_rate": 4.888988455424081e-05, "loss": 0.9054, "step": 6906 }, { "epoch": 0.0979611516249455, "grad_norm": 2.984375, "learning_rate": 4.8889226062759914e-05, "loss": 0.8587, "step": 6908 }, { "epoch": 0.09798951327857171, "grad_norm": 3.125, "learning_rate": 4.8888567380474184e-05, "loss": 0.8719, "step": 6910 }, { "epoch": 0.09801787493219792, "grad_norm": 3.3125, "learning_rate": 4.888790850738887e-05, "loss": 0.8956, "step": 6912 }, { "epoch": 0.09804623658582413, "grad_norm": 3.15625, "learning_rate": 4.888724944350924e-05, "loss": 0.8679, "step": 6914 }, { "epoch": 0.09807459823945035, "grad_norm": 3.125, "learning_rate": 4.888659018884056e-05, "loss": 0.8953, "step": 6916 }, { "epoch": 0.09810295989307656, "grad_norm": 3.078125, "learning_rate": 4.8885930743388086e-05, "loss": 0.8775, "step": 6918 }, { "epoch": 0.09813132154670277, "grad_norm": 3.046875, "learning_rate": 4.888527110715709e-05, "loss": 0.9056, "step": 6920 }, { "epoch": 0.098159683200329, "grad_norm": 3.015625, "learning_rate": 4.888461128015283e-05, "loss": 0.8887, "step": 6922 }, { "epoch": 0.09818804485395521, "grad_norm": 2.90625, "learning_rate": 4.888395126238061e-05, "loss": 0.8972, "step": 6924 }, { "epoch": 0.09821640650758143, "grad_norm": 2.78125, "learning_rate": 4.888329105384566e-05, "loss": 0.8811, "step": 6926 }, { "epoch": 0.09824476816120764, "grad_norm": 3.546875, "learning_rate": 4.888263065455328e-05, "loss": 0.8866, "step": 6928 }, { "epoch": 0.09827312981483385, "grad_norm": 3.109375, "learning_rate": 4.888197006450873e-05, "loss": 0.8863, "step": 6930 }, { "epoch": 0.09830149146846007, "grad_norm": 3.0, "learning_rate": 4.8881309283717305e-05, "loss": 0.8646, "step": 6932 }, { "epoch": 0.09832985312208628, "grad_norm": 3.0, "learning_rate": 4.888064831218425e-05, "loss": 0.872, "step": 6934 }, { "epoch": 0.0983582147757125, "grad_norm": 2.90625, "learning_rate": 4.887998714991488e-05, "loss": 0.8798, "step": 6936 }, { "epoch": 0.0983865764293387, "grad_norm": 2.921875, "learning_rate": 4.8879325796914455e-05, "loss": 0.9518, "step": 6938 }, { "epoch": 0.09841493808296493, "grad_norm": 2.984375, "learning_rate": 4.887866425318826e-05, "loss": 0.89, "step": 6940 }, { "epoch": 0.09844329973659115, "grad_norm": 2.703125, "learning_rate": 4.8878002518741585e-05, "loss": 0.8011, "step": 6942 }, { "epoch": 0.09847166139021736, "grad_norm": 3.484375, "learning_rate": 4.88773405935797e-05, "loss": 0.8383, "step": 6944 }, { "epoch": 0.09850002304384357, "grad_norm": 2.984375, "learning_rate": 4.887667847770791e-05, "loss": 0.8918, "step": 6946 }, { "epoch": 0.09852838469746979, "grad_norm": 3.21875, "learning_rate": 4.88760161711315e-05, "loss": 0.8785, "step": 6948 }, { "epoch": 0.098556746351096, "grad_norm": 3.28125, "learning_rate": 4.887535367385576e-05, "loss": 0.8639, "step": 6950 }, { "epoch": 0.09858510800472221, "grad_norm": 3.15625, "learning_rate": 4.8874690985885975e-05, "loss": 0.8672, "step": 6952 }, { "epoch": 0.09861346965834843, "grad_norm": 2.90625, "learning_rate": 4.887402810722743e-05, "loss": 0.8679, "step": 6954 }, { "epoch": 0.09864183131197464, "grad_norm": 2.890625, "learning_rate": 4.887336503788544e-05, "loss": 0.9038, "step": 6956 }, { "epoch": 0.09867019296560085, "grad_norm": 3.09375, "learning_rate": 4.887270177786529e-05, "loss": 0.8563, "step": 6958 }, { "epoch": 0.09869855461922708, "grad_norm": 3.234375, "learning_rate": 4.8872038327172276e-05, "loss": 0.9246, "step": 6960 }, { "epoch": 0.0987269162728533, "grad_norm": 3.109375, "learning_rate": 4.88713746858117e-05, "loss": 0.93, "step": 6962 }, { "epoch": 0.0987552779264795, "grad_norm": 3.0, "learning_rate": 4.887071085378886e-05, "loss": 0.8827, "step": 6964 }, { "epoch": 0.09878363958010572, "grad_norm": 3.046875, "learning_rate": 4.8870046831109064e-05, "loss": 0.8984, "step": 6966 }, { "epoch": 0.09881200123373193, "grad_norm": 3.28125, "learning_rate": 4.886938261777761e-05, "loss": 0.8696, "step": 6968 }, { "epoch": 0.09884036288735815, "grad_norm": 3.109375, "learning_rate": 4.886871821379981e-05, "loss": 0.8766, "step": 6970 }, { "epoch": 0.09886872454098436, "grad_norm": 3.15625, "learning_rate": 4.886805361918096e-05, "loss": 0.8598, "step": 6972 }, { "epoch": 0.09889708619461057, "grad_norm": 2.875, "learning_rate": 4.886738883392637e-05, "loss": 0.8779, "step": 6974 }, { "epoch": 0.09892544784823679, "grad_norm": 2.984375, "learning_rate": 4.886672385804136e-05, "loss": 0.892, "step": 6976 }, { "epoch": 0.098953809501863, "grad_norm": 3.296875, "learning_rate": 4.886605869153124e-05, "loss": 0.8992, "step": 6978 }, { "epoch": 0.09898217115548923, "grad_norm": 3.265625, "learning_rate": 4.886539333440131e-05, "loss": 0.9366, "step": 6980 }, { "epoch": 0.09901053280911544, "grad_norm": 3.4375, "learning_rate": 4.8864727786656886e-05, "loss": 0.8638, "step": 6982 }, { "epoch": 0.09903889446274165, "grad_norm": 3.421875, "learning_rate": 4.88640620483033e-05, "loss": 0.8521, "step": 6984 }, { "epoch": 0.09906725611636787, "grad_norm": 3.078125, "learning_rate": 4.886339611934585e-05, "loss": 0.8615, "step": 6986 }, { "epoch": 0.09909561776999408, "grad_norm": 2.984375, "learning_rate": 4.886272999978987e-05, "loss": 0.9117, "step": 6988 }, { "epoch": 0.09912397942362029, "grad_norm": 2.71875, "learning_rate": 4.8862063689640666e-05, "loss": 0.8302, "step": 6990 }, { "epoch": 0.0991523410772465, "grad_norm": 3.0625, "learning_rate": 4.886139718890358e-05, "loss": 0.8786, "step": 6992 }, { "epoch": 0.09918070273087272, "grad_norm": 3.140625, "learning_rate": 4.8860730497583914e-05, "loss": 0.8645, "step": 6994 }, { "epoch": 0.09920906438449893, "grad_norm": 3.015625, "learning_rate": 4.8860063615687e-05, "loss": 0.9151, "step": 6996 }, { "epoch": 0.09923742603812516, "grad_norm": 3.328125, "learning_rate": 4.885939654321817e-05, "loss": 0.8901, "step": 6998 }, { "epoch": 0.09926578769175137, "grad_norm": 3.21875, "learning_rate": 4.885872928018276e-05, "loss": 0.8513, "step": 7000 }, { "epoch": 0.09929414934537759, "grad_norm": 3.171875, "learning_rate": 4.885806182658607e-05, "loss": 0.9091, "step": 7002 }, { "epoch": 0.0993225109990038, "grad_norm": 2.96875, "learning_rate": 4.8857394182433456e-05, "loss": 0.8872, "step": 7004 }, { "epoch": 0.09935087265263001, "grad_norm": 2.984375, "learning_rate": 4.885672634773024e-05, "loss": 0.8998, "step": 7006 }, { "epoch": 0.09937923430625623, "grad_norm": 3.328125, "learning_rate": 4.885605832248177e-05, "loss": 0.8582, "step": 7008 }, { "epoch": 0.09940759595988244, "grad_norm": 2.71875, "learning_rate": 4.885539010669336e-05, "loss": 0.8353, "step": 7010 }, { "epoch": 0.09943595761350865, "grad_norm": 3.0625, "learning_rate": 4.885472170037037e-05, "loss": 0.9026, "step": 7012 }, { "epoch": 0.09946431926713487, "grad_norm": 3.046875, "learning_rate": 4.885405310351811e-05, "loss": 0.8705, "step": 7014 }, { "epoch": 0.09949268092076108, "grad_norm": 3.21875, "learning_rate": 4.885338431614195e-05, "loss": 0.8947, "step": 7016 }, { "epoch": 0.0995210425743873, "grad_norm": 2.953125, "learning_rate": 4.8852715338247214e-05, "loss": 0.8672, "step": 7018 }, { "epoch": 0.09954940422801352, "grad_norm": 3.328125, "learning_rate": 4.8852046169839245e-05, "loss": 0.9253, "step": 7020 }, { "epoch": 0.09957776588163973, "grad_norm": 3.296875, "learning_rate": 4.8851376810923396e-05, "loss": 0.8568, "step": 7022 }, { "epoch": 0.09960612753526595, "grad_norm": 3.0, "learning_rate": 4.885070726150501e-05, "loss": 0.8611, "step": 7024 }, { "epoch": 0.09963448918889216, "grad_norm": 3.1875, "learning_rate": 4.8850037521589434e-05, "loss": 0.8688, "step": 7026 }, { "epoch": 0.09966285084251837, "grad_norm": 2.65625, "learning_rate": 4.8849367591182014e-05, "loss": 0.8865, "step": 7028 }, { "epoch": 0.09969121249614459, "grad_norm": 3.125, "learning_rate": 4.8848697470288105e-05, "loss": 0.8672, "step": 7030 }, { "epoch": 0.0997195741497708, "grad_norm": 3.421875, "learning_rate": 4.884802715891307e-05, "loss": 0.874, "step": 7032 }, { "epoch": 0.09974793580339701, "grad_norm": 2.921875, "learning_rate": 4.8847356657062236e-05, "loss": 0.9212, "step": 7034 }, { "epoch": 0.09977629745702324, "grad_norm": 3.15625, "learning_rate": 4.884668596474098e-05, "loss": 0.8985, "step": 7036 }, { "epoch": 0.09980465911064945, "grad_norm": 3.46875, "learning_rate": 4.884601508195465e-05, "loss": 0.8831, "step": 7038 }, { "epoch": 0.09983302076427567, "grad_norm": 3.359375, "learning_rate": 4.8845344008708615e-05, "loss": 0.9153, "step": 7040 }, { "epoch": 0.09986138241790188, "grad_norm": 3.03125, "learning_rate": 4.884467274500822e-05, "loss": 0.9009, "step": 7042 }, { "epoch": 0.09988974407152809, "grad_norm": 3.28125, "learning_rate": 4.884400129085884e-05, "loss": 0.8834, "step": 7044 }, { "epoch": 0.0999181057251543, "grad_norm": 2.859375, "learning_rate": 4.884332964626582e-05, "loss": 0.8377, "step": 7046 }, { "epoch": 0.09994646737878052, "grad_norm": 2.984375, "learning_rate": 4.884265781123455e-05, "loss": 0.8779, "step": 7048 }, { "epoch": 0.09997482903240673, "grad_norm": 2.890625, "learning_rate": 4.884198578577037e-05, "loss": 0.9041, "step": 7050 }, { "epoch": 0.10000319068603294, "grad_norm": 3.046875, "learning_rate": 4.884131356987867e-05, "loss": 0.8658, "step": 7052 }, { "epoch": 0.10003155233965916, "grad_norm": 3.15625, "learning_rate": 4.88406411635648e-05, "loss": 0.9014, "step": 7054 }, { "epoch": 0.10005991399328538, "grad_norm": 2.78125, "learning_rate": 4.8839968566834136e-05, "loss": 0.8374, "step": 7056 }, { "epoch": 0.1000882756469116, "grad_norm": 3.015625, "learning_rate": 4.8839295779692064e-05, "loss": 0.8786, "step": 7058 }, { "epoch": 0.10011663730053781, "grad_norm": 3.453125, "learning_rate": 4.883862280214393e-05, "loss": 0.9005, "step": 7060 }, { "epoch": 0.10014499895416402, "grad_norm": 2.90625, "learning_rate": 4.883794963419514e-05, "loss": 0.8782, "step": 7062 }, { "epoch": 0.10017336060779024, "grad_norm": 3.125, "learning_rate": 4.883727627585105e-05, "loss": 0.8752, "step": 7064 }, { "epoch": 0.10020172226141645, "grad_norm": 3.140625, "learning_rate": 4.8836602727117054e-05, "loss": 0.8277, "step": 7066 }, { "epoch": 0.10023008391504266, "grad_norm": 3.0, "learning_rate": 4.8835928987998516e-05, "loss": 0.8267, "step": 7068 }, { "epoch": 0.10025844556866888, "grad_norm": 2.9375, "learning_rate": 4.883525505850082e-05, "loss": 0.8731, "step": 7070 }, { "epoch": 0.10028680722229509, "grad_norm": 3.15625, "learning_rate": 4.883458093862935e-05, "loss": 0.8688, "step": 7072 }, { "epoch": 0.1003151688759213, "grad_norm": 3.078125, "learning_rate": 4.8833906628389495e-05, "loss": 0.8729, "step": 7074 }, { "epoch": 0.10034353052954753, "grad_norm": 2.796875, "learning_rate": 4.8833232127786636e-05, "loss": 0.8916, "step": 7076 }, { "epoch": 0.10037189218317374, "grad_norm": 3.34375, "learning_rate": 4.8832557436826166e-05, "loss": 0.8974, "step": 7078 }, { "epoch": 0.10040025383679996, "grad_norm": 2.9375, "learning_rate": 4.8831882555513473e-05, "loss": 0.8785, "step": 7080 }, { "epoch": 0.10042861549042617, "grad_norm": 3.078125, "learning_rate": 4.883120748385394e-05, "loss": 0.8784, "step": 7082 }, { "epoch": 0.10045697714405238, "grad_norm": 3.15625, "learning_rate": 4.8830532221852966e-05, "loss": 0.88, "step": 7084 }, { "epoch": 0.1004853387976786, "grad_norm": 2.859375, "learning_rate": 4.882985676951594e-05, "loss": 0.8852, "step": 7086 }, { "epoch": 0.10051370045130481, "grad_norm": 3.34375, "learning_rate": 4.882918112684826e-05, "loss": 0.8429, "step": 7088 }, { "epoch": 0.10054206210493102, "grad_norm": 3.328125, "learning_rate": 4.8828505293855315e-05, "loss": 0.9146, "step": 7090 }, { "epoch": 0.10057042375855724, "grad_norm": 2.890625, "learning_rate": 4.882782927054251e-05, "loss": 0.8439, "step": 7092 }, { "epoch": 0.10059878541218346, "grad_norm": 2.578125, "learning_rate": 4.882715305691525e-05, "loss": 0.8956, "step": 7094 }, { "epoch": 0.10062714706580968, "grad_norm": 2.890625, "learning_rate": 4.8826476652978924e-05, "loss": 0.8756, "step": 7096 }, { "epoch": 0.10065550871943589, "grad_norm": 3.34375, "learning_rate": 4.882580005873894e-05, "loss": 0.8739, "step": 7098 }, { "epoch": 0.1006838703730621, "grad_norm": 3.046875, "learning_rate": 4.8825123274200703e-05, "loss": 0.8709, "step": 7100 }, { "epoch": 0.10071223202668832, "grad_norm": 2.921875, "learning_rate": 4.882444629936962e-05, "loss": 0.886, "step": 7102 }, { "epoch": 0.10074059368031453, "grad_norm": 3.453125, "learning_rate": 4.882376913425109e-05, "loss": 0.9081, "step": 7104 }, { "epoch": 0.10076895533394074, "grad_norm": 3.4375, "learning_rate": 4.882309177885053e-05, "loss": 0.9085, "step": 7106 }, { "epoch": 0.10079731698756696, "grad_norm": 3.328125, "learning_rate": 4.8822414233173344e-05, "loss": 0.8605, "step": 7108 }, { "epoch": 0.10082567864119317, "grad_norm": 2.734375, "learning_rate": 4.8821736497224954e-05, "loss": 0.895, "step": 7110 }, { "epoch": 0.10085404029481938, "grad_norm": 3.015625, "learning_rate": 4.882105857101076e-05, "loss": 0.8375, "step": 7112 }, { "epoch": 0.10088240194844561, "grad_norm": 3.0, "learning_rate": 4.8820380454536185e-05, "loss": 0.8829, "step": 7114 }, { "epoch": 0.10091076360207182, "grad_norm": 2.953125, "learning_rate": 4.8819702147806634e-05, "loss": 0.884, "step": 7116 }, { "epoch": 0.10093912525569804, "grad_norm": 3.078125, "learning_rate": 4.881902365082755e-05, "loss": 0.886, "step": 7118 }, { "epoch": 0.10096748690932425, "grad_norm": 2.984375, "learning_rate": 4.881834496360432e-05, "loss": 0.9413, "step": 7120 }, { "epoch": 0.10099584856295046, "grad_norm": 3.15625, "learning_rate": 4.881766608614238e-05, "loss": 0.8656, "step": 7122 }, { "epoch": 0.10102421021657668, "grad_norm": 3.28125, "learning_rate": 4.881698701844716e-05, "loss": 0.7929, "step": 7124 }, { "epoch": 0.10105257187020289, "grad_norm": 3.015625, "learning_rate": 4.881630776052407e-05, "loss": 0.8803, "step": 7126 }, { "epoch": 0.1010809335238291, "grad_norm": 3.09375, "learning_rate": 4.8815628312378545e-05, "loss": 0.8243, "step": 7128 }, { "epoch": 0.10110929517745532, "grad_norm": 2.875, "learning_rate": 4.8814948674016016e-05, "loss": 0.8313, "step": 7130 }, { "epoch": 0.10113765683108153, "grad_norm": 3.53125, "learning_rate": 4.881426884544189e-05, "loss": 0.8632, "step": 7132 }, { "epoch": 0.10116601848470776, "grad_norm": 3.0, "learning_rate": 4.881358882666162e-05, "loss": 0.8474, "step": 7134 }, { "epoch": 0.10119438013833397, "grad_norm": 3.484375, "learning_rate": 4.881290861768062e-05, "loss": 0.8663, "step": 7136 }, { "epoch": 0.10122274179196018, "grad_norm": 2.875, "learning_rate": 4.881222821850433e-05, "loss": 0.8217, "step": 7138 }, { "epoch": 0.1012511034455864, "grad_norm": 3.328125, "learning_rate": 4.881154762913819e-05, "loss": 0.8989, "step": 7140 }, { "epoch": 0.10127946509921261, "grad_norm": 2.8125, "learning_rate": 4.881086684958763e-05, "loss": 0.8452, "step": 7142 }, { "epoch": 0.10130782675283882, "grad_norm": 3.46875, "learning_rate": 4.881018587985809e-05, "loss": 0.8775, "step": 7144 }, { "epoch": 0.10133618840646504, "grad_norm": 3.3125, "learning_rate": 4.8809504719955e-05, "loss": 0.9285, "step": 7146 }, { "epoch": 0.10136455006009125, "grad_norm": 3.515625, "learning_rate": 4.880882336988381e-05, "loss": 0.9253, "step": 7148 }, { "epoch": 0.10139291171371746, "grad_norm": 3.46875, "learning_rate": 4.8808141829649964e-05, "loss": 0.9214, "step": 7150 }, { "epoch": 0.10142127336734369, "grad_norm": 2.90625, "learning_rate": 4.8807460099258906e-05, "loss": 0.8635, "step": 7152 }, { "epoch": 0.1014496350209699, "grad_norm": 2.78125, "learning_rate": 4.8806778178716066e-05, "loss": 0.8674, "step": 7154 }, { "epoch": 0.10147799667459612, "grad_norm": 3.21875, "learning_rate": 4.8806096068026906e-05, "loss": 0.8859, "step": 7156 }, { "epoch": 0.10150635832822233, "grad_norm": 2.90625, "learning_rate": 4.8805413767196874e-05, "loss": 0.8697, "step": 7158 }, { "epoch": 0.10153471998184854, "grad_norm": 2.96875, "learning_rate": 4.8804731276231405e-05, "loss": 0.8871, "step": 7160 }, { "epoch": 0.10156308163547476, "grad_norm": 3.046875, "learning_rate": 4.880404859513596e-05, "loss": 0.8387, "step": 7162 }, { "epoch": 0.10159144328910097, "grad_norm": 2.765625, "learning_rate": 4.8803365723915995e-05, "loss": 0.8323, "step": 7164 }, { "epoch": 0.10161980494272718, "grad_norm": 3.234375, "learning_rate": 4.880268266257696e-05, "loss": 0.8585, "step": 7166 }, { "epoch": 0.1016481665963534, "grad_norm": 3.109375, "learning_rate": 4.880199941112431e-05, "loss": 0.9004, "step": 7168 }, { "epoch": 0.10167652824997961, "grad_norm": 3.40625, "learning_rate": 4.88013159695635e-05, "loss": 0.9089, "step": 7170 }, { "epoch": 0.10170488990360584, "grad_norm": 3.21875, "learning_rate": 4.880063233789999e-05, "loss": 0.8965, "step": 7172 }, { "epoch": 0.10173325155723205, "grad_norm": 2.671875, "learning_rate": 4.879994851613925e-05, "loss": 0.8207, "step": 7174 }, { "epoch": 0.10176161321085826, "grad_norm": 3.03125, "learning_rate": 4.879926450428674e-05, "loss": 0.8805, "step": 7176 }, { "epoch": 0.10178997486448448, "grad_norm": 3.09375, "learning_rate": 4.87985803023479e-05, "loss": 0.8897, "step": 7178 }, { "epoch": 0.10181833651811069, "grad_norm": 3.15625, "learning_rate": 4.879789591032822e-05, "loss": 0.8809, "step": 7180 }, { "epoch": 0.1018466981717369, "grad_norm": 3.21875, "learning_rate": 4.879721132823315e-05, "loss": 0.865, "step": 7182 }, { "epoch": 0.10187505982536312, "grad_norm": 3.125, "learning_rate": 4.8796526556068176e-05, "loss": 0.8321, "step": 7184 }, { "epoch": 0.10190342147898933, "grad_norm": 2.8125, "learning_rate": 4.8795841593838757e-05, "loss": 0.8595, "step": 7186 }, { "epoch": 0.10193178313261554, "grad_norm": 2.9375, "learning_rate": 4.8795156441550364e-05, "loss": 0.8677, "step": 7188 }, { "epoch": 0.10196014478624177, "grad_norm": 3.0, "learning_rate": 4.879447109920846e-05, "loss": 0.8886, "step": 7190 }, { "epoch": 0.10198850643986798, "grad_norm": 3.125, "learning_rate": 4.879378556681854e-05, "loss": 0.883, "step": 7192 }, { "epoch": 0.1020168680934942, "grad_norm": 3.5625, "learning_rate": 4.879309984438606e-05, "loss": 0.9499, "step": 7194 }, { "epoch": 0.10204522974712041, "grad_norm": 3.328125, "learning_rate": 4.8792413931916504e-05, "loss": 0.8033, "step": 7196 }, { "epoch": 0.10207359140074662, "grad_norm": 2.84375, "learning_rate": 4.8791727829415356e-05, "loss": 0.8815, "step": 7198 }, { "epoch": 0.10210195305437283, "grad_norm": 3.109375, "learning_rate": 4.8791041536888086e-05, "loss": 0.9328, "step": 7200 }, { "epoch": 0.10213031470799905, "grad_norm": 3.09375, "learning_rate": 4.879035505434019e-05, "loss": 0.9427, "step": 7202 }, { "epoch": 0.10215867636162526, "grad_norm": 3.140625, "learning_rate": 4.8789668381777133e-05, "loss": 0.9012, "step": 7204 }, { "epoch": 0.10218703801525147, "grad_norm": 3.390625, "learning_rate": 4.87889815192044e-05, "loss": 0.847, "step": 7206 }, { "epoch": 0.10221539966887769, "grad_norm": 3.203125, "learning_rate": 4.87882944666275e-05, "loss": 0.9231, "step": 7208 }, { "epoch": 0.10224376132250391, "grad_norm": 2.828125, "learning_rate": 4.87876072240519e-05, "loss": 0.8774, "step": 7210 }, { "epoch": 0.10227212297613013, "grad_norm": 2.59375, "learning_rate": 4.8786919791483094e-05, "loss": 0.8415, "step": 7212 }, { "epoch": 0.10230048462975634, "grad_norm": 3.3125, "learning_rate": 4.878623216892657e-05, "loss": 0.8605, "step": 7214 }, { "epoch": 0.10232884628338255, "grad_norm": 3.546875, "learning_rate": 4.878554435638783e-05, "loss": 0.9108, "step": 7216 }, { "epoch": 0.10235720793700877, "grad_norm": 3.25, "learning_rate": 4.8784856353872355e-05, "loss": 0.9039, "step": 7218 }, { "epoch": 0.10238556959063498, "grad_norm": 2.96875, "learning_rate": 4.878416816138565e-05, "loss": 0.8559, "step": 7220 }, { "epoch": 0.1024139312442612, "grad_norm": 2.546875, "learning_rate": 4.8783479778933207e-05, "loss": 0.8971, "step": 7222 }, { "epoch": 0.10244229289788741, "grad_norm": 2.890625, "learning_rate": 4.8782791206520516e-05, "loss": 0.8577, "step": 7224 }, { "epoch": 0.10247065455151362, "grad_norm": 3.34375, "learning_rate": 4.87821024441531e-05, "loss": 0.8918, "step": 7226 }, { "epoch": 0.10249901620513983, "grad_norm": 3.34375, "learning_rate": 4.878141349183643e-05, "loss": 0.9011, "step": 7228 }, { "epoch": 0.10252737785876606, "grad_norm": 3.171875, "learning_rate": 4.878072434957604e-05, "loss": 0.8378, "step": 7230 }, { "epoch": 0.10255573951239227, "grad_norm": 2.890625, "learning_rate": 4.87800350173774e-05, "loss": 0.885, "step": 7232 }, { "epoch": 0.10258410116601849, "grad_norm": 3.28125, "learning_rate": 4.877934549524604e-05, "loss": 0.863, "step": 7234 }, { "epoch": 0.1026124628196447, "grad_norm": 3.5, "learning_rate": 4.877865578318747e-05, "loss": 0.8909, "step": 7236 }, { "epoch": 0.10264082447327091, "grad_norm": 2.96875, "learning_rate": 4.8777965881207187e-05, "loss": 0.8651, "step": 7238 }, { "epoch": 0.10266918612689713, "grad_norm": 2.90625, "learning_rate": 4.8777275789310704e-05, "loss": 0.8706, "step": 7240 }, { "epoch": 0.10269754778052334, "grad_norm": 2.90625, "learning_rate": 4.877658550750353e-05, "loss": 0.8697, "step": 7242 }, { "epoch": 0.10272590943414955, "grad_norm": 2.921875, "learning_rate": 4.877589503579119e-05, "loss": 0.8967, "step": 7244 }, { "epoch": 0.10275427108777577, "grad_norm": 3.1875, "learning_rate": 4.877520437417919e-05, "loss": 0.8963, "step": 7246 }, { "epoch": 0.102782632741402, "grad_norm": 3.328125, "learning_rate": 4.877451352267304e-05, "loss": 0.8984, "step": 7248 }, { "epoch": 0.10281099439502821, "grad_norm": 3.21875, "learning_rate": 4.877382248127826e-05, "loss": 0.8345, "step": 7250 }, { "epoch": 0.10283935604865442, "grad_norm": 2.640625, "learning_rate": 4.877313125000038e-05, "loss": 0.8885, "step": 7252 }, { "epoch": 0.10286771770228063, "grad_norm": 3.296875, "learning_rate": 4.877243982884492e-05, "loss": 0.875, "step": 7254 }, { "epoch": 0.10289607935590685, "grad_norm": 3.015625, "learning_rate": 4.877174821781739e-05, "loss": 0.8792, "step": 7256 }, { "epoch": 0.10292444100953306, "grad_norm": 3.015625, "learning_rate": 4.877105641692332e-05, "loss": 0.9297, "step": 7258 }, { "epoch": 0.10295280266315927, "grad_norm": 2.9375, "learning_rate": 4.877036442616825e-05, "loss": 0.889, "step": 7260 }, { "epoch": 0.10298116431678549, "grad_norm": 3.21875, "learning_rate": 4.876967224555768e-05, "loss": 0.8604, "step": 7262 }, { "epoch": 0.1030095259704117, "grad_norm": 3.125, "learning_rate": 4.876897987509716e-05, "loss": 0.8782, "step": 7264 }, { "epoch": 0.10303788762403791, "grad_norm": 3.203125, "learning_rate": 4.87682873147922e-05, "loss": 0.8677, "step": 7266 }, { "epoch": 0.10306624927766414, "grad_norm": 2.9375, "learning_rate": 4.876759456464836e-05, "loss": 0.845, "step": 7268 }, { "epoch": 0.10309461093129035, "grad_norm": 2.921875, "learning_rate": 4.876690162467115e-05, "loss": 0.8587, "step": 7270 }, { "epoch": 0.10312297258491657, "grad_norm": 3.34375, "learning_rate": 4.8766208494866114e-05, "loss": 0.8591, "step": 7272 }, { "epoch": 0.10315133423854278, "grad_norm": 3.390625, "learning_rate": 4.8765515175238784e-05, "loss": 0.8937, "step": 7274 }, { "epoch": 0.103179695892169, "grad_norm": 3.3125, "learning_rate": 4.8764821665794705e-05, "loss": 0.9215, "step": 7276 }, { "epoch": 0.1032080575457952, "grad_norm": 2.84375, "learning_rate": 4.87641279665394e-05, "loss": 0.9414, "step": 7278 }, { "epoch": 0.10323641919942142, "grad_norm": 3.140625, "learning_rate": 4.876343407747842e-05, "loss": 0.8638, "step": 7280 }, { "epoch": 0.10326478085304763, "grad_norm": 3.078125, "learning_rate": 4.876273999861731e-05, "loss": 0.8491, "step": 7282 }, { "epoch": 0.10329314250667385, "grad_norm": 2.9375, "learning_rate": 4.876204572996161e-05, "loss": 0.905, "step": 7284 }, { "epoch": 0.10332150416030006, "grad_norm": 3.0, "learning_rate": 4.876135127151687e-05, "loss": 0.8716, "step": 7286 }, { "epoch": 0.10334986581392629, "grad_norm": 3.09375, "learning_rate": 4.876065662328863e-05, "loss": 0.9147, "step": 7288 }, { "epoch": 0.1033782274675525, "grad_norm": 3.03125, "learning_rate": 4.875996178528244e-05, "loss": 0.8869, "step": 7290 }, { "epoch": 0.10340658912117871, "grad_norm": 2.90625, "learning_rate": 4.8759266757503855e-05, "loss": 0.899, "step": 7292 }, { "epoch": 0.10343495077480493, "grad_norm": 3.171875, "learning_rate": 4.875857153995841e-05, "loss": 0.8916, "step": 7294 }, { "epoch": 0.10346331242843114, "grad_norm": 3.390625, "learning_rate": 4.875787613265168e-05, "loss": 0.8721, "step": 7296 }, { "epoch": 0.10349167408205735, "grad_norm": 3.09375, "learning_rate": 4.8757180535589205e-05, "loss": 0.8522, "step": 7298 }, { "epoch": 0.10352003573568357, "grad_norm": 3.125, "learning_rate": 4.8756484748776545e-05, "loss": 0.8732, "step": 7300 }, { "epoch": 0.10354839738930978, "grad_norm": 2.828125, "learning_rate": 4.8755788772219256e-05, "loss": 0.8753, "step": 7302 }, { "epoch": 0.10357675904293599, "grad_norm": 3.046875, "learning_rate": 4.87550926059229e-05, "loss": 0.9476, "step": 7304 }, { "epoch": 0.10360512069656222, "grad_norm": 3.375, "learning_rate": 4.875439624989303e-05, "loss": 0.9234, "step": 7306 }, { "epoch": 0.10363348235018843, "grad_norm": 3.359375, "learning_rate": 4.8753699704135214e-05, "loss": 0.9191, "step": 7308 }, { "epoch": 0.10366184400381465, "grad_norm": 3.140625, "learning_rate": 4.875300296865502e-05, "loss": 0.9309, "step": 7310 }, { "epoch": 0.10369020565744086, "grad_norm": 2.8125, "learning_rate": 4.8752306043458e-05, "loss": 0.8689, "step": 7312 }, { "epoch": 0.10371856731106707, "grad_norm": 3.125, "learning_rate": 4.875160892854973e-05, "loss": 0.8908, "step": 7314 }, { "epoch": 0.10374692896469329, "grad_norm": 3.328125, "learning_rate": 4.875091162393578e-05, "loss": 0.9103, "step": 7316 }, { "epoch": 0.1037752906183195, "grad_norm": 2.953125, "learning_rate": 4.875021412962171e-05, "loss": 0.8483, "step": 7318 }, { "epoch": 0.10380365227194571, "grad_norm": 3.296875, "learning_rate": 4.87495164456131e-05, "loss": 0.8228, "step": 7320 }, { "epoch": 0.10383201392557193, "grad_norm": 3.25, "learning_rate": 4.874881857191551e-05, "loss": 0.9272, "step": 7322 }, { "epoch": 0.10386037557919814, "grad_norm": 3.4375, "learning_rate": 4.874812050853452e-05, "loss": 0.861, "step": 7324 }, { "epoch": 0.10388873723282437, "grad_norm": 2.734375, "learning_rate": 4.8747422255475715e-05, "loss": 0.8491, "step": 7326 }, { "epoch": 0.10391709888645058, "grad_norm": 3.65625, "learning_rate": 4.874672381274467e-05, "loss": 0.8729, "step": 7328 }, { "epoch": 0.10394546054007679, "grad_norm": 3.0625, "learning_rate": 4.874602518034694e-05, "loss": 0.8043, "step": 7330 }, { "epoch": 0.103973822193703, "grad_norm": 3.109375, "learning_rate": 4.8745326358288133e-05, "loss": 0.8663, "step": 7332 }, { "epoch": 0.10400218384732922, "grad_norm": 3.3125, "learning_rate": 4.8744627346573825e-05, "loss": 0.8564, "step": 7334 }, { "epoch": 0.10403054550095543, "grad_norm": 3.21875, "learning_rate": 4.874392814520959e-05, "loss": 0.8679, "step": 7336 }, { "epoch": 0.10405890715458165, "grad_norm": 2.84375, "learning_rate": 4.874322875420102e-05, "loss": 0.8373, "step": 7338 }, { "epoch": 0.10408726880820786, "grad_norm": 2.921875, "learning_rate": 4.87425291735537e-05, "loss": 0.8581, "step": 7340 }, { "epoch": 0.10411563046183407, "grad_norm": 3.484375, "learning_rate": 4.874182940327321e-05, "loss": 0.9144, "step": 7342 }, { "epoch": 0.1041439921154603, "grad_norm": 3.609375, "learning_rate": 4.874112944336514e-05, "loss": 0.8847, "step": 7344 }, { "epoch": 0.10417235376908651, "grad_norm": 3.109375, "learning_rate": 4.87404292938351e-05, "loss": 0.926, "step": 7346 }, { "epoch": 0.10420071542271273, "grad_norm": 3.328125, "learning_rate": 4.873972895468866e-05, "loss": 0.9011, "step": 7348 }, { "epoch": 0.10422907707633894, "grad_norm": 2.90625, "learning_rate": 4.8739028425931424e-05, "loss": 0.8559, "step": 7350 }, { "epoch": 0.10425743872996515, "grad_norm": 3.03125, "learning_rate": 4.8738327707568974e-05, "loss": 0.9049, "step": 7352 }, { "epoch": 0.10428580038359136, "grad_norm": 2.8125, "learning_rate": 4.8737626799606926e-05, "loss": 0.8314, "step": 7354 }, { "epoch": 0.10431416203721758, "grad_norm": 3.265625, "learning_rate": 4.873692570205087e-05, "loss": 0.9264, "step": 7356 }, { "epoch": 0.10434252369084379, "grad_norm": 3.03125, "learning_rate": 4.8736224414906406e-05, "loss": 0.845, "step": 7358 }, { "epoch": 0.10437088534447, "grad_norm": 3.0625, "learning_rate": 4.8735522938179135e-05, "loss": 0.8742, "step": 7360 }, { "epoch": 0.10439924699809622, "grad_norm": 3.546875, "learning_rate": 4.8734821271874656e-05, "loss": 0.927, "step": 7362 }, { "epoch": 0.10442760865172244, "grad_norm": 2.90625, "learning_rate": 4.873411941599857e-05, "loss": 0.8471, "step": 7364 }, { "epoch": 0.10445597030534866, "grad_norm": 3.359375, "learning_rate": 4.87334173705565e-05, "loss": 0.8937, "step": 7366 }, { "epoch": 0.10448433195897487, "grad_norm": 3.03125, "learning_rate": 4.873271513555404e-05, "loss": 0.9161, "step": 7368 }, { "epoch": 0.10451269361260108, "grad_norm": 3.1875, "learning_rate": 4.87320127109968e-05, "loss": 0.8386, "step": 7370 }, { "epoch": 0.1045410552662273, "grad_norm": 3.1875, "learning_rate": 4.873131009689039e-05, "loss": 0.8389, "step": 7372 }, { "epoch": 0.10456941691985351, "grad_norm": 3.21875, "learning_rate": 4.8730607293240424e-05, "loss": 0.9325, "step": 7374 }, { "epoch": 0.10459777857347972, "grad_norm": 3.046875, "learning_rate": 4.872990430005252e-05, "loss": 0.905, "step": 7376 }, { "epoch": 0.10462614022710594, "grad_norm": 3.0, "learning_rate": 4.872920111733228e-05, "loss": 0.895, "step": 7378 }, { "epoch": 0.10465450188073215, "grad_norm": 3.140625, "learning_rate": 4.8728497745085334e-05, "loss": 0.9069, "step": 7380 }, { "epoch": 0.10468286353435836, "grad_norm": 3.03125, "learning_rate": 4.872779418331729e-05, "loss": 0.8949, "step": 7382 }, { "epoch": 0.10471122518798459, "grad_norm": 3.0, "learning_rate": 4.872709043203377e-05, "loss": 0.9123, "step": 7384 }, { "epoch": 0.1047395868416108, "grad_norm": 3.359375, "learning_rate": 4.872638649124039e-05, "loss": 0.8278, "step": 7386 }, { "epoch": 0.10476794849523702, "grad_norm": 3.046875, "learning_rate": 4.872568236094279e-05, "loss": 0.8543, "step": 7388 }, { "epoch": 0.10479631014886323, "grad_norm": 3.171875, "learning_rate": 4.872497804114658e-05, "loss": 0.8938, "step": 7390 }, { "epoch": 0.10482467180248944, "grad_norm": 3.28125, "learning_rate": 4.872427353185739e-05, "loss": 0.9027, "step": 7392 }, { "epoch": 0.10485303345611566, "grad_norm": 2.96875, "learning_rate": 4.872356883308084e-05, "loss": 0.8652, "step": 7394 }, { "epoch": 0.10488139510974187, "grad_norm": 2.984375, "learning_rate": 4.872286394482257e-05, "loss": 0.8498, "step": 7396 }, { "epoch": 0.10490975676336808, "grad_norm": 3.265625, "learning_rate": 4.87221588670882e-05, "loss": 0.9081, "step": 7398 }, { "epoch": 0.1049381184169943, "grad_norm": 3.453125, "learning_rate": 4.872145359988336e-05, "loss": 0.8792, "step": 7400 }, { "epoch": 0.10496648007062052, "grad_norm": 2.984375, "learning_rate": 4.872074814321369e-05, "loss": 0.871, "step": 7402 }, { "epoch": 0.10499484172424674, "grad_norm": 3.03125, "learning_rate": 4.8720042497084825e-05, "loss": 0.8327, "step": 7404 }, { "epoch": 0.10502320337787295, "grad_norm": 3.015625, "learning_rate": 4.871933666150239e-05, "loss": 0.8459, "step": 7406 }, { "epoch": 0.10505156503149916, "grad_norm": 2.796875, "learning_rate": 4.871863063647204e-05, "loss": 0.8134, "step": 7408 }, { "epoch": 0.10507992668512538, "grad_norm": 3.1875, "learning_rate": 4.8717924421999406e-05, "loss": 0.8849, "step": 7410 }, { "epoch": 0.10510828833875159, "grad_norm": 3.4375, "learning_rate": 4.871721801809013e-05, "loss": 0.8259, "step": 7412 }, { "epoch": 0.1051366499923778, "grad_norm": 3.296875, "learning_rate": 4.8716511424749845e-05, "loss": 0.9026, "step": 7414 }, { "epoch": 0.10516501164600402, "grad_norm": 3.09375, "learning_rate": 4.87158046419842e-05, "loss": 0.8755, "step": 7416 }, { "epoch": 0.10519337329963023, "grad_norm": 2.859375, "learning_rate": 4.871509766979884e-05, "loss": 0.8278, "step": 7418 }, { "epoch": 0.10522173495325644, "grad_norm": 3.390625, "learning_rate": 4.871439050819942e-05, "loss": 0.8896, "step": 7420 }, { "epoch": 0.10525009660688267, "grad_norm": 2.921875, "learning_rate": 4.871368315719158e-05, "loss": 0.8555, "step": 7422 }, { "epoch": 0.10527845826050888, "grad_norm": 2.859375, "learning_rate": 4.871297561678096e-05, "loss": 0.8728, "step": 7424 }, { "epoch": 0.1053068199141351, "grad_norm": 3.03125, "learning_rate": 4.871226788697324e-05, "loss": 0.881, "step": 7426 }, { "epoch": 0.10533518156776131, "grad_norm": 3.421875, "learning_rate": 4.871155996777404e-05, "loss": 0.8798, "step": 7428 }, { "epoch": 0.10536354322138752, "grad_norm": 2.875, "learning_rate": 4.871085185918904e-05, "loss": 0.7989, "step": 7430 }, { "epoch": 0.10539190487501374, "grad_norm": 2.71875, "learning_rate": 4.871014356122388e-05, "loss": 0.8545, "step": 7432 }, { "epoch": 0.10542026652863995, "grad_norm": 3.40625, "learning_rate": 4.8709435073884225e-05, "loss": 0.8969, "step": 7434 }, { "epoch": 0.10544862818226616, "grad_norm": 2.921875, "learning_rate": 4.870872639717572e-05, "loss": 0.8759, "step": 7436 }, { "epoch": 0.10547698983589238, "grad_norm": 3.03125, "learning_rate": 4.8708017531104046e-05, "loss": 0.8897, "step": 7438 }, { "epoch": 0.10550535148951859, "grad_norm": 2.75, "learning_rate": 4.8707308475674854e-05, "loss": 0.8757, "step": 7440 }, { "epoch": 0.10553371314314482, "grad_norm": 3.125, "learning_rate": 4.8706599230893805e-05, "loss": 0.9025, "step": 7442 }, { "epoch": 0.10556207479677103, "grad_norm": 3.359375, "learning_rate": 4.870588979676657e-05, "loss": 0.8409, "step": 7444 }, { "epoch": 0.10559043645039724, "grad_norm": 3.015625, "learning_rate": 4.870518017329881e-05, "loss": 0.8367, "step": 7446 }, { "epoch": 0.10561879810402346, "grad_norm": 3.34375, "learning_rate": 4.870447036049619e-05, "loss": 0.9288, "step": 7448 }, { "epoch": 0.10564715975764967, "grad_norm": 3.140625, "learning_rate": 4.870376035836439e-05, "loss": 0.8653, "step": 7450 }, { "epoch": 0.10567552141127588, "grad_norm": 2.96875, "learning_rate": 4.870305016690908e-05, "loss": 0.896, "step": 7452 }, { "epoch": 0.1057038830649021, "grad_norm": 2.859375, "learning_rate": 4.8702339786135916e-05, "loss": 0.8739, "step": 7454 }, { "epoch": 0.10573224471852831, "grad_norm": 3.234375, "learning_rate": 4.870162921605059e-05, "loss": 0.9137, "step": 7456 }, { "epoch": 0.10576060637215452, "grad_norm": 3.125, "learning_rate": 4.870091845665876e-05, "loss": 0.8627, "step": 7458 }, { "epoch": 0.10578896802578075, "grad_norm": 3.078125, "learning_rate": 4.870020750796613e-05, "loss": 0.861, "step": 7460 }, { "epoch": 0.10581732967940696, "grad_norm": 2.859375, "learning_rate": 4.8699496369978346e-05, "loss": 0.8924, "step": 7462 }, { "epoch": 0.10584569133303318, "grad_norm": 3.203125, "learning_rate": 4.8698785042701114e-05, "loss": 0.9369, "step": 7464 }, { "epoch": 0.10587405298665939, "grad_norm": 2.8125, "learning_rate": 4.8698073526140106e-05, "loss": 0.8787, "step": 7466 }, { "epoch": 0.1059024146402856, "grad_norm": 2.828125, "learning_rate": 4.8697361820301e-05, "loss": 0.8653, "step": 7468 }, { "epoch": 0.10593077629391182, "grad_norm": 3.234375, "learning_rate": 4.869664992518949e-05, "loss": 0.8889, "step": 7470 }, { "epoch": 0.10595913794753803, "grad_norm": 3.03125, "learning_rate": 4.869593784081124e-05, "loss": 0.8551, "step": 7472 }, { "epoch": 0.10598749960116424, "grad_norm": 2.953125, "learning_rate": 4.869522556717198e-05, "loss": 0.8701, "step": 7474 }, { "epoch": 0.10601586125479046, "grad_norm": 3.15625, "learning_rate": 4.8694513104277354e-05, "loss": 0.8334, "step": 7476 }, { "epoch": 0.10604422290841667, "grad_norm": 3.078125, "learning_rate": 4.869380045213307e-05, "loss": 0.8744, "step": 7478 }, { "epoch": 0.1060725845620429, "grad_norm": 2.796875, "learning_rate": 4.8693087610744834e-05, "loss": 0.8219, "step": 7480 }, { "epoch": 0.10610094621566911, "grad_norm": 3.53125, "learning_rate": 4.8692374580118314e-05, "loss": 0.8934, "step": 7482 }, { "epoch": 0.10612930786929532, "grad_norm": 3.296875, "learning_rate": 4.8691661360259226e-05, "loss": 0.8704, "step": 7484 }, { "epoch": 0.10615766952292154, "grad_norm": 2.90625, "learning_rate": 4.869094795117326e-05, "loss": 0.8408, "step": 7486 }, { "epoch": 0.10618603117654775, "grad_norm": 3.046875, "learning_rate": 4.86902343528661e-05, "loss": 0.8397, "step": 7488 }, { "epoch": 0.10621439283017396, "grad_norm": 3.1875, "learning_rate": 4.868952056534347e-05, "loss": 0.8923, "step": 7490 }, { "epoch": 0.10624275448380018, "grad_norm": 3.1875, "learning_rate": 4.868880658861106e-05, "loss": 0.8336, "step": 7492 }, { "epoch": 0.10627111613742639, "grad_norm": 3.203125, "learning_rate": 4.868809242267457e-05, "loss": 0.9178, "step": 7494 }, { "epoch": 0.1062994777910526, "grad_norm": 3.21875, "learning_rate": 4.86873780675397e-05, "loss": 0.8791, "step": 7496 }, { "epoch": 0.10632783944467883, "grad_norm": 3.4375, "learning_rate": 4.8686663523212165e-05, "loss": 0.9042, "step": 7498 }, { "epoch": 0.10635620109830504, "grad_norm": 3.078125, "learning_rate": 4.8685948789697665e-05, "loss": 0.9165, "step": 7500 }, { "epoch": 0.10638456275193126, "grad_norm": 2.75, "learning_rate": 4.868523386700192e-05, "loss": 0.867, "step": 7502 }, { "epoch": 0.10641292440555747, "grad_norm": 3.484375, "learning_rate": 4.8684518755130625e-05, "loss": 0.8907, "step": 7504 }, { "epoch": 0.10644128605918368, "grad_norm": 3.453125, "learning_rate": 4.86838034540895e-05, "loss": 0.819, "step": 7506 }, { "epoch": 0.1064696477128099, "grad_norm": 3.21875, "learning_rate": 4.8683087963884265e-05, "loss": 0.9127, "step": 7508 }, { "epoch": 0.10649800936643611, "grad_norm": 2.859375, "learning_rate": 4.868237228452062e-05, "loss": 0.8602, "step": 7510 }, { "epoch": 0.10652637102006232, "grad_norm": 2.90625, "learning_rate": 4.868165641600429e-05, "loss": 0.8292, "step": 7512 }, { "epoch": 0.10655473267368853, "grad_norm": 2.828125, "learning_rate": 4.868094035834099e-05, "loss": 0.814, "step": 7514 }, { "epoch": 0.10658309432731475, "grad_norm": 2.703125, "learning_rate": 4.8680224111536446e-05, "loss": 0.8579, "step": 7516 }, { "epoch": 0.10661145598094098, "grad_norm": 2.890625, "learning_rate": 4.867950767559636e-05, "loss": 0.8953, "step": 7518 }, { "epoch": 0.10663981763456719, "grad_norm": 3.03125, "learning_rate": 4.867879105052647e-05, "loss": 0.8894, "step": 7520 }, { "epoch": 0.1066681792881934, "grad_norm": 2.9375, "learning_rate": 4.8678074236332505e-05, "loss": 0.9014, "step": 7522 }, { "epoch": 0.10669654094181961, "grad_norm": 2.9375, "learning_rate": 4.867735723302017e-05, "loss": 0.8258, "step": 7524 }, { "epoch": 0.10672490259544583, "grad_norm": 3.203125, "learning_rate": 4.867664004059521e-05, "loss": 0.8979, "step": 7526 }, { "epoch": 0.10675326424907204, "grad_norm": 3.53125, "learning_rate": 4.8675922659063346e-05, "loss": 0.8396, "step": 7528 }, { "epoch": 0.10678162590269825, "grad_norm": 3.359375, "learning_rate": 4.867520508843031e-05, "loss": 0.8861, "step": 7530 }, { "epoch": 0.10680998755632447, "grad_norm": 2.90625, "learning_rate": 4.8674487328701836e-05, "loss": 0.8807, "step": 7532 }, { "epoch": 0.10683834920995068, "grad_norm": 3.203125, "learning_rate": 4.8673769379883635e-05, "loss": 0.8782, "step": 7534 }, { "epoch": 0.1068667108635769, "grad_norm": 2.953125, "learning_rate": 4.8673051241981476e-05, "loss": 0.9107, "step": 7536 }, { "epoch": 0.10689507251720312, "grad_norm": 2.875, "learning_rate": 4.867233291500107e-05, "loss": 0.82, "step": 7538 }, { "epoch": 0.10692343417082933, "grad_norm": 3.109375, "learning_rate": 4.867161439894817e-05, "loss": 0.8931, "step": 7540 }, { "epoch": 0.10695179582445555, "grad_norm": 3.21875, "learning_rate": 4.86708956938285e-05, "loss": 0.9229, "step": 7542 }, { "epoch": 0.10698015747808176, "grad_norm": 2.78125, "learning_rate": 4.867017679964781e-05, "loss": 0.8501, "step": 7544 }, { "epoch": 0.10700851913170797, "grad_norm": 3.28125, "learning_rate": 4.8669457716411835e-05, "loss": 0.8319, "step": 7546 }, { "epoch": 0.10703688078533419, "grad_norm": 3.109375, "learning_rate": 4.8668738444126324e-05, "loss": 0.9763, "step": 7548 }, { "epoch": 0.1070652424389604, "grad_norm": 2.953125, "learning_rate": 4.8668018982797026e-05, "loss": 0.9042, "step": 7550 }, { "epoch": 0.10709360409258661, "grad_norm": 2.71875, "learning_rate": 4.866729933242968e-05, "loss": 0.8283, "step": 7552 }, { "epoch": 0.10712196574621283, "grad_norm": 3.4375, "learning_rate": 4.866657949303003e-05, "loss": 0.8516, "step": 7554 }, { "epoch": 0.10715032739983905, "grad_norm": 3.078125, "learning_rate": 4.866585946460384e-05, "loss": 0.862, "step": 7556 }, { "epoch": 0.10717868905346527, "grad_norm": 2.90625, "learning_rate": 4.8665139247156844e-05, "loss": 0.8642, "step": 7558 }, { "epoch": 0.10720705070709148, "grad_norm": 2.953125, "learning_rate": 4.86644188406948e-05, "loss": 0.8646, "step": 7560 }, { "epoch": 0.1072354123607177, "grad_norm": 3.046875, "learning_rate": 4.8663698245223466e-05, "loss": 0.819, "step": 7562 }, { "epoch": 0.10726377401434391, "grad_norm": 3.109375, "learning_rate": 4.86629774607486e-05, "loss": 0.8754, "step": 7564 }, { "epoch": 0.10729213566797012, "grad_norm": 3.0625, "learning_rate": 4.866225648727595e-05, "loss": 0.9102, "step": 7566 }, { "epoch": 0.10732049732159633, "grad_norm": 3.15625, "learning_rate": 4.866153532481129e-05, "loss": 0.8569, "step": 7568 }, { "epoch": 0.10734885897522255, "grad_norm": 2.984375, "learning_rate": 4.866081397336035e-05, "loss": 0.8626, "step": 7570 }, { "epoch": 0.10737722062884876, "grad_norm": 3.140625, "learning_rate": 4.866009243292893e-05, "loss": 0.873, "step": 7572 }, { "epoch": 0.10740558228247497, "grad_norm": 2.921875, "learning_rate": 4.8659370703522766e-05, "loss": 0.8573, "step": 7574 }, { "epoch": 0.1074339439361012, "grad_norm": 3.359375, "learning_rate": 4.865864878514763e-05, "loss": 0.9001, "step": 7576 }, { "epoch": 0.10746230558972741, "grad_norm": 3.0625, "learning_rate": 4.865792667780928e-05, "loss": 0.8549, "step": 7578 }, { "epoch": 0.10749066724335363, "grad_norm": 2.6875, "learning_rate": 4.86572043815135e-05, "loss": 0.8648, "step": 7580 }, { "epoch": 0.10751902889697984, "grad_norm": 2.921875, "learning_rate": 4.865648189626605e-05, "loss": 0.9075, "step": 7582 }, { "epoch": 0.10754739055060605, "grad_norm": 2.609375, "learning_rate": 4.86557592220727e-05, "loss": 0.8659, "step": 7584 }, { "epoch": 0.10757575220423227, "grad_norm": 2.8125, "learning_rate": 4.865503635893923e-05, "loss": 0.8087, "step": 7586 }, { "epoch": 0.10760411385785848, "grad_norm": 3.109375, "learning_rate": 4.86543133068714e-05, "loss": 0.899, "step": 7588 }, { "epoch": 0.1076324755114847, "grad_norm": 3.453125, "learning_rate": 4.8653590065874996e-05, "loss": 0.8587, "step": 7590 }, { "epoch": 0.1076608371651109, "grad_norm": 2.828125, "learning_rate": 4.865286663595578e-05, "loss": 0.8757, "step": 7592 }, { "epoch": 0.10768919881873712, "grad_norm": 3.203125, "learning_rate": 4.865214301711956e-05, "loss": 0.9114, "step": 7594 }, { "epoch": 0.10771756047236335, "grad_norm": 3.25, "learning_rate": 4.8651419209372076e-05, "loss": 0.8567, "step": 7596 }, { "epoch": 0.10774592212598956, "grad_norm": 3.015625, "learning_rate": 4.8650695212719136e-05, "loss": 0.8875, "step": 7598 }, { "epoch": 0.10777428377961577, "grad_norm": 3.109375, "learning_rate": 4.864997102716652e-05, "loss": 0.8102, "step": 7600 }, { "epoch": 0.10780264543324199, "grad_norm": 3.21875, "learning_rate": 4.8649246652720005e-05, "loss": 0.8942, "step": 7602 }, { "epoch": 0.1078310070868682, "grad_norm": 3.90625, "learning_rate": 4.864852208938538e-05, "loss": 0.8921, "step": 7604 }, { "epoch": 0.10785936874049441, "grad_norm": 2.890625, "learning_rate": 4.8647797337168435e-05, "loss": 0.8501, "step": 7606 }, { "epoch": 0.10788773039412063, "grad_norm": 2.765625, "learning_rate": 4.864707239607495e-05, "loss": 0.8539, "step": 7608 }, { "epoch": 0.10791609204774684, "grad_norm": 3.0625, "learning_rate": 4.864634726611072e-05, "loss": 0.867, "step": 7610 }, { "epoch": 0.10794445370137305, "grad_norm": 3.125, "learning_rate": 4.864562194728154e-05, "loss": 0.861, "step": 7612 }, { "epoch": 0.10797281535499928, "grad_norm": 2.921875, "learning_rate": 4.86448964395932e-05, "loss": 0.8554, "step": 7614 }, { "epoch": 0.10800117700862549, "grad_norm": 3.21875, "learning_rate": 4.8644170743051497e-05, "loss": 0.8579, "step": 7616 }, { "epoch": 0.1080295386622517, "grad_norm": 3.890625, "learning_rate": 4.8643444857662224e-05, "loss": 0.859, "step": 7618 }, { "epoch": 0.10805790031587792, "grad_norm": 3.4375, "learning_rate": 4.864271878343117e-05, "loss": 0.9224, "step": 7620 }, { "epoch": 0.10808626196950413, "grad_norm": 3.0625, "learning_rate": 4.864199252036415e-05, "loss": 0.9292, "step": 7622 }, { "epoch": 0.10811462362313035, "grad_norm": 3.046875, "learning_rate": 4.864126606846697e-05, "loss": 0.9004, "step": 7624 }, { "epoch": 0.10814298527675656, "grad_norm": 3.078125, "learning_rate": 4.8640539427745414e-05, "loss": 0.8514, "step": 7626 }, { "epoch": 0.10817134693038277, "grad_norm": 3.265625, "learning_rate": 4.863981259820529e-05, "loss": 0.8948, "step": 7628 }, { "epoch": 0.10819970858400899, "grad_norm": 3.03125, "learning_rate": 4.863908557985241e-05, "loss": 0.8709, "step": 7630 }, { "epoch": 0.1082280702376352, "grad_norm": 2.921875, "learning_rate": 4.863835837269257e-05, "loss": 0.8388, "step": 7632 }, { "epoch": 0.10825643189126143, "grad_norm": 2.796875, "learning_rate": 4.863763097673159e-05, "loss": 0.8828, "step": 7634 }, { "epoch": 0.10828479354488764, "grad_norm": 2.875, "learning_rate": 4.8636903391975274e-05, "loss": 0.8438, "step": 7636 }, { "epoch": 0.10831315519851385, "grad_norm": 3.046875, "learning_rate": 4.863617561842943e-05, "loss": 0.8806, "step": 7638 }, { "epoch": 0.10834151685214007, "grad_norm": 2.890625, "learning_rate": 4.863544765609988e-05, "loss": 0.8787, "step": 7640 }, { "epoch": 0.10836987850576628, "grad_norm": 2.984375, "learning_rate": 4.863471950499243e-05, "loss": 0.8523, "step": 7642 }, { "epoch": 0.10839824015939249, "grad_norm": 2.71875, "learning_rate": 4.86339911651129e-05, "loss": 0.8835, "step": 7644 }, { "epoch": 0.1084266018130187, "grad_norm": 3.40625, "learning_rate": 4.863326263646711e-05, "loss": 0.8293, "step": 7646 }, { "epoch": 0.10845496346664492, "grad_norm": 3.328125, "learning_rate": 4.863253391906087e-05, "loss": 0.8707, "step": 7648 }, { "epoch": 0.10848332512027113, "grad_norm": 3.25, "learning_rate": 4.863180501290001e-05, "loss": 0.8456, "step": 7650 }, { "epoch": 0.10851168677389736, "grad_norm": 3.140625, "learning_rate": 4.863107591799034e-05, "loss": 0.8222, "step": 7652 }, { "epoch": 0.10854004842752357, "grad_norm": 2.828125, "learning_rate": 4.8630346634337696e-05, "loss": 0.8754, "step": 7654 }, { "epoch": 0.10856841008114979, "grad_norm": 2.765625, "learning_rate": 4.86296171619479e-05, "loss": 0.808, "step": 7656 }, { "epoch": 0.108596771734776, "grad_norm": 3.140625, "learning_rate": 4.862888750082677e-05, "loss": 0.8544, "step": 7658 }, { "epoch": 0.10862513338840221, "grad_norm": 3.21875, "learning_rate": 4.862815765098014e-05, "loss": 0.9172, "step": 7660 }, { "epoch": 0.10865349504202843, "grad_norm": 3.421875, "learning_rate": 4.862742761241384e-05, "loss": 0.8594, "step": 7662 }, { "epoch": 0.10868185669565464, "grad_norm": 2.96875, "learning_rate": 4.86266973851337e-05, "loss": 0.8896, "step": 7664 }, { "epoch": 0.10871021834928085, "grad_norm": 3.078125, "learning_rate": 4.862596696914555e-05, "loss": 0.8482, "step": 7666 }, { "epoch": 0.10873858000290706, "grad_norm": 2.90625, "learning_rate": 4.8625236364455226e-05, "loss": 0.823, "step": 7668 }, { "epoch": 0.10876694165653328, "grad_norm": 3.1875, "learning_rate": 4.8624505571068565e-05, "loss": 0.8739, "step": 7670 }, { "epoch": 0.1087953033101595, "grad_norm": 3.078125, "learning_rate": 4.86237745889914e-05, "loss": 0.8752, "step": 7672 }, { "epoch": 0.10882366496378572, "grad_norm": 3.203125, "learning_rate": 4.862304341822958e-05, "loss": 0.8367, "step": 7674 }, { "epoch": 0.10885202661741193, "grad_norm": 3.046875, "learning_rate": 4.8622312058788935e-05, "loss": 0.841, "step": 7676 }, { "epoch": 0.10888038827103814, "grad_norm": 2.921875, "learning_rate": 4.8621580510675304e-05, "loss": 0.8829, "step": 7678 }, { "epoch": 0.10890874992466436, "grad_norm": 2.828125, "learning_rate": 4.862084877389453e-05, "loss": 0.8568, "step": 7680 }, { "epoch": 0.10893711157829057, "grad_norm": 3.0625, "learning_rate": 4.862011684845246e-05, "loss": 0.9124, "step": 7682 }, { "epoch": 0.10896547323191678, "grad_norm": 3.171875, "learning_rate": 4.8619384734354946e-05, "loss": 0.8851, "step": 7684 }, { "epoch": 0.108993834885543, "grad_norm": 3.28125, "learning_rate": 4.861865243160783e-05, "loss": 0.8675, "step": 7686 }, { "epoch": 0.10902219653916921, "grad_norm": 2.953125, "learning_rate": 4.861791994021696e-05, "loss": 0.8901, "step": 7688 }, { "epoch": 0.10905055819279542, "grad_norm": 3.0625, "learning_rate": 4.861718726018819e-05, "loss": 0.8517, "step": 7690 }, { "epoch": 0.10907891984642165, "grad_norm": 3.15625, "learning_rate": 4.861645439152738e-05, "loss": 0.8966, "step": 7692 }, { "epoch": 0.10910728150004786, "grad_norm": 2.828125, "learning_rate": 4.861572133424036e-05, "loss": 0.8769, "step": 7694 }, { "epoch": 0.10913564315367408, "grad_norm": 3.0, "learning_rate": 4.8614988088333e-05, "loss": 0.8444, "step": 7696 }, { "epoch": 0.10916400480730029, "grad_norm": 3.140625, "learning_rate": 4.861425465381115e-05, "loss": 0.8887, "step": 7698 }, { "epoch": 0.1091923664609265, "grad_norm": 2.859375, "learning_rate": 4.861352103068069e-05, "loss": 0.8668, "step": 7700 }, { "epoch": 0.10922072811455272, "grad_norm": 3.265625, "learning_rate": 4.8612787218947454e-05, "loss": 0.8787, "step": 7702 }, { "epoch": 0.10924908976817893, "grad_norm": 3.28125, "learning_rate": 4.8612053218617306e-05, "loss": 0.9003, "step": 7704 }, { "epoch": 0.10927745142180514, "grad_norm": 3.109375, "learning_rate": 4.861131902969612e-05, "loss": 0.8872, "step": 7706 }, { "epoch": 0.10930581307543136, "grad_norm": 2.9375, "learning_rate": 4.861058465218975e-05, "loss": 0.8587, "step": 7708 }, { "epoch": 0.10933417472905758, "grad_norm": 2.765625, "learning_rate": 4.860985008610407e-05, "loss": 0.8539, "step": 7710 }, { "epoch": 0.1093625363826838, "grad_norm": 3.03125, "learning_rate": 4.860911533144494e-05, "loss": 0.8328, "step": 7712 }, { "epoch": 0.10939089803631001, "grad_norm": 3.421875, "learning_rate": 4.860838038821823e-05, "loss": 0.8593, "step": 7714 }, { "epoch": 0.10941925968993622, "grad_norm": 2.921875, "learning_rate": 4.8607645256429814e-05, "loss": 0.8849, "step": 7716 }, { "epoch": 0.10944762134356244, "grad_norm": 3.25, "learning_rate": 4.860690993608556e-05, "loss": 0.8658, "step": 7718 }, { "epoch": 0.10947598299718865, "grad_norm": 3.265625, "learning_rate": 4.860617442719134e-05, "loss": 0.8538, "step": 7720 }, { "epoch": 0.10950434465081486, "grad_norm": 2.890625, "learning_rate": 4.860543872975303e-05, "loss": 0.8457, "step": 7722 }, { "epoch": 0.10953270630444108, "grad_norm": 2.875, "learning_rate": 4.86047028437765e-05, "loss": 0.8489, "step": 7724 }, { "epoch": 0.10956106795806729, "grad_norm": 2.90625, "learning_rate": 4.860396676926765e-05, "loss": 0.9111, "step": 7726 }, { "epoch": 0.1095894296116935, "grad_norm": 3.21875, "learning_rate": 4.8603230506232335e-05, "loss": 0.8974, "step": 7728 }, { "epoch": 0.10961779126531973, "grad_norm": 2.9375, "learning_rate": 4.860249405467644e-05, "loss": 0.8547, "step": 7730 }, { "epoch": 0.10964615291894594, "grad_norm": 2.96875, "learning_rate": 4.860175741460585e-05, "loss": 0.9357, "step": 7732 }, { "epoch": 0.10967451457257216, "grad_norm": 3.34375, "learning_rate": 4.860102058602646e-05, "loss": 0.8819, "step": 7734 }, { "epoch": 0.10970287622619837, "grad_norm": 3.65625, "learning_rate": 4.860028356894414e-05, "loss": 0.875, "step": 7736 }, { "epoch": 0.10973123787982458, "grad_norm": 3.609375, "learning_rate": 4.859954636336478e-05, "loss": 0.8494, "step": 7738 }, { "epoch": 0.1097595995334508, "grad_norm": 3.203125, "learning_rate": 4.859880896929426e-05, "loss": 0.9084, "step": 7740 }, { "epoch": 0.10978796118707701, "grad_norm": 3.296875, "learning_rate": 4.8598071386738485e-05, "loss": 0.8661, "step": 7742 }, { "epoch": 0.10981632284070322, "grad_norm": 2.90625, "learning_rate": 4.859733361570334e-05, "loss": 0.8116, "step": 7744 }, { "epoch": 0.10984468449432944, "grad_norm": 3.171875, "learning_rate": 4.8596595656194725e-05, "loss": 0.8557, "step": 7746 }, { "epoch": 0.10987304614795565, "grad_norm": 2.78125, "learning_rate": 4.8595857508218524e-05, "loss": 0.821, "step": 7748 }, { "epoch": 0.10990140780158188, "grad_norm": 3.15625, "learning_rate": 4.859511917178063e-05, "loss": 0.8575, "step": 7750 }, { "epoch": 0.10992976945520809, "grad_norm": 3.09375, "learning_rate": 4.8594380646886945e-05, "loss": 0.8532, "step": 7752 }, { "epoch": 0.1099581311088343, "grad_norm": 3.09375, "learning_rate": 4.8593641933543364e-05, "loss": 0.9046, "step": 7754 }, { "epoch": 0.10998649276246052, "grad_norm": 2.90625, "learning_rate": 4.85929030317558e-05, "loss": 0.8357, "step": 7756 }, { "epoch": 0.11001485441608673, "grad_norm": 2.84375, "learning_rate": 4.859216394153014e-05, "loss": 0.8932, "step": 7758 }, { "epoch": 0.11004321606971294, "grad_norm": 3.09375, "learning_rate": 4.85914246628723e-05, "loss": 0.8889, "step": 7760 }, { "epoch": 0.11007157772333916, "grad_norm": 3.3125, "learning_rate": 4.859068519578818e-05, "loss": 0.8687, "step": 7762 }, { "epoch": 0.11009993937696537, "grad_norm": 3.03125, "learning_rate": 4.858994554028367e-05, "loss": 0.8622, "step": 7764 }, { "epoch": 0.11012830103059158, "grad_norm": 3.359375, "learning_rate": 4.8589205696364705e-05, "loss": 0.8766, "step": 7766 }, { "epoch": 0.11015666268421781, "grad_norm": 3.03125, "learning_rate": 4.858846566403717e-05, "loss": 0.8323, "step": 7768 }, { "epoch": 0.11018502433784402, "grad_norm": 3.359375, "learning_rate": 4.858772544330699e-05, "loss": 0.8889, "step": 7770 }, { "epoch": 0.11021338599147024, "grad_norm": 3.0625, "learning_rate": 4.8586985034180076e-05, "loss": 0.8716, "step": 7772 }, { "epoch": 0.11024174764509645, "grad_norm": 3.28125, "learning_rate": 4.8586244436662336e-05, "loss": 0.8842, "step": 7774 }, { "epoch": 0.11027010929872266, "grad_norm": 2.859375, "learning_rate": 4.858550365075969e-05, "loss": 0.8635, "step": 7776 }, { "epoch": 0.11029847095234888, "grad_norm": 3.046875, "learning_rate": 4.858476267647806e-05, "loss": 0.8807, "step": 7778 }, { "epoch": 0.11032683260597509, "grad_norm": 2.828125, "learning_rate": 4.8584021513823344e-05, "loss": 0.8731, "step": 7780 }, { "epoch": 0.1103551942596013, "grad_norm": 3.578125, "learning_rate": 4.858328016280148e-05, "loss": 0.9016, "step": 7782 }, { "epoch": 0.11038355591322752, "grad_norm": 3.125, "learning_rate": 4.8582538623418385e-05, "loss": 0.8853, "step": 7784 }, { "epoch": 0.11041191756685373, "grad_norm": 2.75, "learning_rate": 4.8581796895679986e-05, "loss": 0.7872, "step": 7786 }, { "epoch": 0.11044027922047996, "grad_norm": 3.0625, "learning_rate": 4.85810549795922e-05, "loss": 0.8786, "step": 7788 }, { "epoch": 0.11046864087410617, "grad_norm": 2.78125, "learning_rate": 4.858031287516095e-05, "loss": 0.8093, "step": 7790 }, { "epoch": 0.11049700252773238, "grad_norm": 3.21875, "learning_rate": 4.8579570582392176e-05, "loss": 0.8321, "step": 7792 }, { "epoch": 0.1105253641813586, "grad_norm": 2.609375, "learning_rate": 4.85788281012918e-05, "loss": 0.812, "step": 7794 }, { "epoch": 0.11055372583498481, "grad_norm": 3.109375, "learning_rate": 4.857808543186574e-05, "loss": 0.8467, "step": 7796 }, { "epoch": 0.11058208748861102, "grad_norm": 2.921875, "learning_rate": 4.857734257411994e-05, "loss": 0.8737, "step": 7798 }, { "epoch": 0.11061044914223724, "grad_norm": 2.9375, "learning_rate": 4.857659952806034e-05, "loss": 0.8156, "step": 7800 }, { "epoch": 0.11063881079586345, "grad_norm": 3.140625, "learning_rate": 4.857585629369287e-05, "loss": 0.8812, "step": 7802 }, { "epoch": 0.11066717244948966, "grad_norm": 3.25, "learning_rate": 4.8575112871023456e-05, "loss": 0.8817, "step": 7804 }, { "epoch": 0.11069553410311589, "grad_norm": 3.125, "learning_rate": 4.8574369260058036e-05, "loss": 0.8363, "step": 7806 }, { "epoch": 0.1107238957567421, "grad_norm": 2.984375, "learning_rate": 4.857362546080257e-05, "loss": 0.8146, "step": 7808 }, { "epoch": 0.11075225741036832, "grad_norm": 3.03125, "learning_rate": 4.8572881473262975e-05, "loss": 0.8528, "step": 7810 }, { "epoch": 0.11078061906399453, "grad_norm": 3.109375, "learning_rate": 4.857213729744521e-05, "loss": 0.8651, "step": 7812 }, { "epoch": 0.11080898071762074, "grad_norm": 3.03125, "learning_rate": 4.857139293335521e-05, "loss": 0.8672, "step": 7814 }, { "epoch": 0.11083734237124696, "grad_norm": 2.9375, "learning_rate": 4.8570648380998926e-05, "loss": 0.8589, "step": 7816 }, { "epoch": 0.11086570402487317, "grad_norm": 3.203125, "learning_rate": 4.85699036403823e-05, "loss": 0.8626, "step": 7818 }, { "epoch": 0.11089406567849938, "grad_norm": 3.3125, "learning_rate": 4.8569158711511275e-05, "loss": 0.9068, "step": 7820 }, { "epoch": 0.1109224273321256, "grad_norm": 3.21875, "learning_rate": 4.8568413594391814e-05, "loss": 0.8506, "step": 7822 }, { "epoch": 0.11095078898575181, "grad_norm": 3.453125, "learning_rate": 4.856766828902986e-05, "loss": 0.8823, "step": 7824 }, { "epoch": 0.11097915063937804, "grad_norm": 3.328125, "learning_rate": 4.8566922795431366e-05, "loss": 0.8587, "step": 7826 }, { "epoch": 0.11100751229300425, "grad_norm": 3.015625, "learning_rate": 4.856617711360229e-05, "loss": 0.8912, "step": 7828 }, { "epoch": 0.11103587394663046, "grad_norm": 2.859375, "learning_rate": 4.856543124354858e-05, "loss": 0.8665, "step": 7830 }, { "epoch": 0.11106423560025667, "grad_norm": 2.890625, "learning_rate": 4.8564685185276204e-05, "loss": 0.8964, "step": 7832 }, { "epoch": 0.11109259725388289, "grad_norm": 3.1875, "learning_rate": 4.856393893879111e-05, "loss": 0.8733, "step": 7834 }, { "epoch": 0.1111209589075091, "grad_norm": 3.03125, "learning_rate": 4.856319250409927e-05, "loss": 0.8829, "step": 7836 }, { "epoch": 0.11114932056113531, "grad_norm": 3.03125, "learning_rate": 4.8562445881206645e-05, "loss": 0.8341, "step": 7838 }, { "epoch": 0.11117768221476153, "grad_norm": 3.1875, "learning_rate": 4.856169907011918e-05, "loss": 0.8954, "step": 7840 }, { "epoch": 0.11120604386838774, "grad_norm": 3.625, "learning_rate": 4.856095207084286e-05, "loss": 0.9248, "step": 7842 }, { "epoch": 0.11123440552201395, "grad_norm": 3.46875, "learning_rate": 4.8560204883383645e-05, "loss": 0.8557, "step": 7844 }, { "epoch": 0.11126276717564018, "grad_norm": 2.984375, "learning_rate": 4.85594575077475e-05, "loss": 0.8953, "step": 7846 }, { "epoch": 0.1112911288292664, "grad_norm": 2.8125, "learning_rate": 4.85587099439404e-05, "loss": 0.8676, "step": 7848 }, { "epoch": 0.11131949048289261, "grad_norm": 2.859375, "learning_rate": 4.855796219196831e-05, "loss": 0.8728, "step": 7850 }, { "epoch": 0.11134785213651882, "grad_norm": 2.640625, "learning_rate": 4.85572142518372e-05, "loss": 0.8341, "step": 7852 }, { "epoch": 0.11137621379014503, "grad_norm": 2.90625, "learning_rate": 4.855646612355306e-05, "loss": 0.7628, "step": 7854 }, { "epoch": 0.11140457544377125, "grad_norm": 3.171875, "learning_rate": 4.855571780712184e-05, "loss": 0.8515, "step": 7856 }, { "epoch": 0.11143293709739746, "grad_norm": 3.140625, "learning_rate": 4.855496930254954e-05, "loss": 0.8772, "step": 7858 }, { "epoch": 0.11146129875102367, "grad_norm": 3.171875, "learning_rate": 4.8554220609842124e-05, "loss": 0.8694, "step": 7860 }, { "epoch": 0.11148966040464989, "grad_norm": 2.984375, "learning_rate": 4.8553471729005577e-05, "loss": 0.8692, "step": 7862 }, { "epoch": 0.11151802205827611, "grad_norm": 3.15625, "learning_rate": 4.8552722660045884e-05, "loss": 0.8998, "step": 7864 }, { "epoch": 0.11154638371190233, "grad_norm": 3.1875, "learning_rate": 4.855197340296902e-05, "loss": 0.8899, "step": 7866 }, { "epoch": 0.11157474536552854, "grad_norm": 3.0, "learning_rate": 4.855122395778098e-05, "loss": 0.8794, "step": 7868 }, { "epoch": 0.11160310701915475, "grad_norm": 2.96875, "learning_rate": 4.8550474324487735e-05, "loss": 0.8914, "step": 7870 }, { "epoch": 0.11163146867278097, "grad_norm": 3.3125, "learning_rate": 4.8549724503095286e-05, "loss": 0.861, "step": 7872 }, { "epoch": 0.11165983032640718, "grad_norm": 3.0625, "learning_rate": 4.854897449360962e-05, "loss": 0.847, "step": 7874 }, { "epoch": 0.1116881919800334, "grad_norm": 2.890625, "learning_rate": 4.854822429603672e-05, "loss": 0.8836, "step": 7876 }, { "epoch": 0.11171655363365961, "grad_norm": 2.875, "learning_rate": 4.854747391038258e-05, "loss": 0.8499, "step": 7878 }, { "epoch": 0.11174491528728582, "grad_norm": 2.9375, "learning_rate": 4.85467233366532e-05, "loss": 0.9359, "step": 7880 }, { "epoch": 0.11177327694091203, "grad_norm": 3.359375, "learning_rate": 4.854597257485456e-05, "loss": 0.9127, "step": 7882 }, { "epoch": 0.11180163859453826, "grad_norm": 3.171875, "learning_rate": 4.8545221624992674e-05, "loss": 0.9026, "step": 7884 }, { "epoch": 0.11183000024816447, "grad_norm": 3.046875, "learning_rate": 4.854447048707354e-05, "loss": 0.8246, "step": 7886 }, { "epoch": 0.11185836190179069, "grad_norm": 3.28125, "learning_rate": 4.854371916110313e-05, "loss": 0.8877, "step": 7888 }, { "epoch": 0.1118867235554169, "grad_norm": 2.9375, "learning_rate": 4.854296764708748e-05, "loss": 0.8906, "step": 7890 }, { "epoch": 0.11191508520904311, "grad_norm": 2.96875, "learning_rate": 4.854221594503258e-05, "loss": 0.874, "step": 7892 }, { "epoch": 0.11194344686266933, "grad_norm": 3.015625, "learning_rate": 4.8541464054944416e-05, "loss": 0.876, "step": 7894 }, { "epoch": 0.11197180851629554, "grad_norm": 3.3125, "learning_rate": 4.854071197682902e-05, "loss": 0.8959, "step": 7896 }, { "epoch": 0.11200017016992175, "grad_norm": 3.203125, "learning_rate": 4.8539959710692383e-05, "loss": 0.8503, "step": 7898 }, { "epoch": 0.11202853182354797, "grad_norm": 3.0, "learning_rate": 4.853920725654052e-05, "loss": 0.8716, "step": 7900 }, { "epoch": 0.11205689347717418, "grad_norm": 2.828125, "learning_rate": 4.8538454614379435e-05, "loss": 0.8459, "step": 7902 }, { "epoch": 0.1120852551308004, "grad_norm": 2.84375, "learning_rate": 4.8537701784215145e-05, "loss": 0.8725, "step": 7904 }, { "epoch": 0.11211361678442662, "grad_norm": 3.578125, "learning_rate": 4.853694876605366e-05, "loss": 0.8952, "step": 7906 }, { "epoch": 0.11214197843805283, "grad_norm": 2.9375, "learning_rate": 4.8536195559901e-05, "loss": 0.8656, "step": 7908 }, { "epoch": 0.11217034009167905, "grad_norm": 3.359375, "learning_rate": 4.8535442165763166e-05, "loss": 0.9008, "step": 7910 }, { "epoch": 0.11219870174530526, "grad_norm": 2.71875, "learning_rate": 4.853468858364619e-05, "loss": 0.8839, "step": 7912 }, { "epoch": 0.11222706339893147, "grad_norm": 3.15625, "learning_rate": 4.853393481355609e-05, "loss": 0.9129, "step": 7914 }, { "epoch": 0.11225542505255769, "grad_norm": 2.671875, "learning_rate": 4.853318085549888e-05, "loss": 0.868, "step": 7916 }, { "epoch": 0.1122837867061839, "grad_norm": 2.875, "learning_rate": 4.853242670948059e-05, "loss": 0.8683, "step": 7918 }, { "epoch": 0.11231214835981011, "grad_norm": 3.234375, "learning_rate": 4.8531672375507234e-05, "loss": 0.8505, "step": 7920 }, { "epoch": 0.11234051001343634, "grad_norm": 3.421875, "learning_rate": 4.853091785358485e-05, "loss": 0.9313, "step": 7922 }, { "epoch": 0.11236887166706255, "grad_norm": 3.0625, "learning_rate": 4.8530163143719445e-05, "loss": 0.8849, "step": 7924 }, { "epoch": 0.11239723332068877, "grad_norm": 3.109375, "learning_rate": 4.8529408245917055e-05, "loss": 0.8613, "step": 7926 }, { "epoch": 0.11242559497431498, "grad_norm": 3.125, "learning_rate": 4.8528653160183715e-05, "loss": 0.9259, "step": 7928 }, { "epoch": 0.11245395662794119, "grad_norm": 3.484375, "learning_rate": 4.852789788652546e-05, "loss": 0.8758, "step": 7930 }, { "epoch": 0.1124823182815674, "grad_norm": 3.53125, "learning_rate": 4.8527142424948305e-05, "loss": 0.8767, "step": 7932 }, { "epoch": 0.11251067993519362, "grad_norm": 2.90625, "learning_rate": 4.8526386775458296e-05, "loss": 0.8126, "step": 7934 }, { "epoch": 0.11253904158881983, "grad_norm": 2.859375, "learning_rate": 4.8525630938061475e-05, "loss": 0.8656, "step": 7936 }, { "epoch": 0.11256740324244605, "grad_norm": 3.203125, "learning_rate": 4.8524874912763865e-05, "loss": 0.9178, "step": 7938 }, { "epoch": 0.11259576489607226, "grad_norm": 3.171875, "learning_rate": 4.8524118699571505e-05, "loss": 0.929, "step": 7940 }, { "epoch": 0.11262412654969849, "grad_norm": 2.890625, "learning_rate": 4.8523362298490446e-05, "loss": 0.8987, "step": 7942 }, { "epoch": 0.1126524882033247, "grad_norm": 3.125, "learning_rate": 4.8522605709526725e-05, "loss": 0.8599, "step": 7944 }, { "epoch": 0.11268084985695091, "grad_norm": 2.859375, "learning_rate": 4.852184893268638e-05, "loss": 0.8894, "step": 7946 }, { "epoch": 0.11270921151057713, "grad_norm": 3.078125, "learning_rate": 4.852109196797546e-05, "loss": 0.8124, "step": 7948 }, { "epoch": 0.11273757316420334, "grad_norm": 3.234375, "learning_rate": 4.852033481540001e-05, "loss": 0.8303, "step": 7950 }, { "epoch": 0.11276593481782955, "grad_norm": 3.203125, "learning_rate": 4.8519577474966074e-05, "loss": 0.8817, "step": 7952 }, { "epoch": 0.11279429647145577, "grad_norm": 3.234375, "learning_rate": 4.8518819946679705e-05, "loss": 0.8241, "step": 7954 }, { "epoch": 0.11282265812508198, "grad_norm": 3.34375, "learning_rate": 4.8518062230546954e-05, "loss": 0.9189, "step": 7956 }, { "epoch": 0.11285101977870819, "grad_norm": 3.015625, "learning_rate": 4.851730432657387e-05, "loss": 0.8897, "step": 7958 }, { "epoch": 0.11287938143233442, "grad_norm": 2.984375, "learning_rate": 4.8516546234766506e-05, "loss": 0.8995, "step": 7960 }, { "epoch": 0.11290774308596063, "grad_norm": 2.96875, "learning_rate": 4.851578795513092e-05, "loss": 0.8992, "step": 7962 }, { "epoch": 0.11293610473958685, "grad_norm": 3.015625, "learning_rate": 4.8515029487673166e-05, "loss": 0.9026, "step": 7964 }, { "epoch": 0.11296446639321306, "grad_norm": 2.9375, "learning_rate": 4.851427083239931e-05, "loss": 0.9006, "step": 7966 }, { "epoch": 0.11299282804683927, "grad_norm": 3.0, "learning_rate": 4.85135119893154e-05, "loss": 0.9256, "step": 7968 }, { "epoch": 0.11302118970046549, "grad_norm": 3.265625, "learning_rate": 4.85127529584275e-05, "loss": 0.9173, "step": 7970 }, { "epoch": 0.1130495513540917, "grad_norm": 3.109375, "learning_rate": 4.8511993739741676e-05, "loss": 0.8615, "step": 7972 }, { "epoch": 0.11307791300771791, "grad_norm": 3.125, "learning_rate": 4.8511234333263985e-05, "loss": 0.8467, "step": 7974 }, { "epoch": 0.11310627466134412, "grad_norm": 3.078125, "learning_rate": 4.85104747390005e-05, "loss": 0.8834, "step": 7976 }, { "epoch": 0.11313463631497034, "grad_norm": 3.15625, "learning_rate": 4.8509714956957284e-05, "loss": 0.9022, "step": 7978 }, { "epoch": 0.11316299796859657, "grad_norm": 3.21875, "learning_rate": 4.850895498714041e-05, "loss": 0.8876, "step": 7980 }, { "epoch": 0.11319135962222278, "grad_norm": 3.21875, "learning_rate": 4.850819482955594e-05, "loss": 0.8673, "step": 7982 }, { "epoch": 0.11321972127584899, "grad_norm": 3.25, "learning_rate": 4.850743448420995e-05, "loss": 0.8536, "step": 7984 }, { "epoch": 0.1132480829294752, "grad_norm": 3.34375, "learning_rate": 4.8506673951108516e-05, "loss": 0.8754, "step": 7986 }, { "epoch": 0.11327644458310142, "grad_norm": 2.84375, "learning_rate": 4.850591323025771e-05, "loss": 0.8892, "step": 7988 }, { "epoch": 0.11330480623672763, "grad_norm": 3.265625, "learning_rate": 4.850515232166361e-05, "loss": 0.8894, "step": 7990 }, { "epoch": 0.11333316789035384, "grad_norm": 3.375, "learning_rate": 4.8504391225332277e-05, "loss": 0.9214, "step": 7992 }, { "epoch": 0.11336152954398006, "grad_norm": 3.75, "learning_rate": 4.8503629941269815e-05, "loss": 0.8862, "step": 7994 }, { "epoch": 0.11338989119760627, "grad_norm": 3.65625, "learning_rate": 4.8502868469482294e-05, "loss": 0.8707, "step": 7996 }, { "epoch": 0.11341825285123248, "grad_norm": 2.96875, "learning_rate": 4.850210680997579e-05, "loss": 0.9019, "step": 7998 }, { "epoch": 0.11344661450485871, "grad_norm": 3.109375, "learning_rate": 4.850134496275639e-05, "loss": 0.917, "step": 8000 }, { "epoch": 0.11347497615848492, "grad_norm": 3.0, "learning_rate": 4.8500582927830185e-05, "loss": 0.8535, "step": 8002 }, { "epoch": 0.11350333781211114, "grad_norm": 2.828125, "learning_rate": 4.849982070520326e-05, "loss": 0.9012, "step": 8004 }, { "epoch": 0.11353169946573735, "grad_norm": 2.984375, "learning_rate": 4.849905829488169e-05, "loss": 0.8476, "step": 8006 }, { "epoch": 0.11356006111936356, "grad_norm": 3.25, "learning_rate": 4.849829569687158e-05, "loss": 0.8604, "step": 8008 }, { "epoch": 0.11358842277298978, "grad_norm": 3.171875, "learning_rate": 4.849753291117901e-05, "loss": 0.8776, "step": 8010 }, { "epoch": 0.11361678442661599, "grad_norm": 2.953125, "learning_rate": 4.849676993781008e-05, "loss": 0.8435, "step": 8012 }, { "epoch": 0.1136451460802422, "grad_norm": 3.171875, "learning_rate": 4.849600677677089e-05, "loss": 0.7998, "step": 8014 }, { "epoch": 0.11367350773386842, "grad_norm": 2.96875, "learning_rate": 4.849524342806751e-05, "loss": 0.9253, "step": 8016 }, { "epoch": 0.11370186938749464, "grad_norm": 3.5625, "learning_rate": 4.8494479891706065e-05, "loss": 0.8453, "step": 8018 }, { "epoch": 0.11373023104112086, "grad_norm": 3.03125, "learning_rate": 4.849371616769264e-05, "loss": 0.8574, "step": 8020 }, { "epoch": 0.11375859269474707, "grad_norm": 3.0, "learning_rate": 4.8492952256033333e-05, "loss": 0.9206, "step": 8022 }, { "epoch": 0.11378695434837328, "grad_norm": 3.0, "learning_rate": 4.8492188156734255e-05, "loss": 0.8401, "step": 8024 }, { "epoch": 0.1138153160019995, "grad_norm": 3.234375, "learning_rate": 4.84914238698015e-05, "loss": 0.8721, "step": 8026 }, { "epoch": 0.11384367765562571, "grad_norm": 3.265625, "learning_rate": 4.849065939524117e-05, "loss": 0.8884, "step": 8028 }, { "epoch": 0.11387203930925192, "grad_norm": 2.84375, "learning_rate": 4.848989473305938e-05, "loss": 0.8851, "step": 8030 }, { "epoch": 0.11390040096287814, "grad_norm": 2.984375, "learning_rate": 4.848912988326224e-05, "loss": 0.8398, "step": 8032 }, { "epoch": 0.11392876261650435, "grad_norm": 3.609375, "learning_rate": 4.848836484585585e-05, "loss": 0.895, "step": 8034 }, { "epoch": 0.11395712427013056, "grad_norm": 3.359375, "learning_rate": 4.848759962084633e-05, "loss": 0.9215, "step": 8036 }, { "epoch": 0.11398548592375679, "grad_norm": 3.328125, "learning_rate": 4.8486834208239775e-05, "loss": 0.8578, "step": 8038 }, { "epoch": 0.114013847577383, "grad_norm": 3.046875, "learning_rate": 4.848606860804231e-05, "loss": 0.9114, "step": 8040 }, { "epoch": 0.11404220923100922, "grad_norm": 3.046875, "learning_rate": 4.8485302820260045e-05, "loss": 0.9222, "step": 8042 }, { "epoch": 0.11407057088463543, "grad_norm": 3.015625, "learning_rate": 4.848453684489911e-05, "loss": 0.8245, "step": 8044 }, { "epoch": 0.11409893253826164, "grad_norm": 3.171875, "learning_rate": 4.848377068196561e-05, "loss": 0.8483, "step": 8046 }, { "epoch": 0.11412729419188786, "grad_norm": 2.828125, "learning_rate": 4.8483004331465664e-05, "loss": 0.8632, "step": 8048 }, { "epoch": 0.11415565584551407, "grad_norm": 2.6875, "learning_rate": 4.84822377934054e-05, "loss": 0.8302, "step": 8050 }, { "epoch": 0.11418401749914028, "grad_norm": 3.09375, "learning_rate": 4.848147106779093e-05, "loss": 0.8575, "step": 8052 }, { "epoch": 0.1142123791527665, "grad_norm": 2.921875, "learning_rate": 4.848070415462839e-05, "loss": 0.8622, "step": 8054 }, { "epoch": 0.11424074080639271, "grad_norm": 3.265625, "learning_rate": 4.84799370539239e-05, "loss": 0.8864, "step": 8056 }, { "epoch": 0.11426910246001894, "grad_norm": 3.203125, "learning_rate": 4.8479169765683575e-05, "loss": 0.8852, "step": 8058 }, { "epoch": 0.11429746411364515, "grad_norm": 3.25, "learning_rate": 4.8478402289913566e-05, "loss": 0.8212, "step": 8060 }, { "epoch": 0.11432582576727136, "grad_norm": 3.0, "learning_rate": 4.847763462661999e-05, "loss": 0.8793, "step": 8062 }, { "epoch": 0.11435418742089758, "grad_norm": 3.390625, "learning_rate": 4.847686677580898e-05, "loss": 0.8992, "step": 8064 }, { "epoch": 0.11438254907452379, "grad_norm": 3.25, "learning_rate": 4.847609873748667e-05, "loss": 0.9071, "step": 8066 }, { "epoch": 0.11441091072815, "grad_norm": 3.65625, "learning_rate": 4.847533051165919e-05, "loss": 0.886, "step": 8068 }, { "epoch": 0.11443927238177622, "grad_norm": 3.34375, "learning_rate": 4.8474562098332686e-05, "loss": 0.8981, "step": 8070 }, { "epoch": 0.11446763403540243, "grad_norm": 2.953125, "learning_rate": 4.847379349751329e-05, "loss": 0.8392, "step": 8072 }, { "epoch": 0.11449599568902864, "grad_norm": 3.046875, "learning_rate": 4.847302470920713e-05, "loss": 0.8519, "step": 8074 }, { "epoch": 0.11452435734265487, "grad_norm": 3.453125, "learning_rate": 4.847225573342036e-05, "loss": 0.8529, "step": 8076 }, { "epoch": 0.11455271899628108, "grad_norm": 3.1875, "learning_rate": 4.847148657015912e-05, "loss": 0.9027, "step": 8078 }, { "epoch": 0.1145810806499073, "grad_norm": 3.1875, "learning_rate": 4.847071721942955e-05, "loss": 0.904, "step": 8080 }, { "epoch": 0.11460944230353351, "grad_norm": 2.8125, "learning_rate": 4.846994768123779e-05, "loss": 0.8345, "step": 8082 }, { "epoch": 0.11463780395715972, "grad_norm": 2.859375, "learning_rate": 4.846917795559e-05, "loss": 0.8978, "step": 8084 }, { "epoch": 0.11466616561078594, "grad_norm": 3.046875, "learning_rate": 4.8468408042492316e-05, "loss": 0.8859, "step": 8086 }, { "epoch": 0.11469452726441215, "grad_norm": 3.109375, "learning_rate": 4.8467637941950895e-05, "loss": 0.8554, "step": 8088 }, { "epoch": 0.11472288891803836, "grad_norm": 3.140625, "learning_rate": 4.846686765397189e-05, "loss": 0.8391, "step": 8090 }, { "epoch": 0.11475125057166458, "grad_norm": 3.078125, "learning_rate": 4.8466097178561435e-05, "loss": 0.8798, "step": 8092 }, { "epoch": 0.11477961222529079, "grad_norm": 3.140625, "learning_rate": 4.846532651572571e-05, "loss": 0.9073, "step": 8094 }, { "epoch": 0.11480797387891702, "grad_norm": 3.3125, "learning_rate": 4.8464555665470846e-05, "loss": 0.8932, "step": 8096 }, { "epoch": 0.11483633553254323, "grad_norm": 2.84375, "learning_rate": 4.846378462780301e-05, "loss": 0.838, "step": 8098 }, { "epoch": 0.11486469718616944, "grad_norm": 3.234375, "learning_rate": 4.8463013402728376e-05, "loss": 0.87, "step": 8100 }, { "epoch": 0.11489305883979566, "grad_norm": 2.734375, "learning_rate": 4.8462241990253077e-05, "loss": 0.8483, "step": 8102 }, { "epoch": 0.11492142049342187, "grad_norm": 3.25, "learning_rate": 4.8461470390383284e-05, "loss": 0.9068, "step": 8104 }, { "epoch": 0.11494978214704808, "grad_norm": 3.0, "learning_rate": 4.8460698603125165e-05, "loss": 0.8483, "step": 8106 }, { "epoch": 0.1149781438006743, "grad_norm": 2.953125, "learning_rate": 4.8459926628484885e-05, "loss": 0.9265, "step": 8108 }, { "epoch": 0.11500650545430051, "grad_norm": 2.75, "learning_rate": 4.8459154466468606e-05, "loss": 0.861, "step": 8110 }, { "epoch": 0.11503486710792672, "grad_norm": 3.265625, "learning_rate": 4.845838211708249e-05, "loss": 0.8518, "step": 8112 }, { "epoch": 0.11506322876155295, "grad_norm": 2.953125, "learning_rate": 4.845760958033272e-05, "loss": 0.8733, "step": 8114 }, { "epoch": 0.11509159041517916, "grad_norm": 3.25, "learning_rate": 4.845683685622545e-05, "loss": 0.9133, "step": 8116 }, { "epoch": 0.11511995206880538, "grad_norm": 3.296875, "learning_rate": 4.845606394476686e-05, "loss": 0.8744, "step": 8118 }, { "epoch": 0.11514831372243159, "grad_norm": 2.78125, "learning_rate": 4.845529084596313e-05, "loss": 0.8499, "step": 8120 }, { "epoch": 0.1151766753760578, "grad_norm": 3.046875, "learning_rate": 4.8454517559820414e-05, "loss": 0.8626, "step": 8122 }, { "epoch": 0.11520503702968402, "grad_norm": 3.1875, "learning_rate": 4.845374408634491e-05, "loss": 0.8838, "step": 8124 }, { "epoch": 0.11523339868331023, "grad_norm": 3.109375, "learning_rate": 4.845297042554279e-05, "loss": 0.8479, "step": 8126 }, { "epoch": 0.11526176033693644, "grad_norm": 3.4375, "learning_rate": 4.8452196577420225e-05, "loss": 0.8448, "step": 8128 }, { "epoch": 0.11529012199056266, "grad_norm": 3.0, "learning_rate": 4.8451422541983405e-05, "loss": 0.8875, "step": 8130 }, { "epoch": 0.11531848364418887, "grad_norm": 3.328125, "learning_rate": 4.845064831923851e-05, "loss": 0.9164, "step": 8132 }, { "epoch": 0.1153468452978151, "grad_norm": 2.8125, "learning_rate": 4.8449873909191726e-05, "loss": 0.8871, "step": 8134 }, { "epoch": 0.11537520695144131, "grad_norm": 3.140625, "learning_rate": 4.844909931184923e-05, "loss": 0.8491, "step": 8136 }, { "epoch": 0.11540356860506752, "grad_norm": 3.21875, "learning_rate": 4.8448324527217214e-05, "loss": 0.8617, "step": 8138 }, { "epoch": 0.11543193025869374, "grad_norm": 2.90625, "learning_rate": 4.8447549555301865e-05, "loss": 0.8755, "step": 8140 }, { "epoch": 0.11546029191231995, "grad_norm": 2.9375, "learning_rate": 4.8446774396109375e-05, "loss": 0.8763, "step": 8142 }, { "epoch": 0.11548865356594616, "grad_norm": 3.34375, "learning_rate": 4.844599904964593e-05, "loss": 0.8828, "step": 8144 }, { "epoch": 0.11551701521957237, "grad_norm": 3.046875, "learning_rate": 4.844522351591774e-05, "loss": 0.8937, "step": 8146 }, { "epoch": 0.11554537687319859, "grad_norm": 3.109375, "learning_rate": 4.844444779493097e-05, "loss": 0.8962, "step": 8148 }, { "epoch": 0.1155737385268248, "grad_norm": 3.625, "learning_rate": 4.8443671886691835e-05, "loss": 0.8446, "step": 8150 }, { "epoch": 0.11560210018045101, "grad_norm": 3.078125, "learning_rate": 4.844289579120653e-05, "loss": 0.8854, "step": 8152 }, { "epoch": 0.11563046183407724, "grad_norm": 2.8125, "learning_rate": 4.8442119508481265e-05, "loss": 0.8721, "step": 8154 }, { "epoch": 0.11565882348770345, "grad_norm": 2.96875, "learning_rate": 4.8441343038522216e-05, "loss": 0.802, "step": 8156 }, { "epoch": 0.11568718514132967, "grad_norm": 3.140625, "learning_rate": 4.8440566381335594e-05, "loss": 0.8544, "step": 8158 }, { "epoch": 0.11571554679495588, "grad_norm": 3.03125, "learning_rate": 4.843978953692762e-05, "loss": 0.7877, "step": 8160 }, { "epoch": 0.1157439084485821, "grad_norm": 2.9375, "learning_rate": 4.8439012505304465e-05, "loss": 0.8352, "step": 8162 }, { "epoch": 0.11577227010220831, "grad_norm": 2.984375, "learning_rate": 4.843823528647236e-05, "loss": 0.8102, "step": 8164 }, { "epoch": 0.11580063175583452, "grad_norm": 3.1875, "learning_rate": 4.843745788043751e-05, "loss": 0.8624, "step": 8166 }, { "epoch": 0.11582899340946073, "grad_norm": 3.0, "learning_rate": 4.843668028720611e-05, "loss": 0.8971, "step": 8168 }, { "epoch": 0.11585735506308695, "grad_norm": 3.3125, "learning_rate": 4.84359025067844e-05, "loss": 0.8684, "step": 8170 }, { "epoch": 0.11588571671671317, "grad_norm": 2.890625, "learning_rate": 4.8435124539178564e-05, "loss": 0.8245, "step": 8172 }, { "epoch": 0.11591407837033939, "grad_norm": 2.9375, "learning_rate": 4.8434346384394824e-05, "loss": 0.8286, "step": 8174 }, { "epoch": 0.1159424400239656, "grad_norm": 3.234375, "learning_rate": 4.8433568042439396e-05, "loss": 0.9082, "step": 8176 }, { "epoch": 0.11597080167759181, "grad_norm": 2.78125, "learning_rate": 4.84327895133185e-05, "loss": 0.8416, "step": 8178 }, { "epoch": 0.11599916333121803, "grad_norm": 2.828125, "learning_rate": 4.8432010797038346e-05, "loss": 0.8625, "step": 8180 }, { "epoch": 0.11602752498484424, "grad_norm": 3.578125, "learning_rate": 4.843123189360516e-05, "loss": 0.8997, "step": 8182 }, { "epoch": 0.11605588663847045, "grad_norm": 3.09375, "learning_rate": 4.8430452803025166e-05, "loss": 0.9148, "step": 8184 }, { "epoch": 0.11608424829209667, "grad_norm": 2.8125, "learning_rate": 4.842967352530458e-05, "loss": 0.8548, "step": 8186 }, { "epoch": 0.11611260994572288, "grad_norm": 2.921875, "learning_rate": 4.842889406044963e-05, "loss": 0.8563, "step": 8188 }, { "epoch": 0.1161409715993491, "grad_norm": 3.078125, "learning_rate": 4.842811440846654e-05, "loss": 0.8812, "step": 8190 }, { "epoch": 0.11616933325297532, "grad_norm": 3.046875, "learning_rate": 4.8427334569361537e-05, "loss": 0.8445, "step": 8192 }, { "epoch": 0.11619769490660153, "grad_norm": 3.25, "learning_rate": 4.842655454314085e-05, "loss": 0.8815, "step": 8194 }, { "epoch": 0.11622605656022775, "grad_norm": 3.09375, "learning_rate": 4.842577432981071e-05, "loss": 0.8299, "step": 8196 }, { "epoch": 0.11625441821385396, "grad_norm": 3.03125, "learning_rate": 4.842499392937735e-05, "loss": 0.9029, "step": 8198 }, { "epoch": 0.11628277986748017, "grad_norm": 2.796875, "learning_rate": 4.8424213341847e-05, "loss": 0.8334, "step": 8200 }, { "epoch": 0.11631114152110639, "grad_norm": 3.390625, "learning_rate": 4.842343256722589e-05, "loss": 0.8758, "step": 8202 }, { "epoch": 0.1163395031747326, "grad_norm": 3.1875, "learning_rate": 4.8422651605520265e-05, "loss": 0.8164, "step": 8204 }, { "epoch": 0.11636786482835881, "grad_norm": 3.0, "learning_rate": 4.842187045673636e-05, "loss": 0.9418, "step": 8206 }, { "epoch": 0.11639622648198503, "grad_norm": 2.796875, "learning_rate": 4.842108912088042e-05, "loss": 0.8683, "step": 8208 }, { "epoch": 0.11642458813561124, "grad_norm": 3.171875, "learning_rate": 4.842030759795867e-05, "loss": 0.9266, "step": 8210 }, { "epoch": 0.11645294978923747, "grad_norm": 2.84375, "learning_rate": 4.841952588797736e-05, "loss": 0.8221, "step": 8212 }, { "epoch": 0.11648131144286368, "grad_norm": 3.03125, "learning_rate": 4.8418743990942736e-05, "loss": 0.8566, "step": 8214 }, { "epoch": 0.1165096730964899, "grad_norm": 3.09375, "learning_rate": 4.8417961906861056e-05, "loss": 0.9089, "step": 8216 }, { "epoch": 0.1165380347501161, "grad_norm": 3.15625, "learning_rate": 4.841717963573853e-05, "loss": 0.8891, "step": 8218 }, { "epoch": 0.11656639640374232, "grad_norm": 2.859375, "learning_rate": 4.841639717758144e-05, "loss": 0.8544, "step": 8220 }, { "epoch": 0.11659475805736853, "grad_norm": 2.71875, "learning_rate": 4.841561453239602e-05, "loss": 0.8665, "step": 8222 }, { "epoch": 0.11662311971099475, "grad_norm": 3.0, "learning_rate": 4.841483170018853e-05, "loss": 0.8294, "step": 8224 }, { "epoch": 0.11665148136462096, "grad_norm": 3.53125, "learning_rate": 4.841404868096522e-05, "loss": 0.8711, "step": 8226 }, { "epoch": 0.11667984301824717, "grad_norm": 2.828125, "learning_rate": 4.841326547473233e-05, "loss": 0.8554, "step": 8228 }, { "epoch": 0.1167082046718734, "grad_norm": 3.203125, "learning_rate": 4.841248208149614e-05, "loss": 0.8348, "step": 8230 }, { "epoch": 0.11673656632549961, "grad_norm": 3.015625, "learning_rate": 4.841169850126288e-05, "loss": 0.8496, "step": 8232 }, { "epoch": 0.11676492797912583, "grad_norm": 3.625, "learning_rate": 4.841091473403884e-05, "loss": 0.8694, "step": 8234 }, { "epoch": 0.11679328963275204, "grad_norm": 3.0625, "learning_rate": 4.841013077983025e-05, "loss": 0.9311, "step": 8236 }, { "epoch": 0.11682165128637825, "grad_norm": 3.109375, "learning_rate": 4.8409346638643384e-05, "loss": 0.8777, "step": 8238 }, { "epoch": 0.11685001294000447, "grad_norm": 3.1875, "learning_rate": 4.8408562310484506e-05, "loss": 0.8664, "step": 8240 }, { "epoch": 0.11687837459363068, "grad_norm": 2.890625, "learning_rate": 4.840777779535988e-05, "loss": 0.8774, "step": 8242 }, { "epoch": 0.11690673624725689, "grad_norm": 3.328125, "learning_rate": 4.8406993093275776e-05, "loss": 0.8839, "step": 8244 }, { "epoch": 0.1169350979008831, "grad_norm": 3.171875, "learning_rate": 4.8406208204238454e-05, "loss": 0.8533, "step": 8246 }, { "epoch": 0.11696345955450932, "grad_norm": 2.8125, "learning_rate": 4.840542312825418e-05, "loss": 0.881, "step": 8248 }, { "epoch": 0.11699182120813555, "grad_norm": 3.046875, "learning_rate": 4.840463786532924e-05, "loss": 0.8626, "step": 8250 }, { "epoch": 0.11702018286176176, "grad_norm": 3.09375, "learning_rate": 4.8403852415469885e-05, "loss": 0.8745, "step": 8252 }, { "epoch": 0.11704854451538797, "grad_norm": 3.171875, "learning_rate": 4.840306677868241e-05, "loss": 0.8693, "step": 8254 }, { "epoch": 0.11707690616901419, "grad_norm": 3.125, "learning_rate": 4.840228095497308e-05, "loss": 0.9052, "step": 8256 }, { "epoch": 0.1171052678226404, "grad_norm": 2.921875, "learning_rate": 4.840149494434816e-05, "loss": 0.8575, "step": 8258 }, { "epoch": 0.11713362947626661, "grad_norm": 3.25, "learning_rate": 4.840070874681395e-05, "loss": 0.8639, "step": 8260 }, { "epoch": 0.11716199112989283, "grad_norm": 3.34375, "learning_rate": 4.839992236237672e-05, "loss": 0.8783, "step": 8262 }, { "epoch": 0.11719035278351904, "grad_norm": 3.015625, "learning_rate": 4.839913579104274e-05, "loss": 0.851, "step": 8264 }, { "epoch": 0.11721871443714525, "grad_norm": 2.9375, "learning_rate": 4.83983490328183e-05, "loss": 0.9167, "step": 8266 }, { "epoch": 0.11724707609077148, "grad_norm": 3.328125, "learning_rate": 4.8397562087709694e-05, "loss": 0.8287, "step": 8268 }, { "epoch": 0.11727543774439769, "grad_norm": 3.28125, "learning_rate": 4.83967749557232e-05, "loss": 0.8643, "step": 8270 }, { "epoch": 0.1173037993980239, "grad_norm": 3.3125, "learning_rate": 4.839598763686509e-05, "loss": 0.8385, "step": 8272 }, { "epoch": 0.11733216105165012, "grad_norm": 2.9375, "learning_rate": 4.839520013114168e-05, "loss": 0.9057, "step": 8274 }, { "epoch": 0.11736052270527633, "grad_norm": 3.234375, "learning_rate": 4.839441243855924e-05, "loss": 0.8957, "step": 8276 }, { "epoch": 0.11738888435890255, "grad_norm": 2.90625, "learning_rate": 4.839362455912407e-05, "loss": 0.8609, "step": 8278 }, { "epoch": 0.11741724601252876, "grad_norm": 3.21875, "learning_rate": 4.839283649284246e-05, "loss": 0.8467, "step": 8280 }, { "epoch": 0.11744560766615497, "grad_norm": 3.0625, "learning_rate": 4.8392048239720703e-05, "loss": 0.8524, "step": 8282 }, { "epoch": 0.11747396931978119, "grad_norm": 2.875, "learning_rate": 4.83912597997651e-05, "loss": 0.8375, "step": 8284 }, { "epoch": 0.1175023309734074, "grad_norm": 3.0, "learning_rate": 4.839047117298195e-05, "loss": 0.8778, "step": 8286 }, { "epoch": 0.11753069262703363, "grad_norm": 3.03125, "learning_rate": 4.838968235937753e-05, "loss": 0.8063, "step": 8288 }, { "epoch": 0.11755905428065984, "grad_norm": 2.9375, "learning_rate": 4.8388893358958165e-05, "loss": 0.8759, "step": 8290 }, { "epoch": 0.11758741593428605, "grad_norm": 3.015625, "learning_rate": 4.838810417173015e-05, "loss": 0.8938, "step": 8292 }, { "epoch": 0.11761577758791227, "grad_norm": 3.078125, "learning_rate": 4.838731479769979e-05, "loss": 0.8824, "step": 8294 }, { "epoch": 0.11764413924153848, "grad_norm": 3.3125, "learning_rate": 4.8386525236873384e-05, "loss": 0.9117, "step": 8296 }, { "epoch": 0.11767250089516469, "grad_norm": 2.828125, "learning_rate": 4.838573548925724e-05, "loss": 0.8755, "step": 8298 }, { "epoch": 0.1177008625487909, "grad_norm": 3.203125, "learning_rate": 4.838494555485767e-05, "loss": 0.8574, "step": 8300 }, { "epoch": 0.11772922420241712, "grad_norm": 2.96875, "learning_rate": 4.838415543368098e-05, "loss": 0.8887, "step": 8302 }, { "epoch": 0.11775758585604333, "grad_norm": 2.609375, "learning_rate": 4.838336512573347e-05, "loss": 0.8382, "step": 8304 }, { "epoch": 0.11778594750966954, "grad_norm": 3.015625, "learning_rate": 4.838257463102147e-05, "loss": 0.8615, "step": 8306 }, { "epoch": 0.11781430916329577, "grad_norm": 3.375, "learning_rate": 4.8381783949551295e-05, "loss": 0.8565, "step": 8308 }, { "epoch": 0.11784267081692198, "grad_norm": 3.015625, "learning_rate": 4.838099308132924e-05, "loss": 0.8639, "step": 8310 }, { "epoch": 0.1178710324705482, "grad_norm": 3.265625, "learning_rate": 4.8380202026361644e-05, "loss": 0.8429, "step": 8312 }, { "epoch": 0.11789939412417441, "grad_norm": 3.125, "learning_rate": 4.837941078465481e-05, "loss": 0.8475, "step": 8314 }, { "epoch": 0.11792775577780062, "grad_norm": 2.84375, "learning_rate": 4.837861935621506e-05, "loss": 0.8565, "step": 8316 }, { "epoch": 0.11795611743142684, "grad_norm": 3.015625, "learning_rate": 4.837782774104872e-05, "loss": 0.8642, "step": 8318 }, { "epoch": 0.11798447908505305, "grad_norm": 3.046875, "learning_rate": 4.837703593916211e-05, "loss": 0.8899, "step": 8320 }, { "epoch": 0.11801284073867926, "grad_norm": 3.109375, "learning_rate": 4.837624395056155e-05, "loss": 0.8407, "step": 8322 }, { "epoch": 0.11804120239230548, "grad_norm": 3.203125, "learning_rate": 4.837545177525338e-05, "loss": 0.8545, "step": 8324 }, { "epoch": 0.1180695640459317, "grad_norm": 2.765625, "learning_rate": 4.837465941324392e-05, "loss": 0.8881, "step": 8326 }, { "epoch": 0.11809792569955792, "grad_norm": 3.484375, "learning_rate": 4.837386686453949e-05, "loss": 0.9075, "step": 8328 }, { "epoch": 0.11812628735318413, "grad_norm": 3.140625, "learning_rate": 4.8373074129146424e-05, "loss": 0.8665, "step": 8330 }, { "epoch": 0.11815464900681034, "grad_norm": 3.921875, "learning_rate": 4.837228120707106e-05, "loss": 0.8682, "step": 8332 }, { "epoch": 0.11818301066043656, "grad_norm": 2.765625, "learning_rate": 4.837148809831972e-05, "loss": 0.8887, "step": 8334 }, { "epoch": 0.11821137231406277, "grad_norm": 2.765625, "learning_rate": 4.8370694802898754e-05, "loss": 0.8734, "step": 8336 }, { "epoch": 0.11823973396768898, "grad_norm": 3.140625, "learning_rate": 4.836990132081448e-05, "loss": 0.9076, "step": 8338 }, { "epoch": 0.1182680956213152, "grad_norm": 2.953125, "learning_rate": 4.8369107652073254e-05, "loss": 0.9139, "step": 8340 }, { "epoch": 0.11829645727494141, "grad_norm": 3.015625, "learning_rate": 4.8368313796681404e-05, "loss": 0.9144, "step": 8342 }, { "epoch": 0.11832481892856762, "grad_norm": 3.171875, "learning_rate": 4.8367519754645274e-05, "loss": 0.8735, "step": 8344 }, { "epoch": 0.11835318058219385, "grad_norm": 3.1875, "learning_rate": 4.83667255259712e-05, "loss": 0.8377, "step": 8346 }, { "epoch": 0.11838154223582006, "grad_norm": 2.8125, "learning_rate": 4.8365931110665534e-05, "loss": 0.8461, "step": 8348 }, { "epoch": 0.11840990388944628, "grad_norm": 2.921875, "learning_rate": 4.836513650873462e-05, "loss": 0.8697, "step": 8350 }, { "epoch": 0.11843826554307249, "grad_norm": 3.28125, "learning_rate": 4.83643417201848e-05, "loss": 0.8493, "step": 8352 }, { "epoch": 0.1184666271966987, "grad_norm": 3.125, "learning_rate": 4.836354674502242e-05, "loss": 0.8517, "step": 8354 }, { "epoch": 0.11849498885032492, "grad_norm": 2.765625, "learning_rate": 4.8362751583253846e-05, "loss": 0.8306, "step": 8356 }, { "epoch": 0.11852335050395113, "grad_norm": 3.515625, "learning_rate": 4.836195623488541e-05, "loss": 0.864, "step": 8358 }, { "epoch": 0.11855171215757734, "grad_norm": 2.953125, "learning_rate": 4.836116069992347e-05, "loss": 0.8596, "step": 8360 }, { "epoch": 0.11858007381120356, "grad_norm": 3.453125, "learning_rate": 4.8360364978374384e-05, "loss": 0.9108, "step": 8362 }, { "epoch": 0.11860843546482977, "grad_norm": 3.203125, "learning_rate": 4.83595690702445e-05, "loss": 0.8731, "step": 8364 }, { "epoch": 0.118636797118456, "grad_norm": 3.25, "learning_rate": 4.8358772975540184e-05, "loss": 0.8666, "step": 8366 }, { "epoch": 0.11866515877208221, "grad_norm": 2.78125, "learning_rate": 4.8357976694267785e-05, "loss": 0.8623, "step": 8368 }, { "epoch": 0.11869352042570842, "grad_norm": 2.890625, "learning_rate": 4.8357180226433676e-05, "loss": 0.8372, "step": 8370 }, { "epoch": 0.11872188207933464, "grad_norm": 3.109375, "learning_rate": 4.8356383572044206e-05, "loss": 0.8686, "step": 8372 }, { "epoch": 0.11875024373296085, "grad_norm": 2.96875, "learning_rate": 4.835558673110574e-05, "loss": 0.8271, "step": 8374 }, { "epoch": 0.11877860538658706, "grad_norm": 2.796875, "learning_rate": 4.835478970362465e-05, "loss": 0.8645, "step": 8376 }, { "epoch": 0.11880696704021328, "grad_norm": 3.25, "learning_rate": 4.83539924896073e-05, "loss": 0.8684, "step": 8378 }, { "epoch": 0.11883532869383949, "grad_norm": 2.84375, "learning_rate": 4.835319508906004e-05, "loss": 0.8739, "step": 8380 }, { "epoch": 0.1188636903474657, "grad_norm": 3.515625, "learning_rate": 4.8352397501989265e-05, "loss": 0.8647, "step": 8382 }, { "epoch": 0.11889205200109193, "grad_norm": 2.875, "learning_rate": 4.835159972840133e-05, "loss": 0.8605, "step": 8384 }, { "epoch": 0.11892041365471814, "grad_norm": 2.734375, "learning_rate": 4.835080176830261e-05, "loss": 0.8833, "step": 8386 }, { "epoch": 0.11894877530834436, "grad_norm": 3.078125, "learning_rate": 4.835000362169949e-05, "loss": 0.8472, "step": 8388 }, { "epoch": 0.11897713696197057, "grad_norm": 2.96875, "learning_rate": 4.834920528859832e-05, "loss": 0.8665, "step": 8390 }, { "epoch": 0.11900549861559678, "grad_norm": 2.875, "learning_rate": 4.8348406769005494e-05, "loss": 0.8529, "step": 8392 }, { "epoch": 0.119033860269223, "grad_norm": 3.203125, "learning_rate": 4.8347608062927385e-05, "loss": 0.8668, "step": 8394 }, { "epoch": 0.11906222192284921, "grad_norm": 3.375, "learning_rate": 4.834680917037037e-05, "loss": 0.8712, "step": 8396 }, { "epoch": 0.11909058357647542, "grad_norm": 3.25, "learning_rate": 4.8346010091340836e-05, "loss": 0.8391, "step": 8398 }, { "epoch": 0.11911894523010164, "grad_norm": 2.96875, "learning_rate": 4.834521082584516e-05, "loss": 0.8695, "step": 8400 }, { "epoch": 0.11914730688372785, "grad_norm": 2.890625, "learning_rate": 4.834441137388973e-05, "loss": 0.8445, "step": 8402 }, { "epoch": 0.11917566853735408, "grad_norm": 3.03125, "learning_rate": 4.834361173548093e-05, "loss": 0.8103, "step": 8404 }, { "epoch": 0.11920403019098029, "grad_norm": 3.03125, "learning_rate": 4.834281191062515e-05, "loss": 0.8743, "step": 8406 }, { "epoch": 0.1192323918446065, "grad_norm": 2.71875, "learning_rate": 4.834201189932877e-05, "loss": 0.8514, "step": 8408 }, { "epoch": 0.11926075349823272, "grad_norm": 3.125, "learning_rate": 4.834121170159819e-05, "loss": 0.826, "step": 8410 }, { "epoch": 0.11928911515185893, "grad_norm": 3.046875, "learning_rate": 4.8340411317439785e-05, "loss": 0.864, "step": 8412 }, { "epoch": 0.11931747680548514, "grad_norm": 3.34375, "learning_rate": 4.8339610746859966e-05, "loss": 0.8547, "step": 8414 }, { "epoch": 0.11934583845911136, "grad_norm": 3.5625, "learning_rate": 4.833880998986511e-05, "loss": 0.9125, "step": 8416 }, { "epoch": 0.11937420011273757, "grad_norm": 2.78125, "learning_rate": 4.833800904646163e-05, "loss": 0.8258, "step": 8418 }, { "epoch": 0.11940256176636378, "grad_norm": 2.875, "learning_rate": 4.833720791665591e-05, "loss": 0.8578, "step": 8420 }, { "epoch": 0.11943092341999001, "grad_norm": 2.921875, "learning_rate": 4.833640660045436e-05, "loss": 0.8859, "step": 8422 }, { "epoch": 0.11945928507361622, "grad_norm": 3.078125, "learning_rate": 4.833560509786337e-05, "loss": 0.8563, "step": 8424 }, { "epoch": 0.11948764672724244, "grad_norm": 2.96875, "learning_rate": 4.8334803408889345e-05, "loss": 0.8291, "step": 8426 }, { "epoch": 0.11951600838086865, "grad_norm": 3.125, "learning_rate": 4.8334001533538685e-05, "loss": 0.9, "step": 8428 }, { "epoch": 0.11954437003449486, "grad_norm": 3.484375, "learning_rate": 4.83331994718178e-05, "loss": 0.8944, "step": 8430 }, { "epoch": 0.11957273168812108, "grad_norm": 3.421875, "learning_rate": 4.8332397223733104e-05, "loss": 0.9166, "step": 8432 }, { "epoch": 0.11960109334174729, "grad_norm": 2.890625, "learning_rate": 4.833159478929099e-05, "loss": 0.8563, "step": 8434 }, { "epoch": 0.1196294549953735, "grad_norm": 3.109375, "learning_rate": 4.833079216849787e-05, "loss": 0.8793, "step": 8436 }, { "epoch": 0.11965781664899972, "grad_norm": 3.15625, "learning_rate": 4.8329989361360155e-05, "loss": 0.8793, "step": 8438 }, { "epoch": 0.11968617830262593, "grad_norm": 3.015625, "learning_rate": 4.832918636788426e-05, "loss": 0.8855, "step": 8440 }, { "epoch": 0.11971453995625216, "grad_norm": 3.234375, "learning_rate": 4.8328383188076595e-05, "loss": 0.8766, "step": 8442 }, { "epoch": 0.11974290160987837, "grad_norm": 3.046875, "learning_rate": 4.832757982194359e-05, "loss": 0.8859, "step": 8444 }, { "epoch": 0.11977126326350458, "grad_norm": 2.96875, "learning_rate": 4.832677626949164e-05, "loss": 0.8623, "step": 8446 }, { "epoch": 0.1197996249171308, "grad_norm": 2.75, "learning_rate": 4.832597253072717e-05, "loss": 0.8329, "step": 8448 }, { "epoch": 0.11982798657075701, "grad_norm": 3.15625, "learning_rate": 4.83251686056566e-05, "loss": 0.8863, "step": 8450 }, { "epoch": 0.11985634822438322, "grad_norm": 3.046875, "learning_rate": 4.8324364494286364e-05, "loss": 0.8871, "step": 8452 }, { "epoch": 0.11988470987800944, "grad_norm": 2.984375, "learning_rate": 4.8323560196622866e-05, "loss": 0.8744, "step": 8454 }, { "epoch": 0.11991307153163565, "grad_norm": 3.3125, "learning_rate": 4.832275571267253e-05, "loss": 0.8234, "step": 8456 }, { "epoch": 0.11994143318526186, "grad_norm": 2.921875, "learning_rate": 4.83219510424418e-05, "loss": 0.8324, "step": 8458 }, { "epoch": 0.11996979483888807, "grad_norm": 3.5625, "learning_rate": 4.832114618593708e-05, "loss": 0.8861, "step": 8460 }, { "epoch": 0.1199981564925143, "grad_norm": 3.4375, "learning_rate": 4.8320341143164815e-05, "loss": 0.8909, "step": 8462 }, { "epoch": 0.12002651814614052, "grad_norm": 3.03125, "learning_rate": 4.8319535914131434e-05, "loss": 0.8721, "step": 8464 }, { "epoch": 0.12005487979976673, "grad_norm": 3.84375, "learning_rate": 4.831873049884336e-05, "loss": 0.824, "step": 8466 }, { "epoch": 0.12008324145339294, "grad_norm": 3.0625, "learning_rate": 4.8317924897307035e-05, "loss": 0.8189, "step": 8468 }, { "epoch": 0.12011160310701915, "grad_norm": 3.234375, "learning_rate": 4.8317119109528886e-05, "loss": 0.8414, "step": 8470 }, { "epoch": 0.12013996476064537, "grad_norm": 3.234375, "learning_rate": 4.8316313135515343e-05, "loss": 0.8718, "step": 8472 }, { "epoch": 0.12016832641427158, "grad_norm": 3.328125, "learning_rate": 4.831550697527286e-05, "loss": 0.8493, "step": 8474 }, { "epoch": 0.1201966880678978, "grad_norm": 3.328125, "learning_rate": 4.831470062880787e-05, "loss": 0.8701, "step": 8476 }, { "epoch": 0.12022504972152401, "grad_norm": 3.296875, "learning_rate": 4.831389409612681e-05, "loss": 0.8936, "step": 8478 }, { "epoch": 0.12025341137515023, "grad_norm": 3.515625, "learning_rate": 4.831308737723612e-05, "loss": 0.8974, "step": 8480 }, { "epoch": 0.12028177302877645, "grad_norm": 3.125, "learning_rate": 4.831228047214224e-05, "loss": 0.8711, "step": 8482 }, { "epoch": 0.12031013468240266, "grad_norm": 3.15625, "learning_rate": 4.8311473380851633e-05, "loss": 0.9041, "step": 8484 }, { "epoch": 0.12033849633602887, "grad_norm": 2.921875, "learning_rate": 4.831066610337073e-05, "loss": 0.8245, "step": 8486 }, { "epoch": 0.12036685798965509, "grad_norm": 2.84375, "learning_rate": 4.8309858639705975e-05, "loss": 0.8002, "step": 8488 }, { "epoch": 0.1203952196432813, "grad_norm": 3.078125, "learning_rate": 4.8309050989863835e-05, "loss": 0.8904, "step": 8490 }, { "epoch": 0.12042358129690751, "grad_norm": 3.1875, "learning_rate": 4.830824315385074e-05, "loss": 0.8781, "step": 8492 }, { "epoch": 0.12045194295053373, "grad_norm": 2.90625, "learning_rate": 4.8307435131673156e-05, "loss": 0.8535, "step": 8494 }, { "epoch": 0.12048030460415994, "grad_norm": 3.296875, "learning_rate": 4.830662692333753e-05, "loss": 0.8917, "step": 8496 }, { "epoch": 0.12050866625778615, "grad_norm": 3.125, "learning_rate": 4.830581852885033e-05, "loss": 0.9012, "step": 8498 }, { "epoch": 0.12053702791141238, "grad_norm": 3.40625, "learning_rate": 4.8305009948217996e-05, "loss": 0.868, "step": 8500 }, { "epoch": 0.1205653895650386, "grad_norm": 3.0, "learning_rate": 4.8304201181446994e-05, "loss": 0.8877, "step": 8502 }, { "epoch": 0.12059375121866481, "grad_norm": 3.0625, "learning_rate": 4.8303392228543774e-05, "loss": 0.8987, "step": 8504 }, { "epoch": 0.12062211287229102, "grad_norm": 3.078125, "learning_rate": 4.830258308951482e-05, "loss": 0.8723, "step": 8506 }, { "epoch": 0.12065047452591723, "grad_norm": 2.671875, "learning_rate": 4.8301773764366575e-05, "loss": 0.8401, "step": 8508 }, { "epoch": 0.12067883617954345, "grad_norm": 3.0, "learning_rate": 4.8300964253105505e-05, "loss": 0.8818, "step": 8510 }, { "epoch": 0.12070719783316966, "grad_norm": 2.84375, "learning_rate": 4.830015455573809e-05, "loss": 0.8766, "step": 8512 }, { "epoch": 0.12073555948679587, "grad_norm": 3.203125, "learning_rate": 4.829934467227077e-05, "loss": 0.8667, "step": 8514 }, { "epoch": 0.12076392114042209, "grad_norm": 2.984375, "learning_rate": 4.8298534602710046e-05, "loss": 0.9016, "step": 8516 }, { "epoch": 0.1207922827940483, "grad_norm": 3.203125, "learning_rate": 4.829772434706236e-05, "loss": 0.8896, "step": 8518 }, { "epoch": 0.12082064444767453, "grad_norm": 2.96875, "learning_rate": 4.829691390533421e-05, "loss": 0.8687, "step": 8520 }, { "epoch": 0.12084900610130074, "grad_norm": 2.8125, "learning_rate": 4.829610327753204e-05, "loss": 0.8608, "step": 8522 }, { "epoch": 0.12087736775492695, "grad_norm": 2.96875, "learning_rate": 4.829529246366234e-05, "loss": 0.8652, "step": 8524 }, { "epoch": 0.12090572940855317, "grad_norm": 3.203125, "learning_rate": 4.82944814637316e-05, "loss": 0.8385, "step": 8526 }, { "epoch": 0.12093409106217938, "grad_norm": 2.984375, "learning_rate": 4.829367027774627e-05, "loss": 0.8317, "step": 8528 }, { "epoch": 0.1209624527158056, "grad_norm": 3.140625, "learning_rate": 4.829285890571285e-05, "loss": 0.842, "step": 8530 }, { "epoch": 0.1209908143694318, "grad_norm": 3.046875, "learning_rate": 4.82920473476378e-05, "loss": 0.7847, "step": 8532 }, { "epoch": 0.12101917602305802, "grad_norm": 3.25, "learning_rate": 4.8291235603527616e-05, "loss": 0.8997, "step": 8534 }, { "epoch": 0.12104753767668423, "grad_norm": 3.078125, "learning_rate": 4.8290423673388785e-05, "loss": 0.8754, "step": 8536 }, { "epoch": 0.12107589933031046, "grad_norm": 3.140625, "learning_rate": 4.828961155722779e-05, "loss": 0.8678, "step": 8538 }, { "epoch": 0.12110426098393667, "grad_norm": 3.0625, "learning_rate": 4.828879925505111e-05, "loss": 0.8077, "step": 8540 }, { "epoch": 0.12113262263756289, "grad_norm": 2.890625, "learning_rate": 4.828798676686524e-05, "loss": 0.8912, "step": 8542 }, { "epoch": 0.1211609842911891, "grad_norm": 2.90625, "learning_rate": 4.828717409267666e-05, "loss": 0.9141, "step": 8544 }, { "epoch": 0.12118934594481531, "grad_norm": 3.28125, "learning_rate": 4.8286361232491874e-05, "loss": 0.8847, "step": 8546 }, { "epoch": 0.12121770759844153, "grad_norm": 2.921875, "learning_rate": 4.828554818631737e-05, "loss": 0.8409, "step": 8548 }, { "epoch": 0.12124606925206774, "grad_norm": 3.28125, "learning_rate": 4.8284734954159625e-05, "loss": 0.8453, "step": 8550 }, { "epoch": 0.12127443090569395, "grad_norm": 3.15625, "learning_rate": 4.828392153602516e-05, "loss": 0.909, "step": 8552 }, { "epoch": 0.12130279255932017, "grad_norm": 3.453125, "learning_rate": 4.828310793192046e-05, "loss": 0.8885, "step": 8554 }, { "epoch": 0.12133115421294638, "grad_norm": 2.9375, "learning_rate": 4.8282294141852025e-05, "loss": 0.8568, "step": 8556 }, { "epoch": 0.1213595158665726, "grad_norm": 2.96875, "learning_rate": 4.828148016582635e-05, "loss": 0.8679, "step": 8558 }, { "epoch": 0.12138787752019882, "grad_norm": 3.0625, "learning_rate": 4.828066600384994e-05, "loss": 0.8932, "step": 8560 }, { "epoch": 0.12141623917382503, "grad_norm": 3.21875, "learning_rate": 4.8279851655929295e-05, "loss": 0.8862, "step": 8562 }, { "epoch": 0.12144460082745125, "grad_norm": 3.6875, "learning_rate": 4.827903712207094e-05, "loss": 0.9029, "step": 8564 }, { "epoch": 0.12147296248107746, "grad_norm": 3.203125, "learning_rate": 4.8278222402281346e-05, "loss": 0.8556, "step": 8566 }, { "epoch": 0.12150132413470367, "grad_norm": 2.890625, "learning_rate": 4.827740749656704e-05, "loss": 0.8117, "step": 8568 }, { "epoch": 0.12152968578832989, "grad_norm": 2.9375, "learning_rate": 4.8276592404934525e-05, "loss": 0.8179, "step": 8570 }, { "epoch": 0.1215580474419561, "grad_norm": 2.921875, "learning_rate": 4.827577712739031e-05, "loss": 0.8246, "step": 8572 }, { "epoch": 0.12158640909558231, "grad_norm": 3.09375, "learning_rate": 4.827496166394092e-05, "loss": 0.8809, "step": 8574 }, { "epoch": 0.12161477074920854, "grad_norm": 3.1875, "learning_rate": 4.827414601459286e-05, "loss": 0.8279, "step": 8576 }, { "epoch": 0.12164313240283475, "grad_norm": 3.53125, "learning_rate": 4.827333017935264e-05, "loss": 0.8232, "step": 8578 }, { "epoch": 0.12167149405646097, "grad_norm": 3.09375, "learning_rate": 4.827251415822678e-05, "loss": 0.8568, "step": 8580 }, { "epoch": 0.12169985571008718, "grad_norm": 2.96875, "learning_rate": 4.8271697951221794e-05, "loss": 0.8042, "step": 8582 }, { "epoch": 0.12172821736371339, "grad_norm": 3.125, "learning_rate": 4.8270881558344204e-05, "loss": 0.8624, "step": 8584 }, { "epoch": 0.1217565790173396, "grad_norm": 2.8125, "learning_rate": 4.827006497960054e-05, "loss": 0.8578, "step": 8586 }, { "epoch": 0.12178494067096582, "grad_norm": 2.953125, "learning_rate": 4.8269248214997304e-05, "loss": 0.837, "step": 8588 }, { "epoch": 0.12181330232459203, "grad_norm": 3.0625, "learning_rate": 4.826843126454104e-05, "loss": 0.8317, "step": 8590 }, { "epoch": 0.12184166397821825, "grad_norm": 3.1875, "learning_rate": 4.826761412823825e-05, "loss": 0.8624, "step": 8592 }, { "epoch": 0.12187002563184446, "grad_norm": 2.96875, "learning_rate": 4.8266796806095485e-05, "loss": 0.8794, "step": 8594 }, { "epoch": 0.12189838728547069, "grad_norm": 3.25, "learning_rate": 4.8265979298119254e-05, "loss": 0.8948, "step": 8596 }, { "epoch": 0.1219267489390969, "grad_norm": 3.265625, "learning_rate": 4.8265161604316103e-05, "loss": 0.8149, "step": 8598 }, { "epoch": 0.12195511059272311, "grad_norm": 3.65625, "learning_rate": 4.826434372469255e-05, "loss": 0.8483, "step": 8600 }, { "epoch": 0.12198347224634933, "grad_norm": 3.078125, "learning_rate": 4.826352565925513e-05, "loss": 0.8608, "step": 8602 }, { "epoch": 0.12201183389997554, "grad_norm": 3.140625, "learning_rate": 4.8262707408010384e-05, "loss": 0.8456, "step": 8604 }, { "epoch": 0.12204019555360175, "grad_norm": 3.34375, "learning_rate": 4.826188897096484e-05, "loss": 0.8634, "step": 8606 }, { "epoch": 0.12206855720722797, "grad_norm": 3.203125, "learning_rate": 4.8261070348125035e-05, "loss": 0.8311, "step": 8608 }, { "epoch": 0.12209691886085418, "grad_norm": 3.5, "learning_rate": 4.826025153949751e-05, "loss": 0.8895, "step": 8610 }, { "epoch": 0.12212528051448039, "grad_norm": 3.40625, "learning_rate": 4.825943254508881e-05, "loss": 0.9245, "step": 8612 }, { "epoch": 0.1221536421681066, "grad_norm": 3.1875, "learning_rate": 4.825861336490547e-05, "loss": 0.8913, "step": 8614 }, { "epoch": 0.12218200382173283, "grad_norm": 3.109375, "learning_rate": 4.8257793998954024e-05, "loss": 0.8572, "step": 8616 }, { "epoch": 0.12221036547535905, "grad_norm": 3.09375, "learning_rate": 4.825697444724103e-05, "loss": 0.8646, "step": 8618 }, { "epoch": 0.12223872712898526, "grad_norm": 2.734375, "learning_rate": 4.8256154709773035e-05, "loss": 0.8617, "step": 8620 }, { "epoch": 0.12226708878261147, "grad_norm": 3.015625, "learning_rate": 4.825533478655658e-05, "loss": 0.8595, "step": 8622 }, { "epoch": 0.12229545043623768, "grad_norm": 2.984375, "learning_rate": 4.825451467759822e-05, "loss": 0.8369, "step": 8624 }, { "epoch": 0.1223238120898639, "grad_norm": 3.234375, "learning_rate": 4.825369438290449e-05, "loss": 0.8611, "step": 8626 }, { "epoch": 0.12235217374349011, "grad_norm": 3.0, "learning_rate": 4.825287390248195e-05, "loss": 0.8573, "step": 8628 }, { "epoch": 0.12238053539711632, "grad_norm": 3.25, "learning_rate": 4.825205323633717e-05, "loss": 0.8253, "step": 8630 }, { "epoch": 0.12240889705074254, "grad_norm": 2.734375, "learning_rate": 4.8251232384476675e-05, "loss": 0.8948, "step": 8632 }, { "epoch": 0.12243725870436876, "grad_norm": 3.8125, "learning_rate": 4.8250411346907044e-05, "loss": 0.8541, "step": 8634 }, { "epoch": 0.12246562035799498, "grad_norm": 3.375, "learning_rate": 4.8249590123634825e-05, "loss": 0.8583, "step": 8636 }, { "epoch": 0.12249398201162119, "grad_norm": 2.953125, "learning_rate": 4.8248768714666584e-05, "loss": 0.8781, "step": 8638 }, { "epoch": 0.1225223436652474, "grad_norm": 3.328125, "learning_rate": 4.8247947120008866e-05, "loss": 0.8946, "step": 8640 }, { "epoch": 0.12255070531887362, "grad_norm": 3.59375, "learning_rate": 4.8247125339668244e-05, "loss": 0.8876, "step": 8642 }, { "epoch": 0.12257906697249983, "grad_norm": 3.09375, "learning_rate": 4.824630337365129e-05, "loss": 0.8431, "step": 8644 }, { "epoch": 0.12260742862612604, "grad_norm": 3.328125, "learning_rate": 4.8245481221964556e-05, "loss": 0.8514, "step": 8646 }, { "epoch": 0.12263579027975226, "grad_norm": 3.15625, "learning_rate": 4.8244658884614614e-05, "loss": 0.882, "step": 8648 }, { "epoch": 0.12266415193337847, "grad_norm": 2.9375, "learning_rate": 4.824383636160803e-05, "loss": 0.8749, "step": 8650 }, { "epoch": 0.12269251358700468, "grad_norm": 3.0625, "learning_rate": 4.8243013652951374e-05, "loss": 0.8617, "step": 8652 }, { "epoch": 0.12272087524063091, "grad_norm": 3.046875, "learning_rate": 4.8242190758651215e-05, "loss": 0.8744, "step": 8654 }, { "epoch": 0.12274923689425712, "grad_norm": 3.0625, "learning_rate": 4.8241367678714136e-05, "loss": 0.8644, "step": 8656 }, { "epoch": 0.12277759854788334, "grad_norm": 2.9375, "learning_rate": 4.8240544413146704e-05, "loss": 0.8118, "step": 8658 }, { "epoch": 0.12280596020150955, "grad_norm": 3.328125, "learning_rate": 4.8239720961955484e-05, "loss": 0.843, "step": 8660 }, { "epoch": 0.12283432185513576, "grad_norm": 3.4375, "learning_rate": 4.823889732514707e-05, "loss": 0.8804, "step": 8662 }, { "epoch": 0.12286268350876198, "grad_norm": 3.015625, "learning_rate": 4.8238073502728035e-05, "loss": 0.8136, "step": 8664 }, { "epoch": 0.12289104516238819, "grad_norm": 2.703125, "learning_rate": 4.8237249494704945e-05, "loss": 0.8604, "step": 8666 }, { "epoch": 0.1229194068160144, "grad_norm": 3.3125, "learning_rate": 4.82364253010844e-05, "loss": 0.8516, "step": 8668 }, { "epoch": 0.12294776846964062, "grad_norm": 3.15625, "learning_rate": 4.823560092187298e-05, "loss": 0.8706, "step": 8670 }, { "epoch": 0.12297613012326683, "grad_norm": 2.890625, "learning_rate": 4.823477635707726e-05, "loss": 0.8669, "step": 8672 }, { "epoch": 0.12300449177689306, "grad_norm": 2.96875, "learning_rate": 4.8233951606703834e-05, "loss": 0.8203, "step": 8674 }, { "epoch": 0.12303285343051927, "grad_norm": 3.296875, "learning_rate": 4.823312667075929e-05, "loss": 0.8478, "step": 8676 }, { "epoch": 0.12306121508414548, "grad_norm": 2.640625, "learning_rate": 4.8232301549250205e-05, "loss": 0.8653, "step": 8678 }, { "epoch": 0.1230895767377717, "grad_norm": 2.875, "learning_rate": 4.823147624218318e-05, "loss": 0.8418, "step": 8680 }, { "epoch": 0.12311793839139791, "grad_norm": 3.15625, "learning_rate": 4.823065074956481e-05, "loss": 0.8273, "step": 8682 }, { "epoch": 0.12314630004502412, "grad_norm": 3.1875, "learning_rate": 4.8229825071401684e-05, "loss": 0.8528, "step": 8684 }, { "epoch": 0.12317466169865034, "grad_norm": 3.203125, "learning_rate": 4.822899920770039e-05, "loss": 0.924, "step": 8686 }, { "epoch": 0.12320302335227655, "grad_norm": 2.90625, "learning_rate": 4.822817315846754e-05, "loss": 0.8865, "step": 8688 }, { "epoch": 0.12323138500590276, "grad_norm": 2.859375, "learning_rate": 4.822734692370971e-05, "loss": 0.8755, "step": 8690 }, { "epoch": 0.12325974665952899, "grad_norm": 2.796875, "learning_rate": 4.8226520503433515e-05, "loss": 0.812, "step": 8692 }, { "epoch": 0.1232881083131552, "grad_norm": 3.359375, "learning_rate": 4.8225693897645553e-05, "loss": 0.857, "step": 8694 }, { "epoch": 0.12331646996678142, "grad_norm": 3.78125, "learning_rate": 4.822486710635242e-05, "loss": 0.8343, "step": 8696 }, { "epoch": 0.12334483162040763, "grad_norm": 3.515625, "learning_rate": 4.822404012956073e-05, "loss": 0.8484, "step": 8698 }, { "epoch": 0.12337319327403384, "grad_norm": 3.1875, "learning_rate": 4.8223212967277076e-05, "loss": 0.8963, "step": 8700 }, { "epoch": 0.12340155492766006, "grad_norm": 3.328125, "learning_rate": 4.822238561950808e-05, "loss": 0.872, "step": 8702 }, { "epoch": 0.12342991658128627, "grad_norm": 2.96875, "learning_rate": 4.8221558086260335e-05, "loss": 0.8551, "step": 8704 }, { "epoch": 0.12345827823491248, "grad_norm": 2.90625, "learning_rate": 4.8220730367540457e-05, "loss": 0.8356, "step": 8706 }, { "epoch": 0.1234866398885387, "grad_norm": 2.828125, "learning_rate": 4.8219902463355063e-05, "loss": 0.8848, "step": 8708 }, { "epoch": 0.12351500154216491, "grad_norm": 3.046875, "learning_rate": 4.821907437371075e-05, "loss": 0.8333, "step": 8710 }, { "epoch": 0.12354336319579114, "grad_norm": 2.9375, "learning_rate": 4.821824609861414e-05, "loss": 0.8625, "step": 8712 }, { "epoch": 0.12357172484941735, "grad_norm": 3.71875, "learning_rate": 4.821741763807186e-05, "loss": 0.9097, "step": 8714 }, { "epoch": 0.12360008650304356, "grad_norm": 3.046875, "learning_rate": 4.8216588992090506e-05, "loss": 0.8241, "step": 8716 }, { "epoch": 0.12362844815666978, "grad_norm": 3.140625, "learning_rate": 4.821576016067671e-05, "loss": 0.902, "step": 8718 }, { "epoch": 0.12365680981029599, "grad_norm": 2.921875, "learning_rate": 4.8214931143837094e-05, "loss": 0.8378, "step": 8720 }, { "epoch": 0.1236851714639222, "grad_norm": 3.0, "learning_rate": 4.821410194157827e-05, "loss": 0.8269, "step": 8722 }, { "epoch": 0.12371353311754842, "grad_norm": 3.28125, "learning_rate": 4.8213272553906874e-05, "loss": 0.9035, "step": 8724 }, { "epoch": 0.12374189477117463, "grad_norm": 2.875, "learning_rate": 4.8212442980829514e-05, "loss": 0.8323, "step": 8726 }, { "epoch": 0.12377025642480084, "grad_norm": 3.09375, "learning_rate": 4.821161322235282e-05, "loss": 0.8546, "step": 8728 }, { "epoch": 0.12379861807842707, "grad_norm": 2.984375, "learning_rate": 4.8210783278483436e-05, "loss": 0.8694, "step": 8730 }, { "epoch": 0.12382697973205328, "grad_norm": 3.78125, "learning_rate": 4.8209953149227966e-05, "loss": 0.8477, "step": 8732 }, { "epoch": 0.1238553413856795, "grad_norm": 2.84375, "learning_rate": 4.8209122834593055e-05, "loss": 0.8352, "step": 8734 }, { "epoch": 0.12388370303930571, "grad_norm": 3.203125, "learning_rate": 4.8208292334585335e-05, "loss": 0.8596, "step": 8736 }, { "epoch": 0.12391206469293192, "grad_norm": 3.125, "learning_rate": 4.8207461649211434e-05, "loss": 0.8692, "step": 8738 }, { "epoch": 0.12394042634655814, "grad_norm": 2.984375, "learning_rate": 4.820663077847799e-05, "loss": 0.8382, "step": 8740 }, { "epoch": 0.12396878800018435, "grad_norm": 3.0625, "learning_rate": 4.820579972239163e-05, "loss": 0.878, "step": 8742 }, { "epoch": 0.12399714965381056, "grad_norm": 3.75, "learning_rate": 4.820496848095901e-05, "loss": 0.8751, "step": 8744 }, { "epoch": 0.12402551130743678, "grad_norm": 2.734375, "learning_rate": 4.8204137054186754e-05, "loss": 0.8316, "step": 8746 }, { "epoch": 0.12405387296106299, "grad_norm": 3.078125, "learning_rate": 4.820330544208151e-05, "loss": 0.8491, "step": 8748 }, { "epoch": 0.12408223461468922, "grad_norm": 3.203125, "learning_rate": 4.820247364464991e-05, "loss": 0.8777, "step": 8750 }, { "epoch": 0.12411059626831543, "grad_norm": 3.5, "learning_rate": 4.8201641661898625e-05, "loss": 0.9081, "step": 8752 }, { "epoch": 0.12413895792194164, "grad_norm": 3.078125, "learning_rate": 4.820080949383427e-05, "loss": 0.8801, "step": 8754 }, { "epoch": 0.12416731957556786, "grad_norm": 3.0625, "learning_rate": 4.819997714046349e-05, "loss": 0.8725, "step": 8756 }, { "epoch": 0.12419568122919407, "grad_norm": 3.140625, "learning_rate": 4.819914460179295e-05, "loss": 0.8306, "step": 8758 }, { "epoch": 0.12422404288282028, "grad_norm": 3.234375, "learning_rate": 4.81983118778293e-05, "loss": 0.8387, "step": 8760 }, { "epoch": 0.1242524045364465, "grad_norm": 3.15625, "learning_rate": 4.819747896857919e-05, "loss": 0.882, "step": 8762 }, { "epoch": 0.12428076619007271, "grad_norm": 2.96875, "learning_rate": 4.819664587404926e-05, "loss": 0.8897, "step": 8764 }, { "epoch": 0.12430912784369892, "grad_norm": 3.0625, "learning_rate": 4.819581259424618e-05, "loss": 0.8318, "step": 8766 }, { "epoch": 0.12433748949732513, "grad_norm": 2.984375, "learning_rate": 4.8194979129176595e-05, "loss": 0.8593, "step": 8768 }, { "epoch": 0.12436585115095136, "grad_norm": 2.59375, "learning_rate": 4.819414547884716e-05, "loss": 0.8314, "step": 8770 }, { "epoch": 0.12439421280457758, "grad_norm": 2.8125, "learning_rate": 4.8193311643264543e-05, "loss": 0.8472, "step": 8772 }, { "epoch": 0.12442257445820379, "grad_norm": 3.03125, "learning_rate": 4.819247762243539e-05, "loss": 0.8711, "step": 8774 }, { "epoch": 0.12445093611183, "grad_norm": 3.21875, "learning_rate": 4.819164341636638e-05, "loss": 0.8478, "step": 8776 }, { "epoch": 0.12447929776545621, "grad_norm": 3.109375, "learning_rate": 4.819080902506416e-05, "loss": 0.8828, "step": 8778 }, { "epoch": 0.12450765941908243, "grad_norm": 2.890625, "learning_rate": 4.8189974448535414e-05, "loss": 0.8097, "step": 8780 }, { "epoch": 0.12453602107270864, "grad_norm": 3.21875, "learning_rate": 4.818913968678679e-05, "loss": 0.9076, "step": 8782 }, { "epoch": 0.12456438272633485, "grad_norm": 2.9375, "learning_rate": 4.8188304739824954e-05, "loss": 0.8711, "step": 8784 }, { "epoch": 0.12459274437996107, "grad_norm": 3.265625, "learning_rate": 4.8187469607656585e-05, "loss": 0.8879, "step": 8786 }, { "epoch": 0.1246211060335873, "grad_norm": 2.90625, "learning_rate": 4.818663429028836e-05, "loss": 0.8392, "step": 8788 }, { "epoch": 0.12464946768721351, "grad_norm": 3.3125, "learning_rate": 4.818579878772693e-05, "loss": 0.832, "step": 8790 }, { "epoch": 0.12467782934083972, "grad_norm": 3.046875, "learning_rate": 4.818496309997898e-05, "loss": 0.8323, "step": 8792 }, { "epoch": 0.12470619099446593, "grad_norm": 2.984375, "learning_rate": 4.818412722705118e-05, "loss": 0.8139, "step": 8794 }, { "epoch": 0.12473455264809215, "grad_norm": 3.0625, "learning_rate": 4.818329116895021e-05, "loss": 0.9156, "step": 8796 }, { "epoch": 0.12476291430171836, "grad_norm": 2.90625, "learning_rate": 4.818245492568275e-05, "loss": 0.862, "step": 8798 }, { "epoch": 0.12479127595534457, "grad_norm": 3.203125, "learning_rate": 4.818161849725548e-05, "loss": 0.911, "step": 8800 }, { "epoch": 0.12481963760897079, "grad_norm": 3.28125, "learning_rate": 4.818078188367506e-05, "loss": 0.8587, "step": 8802 }, { "epoch": 0.124847999262597, "grad_norm": 3.25, "learning_rate": 4.817994508494821e-05, "loss": 0.9121, "step": 8804 }, { "epoch": 0.12487636091622321, "grad_norm": 3.875, "learning_rate": 4.817910810108158e-05, "loss": 0.915, "step": 8806 }, { "epoch": 0.12490472256984944, "grad_norm": 2.84375, "learning_rate": 4.817827093208187e-05, "loss": 0.8542, "step": 8808 }, { "epoch": 0.12493308422347565, "grad_norm": 2.96875, "learning_rate": 4.817743357795577e-05, "loss": 0.8523, "step": 8810 }, { "epoch": 0.12496144587710187, "grad_norm": 3.078125, "learning_rate": 4.817659603870995e-05, "loss": 0.8904, "step": 8812 }, { "epoch": 0.12498980753072808, "grad_norm": 2.953125, "learning_rate": 4.8175758314351124e-05, "loss": 0.8686, "step": 8814 }, { "epoch": 0.1250181691843543, "grad_norm": 2.828125, "learning_rate": 4.8174920404885963e-05, "loss": 0.8899, "step": 8816 }, { "epoch": 0.1250465308379805, "grad_norm": 3.234375, "learning_rate": 4.817408231032117e-05, "loss": 0.9097, "step": 8818 }, { "epoch": 0.12507489249160672, "grad_norm": 3.09375, "learning_rate": 4.8173244030663435e-05, "loss": 0.9061, "step": 8820 }, { "epoch": 0.12510325414523293, "grad_norm": 3.28125, "learning_rate": 4.8172405565919456e-05, "loss": 0.856, "step": 8822 }, { "epoch": 0.12513161579885915, "grad_norm": 3.484375, "learning_rate": 4.8171566916095926e-05, "loss": 0.8399, "step": 8824 }, { "epoch": 0.12515997745248536, "grad_norm": 3.3125, "learning_rate": 4.8170728081199546e-05, "loss": 0.8352, "step": 8826 }, { "epoch": 0.12518833910611157, "grad_norm": 2.859375, "learning_rate": 4.816988906123702e-05, "loss": 0.8485, "step": 8828 }, { "epoch": 0.1252167007597378, "grad_norm": 2.921875, "learning_rate": 4.8169049856215035e-05, "loss": 0.8281, "step": 8830 }, { "epoch": 0.125245062413364, "grad_norm": 2.84375, "learning_rate": 4.816821046614031e-05, "loss": 0.8731, "step": 8832 }, { "epoch": 0.1252734240669902, "grad_norm": 3.0, "learning_rate": 4.816737089101954e-05, "loss": 0.8252, "step": 8834 }, { "epoch": 0.12530178572061645, "grad_norm": 3.515625, "learning_rate": 4.816653113085944e-05, "loss": 0.9154, "step": 8836 }, { "epoch": 0.12533014737424267, "grad_norm": 3.453125, "learning_rate": 4.81656911856667e-05, "loss": 0.8918, "step": 8838 }, { "epoch": 0.12535850902786888, "grad_norm": 3.0625, "learning_rate": 4.816485105544805e-05, "loss": 0.8523, "step": 8840 }, { "epoch": 0.1253868706814951, "grad_norm": 3.015625, "learning_rate": 4.8164010740210176e-05, "loss": 0.8291, "step": 8842 }, { "epoch": 0.1254152323351213, "grad_norm": 2.9375, "learning_rate": 4.816317023995981e-05, "loss": 0.8592, "step": 8844 }, { "epoch": 0.12544359398874752, "grad_norm": 2.90625, "learning_rate": 4.816232955470366e-05, "loss": 0.8897, "step": 8846 }, { "epoch": 0.12547195564237373, "grad_norm": 3.265625, "learning_rate": 4.816148868444844e-05, "loss": 0.8004, "step": 8848 }, { "epoch": 0.12550031729599995, "grad_norm": 2.90625, "learning_rate": 4.8160647629200864e-05, "loss": 0.8337, "step": 8850 }, { "epoch": 0.12552867894962616, "grad_norm": 3.578125, "learning_rate": 4.815980638896765e-05, "loss": 0.8617, "step": 8852 }, { "epoch": 0.12555704060325237, "grad_norm": 3.0625, "learning_rate": 4.8158964963755516e-05, "loss": 0.8675, "step": 8854 }, { "epoch": 0.1255854022568786, "grad_norm": 3.03125, "learning_rate": 4.815812335357118e-05, "loss": 0.8829, "step": 8856 }, { "epoch": 0.1256137639105048, "grad_norm": 3.3125, "learning_rate": 4.815728155842138e-05, "loss": 0.8577, "step": 8858 }, { "epoch": 0.125642125564131, "grad_norm": 3.4375, "learning_rate": 4.815643957831282e-05, "loss": 0.8712, "step": 8860 }, { "epoch": 0.12567048721775723, "grad_norm": 3.359375, "learning_rate": 4.815559741325223e-05, "loss": 0.8786, "step": 8862 }, { "epoch": 0.12569884887138344, "grad_norm": 3.203125, "learning_rate": 4.815475506324635e-05, "loss": 0.8536, "step": 8864 }, { "epoch": 0.12572721052500965, "grad_norm": 2.953125, "learning_rate": 4.815391252830189e-05, "loss": 0.8682, "step": 8866 }, { "epoch": 0.12575557217863587, "grad_norm": 3.4375, "learning_rate": 4.8153069808425585e-05, "loss": 0.8796, "step": 8868 }, { "epoch": 0.12578393383226208, "grad_norm": 2.765625, "learning_rate": 4.815222690362417e-05, "loss": 0.8347, "step": 8870 }, { "epoch": 0.1258122954858883, "grad_norm": 2.890625, "learning_rate": 4.815138381390437e-05, "loss": 0.8665, "step": 8872 }, { "epoch": 0.1258406571395145, "grad_norm": 3.296875, "learning_rate": 4.8150540539272927e-05, "loss": 0.8459, "step": 8874 }, { "epoch": 0.12586901879314075, "grad_norm": 3.234375, "learning_rate": 4.814969707973657e-05, "loss": 0.8658, "step": 8876 }, { "epoch": 0.12589738044676696, "grad_norm": 2.890625, "learning_rate": 4.8148853435302036e-05, "loss": 0.8425, "step": 8878 }, { "epoch": 0.12592574210039317, "grad_norm": 3.21875, "learning_rate": 4.8148009605976076e-05, "loss": 0.8801, "step": 8880 }, { "epoch": 0.1259541037540194, "grad_norm": 3.65625, "learning_rate": 4.814716559176541e-05, "loss": 0.9048, "step": 8882 }, { "epoch": 0.1259824654076456, "grad_norm": 3.03125, "learning_rate": 4.8146321392676784e-05, "loss": 0.8931, "step": 8884 }, { "epoch": 0.1260108270612718, "grad_norm": 3.765625, "learning_rate": 4.814547700871696e-05, "loss": 0.8837, "step": 8886 }, { "epoch": 0.12603918871489803, "grad_norm": 2.984375, "learning_rate": 4.814463243989266e-05, "loss": 0.8589, "step": 8888 }, { "epoch": 0.12606755036852424, "grad_norm": 3.25, "learning_rate": 4.814378768621063e-05, "loss": 0.9094, "step": 8890 }, { "epoch": 0.12609591202215045, "grad_norm": 3.3125, "learning_rate": 4.8142942747677634e-05, "loss": 0.883, "step": 8892 }, { "epoch": 0.12612427367577667, "grad_norm": 3.109375, "learning_rate": 4.814209762430041e-05, "loss": 0.8434, "step": 8894 }, { "epoch": 0.12615263532940288, "grad_norm": 3.1875, "learning_rate": 4.814125231608571e-05, "loss": 0.8382, "step": 8896 }, { "epoch": 0.1261809969830291, "grad_norm": 3.296875, "learning_rate": 4.814040682304027e-05, "loss": 0.9266, "step": 8898 }, { "epoch": 0.1262093586366553, "grad_norm": 3.171875, "learning_rate": 4.813956114517087e-05, "loss": 0.8859, "step": 8900 }, { "epoch": 0.12623772029028152, "grad_norm": 3.046875, "learning_rate": 4.813871528248425e-05, "loss": 0.8471, "step": 8902 }, { "epoch": 0.12626608194390773, "grad_norm": 2.953125, "learning_rate": 4.813786923498717e-05, "loss": 0.7952, "step": 8904 }, { "epoch": 0.12629444359753395, "grad_norm": 3.1875, "learning_rate": 4.8137023002686375e-05, "loss": 0.8673, "step": 8906 }, { "epoch": 0.12632280525116016, "grad_norm": 3.046875, "learning_rate": 4.813617658558864e-05, "loss": 0.8803, "step": 8908 }, { "epoch": 0.12635116690478637, "grad_norm": 2.671875, "learning_rate": 4.813532998370072e-05, "loss": 0.8262, "step": 8910 }, { "epoch": 0.12637952855841258, "grad_norm": 3.0625, "learning_rate": 4.8134483197029376e-05, "loss": 0.845, "step": 8912 }, { "epoch": 0.12640789021203883, "grad_norm": 2.90625, "learning_rate": 4.8133636225581375e-05, "loss": 0.8407, "step": 8914 }, { "epoch": 0.12643625186566504, "grad_norm": 3.09375, "learning_rate": 4.813278906936348e-05, "loss": 0.8272, "step": 8916 }, { "epoch": 0.12646461351929125, "grad_norm": 3.421875, "learning_rate": 4.813194172838245e-05, "loss": 0.8921, "step": 8918 }, { "epoch": 0.12649297517291747, "grad_norm": 2.859375, "learning_rate": 4.813109420264506e-05, "loss": 0.8778, "step": 8920 }, { "epoch": 0.12652133682654368, "grad_norm": 2.984375, "learning_rate": 4.813024649215807e-05, "loss": 0.8918, "step": 8922 }, { "epoch": 0.1265496984801699, "grad_norm": 2.625, "learning_rate": 4.812939859692826e-05, "loss": 0.8372, "step": 8924 }, { "epoch": 0.1265780601337961, "grad_norm": 3.1875, "learning_rate": 4.812855051696241e-05, "loss": 0.8307, "step": 8926 }, { "epoch": 0.12660642178742232, "grad_norm": 2.953125, "learning_rate": 4.8127702252267285e-05, "loss": 0.8263, "step": 8928 }, { "epoch": 0.12663478344104853, "grad_norm": 2.96875, "learning_rate": 4.8126853802849655e-05, "loss": 0.8785, "step": 8930 }, { "epoch": 0.12666314509467475, "grad_norm": 3.5, "learning_rate": 4.8126005168716305e-05, "loss": 0.8976, "step": 8932 }, { "epoch": 0.12669150674830096, "grad_norm": 3.234375, "learning_rate": 4.8125156349874004e-05, "loss": 0.8982, "step": 8934 }, { "epoch": 0.12671986840192717, "grad_norm": 3.234375, "learning_rate": 4.812430734632954e-05, "loss": 0.8745, "step": 8936 }, { "epoch": 0.12674823005555338, "grad_norm": 3.109375, "learning_rate": 4.8123458158089687e-05, "loss": 0.8837, "step": 8938 }, { "epoch": 0.1267765917091796, "grad_norm": 3.203125, "learning_rate": 4.812260878516124e-05, "loss": 0.8361, "step": 8940 }, { "epoch": 0.1268049533628058, "grad_norm": 3.609375, "learning_rate": 4.812175922755096e-05, "loss": 0.8675, "step": 8942 }, { "epoch": 0.12683331501643202, "grad_norm": 2.734375, "learning_rate": 4.8120909485265657e-05, "loss": 0.8505, "step": 8944 }, { "epoch": 0.12686167667005824, "grad_norm": 2.890625, "learning_rate": 4.8120059558312106e-05, "loss": 0.8447, "step": 8946 }, { "epoch": 0.12689003832368445, "grad_norm": 2.9375, "learning_rate": 4.81192094466971e-05, "loss": 0.8789, "step": 8948 }, { "epoch": 0.12691839997731066, "grad_norm": 3.15625, "learning_rate": 4.811835915042742e-05, "loss": 0.8275, "step": 8950 }, { "epoch": 0.1269467616309369, "grad_norm": 3.40625, "learning_rate": 4.811750866950986e-05, "loss": 0.8483, "step": 8952 }, { "epoch": 0.12697512328456312, "grad_norm": 3.015625, "learning_rate": 4.811665800395123e-05, "loss": 0.8461, "step": 8954 }, { "epoch": 0.12700348493818933, "grad_norm": 2.9375, "learning_rate": 4.81158071537583e-05, "loss": 0.8788, "step": 8956 }, { "epoch": 0.12703184659181554, "grad_norm": 3.015625, "learning_rate": 4.811495611893788e-05, "loss": 0.8773, "step": 8958 }, { "epoch": 0.12706020824544176, "grad_norm": 2.90625, "learning_rate": 4.8114104899496764e-05, "loss": 0.853, "step": 8960 }, { "epoch": 0.12708856989906797, "grad_norm": 3.28125, "learning_rate": 4.8113253495441745e-05, "loss": 0.8581, "step": 8962 }, { "epoch": 0.12711693155269418, "grad_norm": 3.015625, "learning_rate": 4.8112401906779636e-05, "loss": 0.8587, "step": 8964 }, { "epoch": 0.1271452932063204, "grad_norm": 3.484375, "learning_rate": 4.811155013351723e-05, "loss": 0.8961, "step": 8966 }, { "epoch": 0.1271736548599466, "grad_norm": 3.15625, "learning_rate": 4.8110698175661326e-05, "loss": 0.9038, "step": 8968 }, { "epoch": 0.12720201651357282, "grad_norm": 3.234375, "learning_rate": 4.810984603321874e-05, "loss": 0.8236, "step": 8970 }, { "epoch": 0.12723037816719904, "grad_norm": 2.703125, "learning_rate": 4.810899370619627e-05, "loss": 0.8669, "step": 8972 }, { "epoch": 0.12725873982082525, "grad_norm": 3.296875, "learning_rate": 4.810814119460073e-05, "loss": 0.8748, "step": 8974 }, { "epoch": 0.12728710147445146, "grad_norm": 3.125, "learning_rate": 4.8107288498438916e-05, "loss": 0.8738, "step": 8976 }, { "epoch": 0.12731546312807768, "grad_norm": 3.203125, "learning_rate": 4.810643561771766e-05, "loss": 0.8775, "step": 8978 }, { "epoch": 0.1273438247817039, "grad_norm": 3.609375, "learning_rate": 4.810558255244375e-05, "loss": 0.8725, "step": 8980 }, { "epoch": 0.1273721864353301, "grad_norm": 2.9375, "learning_rate": 4.810472930262402e-05, "loss": 0.8497, "step": 8982 }, { "epoch": 0.12740054808895632, "grad_norm": 2.9375, "learning_rate": 4.810387586826527e-05, "loss": 0.9181, "step": 8984 }, { "epoch": 0.12742890974258253, "grad_norm": 2.8125, "learning_rate": 4.810302224937432e-05, "loss": 0.8439, "step": 8986 }, { "epoch": 0.12745727139620874, "grad_norm": 2.9375, "learning_rate": 4.8102168445957996e-05, "loss": 0.808, "step": 8988 }, { "epoch": 0.12748563304983498, "grad_norm": 3.25, "learning_rate": 4.810131445802312e-05, "loss": 0.8537, "step": 8990 }, { "epoch": 0.1275139947034612, "grad_norm": 3.21875, "learning_rate": 4.810046028557649e-05, "loss": 0.8944, "step": 8992 }, { "epoch": 0.1275423563570874, "grad_norm": 3.28125, "learning_rate": 4.8099605928624944e-05, "loss": 0.8728, "step": 8994 }, { "epoch": 0.12757071801071362, "grad_norm": 3.09375, "learning_rate": 4.809875138717531e-05, "loss": 0.9011, "step": 8996 }, { "epoch": 0.12759907966433984, "grad_norm": 3.171875, "learning_rate": 4.80978966612344e-05, "loss": 0.8736, "step": 8998 }, { "epoch": 0.12762744131796605, "grad_norm": 2.90625, "learning_rate": 4.809704175080906e-05, "loss": 0.8695, "step": 9000 }, { "epoch": 0.12765580297159226, "grad_norm": 3.296875, "learning_rate": 4.80961866559061e-05, "loss": 0.8884, "step": 9002 }, { "epoch": 0.12768416462521848, "grad_norm": 2.875, "learning_rate": 4.809533137653236e-05, "loss": 0.8581, "step": 9004 }, { "epoch": 0.1277125262788447, "grad_norm": 2.921875, "learning_rate": 4.809447591269467e-05, "loss": 0.8966, "step": 9006 }, { "epoch": 0.1277408879324709, "grad_norm": 2.96875, "learning_rate": 4.8093620264399855e-05, "loss": 0.8069, "step": 9008 }, { "epoch": 0.12776924958609712, "grad_norm": 3.3125, "learning_rate": 4.809276443165475e-05, "loss": 0.8908, "step": 9010 }, { "epoch": 0.12779761123972333, "grad_norm": 3.078125, "learning_rate": 4.8091908414466206e-05, "loss": 0.9391, "step": 9012 }, { "epoch": 0.12782597289334954, "grad_norm": 3.1875, "learning_rate": 4.809105221284105e-05, "loss": 0.8821, "step": 9014 }, { "epoch": 0.12785433454697576, "grad_norm": 2.984375, "learning_rate": 4.809019582678611e-05, "loss": 0.8484, "step": 9016 }, { "epoch": 0.12788269620060197, "grad_norm": 3.234375, "learning_rate": 4.808933925630824e-05, "loss": 0.8844, "step": 9018 }, { "epoch": 0.12791105785422818, "grad_norm": 2.6875, "learning_rate": 4.8088482501414275e-05, "loss": 0.8529, "step": 9020 }, { "epoch": 0.1279394195078544, "grad_norm": 2.71875, "learning_rate": 4.808762556211106e-05, "loss": 0.8581, "step": 9022 }, { "epoch": 0.1279677811614806, "grad_norm": 3.015625, "learning_rate": 4.808676843840545e-05, "loss": 0.8596, "step": 9024 }, { "epoch": 0.12799614281510682, "grad_norm": 3.171875, "learning_rate": 4.808591113030427e-05, "loss": 0.8472, "step": 9026 }, { "epoch": 0.12802450446873304, "grad_norm": 2.96875, "learning_rate": 4.808505363781438e-05, "loss": 0.8094, "step": 9028 }, { "epoch": 0.12805286612235928, "grad_norm": 3.5, "learning_rate": 4.8084195960942625e-05, "loss": 0.8465, "step": 9030 }, { "epoch": 0.1280812277759855, "grad_norm": 3.140625, "learning_rate": 4.8083338099695864e-05, "loss": 0.8526, "step": 9032 }, { "epoch": 0.1281095894296117, "grad_norm": 3.21875, "learning_rate": 4.808248005408094e-05, "loss": 0.8141, "step": 9034 }, { "epoch": 0.12813795108323792, "grad_norm": 3.265625, "learning_rate": 4.808162182410471e-05, "loss": 0.8798, "step": 9036 }, { "epoch": 0.12816631273686413, "grad_norm": 3.34375, "learning_rate": 4.8080763409774016e-05, "loss": 0.8588, "step": 9038 }, { "epoch": 0.12819467439049034, "grad_norm": 3.015625, "learning_rate": 4.8079904811095736e-05, "loss": 0.8075, "step": 9040 }, { "epoch": 0.12822303604411656, "grad_norm": 3.3125, "learning_rate": 4.807904602807671e-05, "loss": 0.8171, "step": 9042 }, { "epoch": 0.12825139769774277, "grad_norm": 2.984375, "learning_rate": 4.8078187060723807e-05, "loss": 0.8923, "step": 9044 }, { "epoch": 0.12827975935136898, "grad_norm": 3.140625, "learning_rate": 4.807732790904388e-05, "loss": 0.8604, "step": 9046 }, { "epoch": 0.1283081210049952, "grad_norm": 3.171875, "learning_rate": 4.80764685730438e-05, "loss": 0.8598, "step": 9048 }, { "epoch": 0.1283364826586214, "grad_norm": 3.171875, "learning_rate": 4.807560905273043e-05, "loss": 0.8373, "step": 9050 }, { "epoch": 0.12836484431224762, "grad_norm": 2.78125, "learning_rate": 4.807474934811063e-05, "loss": 0.8712, "step": 9052 }, { "epoch": 0.12839320596587384, "grad_norm": 2.875, "learning_rate": 4.807388945919127e-05, "loss": 0.8161, "step": 9054 }, { "epoch": 0.12842156761950005, "grad_norm": 3.03125, "learning_rate": 4.8073029385979204e-05, "loss": 0.8262, "step": 9056 }, { "epoch": 0.12844992927312626, "grad_norm": 3.265625, "learning_rate": 4.8072169128481324e-05, "loss": 0.9214, "step": 9058 }, { "epoch": 0.12847829092675248, "grad_norm": 3.078125, "learning_rate": 4.807130868670449e-05, "loss": 0.8353, "step": 9060 }, { "epoch": 0.1285066525803787, "grad_norm": 3.25, "learning_rate": 4.807044806065557e-05, "loss": 0.8481, "step": 9062 }, { "epoch": 0.1285350142340049, "grad_norm": 3.0625, "learning_rate": 4.8069587250341444e-05, "loss": 0.8691, "step": 9064 }, { "epoch": 0.12856337588763112, "grad_norm": 3.15625, "learning_rate": 4.806872625576898e-05, "loss": 0.8491, "step": 9066 }, { "epoch": 0.12859173754125736, "grad_norm": 3.03125, "learning_rate": 4.8067865076945075e-05, "loss": 0.8643, "step": 9068 }, { "epoch": 0.12862009919488357, "grad_norm": 3.28125, "learning_rate": 4.806700371387658e-05, "loss": 0.878, "step": 9070 }, { "epoch": 0.12864846084850978, "grad_norm": 3.125, "learning_rate": 4.8066142166570397e-05, "loss": 0.956, "step": 9072 }, { "epoch": 0.128676822502136, "grad_norm": 3.1875, "learning_rate": 4.806528043503339e-05, "loss": 0.8875, "step": 9074 }, { "epoch": 0.1287051841557622, "grad_norm": 3.3125, "learning_rate": 4.806441851927245e-05, "loss": 0.8668, "step": 9076 }, { "epoch": 0.12873354580938842, "grad_norm": 3.28125, "learning_rate": 4.8063556419294466e-05, "loss": 0.8465, "step": 9078 }, { "epoch": 0.12876190746301464, "grad_norm": 3.265625, "learning_rate": 4.8062694135106316e-05, "loss": 0.8528, "step": 9080 }, { "epoch": 0.12879026911664085, "grad_norm": 2.796875, "learning_rate": 4.806183166671489e-05, "loss": 0.8483, "step": 9082 }, { "epoch": 0.12881863077026706, "grad_norm": 3.890625, "learning_rate": 4.8060969014127074e-05, "loss": 0.8898, "step": 9084 }, { "epoch": 0.12884699242389328, "grad_norm": 3.28125, "learning_rate": 4.806010617734977e-05, "loss": 0.9074, "step": 9086 }, { "epoch": 0.1288753540775195, "grad_norm": 3.0, "learning_rate": 4.805924315638985e-05, "loss": 0.8672, "step": 9088 }, { "epoch": 0.1289037157311457, "grad_norm": 2.5625, "learning_rate": 4.8058379951254215e-05, "loss": 0.8655, "step": 9090 }, { "epoch": 0.12893207738477191, "grad_norm": 2.96875, "learning_rate": 4.805751656194977e-05, "loss": 0.8731, "step": 9092 }, { "epoch": 0.12896043903839813, "grad_norm": 3.140625, "learning_rate": 4.8056652988483396e-05, "loss": 0.816, "step": 9094 }, { "epoch": 0.12898880069202434, "grad_norm": 3.1875, "learning_rate": 4.8055789230862006e-05, "loss": 0.8694, "step": 9096 }, { "epoch": 0.12901716234565055, "grad_norm": 3.34375, "learning_rate": 4.8054925289092475e-05, "loss": 0.8725, "step": 9098 }, { "epoch": 0.12904552399927677, "grad_norm": 3.0, "learning_rate": 4.805406116318173e-05, "loss": 0.9176, "step": 9100 }, { "epoch": 0.12907388565290298, "grad_norm": 2.984375, "learning_rate": 4.805319685313666e-05, "loss": 0.8544, "step": 9102 }, { "epoch": 0.1291022473065292, "grad_norm": 3.171875, "learning_rate": 4.8052332358964155e-05, "loss": 0.8694, "step": 9104 }, { "epoch": 0.12913060896015544, "grad_norm": 2.90625, "learning_rate": 4.8051467680671145e-05, "loss": 0.8471, "step": 9106 }, { "epoch": 0.12915897061378165, "grad_norm": 3.21875, "learning_rate": 4.8050602818264526e-05, "loss": 0.8667, "step": 9108 }, { "epoch": 0.12918733226740786, "grad_norm": 3.078125, "learning_rate": 4.80497377717512e-05, "loss": 0.8992, "step": 9110 }, { "epoch": 0.12921569392103407, "grad_norm": 3.21875, "learning_rate": 4.804887254113809e-05, "loss": 0.8551, "step": 9112 }, { "epoch": 0.1292440555746603, "grad_norm": 2.96875, "learning_rate": 4.804800712643209e-05, "loss": 0.8483, "step": 9114 }, { "epoch": 0.1292724172282865, "grad_norm": 3.03125, "learning_rate": 4.804714152764012e-05, "loss": 0.8873, "step": 9116 }, { "epoch": 0.12930077888191271, "grad_norm": 3.703125, "learning_rate": 4.80462757447691e-05, "loss": 0.8553, "step": 9118 }, { "epoch": 0.12932914053553893, "grad_norm": 3.125, "learning_rate": 4.8045409777825934e-05, "loss": 0.8563, "step": 9120 }, { "epoch": 0.12935750218916514, "grad_norm": 2.8125, "learning_rate": 4.804454362681754e-05, "loss": 0.8281, "step": 9122 }, { "epoch": 0.12938586384279135, "grad_norm": 3.15625, "learning_rate": 4.8043677291750835e-05, "loss": 0.8523, "step": 9124 }, { "epoch": 0.12941422549641757, "grad_norm": 3.078125, "learning_rate": 4.804281077263275e-05, "loss": 0.8406, "step": 9126 }, { "epoch": 0.12944258715004378, "grad_norm": 3.28125, "learning_rate": 4.80419440694702e-05, "loss": 0.8729, "step": 9128 }, { "epoch": 0.12947094880367, "grad_norm": 3.09375, "learning_rate": 4.80410771822701e-05, "loss": 0.8524, "step": 9130 }, { "epoch": 0.1294993104572962, "grad_norm": 2.65625, "learning_rate": 4.804021011103939e-05, "loss": 0.8637, "step": 9132 }, { "epoch": 0.12952767211092242, "grad_norm": 3.109375, "learning_rate": 4.8039342855784974e-05, "loss": 0.849, "step": 9134 }, { "epoch": 0.12955603376454863, "grad_norm": 3.171875, "learning_rate": 4.80384754165138e-05, "loss": 0.8894, "step": 9136 }, { "epoch": 0.12958439541817485, "grad_norm": 3.21875, "learning_rate": 4.803760779323278e-05, "loss": 0.8689, "step": 9138 }, { "epoch": 0.12961275707180106, "grad_norm": 2.8125, "learning_rate": 4.8036739985948845e-05, "loss": 0.8518, "step": 9140 }, { "epoch": 0.12964111872542727, "grad_norm": 3.234375, "learning_rate": 4.803587199466893e-05, "loss": 0.9258, "step": 9142 }, { "epoch": 0.12966948037905351, "grad_norm": 2.96875, "learning_rate": 4.803500381939997e-05, "loss": 0.899, "step": 9144 }, { "epoch": 0.12969784203267973, "grad_norm": 3.25, "learning_rate": 4.803413546014891e-05, "loss": 0.8846, "step": 9146 }, { "epoch": 0.12972620368630594, "grad_norm": 3.21875, "learning_rate": 4.8033266916922666e-05, "loss": 0.8986, "step": 9148 }, { "epoch": 0.12975456533993215, "grad_norm": 2.921875, "learning_rate": 4.8032398189728176e-05, "loss": 0.8403, "step": 9150 }, { "epoch": 0.12978292699355837, "grad_norm": 3.078125, "learning_rate": 4.8031529278572394e-05, "loss": 0.8755, "step": 9152 }, { "epoch": 0.12981128864718458, "grad_norm": 3.109375, "learning_rate": 4.803066018346225e-05, "loss": 0.8886, "step": 9154 }, { "epoch": 0.1298396503008108, "grad_norm": 2.984375, "learning_rate": 4.802979090440469e-05, "loss": 0.83, "step": 9156 }, { "epoch": 0.129868011954437, "grad_norm": 3.0, "learning_rate": 4.802892144140665e-05, "loss": 0.8827, "step": 9158 }, { "epoch": 0.12989637360806322, "grad_norm": 3.03125, "learning_rate": 4.802805179447508e-05, "loss": 0.8363, "step": 9160 }, { "epoch": 0.12992473526168943, "grad_norm": 3.046875, "learning_rate": 4.802718196361692e-05, "loss": 0.9085, "step": 9162 }, { "epoch": 0.12995309691531565, "grad_norm": 2.953125, "learning_rate": 4.802631194883913e-05, "loss": 0.925, "step": 9164 }, { "epoch": 0.12998145856894186, "grad_norm": 3.28125, "learning_rate": 4.802544175014865e-05, "loss": 0.8595, "step": 9166 }, { "epoch": 0.13000982022256807, "grad_norm": 3.140625, "learning_rate": 4.802457136755242e-05, "loss": 0.8998, "step": 9168 }, { "epoch": 0.1300381818761943, "grad_norm": 3.125, "learning_rate": 4.802370080105741e-05, "loss": 0.8732, "step": 9170 }, { "epoch": 0.1300665435298205, "grad_norm": 3.1875, "learning_rate": 4.802283005067057e-05, "loss": 0.848, "step": 9172 }, { "epoch": 0.1300949051834467, "grad_norm": 3.171875, "learning_rate": 4.802195911639884e-05, "loss": 0.8926, "step": 9174 }, { "epoch": 0.13012326683707293, "grad_norm": 3.625, "learning_rate": 4.802108799824919e-05, "loss": 0.8575, "step": 9176 }, { "epoch": 0.13015162849069914, "grad_norm": 3.125, "learning_rate": 4.802021669622858e-05, "loss": 0.855, "step": 9178 }, { "epoch": 0.13017999014432535, "grad_norm": 3.515625, "learning_rate": 4.801934521034396e-05, "loss": 0.8964, "step": 9180 }, { "epoch": 0.13020835179795157, "grad_norm": 2.984375, "learning_rate": 4.80184735406023e-05, "loss": 0.8436, "step": 9182 }, { "epoch": 0.1302367134515778, "grad_norm": 3.046875, "learning_rate": 4.801760168701055e-05, "loss": 0.7832, "step": 9184 }, { "epoch": 0.13026507510520402, "grad_norm": 3.0625, "learning_rate": 4.8016729649575686e-05, "loss": 0.8683, "step": 9186 }, { "epoch": 0.13029343675883023, "grad_norm": 2.84375, "learning_rate": 4.801585742830466e-05, "loss": 0.8638, "step": 9188 }, { "epoch": 0.13032179841245645, "grad_norm": 3.1875, "learning_rate": 4.801498502320445e-05, "loss": 0.8526, "step": 9190 }, { "epoch": 0.13035016006608266, "grad_norm": 3.125, "learning_rate": 4.801411243428202e-05, "loss": 0.8005, "step": 9192 }, { "epoch": 0.13037852171970887, "grad_norm": 3.125, "learning_rate": 4.801323966154434e-05, "loss": 0.874, "step": 9194 }, { "epoch": 0.1304068833733351, "grad_norm": 3.125, "learning_rate": 4.801236670499837e-05, "loss": 0.8263, "step": 9196 }, { "epoch": 0.1304352450269613, "grad_norm": 3.125, "learning_rate": 4.80114935646511e-05, "loss": 0.8854, "step": 9198 }, { "epoch": 0.1304636066805875, "grad_norm": 2.8125, "learning_rate": 4.8010620240509495e-05, "loss": 0.8502, "step": 9200 }, { "epoch": 0.13049196833421373, "grad_norm": 3.140625, "learning_rate": 4.800974673258054e-05, "loss": 0.8639, "step": 9202 }, { "epoch": 0.13052032998783994, "grad_norm": 3.546875, "learning_rate": 4.8008873040871196e-05, "loss": 0.8744, "step": 9204 }, { "epoch": 0.13054869164146615, "grad_norm": 2.953125, "learning_rate": 4.800799916538844e-05, "loss": 0.8234, "step": 9206 }, { "epoch": 0.13057705329509237, "grad_norm": 2.75, "learning_rate": 4.800712510613927e-05, "loss": 0.7832, "step": 9208 }, { "epoch": 0.13060541494871858, "grad_norm": 3.015625, "learning_rate": 4.800625086313065e-05, "loss": 0.8322, "step": 9210 }, { "epoch": 0.1306337766023448, "grad_norm": 2.828125, "learning_rate": 4.8005376436369576e-05, "loss": 0.8051, "step": 9212 }, { "epoch": 0.130662138255971, "grad_norm": 2.984375, "learning_rate": 4.8004501825863026e-05, "loss": 0.8336, "step": 9214 }, { "epoch": 0.13069049990959722, "grad_norm": 3.34375, "learning_rate": 4.800362703161798e-05, "loss": 0.8402, "step": 9216 }, { "epoch": 0.13071886156322343, "grad_norm": 3.859375, "learning_rate": 4.800275205364144e-05, "loss": 0.8642, "step": 9218 }, { "epoch": 0.13074722321684965, "grad_norm": 2.875, "learning_rate": 4.800187689194038e-05, "loss": 0.8994, "step": 9220 }, { "epoch": 0.13077558487047589, "grad_norm": 3.109375, "learning_rate": 4.80010015465218e-05, "loss": 0.8558, "step": 9222 }, { "epoch": 0.1308039465241021, "grad_norm": 3.09375, "learning_rate": 4.8000126017392676e-05, "loss": 0.865, "step": 9224 }, { "epoch": 0.1308323081777283, "grad_norm": 3.265625, "learning_rate": 4.7999250304560017e-05, "loss": 0.8295, "step": 9226 }, { "epoch": 0.13086066983135453, "grad_norm": 3.046875, "learning_rate": 4.7998374408030814e-05, "loss": 0.8605, "step": 9228 }, { "epoch": 0.13088903148498074, "grad_norm": 2.9375, "learning_rate": 4.7997498327812054e-05, "loss": 0.877, "step": 9230 }, { "epoch": 0.13091739313860695, "grad_norm": 3.0, "learning_rate": 4.7996622063910744e-05, "loss": 0.8349, "step": 9232 }, { "epoch": 0.13094575479223317, "grad_norm": 2.84375, "learning_rate": 4.7995745616333876e-05, "loss": 0.8846, "step": 9234 }, { "epoch": 0.13097411644585938, "grad_norm": 3.484375, "learning_rate": 4.799486898508846e-05, "loss": 0.877, "step": 9236 }, { "epoch": 0.1310024780994856, "grad_norm": 2.921875, "learning_rate": 4.799399217018149e-05, "loss": 0.8521, "step": 9238 }, { "epoch": 0.1310308397531118, "grad_norm": 3.359375, "learning_rate": 4.7993115171619975e-05, "loss": 0.8627, "step": 9240 }, { "epoch": 0.13105920140673802, "grad_norm": 3.15625, "learning_rate": 4.7992237989410904e-05, "loss": 0.8441, "step": 9242 }, { "epoch": 0.13108756306036423, "grad_norm": 3.0, "learning_rate": 4.79913606235613e-05, "loss": 0.8465, "step": 9244 }, { "epoch": 0.13111592471399044, "grad_norm": 3.171875, "learning_rate": 4.7990483074078175e-05, "loss": 0.8582, "step": 9246 }, { "epoch": 0.13114428636761666, "grad_norm": 3.28125, "learning_rate": 4.798960534096851e-05, "loss": 0.916, "step": 9248 }, { "epoch": 0.13117264802124287, "grad_norm": 3.15625, "learning_rate": 4.798872742423935e-05, "loss": 0.8798, "step": 9250 }, { "epoch": 0.13120100967486908, "grad_norm": 3.328125, "learning_rate": 4.798784932389768e-05, "loss": 0.8598, "step": 9252 }, { "epoch": 0.1312293713284953, "grad_norm": 3.359375, "learning_rate": 4.798697103995053e-05, "loss": 0.8761, "step": 9254 }, { "epoch": 0.1312577329821215, "grad_norm": 3.09375, "learning_rate": 4.7986092572404906e-05, "loss": 0.8479, "step": 9256 }, { "epoch": 0.13128609463574772, "grad_norm": 2.9375, "learning_rate": 4.7985213921267834e-05, "loss": 0.8668, "step": 9258 }, { "epoch": 0.13131445628937397, "grad_norm": 2.96875, "learning_rate": 4.798433508654632e-05, "loss": 0.8423, "step": 9260 }, { "epoch": 0.13134281794300018, "grad_norm": 3.265625, "learning_rate": 4.798345606824739e-05, "loss": 0.8759, "step": 9262 }, { "epoch": 0.1313711795966264, "grad_norm": 2.921875, "learning_rate": 4.798257686637806e-05, "loss": 0.8022, "step": 9264 }, { "epoch": 0.1313995412502526, "grad_norm": 3.203125, "learning_rate": 4.798169748094536e-05, "loss": 0.8389, "step": 9266 }, { "epoch": 0.13142790290387882, "grad_norm": 2.90625, "learning_rate": 4.798081791195631e-05, "loss": 0.841, "step": 9268 }, { "epoch": 0.13145626455750503, "grad_norm": 3.03125, "learning_rate": 4.7979938159417925e-05, "loss": 0.7989, "step": 9270 }, { "epoch": 0.13148462621113124, "grad_norm": 3.46875, "learning_rate": 4.7979058223337246e-05, "loss": 0.8867, "step": 9272 }, { "epoch": 0.13151298786475746, "grad_norm": 2.9375, "learning_rate": 4.7978178103721306e-05, "loss": 0.8614, "step": 9274 }, { "epoch": 0.13154134951838367, "grad_norm": 2.859375, "learning_rate": 4.797729780057712e-05, "loss": 0.8182, "step": 9276 }, { "epoch": 0.13156971117200988, "grad_norm": 3.171875, "learning_rate": 4.7976417313911715e-05, "loss": 0.8683, "step": 9278 }, { "epoch": 0.1315980728256361, "grad_norm": 3.390625, "learning_rate": 4.797553664373213e-05, "loss": 0.8519, "step": 9280 }, { "epoch": 0.1316264344792623, "grad_norm": 3.140625, "learning_rate": 4.797465579004542e-05, "loss": 0.9274, "step": 9282 }, { "epoch": 0.13165479613288852, "grad_norm": 3.25, "learning_rate": 4.797377475285859e-05, "loss": 0.8448, "step": 9284 }, { "epoch": 0.13168315778651474, "grad_norm": 2.890625, "learning_rate": 4.797289353217869e-05, "loss": 0.8573, "step": 9286 }, { "epoch": 0.13171151944014095, "grad_norm": 3.0625, "learning_rate": 4.7972012128012754e-05, "loss": 0.8346, "step": 9288 }, { "epoch": 0.13173988109376716, "grad_norm": 3.078125, "learning_rate": 4.797113054036783e-05, "loss": 0.8266, "step": 9290 }, { "epoch": 0.13176824274739338, "grad_norm": 3.0, "learning_rate": 4.797024876925095e-05, "loss": 0.8027, "step": 9292 }, { "epoch": 0.1317966044010196, "grad_norm": 2.765625, "learning_rate": 4.796936681466916e-05, "loss": 0.8075, "step": 9294 }, { "epoch": 0.1318249660546458, "grad_norm": 3.25, "learning_rate": 4.796848467662951e-05, "loss": 0.8408, "step": 9296 }, { "epoch": 0.13185332770827204, "grad_norm": 3.296875, "learning_rate": 4.796760235513904e-05, "loss": 0.8735, "step": 9298 }, { "epoch": 0.13188168936189826, "grad_norm": 3.140625, "learning_rate": 4.79667198502048e-05, "loss": 0.8966, "step": 9300 }, { "epoch": 0.13191005101552447, "grad_norm": 2.875, "learning_rate": 4.796583716183383e-05, "loss": 0.8467, "step": 9302 }, { "epoch": 0.13193841266915068, "grad_norm": 3.265625, "learning_rate": 4.7964954290033195e-05, "loss": 0.8881, "step": 9304 }, { "epoch": 0.1319667743227769, "grad_norm": 3.21875, "learning_rate": 4.796407123480993e-05, "loss": 0.8007, "step": 9306 }, { "epoch": 0.1319951359764031, "grad_norm": 3.15625, "learning_rate": 4.79631879961711e-05, "loss": 0.8423, "step": 9308 }, { "epoch": 0.13202349763002932, "grad_norm": 2.890625, "learning_rate": 4.7962304574123754e-05, "loss": 0.8666, "step": 9310 }, { "epoch": 0.13205185928365554, "grad_norm": 3.28125, "learning_rate": 4.7961420968674955e-05, "loss": 0.8424, "step": 9312 }, { "epoch": 0.13208022093728175, "grad_norm": 2.640625, "learning_rate": 4.796053717983175e-05, "loss": 0.8447, "step": 9314 }, { "epoch": 0.13210858259090796, "grad_norm": 2.796875, "learning_rate": 4.795965320760121e-05, "loss": 0.8442, "step": 9316 }, { "epoch": 0.13213694424453418, "grad_norm": 3.0625, "learning_rate": 4.795876905199038e-05, "loss": 0.8652, "step": 9318 }, { "epoch": 0.1321653058981604, "grad_norm": 3.4375, "learning_rate": 4.7957884713006334e-05, "loss": 0.8523, "step": 9320 }, { "epoch": 0.1321936675517866, "grad_norm": 3.25, "learning_rate": 4.7957000190656134e-05, "loss": 0.7876, "step": 9322 }, { "epoch": 0.13222202920541282, "grad_norm": 3.015625, "learning_rate": 4.795611548494684e-05, "loss": 0.8601, "step": 9324 }, { "epoch": 0.13225039085903903, "grad_norm": 3.40625, "learning_rate": 4.795523059588552e-05, "loss": 0.9373, "step": 9326 }, { "epoch": 0.13227875251266524, "grad_norm": 3.125, "learning_rate": 4.7954345523479235e-05, "loss": 0.8288, "step": 9328 }, { "epoch": 0.13230711416629146, "grad_norm": 2.953125, "learning_rate": 4.795346026773507e-05, "loss": 0.8257, "step": 9330 }, { "epoch": 0.13233547581991767, "grad_norm": 3.296875, "learning_rate": 4.7952574828660086e-05, "loss": 0.8779, "step": 9332 }, { "epoch": 0.13236383747354388, "grad_norm": 3.171875, "learning_rate": 4.795168920626135e-05, "loss": 0.8499, "step": 9334 }, { "epoch": 0.1323921991271701, "grad_norm": 3.046875, "learning_rate": 4.795080340054595e-05, "loss": 0.9124, "step": 9336 }, { "epoch": 0.13242056078079634, "grad_norm": 3.171875, "learning_rate": 4.794991741152095e-05, "loss": 0.848, "step": 9338 }, { "epoch": 0.13244892243442255, "grad_norm": 3.25, "learning_rate": 4.794903123919343e-05, "loss": 0.8716, "step": 9340 }, { "epoch": 0.13247728408804876, "grad_norm": 3.125, "learning_rate": 4.794814488357046e-05, "loss": 0.8554, "step": 9342 }, { "epoch": 0.13250564574167498, "grad_norm": 2.984375, "learning_rate": 4.794725834465913e-05, "loss": 0.8326, "step": 9344 }, { "epoch": 0.1325340073953012, "grad_norm": 3.21875, "learning_rate": 4.794637162246652e-05, "loss": 0.8678, "step": 9346 }, { "epoch": 0.1325623690489274, "grad_norm": 3.21875, "learning_rate": 4.794548471699971e-05, "loss": 0.8293, "step": 9348 }, { "epoch": 0.13259073070255362, "grad_norm": 3.15625, "learning_rate": 4.794459762826578e-05, "loss": 0.8594, "step": 9350 }, { "epoch": 0.13261909235617983, "grad_norm": 3.265625, "learning_rate": 4.7943710356271816e-05, "loss": 0.9143, "step": 9352 }, { "epoch": 0.13264745400980604, "grad_norm": 3.1875, "learning_rate": 4.7942822901024906e-05, "loss": 0.8479, "step": 9354 }, { "epoch": 0.13267581566343226, "grad_norm": 2.96875, "learning_rate": 4.794193526253215e-05, "loss": 0.8305, "step": 9356 }, { "epoch": 0.13270417731705847, "grad_norm": 3.328125, "learning_rate": 4.794104744080061e-05, "loss": 0.8722, "step": 9358 }, { "epoch": 0.13273253897068468, "grad_norm": 3.140625, "learning_rate": 4.794015943583741e-05, "loss": 0.8982, "step": 9360 }, { "epoch": 0.1327609006243109, "grad_norm": 2.984375, "learning_rate": 4.793927124764962e-05, "loss": 0.837, "step": 9362 }, { "epoch": 0.1327892622779371, "grad_norm": 3.046875, "learning_rate": 4.7938382876244334e-05, "loss": 0.8604, "step": 9364 }, { "epoch": 0.13281762393156332, "grad_norm": 3.234375, "learning_rate": 4.793749432162866e-05, "loss": 0.8655, "step": 9366 }, { "epoch": 0.13284598558518954, "grad_norm": 2.90625, "learning_rate": 4.793660558380969e-05, "loss": 0.8463, "step": 9368 }, { "epoch": 0.13287434723881575, "grad_norm": 2.78125, "learning_rate": 4.793571666279452e-05, "loss": 0.835, "step": 9370 }, { "epoch": 0.13290270889244196, "grad_norm": 3.015625, "learning_rate": 4.793482755859026e-05, "loss": 0.8698, "step": 9372 }, { "epoch": 0.13293107054606818, "grad_norm": 3.0625, "learning_rate": 4.793393827120399e-05, "loss": 0.8619, "step": 9374 }, { "epoch": 0.13295943219969442, "grad_norm": 3.25, "learning_rate": 4.7933048800642824e-05, "loss": 0.8675, "step": 9376 }, { "epoch": 0.13298779385332063, "grad_norm": 2.84375, "learning_rate": 4.793215914691388e-05, "loss": 0.7908, "step": 9378 }, { "epoch": 0.13301615550694684, "grad_norm": 3.03125, "learning_rate": 4.793126931002424e-05, "loss": 0.8718, "step": 9380 }, { "epoch": 0.13304451716057306, "grad_norm": 3.09375, "learning_rate": 4.793037928998103e-05, "loss": 0.8544, "step": 9382 }, { "epoch": 0.13307287881419927, "grad_norm": 3.234375, "learning_rate": 4.7929489086791345e-05, "loss": 0.8418, "step": 9384 }, { "epoch": 0.13310124046782548, "grad_norm": 3.109375, "learning_rate": 4.79285987004623e-05, "loss": 0.8832, "step": 9386 }, { "epoch": 0.1331296021214517, "grad_norm": 3.171875, "learning_rate": 4.792770813100101e-05, "loss": 0.8773, "step": 9388 }, { "epoch": 0.1331579637750779, "grad_norm": 2.96875, "learning_rate": 4.792681737841459e-05, "loss": 0.8121, "step": 9390 }, { "epoch": 0.13318632542870412, "grad_norm": 3.03125, "learning_rate": 4.792592644271015e-05, "loss": 0.8542, "step": 9392 }, { "epoch": 0.13321468708233034, "grad_norm": 2.8125, "learning_rate": 4.792503532389481e-05, "loss": 0.8474, "step": 9394 }, { "epoch": 0.13324304873595655, "grad_norm": 3.234375, "learning_rate": 4.792414402197568e-05, "loss": 0.887, "step": 9396 }, { "epoch": 0.13327141038958276, "grad_norm": 3.421875, "learning_rate": 4.792325253695987e-05, "loss": 0.8668, "step": 9398 }, { "epoch": 0.13329977204320897, "grad_norm": 3.5625, "learning_rate": 4.792236086885453e-05, "loss": 0.8674, "step": 9400 }, { "epoch": 0.1333281336968352, "grad_norm": 2.734375, "learning_rate": 4.7921469017666756e-05, "loss": 0.8189, "step": 9402 }, { "epoch": 0.1333564953504614, "grad_norm": 3.28125, "learning_rate": 4.7920576983403684e-05, "loss": 0.8895, "step": 9404 }, { "epoch": 0.13338485700408761, "grad_norm": 3.609375, "learning_rate": 4.791968476607244e-05, "loss": 0.8661, "step": 9406 }, { "epoch": 0.13341321865771383, "grad_norm": 2.734375, "learning_rate": 4.791879236568013e-05, "loss": 0.8701, "step": 9408 }, { "epoch": 0.13344158031134004, "grad_norm": 2.875, "learning_rate": 4.791789978223391e-05, "loss": 0.8424, "step": 9410 }, { "epoch": 0.13346994196496625, "grad_norm": 3.078125, "learning_rate": 4.791700701574089e-05, "loss": 0.9, "step": 9412 }, { "epoch": 0.1334983036185925, "grad_norm": 3.171875, "learning_rate": 4.791611406620821e-05, "loss": 0.8082, "step": 9414 }, { "epoch": 0.1335266652722187, "grad_norm": 3.15625, "learning_rate": 4.791522093364299e-05, "loss": 0.8809, "step": 9416 }, { "epoch": 0.13355502692584492, "grad_norm": 3.34375, "learning_rate": 4.791432761805238e-05, "loss": 0.9061, "step": 9418 }, { "epoch": 0.13358338857947114, "grad_norm": 3.625, "learning_rate": 4.79134341194435e-05, "loss": 0.8561, "step": 9420 }, { "epoch": 0.13361175023309735, "grad_norm": 3.265625, "learning_rate": 4.7912540437823506e-05, "loss": 0.8975, "step": 9422 }, { "epoch": 0.13364011188672356, "grad_norm": 3.109375, "learning_rate": 4.791164657319951e-05, "loss": 0.8804, "step": 9424 }, { "epoch": 0.13366847354034977, "grad_norm": 2.875, "learning_rate": 4.7910752525578676e-05, "loss": 0.7955, "step": 9426 }, { "epoch": 0.133696835193976, "grad_norm": 2.796875, "learning_rate": 4.790985829496812e-05, "loss": 0.8097, "step": 9428 }, { "epoch": 0.1337251968476022, "grad_norm": 3.21875, "learning_rate": 4.790896388137501e-05, "loss": 0.7972, "step": 9430 }, { "epoch": 0.13375355850122841, "grad_norm": 3.3125, "learning_rate": 4.790806928480647e-05, "loss": 0.8619, "step": 9432 }, { "epoch": 0.13378192015485463, "grad_norm": 2.921875, "learning_rate": 4.790717450526966e-05, "loss": 0.8208, "step": 9434 }, { "epoch": 0.13381028180848084, "grad_norm": 3.03125, "learning_rate": 4.7906279542771714e-05, "loss": 0.8632, "step": 9436 }, { "epoch": 0.13383864346210705, "grad_norm": 2.71875, "learning_rate": 4.7905384397319784e-05, "loss": 0.8662, "step": 9438 }, { "epoch": 0.13386700511573327, "grad_norm": 3.125, "learning_rate": 4.790448906892102e-05, "loss": 0.8648, "step": 9440 }, { "epoch": 0.13389536676935948, "grad_norm": 3.296875, "learning_rate": 4.790359355758258e-05, "loss": 0.8868, "step": 9442 }, { "epoch": 0.1339237284229857, "grad_norm": 3.265625, "learning_rate": 4.790269786331161e-05, "loss": 0.864, "step": 9444 }, { "epoch": 0.1339520900766119, "grad_norm": 3.0625, "learning_rate": 4.7901801986115254e-05, "loss": 0.858, "step": 9446 }, { "epoch": 0.13398045173023812, "grad_norm": 2.921875, "learning_rate": 4.790090592600069e-05, "loss": 0.8854, "step": 9448 }, { "epoch": 0.13400881338386433, "grad_norm": 3.0625, "learning_rate": 4.7900009682975056e-05, "loss": 0.8537, "step": 9450 }, { "epoch": 0.13403717503749057, "grad_norm": 3.59375, "learning_rate": 4.789911325704552e-05, "loss": 0.8308, "step": 9452 }, { "epoch": 0.1340655366911168, "grad_norm": 2.96875, "learning_rate": 4.789821664821924e-05, "loss": 0.8557, "step": 9454 }, { "epoch": 0.134093898344743, "grad_norm": 3.046875, "learning_rate": 4.789731985650338e-05, "loss": 0.8707, "step": 9456 }, { "epoch": 0.13412225999836921, "grad_norm": 3.265625, "learning_rate": 4.789642288190509e-05, "loss": 0.8556, "step": 9458 }, { "epoch": 0.13415062165199543, "grad_norm": 2.71875, "learning_rate": 4.7895525724431546e-05, "loss": 0.8327, "step": 9460 }, { "epoch": 0.13417898330562164, "grad_norm": 3.28125, "learning_rate": 4.789462838408991e-05, "loss": 0.8553, "step": 9462 }, { "epoch": 0.13420734495924785, "grad_norm": 3.140625, "learning_rate": 4.789373086088736e-05, "loss": 0.9202, "step": 9464 }, { "epoch": 0.13423570661287407, "grad_norm": 2.890625, "learning_rate": 4.789283315483104e-05, "loss": 0.8443, "step": 9466 }, { "epoch": 0.13426406826650028, "grad_norm": 2.796875, "learning_rate": 4.7891935265928146e-05, "loss": 0.8217, "step": 9468 }, { "epoch": 0.1342924299201265, "grad_norm": 2.96875, "learning_rate": 4.7891037194185834e-05, "loss": 0.8868, "step": 9470 }, { "epoch": 0.1343207915737527, "grad_norm": 3.234375, "learning_rate": 4.7890138939611285e-05, "loss": 0.8625, "step": 9472 }, { "epoch": 0.13434915322737892, "grad_norm": 3.78125, "learning_rate": 4.788924050221166e-05, "loss": 0.8653, "step": 9474 }, { "epoch": 0.13437751488100513, "grad_norm": 3.21875, "learning_rate": 4.788834188199415e-05, "loss": 0.8778, "step": 9476 }, { "epoch": 0.13440587653463135, "grad_norm": 3.03125, "learning_rate": 4.788744307896593e-05, "loss": 0.8455, "step": 9478 }, { "epoch": 0.13443423818825756, "grad_norm": 3.25, "learning_rate": 4.788654409313417e-05, "loss": 0.8521, "step": 9480 }, { "epoch": 0.13446259984188377, "grad_norm": 2.828125, "learning_rate": 4.788564492450606e-05, "loss": 0.8577, "step": 9482 }, { "epoch": 0.13449096149551, "grad_norm": 3.015625, "learning_rate": 4.788474557308877e-05, "loss": 0.8495, "step": 9484 }, { "epoch": 0.1345193231491362, "grad_norm": 3.21875, "learning_rate": 4.78838460388895e-05, "loss": 0.8899, "step": 9486 }, { "epoch": 0.1345476848027624, "grad_norm": 3.203125, "learning_rate": 4.788294632191542e-05, "loss": 0.8866, "step": 9488 }, { "epoch": 0.13457604645638863, "grad_norm": 3.21875, "learning_rate": 4.788204642217373e-05, "loss": 0.9318, "step": 9490 }, { "epoch": 0.13460440811001487, "grad_norm": 3.046875, "learning_rate": 4.78811463396716e-05, "loss": 0.8219, "step": 9492 }, { "epoch": 0.13463276976364108, "grad_norm": 2.921875, "learning_rate": 4.7880246074416224e-05, "loss": 0.8227, "step": 9494 }, { "epoch": 0.1346611314172673, "grad_norm": 3.296875, "learning_rate": 4.7879345626414804e-05, "loss": 0.9071, "step": 9496 }, { "epoch": 0.1346894930708935, "grad_norm": 3.15625, "learning_rate": 4.787844499567452e-05, "loss": 0.8425, "step": 9498 }, { "epoch": 0.13471785472451972, "grad_norm": 3.046875, "learning_rate": 4.787754418220258e-05, "loss": 0.8426, "step": 9500 }, { "epoch": 0.13474621637814593, "grad_norm": 3.234375, "learning_rate": 4.787664318600615e-05, "loss": 0.8813, "step": 9502 }, { "epoch": 0.13477457803177215, "grad_norm": 3.1875, "learning_rate": 4.787574200709246e-05, "loss": 0.8562, "step": 9504 }, { "epoch": 0.13480293968539836, "grad_norm": 2.828125, "learning_rate": 4.787484064546869e-05, "loss": 0.8287, "step": 9506 }, { "epoch": 0.13483130133902457, "grad_norm": 3.5, "learning_rate": 4.787393910114204e-05, "loss": 0.8647, "step": 9508 }, { "epoch": 0.1348596629926508, "grad_norm": 3.4375, "learning_rate": 4.7873037374119716e-05, "loss": 0.8606, "step": 9510 }, { "epoch": 0.134888024646277, "grad_norm": 3.125, "learning_rate": 4.787213546440892e-05, "loss": 0.8586, "step": 9512 }, { "epoch": 0.1349163862999032, "grad_norm": 3.21875, "learning_rate": 4.7871233372016844e-05, "loss": 0.8125, "step": 9514 }, { "epoch": 0.13494474795352943, "grad_norm": 3.03125, "learning_rate": 4.7870331096950706e-05, "loss": 0.8427, "step": 9516 }, { "epoch": 0.13497310960715564, "grad_norm": 3.359375, "learning_rate": 4.7869428639217706e-05, "loss": 0.8612, "step": 9518 }, { "epoch": 0.13500147126078185, "grad_norm": 3.40625, "learning_rate": 4.786852599882505e-05, "loss": 0.891, "step": 9520 }, { "epoch": 0.13502983291440807, "grad_norm": 2.734375, "learning_rate": 4.7867623175779955e-05, "loss": 0.8563, "step": 9522 }, { "epoch": 0.13505819456803428, "grad_norm": 2.90625, "learning_rate": 4.786672017008963e-05, "loss": 0.8838, "step": 9524 }, { "epoch": 0.1350865562216605, "grad_norm": 3.09375, "learning_rate": 4.786581698176129e-05, "loss": 0.8651, "step": 9526 }, { "epoch": 0.1351149178752867, "grad_norm": 2.84375, "learning_rate": 4.786491361080214e-05, "loss": 0.8792, "step": 9528 }, { "epoch": 0.13514327952891295, "grad_norm": 2.90625, "learning_rate": 4.78640100572194e-05, "loss": 0.8631, "step": 9530 }, { "epoch": 0.13517164118253916, "grad_norm": 3.21875, "learning_rate": 4.7863106321020285e-05, "loss": 0.8525, "step": 9532 }, { "epoch": 0.13520000283616537, "grad_norm": 2.828125, "learning_rate": 4.786220240221202e-05, "loss": 0.8351, "step": 9534 }, { "epoch": 0.13522836448979159, "grad_norm": 3.140625, "learning_rate": 4.786129830080181e-05, "loss": 0.8511, "step": 9536 }, { "epoch": 0.1352567261434178, "grad_norm": 2.9375, "learning_rate": 4.78603940167969e-05, "loss": 0.8393, "step": 9538 }, { "epoch": 0.135285087797044, "grad_norm": 2.796875, "learning_rate": 4.785948955020448e-05, "loss": 0.8694, "step": 9540 }, { "epoch": 0.13531344945067023, "grad_norm": 2.90625, "learning_rate": 4.78585849010318e-05, "loss": 0.8945, "step": 9542 }, { "epoch": 0.13534181110429644, "grad_norm": 3.09375, "learning_rate": 4.785768006928608e-05, "loss": 0.9118, "step": 9544 }, { "epoch": 0.13537017275792265, "grad_norm": 3.015625, "learning_rate": 4.785677505497454e-05, "loss": 0.8287, "step": 9546 }, { "epoch": 0.13539853441154887, "grad_norm": 3.0, "learning_rate": 4.7855869858104426e-05, "loss": 0.8384, "step": 9548 }, { "epoch": 0.13542689606517508, "grad_norm": 3.015625, "learning_rate": 4.785496447868295e-05, "loss": 0.8659, "step": 9550 }, { "epoch": 0.1354552577188013, "grad_norm": 3.5625, "learning_rate": 4.7854058916717336e-05, "loss": 0.8575, "step": 9552 }, { "epoch": 0.1354836193724275, "grad_norm": 2.703125, "learning_rate": 4.785315317221484e-05, "loss": 0.8098, "step": 9554 }, { "epoch": 0.13551198102605372, "grad_norm": 3.0, "learning_rate": 4.785224724518268e-05, "loss": 0.8222, "step": 9556 }, { "epoch": 0.13554034267967993, "grad_norm": 3.1875, "learning_rate": 4.78513411356281e-05, "loss": 0.8712, "step": 9558 }, { "epoch": 0.13556870433330614, "grad_norm": 3.015625, "learning_rate": 4.7850434843558335e-05, "loss": 0.8667, "step": 9560 }, { "epoch": 0.13559706598693236, "grad_norm": 3.046875, "learning_rate": 4.784952836898062e-05, "loss": 0.8606, "step": 9562 }, { "epoch": 0.13562542764055857, "grad_norm": 3.234375, "learning_rate": 4.78486217119022e-05, "loss": 0.8305, "step": 9564 }, { "epoch": 0.13565378929418478, "grad_norm": 3.109375, "learning_rate": 4.7847714872330316e-05, "loss": 0.8512, "step": 9566 }, { "epoch": 0.13568215094781103, "grad_norm": 3.171875, "learning_rate": 4.784680785027221e-05, "loss": 0.8591, "step": 9568 }, { "epoch": 0.13571051260143724, "grad_norm": 3.0, "learning_rate": 4.784590064573512e-05, "loss": 0.7925, "step": 9570 }, { "epoch": 0.13573887425506345, "grad_norm": 3.09375, "learning_rate": 4.78449932587263e-05, "loss": 0.9063, "step": 9572 }, { "epoch": 0.13576723590868967, "grad_norm": 3.515625, "learning_rate": 4.7844085689253e-05, "loss": 0.8593, "step": 9574 }, { "epoch": 0.13579559756231588, "grad_norm": 3.09375, "learning_rate": 4.784317793732246e-05, "loss": 0.8915, "step": 9576 }, { "epoch": 0.1358239592159421, "grad_norm": 2.96875, "learning_rate": 4.7842270002941934e-05, "loss": 0.8596, "step": 9578 }, { "epoch": 0.1358523208695683, "grad_norm": 3.421875, "learning_rate": 4.784136188611867e-05, "loss": 0.8806, "step": 9580 }, { "epoch": 0.13588068252319452, "grad_norm": 3.0, "learning_rate": 4.784045358685993e-05, "loss": 0.8176, "step": 9582 }, { "epoch": 0.13590904417682073, "grad_norm": 3.015625, "learning_rate": 4.783954510517296e-05, "loss": 0.8076, "step": 9584 }, { "epoch": 0.13593740583044694, "grad_norm": 3.390625, "learning_rate": 4.783863644106502e-05, "loss": 0.8612, "step": 9586 }, { "epoch": 0.13596576748407316, "grad_norm": 3.109375, "learning_rate": 4.7837727594543374e-05, "loss": 0.9014, "step": 9588 }, { "epoch": 0.13599412913769937, "grad_norm": 3.375, "learning_rate": 4.783681856561527e-05, "loss": 0.8419, "step": 9590 }, { "epoch": 0.13602249079132558, "grad_norm": 3.09375, "learning_rate": 4.7835909354287975e-05, "loss": 0.8436, "step": 9592 }, { "epoch": 0.1360508524449518, "grad_norm": 2.9375, "learning_rate": 4.783499996056875e-05, "loss": 0.8537, "step": 9594 }, { "epoch": 0.136079214098578, "grad_norm": 3.34375, "learning_rate": 4.783409038446485e-05, "loss": 0.8933, "step": 9596 }, { "epoch": 0.13610757575220422, "grad_norm": 3.109375, "learning_rate": 4.783318062598355e-05, "loss": 0.9062, "step": 9598 }, { "epoch": 0.13613593740583044, "grad_norm": 3.4375, "learning_rate": 4.783227068513212e-05, "loss": 0.8731, "step": 9600 }, { "epoch": 0.13616429905945665, "grad_norm": 2.953125, "learning_rate": 4.783136056191781e-05, "loss": 0.8348, "step": 9602 }, { "epoch": 0.13619266071308286, "grad_norm": 3.5, "learning_rate": 4.7830450256347907e-05, "loss": 0.8867, "step": 9604 }, { "epoch": 0.1362210223667091, "grad_norm": 3.0625, "learning_rate": 4.782953976842968e-05, "loss": 0.8585, "step": 9606 }, { "epoch": 0.13624938402033532, "grad_norm": 3.296875, "learning_rate": 4.7828629098170384e-05, "loss": 0.8652, "step": 9608 }, { "epoch": 0.13627774567396153, "grad_norm": 3.359375, "learning_rate": 4.7827718245577313e-05, "loss": 0.8762, "step": 9610 }, { "epoch": 0.13630610732758774, "grad_norm": 3.046875, "learning_rate": 4.782680721065773e-05, "loss": 0.8931, "step": 9612 }, { "epoch": 0.13633446898121396, "grad_norm": 2.96875, "learning_rate": 4.7825895993418915e-05, "loss": 0.8768, "step": 9614 }, { "epoch": 0.13636283063484017, "grad_norm": 3.125, "learning_rate": 4.782498459386815e-05, "loss": 0.8769, "step": 9616 }, { "epoch": 0.13639119228846638, "grad_norm": 3.125, "learning_rate": 4.7824073012012704e-05, "loss": 0.8461, "step": 9618 }, { "epoch": 0.1364195539420926, "grad_norm": 3.046875, "learning_rate": 4.782316124785987e-05, "loss": 0.84, "step": 9620 }, { "epoch": 0.1364479155957188, "grad_norm": 2.796875, "learning_rate": 4.782224930141693e-05, "loss": 0.8248, "step": 9622 }, { "epoch": 0.13647627724934502, "grad_norm": 2.75, "learning_rate": 4.782133717269115e-05, "loss": 0.828, "step": 9624 }, { "epoch": 0.13650463890297124, "grad_norm": 3.296875, "learning_rate": 4.7820424861689835e-05, "loss": 0.8086, "step": 9626 }, { "epoch": 0.13653300055659745, "grad_norm": 2.9375, "learning_rate": 4.7819512368420265e-05, "loss": 0.8648, "step": 9628 }, { "epoch": 0.13656136221022366, "grad_norm": 2.953125, "learning_rate": 4.781859969288973e-05, "loss": 0.8266, "step": 9630 }, { "epoch": 0.13658972386384988, "grad_norm": 2.8125, "learning_rate": 4.7817686835105513e-05, "loss": 0.8522, "step": 9632 }, { "epoch": 0.1366180855174761, "grad_norm": 3.421875, "learning_rate": 4.781677379507491e-05, "loss": 0.8819, "step": 9634 }, { "epoch": 0.1366464471711023, "grad_norm": 2.796875, "learning_rate": 4.781586057280521e-05, "loss": 0.9147, "step": 9636 }, { "epoch": 0.13667480882472852, "grad_norm": 2.828125, "learning_rate": 4.7814947168303716e-05, "loss": 0.8184, "step": 9638 }, { "epoch": 0.13670317047835473, "grad_norm": 3.3125, "learning_rate": 4.781403358157771e-05, "loss": 0.9015, "step": 9640 }, { "epoch": 0.13673153213198094, "grad_norm": 3.078125, "learning_rate": 4.78131198126345e-05, "loss": 0.8835, "step": 9642 }, { "epoch": 0.13675989378560716, "grad_norm": 3.03125, "learning_rate": 4.781220586148138e-05, "loss": 0.8635, "step": 9644 }, { "epoch": 0.1367882554392334, "grad_norm": 3.25, "learning_rate": 4.781129172812565e-05, "loss": 0.9092, "step": 9646 }, { "epoch": 0.1368166170928596, "grad_norm": 2.84375, "learning_rate": 4.781037741257461e-05, "loss": 0.8278, "step": 9648 }, { "epoch": 0.13684497874648582, "grad_norm": 2.765625, "learning_rate": 4.780946291483557e-05, "loss": 0.8255, "step": 9650 }, { "epoch": 0.13687334040011204, "grad_norm": 3.078125, "learning_rate": 4.7808548234915826e-05, "loss": 0.807, "step": 9652 }, { "epoch": 0.13690170205373825, "grad_norm": 3.015625, "learning_rate": 4.780763337282268e-05, "loss": 0.8346, "step": 9654 }, { "epoch": 0.13693006370736446, "grad_norm": 3.1875, "learning_rate": 4.780671832856345e-05, "loss": 0.9045, "step": 9656 }, { "epoch": 0.13695842536099068, "grad_norm": 3.15625, "learning_rate": 4.780580310214544e-05, "loss": 0.8907, "step": 9658 }, { "epoch": 0.1369867870146169, "grad_norm": 3.078125, "learning_rate": 4.780488769357596e-05, "loss": 0.8827, "step": 9660 }, { "epoch": 0.1370151486682431, "grad_norm": 3.03125, "learning_rate": 4.7803972102862314e-05, "loss": 0.8603, "step": 9662 }, { "epoch": 0.13704351032186932, "grad_norm": 2.8125, "learning_rate": 4.780305633001183e-05, "loss": 0.8513, "step": 9664 }, { "epoch": 0.13707187197549553, "grad_norm": 3.0625, "learning_rate": 4.780214037503181e-05, "loss": 0.8648, "step": 9666 }, { "epoch": 0.13710023362912174, "grad_norm": 2.84375, "learning_rate": 4.7801224237929574e-05, "loss": 0.8421, "step": 9668 }, { "epoch": 0.13712859528274796, "grad_norm": 2.71875, "learning_rate": 4.780030791871244e-05, "loss": 0.8696, "step": 9670 }, { "epoch": 0.13715695693637417, "grad_norm": 3.078125, "learning_rate": 4.7799391417387727e-05, "loss": 0.8732, "step": 9672 }, { "epoch": 0.13718531859000038, "grad_norm": 3.015625, "learning_rate": 4.779847473396275e-05, "loss": 0.8645, "step": 9674 }, { "epoch": 0.1372136802436266, "grad_norm": 3.125, "learning_rate": 4.779755786844484e-05, "loss": 0.892, "step": 9676 }, { "epoch": 0.1372420418972528, "grad_norm": 3.125, "learning_rate": 4.779664082084131e-05, "loss": 0.8746, "step": 9678 }, { "epoch": 0.13727040355087902, "grad_norm": 3.3125, "learning_rate": 4.779572359115949e-05, "loss": 0.8194, "step": 9680 }, { "epoch": 0.13729876520450524, "grad_norm": 2.953125, "learning_rate": 4.77948061794067e-05, "loss": 0.8346, "step": 9682 }, { "epoch": 0.13732712685813148, "grad_norm": 3.296875, "learning_rate": 4.779388858559028e-05, "loss": 0.8399, "step": 9684 }, { "epoch": 0.1373554885117577, "grad_norm": 3.46875, "learning_rate": 4.779297080971756e-05, "loss": 0.8884, "step": 9686 }, { "epoch": 0.1373838501653839, "grad_norm": 2.9375, "learning_rate": 4.7792052851795856e-05, "loss": 0.8905, "step": 9688 }, { "epoch": 0.13741221181901012, "grad_norm": 2.9375, "learning_rate": 4.77911347118325e-05, "loss": 0.8121, "step": 9690 }, { "epoch": 0.13744057347263633, "grad_norm": 2.9375, "learning_rate": 4.779021638983483e-05, "loss": 0.8386, "step": 9692 }, { "epoch": 0.13746893512626254, "grad_norm": 3.484375, "learning_rate": 4.778929788581018e-05, "loss": 0.8886, "step": 9694 }, { "epoch": 0.13749729677988876, "grad_norm": 3.484375, "learning_rate": 4.77883791997659e-05, "loss": 0.8446, "step": 9696 }, { "epoch": 0.13752565843351497, "grad_norm": 2.9375, "learning_rate": 4.77874603317093e-05, "loss": 0.8943, "step": 9698 }, { "epoch": 0.13755402008714118, "grad_norm": 3.0, "learning_rate": 4.7786541281647745e-05, "loss": 0.8148, "step": 9700 }, { "epoch": 0.1375823817407674, "grad_norm": 3.375, "learning_rate": 4.778562204958856e-05, "loss": 0.9129, "step": 9702 }, { "epoch": 0.1376107433943936, "grad_norm": 3.234375, "learning_rate": 4.778470263553909e-05, "loss": 0.8649, "step": 9704 }, { "epoch": 0.13763910504801982, "grad_norm": 2.953125, "learning_rate": 4.778378303950669e-05, "loss": 0.8664, "step": 9706 }, { "epoch": 0.13766746670164604, "grad_norm": 3.84375, "learning_rate": 4.7782863261498686e-05, "loss": 0.9259, "step": 9708 }, { "epoch": 0.13769582835527225, "grad_norm": 3.015625, "learning_rate": 4.778194330152243e-05, "loss": 0.8506, "step": 9710 }, { "epoch": 0.13772419000889846, "grad_norm": 3.09375, "learning_rate": 4.7781023159585295e-05, "loss": 0.9133, "step": 9712 }, { "epoch": 0.13775255166252467, "grad_norm": 3.28125, "learning_rate": 4.7780102835694583e-05, "loss": 0.8754, "step": 9714 }, { "epoch": 0.1377809133161509, "grad_norm": 3.0625, "learning_rate": 4.7779182329857685e-05, "loss": 0.8788, "step": 9716 }, { "epoch": 0.1378092749697771, "grad_norm": 3.5, "learning_rate": 4.777826164208193e-05, "loss": 0.9233, "step": 9718 }, { "epoch": 0.13783763662340331, "grad_norm": 3.0, "learning_rate": 4.777734077237469e-05, "loss": 0.8675, "step": 9720 }, { "epoch": 0.13786599827702956, "grad_norm": 3.03125, "learning_rate": 4.777641972074331e-05, "loss": 0.8138, "step": 9722 }, { "epoch": 0.13789435993065577, "grad_norm": 3.5, "learning_rate": 4.7775498487195135e-05, "loss": 0.9171, "step": 9724 }, { "epoch": 0.13792272158428198, "grad_norm": 3.25, "learning_rate": 4.7774577071737536e-05, "loss": 0.8829, "step": 9726 }, { "epoch": 0.1379510832379082, "grad_norm": 2.78125, "learning_rate": 4.777365547437788e-05, "loss": 0.823, "step": 9728 }, { "epoch": 0.1379794448915344, "grad_norm": 3.03125, "learning_rate": 4.777273369512351e-05, "loss": 0.7923, "step": 9730 }, { "epoch": 0.13800780654516062, "grad_norm": 4.0625, "learning_rate": 4.7771811733981797e-05, "loss": 0.8381, "step": 9732 }, { "epoch": 0.13803616819878683, "grad_norm": 2.921875, "learning_rate": 4.777088959096011e-05, "loss": 0.8322, "step": 9734 }, { "epoch": 0.13806452985241305, "grad_norm": 3.1875, "learning_rate": 4.776996726606581e-05, "loss": 0.8754, "step": 9736 }, { "epoch": 0.13809289150603926, "grad_norm": 3.0, "learning_rate": 4.7769044759306256e-05, "loss": 0.8735, "step": 9738 }, { "epoch": 0.13812125315966547, "grad_norm": 3.015625, "learning_rate": 4.776812207068883e-05, "loss": 0.8242, "step": 9740 }, { "epoch": 0.1381496148132917, "grad_norm": 3.0625, "learning_rate": 4.776719920022089e-05, "loss": 0.8891, "step": 9742 }, { "epoch": 0.1381779764669179, "grad_norm": 2.875, "learning_rate": 4.776627614790982e-05, "loss": 0.8782, "step": 9744 }, { "epoch": 0.13820633812054411, "grad_norm": 3.34375, "learning_rate": 4.776535291376297e-05, "loss": 0.863, "step": 9746 }, { "epoch": 0.13823469977417033, "grad_norm": 3.0625, "learning_rate": 4.776442949778773e-05, "loss": 0.8439, "step": 9748 }, { "epoch": 0.13826306142779654, "grad_norm": 2.921875, "learning_rate": 4.776350589999148e-05, "loss": 0.8195, "step": 9750 }, { "epoch": 0.13829142308142275, "grad_norm": 2.8125, "learning_rate": 4.776258212038159e-05, "loss": 0.8392, "step": 9752 }, { "epoch": 0.13831978473504897, "grad_norm": 3.140625, "learning_rate": 4.776165815896544e-05, "loss": 0.8137, "step": 9754 }, { "epoch": 0.13834814638867518, "grad_norm": 2.953125, "learning_rate": 4.77607340157504e-05, "loss": 0.8081, "step": 9756 }, { "epoch": 0.1383765080423014, "grad_norm": 2.90625, "learning_rate": 4.775980969074386e-05, "loss": 0.8619, "step": 9758 }, { "epoch": 0.13840486969592763, "grad_norm": 3.234375, "learning_rate": 4.775888518395321e-05, "loss": 0.8601, "step": 9760 }, { "epoch": 0.13843323134955385, "grad_norm": 3.328125, "learning_rate": 4.775796049538582e-05, "loss": 0.9255, "step": 9762 }, { "epoch": 0.13846159300318006, "grad_norm": 3.3125, "learning_rate": 4.7757035625049084e-05, "loss": 0.8264, "step": 9764 }, { "epoch": 0.13848995465680627, "grad_norm": 2.90625, "learning_rate": 4.775611057295039e-05, "loss": 0.8394, "step": 9766 }, { "epoch": 0.1385183163104325, "grad_norm": 3.15625, "learning_rate": 4.775518533909711e-05, "loss": 0.8104, "step": 9768 }, { "epoch": 0.1385466779640587, "grad_norm": 3.328125, "learning_rate": 4.775425992349666e-05, "loss": 0.8605, "step": 9770 }, { "epoch": 0.13857503961768491, "grad_norm": 2.90625, "learning_rate": 4.775333432615641e-05, "loss": 0.8251, "step": 9772 }, { "epoch": 0.13860340127131113, "grad_norm": 3.21875, "learning_rate": 4.775240854708376e-05, "loss": 0.8508, "step": 9774 }, { "epoch": 0.13863176292493734, "grad_norm": 3.109375, "learning_rate": 4.775148258628611e-05, "loss": 0.7978, "step": 9776 }, { "epoch": 0.13866012457856355, "grad_norm": 3.4375, "learning_rate": 4.775055644377086e-05, "loss": 0.8389, "step": 9778 }, { "epoch": 0.13868848623218977, "grad_norm": 3.21875, "learning_rate": 4.774963011954538e-05, "loss": 0.8475, "step": 9780 }, { "epoch": 0.13871684788581598, "grad_norm": 3.21875, "learning_rate": 4.7748703613617095e-05, "loss": 0.8287, "step": 9782 }, { "epoch": 0.1387452095394422, "grad_norm": 3.34375, "learning_rate": 4.7747776925993404e-05, "loss": 0.8915, "step": 9784 }, { "epoch": 0.1387735711930684, "grad_norm": 3.546875, "learning_rate": 4.774685005668169e-05, "loss": 0.8942, "step": 9786 }, { "epoch": 0.13880193284669462, "grad_norm": 3.34375, "learning_rate": 4.7745923005689374e-05, "loss": 0.8831, "step": 9788 }, { "epoch": 0.13883029450032083, "grad_norm": 2.9375, "learning_rate": 4.774499577302385e-05, "loss": 0.9236, "step": 9790 }, { "epoch": 0.13885865615394705, "grad_norm": 2.78125, "learning_rate": 4.774406835869253e-05, "loss": 0.8147, "step": 9792 }, { "epoch": 0.13888701780757326, "grad_norm": 2.703125, "learning_rate": 4.774314076270282e-05, "loss": 0.8255, "step": 9794 }, { "epoch": 0.13891537946119947, "grad_norm": 3.5, "learning_rate": 4.774221298506213e-05, "loss": 0.8877, "step": 9796 }, { "epoch": 0.1389437411148257, "grad_norm": 3.1875, "learning_rate": 4.774128502577786e-05, "loss": 0.8505, "step": 9798 }, { "epoch": 0.13897210276845193, "grad_norm": 2.96875, "learning_rate": 4.774035688485744e-05, "loss": 0.8191, "step": 9800 }, { "epoch": 0.13900046442207814, "grad_norm": 3.203125, "learning_rate": 4.7739428562308266e-05, "loss": 0.8258, "step": 9802 }, { "epoch": 0.13902882607570435, "grad_norm": 3.0625, "learning_rate": 4.773850005813776e-05, "loss": 0.8342, "step": 9804 }, { "epoch": 0.13905718772933057, "grad_norm": 3.15625, "learning_rate": 4.7737571372353335e-05, "loss": 0.8472, "step": 9806 }, { "epoch": 0.13908554938295678, "grad_norm": 3.046875, "learning_rate": 4.773664250496241e-05, "loss": 0.8194, "step": 9808 }, { "epoch": 0.139113911036583, "grad_norm": 3.625, "learning_rate": 4.773571345597241e-05, "loss": 0.9342, "step": 9810 }, { "epoch": 0.1391422726902092, "grad_norm": 3.109375, "learning_rate": 4.773478422539075e-05, "loss": 0.8469, "step": 9812 }, { "epoch": 0.13917063434383542, "grad_norm": 3.0, "learning_rate": 4.7733854813224855e-05, "loss": 0.8441, "step": 9814 }, { "epoch": 0.13919899599746163, "grad_norm": 3.3125, "learning_rate": 4.773292521948214e-05, "loss": 0.8465, "step": 9816 }, { "epoch": 0.13922735765108785, "grad_norm": 3.234375, "learning_rate": 4.773199544417003e-05, "loss": 0.8675, "step": 9818 }, { "epoch": 0.13925571930471406, "grad_norm": 3.015625, "learning_rate": 4.7731065487295966e-05, "loss": 0.8517, "step": 9820 }, { "epoch": 0.13928408095834027, "grad_norm": 3.203125, "learning_rate": 4.773013534886735e-05, "loss": 0.8653, "step": 9822 }, { "epoch": 0.13931244261196649, "grad_norm": 3.15625, "learning_rate": 4.772920502889164e-05, "loss": 0.8876, "step": 9824 }, { "epoch": 0.1393408042655927, "grad_norm": 2.8125, "learning_rate": 4.772827452737626e-05, "loss": 0.8165, "step": 9826 }, { "epoch": 0.1393691659192189, "grad_norm": 3.0625, "learning_rate": 4.7727343844328626e-05, "loss": 0.8395, "step": 9828 }, { "epoch": 0.13939752757284513, "grad_norm": 2.828125, "learning_rate": 4.7726412979756183e-05, "loss": 0.9051, "step": 9830 }, { "epoch": 0.13942588922647134, "grad_norm": 3.140625, "learning_rate": 4.772548193366636e-05, "loss": 0.8455, "step": 9832 }, { "epoch": 0.13945425088009755, "grad_norm": 2.921875, "learning_rate": 4.7724550706066604e-05, "loss": 0.8069, "step": 9834 }, { "epoch": 0.13948261253372377, "grad_norm": 3.921875, "learning_rate": 4.772361929696434e-05, "loss": 0.8995, "step": 9836 }, { "epoch": 0.13951097418735, "grad_norm": 3.109375, "learning_rate": 4.772268770636701e-05, "loss": 0.8693, "step": 9838 }, { "epoch": 0.13953933584097622, "grad_norm": 3.3125, "learning_rate": 4.772175593428206e-05, "loss": 0.8537, "step": 9840 }, { "epoch": 0.13956769749460243, "grad_norm": 3.453125, "learning_rate": 4.7720823980716934e-05, "loss": 0.8723, "step": 9842 }, { "epoch": 0.13959605914822865, "grad_norm": 3.28125, "learning_rate": 4.771989184567907e-05, "loss": 0.8922, "step": 9844 }, { "epoch": 0.13962442080185486, "grad_norm": 2.828125, "learning_rate": 4.7718959529175915e-05, "loss": 0.8948, "step": 9846 }, { "epoch": 0.13965278245548107, "grad_norm": 3.71875, "learning_rate": 4.771802703121492e-05, "loss": 0.8548, "step": 9848 }, { "epoch": 0.13968114410910729, "grad_norm": 3.421875, "learning_rate": 4.7717094351803515e-05, "loss": 0.871, "step": 9850 }, { "epoch": 0.1397095057627335, "grad_norm": 2.71875, "learning_rate": 4.771616149094917e-05, "loss": 0.8462, "step": 9852 }, { "epoch": 0.1397378674163597, "grad_norm": 2.84375, "learning_rate": 4.771522844865932e-05, "loss": 0.8349, "step": 9854 }, { "epoch": 0.13976622906998593, "grad_norm": 3.453125, "learning_rate": 4.771429522494144e-05, "loss": 0.8386, "step": 9856 }, { "epoch": 0.13979459072361214, "grad_norm": 3.046875, "learning_rate": 4.771336181980296e-05, "loss": 0.8664, "step": 9858 }, { "epoch": 0.13982295237723835, "grad_norm": 3.453125, "learning_rate": 4.7712428233251336e-05, "loss": 0.9148, "step": 9860 }, { "epoch": 0.13985131403086457, "grad_norm": 3.125, "learning_rate": 4.771149446529405e-05, "loss": 0.8802, "step": 9862 }, { "epoch": 0.13987967568449078, "grad_norm": 2.984375, "learning_rate": 4.771056051593853e-05, "loss": 0.8807, "step": 9864 }, { "epoch": 0.139908037338117, "grad_norm": 3.0625, "learning_rate": 4.770962638519225e-05, "loss": 0.8685, "step": 9866 }, { "epoch": 0.1399363989917432, "grad_norm": 3.3125, "learning_rate": 4.7708692073062674e-05, "loss": 0.824, "step": 9868 }, { "epoch": 0.13996476064536942, "grad_norm": 3.25, "learning_rate": 4.770775757955726e-05, "loss": 0.8914, "step": 9870 }, { "epoch": 0.13999312229899563, "grad_norm": 3.09375, "learning_rate": 4.770682290468347e-05, "loss": 0.8658, "step": 9872 }, { "epoch": 0.14002148395262184, "grad_norm": 3.203125, "learning_rate": 4.7705888048448774e-05, "loss": 0.8861, "step": 9874 }, { "epoch": 0.14004984560624809, "grad_norm": 2.703125, "learning_rate": 4.770495301086063e-05, "loss": 0.8257, "step": 9876 }, { "epoch": 0.1400782072598743, "grad_norm": 3.265625, "learning_rate": 4.770401779192651e-05, "loss": 0.7909, "step": 9878 }, { "epoch": 0.1401065689135005, "grad_norm": 3.078125, "learning_rate": 4.770308239165389e-05, "loss": 0.8363, "step": 9880 }, { "epoch": 0.14013493056712673, "grad_norm": 3.171875, "learning_rate": 4.770214681005024e-05, "loss": 0.818, "step": 9882 }, { "epoch": 0.14016329222075294, "grad_norm": 3.1875, "learning_rate": 4.7701211047123025e-05, "loss": 0.8739, "step": 9884 }, { "epoch": 0.14019165387437915, "grad_norm": 3.015625, "learning_rate": 4.770027510287972e-05, "loss": 0.8339, "step": 9886 }, { "epoch": 0.14022001552800537, "grad_norm": 2.921875, "learning_rate": 4.769933897732781e-05, "loss": 0.8304, "step": 9888 }, { "epoch": 0.14024837718163158, "grad_norm": 2.890625, "learning_rate": 4.769840267047476e-05, "loss": 0.8929, "step": 9890 }, { "epoch": 0.1402767388352578, "grad_norm": 3.203125, "learning_rate": 4.769746618232805e-05, "loss": 0.8473, "step": 9892 }, { "epoch": 0.140305100488884, "grad_norm": 3.8125, "learning_rate": 4.769652951289518e-05, "loss": 0.8598, "step": 9894 }, { "epoch": 0.14033346214251022, "grad_norm": 3.65625, "learning_rate": 4.76955926621836e-05, "loss": 0.8372, "step": 9896 }, { "epoch": 0.14036182379613643, "grad_norm": 3.359375, "learning_rate": 4.769465563020081e-05, "loss": 0.8666, "step": 9898 }, { "epoch": 0.14039018544976264, "grad_norm": 3.0, "learning_rate": 4.7693718416954296e-05, "loss": 0.8561, "step": 9900 }, { "epoch": 0.14041854710338886, "grad_norm": 3.109375, "learning_rate": 4.7692781022451536e-05, "loss": 0.8811, "step": 9902 }, { "epoch": 0.14044690875701507, "grad_norm": 3.03125, "learning_rate": 4.7691843446700024e-05, "loss": 0.8468, "step": 9904 }, { "epoch": 0.14047527041064128, "grad_norm": 2.859375, "learning_rate": 4.7690905689707245e-05, "loss": 0.8298, "step": 9906 }, { "epoch": 0.1405036320642675, "grad_norm": 2.859375, "learning_rate": 4.7689967751480694e-05, "loss": 0.8544, "step": 9908 }, { "epoch": 0.1405319937178937, "grad_norm": 2.859375, "learning_rate": 4.768902963202784e-05, "loss": 0.8216, "step": 9910 }, { "epoch": 0.14056035537151992, "grad_norm": 2.84375, "learning_rate": 4.76880913313562e-05, "loss": 0.8521, "step": 9912 }, { "epoch": 0.14058871702514616, "grad_norm": 3.125, "learning_rate": 4.768715284947327e-05, "loss": 0.8552, "step": 9914 }, { "epoch": 0.14061707867877238, "grad_norm": 3.25, "learning_rate": 4.768621418638653e-05, "loss": 0.8592, "step": 9916 }, { "epoch": 0.1406454403323986, "grad_norm": 2.78125, "learning_rate": 4.768527534210349e-05, "loss": 0.8285, "step": 9918 }, { "epoch": 0.1406738019860248, "grad_norm": 3.796875, "learning_rate": 4.768433631663163e-05, "loss": 0.8686, "step": 9920 }, { "epoch": 0.14070216363965102, "grad_norm": 2.875, "learning_rate": 4.768339710997847e-05, "loss": 0.859, "step": 9922 }, { "epoch": 0.14073052529327723, "grad_norm": 3.09375, "learning_rate": 4.768245772215151e-05, "loss": 0.823, "step": 9924 }, { "epoch": 0.14075888694690344, "grad_norm": 3.21875, "learning_rate": 4.768151815315824e-05, "loss": 0.8552, "step": 9926 }, { "epoch": 0.14078724860052966, "grad_norm": 3.234375, "learning_rate": 4.768057840300617e-05, "loss": 0.8626, "step": 9928 }, { "epoch": 0.14081561025415587, "grad_norm": 3.046875, "learning_rate": 4.7679638471702817e-05, "loss": 0.8933, "step": 9930 }, { "epoch": 0.14084397190778208, "grad_norm": 2.828125, "learning_rate": 4.767869835925567e-05, "loss": 0.8456, "step": 9932 }, { "epoch": 0.1408723335614083, "grad_norm": 3.0625, "learning_rate": 4.767775806567225e-05, "loss": 0.8259, "step": 9934 }, { "epoch": 0.1409006952150345, "grad_norm": 3.65625, "learning_rate": 4.767681759096006e-05, "loss": 0.8667, "step": 9936 }, { "epoch": 0.14092905686866072, "grad_norm": 3.5625, "learning_rate": 4.767587693512662e-05, "loss": 0.8894, "step": 9938 }, { "epoch": 0.14095741852228694, "grad_norm": 3.390625, "learning_rate": 4.767493609817943e-05, "loss": 0.8851, "step": 9940 }, { "epoch": 0.14098578017591315, "grad_norm": 3.0625, "learning_rate": 4.767399508012603e-05, "loss": 0.8475, "step": 9942 }, { "epoch": 0.14101414182953936, "grad_norm": 2.734375, "learning_rate": 4.7673053880973904e-05, "loss": 0.8871, "step": 9944 }, { "epoch": 0.14104250348316558, "grad_norm": 2.984375, "learning_rate": 4.7672112500730594e-05, "loss": 0.8521, "step": 9946 }, { "epoch": 0.1410708651367918, "grad_norm": 2.859375, "learning_rate": 4.76711709394036e-05, "loss": 0.8152, "step": 9948 }, { "epoch": 0.141099226790418, "grad_norm": 3.015625, "learning_rate": 4.767022919700046e-05, "loss": 0.8853, "step": 9950 }, { "epoch": 0.14112758844404422, "grad_norm": 3.203125, "learning_rate": 4.7669287273528676e-05, "loss": 0.814, "step": 9952 }, { "epoch": 0.14115595009767046, "grad_norm": 3.25, "learning_rate": 4.7668345168995794e-05, "loss": 0.8751, "step": 9954 }, { "epoch": 0.14118431175129667, "grad_norm": 2.921875, "learning_rate": 4.7667402883409315e-05, "loss": 0.8147, "step": 9956 }, { "epoch": 0.14121267340492288, "grad_norm": 3.390625, "learning_rate": 4.7666460416776794e-05, "loss": 0.91, "step": 9958 }, { "epoch": 0.1412410350585491, "grad_norm": 3.3125, "learning_rate": 4.7665517769105726e-05, "loss": 0.8375, "step": 9960 }, { "epoch": 0.1412693967121753, "grad_norm": 3.078125, "learning_rate": 4.7664574940403666e-05, "loss": 0.8531, "step": 9962 }, { "epoch": 0.14129775836580152, "grad_norm": 3.015625, "learning_rate": 4.766363193067813e-05, "loss": 0.8683, "step": 9964 }, { "epoch": 0.14132612001942774, "grad_norm": 3.046875, "learning_rate": 4.766268873993666e-05, "loss": 0.783, "step": 9966 }, { "epoch": 0.14135448167305395, "grad_norm": 3.734375, "learning_rate": 4.766174536818677e-05, "loss": 0.8508, "step": 9968 }, { "epoch": 0.14138284332668016, "grad_norm": 3.078125, "learning_rate": 4.7660801815436015e-05, "loss": 0.9063, "step": 9970 }, { "epoch": 0.14141120498030638, "grad_norm": 3.09375, "learning_rate": 4.7659858081691936e-05, "loss": 0.8375, "step": 9972 }, { "epoch": 0.1414395666339326, "grad_norm": 3.140625, "learning_rate": 4.765891416696204e-05, "loss": 0.8516, "step": 9974 }, { "epoch": 0.1414679282875588, "grad_norm": 3.171875, "learning_rate": 4.76579700712539e-05, "loss": 0.8882, "step": 9976 }, { "epoch": 0.14149628994118502, "grad_norm": 3.234375, "learning_rate": 4.765702579457504e-05, "loss": 0.8333, "step": 9978 }, { "epoch": 0.14152465159481123, "grad_norm": 3.203125, "learning_rate": 4.765608133693299e-05, "loss": 0.8676, "step": 9980 }, { "epoch": 0.14155301324843744, "grad_norm": 2.953125, "learning_rate": 4.7655136698335326e-05, "loss": 0.8437, "step": 9982 }, { "epoch": 0.14158137490206366, "grad_norm": 3.015625, "learning_rate": 4.765419187878956e-05, "loss": 0.8221, "step": 9984 }, { "epoch": 0.14160973655568987, "grad_norm": 2.78125, "learning_rate": 4.765324687830326e-05, "loss": 0.8472, "step": 9986 }, { "epoch": 0.14163809820931608, "grad_norm": 3.3125, "learning_rate": 4.7652301696883964e-05, "loss": 0.8415, "step": 9988 }, { "epoch": 0.1416664598629423, "grad_norm": 2.921875, "learning_rate": 4.765135633453922e-05, "loss": 0.9079, "step": 9990 }, { "epoch": 0.14169482151656854, "grad_norm": 3.265625, "learning_rate": 4.7650410791276584e-05, "loss": 0.902, "step": 9992 }, { "epoch": 0.14172318317019475, "grad_norm": 2.9375, "learning_rate": 4.7649465067103606e-05, "loss": 0.836, "step": 9994 }, { "epoch": 0.14175154482382096, "grad_norm": 2.765625, "learning_rate": 4.7648519162027836e-05, "loss": 0.7711, "step": 9996 }, { "epoch": 0.14177990647744718, "grad_norm": 3.015625, "learning_rate": 4.764757307605684e-05, "loss": 0.8369, "step": 9998 }, { "epoch": 0.1418082681310734, "grad_norm": 2.9375, "learning_rate": 4.764662680919817e-05, "loss": 0.8296, "step": 10000 }, { "epoch": 0.1418366297846996, "grad_norm": 3.421875, "learning_rate": 4.764568036145938e-05, "loss": 0.8628, "step": 10002 }, { "epoch": 0.14186499143832582, "grad_norm": 3.28125, "learning_rate": 4.7644733732848025e-05, "loss": 0.889, "step": 10004 }, { "epoch": 0.14189335309195203, "grad_norm": 3.375, "learning_rate": 4.764378692337167e-05, "loss": 0.8759, "step": 10006 }, { "epoch": 0.14192171474557824, "grad_norm": 3.328125, "learning_rate": 4.764283993303788e-05, "loss": 0.8765, "step": 10008 }, { "epoch": 0.14195007639920446, "grad_norm": 2.875, "learning_rate": 4.7641892761854216e-05, "loss": 0.8435, "step": 10010 }, { "epoch": 0.14197843805283067, "grad_norm": 3.40625, "learning_rate": 4.7640945409828255e-05, "loss": 0.8862, "step": 10012 }, { "epoch": 0.14200679970645688, "grad_norm": 3.4375, "learning_rate": 4.763999787696754e-05, "loss": 0.8723, "step": 10014 }, { "epoch": 0.1420351613600831, "grad_norm": 2.921875, "learning_rate": 4.7639050163279654e-05, "loss": 0.8466, "step": 10016 }, { "epoch": 0.1420635230137093, "grad_norm": 3.21875, "learning_rate": 4.7638102268772166e-05, "loss": 0.8607, "step": 10018 }, { "epoch": 0.14209188466733552, "grad_norm": 3.078125, "learning_rate": 4.763715419345265e-05, "loss": 0.851, "step": 10020 }, { "epoch": 0.14212024632096174, "grad_norm": 3.234375, "learning_rate": 4.763620593732867e-05, "loss": 0.8344, "step": 10022 }, { "epoch": 0.14214860797458795, "grad_norm": 2.890625, "learning_rate": 4.7635257500407804e-05, "loss": 0.8483, "step": 10024 }, { "epoch": 0.14217696962821416, "grad_norm": 2.875, "learning_rate": 4.763430888269762e-05, "loss": 0.8426, "step": 10026 }, { "epoch": 0.14220533128184037, "grad_norm": 3.234375, "learning_rate": 4.763336008420571e-05, "loss": 0.8417, "step": 10028 }, { "epoch": 0.14223369293546662, "grad_norm": 3.0, "learning_rate": 4.763241110493964e-05, "loss": 0.8286, "step": 10030 }, { "epoch": 0.14226205458909283, "grad_norm": 3.296875, "learning_rate": 4.7631461944906994e-05, "loss": 0.8649, "step": 10032 }, { "epoch": 0.14229041624271904, "grad_norm": 3.75, "learning_rate": 4.7630512604115354e-05, "loss": 0.8481, "step": 10034 }, { "epoch": 0.14231877789634526, "grad_norm": 3.046875, "learning_rate": 4.762956308257229e-05, "loss": 0.8222, "step": 10036 }, { "epoch": 0.14234713954997147, "grad_norm": 3.453125, "learning_rate": 4.7628613380285405e-05, "loss": 0.8978, "step": 10038 }, { "epoch": 0.14237550120359768, "grad_norm": 3.359375, "learning_rate": 4.7627663497262276e-05, "loss": 0.8074, "step": 10040 }, { "epoch": 0.1424038628572239, "grad_norm": 3.125, "learning_rate": 4.7626713433510485e-05, "loss": 0.8174, "step": 10042 }, { "epoch": 0.1424322245108501, "grad_norm": 3.234375, "learning_rate": 4.762576318903763e-05, "loss": 0.8344, "step": 10044 }, { "epoch": 0.14246058616447632, "grad_norm": 3.140625, "learning_rate": 4.762481276385129e-05, "loss": 0.895, "step": 10046 }, { "epoch": 0.14248894781810253, "grad_norm": 3.015625, "learning_rate": 4.762386215795907e-05, "loss": 0.8598, "step": 10048 }, { "epoch": 0.14251730947172875, "grad_norm": 3.125, "learning_rate": 4.762291137136854e-05, "loss": 0.8493, "step": 10050 }, { "epoch": 0.14254567112535496, "grad_norm": 3.34375, "learning_rate": 4.7621960404087316e-05, "loss": 0.8935, "step": 10052 }, { "epoch": 0.14257403277898117, "grad_norm": 2.890625, "learning_rate": 4.7621009256122985e-05, "loss": 0.8644, "step": 10054 }, { "epoch": 0.1426023944326074, "grad_norm": 3.140625, "learning_rate": 4.7620057927483145e-05, "loss": 0.8487, "step": 10056 }, { "epoch": 0.1426307560862336, "grad_norm": 3.46875, "learning_rate": 4.761910641817539e-05, "loss": 0.9167, "step": 10058 }, { "epoch": 0.14265911773985981, "grad_norm": 3.234375, "learning_rate": 4.761815472820733e-05, "loss": 0.9637, "step": 10060 }, { "epoch": 0.14268747939348603, "grad_norm": 2.96875, "learning_rate": 4.761720285758655e-05, "loss": 0.817, "step": 10062 }, { "epoch": 0.14271584104711224, "grad_norm": 2.96875, "learning_rate": 4.761625080632067e-05, "loss": 0.8475, "step": 10064 }, { "epoch": 0.14274420270073845, "grad_norm": 3.0, "learning_rate": 4.761529857441728e-05, "loss": 0.8297, "step": 10066 }, { "epoch": 0.1427725643543647, "grad_norm": 3.09375, "learning_rate": 4.7614346161884e-05, "loss": 0.8912, "step": 10068 }, { "epoch": 0.1428009260079909, "grad_norm": 2.890625, "learning_rate": 4.761339356872843e-05, "loss": 0.8751, "step": 10070 }, { "epoch": 0.14282928766161712, "grad_norm": 3.171875, "learning_rate": 4.761244079495817e-05, "loss": 0.8644, "step": 10072 }, { "epoch": 0.14285764931524333, "grad_norm": 3.015625, "learning_rate": 4.761148784058084e-05, "loss": 0.8362, "step": 10074 }, { "epoch": 0.14288601096886955, "grad_norm": 3.21875, "learning_rate": 4.761053470560404e-05, "loss": 0.8419, "step": 10076 }, { "epoch": 0.14291437262249576, "grad_norm": 3.0625, "learning_rate": 4.76095813900354e-05, "loss": 0.9393, "step": 10078 }, { "epoch": 0.14294273427612197, "grad_norm": 2.96875, "learning_rate": 4.760862789388253e-05, "loss": 0.8559, "step": 10080 }, { "epoch": 0.1429710959297482, "grad_norm": 3.09375, "learning_rate": 4.7607674217153034e-05, "loss": 0.8438, "step": 10082 }, { "epoch": 0.1429994575833744, "grad_norm": 2.90625, "learning_rate": 4.760672035985453e-05, "loss": 0.9016, "step": 10084 }, { "epoch": 0.14302781923700061, "grad_norm": 3.015625, "learning_rate": 4.7605766321994646e-05, "loss": 0.8682, "step": 10086 }, { "epoch": 0.14305618089062683, "grad_norm": 3.3125, "learning_rate": 4.760481210358101e-05, "loss": 0.8857, "step": 10088 }, { "epoch": 0.14308454254425304, "grad_norm": 3.109375, "learning_rate": 4.7603857704621226e-05, "loss": 0.8203, "step": 10090 }, { "epoch": 0.14311290419787925, "grad_norm": 2.9375, "learning_rate": 4.7602903125122914e-05, "loss": 0.8423, "step": 10092 }, { "epoch": 0.14314126585150547, "grad_norm": 3.09375, "learning_rate": 4.760194836509372e-05, "loss": 0.8828, "step": 10094 }, { "epoch": 0.14316962750513168, "grad_norm": 3.0, "learning_rate": 4.760099342454125e-05, "loss": 0.8315, "step": 10096 }, { "epoch": 0.1431979891587579, "grad_norm": 3.0625, "learning_rate": 4.760003830347314e-05, "loss": 0.8567, "step": 10098 }, { "epoch": 0.1432263508123841, "grad_norm": 3.0625, "learning_rate": 4.759908300189702e-05, "loss": 0.8294, "step": 10100 }, { "epoch": 0.14325471246601032, "grad_norm": 3.296875, "learning_rate": 4.75981275198205e-05, "loss": 0.8795, "step": 10102 }, { "epoch": 0.14328307411963653, "grad_norm": 2.875, "learning_rate": 4.759717185725124e-05, "loss": 0.8557, "step": 10104 }, { "epoch": 0.14331143577326275, "grad_norm": 3.25, "learning_rate": 4.759621601419687e-05, "loss": 0.85, "step": 10106 }, { "epoch": 0.143339797426889, "grad_norm": 3.25, "learning_rate": 4.7595259990665004e-05, "loss": 0.8639, "step": 10108 }, { "epoch": 0.1433681590805152, "grad_norm": 3.53125, "learning_rate": 4.759430378666329e-05, "loss": 0.864, "step": 10110 }, { "epoch": 0.1433965207341414, "grad_norm": 2.75, "learning_rate": 4.759334740219937e-05, "loss": 0.8776, "step": 10112 }, { "epoch": 0.14342488238776763, "grad_norm": 2.734375, "learning_rate": 4.7592390837280875e-05, "loss": 0.8177, "step": 10114 }, { "epoch": 0.14345324404139384, "grad_norm": 3.203125, "learning_rate": 4.759143409191544e-05, "loss": 0.8178, "step": 10116 }, { "epoch": 0.14348160569502005, "grad_norm": 3.0625, "learning_rate": 4.7590477166110725e-05, "loss": 0.8379, "step": 10118 }, { "epoch": 0.14350996734864627, "grad_norm": 3.375, "learning_rate": 4.758952005987436e-05, "loss": 0.8449, "step": 10120 }, { "epoch": 0.14353832900227248, "grad_norm": 3.078125, "learning_rate": 4.758856277321398e-05, "loss": 0.8611, "step": 10122 }, { "epoch": 0.1435666906558987, "grad_norm": 3.28125, "learning_rate": 4.758760530613726e-05, "loss": 0.8688, "step": 10124 }, { "epoch": 0.1435950523095249, "grad_norm": 2.890625, "learning_rate": 4.758664765865182e-05, "loss": 0.8216, "step": 10126 }, { "epoch": 0.14362341396315112, "grad_norm": 2.875, "learning_rate": 4.7585689830765324e-05, "loss": 0.8344, "step": 10128 }, { "epoch": 0.14365177561677733, "grad_norm": 3.328125, "learning_rate": 4.7584731822485414e-05, "loss": 0.8587, "step": 10130 }, { "epoch": 0.14368013727040355, "grad_norm": 3.078125, "learning_rate": 4.758377363381974e-05, "loss": 0.8801, "step": 10132 }, { "epoch": 0.14370849892402976, "grad_norm": 3.75, "learning_rate": 4.758281526477596e-05, "loss": 0.8767, "step": 10134 }, { "epoch": 0.14373686057765597, "grad_norm": 3.125, "learning_rate": 4.758185671536174e-05, "loss": 0.8736, "step": 10136 }, { "epoch": 0.14376522223128219, "grad_norm": 3.03125, "learning_rate": 4.758089798558471e-05, "loss": 0.8429, "step": 10138 }, { "epoch": 0.1437935838849084, "grad_norm": 3.125, "learning_rate": 4.757993907545255e-05, "loss": 0.8449, "step": 10140 }, { "epoch": 0.1438219455385346, "grad_norm": 3.109375, "learning_rate": 4.75789799849729e-05, "loss": 0.8694, "step": 10142 }, { "epoch": 0.14385030719216083, "grad_norm": 2.875, "learning_rate": 4.7578020714153446e-05, "loss": 0.8685, "step": 10144 }, { "epoch": 0.14387866884578707, "grad_norm": 3.078125, "learning_rate": 4.757706126300183e-05, "loss": 0.8633, "step": 10146 }, { "epoch": 0.14390703049941328, "grad_norm": 3.1875, "learning_rate": 4.757610163152572e-05, "loss": 0.8711, "step": 10148 }, { "epoch": 0.1439353921530395, "grad_norm": 3.03125, "learning_rate": 4.7575141819732774e-05, "loss": 0.8226, "step": 10150 }, { "epoch": 0.1439637538066657, "grad_norm": 3.03125, "learning_rate": 4.7574181827630666e-05, "loss": 0.8743, "step": 10152 }, { "epoch": 0.14399211546029192, "grad_norm": 3.4375, "learning_rate": 4.7573221655227064e-05, "loss": 0.8883, "step": 10154 }, { "epoch": 0.14402047711391813, "grad_norm": 3.328125, "learning_rate": 4.7572261302529633e-05, "loss": 0.8785, "step": 10156 }, { "epoch": 0.14404883876754435, "grad_norm": 3.625, "learning_rate": 4.7571300769546044e-05, "loss": 0.8874, "step": 10158 }, { "epoch": 0.14407720042117056, "grad_norm": 3.0, "learning_rate": 4.757034005628398e-05, "loss": 0.8502, "step": 10160 }, { "epoch": 0.14410556207479677, "grad_norm": 3.578125, "learning_rate": 4.7569379162751094e-05, "loss": 0.8543, "step": 10162 }, { "epoch": 0.14413392372842299, "grad_norm": 3.3125, "learning_rate": 4.756841808895508e-05, "loss": 0.8199, "step": 10164 }, { "epoch": 0.1441622853820492, "grad_norm": 3.15625, "learning_rate": 4.75674568349036e-05, "loss": 0.8687, "step": 10166 }, { "epoch": 0.1441906470356754, "grad_norm": 2.984375, "learning_rate": 4.756649540060434e-05, "loss": 0.8429, "step": 10168 }, { "epoch": 0.14421900868930163, "grad_norm": 2.921875, "learning_rate": 4.756553378606497e-05, "loss": 0.8835, "step": 10170 }, { "epoch": 0.14424737034292784, "grad_norm": 3.125, "learning_rate": 4.7564571991293184e-05, "loss": 0.8422, "step": 10172 }, { "epoch": 0.14427573199655405, "grad_norm": 2.953125, "learning_rate": 4.756361001629664e-05, "loss": 0.8378, "step": 10174 }, { "epoch": 0.14430409365018027, "grad_norm": 3.515625, "learning_rate": 4.756264786108306e-05, "loss": 0.834, "step": 10176 }, { "epoch": 0.14433245530380648, "grad_norm": 2.828125, "learning_rate": 4.75616855256601e-05, "loss": 0.8525, "step": 10178 }, { "epoch": 0.1443608169574327, "grad_norm": 3.078125, "learning_rate": 4.7560723010035446e-05, "loss": 0.8611, "step": 10180 }, { "epoch": 0.1443891786110589, "grad_norm": 2.953125, "learning_rate": 4.7559760314216794e-05, "loss": 0.8685, "step": 10182 }, { "epoch": 0.14441754026468515, "grad_norm": 3.0, "learning_rate": 4.755879743821183e-05, "loss": 0.8823, "step": 10184 }, { "epoch": 0.14444590191831136, "grad_norm": 3.21875, "learning_rate": 4.755783438202825e-05, "loss": 0.8883, "step": 10186 }, { "epoch": 0.14447426357193757, "grad_norm": 3.296875, "learning_rate": 4.755687114567375e-05, "loss": 0.8464, "step": 10188 }, { "epoch": 0.14450262522556379, "grad_norm": 2.953125, "learning_rate": 4.755590772915601e-05, "loss": 0.8345, "step": 10190 }, { "epoch": 0.14453098687919, "grad_norm": 2.859375, "learning_rate": 4.7554944132482724e-05, "loss": 0.8596, "step": 10192 }, { "epoch": 0.1445593485328162, "grad_norm": 2.828125, "learning_rate": 4.75539803556616e-05, "loss": 0.8417, "step": 10194 }, { "epoch": 0.14458771018644243, "grad_norm": 3.609375, "learning_rate": 4.7553016398700333e-05, "loss": 0.8772, "step": 10196 }, { "epoch": 0.14461607184006864, "grad_norm": 2.671875, "learning_rate": 4.755205226160662e-05, "loss": 0.8432, "step": 10198 }, { "epoch": 0.14464443349369485, "grad_norm": 2.9375, "learning_rate": 4.7551087944388153e-05, "loss": 0.8546, "step": 10200 }, { "epoch": 0.14467279514732106, "grad_norm": 2.984375, "learning_rate": 4.7550123447052646e-05, "loss": 0.8456, "step": 10202 }, { "epoch": 0.14470115680094728, "grad_norm": 3.109375, "learning_rate": 4.75491587696078e-05, "loss": 0.8132, "step": 10204 }, { "epoch": 0.1447295184545735, "grad_norm": 3.234375, "learning_rate": 4.754819391206132e-05, "loss": 0.9357, "step": 10206 }, { "epoch": 0.1447578801081997, "grad_norm": 3.171875, "learning_rate": 4.754722887442091e-05, "loss": 0.8581, "step": 10208 }, { "epoch": 0.14478624176182592, "grad_norm": 2.90625, "learning_rate": 4.754626365669428e-05, "loss": 0.9172, "step": 10210 }, { "epoch": 0.14481460341545213, "grad_norm": 3.03125, "learning_rate": 4.754529825888914e-05, "loss": 0.8368, "step": 10212 }, { "epoch": 0.14484296506907834, "grad_norm": 3.21875, "learning_rate": 4.7544332681013194e-05, "loss": 0.7964, "step": 10214 }, { "epoch": 0.14487132672270456, "grad_norm": 2.59375, "learning_rate": 4.754336692307416e-05, "loss": 0.8434, "step": 10216 }, { "epoch": 0.14489968837633077, "grad_norm": 3.3125, "learning_rate": 4.7542400985079755e-05, "loss": 0.9134, "step": 10218 }, { "epoch": 0.14492805002995698, "grad_norm": 2.796875, "learning_rate": 4.754143486703768e-05, "loss": 0.8269, "step": 10220 }, { "epoch": 0.14495641168358323, "grad_norm": 3.59375, "learning_rate": 4.754046856895568e-05, "loss": 0.8812, "step": 10222 }, { "epoch": 0.14498477333720944, "grad_norm": 2.921875, "learning_rate": 4.753950209084144e-05, "loss": 0.8358, "step": 10224 }, { "epoch": 0.14501313499083565, "grad_norm": 3.359375, "learning_rate": 4.753853543270269e-05, "loss": 0.8704, "step": 10226 }, { "epoch": 0.14504149664446186, "grad_norm": 3.015625, "learning_rate": 4.753756859454717e-05, "loss": 0.7999, "step": 10228 }, { "epoch": 0.14506985829808808, "grad_norm": 3.359375, "learning_rate": 4.753660157638257e-05, "loss": 0.8822, "step": 10230 }, { "epoch": 0.1450982199517143, "grad_norm": 3.125, "learning_rate": 4.7535634378216636e-05, "loss": 0.8762, "step": 10232 }, { "epoch": 0.1451265816053405, "grad_norm": 2.71875, "learning_rate": 4.7534667000057084e-05, "loss": 0.8235, "step": 10234 }, { "epoch": 0.14515494325896672, "grad_norm": 3.390625, "learning_rate": 4.753369944191165e-05, "loss": 0.8641, "step": 10236 }, { "epoch": 0.14518330491259293, "grad_norm": 3.09375, "learning_rate": 4.753273170378805e-05, "loss": 0.8542, "step": 10238 }, { "epoch": 0.14521166656621914, "grad_norm": 2.859375, "learning_rate": 4.753176378569403e-05, "loss": 0.8415, "step": 10240 }, { "epoch": 0.14524002821984536, "grad_norm": 3.34375, "learning_rate": 4.75307956876373e-05, "loss": 0.8207, "step": 10242 }, { "epoch": 0.14526838987347157, "grad_norm": 2.734375, "learning_rate": 4.75298274096256e-05, "loss": 0.8583, "step": 10244 }, { "epoch": 0.14529675152709778, "grad_norm": 3.125, "learning_rate": 4.752885895166667e-05, "loss": 0.8689, "step": 10246 }, { "epoch": 0.145325113180724, "grad_norm": 2.859375, "learning_rate": 4.752789031376824e-05, "loss": 0.8414, "step": 10248 }, { "epoch": 0.1453534748343502, "grad_norm": 3.203125, "learning_rate": 4.7526921495938046e-05, "loss": 0.8676, "step": 10250 }, { "epoch": 0.14538183648797642, "grad_norm": 3.390625, "learning_rate": 4.752595249818383e-05, "loss": 0.8573, "step": 10252 }, { "epoch": 0.14541019814160264, "grad_norm": 2.671875, "learning_rate": 4.752498332051333e-05, "loss": 0.824, "step": 10254 }, { "epoch": 0.14543855979522885, "grad_norm": 2.78125, "learning_rate": 4.7524013962934285e-05, "loss": 0.8317, "step": 10256 }, { "epoch": 0.14546692144885506, "grad_norm": 3.40625, "learning_rate": 4.752304442545443e-05, "loss": 0.8743, "step": 10258 }, { "epoch": 0.14549528310248128, "grad_norm": 3.28125, "learning_rate": 4.752207470808153e-05, "loss": 0.8796, "step": 10260 }, { "epoch": 0.14552364475610752, "grad_norm": 3.453125, "learning_rate": 4.752110481082331e-05, "loss": 0.8975, "step": 10262 }, { "epoch": 0.14555200640973373, "grad_norm": 3.171875, "learning_rate": 4.7520134733687524e-05, "loss": 0.8503, "step": 10264 }, { "epoch": 0.14558036806335994, "grad_norm": 3.109375, "learning_rate": 4.751916447668192e-05, "loss": 0.857, "step": 10266 }, { "epoch": 0.14560872971698616, "grad_norm": 2.890625, "learning_rate": 4.751819403981425e-05, "loss": 0.8186, "step": 10268 }, { "epoch": 0.14563709137061237, "grad_norm": 4.1875, "learning_rate": 4.7517223423092255e-05, "loss": 0.8538, "step": 10270 }, { "epoch": 0.14566545302423858, "grad_norm": 3.265625, "learning_rate": 4.75162526265237e-05, "loss": 0.8414, "step": 10272 }, { "epoch": 0.1456938146778648, "grad_norm": 3.203125, "learning_rate": 4.751528165011633e-05, "loss": 0.8533, "step": 10274 }, { "epoch": 0.145722176331491, "grad_norm": 3.140625, "learning_rate": 4.751431049387791e-05, "loss": 0.9075, "step": 10276 }, { "epoch": 0.14575053798511722, "grad_norm": 3.03125, "learning_rate": 4.751333915781618e-05, "loss": 0.8609, "step": 10278 }, { "epoch": 0.14577889963874344, "grad_norm": 3.40625, "learning_rate": 4.751236764193892e-05, "loss": 0.9451, "step": 10280 }, { "epoch": 0.14580726129236965, "grad_norm": 3.15625, "learning_rate": 4.751139594625388e-05, "loss": 0.8609, "step": 10282 }, { "epoch": 0.14583562294599586, "grad_norm": 3.140625, "learning_rate": 4.7510424070768814e-05, "loss": 0.87, "step": 10284 }, { "epoch": 0.14586398459962208, "grad_norm": 3.328125, "learning_rate": 4.7509452015491486e-05, "loss": 0.8629, "step": 10286 }, { "epoch": 0.1458923462532483, "grad_norm": 2.96875, "learning_rate": 4.750847978042966e-05, "loss": 0.8541, "step": 10288 }, { "epoch": 0.1459207079068745, "grad_norm": 3.0, "learning_rate": 4.750750736559111e-05, "loss": 0.8653, "step": 10290 }, { "epoch": 0.14594906956050072, "grad_norm": 3.09375, "learning_rate": 4.7506534770983595e-05, "loss": 0.94, "step": 10292 }, { "epoch": 0.14597743121412693, "grad_norm": 2.890625, "learning_rate": 4.750556199661489e-05, "loss": 0.8824, "step": 10294 }, { "epoch": 0.14600579286775314, "grad_norm": 3.28125, "learning_rate": 4.750458904249276e-05, "loss": 0.8646, "step": 10296 }, { "epoch": 0.14603415452137936, "grad_norm": 3.1875, "learning_rate": 4.750361590862498e-05, "loss": 0.866, "step": 10298 }, { "epoch": 0.1460625161750056, "grad_norm": 3.09375, "learning_rate": 4.750264259501931e-05, "loss": 0.8373, "step": 10300 }, { "epoch": 0.1460908778286318, "grad_norm": 3.3125, "learning_rate": 4.7501669101683535e-05, "loss": 0.7949, "step": 10302 }, { "epoch": 0.14611923948225802, "grad_norm": 3.34375, "learning_rate": 4.750069542862543e-05, "loss": 0.8714, "step": 10304 }, { "epoch": 0.14614760113588424, "grad_norm": 2.84375, "learning_rate": 4.749972157585276e-05, "loss": 0.8706, "step": 10306 }, { "epoch": 0.14617596278951045, "grad_norm": 3.25, "learning_rate": 4.7498747543373325e-05, "loss": 0.844, "step": 10308 }, { "epoch": 0.14620432444313666, "grad_norm": 2.953125, "learning_rate": 4.749777333119489e-05, "loss": 0.8659, "step": 10310 }, { "epoch": 0.14623268609676288, "grad_norm": 3.28125, "learning_rate": 4.749679893932524e-05, "loss": 0.9178, "step": 10312 }, { "epoch": 0.1462610477503891, "grad_norm": 3.359375, "learning_rate": 4.749582436777215e-05, "loss": 0.8853, "step": 10314 }, { "epoch": 0.1462894094040153, "grad_norm": 2.71875, "learning_rate": 4.7494849616543416e-05, "loss": 0.8272, "step": 10316 }, { "epoch": 0.14631777105764152, "grad_norm": 3.109375, "learning_rate": 4.7493874685646816e-05, "loss": 0.9145, "step": 10318 }, { "epoch": 0.14634613271126773, "grad_norm": 2.921875, "learning_rate": 4.7492899575090144e-05, "loss": 0.8589, "step": 10320 }, { "epoch": 0.14637449436489394, "grad_norm": 2.890625, "learning_rate": 4.749192428488117e-05, "loss": 0.8397, "step": 10322 }, { "epoch": 0.14640285601852016, "grad_norm": 2.984375, "learning_rate": 4.749094881502771e-05, "loss": 0.9036, "step": 10324 }, { "epoch": 0.14643121767214637, "grad_norm": 2.859375, "learning_rate": 4.748997316553754e-05, "loss": 0.8489, "step": 10326 }, { "epoch": 0.14645957932577258, "grad_norm": 3.171875, "learning_rate": 4.7488997336418445e-05, "loss": 0.8667, "step": 10328 }, { "epoch": 0.1464879409793988, "grad_norm": 3.390625, "learning_rate": 4.7488021327678234e-05, "loss": 0.8245, "step": 10330 }, { "epoch": 0.146516302633025, "grad_norm": 3.265625, "learning_rate": 4.748704513932469e-05, "loss": 0.8592, "step": 10332 }, { "epoch": 0.14654466428665122, "grad_norm": 2.96875, "learning_rate": 4.748606877136563e-05, "loss": 0.8478, "step": 10334 }, { "epoch": 0.14657302594027743, "grad_norm": 2.96875, "learning_rate": 4.748509222380883e-05, "loss": 0.8704, "step": 10336 }, { "epoch": 0.14660138759390368, "grad_norm": 3.234375, "learning_rate": 4.7484115496662095e-05, "loss": 0.8525, "step": 10338 }, { "epoch": 0.1466297492475299, "grad_norm": 3.296875, "learning_rate": 4.7483138589933244e-05, "loss": 0.877, "step": 10340 }, { "epoch": 0.1466581109011561, "grad_norm": 3.03125, "learning_rate": 4.7482161503630053e-05, "loss": 0.8192, "step": 10342 }, { "epoch": 0.14668647255478232, "grad_norm": 3.21875, "learning_rate": 4.748118423776034e-05, "loss": 0.8604, "step": 10344 }, { "epoch": 0.14671483420840853, "grad_norm": 2.5625, "learning_rate": 4.748020679233191e-05, "loss": 0.8537, "step": 10346 }, { "epoch": 0.14674319586203474, "grad_norm": 3.46875, "learning_rate": 4.7479229167352565e-05, "loss": 0.8026, "step": 10348 }, { "epoch": 0.14677155751566096, "grad_norm": 3.046875, "learning_rate": 4.747825136283013e-05, "loss": 0.7927, "step": 10350 }, { "epoch": 0.14679991916928717, "grad_norm": 2.875, "learning_rate": 4.747727337877239e-05, "loss": 0.8409, "step": 10352 }, { "epoch": 0.14682828082291338, "grad_norm": 3.0, "learning_rate": 4.747629521518717e-05, "loss": 0.8509, "step": 10354 }, { "epoch": 0.1468566424765396, "grad_norm": 2.640625, "learning_rate": 4.7475316872082286e-05, "loss": 0.8428, "step": 10356 }, { "epoch": 0.1468850041301658, "grad_norm": 3.4375, "learning_rate": 4.7474338349465544e-05, "loss": 0.8995, "step": 10358 }, { "epoch": 0.14691336578379202, "grad_norm": 3.09375, "learning_rate": 4.747335964734476e-05, "loss": 0.8691, "step": 10360 }, { "epoch": 0.14694172743741823, "grad_norm": 3.078125, "learning_rate": 4.747238076572777e-05, "loss": 0.8624, "step": 10362 }, { "epoch": 0.14697008909104445, "grad_norm": 3.0625, "learning_rate": 4.747140170462235e-05, "loss": 0.8676, "step": 10364 }, { "epoch": 0.14699845074467066, "grad_norm": 2.96875, "learning_rate": 4.747042246403636e-05, "loss": 0.8325, "step": 10366 }, { "epoch": 0.14702681239829687, "grad_norm": 3.03125, "learning_rate": 4.74694430439776e-05, "loss": 0.9016, "step": 10368 }, { "epoch": 0.1470551740519231, "grad_norm": 3.28125, "learning_rate": 4.746846344445391e-05, "loss": 0.8643, "step": 10370 }, { "epoch": 0.1470835357055493, "grad_norm": 2.9375, "learning_rate": 4.74674836654731e-05, "loss": 0.893, "step": 10372 }, { "epoch": 0.14711189735917551, "grad_norm": 2.984375, "learning_rate": 4.7466503707043e-05, "loss": 0.8298, "step": 10374 }, { "epoch": 0.14714025901280176, "grad_norm": 2.890625, "learning_rate": 4.746552356917143e-05, "loss": 0.8708, "step": 10376 }, { "epoch": 0.14716862066642797, "grad_norm": 2.875, "learning_rate": 4.7464543251866226e-05, "loss": 0.9084, "step": 10378 }, { "epoch": 0.14719698232005418, "grad_norm": 2.96875, "learning_rate": 4.746356275513522e-05, "loss": 0.8751, "step": 10380 }, { "epoch": 0.1472253439736804, "grad_norm": 3.234375, "learning_rate": 4.746258207898624e-05, "loss": 0.8707, "step": 10382 }, { "epoch": 0.1472537056273066, "grad_norm": 3.140625, "learning_rate": 4.746160122342712e-05, "loss": 0.8554, "step": 10384 }, { "epoch": 0.14728206728093282, "grad_norm": 2.84375, "learning_rate": 4.746062018846569e-05, "loss": 0.8656, "step": 10386 }, { "epoch": 0.14731042893455903, "grad_norm": 3.265625, "learning_rate": 4.745963897410979e-05, "loss": 0.8141, "step": 10388 }, { "epoch": 0.14733879058818525, "grad_norm": 3.140625, "learning_rate": 4.745865758036725e-05, "loss": 0.8266, "step": 10390 }, { "epoch": 0.14736715224181146, "grad_norm": 3.234375, "learning_rate": 4.745767600724592e-05, "loss": 0.8982, "step": 10392 }, { "epoch": 0.14739551389543767, "grad_norm": 3.125, "learning_rate": 4.7456694254753635e-05, "loss": 0.8489, "step": 10394 }, { "epoch": 0.1474238755490639, "grad_norm": 2.984375, "learning_rate": 4.7455712322898236e-05, "loss": 0.8759, "step": 10396 }, { "epoch": 0.1474522372026901, "grad_norm": 2.953125, "learning_rate": 4.745473021168756e-05, "loss": 0.8554, "step": 10398 }, { "epoch": 0.14748059885631631, "grad_norm": 2.96875, "learning_rate": 4.7453747921129456e-05, "loss": 0.9012, "step": 10400 }, { "epoch": 0.14750896050994253, "grad_norm": 3.5, "learning_rate": 4.7452765451231776e-05, "loss": 0.8792, "step": 10402 }, { "epoch": 0.14753732216356874, "grad_norm": 3.546875, "learning_rate": 4.7451782802002354e-05, "loss": 0.845, "step": 10404 }, { "epoch": 0.14756568381719495, "grad_norm": 3.09375, "learning_rate": 4.7450799973449044e-05, "loss": 0.8844, "step": 10406 }, { "epoch": 0.14759404547082117, "grad_norm": 3.34375, "learning_rate": 4.74498169655797e-05, "loss": 0.8484, "step": 10408 }, { "epoch": 0.14762240712444738, "grad_norm": 3.046875, "learning_rate": 4.744883377840217e-05, "loss": 0.8403, "step": 10410 }, { "epoch": 0.1476507687780736, "grad_norm": 3.34375, "learning_rate": 4.744785041192431e-05, "loss": 0.8683, "step": 10412 }, { "epoch": 0.1476791304316998, "grad_norm": 3.25, "learning_rate": 4.744686686615397e-05, "loss": 0.8753, "step": 10414 }, { "epoch": 0.14770749208532605, "grad_norm": 2.96875, "learning_rate": 4.744588314109901e-05, "loss": 0.852, "step": 10416 }, { "epoch": 0.14773585373895226, "grad_norm": 3.125, "learning_rate": 4.744489923676728e-05, "loss": 0.8806, "step": 10418 }, { "epoch": 0.14776421539257847, "grad_norm": 2.75, "learning_rate": 4.744391515316664e-05, "loss": 0.8465, "step": 10420 }, { "epoch": 0.1477925770462047, "grad_norm": 2.796875, "learning_rate": 4.744293089030496e-05, "loss": 0.8609, "step": 10422 }, { "epoch": 0.1478209386998309, "grad_norm": 2.84375, "learning_rate": 4.7441946448190086e-05, "loss": 0.8562, "step": 10424 }, { "epoch": 0.1478493003534571, "grad_norm": 3.015625, "learning_rate": 4.74409618268299e-05, "loss": 0.8534, "step": 10426 }, { "epoch": 0.14787766200708333, "grad_norm": 3.015625, "learning_rate": 4.743997702623225e-05, "loss": 0.899, "step": 10428 }, { "epoch": 0.14790602366070954, "grad_norm": 3.21875, "learning_rate": 4.7438992046404994e-05, "loss": 0.8842, "step": 10430 }, { "epoch": 0.14793438531433575, "grad_norm": 3.234375, "learning_rate": 4.743800688735603e-05, "loss": 0.8542, "step": 10432 }, { "epoch": 0.14796274696796197, "grad_norm": 2.875, "learning_rate": 4.74370215490932e-05, "loss": 0.8614, "step": 10434 }, { "epoch": 0.14799110862158818, "grad_norm": 2.984375, "learning_rate": 4.7436036031624385e-05, "loss": 0.8652, "step": 10436 }, { "epoch": 0.1480194702752144, "grad_norm": 2.921875, "learning_rate": 4.743505033495746e-05, "loss": 0.844, "step": 10438 }, { "epoch": 0.1480478319288406, "grad_norm": 3.5625, "learning_rate": 4.7434064459100275e-05, "loss": 0.8386, "step": 10440 }, { "epoch": 0.14807619358246682, "grad_norm": 3.203125, "learning_rate": 4.743307840406073e-05, "loss": 0.8524, "step": 10442 }, { "epoch": 0.14810455523609303, "grad_norm": 3.1875, "learning_rate": 4.743209216984669e-05, "loss": 0.8453, "step": 10444 }, { "epoch": 0.14813291688971925, "grad_norm": 2.796875, "learning_rate": 4.743110575646603e-05, "loss": 0.8895, "step": 10446 }, { "epoch": 0.14816127854334546, "grad_norm": 3.0, "learning_rate": 4.743011916392664e-05, "loss": 0.8314, "step": 10448 }, { "epoch": 0.14818964019697167, "grad_norm": 3.0, "learning_rate": 4.7429132392236384e-05, "loss": 0.8272, "step": 10450 }, { "epoch": 0.14821800185059789, "grad_norm": 3.015625, "learning_rate": 4.742814544140316e-05, "loss": 0.83, "step": 10452 }, { "epoch": 0.14824636350422413, "grad_norm": 3.0625, "learning_rate": 4.7427158311434835e-05, "loss": 0.845, "step": 10454 }, { "epoch": 0.14827472515785034, "grad_norm": 2.921875, "learning_rate": 4.7426171002339306e-05, "loss": 0.8253, "step": 10456 }, { "epoch": 0.14830308681147655, "grad_norm": 3.015625, "learning_rate": 4.742518351412446e-05, "loss": 0.8757, "step": 10458 }, { "epoch": 0.14833144846510277, "grad_norm": 3.21875, "learning_rate": 4.742419584679816e-05, "loss": 0.8963, "step": 10460 }, { "epoch": 0.14835981011872898, "grad_norm": 3.28125, "learning_rate": 4.742320800036832e-05, "loss": 0.8683, "step": 10462 }, { "epoch": 0.1483881717723552, "grad_norm": 3.0, "learning_rate": 4.742221997484283e-05, "loss": 0.7973, "step": 10464 }, { "epoch": 0.1484165334259814, "grad_norm": 3.09375, "learning_rate": 4.742123177022957e-05, "loss": 0.8561, "step": 10466 }, { "epoch": 0.14844489507960762, "grad_norm": 3.140625, "learning_rate": 4.742024338653643e-05, "loss": 0.8887, "step": 10468 }, { "epoch": 0.14847325673323383, "grad_norm": 2.65625, "learning_rate": 4.741925482377131e-05, "loss": 0.8312, "step": 10470 }, { "epoch": 0.14850161838686005, "grad_norm": 2.875, "learning_rate": 4.7418266081942116e-05, "loss": 0.8632, "step": 10472 }, { "epoch": 0.14852998004048626, "grad_norm": 3.453125, "learning_rate": 4.741727716105673e-05, "loss": 0.8744, "step": 10474 }, { "epoch": 0.14855834169411247, "grad_norm": 3.65625, "learning_rate": 4.741628806112305e-05, "loss": 0.8407, "step": 10476 }, { "epoch": 0.14858670334773869, "grad_norm": 2.6875, "learning_rate": 4.741529878214899e-05, "loss": 0.8285, "step": 10478 }, { "epoch": 0.1486150650013649, "grad_norm": 3.203125, "learning_rate": 4.741430932414245e-05, "loss": 0.8436, "step": 10480 }, { "epoch": 0.1486434266549911, "grad_norm": 3.203125, "learning_rate": 4.741331968711131e-05, "loss": 0.858, "step": 10482 }, { "epoch": 0.14867178830861733, "grad_norm": 3.046875, "learning_rate": 4.74123298710635e-05, "loss": 0.8377, "step": 10484 }, { "epoch": 0.14870014996224354, "grad_norm": 3.09375, "learning_rate": 4.741133987600691e-05, "loss": 0.8519, "step": 10486 }, { "epoch": 0.14872851161586975, "grad_norm": 3.1875, "learning_rate": 4.741034970194946e-05, "loss": 0.8491, "step": 10488 }, { "epoch": 0.14875687326949597, "grad_norm": 3.828125, "learning_rate": 4.740935934889905e-05, "loss": 0.8534, "step": 10490 }, { "epoch": 0.1487852349231222, "grad_norm": 3.15625, "learning_rate": 4.7408368816863596e-05, "loss": 0.8587, "step": 10492 }, { "epoch": 0.14881359657674842, "grad_norm": 3.234375, "learning_rate": 4.7407378105851e-05, "loss": 0.7781, "step": 10494 }, { "epoch": 0.14884195823037463, "grad_norm": 3.125, "learning_rate": 4.7406387215869184e-05, "loss": 0.8945, "step": 10496 }, { "epoch": 0.14887031988400085, "grad_norm": 3.171875, "learning_rate": 4.7405396146926065e-05, "loss": 0.8522, "step": 10498 }, { "epoch": 0.14889868153762706, "grad_norm": 3.34375, "learning_rate": 4.740440489902954e-05, "loss": 0.902, "step": 10500 }, { "epoch": 0.14892704319125327, "grad_norm": 3.203125, "learning_rate": 4.740341347218754e-05, "loss": 0.8663, "step": 10502 }, { "epoch": 0.14895540484487949, "grad_norm": 3.140625, "learning_rate": 4.740242186640799e-05, "loss": 0.8985, "step": 10504 }, { "epoch": 0.1489837664985057, "grad_norm": 3.53125, "learning_rate": 4.740143008169879e-05, "loss": 0.8672, "step": 10506 }, { "epoch": 0.1490121281521319, "grad_norm": 3.734375, "learning_rate": 4.7400438118067894e-05, "loss": 0.9126, "step": 10508 }, { "epoch": 0.14904048980575813, "grad_norm": 3.625, "learning_rate": 4.7399445975523185e-05, "loss": 0.8612, "step": 10510 }, { "epoch": 0.14906885145938434, "grad_norm": 3.078125, "learning_rate": 4.7398453654072616e-05, "loss": 0.8557, "step": 10512 }, { "epoch": 0.14909721311301055, "grad_norm": 3.453125, "learning_rate": 4.73974611537241e-05, "loss": 0.8151, "step": 10514 }, { "epoch": 0.14912557476663676, "grad_norm": 2.890625, "learning_rate": 4.7396468474485575e-05, "loss": 0.823, "step": 10516 }, { "epoch": 0.14915393642026298, "grad_norm": 2.921875, "learning_rate": 4.739547561636496e-05, "loss": 0.8812, "step": 10518 }, { "epoch": 0.1491822980738892, "grad_norm": 2.828125, "learning_rate": 4.739448257937018e-05, "loss": 0.8435, "step": 10520 }, { "epoch": 0.1492106597275154, "grad_norm": 2.953125, "learning_rate": 4.739348936350918e-05, "loss": 0.8101, "step": 10522 }, { "epoch": 0.14923902138114162, "grad_norm": 2.9375, "learning_rate": 4.7392495968789885e-05, "loss": 0.8998, "step": 10524 }, { "epoch": 0.14926738303476783, "grad_norm": 3.28125, "learning_rate": 4.7391502395220234e-05, "loss": 0.8674, "step": 10526 }, { "epoch": 0.14929574468839404, "grad_norm": 3.171875, "learning_rate": 4.7390508642808155e-05, "loss": 0.9009, "step": 10528 }, { "epoch": 0.14932410634202029, "grad_norm": 3.046875, "learning_rate": 4.73895147115616e-05, "loss": 0.8576, "step": 10530 }, { "epoch": 0.1493524679956465, "grad_norm": 3.140625, "learning_rate": 4.738852060148849e-05, "loss": 0.7996, "step": 10532 }, { "epoch": 0.1493808296492727, "grad_norm": 3.21875, "learning_rate": 4.738752631259677e-05, "loss": 0.8229, "step": 10534 }, { "epoch": 0.14940919130289892, "grad_norm": 2.921875, "learning_rate": 4.738653184489439e-05, "loss": 0.8034, "step": 10536 }, { "epoch": 0.14943755295652514, "grad_norm": 3.234375, "learning_rate": 4.738553719838928e-05, "loss": 0.8418, "step": 10538 }, { "epoch": 0.14946591461015135, "grad_norm": 3.0625, "learning_rate": 4.738454237308939e-05, "loss": 0.9024, "step": 10540 }, { "epoch": 0.14949427626377756, "grad_norm": 3.734375, "learning_rate": 4.738354736900268e-05, "loss": 0.88, "step": 10542 }, { "epoch": 0.14952263791740378, "grad_norm": 2.71875, "learning_rate": 4.7382552186137074e-05, "loss": 0.8594, "step": 10544 }, { "epoch": 0.14955099957103, "grad_norm": 3.078125, "learning_rate": 4.7381556824500536e-05, "loss": 0.8171, "step": 10546 }, { "epoch": 0.1495793612246562, "grad_norm": 2.96875, "learning_rate": 4.7380561284101e-05, "loss": 0.8811, "step": 10548 }, { "epoch": 0.14960772287828242, "grad_norm": 3.25, "learning_rate": 4.737956556494644e-05, "loss": 0.852, "step": 10550 }, { "epoch": 0.14963608453190863, "grad_norm": 2.859375, "learning_rate": 4.737856966704479e-05, "loss": 0.8325, "step": 10552 }, { "epoch": 0.14966444618553484, "grad_norm": 3.390625, "learning_rate": 4.7377573590404026e-05, "loss": 0.8702, "step": 10554 }, { "epoch": 0.14969280783916106, "grad_norm": 3.25, "learning_rate": 4.737657733503207e-05, "loss": 0.8734, "step": 10556 }, { "epoch": 0.14972116949278727, "grad_norm": 3.296875, "learning_rate": 4.737558090093691e-05, "loss": 0.8427, "step": 10558 }, { "epoch": 0.14974953114641348, "grad_norm": 2.96875, "learning_rate": 4.737458428812649e-05, "loss": 0.8637, "step": 10560 }, { "epoch": 0.1497778928000397, "grad_norm": 3.046875, "learning_rate": 4.737358749660877e-05, "loss": 0.8285, "step": 10562 }, { "epoch": 0.1498062544536659, "grad_norm": 2.890625, "learning_rate": 4.7372590526391715e-05, "loss": 0.9196, "step": 10564 }, { "epoch": 0.14983461610729212, "grad_norm": 3.09375, "learning_rate": 4.737159337748329e-05, "loss": 0.8683, "step": 10566 }, { "epoch": 0.14986297776091834, "grad_norm": 2.921875, "learning_rate": 4.737059604989145e-05, "loss": 0.8668, "step": 10568 }, { "epoch": 0.14989133941454458, "grad_norm": 2.984375, "learning_rate": 4.736959854362417e-05, "loss": 0.9091, "step": 10570 }, { "epoch": 0.1499197010681708, "grad_norm": 3.078125, "learning_rate": 4.736860085868942e-05, "loss": 0.8966, "step": 10572 }, { "epoch": 0.149948062721797, "grad_norm": 3.671875, "learning_rate": 4.736760299509516e-05, "loss": 0.8502, "step": 10574 }, { "epoch": 0.14997642437542322, "grad_norm": 3.046875, "learning_rate": 4.7366604952849364e-05, "loss": 0.8359, "step": 10576 }, { "epoch": 0.15000478602904943, "grad_norm": 2.78125, "learning_rate": 4.736560673196e-05, "loss": 0.7901, "step": 10578 }, { "epoch": 0.15003314768267564, "grad_norm": 3.40625, "learning_rate": 4.736460833243505e-05, "loss": 0.8647, "step": 10580 }, { "epoch": 0.15006150933630186, "grad_norm": 2.9375, "learning_rate": 4.7363609754282466e-05, "loss": 0.8754, "step": 10582 }, { "epoch": 0.15008987098992807, "grad_norm": 2.90625, "learning_rate": 4.7362610997510255e-05, "loss": 0.8299, "step": 10584 }, { "epoch": 0.15011823264355428, "grad_norm": 3.140625, "learning_rate": 4.7361612062126374e-05, "loss": 0.8453, "step": 10586 }, { "epoch": 0.1501465942971805, "grad_norm": 3.125, "learning_rate": 4.73606129481388e-05, "loss": 0.8733, "step": 10588 }, { "epoch": 0.1501749559508067, "grad_norm": 2.875, "learning_rate": 4.735961365555552e-05, "loss": 0.8369, "step": 10590 }, { "epoch": 0.15020331760443292, "grad_norm": 3.328125, "learning_rate": 4.735861418438452e-05, "loss": 0.854, "step": 10592 }, { "epoch": 0.15023167925805914, "grad_norm": 3.40625, "learning_rate": 4.735761453463378e-05, "loss": 0.8656, "step": 10594 }, { "epoch": 0.15026004091168535, "grad_norm": 3.3125, "learning_rate": 4.735661470631127e-05, "loss": 0.8859, "step": 10596 }, { "epoch": 0.15028840256531156, "grad_norm": 3.28125, "learning_rate": 4.735561469942499e-05, "loss": 0.8423, "step": 10598 }, { "epoch": 0.15031676421893778, "grad_norm": 3.203125, "learning_rate": 4.7354614513982934e-05, "loss": 0.8685, "step": 10600 }, { "epoch": 0.150345125872564, "grad_norm": 3.078125, "learning_rate": 4.7353614149993074e-05, "loss": 0.8093, "step": 10602 }, { "epoch": 0.1503734875261902, "grad_norm": 2.6875, "learning_rate": 4.73526136074634e-05, "loss": 0.8242, "step": 10604 }, { "epoch": 0.15040184917981642, "grad_norm": 2.828125, "learning_rate": 4.735161288640192e-05, "loss": 0.8224, "step": 10606 }, { "epoch": 0.15043021083344266, "grad_norm": 2.734375, "learning_rate": 4.7350611986816616e-05, "loss": 0.8413, "step": 10608 }, { "epoch": 0.15045857248706887, "grad_norm": 2.859375, "learning_rate": 4.734961090871548e-05, "loss": 0.8802, "step": 10610 }, { "epoch": 0.15048693414069508, "grad_norm": 3.15625, "learning_rate": 4.734860965210651e-05, "loss": 0.8258, "step": 10612 }, { "epoch": 0.1505152957943213, "grad_norm": 3.078125, "learning_rate": 4.734760821699771e-05, "loss": 0.8835, "step": 10614 }, { "epoch": 0.1505436574479475, "grad_norm": 3.109375, "learning_rate": 4.734660660339707e-05, "loss": 0.853, "step": 10616 }, { "epoch": 0.15057201910157372, "grad_norm": 3.15625, "learning_rate": 4.734560481131259e-05, "loss": 0.8682, "step": 10618 }, { "epoch": 0.15060038075519994, "grad_norm": 2.875, "learning_rate": 4.734460284075227e-05, "loss": 0.867, "step": 10620 }, { "epoch": 0.15062874240882615, "grad_norm": 3.0625, "learning_rate": 4.734360069172413e-05, "loss": 0.8725, "step": 10622 }, { "epoch": 0.15065710406245236, "grad_norm": 3.15625, "learning_rate": 4.734259836423615e-05, "loss": 0.9182, "step": 10624 }, { "epoch": 0.15068546571607858, "grad_norm": 3.296875, "learning_rate": 4.734159585829635e-05, "loss": 0.8568, "step": 10626 }, { "epoch": 0.1507138273697048, "grad_norm": 3.359375, "learning_rate": 4.7340593173912737e-05, "loss": 0.8534, "step": 10628 }, { "epoch": 0.150742189023331, "grad_norm": 3.203125, "learning_rate": 4.733959031109331e-05, "loss": 0.8352, "step": 10630 }, { "epoch": 0.15077055067695722, "grad_norm": 3.34375, "learning_rate": 4.733858726984609e-05, "loss": 0.8328, "step": 10632 }, { "epoch": 0.15079891233058343, "grad_norm": 3.03125, "learning_rate": 4.7337584050179074e-05, "loss": 0.8525, "step": 10634 }, { "epoch": 0.15082727398420964, "grad_norm": 3.0625, "learning_rate": 4.733658065210029e-05, "loss": 0.8715, "step": 10636 }, { "epoch": 0.15085563563783586, "grad_norm": 3.328125, "learning_rate": 4.733557707561775e-05, "loss": 0.8584, "step": 10638 }, { "epoch": 0.15088399729146207, "grad_norm": 3.09375, "learning_rate": 4.7334573320739466e-05, "loss": 0.8288, "step": 10640 }, { "epoch": 0.15091235894508828, "grad_norm": 3.15625, "learning_rate": 4.733356938747345e-05, "loss": 0.8389, "step": 10642 }, { "epoch": 0.1509407205987145, "grad_norm": 2.90625, "learning_rate": 4.7332565275827733e-05, "loss": 0.7991, "step": 10644 }, { "epoch": 0.15096908225234074, "grad_norm": 3.125, "learning_rate": 4.733156098581032e-05, "loss": 0.8429, "step": 10646 }, { "epoch": 0.15099744390596695, "grad_norm": 3.546875, "learning_rate": 4.733055651742925e-05, "loss": 0.8911, "step": 10648 }, { "epoch": 0.15102580555959316, "grad_norm": 3.328125, "learning_rate": 4.732955187069252e-05, "loss": 0.8577, "step": 10650 }, { "epoch": 0.15105416721321938, "grad_norm": 2.875, "learning_rate": 4.7328547045608185e-05, "loss": 0.851, "step": 10652 }, { "epoch": 0.1510825288668456, "grad_norm": 2.765625, "learning_rate": 4.7327542042184245e-05, "loss": 0.836, "step": 10654 }, { "epoch": 0.1511108905204718, "grad_norm": 3.015625, "learning_rate": 4.7326536860428746e-05, "loss": 0.8914, "step": 10656 }, { "epoch": 0.15113925217409802, "grad_norm": 3.078125, "learning_rate": 4.73255315003497e-05, "loss": 0.8547, "step": 10658 }, { "epoch": 0.15116761382772423, "grad_norm": 3.171875, "learning_rate": 4.7324525961955144e-05, "loss": 0.8766, "step": 10660 }, { "epoch": 0.15119597548135044, "grad_norm": 3.046875, "learning_rate": 4.7323520245253114e-05, "loss": 0.8682, "step": 10662 }, { "epoch": 0.15122433713497666, "grad_norm": 3.40625, "learning_rate": 4.7322514350251645e-05, "loss": 0.8539, "step": 10664 }, { "epoch": 0.15125269878860287, "grad_norm": 2.984375, "learning_rate": 4.732150827695876e-05, "loss": 0.876, "step": 10666 }, { "epoch": 0.15128106044222908, "grad_norm": 3.46875, "learning_rate": 4.7320502025382495e-05, "loss": 0.8412, "step": 10668 }, { "epoch": 0.1513094220958553, "grad_norm": 2.9375, "learning_rate": 4.7319495595530894e-05, "loss": 0.8098, "step": 10670 }, { "epoch": 0.1513377837494815, "grad_norm": 3.078125, "learning_rate": 4.7318488987411994e-05, "loss": 0.8816, "step": 10672 }, { "epoch": 0.15136614540310772, "grad_norm": 3.390625, "learning_rate": 4.731748220103384e-05, "loss": 0.8814, "step": 10674 }, { "epoch": 0.15139450705673393, "grad_norm": 3.171875, "learning_rate": 4.7316475236404454e-05, "loss": 0.7992, "step": 10676 }, { "epoch": 0.15142286871036015, "grad_norm": 3.171875, "learning_rate": 4.73154680935319e-05, "loss": 0.8243, "step": 10678 }, { "epoch": 0.15145123036398636, "grad_norm": 2.953125, "learning_rate": 4.731446077242422e-05, "loss": 0.8025, "step": 10680 }, { "epoch": 0.15147959201761257, "grad_norm": 3.078125, "learning_rate": 4.7313453273089445e-05, "loss": 0.8614, "step": 10682 }, { "epoch": 0.15150795367123882, "grad_norm": 3.21875, "learning_rate": 4.7312445595535625e-05, "loss": 0.8442, "step": 10684 }, { "epoch": 0.15153631532486503, "grad_norm": 2.75, "learning_rate": 4.7311437739770824e-05, "loss": 0.8514, "step": 10686 }, { "epoch": 0.15156467697849124, "grad_norm": 3.078125, "learning_rate": 4.731042970580307e-05, "loss": 0.8575, "step": 10688 }, { "epoch": 0.15159303863211746, "grad_norm": 3.125, "learning_rate": 4.7309421493640436e-05, "loss": 0.8497, "step": 10690 }, { "epoch": 0.15162140028574367, "grad_norm": 3.234375, "learning_rate": 4.730841310329096e-05, "loss": 0.876, "step": 10692 }, { "epoch": 0.15164976193936988, "grad_norm": 3.140625, "learning_rate": 4.730740453476271e-05, "loss": 0.9269, "step": 10694 }, { "epoch": 0.1516781235929961, "grad_norm": 3.234375, "learning_rate": 4.7306395788063724e-05, "loss": 0.8301, "step": 10696 }, { "epoch": 0.1517064852466223, "grad_norm": 3.171875, "learning_rate": 4.7305386863202065e-05, "loss": 0.9016, "step": 10698 }, { "epoch": 0.15173484690024852, "grad_norm": 3.15625, "learning_rate": 4.7304377760185796e-05, "loss": 0.8385, "step": 10700 }, { "epoch": 0.15176320855387473, "grad_norm": 3.171875, "learning_rate": 4.7303368479022974e-05, "loss": 0.8781, "step": 10702 }, { "epoch": 0.15179157020750095, "grad_norm": 3.046875, "learning_rate": 4.730235901972166e-05, "loss": 0.8595, "step": 10704 }, { "epoch": 0.15181993186112716, "grad_norm": 3.078125, "learning_rate": 4.730134938228992e-05, "loss": 0.8832, "step": 10706 }, { "epoch": 0.15184829351475337, "grad_norm": 3.140625, "learning_rate": 4.7300339566735805e-05, "loss": 0.8634, "step": 10708 }, { "epoch": 0.1518766551683796, "grad_norm": 2.96875, "learning_rate": 4.72993295730674e-05, "loss": 0.8175, "step": 10710 }, { "epoch": 0.1519050168220058, "grad_norm": 3.359375, "learning_rate": 4.729831940129277e-05, "loss": 0.7978, "step": 10712 }, { "epoch": 0.151933378475632, "grad_norm": 3.28125, "learning_rate": 4.729730905141996e-05, "loss": 0.8862, "step": 10714 }, { "epoch": 0.15196174012925823, "grad_norm": 3.078125, "learning_rate": 4.729629852345706e-05, "loss": 0.8722, "step": 10716 }, { "epoch": 0.15199010178288444, "grad_norm": 3.375, "learning_rate": 4.7295287817412136e-05, "loss": 0.8525, "step": 10718 }, { "epoch": 0.15201846343651065, "grad_norm": 3.0, "learning_rate": 4.7294276933293265e-05, "loss": 0.8345, "step": 10720 }, { "epoch": 0.15204682509013687, "grad_norm": 2.890625, "learning_rate": 4.729326587110852e-05, "loss": 0.8258, "step": 10722 }, { "epoch": 0.1520751867437631, "grad_norm": 2.734375, "learning_rate": 4.729225463086596e-05, "loss": 0.8902, "step": 10724 }, { "epoch": 0.15210354839738932, "grad_norm": 3.15625, "learning_rate": 4.729124321257369e-05, "loss": 0.8481, "step": 10726 }, { "epoch": 0.15213191005101553, "grad_norm": 3.203125, "learning_rate": 4.729023161623977e-05, "loss": 0.8615, "step": 10728 }, { "epoch": 0.15216027170464175, "grad_norm": 3.4375, "learning_rate": 4.7289219841872273e-05, "loss": 0.8372, "step": 10730 }, { "epoch": 0.15218863335826796, "grad_norm": 3.375, "learning_rate": 4.72882078894793e-05, "loss": 0.8692, "step": 10732 }, { "epoch": 0.15221699501189417, "grad_norm": 3.0, "learning_rate": 4.728719575906892e-05, "loss": 0.8189, "step": 10734 }, { "epoch": 0.1522453566655204, "grad_norm": 3.0, "learning_rate": 4.7286183450649224e-05, "loss": 0.8397, "step": 10736 }, { "epoch": 0.1522737183191466, "grad_norm": 2.921875, "learning_rate": 4.7285170964228294e-05, "loss": 0.8087, "step": 10738 }, { "epoch": 0.1523020799727728, "grad_norm": 3.0, "learning_rate": 4.728415829981422e-05, "loss": 0.8147, "step": 10740 }, { "epoch": 0.15233044162639903, "grad_norm": 3.34375, "learning_rate": 4.728314545741508e-05, "loss": 0.8569, "step": 10742 }, { "epoch": 0.15235880328002524, "grad_norm": 3.21875, "learning_rate": 4.7282132437038975e-05, "loss": 0.8676, "step": 10744 }, { "epoch": 0.15238716493365145, "grad_norm": 3.015625, "learning_rate": 4.728111923869399e-05, "loss": 0.8694, "step": 10746 }, { "epoch": 0.15241552658727767, "grad_norm": 2.96875, "learning_rate": 4.728010586238822e-05, "loss": 0.859, "step": 10748 }, { "epoch": 0.15244388824090388, "grad_norm": 3.296875, "learning_rate": 4.727909230812976e-05, "loss": 0.8582, "step": 10750 }, { "epoch": 0.1524722498945301, "grad_norm": 3.09375, "learning_rate": 4.72780785759267e-05, "loss": 0.8241, "step": 10752 }, { "epoch": 0.1525006115481563, "grad_norm": 3.234375, "learning_rate": 4.727706466578714e-05, "loss": 0.8889, "step": 10754 }, { "epoch": 0.15252897320178252, "grad_norm": 3.09375, "learning_rate": 4.727605057771919e-05, "loss": 0.8372, "step": 10756 }, { "epoch": 0.15255733485540873, "grad_norm": 3.3125, "learning_rate": 4.727503631173092e-05, "loss": 0.8457, "step": 10758 }, { "epoch": 0.15258569650903495, "grad_norm": 2.78125, "learning_rate": 4.7274021867830465e-05, "loss": 0.8614, "step": 10760 }, { "epoch": 0.1526140581626612, "grad_norm": 2.90625, "learning_rate": 4.727300724602591e-05, "loss": 0.8552, "step": 10762 }, { "epoch": 0.1526424198162874, "grad_norm": 3.234375, "learning_rate": 4.727199244632535e-05, "loss": 0.8533, "step": 10764 }, { "epoch": 0.1526707814699136, "grad_norm": 2.84375, "learning_rate": 4.727097746873691e-05, "loss": 0.8328, "step": 10766 }, { "epoch": 0.15269914312353983, "grad_norm": 3.1875, "learning_rate": 4.726996231326869e-05, "loss": 0.8426, "step": 10768 }, { "epoch": 0.15272750477716604, "grad_norm": 2.859375, "learning_rate": 4.726894697992879e-05, "loss": 0.8401, "step": 10770 }, { "epoch": 0.15275586643079225, "grad_norm": 3.375, "learning_rate": 4.7267931468725326e-05, "loss": 0.8962, "step": 10772 }, { "epoch": 0.15278422808441847, "grad_norm": 3.296875, "learning_rate": 4.7266915779666405e-05, "loss": 0.8235, "step": 10774 }, { "epoch": 0.15281258973804468, "grad_norm": 2.9375, "learning_rate": 4.7265899912760144e-05, "loss": 0.8862, "step": 10776 }, { "epoch": 0.1528409513916709, "grad_norm": 2.953125, "learning_rate": 4.7264883868014654e-05, "loss": 0.8473, "step": 10778 }, { "epoch": 0.1528693130452971, "grad_norm": 3.578125, "learning_rate": 4.726386764543805e-05, "loss": 0.8784, "step": 10780 }, { "epoch": 0.15289767469892332, "grad_norm": 3.15625, "learning_rate": 4.7262851245038456e-05, "loss": 0.8825, "step": 10782 }, { "epoch": 0.15292603635254953, "grad_norm": 3.046875, "learning_rate": 4.726183466682399e-05, "loss": 0.8681, "step": 10784 }, { "epoch": 0.15295439800617575, "grad_norm": 3.0625, "learning_rate": 4.726081791080276e-05, "loss": 0.8223, "step": 10786 }, { "epoch": 0.15298275965980196, "grad_norm": 2.796875, "learning_rate": 4.725980097698288e-05, "loss": 0.797, "step": 10788 }, { "epoch": 0.15301112131342817, "grad_norm": 3.296875, "learning_rate": 4.72587838653725e-05, "loss": 0.8692, "step": 10790 }, { "epoch": 0.15303948296705439, "grad_norm": 2.921875, "learning_rate": 4.725776657597972e-05, "loss": 0.8666, "step": 10792 }, { "epoch": 0.1530678446206806, "grad_norm": 3.046875, "learning_rate": 4.725674910881268e-05, "loss": 0.9008, "step": 10794 }, { "epoch": 0.1530962062743068, "grad_norm": 3.125, "learning_rate": 4.725573146387949e-05, "loss": 0.8792, "step": 10796 }, { "epoch": 0.15312456792793303, "grad_norm": 3.125, "learning_rate": 4.7254713641188304e-05, "loss": 0.8704, "step": 10798 }, { "epoch": 0.15315292958155927, "grad_norm": 3.015625, "learning_rate": 4.725369564074723e-05, "loss": 0.8353, "step": 10800 }, { "epoch": 0.15318129123518548, "grad_norm": 3.34375, "learning_rate": 4.72526774625644e-05, "loss": 0.853, "step": 10802 }, { "epoch": 0.1532096528888117, "grad_norm": 3.40625, "learning_rate": 4.725165910664795e-05, "loss": 0.8656, "step": 10804 }, { "epoch": 0.1532380145424379, "grad_norm": 3.109375, "learning_rate": 4.725064057300602e-05, "loss": 0.8262, "step": 10806 }, { "epoch": 0.15326637619606412, "grad_norm": 2.96875, "learning_rate": 4.724962186164674e-05, "loss": 0.8688, "step": 10808 }, { "epoch": 0.15329473784969033, "grad_norm": 2.953125, "learning_rate": 4.724860297257824e-05, "loss": 0.8439, "step": 10810 }, { "epoch": 0.15332309950331655, "grad_norm": 3.171875, "learning_rate": 4.724758390580867e-05, "loss": 0.8513, "step": 10812 }, { "epoch": 0.15335146115694276, "grad_norm": 3.34375, "learning_rate": 4.724656466134616e-05, "loss": 0.8601, "step": 10814 }, { "epoch": 0.15337982281056897, "grad_norm": 2.890625, "learning_rate": 4.7245545239198855e-05, "loss": 0.8412, "step": 10816 }, { "epoch": 0.15340818446419519, "grad_norm": 4.0, "learning_rate": 4.724452563937489e-05, "loss": 0.8694, "step": 10818 }, { "epoch": 0.1534365461178214, "grad_norm": 3.484375, "learning_rate": 4.7243505861882434e-05, "loss": 0.851, "step": 10820 }, { "epoch": 0.1534649077714476, "grad_norm": 3.25, "learning_rate": 4.724248590672959e-05, "loss": 0.8515, "step": 10822 }, { "epoch": 0.15349326942507382, "grad_norm": 2.859375, "learning_rate": 4.724146577392454e-05, "loss": 0.8753, "step": 10824 }, { "epoch": 0.15352163107870004, "grad_norm": 3.09375, "learning_rate": 4.724044546347542e-05, "loss": 0.8399, "step": 10826 }, { "epoch": 0.15354999273232625, "grad_norm": 3.046875, "learning_rate": 4.723942497539038e-05, "loss": 0.8265, "step": 10828 }, { "epoch": 0.15357835438595246, "grad_norm": 2.90625, "learning_rate": 4.723840430967756e-05, "loss": 0.867, "step": 10830 }, { "epoch": 0.15360671603957868, "grad_norm": 3.34375, "learning_rate": 4.723738346634513e-05, "loss": 0.8405, "step": 10832 }, { "epoch": 0.1536350776932049, "grad_norm": 3.15625, "learning_rate": 4.7236362445401227e-05, "loss": 0.8562, "step": 10834 }, { "epoch": 0.1536634393468311, "grad_norm": 3.609375, "learning_rate": 4.723534124685402e-05, "loss": 0.8798, "step": 10836 }, { "epoch": 0.15369180100045735, "grad_norm": 3.109375, "learning_rate": 4.723431987071166e-05, "loss": 0.8262, "step": 10838 }, { "epoch": 0.15372016265408356, "grad_norm": 3.0625, "learning_rate": 4.72332983169823e-05, "loss": 0.8394, "step": 10840 }, { "epoch": 0.15374852430770977, "grad_norm": 3.109375, "learning_rate": 4.723227658567411e-05, "loss": 0.8423, "step": 10842 }, { "epoch": 0.15377688596133599, "grad_norm": 2.953125, "learning_rate": 4.723125467679523e-05, "loss": 0.8594, "step": 10844 }, { "epoch": 0.1538052476149622, "grad_norm": 2.890625, "learning_rate": 4.723023259035385e-05, "loss": 0.8316, "step": 10846 }, { "epoch": 0.1538336092685884, "grad_norm": 3.359375, "learning_rate": 4.722921032635812e-05, "loss": 0.9154, "step": 10848 }, { "epoch": 0.15386197092221462, "grad_norm": 3.25, "learning_rate": 4.72281878848162e-05, "loss": 0.8611, "step": 10850 }, { "epoch": 0.15389033257584084, "grad_norm": 2.875, "learning_rate": 4.722716526573626e-05, "loss": 0.8358, "step": 10852 }, { "epoch": 0.15391869422946705, "grad_norm": 3.421875, "learning_rate": 4.722614246912647e-05, "loss": 0.8122, "step": 10854 }, { "epoch": 0.15394705588309326, "grad_norm": 2.859375, "learning_rate": 4.7225119494995e-05, "loss": 0.8489, "step": 10856 }, { "epoch": 0.15397541753671948, "grad_norm": 3.40625, "learning_rate": 4.722409634335001e-05, "loss": 0.8791, "step": 10858 }, { "epoch": 0.1540037791903457, "grad_norm": 3.0, "learning_rate": 4.722307301419968e-05, "loss": 0.8559, "step": 10860 }, { "epoch": 0.1540321408439719, "grad_norm": 3.15625, "learning_rate": 4.722204950755219e-05, "loss": 0.8293, "step": 10862 }, { "epoch": 0.15406050249759812, "grad_norm": 3.1875, "learning_rate": 4.722102582341571e-05, "loss": 0.8668, "step": 10864 }, { "epoch": 0.15408886415122433, "grad_norm": 3.125, "learning_rate": 4.722000196179841e-05, "loss": 0.8517, "step": 10866 }, { "epoch": 0.15411722580485054, "grad_norm": 3.265625, "learning_rate": 4.721897792270848e-05, "loss": 0.8068, "step": 10868 }, { "epoch": 0.15414558745847676, "grad_norm": 2.765625, "learning_rate": 4.7217953706154074e-05, "loss": 0.8635, "step": 10870 }, { "epoch": 0.15417394911210297, "grad_norm": 2.859375, "learning_rate": 4.7216929312143396e-05, "loss": 0.8683, "step": 10872 }, { "epoch": 0.15420231076572918, "grad_norm": 3.109375, "learning_rate": 4.7215904740684634e-05, "loss": 0.8272, "step": 10874 }, { "epoch": 0.1542306724193554, "grad_norm": 3.1875, "learning_rate": 4.7214879991785955e-05, "loss": 0.8546, "step": 10876 }, { "epoch": 0.15425903407298164, "grad_norm": 3.125, "learning_rate": 4.721385506545554e-05, "loss": 0.8566, "step": 10878 }, { "epoch": 0.15428739572660785, "grad_norm": 2.625, "learning_rate": 4.721282996170159e-05, "loss": 0.8234, "step": 10880 }, { "epoch": 0.15431575738023406, "grad_norm": 3.375, "learning_rate": 4.7211804680532276e-05, "loss": 0.8506, "step": 10882 }, { "epoch": 0.15434411903386028, "grad_norm": 3.0625, "learning_rate": 4.72107792219558e-05, "loss": 0.8685, "step": 10884 }, { "epoch": 0.1543724806874865, "grad_norm": 3.140625, "learning_rate": 4.7209753585980355e-05, "loss": 0.8528, "step": 10886 }, { "epoch": 0.1544008423411127, "grad_norm": 3.0625, "learning_rate": 4.7208727772614125e-05, "loss": 0.8811, "step": 10888 }, { "epoch": 0.15442920399473892, "grad_norm": 2.734375, "learning_rate": 4.72077017818653e-05, "loss": 0.8816, "step": 10890 }, { "epoch": 0.15445756564836513, "grad_norm": 2.734375, "learning_rate": 4.7206675613742084e-05, "loss": 0.7986, "step": 10892 }, { "epoch": 0.15448592730199134, "grad_norm": 2.6875, "learning_rate": 4.720564926825267e-05, "loss": 0.8251, "step": 10894 }, { "epoch": 0.15451428895561756, "grad_norm": 3.421875, "learning_rate": 4.7204622745405255e-05, "loss": 0.8616, "step": 10896 }, { "epoch": 0.15454265060924377, "grad_norm": 3.5, "learning_rate": 4.720359604520803e-05, "loss": 0.8627, "step": 10898 }, { "epoch": 0.15457101226286998, "grad_norm": 3.3125, "learning_rate": 4.7202569167669206e-05, "loss": 0.869, "step": 10900 }, { "epoch": 0.1545993739164962, "grad_norm": 3.15625, "learning_rate": 4.720154211279698e-05, "loss": 0.8751, "step": 10902 }, { "epoch": 0.1546277355701224, "grad_norm": 3.0625, "learning_rate": 4.720051488059956e-05, "loss": 0.7982, "step": 10904 }, { "epoch": 0.15465609722374862, "grad_norm": 2.796875, "learning_rate": 4.7199487471085145e-05, "loss": 0.82, "step": 10906 }, { "epoch": 0.15468445887737484, "grad_norm": 3.40625, "learning_rate": 4.719845988426195e-05, "loss": 0.8417, "step": 10908 }, { "epoch": 0.15471282053100105, "grad_norm": 2.953125, "learning_rate": 4.719743212013816e-05, "loss": 0.8134, "step": 10910 }, { "epoch": 0.15474118218462726, "grad_norm": 2.921875, "learning_rate": 4.719640417872201e-05, "loss": 0.8642, "step": 10912 }, { "epoch": 0.15476954383825348, "grad_norm": 3.484375, "learning_rate": 4.719537606002169e-05, "loss": 0.8987, "step": 10914 }, { "epoch": 0.15479790549187972, "grad_norm": 3.703125, "learning_rate": 4.7194347764045435e-05, "loss": 0.9024, "step": 10916 }, { "epoch": 0.15482626714550593, "grad_norm": 3.40625, "learning_rate": 4.7193319290801427e-05, "loss": 0.8815, "step": 10918 }, { "epoch": 0.15485462879913214, "grad_norm": 3.09375, "learning_rate": 4.719229064029791e-05, "loss": 0.8757, "step": 10920 }, { "epoch": 0.15488299045275836, "grad_norm": 3.234375, "learning_rate": 4.7191261812543084e-05, "loss": 0.8833, "step": 10922 }, { "epoch": 0.15491135210638457, "grad_norm": 3.0625, "learning_rate": 4.719023280754517e-05, "loss": 0.9141, "step": 10924 }, { "epoch": 0.15493971376001078, "grad_norm": 2.890625, "learning_rate": 4.718920362531239e-05, "loss": 0.8534, "step": 10926 }, { "epoch": 0.154968075413637, "grad_norm": 3.0625, "learning_rate": 4.718817426585296e-05, "loss": 0.8669, "step": 10928 }, { "epoch": 0.1549964370672632, "grad_norm": 3.234375, "learning_rate": 4.718714472917509e-05, "loss": 0.8801, "step": 10930 }, { "epoch": 0.15502479872088942, "grad_norm": 2.96875, "learning_rate": 4.718611501528703e-05, "loss": 0.8831, "step": 10932 }, { "epoch": 0.15505316037451564, "grad_norm": 3.078125, "learning_rate": 4.718508512419698e-05, "loss": 0.8782, "step": 10934 }, { "epoch": 0.15508152202814185, "grad_norm": 2.953125, "learning_rate": 4.718405505591318e-05, "loss": 0.8657, "step": 10936 }, { "epoch": 0.15510988368176806, "grad_norm": 3.203125, "learning_rate": 4.718302481044385e-05, "loss": 0.8123, "step": 10938 }, { "epoch": 0.15513824533539428, "grad_norm": 2.890625, "learning_rate": 4.718199438779722e-05, "loss": 0.8473, "step": 10940 }, { "epoch": 0.1551666069890205, "grad_norm": 2.953125, "learning_rate": 4.718096378798153e-05, "loss": 0.8624, "step": 10942 }, { "epoch": 0.1551949686426467, "grad_norm": 3.0625, "learning_rate": 4.717993301100499e-05, "loss": 0.8787, "step": 10944 }, { "epoch": 0.15522333029627292, "grad_norm": 2.765625, "learning_rate": 4.717890205687586e-05, "loss": 0.8178, "step": 10946 }, { "epoch": 0.15525169194989913, "grad_norm": 3.34375, "learning_rate": 4.7177870925602354e-05, "loss": 0.8567, "step": 10948 }, { "epoch": 0.15528005360352534, "grad_norm": 2.671875, "learning_rate": 4.717683961719271e-05, "loss": 0.8356, "step": 10950 }, { "epoch": 0.15530841525715156, "grad_norm": 3.203125, "learning_rate": 4.717580813165517e-05, "loss": 0.8421, "step": 10952 }, { "epoch": 0.1553367769107778, "grad_norm": 3.015625, "learning_rate": 4.717477646899797e-05, "loss": 0.8622, "step": 10954 }, { "epoch": 0.155365138564404, "grad_norm": 3.890625, "learning_rate": 4.7173744629229354e-05, "loss": 0.8358, "step": 10956 }, { "epoch": 0.15539350021803022, "grad_norm": 3.65625, "learning_rate": 4.717271261235756e-05, "loss": 0.8596, "step": 10958 }, { "epoch": 0.15542186187165644, "grad_norm": 2.921875, "learning_rate": 4.717168041839083e-05, "loss": 0.8515, "step": 10960 }, { "epoch": 0.15545022352528265, "grad_norm": 3.34375, "learning_rate": 4.7170648047337415e-05, "loss": 0.8765, "step": 10962 }, { "epoch": 0.15547858517890886, "grad_norm": 2.9375, "learning_rate": 4.7169615499205546e-05, "loss": 0.8578, "step": 10964 }, { "epoch": 0.15550694683253508, "grad_norm": 3.28125, "learning_rate": 4.7168582774003486e-05, "loss": 0.8834, "step": 10966 }, { "epoch": 0.1555353084861613, "grad_norm": 3.1875, "learning_rate": 4.716754987173948e-05, "loss": 0.8671, "step": 10968 }, { "epoch": 0.1555636701397875, "grad_norm": 2.796875, "learning_rate": 4.7166516792421765e-05, "loss": 0.8555, "step": 10970 }, { "epoch": 0.15559203179341372, "grad_norm": 3.109375, "learning_rate": 4.7165483536058605e-05, "loss": 0.8604, "step": 10972 }, { "epoch": 0.15562039344703993, "grad_norm": 3.203125, "learning_rate": 4.716445010265826e-05, "loss": 0.8221, "step": 10974 }, { "epoch": 0.15564875510066614, "grad_norm": 3.0, "learning_rate": 4.7163416492228966e-05, "loss": 0.8499, "step": 10976 }, { "epoch": 0.15567711675429236, "grad_norm": 3.046875, "learning_rate": 4.716238270477899e-05, "loss": 0.8699, "step": 10978 }, { "epoch": 0.15570547840791857, "grad_norm": 2.984375, "learning_rate": 4.716134874031657e-05, "loss": 0.8367, "step": 10980 }, { "epoch": 0.15573384006154478, "grad_norm": 2.71875, "learning_rate": 4.716031459884999e-05, "loss": 0.8115, "step": 10982 }, { "epoch": 0.155762201715171, "grad_norm": 3.640625, "learning_rate": 4.7159280280387495e-05, "loss": 0.8136, "step": 10984 }, { "epoch": 0.1557905633687972, "grad_norm": 2.90625, "learning_rate": 4.715824578493736e-05, "loss": 0.9136, "step": 10986 }, { "epoch": 0.15581892502242342, "grad_norm": 3.09375, "learning_rate": 4.715721111250783e-05, "loss": 0.8731, "step": 10988 }, { "epoch": 0.15584728667604963, "grad_norm": 3.078125, "learning_rate": 4.715617626310718e-05, "loss": 0.8863, "step": 10990 }, { "epoch": 0.15587564832967588, "grad_norm": 3.421875, "learning_rate": 4.715514123674367e-05, "loss": 0.8613, "step": 10992 }, { "epoch": 0.1559040099833021, "grad_norm": 3.109375, "learning_rate": 4.715410603342557e-05, "loss": 0.7919, "step": 10994 }, { "epoch": 0.1559323716369283, "grad_norm": 3.046875, "learning_rate": 4.715307065316115e-05, "loss": 0.8035, "step": 10996 }, { "epoch": 0.15596073329055452, "grad_norm": 2.890625, "learning_rate": 4.715203509595868e-05, "loss": 0.8812, "step": 10998 }, { "epoch": 0.15598909494418073, "grad_norm": 3.015625, "learning_rate": 4.715099936182642e-05, "loss": 0.8406, "step": 11000 }, { "epoch": 0.15601745659780694, "grad_norm": 3.078125, "learning_rate": 4.714996345077265e-05, "loss": 0.8831, "step": 11002 }, { "epoch": 0.15604581825143315, "grad_norm": 3.4375, "learning_rate": 4.714892736280565e-05, "loss": 0.8271, "step": 11004 }, { "epoch": 0.15607417990505937, "grad_norm": 3.109375, "learning_rate": 4.71478910979337e-05, "loss": 0.8397, "step": 11006 }, { "epoch": 0.15610254155868558, "grad_norm": 3.0625, "learning_rate": 4.7146854656165045e-05, "loss": 0.8772, "step": 11008 }, { "epoch": 0.1561309032123118, "grad_norm": 3.046875, "learning_rate": 4.7145818037507996e-05, "loss": 0.849, "step": 11010 }, { "epoch": 0.156159264865938, "grad_norm": 3.359375, "learning_rate": 4.7144781241970815e-05, "loss": 0.8993, "step": 11012 }, { "epoch": 0.15618762651956422, "grad_norm": 3.203125, "learning_rate": 4.71437442695618e-05, "loss": 0.8754, "step": 11014 }, { "epoch": 0.15621598817319043, "grad_norm": 2.84375, "learning_rate": 4.714270712028921e-05, "loss": 0.8987, "step": 11016 }, { "epoch": 0.15624434982681665, "grad_norm": 3.34375, "learning_rate": 4.7141669794161345e-05, "loss": 0.8291, "step": 11018 }, { "epoch": 0.15627271148044286, "grad_norm": 2.640625, "learning_rate": 4.714063229118649e-05, "loss": 0.8518, "step": 11020 }, { "epoch": 0.15630107313406907, "grad_norm": 3.109375, "learning_rate": 4.713959461137293e-05, "loss": 0.8206, "step": 11022 }, { "epoch": 0.1563294347876953, "grad_norm": 2.78125, "learning_rate": 4.7138556754728944e-05, "loss": 0.8363, "step": 11024 }, { "epoch": 0.1563577964413215, "grad_norm": 3.21875, "learning_rate": 4.713751872126284e-05, "loss": 0.8701, "step": 11026 }, { "epoch": 0.1563861580949477, "grad_norm": 2.984375, "learning_rate": 4.713648051098289e-05, "loss": 0.8793, "step": 11028 }, { "epoch": 0.15641451974857393, "grad_norm": 3.109375, "learning_rate": 4.713544212389739e-05, "loss": 0.8601, "step": 11030 }, { "epoch": 0.15644288140220017, "grad_norm": 3.203125, "learning_rate": 4.713440356001464e-05, "loss": 0.8541, "step": 11032 }, { "epoch": 0.15647124305582638, "grad_norm": 3.0, "learning_rate": 4.713336481934294e-05, "loss": 0.8945, "step": 11034 }, { "epoch": 0.1564996047094526, "grad_norm": 2.8125, "learning_rate": 4.713232590189057e-05, "loss": 0.7985, "step": 11036 }, { "epoch": 0.1565279663630788, "grad_norm": 2.921875, "learning_rate": 4.713128680766584e-05, "loss": 0.8518, "step": 11038 }, { "epoch": 0.15655632801670502, "grad_norm": 3.03125, "learning_rate": 4.713024753667704e-05, "loss": 0.8538, "step": 11040 }, { "epoch": 0.15658468967033123, "grad_norm": 2.828125, "learning_rate": 4.712920808893249e-05, "loss": 0.8243, "step": 11042 }, { "epoch": 0.15661305132395745, "grad_norm": 3.125, "learning_rate": 4.712816846444047e-05, "loss": 0.9115, "step": 11044 }, { "epoch": 0.15664141297758366, "grad_norm": 3.3125, "learning_rate": 4.712712866320929e-05, "loss": 0.8904, "step": 11046 }, { "epoch": 0.15666977463120987, "grad_norm": 3.0, "learning_rate": 4.712608868524726e-05, "loss": 0.8497, "step": 11048 }, { "epoch": 0.1566981362848361, "grad_norm": 3.3125, "learning_rate": 4.7125048530562686e-05, "loss": 0.8311, "step": 11050 }, { "epoch": 0.1567264979384623, "grad_norm": 3.359375, "learning_rate": 4.712400819916387e-05, "loss": 0.8532, "step": 11052 }, { "epoch": 0.1567548595920885, "grad_norm": 3.171875, "learning_rate": 4.712296769105913e-05, "loss": 0.7876, "step": 11054 }, { "epoch": 0.15678322124571473, "grad_norm": 3.203125, "learning_rate": 4.712192700625677e-05, "loss": 0.8316, "step": 11056 }, { "epoch": 0.15681158289934094, "grad_norm": 3.28125, "learning_rate": 4.71208861447651e-05, "loss": 0.8606, "step": 11058 }, { "epoch": 0.15683994455296715, "grad_norm": 2.78125, "learning_rate": 4.711984510659244e-05, "loss": 0.8486, "step": 11060 }, { "epoch": 0.15686830620659337, "grad_norm": 3.09375, "learning_rate": 4.71188038917471e-05, "loss": 0.8634, "step": 11062 }, { "epoch": 0.15689666786021958, "grad_norm": 3.578125, "learning_rate": 4.7117762500237405e-05, "loss": 0.8459, "step": 11064 }, { "epoch": 0.1569250295138458, "grad_norm": 3.234375, "learning_rate": 4.711672093207166e-05, "loss": 0.8516, "step": 11066 }, { "epoch": 0.156953391167472, "grad_norm": 3.421875, "learning_rate": 4.711567918725819e-05, "loss": 0.882, "step": 11068 }, { "epoch": 0.15698175282109825, "grad_norm": 3.53125, "learning_rate": 4.711463726580531e-05, "loss": 0.8967, "step": 11070 }, { "epoch": 0.15701011447472446, "grad_norm": 3.3125, "learning_rate": 4.711359516772135e-05, "loss": 0.913, "step": 11072 }, { "epoch": 0.15703847612835067, "grad_norm": 3.140625, "learning_rate": 4.7112552893014634e-05, "loss": 0.8731, "step": 11074 }, { "epoch": 0.1570668377819769, "grad_norm": 2.921875, "learning_rate": 4.711151044169348e-05, "loss": 0.8617, "step": 11076 }, { "epoch": 0.1570951994356031, "grad_norm": 2.921875, "learning_rate": 4.7110467813766215e-05, "loss": 0.8272, "step": 11078 }, { "epoch": 0.1571235610892293, "grad_norm": 3.03125, "learning_rate": 4.710942500924118e-05, "loss": 0.7927, "step": 11080 }, { "epoch": 0.15715192274285553, "grad_norm": 3.046875, "learning_rate": 4.710838202812668e-05, "loss": 0.8717, "step": 11082 }, { "epoch": 0.15718028439648174, "grad_norm": 3.375, "learning_rate": 4.710733887043106e-05, "loss": 0.9206, "step": 11084 }, { "epoch": 0.15720864605010795, "grad_norm": 2.796875, "learning_rate": 4.710629553616266e-05, "loss": 0.843, "step": 11086 }, { "epoch": 0.15723700770373417, "grad_norm": 2.90625, "learning_rate": 4.7105252025329795e-05, "loss": 0.8475, "step": 11088 }, { "epoch": 0.15726536935736038, "grad_norm": 3.046875, "learning_rate": 4.71042083379408e-05, "loss": 0.807, "step": 11090 }, { "epoch": 0.1572937310109866, "grad_norm": 3.046875, "learning_rate": 4.7103164474004037e-05, "loss": 0.8238, "step": 11092 }, { "epoch": 0.1573220926646128, "grad_norm": 3.09375, "learning_rate": 4.7102120433527806e-05, "loss": 0.8518, "step": 11094 }, { "epoch": 0.15735045431823902, "grad_norm": 3.203125, "learning_rate": 4.7101076216520476e-05, "loss": 0.8442, "step": 11096 }, { "epoch": 0.15737881597186523, "grad_norm": 2.9375, "learning_rate": 4.7100031822990376e-05, "loss": 0.8668, "step": 11098 }, { "epoch": 0.15740717762549145, "grad_norm": 2.9375, "learning_rate": 4.7098987252945846e-05, "loss": 0.8282, "step": 11100 }, { "epoch": 0.15743553927911766, "grad_norm": 3.078125, "learning_rate": 4.709794250639523e-05, "loss": 0.7823, "step": 11102 }, { "epoch": 0.15746390093274387, "grad_norm": 3.109375, "learning_rate": 4.709689758334688e-05, "loss": 0.8433, "step": 11104 }, { "epoch": 0.15749226258637009, "grad_norm": 3.109375, "learning_rate": 4.709585248380912e-05, "loss": 0.8488, "step": 11106 }, { "epoch": 0.15752062423999633, "grad_norm": 3.015625, "learning_rate": 4.709480720779033e-05, "loss": 0.8455, "step": 11108 }, { "epoch": 0.15754898589362254, "grad_norm": 3.21875, "learning_rate": 4.7093761755298825e-05, "loss": 0.8606, "step": 11110 }, { "epoch": 0.15757734754724875, "grad_norm": 2.9375, "learning_rate": 4.709271612634298e-05, "loss": 0.8425, "step": 11112 }, { "epoch": 0.15760570920087497, "grad_norm": 3.265625, "learning_rate": 4.7091670320931134e-05, "loss": 0.8196, "step": 11114 }, { "epoch": 0.15763407085450118, "grad_norm": 3.03125, "learning_rate": 4.709062433907165e-05, "loss": 0.8583, "step": 11116 }, { "epoch": 0.1576624325081274, "grad_norm": 3.0625, "learning_rate": 4.7089578180772874e-05, "loss": 0.8326, "step": 11118 }, { "epoch": 0.1576907941617536, "grad_norm": 3.5, "learning_rate": 4.708853184604316e-05, "loss": 0.9074, "step": 11120 }, { "epoch": 0.15771915581537982, "grad_norm": 3.234375, "learning_rate": 4.7087485334890866e-05, "loss": 0.8143, "step": 11122 }, { "epoch": 0.15774751746900603, "grad_norm": 3.4375, "learning_rate": 4.708643864732436e-05, "loss": 0.8554, "step": 11124 }, { "epoch": 0.15777587912263225, "grad_norm": 3.0625, "learning_rate": 4.7085391783352e-05, "loss": 0.8634, "step": 11126 }, { "epoch": 0.15780424077625846, "grad_norm": 2.984375, "learning_rate": 4.708434474298213e-05, "loss": 0.8856, "step": 11128 }, { "epoch": 0.15783260242988467, "grad_norm": 2.828125, "learning_rate": 4.708329752622314e-05, "loss": 0.8834, "step": 11130 }, { "epoch": 0.15786096408351089, "grad_norm": 3.125, "learning_rate": 4.708225013308336e-05, "loss": 0.8756, "step": 11132 }, { "epoch": 0.1578893257371371, "grad_norm": 3.34375, "learning_rate": 4.7081202563571194e-05, "loss": 0.84, "step": 11134 }, { "epoch": 0.1579176873907633, "grad_norm": 3.21875, "learning_rate": 4.7080154817694976e-05, "loss": 0.8784, "step": 11136 }, { "epoch": 0.15794604904438952, "grad_norm": 3.328125, "learning_rate": 4.70791068954631e-05, "loss": 0.8593, "step": 11138 }, { "epoch": 0.15797441069801574, "grad_norm": 3.203125, "learning_rate": 4.707805879688392e-05, "loss": 0.854, "step": 11140 }, { "epoch": 0.15800277235164195, "grad_norm": 2.71875, "learning_rate": 4.7077010521965816e-05, "loss": 0.8378, "step": 11142 }, { "epoch": 0.15803113400526816, "grad_norm": 3.296875, "learning_rate": 4.707596207071715e-05, "loss": 0.8812, "step": 11144 }, { "epoch": 0.1580594956588944, "grad_norm": 2.890625, "learning_rate": 4.70749134431463e-05, "loss": 0.8696, "step": 11146 }, { "epoch": 0.15808785731252062, "grad_norm": 3.5, "learning_rate": 4.707386463926165e-05, "loss": 0.8111, "step": 11148 }, { "epoch": 0.15811621896614683, "grad_norm": 3.328125, "learning_rate": 4.707281565907157e-05, "loss": 0.8954, "step": 11150 }, { "epoch": 0.15814458061977305, "grad_norm": 2.953125, "learning_rate": 4.707176650258444e-05, "loss": 0.8123, "step": 11152 }, { "epoch": 0.15817294227339926, "grad_norm": 3.109375, "learning_rate": 4.707071716980863e-05, "loss": 0.9134, "step": 11154 }, { "epoch": 0.15820130392702547, "grad_norm": 3.109375, "learning_rate": 4.7069667660752544e-05, "loss": 0.8849, "step": 11156 }, { "epoch": 0.15822966558065168, "grad_norm": 3.1875, "learning_rate": 4.7068617975424546e-05, "loss": 0.8468, "step": 11158 }, { "epoch": 0.1582580272342779, "grad_norm": 2.6875, "learning_rate": 4.706756811383302e-05, "loss": 0.7827, "step": 11160 }, { "epoch": 0.1582863888879041, "grad_norm": 3.359375, "learning_rate": 4.706651807598635e-05, "loss": 0.8526, "step": 11162 }, { "epoch": 0.15831475054153032, "grad_norm": 3.546875, "learning_rate": 4.706546786189294e-05, "loss": 0.8705, "step": 11164 }, { "epoch": 0.15834311219515654, "grad_norm": 3.328125, "learning_rate": 4.706441747156116e-05, "loss": 0.8156, "step": 11166 }, { "epoch": 0.15837147384878275, "grad_norm": 3.40625, "learning_rate": 4.7063366904999404e-05, "loss": 0.829, "step": 11168 }, { "epoch": 0.15839983550240896, "grad_norm": 2.640625, "learning_rate": 4.706231616221607e-05, "loss": 0.8178, "step": 11170 }, { "epoch": 0.15842819715603518, "grad_norm": 2.90625, "learning_rate": 4.706126524321954e-05, "loss": 0.8704, "step": 11172 }, { "epoch": 0.1584565588096614, "grad_norm": 3.421875, "learning_rate": 4.706021414801822e-05, "loss": 0.8898, "step": 11174 }, { "epoch": 0.1584849204632876, "grad_norm": 2.953125, "learning_rate": 4.70591628766205e-05, "loss": 0.8189, "step": 11176 }, { "epoch": 0.15851328211691382, "grad_norm": 3.140625, "learning_rate": 4.705811142903477e-05, "loss": 0.8909, "step": 11178 }, { "epoch": 0.15854164377054003, "grad_norm": 3.3125, "learning_rate": 4.7057059805269434e-05, "loss": 0.8583, "step": 11180 }, { "epoch": 0.15857000542416624, "grad_norm": 3.03125, "learning_rate": 4.7056008005332886e-05, "loss": 0.8883, "step": 11182 }, { "epoch": 0.15859836707779246, "grad_norm": 3.203125, "learning_rate": 4.7054956029233535e-05, "loss": 0.8679, "step": 11184 }, { "epoch": 0.1586267287314187, "grad_norm": 3.171875, "learning_rate": 4.705390387697978e-05, "loss": 0.9084, "step": 11186 }, { "epoch": 0.1586550903850449, "grad_norm": 3.046875, "learning_rate": 4.705285154858002e-05, "loss": 0.8186, "step": 11188 }, { "epoch": 0.15868345203867112, "grad_norm": 3.125, "learning_rate": 4.7051799044042664e-05, "loss": 0.8662, "step": 11190 }, { "epoch": 0.15871181369229734, "grad_norm": 3.09375, "learning_rate": 4.705074636337612e-05, "loss": 0.8466, "step": 11192 }, { "epoch": 0.15874017534592355, "grad_norm": 3.109375, "learning_rate": 4.704969350658879e-05, "loss": 0.8762, "step": 11194 }, { "epoch": 0.15876853699954976, "grad_norm": 3.03125, "learning_rate": 4.7048640473689086e-05, "loss": 0.8659, "step": 11196 }, { "epoch": 0.15879689865317598, "grad_norm": 3.421875, "learning_rate": 4.7047587264685434e-05, "loss": 0.9, "step": 11198 }, { "epoch": 0.1588252603068022, "grad_norm": 2.78125, "learning_rate": 4.704653387958622e-05, "loss": 0.8421, "step": 11200 }, { "epoch": 0.1588536219604284, "grad_norm": 2.984375, "learning_rate": 4.704548031839987e-05, "loss": 0.9011, "step": 11202 }, { "epoch": 0.15888198361405462, "grad_norm": 3.0625, "learning_rate": 4.70444265811348e-05, "loss": 0.8748, "step": 11204 }, { "epoch": 0.15891034526768083, "grad_norm": 3.5625, "learning_rate": 4.704337266779942e-05, "loss": 0.855, "step": 11206 }, { "epoch": 0.15893870692130704, "grad_norm": 3.34375, "learning_rate": 4.704231857840216e-05, "loss": 0.8265, "step": 11208 }, { "epoch": 0.15896706857493326, "grad_norm": 3.375, "learning_rate": 4.704126431295143e-05, "loss": 0.8315, "step": 11210 }, { "epoch": 0.15899543022855947, "grad_norm": 3.234375, "learning_rate": 4.704020987145565e-05, "loss": 0.8484, "step": 11212 }, { "epoch": 0.15902379188218568, "grad_norm": 3.640625, "learning_rate": 4.703915525392324e-05, "loss": 0.8211, "step": 11214 }, { "epoch": 0.1590521535358119, "grad_norm": 3.625, "learning_rate": 4.703810046036263e-05, "loss": 0.8512, "step": 11216 }, { "epoch": 0.1590805151894381, "grad_norm": 2.96875, "learning_rate": 4.703704549078225e-05, "loss": 0.8499, "step": 11218 }, { "epoch": 0.15910887684306432, "grad_norm": 3.203125, "learning_rate": 4.7035990345190504e-05, "loss": 0.8608, "step": 11220 }, { "epoch": 0.15913723849669054, "grad_norm": 3.015625, "learning_rate": 4.703493502359584e-05, "loss": 0.8526, "step": 11222 }, { "epoch": 0.15916560015031678, "grad_norm": 3.21875, "learning_rate": 4.7033879526006674e-05, "loss": 0.9033, "step": 11224 }, { "epoch": 0.159193961803943, "grad_norm": 3.046875, "learning_rate": 4.703282385243145e-05, "loss": 0.8407, "step": 11226 }, { "epoch": 0.1592223234575692, "grad_norm": 3.234375, "learning_rate": 4.7031768002878596e-05, "loss": 0.8395, "step": 11228 }, { "epoch": 0.15925068511119542, "grad_norm": 3.71875, "learning_rate": 4.7030711977356536e-05, "loss": 0.9158, "step": 11230 }, { "epoch": 0.15927904676482163, "grad_norm": 3.15625, "learning_rate": 4.702965577587371e-05, "loss": 0.8865, "step": 11232 }, { "epoch": 0.15930740841844784, "grad_norm": 2.875, "learning_rate": 4.702859939843854e-05, "loss": 0.8097, "step": 11234 }, { "epoch": 0.15933577007207406, "grad_norm": 2.71875, "learning_rate": 4.70275428450595e-05, "loss": 0.8103, "step": 11236 }, { "epoch": 0.15936413172570027, "grad_norm": 2.984375, "learning_rate": 4.7026486115744995e-05, "loss": 0.8915, "step": 11238 }, { "epoch": 0.15939249337932648, "grad_norm": 3.4375, "learning_rate": 4.702542921050348e-05, "loss": 0.9178, "step": 11240 }, { "epoch": 0.1594208550329527, "grad_norm": 2.90625, "learning_rate": 4.702437212934339e-05, "loss": 0.8455, "step": 11242 }, { "epoch": 0.1594492166865789, "grad_norm": 3.0625, "learning_rate": 4.702331487227317e-05, "loss": 0.8746, "step": 11244 }, { "epoch": 0.15947757834020512, "grad_norm": 3.0, "learning_rate": 4.7022257439301266e-05, "loss": 0.8578, "step": 11246 }, { "epoch": 0.15950593999383134, "grad_norm": 3.109375, "learning_rate": 4.702119983043612e-05, "loss": 0.848, "step": 11248 }, { "epoch": 0.15953430164745755, "grad_norm": 3.09375, "learning_rate": 4.702014204568619e-05, "loss": 0.8508, "step": 11250 }, { "epoch": 0.15956266330108376, "grad_norm": 3.03125, "learning_rate": 4.701908408505992e-05, "loss": 0.8568, "step": 11252 }, { "epoch": 0.15959102495470998, "grad_norm": 3.171875, "learning_rate": 4.701802594856575e-05, "loss": 0.8186, "step": 11254 }, { "epoch": 0.1596193866083362, "grad_norm": 3.15625, "learning_rate": 4.701696763621213e-05, "loss": 0.8525, "step": 11256 }, { "epoch": 0.1596477482619624, "grad_norm": 3.125, "learning_rate": 4.701590914800753e-05, "loss": 0.8951, "step": 11258 }, { "epoch": 0.15967610991558862, "grad_norm": 3.015625, "learning_rate": 4.701485048396039e-05, "loss": 0.8287, "step": 11260 }, { "epoch": 0.15970447156921486, "grad_norm": 3.21875, "learning_rate": 4.701379164407917e-05, "loss": 0.8651, "step": 11262 }, { "epoch": 0.15973283322284107, "grad_norm": 3.640625, "learning_rate": 4.701273262837233e-05, "loss": 0.8517, "step": 11264 }, { "epoch": 0.15976119487646728, "grad_norm": 2.90625, "learning_rate": 4.7011673436848334e-05, "loss": 0.8433, "step": 11266 }, { "epoch": 0.1597895565300935, "grad_norm": 3.296875, "learning_rate": 4.701061406951563e-05, "loss": 0.9015, "step": 11268 }, { "epoch": 0.1598179181837197, "grad_norm": 2.703125, "learning_rate": 4.700955452638268e-05, "loss": 0.8102, "step": 11270 }, { "epoch": 0.15984627983734592, "grad_norm": 2.921875, "learning_rate": 4.7008494807457954e-05, "loss": 0.852, "step": 11272 }, { "epoch": 0.15987464149097214, "grad_norm": 2.953125, "learning_rate": 4.700743491274991e-05, "loss": 0.8546, "step": 11274 }, { "epoch": 0.15990300314459835, "grad_norm": 3.390625, "learning_rate": 4.700637484226702e-05, "loss": 0.8702, "step": 11276 }, { "epoch": 0.15993136479822456, "grad_norm": 3.15625, "learning_rate": 4.7005314596017736e-05, "loss": 0.8966, "step": 11278 }, { "epoch": 0.15995972645185078, "grad_norm": 2.875, "learning_rate": 4.700425417401054e-05, "loss": 0.8192, "step": 11280 }, { "epoch": 0.159988088105477, "grad_norm": 2.828125, "learning_rate": 4.70031935762539e-05, "loss": 0.869, "step": 11282 }, { "epoch": 0.1600164497591032, "grad_norm": 3.015625, "learning_rate": 4.700213280275629e-05, "loss": 0.8911, "step": 11284 }, { "epoch": 0.16004481141272942, "grad_norm": 3.296875, "learning_rate": 4.700107185352617e-05, "loss": 0.8333, "step": 11286 }, { "epoch": 0.16007317306635563, "grad_norm": 3.140625, "learning_rate": 4.7000010728572026e-05, "loss": 0.8438, "step": 11288 }, { "epoch": 0.16010153471998184, "grad_norm": 3.3125, "learning_rate": 4.699894942790233e-05, "loss": 0.8883, "step": 11290 }, { "epoch": 0.16012989637360805, "grad_norm": 2.828125, "learning_rate": 4.699788795152555e-05, "loss": 0.8369, "step": 11292 }, { "epoch": 0.16015825802723427, "grad_norm": 3.0, "learning_rate": 4.6996826299450184e-05, "loss": 0.8655, "step": 11294 }, { "epoch": 0.16018661968086048, "grad_norm": 3.140625, "learning_rate": 4.6995764471684686e-05, "loss": 0.8583, "step": 11296 }, { "epoch": 0.1602149813344867, "grad_norm": 2.84375, "learning_rate": 4.6994702468237555e-05, "loss": 0.8532, "step": 11298 }, { "epoch": 0.16024334298811294, "grad_norm": 2.90625, "learning_rate": 4.699364028911727e-05, "loss": 0.8679, "step": 11300 }, { "epoch": 0.16027170464173915, "grad_norm": 3.0, "learning_rate": 4.6992577934332315e-05, "loss": 0.828, "step": 11302 }, { "epoch": 0.16030006629536536, "grad_norm": 3.34375, "learning_rate": 4.6991515403891174e-05, "loss": 0.8946, "step": 11304 }, { "epoch": 0.16032842794899158, "grad_norm": 3.234375, "learning_rate": 4.699045269780232e-05, "loss": 0.8637, "step": 11306 }, { "epoch": 0.1603567896026178, "grad_norm": 2.828125, "learning_rate": 4.698938981607427e-05, "loss": 0.8357, "step": 11308 }, { "epoch": 0.160385151256244, "grad_norm": 3.265625, "learning_rate": 4.698832675871548e-05, "loss": 0.8588, "step": 11310 }, { "epoch": 0.16041351290987022, "grad_norm": 2.765625, "learning_rate": 4.6987263525734474e-05, "loss": 0.8119, "step": 11312 }, { "epoch": 0.16044187456349643, "grad_norm": 3.421875, "learning_rate": 4.6986200117139724e-05, "loss": 0.8713, "step": 11314 }, { "epoch": 0.16047023621712264, "grad_norm": 2.84375, "learning_rate": 4.6985136532939724e-05, "loss": 0.8768, "step": 11316 }, { "epoch": 0.16049859787074885, "grad_norm": 2.953125, "learning_rate": 4.698407277314297e-05, "loss": 0.8498, "step": 11318 }, { "epoch": 0.16052695952437507, "grad_norm": 3.265625, "learning_rate": 4.698300883775796e-05, "loss": 0.8207, "step": 11320 }, { "epoch": 0.16055532117800128, "grad_norm": 3.359375, "learning_rate": 4.698194472679319e-05, "loss": 0.8436, "step": 11322 }, { "epoch": 0.1605836828316275, "grad_norm": 2.75, "learning_rate": 4.6980880440257166e-05, "loss": 0.8522, "step": 11324 }, { "epoch": 0.1606120444852537, "grad_norm": 3.125, "learning_rate": 4.6979815978158386e-05, "loss": 0.8744, "step": 11326 }, { "epoch": 0.16064040613887992, "grad_norm": 2.75, "learning_rate": 4.6978751340505345e-05, "loss": 0.8471, "step": 11328 }, { "epoch": 0.16066876779250613, "grad_norm": 2.84375, "learning_rate": 4.6977686527306556e-05, "loss": 0.8471, "step": 11330 }, { "epoch": 0.16069712944613235, "grad_norm": 3.15625, "learning_rate": 4.697662153857052e-05, "loss": 0.83, "step": 11332 }, { "epoch": 0.16072549109975856, "grad_norm": 3.140625, "learning_rate": 4.6975556374305734e-05, "loss": 0.8638, "step": 11334 }, { "epoch": 0.16075385275338477, "grad_norm": 3.328125, "learning_rate": 4.697449103452072e-05, "loss": 0.8425, "step": 11336 }, { "epoch": 0.160782214407011, "grad_norm": 3.1875, "learning_rate": 4.697342551922398e-05, "loss": 0.9139, "step": 11338 }, { "epoch": 0.16081057606063723, "grad_norm": 3.25, "learning_rate": 4.697235982842402e-05, "loss": 0.897, "step": 11340 }, { "epoch": 0.16083893771426344, "grad_norm": 3.328125, "learning_rate": 4.697129396212936e-05, "loss": 0.8933, "step": 11342 }, { "epoch": 0.16086729936788965, "grad_norm": 2.859375, "learning_rate": 4.697022792034851e-05, "loss": 0.834, "step": 11344 }, { "epoch": 0.16089566102151587, "grad_norm": 3.140625, "learning_rate": 4.696916170308998e-05, "loss": 0.8006, "step": 11346 }, { "epoch": 0.16092402267514208, "grad_norm": 2.96875, "learning_rate": 4.696809531036229e-05, "loss": 0.8654, "step": 11348 }, { "epoch": 0.1609523843287683, "grad_norm": 2.84375, "learning_rate": 4.6967028742173964e-05, "loss": 0.8287, "step": 11350 }, { "epoch": 0.1609807459823945, "grad_norm": 3.5, "learning_rate": 4.696596199853351e-05, "loss": 0.8639, "step": 11352 }, { "epoch": 0.16100910763602072, "grad_norm": 3.234375, "learning_rate": 4.6964895079449456e-05, "loss": 0.8723, "step": 11354 }, { "epoch": 0.16103746928964693, "grad_norm": 3.25, "learning_rate": 4.696382798493032e-05, "loss": 0.8628, "step": 11356 }, { "epoch": 0.16106583094327315, "grad_norm": 2.734375, "learning_rate": 4.696276071498462e-05, "loss": 0.8354, "step": 11358 }, { "epoch": 0.16109419259689936, "grad_norm": 3.078125, "learning_rate": 4.696169326962088e-05, "loss": 0.8005, "step": 11360 }, { "epoch": 0.16112255425052557, "grad_norm": 2.75, "learning_rate": 4.696062564884764e-05, "loss": 0.8375, "step": 11362 }, { "epoch": 0.1611509159041518, "grad_norm": 3.0625, "learning_rate": 4.695955785267342e-05, "loss": 0.8701, "step": 11364 }, { "epoch": 0.161179277557778, "grad_norm": 2.734375, "learning_rate": 4.6958489881106736e-05, "loss": 0.8459, "step": 11366 }, { "epoch": 0.1612076392114042, "grad_norm": 3.328125, "learning_rate": 4.695742173415614e-05, "loss": 0.8407, "step": 11368 }, { "epoch": 0.16123600086503043, "grad_norm": 2.8125, "learning_rate": 4.6956353411830144e-05, "loss": 0.8236, "step": 11370 }, { "epoch": 0.16126436251865664, "grad_norm": 3.34375, "learning_rate": 4.69552849141373e-05, "loss": 0.9502, "step": 11372 }, { "epoch": 0.16129272417228285, "grad_norm": 2.9375, "learning_rate": 4.695421624108612e-05, "loss": 0.8808, "step": 11374 }, { "epoch": 0.16132108582590907, "grad_norm": 3.0, "learning_rate": 4.695314739268516e-05, "loss": 0.8186, "step": 11376 }, { "epoch": 0.1613494474795353, "grad_norm": 3.0625, "learning_rate": 4.6952078368942934e-05, "loss": 0.8791, "step": 11378 }, { "epoch": 0.16137780913316152, "grad_norm": 2.90625, "learning_rate": 4.6951009169868e-05, "loss": 0.79, "step": 11380 }, { "epoch": 0.16140617078678773, "grad_norm": 3.265625, "learning_rate": 4.69499397954689e-05, "loss": 0.896, "step": 11382 }, { "epoch": 0.16143453244041395, "grad_norm": 3.515625, "learning_rate": 4.694887024575415e-05, "loss": 0.8577, "step": 11384 }, { "epoch": 0.16146289409404016, "grad_norm": 2.96875, "learning_rate": 4.694780052073232e-05, "loss": 0.8402, "step": 11386 }, { "epoch": 0.16149125574766637, "grad_norm": 3.1875, "learning_rate": 4.694673062041195e-05, "loss": 0.8484, "step": 11388 }, { "epoch": 0.1615196174012926, "grad_norm": 3.359375, "learning_rate": 4.694566054480157e-05, "loss": 0.852, "step": 11390 }, { "epoch": 0.1615479790549188, "grad_norm": 3.125, "learning_rate": 4.694459029390973e-05, "loss": 0.8122, "step": 11392 }, { "epoch": 0.161576340708545, "grad_norm": 3.15625, "learning_rate": 4.694351986774499e-05, "loss": 0.8588, "step": 11394 }, { "epoch": 0.16160470236217123, "grad_norm": 3.203125, "learning_rate": 4.6942449266315894e-05, "loss": 0.8571, "step": 11396 }, { "epoch": 0.16163306401579744, "grad_norm": 3.125, "learning_rate": 4.694137848963099e-05, "loss": 0.8692, "step": 11398 }, { "epoch": 0.16166142566942365, "grad_norm": 3.09375, "learning_rate": 4.6940307537698833e-05, "loss": 0.8334, "step": 11400 }, { "epoch": 0.16168978732304987, "grad_norm": 3.015625, "learning_rate": 4.693923641052798e-05, "loss": 0.8515, "step": 11402 }, { "epoch": 0.16171814897667608, "grad_norm": 3.015625, "learning_rate": 4.693816510812698e-05, "loss": 0.8818, "step": 11404 }, { "epoch": 0.1617465106303023, "grad_norm": 2.78125, "learning_rate": 4.693709363050439e-05, "loss": 0.8506, "step": 11406 }, { "epoch": 0.1617748722839285, "grad_norm": 3.53125, "learning_rate": 4.6936021977668775e-05, "loss": 0.8685, "step": 11408 }, { "epoch": 0.16180323393755472, "grad_norm": 3.046875, "learning_rate": 4.693495014962869e-05, "loss": 0.849, "step": 11410 }, { "epoch": 0.16183159559118093, "grad_norm": 3.296875, "learning_rate": 4.6933878146392685e-05, "loss": 0.8936, "step": 11412 }, { "epoch": 0.16185995724480715, "grad_norm": 2.984375, "learning_rate": 4.693280596796934e-05, "loss": 0.8687, "step": 11414 }, { "epoch": 0.1618883188984334, "grad_norm": 2.875, "learning_rate": 4.6931733614367213e-05, "loss": 0.8827, "step": 11416 }, { "epoch": 0.1619166805520596, "grad_norm": 2.953125, "learning_rate": 4.6930661085594856e-05, "loss": 0.8577, "step": 11418 }, { "epoch": 0.1619450422056858, "grad_norm": 3.375, "learning_rate": 4.6929588381660855e-05, "loss": 0.8423, "step": 11420 }, { "epoch": 0.16197340385931203, "grad_norm": 3.015625, "learning_rate": 4.692851550257377e-05, "loss": 0.838, "step": 11422 }, { "epoch": 0.16200176551293824, "grad_norm": 3.4375, "learning_rate": 4.6927442448342165e-05, "loss": 0.8198, "step": 11424 }, { "epoch": 0.16203012716656445, "grad_norm": 2.96875, "learning_rate": 4.6926369218974606e-05, "loss": 0.8818, "step": 11426 }, { "epoch": 0.16205848882019067, "grad_norm": 3.015625, "learning_rate": 4.6925295814479685e-05, "loss": 0.824, "step": 11428 }, { "epoch": 0.16208685047381688, "grad_norm": 2.890625, "learning_rate": 4.692422223486597e-05, "loss": 0.8132, "step": 11430 }, { "epoch": 0.1621152121274431, "grad_norm": 3.03125, "learning_rate": 4.692314848014202e-05, "loss": 0.8332, "step": 11432 }, { "epoch": 0.1621435737810693, "grad_norm": 3.3125, "learning_rate": 4.6922074550316416e-05, "loss": 0.9003, "step": 11434 }, { "epoch": 0.16217193543469552, "grad_norm": 3.265625, "learning_rate": 4.692100044539775e-05, "loss": 0.8576, "step": 11436 }, { "epoch": 0.16220029708832173, "grad_norm": 3.578125, "learning_rate": 4.691992616539459e-05, "loss": 0.8811, "step": 11438 }, { "epoch": 0.16222865874194795, "grad_norm": 3.3125, "learning_rate": 4.6918851710315516e-05, "loss": 0.8616, "step": 11440 }, { "epoch": 0.16225702039557416, "grad_norm": 3.765625, "learning_rate": 4.691777708016911e-05, "loss": 0.8868, "step": 11442 }, { "epoch": 0.16228538204920037, "grad_norm": 3.03125, "learning_rate": 4.6916702274963954e-05, "loss": 0.8452, "step": 11444 }, { "epoch": 0.16231374370282659, "grad_norm": 3.1875, "learning_rate": 4.691562729470864e-05, "loss": 0.8681, "step": 11446 }, { "epoch": 0.1623421053564528, "grad_norm": 3.453125, "learning_rate": 4.691455213941175e-05, "loss": 0.8583, "step": 11448 }, { "epoch": 0.162370467010079, "grad_norm": 3.203125, "learning_rate": 4.691347680908187e-05, "loss": 0.8464, "step": 11450 }, { "epoch": 0.16239882866370522, "grad_norm": 2.671875, "learning_rate": 4.691240130372758e-05, "loss": 0.8307, "step": 11452 }, { "epoch": 0.16242719031733147, "grad_norm": 3.109375, "learning_rate": 4.6911325623357486e-05, "loss": 0.8338, "step": 11454 }, { "epoch": 0.16245555197095768, "grad_norm": 3.078125, "learning_rate": 4.691024976798017e-05, "loss": 0.8442, "step": 11456 }, { "epoch": 0.1624839136245839, "grad_norm": 3.375, "learning_rate": 4.690917373760424e-05, "loss": 0.832, "step": 11458 }, { "epoch": 0.1625122752782101, "grad_norm": 3.265625, "learning_rate": 4.6908097532238266e-05, "loss": 0.8076, "step": 11460 }, { "epoch": 0.16254063693183632, "grad_norm": 3.40625, "learning_rate": 4.690702115189086e-05, "loss": 0.8543, "step": 11462 }, { "epoch": 0.16256899858546253, "grad_norm": 3.09375, "learning_rate": 4.690594459657061e-05, "loss": 0.8649, "step": 11464 }, { "epoch": 0.16259736023908875, "grad_norm": 3.34375, "learning_rate": 4.690486786628612e-05, "loss": 0.8624, "step": 11466 }, { "epoch": 0.16262572189271496, "grad_norm": 3.359375, "learning_rate": 4.690379096104599e-05, "loss": 0.903, "step": 11468 }, { "epoch": 0.16265408354634117, "grad_norm": 3.171875, "learning_rate": 4.690271388085883e-05, "loss": 0.8637, "step": 11470 }, { "epoch": 0.16268244519996738, "grad_norm": 2.921875, "learning_rate": 4.690163662573323e-05, "loss": 0.8222, "step": 11472 }, { "epoch": 0.1627108068535936, "grad_norm": 2.953125, "learning_rate": 4.690055919567778e-05, "loss": 0.8182, "step": 11474 }, { "epoch": 0.1627391685072198, "grad_norm": 2.90625, "learning_rate": 4.689948159070112e-05, "loss": 0.8298, "step": 11476 }, { "epoch": 0.16276753016084602, "grad_norm": 3.1875, "learning_rate": 4.689840381081184e-05, "loss": 0.8597, "step": 11478 }, { "epoch": 0.16279589181447224, "grad_norm": 2.765625, "learning_rate": 4.689732585601854e-05, "loss": 0.8127, "step": 11480 }, { "epoch": 0.16282425346809845, "grad_norm": 3.25, "learning_rate": 4.6896247726329846e-05, "loss": 0.8257, "step": 11482 }, { "epoch": 0.16285261512172466, "grad_norm": 2.78125, "learning_rate": 4.6895169421754353e-05, "loss": 0.8197, "step": 11484 }, { "epoch": 0.16288097677535088, "grad_norm": 3.21875, "learning_rate": 4.689409094230068e-05, "loss": 0.8919, "step": 11486 }, { "epoch": 0.1629093384289771, "grad_norm": 3.15625, "learning_rate": 4.689301228797746e-05, "loss": 0.8532, "step": 11488 }, { "epoch": 0.1629377000826033, "grad_norm": 2.859375, "learning_rate": 4.689193345879327e-05, "loss": 0.8087, "step": 11490 }, { "epoch": 0.16296606173622952, "grad_norm": 2.921875, "learning_rate": 4.689085445475676e-05, "loss": 0.8089, "step": 11492 }, { "epoch": 0.16299442338985576, "grad_norm": 3.125, "learning_rate": 4.688977527587652e-05, "loss": 0.8766, "step": 11494 }, { "epoch": 0.16302278504348197, "grad_norm": 2.671875, "learning_rate": 4.68886959221612e-05, "loss": 0.8046, "step": 11496 }, { "epoch": 0.16305114669710818, "grad_norm": 3.078125, "learning_rate": 4.688761639361941e-05, "loss": 0.8748, "step": 11498 }, { "epoch": 0.1630795083507344, "grad_norm": 3.21875, "learning_rate": 4.688653669025975e-05, "loss": 0.8427, "step": 11500 }, { "epoch": 0.1631078700043606, "grad_norm": 3.0625, "learning_rate": 4.688545681209087e-05, "loss": 0.8648, "step": 11502 }, { "epoch": 0.16313623165798682, "grad_norm": 2.765625, "learning_rate": 4.688437675912139e-05, "loss": 0.8057, "step": 11504 }, { "epoch": 0.16316459331161304, "grad_norm": 3.078125, "learning_rate": 4.688329653135993e-05, "loss": 0.8624, "step": 11506 }, { "epoch": 0.16319295496523925, "grad_norm": 2.984375, "learning_rate": 4.6882216128815124e-05, "loss": 0.9001, "step": 11508 }, { "epoch": 0.16322131661886546, "grad_norm": 3.203125, "learning_rate": 4.688113555149559e-05, "loss": 0.8948, "step": 11510 }, { "epoch": 0.16324967827249168, "grad_norm": 2.96875, "learning_rate": 4.6880054799409976e-05, "loss": 0.8707, "step": 11512 }, { "epoch": 0.1632780399261179, "grad_norm": 2.890625, "learning_rate": 4.68789738725669e-05, "loss": 0.878, "step": 11514 }, { "epoch": 0.1633064015797441, "grad_norm": 3.03125, "learning_rate": 4.6877892770975005e-05, "loss": 0.8464, "step": 11516 }, { "epoch": 0.16333476323337032, "grad_norm": 2.875, "learning_rate": 4.6876811494642916e-05, "loss": 0.858, "step": 11518 }, { "epoch": 0.16336312488699653, "grad_norm": 2.828125, "learning_rate": 4.687573004357928e-05, "loss": 0.8419, "step": 11520 }, { "epoch": 0.16339148654062274, "grad_norm": 2.8125, "learning_rate": 4.6874648417792724e-05, "loss": 0.8261, "step": 11522 }, { "epoch": 0.16341984819424896, "grad_norm": 2.875, "learning_rate": 4.6873566617291894e-05, "loss": 0.8312, "step": 11524 }, { "epoch": 0.16344820984787517, "grad_norm": 3.34375, "learning_rate": 4.687248464208542e-05, "loss": 0.8739, "step": 11526 }, { "epoch": 0.16347657150150138, "grad_norm": 3.109375, "learning_rate": 4.6871402492181964e-05, "loss": 0.8103, "step": 11528 }, { "epoch": 0.1635049331551276, "grad_norm": 3.359375, "learning_rate": 4.687032016759016e-05, "loss": 0.8238, "step": 11530 }, { "epoch": 0.16353329480875384, "grad_norm": 3.328125, "learning_rate": 4.686923766831864e-05, "loss": 0.8358, "step": 11532 }, { "epoch": 0.16356165646238005, "grad_norm": 2.828125, "learning_rate": 4.6868154994376064e-05, "loss": 0.7996, "step": 11534 }, { "epoch": 0.16359001811600626, "grad_norm": 3.421875, "learning_rate": 4.686707214577107e-05, "loss": 0.8327, "step": 11536 }, { "epoch": 0.16361837976963248, "grad_norm": 3.109375, "learning_rate": 4.686598912251232e-05, "loss": 0.8625, "step": 11538 }, { "epoch": 0.1636467414232587, "grad_norm": 3.09375, "learning_rate": 4.686490592460845e-05, "loss": 0.8517, "step": 11540 }, { "epoch": 0.1636751030768849, "grad_norm": 2.90625, "learning_rate": 4.686382255206813e-05, "loss": 0.8575, "step": 11542 }, { "epoch": 0.16370346473051112, "grad_norm": 3.125, "learning_rate": 4.686273900489999e-05, "loss": 0.8284, "step": 11544 }, { "epoch": 0.16373182638413733, "grad_norm": 3.40625, "learning_rate": 4.6861655283112695e-05, "loss": 0.8591, "step": 11546 }, { "epoch": 0.16376018803776354, "grad_norm": 3.171875, "learning_rate": 4.68605713867149e-05, "loss": 0.8571, "step": 11548 }, { "epoch": 0.16378854969138976, "grad_norm": 3.75, "learning_rate": 4.6859487315715273e-05, "loss": 0.8581, "step": 11550 }, { "epoch": 0.16381691134501597, "grad_norm": 3.09375, "learning_rate": 4.6858403070122456e-05, "loss": 0.8553, "step": 11552 }, { "epoch": 0.16384527299864218, "grad_norm": 3.25, "learning_rate": 4.6857318649945116e-05, "loss": 0.8583, "step": 11554 }, { "epoch": 0.1638736346522684, "grad_norm": 3.109375, "learning_rate": 4.6856234055191915e-05, "loss": 0.8615, "step": 11556 }, { "epoch": 0.1639019963058946, "grad_norm": 3.25, "learning_rate": 4.685514928587151e-05, "loss": 0.8148, "step": 11558 }, { "epoch": 0.16393035795952082, "grad_norm": 3.171875, "learning_rate": 4.685406434199258e-05, "loss": 0.8645, "step": 11560 }, { "epoch": 0.16395871961314704, "grad_norm": 3.171875, "learning_rate": 4.685297922356378e-05, "loss": 0.8507, "step": 11562 }, { "epoch": 0.16398708126677325, "grad_norm": 2.671875, "learning_rate": 4.685189393059377e-05, "loss": 0.8361, "step": 11564 }, { "epoch": 0.16401544292039946, "grad_norm": 2.765625, "learning_rate": 4.6850808463091226e-05, "loss": 0.8398, "step": 11566 }, { "epoch": 0.16404380457402568, "grad_norm": 3.3125, "learning_rate": 4.6849722821064814e-05, "loss": 0.8931, "step": 11568 }, { "epoch": 0.16407216622765192, "grad_norm": 3.734375, "learning_rate": 4.684863700452322e-05, "loss": 0.8418, "step": 11570 }, { "epoch": 0.16410052788127813, "grad_norm": 3.109375, "learning_rate": 4.68475510134751e-05, "loss": 0.8259, "step": 11572 }, { "epoch": 0.16412888953490434, "grad_norm": 2.875, "learning_rate": 4.684646484792913e-05, "loss": 0.8524, "step": 11574 }, { "epoch": 0.16415725118853056, "grad_norm": 2.984375, "learning_rate": 4.684537850789399e-05, "loss": 0.8189, "step": 11576 }, { "epoch": 0.16418561284215677, "grad_norm": 3.140625, "learning_rate": 4.684429199337835e-05, "loss": 0.895, "step": 11578 }, { "epoch": 0.16421397449578298, "grad_norm": 3.0625, "learning_rate": 4.68432053043909e-05, "loss": 0.8626, "step": 11580 }, { "epoch": 0.1642423361494092, "grad_norm": 2.984375, "learning_rate": 4.6842118440940306e-05, "loss": 0.8424, "step": 11582 }, { "epoch": 0.1642706978030354, "grad_norm": 2.75, "learning_rate": 4.6841031403035263e-05, "loss": 0.8172, "step": 11584 }, { "epoch": 0.16429905945666162, "grad_norm": 3.421875, "learning_rate": 4.6839944190684435e-05, "loss": 0.8304, "step": 11586 }, { "epoch": 0.16432742111028784, "grad_norm": 3.140625, "learning_rate": 4.683885680389652e-05, "loss": 0.8203, "step": 11588 }, { "epoch": 0.16435578276391405, "grad_norm": 3.328125, "learning_rate": 4.683776924268021e-05, "loss": 0.8517, "step": 11590 }, { "epoch": 0.16438414441754026, "grad_norm": 3.15625, "learning_rate": 4.683668150704417e-05, "loss": 0.8671, "step": 11592 }, { "epoch": 0.16441250607116648, "grad_norm": 3.15625, "learning_rate": 4.68355935969971e-05, "loss": 0.8732, "step": 11594 }, { "epoch": 0.1644408677247927, "grad_norm": 2.9375, "learning_rate": 4.683450551254768e-05, "loss": 0.7991, "step": 11596 }, { "epoch": 0.1644692293784189, "grad_norm": 2.8125, "learning_rate": 4.683341725370462e-05, "loss": 0.7983, "step": 11598 }, { "epoch": 0.16449759103204512, "grad_norm": 3.1875, "learning_rate": 4.683232882047659e-05, "loss": 0.8389, "step": 11600 }, { "epoch": 0.16452595268567133, "grad_norm": 3.203125, "learning_rate": 4.6831240212872305e-05, "loss": 0.8642, "step": 11602 }, { "epoch": 0.16455431433929754, "grad_norm": 3.25, "learning_rate": 4.6830151430900436e-05, "loss": 0.8493, "step": 11604 }, { "epoch": 0.16458267599292375, "grad_norm": 2.859375, "learning_rate": 4.68290624745697e-05, "loss": 0.8279, "step": 11606 }, { "epoch": 0.16461103764655, "grad_norm": 3.125, "learning_rate": 4.682797334388879e-05, "loss": 0.8293, "step": 11608 }, { "epoch": 0.1646393993001762, "grad_norm": 3.203125, "learning_rate": 4.682688403886639e-05, "loss": 0.8713, "step": 11610 }, { "epoch": 0.16466776095380242, "grad_norm": 2.953125, "learning_rate": 4.682579455951122e-05, "loss": 0.831, "step": 11612 }, { "epoch": 0.16469612260742864, "grad_norm": 3.140625, "learning_rate": 4.6824704905831965e-05, "loss": 0.847, "step": 11614 }, { "epoch": 0.16472448426105485, "grad_norm": 2.875, "learning_rate": 4.682361507783734e-05, "loss": 0.8625, "step": 11616 }, { "epoch": 0.16475284591468106, "grad_norm": 3.21875, "learning_rate": 4.682252507553604e-05, "loss": 0.8485, "step": 11618 }, { "epoch": 0.16478120756830728, "grad_norm": 2.78125, "learning_rate": 4.682143489893679e-05, "loss": 0.8866, "step": 11620 }, { "epoch": 0.1648095692219335, "grad_norm": 2.9375, "learning_rate": 4.682034454804827e-05, "loss": 0.8678, "step": 11622 }, { "epoch": 0.1648379308755597, "grad_norm": 2.6875, "learning_rate": 4.681925402287921e-05, "loss": 0.8255, "step": 11624 }, { "epoch": 0.16486629252918591, "grad_norm": 3.3125, "learning_rate": 4.6818163323438315e-05, "loss": 0.8098, "step": 11626 }, { "epoch": 0.16489465418281213, "grad_norm": 3.234375, "learning_rate": 4.681707244973429e-05, "loss": 0.8681, "step": 11628 }, { "epoch": 0.16492301583643834, "grad_norm": 3.265625, "learning_rate": 4.681598140177586e-05, "loss": 0.8719, "step": 11630 }, { "epoch": 0.16495137749006455, "grad_norm": 3.078125, "learning_rate": 4.6814890179571714e-05, "loss": 0.8267, "step": 11632 }, { "epoch": 0.16497973914369077, "grad_norm": 2.953125, "learning_rate": 4.68137987831306e-05, "loss": 0.8513, "step": 11634 }, { "epoch": 0.16500810079731698, "grad_norm": 2.890625, "learning_rate": 4.681270721246121e-05, "loss": 0.8709, "step": 11636 }, { "epoch": 0.1650364624509432, "grad_norm": 3.1875, "learning_rate": 4.681161546757228e-05, "loss": 0.8436, "step": 11638 }, { "epoch": 0.1650648241045694, "grad_norm": 3.109375, "learning_rate": 4.681052354847252e-05, "loss": 0.8277, "step": 11640 }, { "epoch": 0.16509318575819562, "grad_norm": 3.328125, "learning_rate": 4.680943145517066e-05, "loss": 0.8317, "step": 11642 }, { "epoch": 0.16512154741182183, "grad_norm": 3.015625, "learning_rate": 4.680833918767541e-05, "loss": 0.8115, "step": 11644 }, { "epoch": 0.16514990906544805, "grad_norm": 3.296875, "learning_rate": 4.6807246745995504e-05, "loss": 0.8505, "step": 11646 }, { "epoch": 0.1651782707190743, "grad_norm": 3.453125, "learning_rate": 4.680615413013966e-05, "loss": 0.8586, "step": 11648 }, { "epoch": 0.1652066323727005, "grad_norm": 2.734375, "learning_rate": 4.680506134011661e-05, "loss": 0.8146, "step": 11650 }, { "epoch": 0.16523499402632671, "grad_norm": 3.0625, "learning_rate": 4.6803968375935076e-05, "loss": 0.8066, "step": 11652 }, { "epoch": 0.16526335567995293, "grad_norm": 2.9375, "learning_rate": 4.680287523760379e-05, "loss": 0.8462, "step": 11654 }, { "epoch": 0.16529171733357914, "grad_norm": 3.65625, "learning_rate": 4.68017819251315e-05, "loss": 0.9049, "step": 11656 }, { "epoch": 0.16532007898720535, "grad_norm": 3.609375, "learning_rate": 4.680068843852691e-05, "loss": 0.8495, "step": 11658 }, { "epoch": 0.16534844064083157, "grad_norm": 3.359375, "learning_rate": 4.679959477779877e-05, "loss": 0.8622, "step": 11660 }, { "epoch": 0.16537680229445778, "grad_norm": 3.28125, "learning_rate": 4.679850094295581e-05, "loss": 0.8387, "step": 11662 }, { "epoch": 0.165405163948084, "grad_norm": 3.25, "learning_rate": 4.679740693400678e-05, "loss": 0.8058, "step": 11664 }, { "epoch": 0.1654335256017102, "grad_norm": 3.140625, "learning_rate": 4.67963127509604e-05, "loss": 0.8754, "step": 11666 }, { "epoch": 0.16546188725533642, "grad_norm": 2.984375, "learning_rate": 4.679521839382542e-05, "loss": 0.8166, "step": 11668 }, { "epoch": 0.16549024890896263, "grad_norm": 3.171875, "learning_rate": 4.679412386261057e-05, "loss": 0.8841, "step": 11670 }, { "epoch": 0.16551861056258885, "grad_norm": 3.171875, "learning_rate": 4.67930291573246e-05, "loss": 0.8433, "step": 11672 }, { "epoch": 0.16554697221621506, "grad_norm": 3.28125, "learning_rate": 4.679193427797626e-05, "loss": 0.8334, "step": 11674 }, { "epoch": 0.16557533386984127, "grad_norm": 3.265625, "learning_rate": 4.679083922457428e-05, "loss": 0.8449, "step": 11676 }, { "epoch": 0.1656036955234675, "grad_norm": 2.765625, "learning_rate": 4.678974399712742e-05, "loss": 0.8429, "step": 11678 }, { "epoch": 0.1656320571770937, "grad_norm": 2.921875, "learning_rate": 4.6788648595644417e-05, "loss": 0.7927, "step": 11680 }, { "epoch": 0.1656604188307199, "grad_norm": 2.984375, "learning_rate": 4.6787553020134025e-05, "loss": 0.8528, "step": 11682 }, { "epoch": 0.16568878048434613, "grad_norm": 3.015625, "learning_rate": 4.678645727060499e-05, "loss": 0.8949, "step": 11684 }, { "epoch": 0.16571714213797237, "grad_norm": 3.0, "learning_rate": 4.678536134706607e-05, "loss": 0.851, "step": 11686 }, { "epoch": 0.16574550379159858, "grad_norm": 3.359375, "learning_rate": 4.6784265249526016e-05, "loss": 0.8244, "step": 11688 }, { "epoch": 0.1657738654452248, "grad_norm": 3.234375, "learning_rate": 4.678316897799359e-05, "loss": 0.8746, "step": 11690 }, { "epoch": 0.165802227098851, "grad_norm": 3.328125, "learning_rate": 4.678207253247753e-05, "loss": 0.9013, "step": 11692 }, { "epoch": 0.16583058875247722, "grad_norm": 3.0625, "learning_rate": 4.678097591298661e-05, "loss": 0.8274, "step": 11694 }, { "epoch": 0.16585895040610343, "grad_norm": 3.171875, "learning_rate": 4.677987911952958e-05, "loss": 0.8901, "step": 11696 }, { "epoch": 0.16588731205972965, "grad_norm": 3.203125, "learning_rate": 4.6778782152115195e-05, "loss": 0.8614, "step": 11698 }, { "epoch": 0.16591567371335586, "grad_norm": 3.046875, "learning_rate": 4.677768501075224e-05, "loss": 0.9307, "step": 11700 }, { "epoch": 0.16594403536698207, "grad_norm": 3.421875, "learning_rate": 4.6776587695449455e-05, "loss": 0.8527, "step": 11702 }, { "epoch": 0.1659723970206083, "grad_norm": 2.921875, "learning_rate": 4.677549020621561e-05, "loss": 0.8012, "step": 11704 }, { "epoch": 0.1660007586742345, "grad_norm": 3.328125, "learning_rate": 4.6774392543059476e-05, "loss": 0.813, "step": 11706 }, { "epoch": 0.1660291203278607, "grad_norm": 2.9375, "learning_rate": 4.677329470598981e-05, "loss": 0.873, "step": 11708 }, { "epoch": 0.16605748198148693, "grad_norm": 2.828125, "learning_rate": 4.677219669501539e-05, "loss": 0.8658, "step": 11710 }, { "epoch": 0.16608584363511314, "grad_norm": 3.046875, "learning_rate": 4.6771098510144984e-05, "loss": 0.8266, "step": 11712 }, { "epoch": 0.16611420528873935, "grad_norm": 3.25, "learning_rate": 4.677000015138736e-05, "loss": 0.8378, "step": 11714 }, { "epoch": 0.16614256694236557, "grad_norm": 3.25, "learning_rate": 4.6768901618751295e-05, "loss": 0.8558, "step": 11716 }, { "epoch": 0.16617092859599178, "grad_norm": 3.140625, "learning_rate": 4.676780291224556e-05, "loss": 0.8853, "step": 11718 }, { "epoch": 0.166199290249618, "grad_norm": 3.203125, "learning_rate": 4.676670403187893e-05, "loss": 0.8853, "step": 11720 }, { "epoch": 0.1662276519032442, "grad_norm": 3.265625, "learning_rate": 4.676560497766019e-05, "loss": 0.8616, "step": 11722 }, { "epoch": 0.16625601355687045, "grad_norm": 3.046875, "learning_rate": 4.67645057495981e-05, "loss": 0.876, "step": 11724 }, { "epoch": 0.16628437521049666, "grad_norm": 3.046875, "learning_rate": 4.676340634770146e-05, "loss": 0.837, "step": 11726 }, { "epoch": 0.16631273686412287, "grad_norm": 3.328125, "learning_rate": 4.676230677197904e-05, "loss": 0.8328, "step": 11728 }, { "epoch": 0.1663410985177491, "grad_norm": 2.96875, "learning_rate": 4.6761207022439625e-05, "loss": 0.8458, "step": 11730 }, { "epoch": 0.1663694601713753, "grad_norm": 2.828125, "learning_rate": 4.6760107099091985e-05, "loss": 0.8434, "step": 11732 }, { "epoch": 0.1663978218250015, "grad_norm": 2.875, "learning_rate": 4.675900700194493e-05, "loss": 0.8469, "step": 11734 }, { "epoch": 0.16642618347862773, "grad_norm": 3.015625, "learning_rate": 4.675790673100724e-05, "loss": 0.7805, "step": 11736 }, { "epoch": 0.16645454513225394, "grad_norm": 3.078125, "learning_rate": 4.675680628628769e-05, "loss": 0.86, "step": 11738 }, { "epoch": 0.16648290678588015, "grad_norm": 2.703125, "learning_rate": 4.675570566779508e-05, "loss": 0.8214, "step": 11740 }, { "epoch": 0.16651126843950637, "grad_norm": 3.0, "learning_rate": 4.6754604875538187e-05, "loss": 0.7887, "step": 11742 }, { "epoch": 0.16653963009313258, "grad_norm": 3.046875, "learning_rate": 4.675350390952583e-05, "loss": 0.8604, "step": 11744 }, { "epoch": 0.1665679917467588, "grad_norm": 3.46875, "learning_rate": 4.675240276976678e-05, "loss": 0.8149, "step": 11746 }, { "epoch": 0.166596353400385, "grad_norm": 3.578125, "learning_rate": 4.675130145626984e-05, "loss": 0.9002, "step": 11748 }, { "epoch": 0.16662471505401122, "grad_norm": 3.296875, "learning_rate": 4.67501999690438e-05, "loss": 0.8644, "step": 11750 }, { "epoch": 0.16665307670763743, "grad_norm": 2.875, "learning_rate": 4.6749098308097464e-05, "loss": 0.8251, "step": 11752 }, { "epoch": 0.16668143836126365, "grad_norm": 3.203125, "learning_rate": 4.674799647343962e-05, "loss": 0.827, "step": 11754 }, { "epoch": 0.16670980001488986, "grad_norm": 3.015625, "learning_rate": 4.6746894465079094e-05, "loss": 0.8784, "step": 11756 }, { "epoch": 0.16673816166851607, "grad_norm": 2.765625, "learning_rate": 4.674579228302466e-05, "loss": 0.8642, "step": 11758 }, { "epoch": 0.16676652332214228, "grad_norm": 3.28125, "learning_rate": 4.6744689927285134e-05, "loss": 0.919, "step": 11760 }, { "epoch": 0.16679488497576853, "grad_norm": 3.203125, "learning_rate": 4.674358739786933e-05, "loss": 0.8714, "step": 11762 }, { "epoch": 0.16682324662939474, "grad_norm": 2.953125, "learning_rate": 4.6742484694786024e-05, "loss": 0.8562, "step": 11764 }, { "epoch": 0.16685160828302095, "grad_norm": 3.015625, "learning_rate": 4.674138181804406e-05, "loss": 0.8466, "step": 11766 }, { "epoch": 0.16687996993664717, "grad_norm": 3.09375, "learning_rate": 4.674027876765222e-05, "loss": 0.8277, "step": 11768 }, { "epoch": 0.16690833159027338, "grad_norm": 2.875, "learning_rate": 4.673917554361933e-05, "loss": 0.8278, "step": 11770 }, { "epoch": 0.1669366932438996, "grad_norm": 3.34375, "learning_rate": 4.673807214595419e-05, "loss": 0.854, "step": 11772 }, { "epoch": 0.1669650548975258, "grad_norm": 3.125, "learning_rate": 4.6736968574665616e-05, "loss": 0.8949, "step": 11774 }, { "epoch": 0.16699341655115202, "grad_norm": 3.234375, "learning_rate": 4.673586482976243e-05, "loss": 0.8712, "step": 11776 }, { "epoch": 0.16702177820477823, "grad_norm": 3.015625, "learning_rate": 4.6734760911253436e-05, "loss": 0.8431, "step": 11778 }, { "epoch": 0.16705013985840445, "grad_norm": 3.171875, "learning_rate": 4.673365681914746e-05, "loss": 0.8148, "step": 11780 }, { "epoch": 0.16707850151203066, "grad_norm": 3.0625, "learning_rate": 4.6732552553453316e-05, "loss": 0.8347, "step": 11782 }, { "epoch": 0.16710686316565687, "grad_norm": 3.140625, "learning_rate": 4.673144811417982e-05, "loss": 0.877, "step": 11784 }, { "epoch": 0.16713522481928308, "grad_norm": 2.96875, "learning_rate": 4.67303435013358e-05, "loss": 0.8252, "step": 11786 }, { "epoch": 0.1671635864729093, "grad_norm": 3.078125, "learning_rate": 4.672923871493009e-05, "loss": 0.8834, "step": 11788 }, { "epoch": 0.1671919481265355, "grad_norm": 3.484375, "learning_rate": 4.672813375497149e-05, "loss": 0.8938, "step": 11790 }, { "epoch": 0.16722030978016172, "grad_norm": 3.46875, "learning_rate": 4.672702862146884e-05, "loss": 0.8909, "step": 11792 }, { "epoch": 0.16724867143378794, "grad_norm": 3.484375, "learning_rate": 4.6725923314430965e-05, "loss": 0.8957, "step": 11794 }, { "epoch": 0.16727703308741415, "grad_norm": 3.390625, "learning_rate": 4.6724817833866684e-05, "loss": 0.872, "step": 11796 }, { "epoch": 0.16730539474104036, "grad_norm": 2.921875, "learning_rate": 4.672371217978484e-05, "loss": 0.8108, "step": 11798 }, { "epoch": 0.16733375639466658, "grad_norm": 2.6875, "learning_rate": 4.6722606352194255e-05, "loss": 0.771, "step": 11800 }, { "epoch": 0.16736211804829282, "grad_norm": 2.84375, "learning_rate": 4.6721500351103766e-05, "loss": 0.8661, "step": 11802 }, { "epoch": 0.16739047970191903, "grad_norm": 3.453125, "learning_rate": 4.6720394176522206e-05, "loss": 0.8279, "step": 11804 }, { "epoch": 0.16741884135554524, "grad_norm": 2.96875, "learning_rate": 4.6719287828458405e-05, "loss": 0.8817, "step": 11806 }, { "epoch": 0.16744720300917146, "grad_norm": 3.09375, "learning_rate": 4.671818130692121e-05, "loss": 0.8429, "step": 11808 }, { "epoch": 0.16747556466279767, "grad_norm": 3.3125, "learning_rate": 4.671707461191944e-05, "loss": 0.8783, "step": 11810 }, { "epoch": 0.16750392631642388, "grad_norm": 3.296875, "learning_rate": 4.671596774346196e-05, "loss": 0.8274, "step": 11812 }, { "epoch": 0.1675322879700501, "grad_norm": 4.53125, "learning_rate": 4.6714860701557586e-05, "loss": 0.8945, "step": 11814 }, { "epoch": 0.1675606496236763, "grad_norm": 3.0625, "learning_rate": 4.6713753486215185e-05, "loss": 0.8565, "step": 11816 }, { "epoch": 0.16758901127730252, "grad_norm": 3.359375, "learning_rate": 4.6712646097443575e-05, "loss": 0.849, "step": 11818 }, { "epoch": 0.16761737293092874, "grad_norm": 3.109375, "learning_rate": 4.671153853525162e-05, "loss": 0.8912, "step": 11820 }, { "epoch": 0.16764573458455495, "grad_norm": 2.796875, "learning_rate": 4.671043079964815e-05, "loss": 0.8166, "step": 11822 }, { "epoch": 0.16767409623818116, "grad_norm": 2.78125, "learning_rate": 4.670932289064203e-05, "loss": 0.8272, "step": 11824 }, { "epoch": 0.16770245789180738, "grad_norm": 2.84375, "learning_rate": 4.670821480824209e-05, "loss": 0.8401, "step": 11826 }, { "epoch": 0.1677308195454336, "grad_norm": 3.0625, "learning_rate": 4.670710655245719e-05, "loss": 0.8439, "step": 11828 }, { "epoch": 0.1677591811990598, "grad_norm": 3.25, "learning_rate": 4.6705998123296194e-05, "loss": 0.851, "step": 11830 }, { "epoch": 0.16778754285268602, "grad_norm": 3.21875, "learning_rate": 4.6704889520767935e-05, "loss": 0.826, "step": 11832 }, { "epoch": 0.16781590450631223, "grad_norm": 2.984375, "learning_rate": 4.670378074488128e-05, "loss": 0.8485, "step": 11834 }, { "epoch": 0.16784426615993844, "grad_norm": 3.09375, "learning_rate": 4.6702671795645074e-05, "loss": 0.8362, "step": 11836 }, { "epoch": 0.16787262781356466, "grad_norm": 3.25, "learning_rate": 4.670156267306819e-05, "loss": 0.8829, "step": 11838 }, { "epoch": 0.1679009894671909, "grad_norm": 3.0625, "learning_rate": 4.670045337715947e-05, "loss": 0.8554, "step": 11840 }, { "epoch": 0.1679293511208171, "grad_norm": 3.375, "learning_rate": 4.6699343907927785e-05, "loss": 0.9396, "step": 11842 }, { "epoch": 0.16795771277444332, "grad_norm": 3.03125, "learning_rate": 4.669823426538199e-05, "loss": 0.8417, "step": 11844 }, { "epoch": 0.16798607442806954, "grad_norm": 2.796875, "learning_rate": 4.669712444953095e-05, "loss": 0.8867, "step": 11846 }, { "epoch": 0.16801443608169575, "grad_norm": 3.484375, "learning_rate": 4.669601446038353e-05, "loss": 0.8677, "step": 11848 }, { "epoch": 0.16804279773532196, "grad_norm": 3.25, "learning_rate": 4.6694904297948606e-05, "loss": 0.8429, "step": 11850 }, { "epoch": 0.16807115938894818, "grad_norm": 3.28125, "learning_rate": 4.669379396223502e-05, "loss": 0.8423, "step": 11852 }, { "epoch": 0.1680995210425744, "grad_norm": 2.96875, "learning_rate": 4.669268345325166e-05, "loss": 0.8341, "step": 11854 }, { "epoch": 0.1681278826962006, "grad_norm": 3.078125, "learning_rate": 4.669157277100739e-05, "loss": 0.8422, "step": 11856 }, { "epoch": 0.16815624434982682, "grad_norm": 2.9375, "learning_rate": 4.6690461915511085e-05, "loss": 0.8446, "step": 11858 }, { "epoch": 0.16818460600345303, "grad_norm": 2.625, "learning_rate": 4.668935088677161e-05, "loss": 0.8158, "step": 11860 }, { "epoch": 0.16821296765707924, "grad_norm": 3.03125, "learning_rate": 4.668823968479784e-05, "loss": 0.8574, "step": 11862 }, { "epoch": 0.16824132931070546, "grad_norm": 3.03125, "learning_rate": 4.668712830959866e-05, "loss": 0.8233, "step": 11864 }, { "epoch": 0.16826969096433167, "grad_norm": 3.09375, "learning_rate": 4.6686016761182936e-05, "loss": 0.8841, "step": 11866 }, { "epoch": 0.16829805261795788, "grad_norm": 3.09375, "learning_rate": 4.6684905039559554e-05, "loss": 0.8682, "step": 11868 }, { "epoch": 0.1683264142715841, "grad_norm": 3.078125, "learning_rate": 4.668379314473739e-05, "loss": 0.819, "step": 11870 }, { "epoch": 0.1683547759252103, "grad_norm": 3.015625, "learning_rate": 4.668268107672531e-05, "loss": 0.8432, "step": 11872 }, { "epoch": 0.16838313757883652, "grad_norm": 3.078125, "learning_rate": 4.668156883553222e-05, "loss": 0.835, "step": 11874 }, { "epoch": 0.16841149923246274, "grad_norm": 3.015625, "learning_rate": 4.6680456421166995e-05, "loss": 0.8565, "step": 11876 }, { "epoch": 0.16843986088608898, "grad_norm": 3.3125, "learning_rate": 4.6679343833638513e-05, "loss": 0.8787, "step": 11878 }, { "epoch": 0.1684682225397152, "grad_norm": 2.8125, "learning_rate": 4.667823107295567e-05, "loss": 0.8169, "step": 11880 }, { "epoch": 0.1684965841933414, "grad_norm": 3.140625, "learning_rate": 4.6677118139127354e-05, "loss": 0.8376, "step": 11882 }, { "epoch": 0.16852494584696762, "grad_norm": 3.359375, "learning_rate": 4.667600503216244e-05, "loss": 0.8218, "step": 11884 }, { "epoch": 0.16855330750059383, "grad_norm": 2.921875, "learning_rate": 4.6674891752069825e-05, "loss": 0.8295, "step": 11886 }, { "epoch": 0.16858166915422004, "grad_norm": 3.546875, "learning_rate": 4.667377829885842e-05, "loss": 0.8859, "step": 11888 }, { "epoch": 0.16861003080784626, "grad_norm": 3.265625, "learning_rate": 4.6672664672537085e-05, "loss": 0.8312, "step": 11890 }, { "epoch": 0.16863839246147247, "grad_norm": 2.875, "learning_rate": 4.6671550873114744e-05, "loss": 0.7894, "step": 11892 }, { "epoch": 0.16866675411509868, "grad_norm": 3.3125, "learning_rate": 4.667043690060027e-05, "loss": 0.8461, "step": 11894 }, { "epoch": 0.1686951157687249, "grad_norm": 3.53125, "learning_rate": 4.6669322755002584e-05, "loss": 0.8659, "step": 11896 }, { "epoch": 0.1687234774223511, "grad_norm": 2.671875, "learning_rate": 4.666820843633056e-05, "loss": 0.804, "step": 11898 }, { "epoch": 0.16875183907597732, "grad_norm": 3.390625, "learning_rate": 4.666709394459312e-05, "loss": 0.8645, "step": 11900 }, { "epoch": 0.16878020072960354, "grad_norm": 3.21875, "learning_rate": 4.6665979279799146e-05, "loss": 0.8576, "step": 11902 }, { "epoch": 0.16880856238322975, "grad_norm": 2.921875, "learning_rate": 4.6664864441957556e-05, "loss": 0.786, "step": 11904 }, { "epoch": 0.16883692403685596, "grad_norm": 2.765625, "learning_rate": 4.6663749431077245e-05, "loss": 0.8135, "step": 11906 }, { "epoch": 0.16886528569048218, "grad_norm": 3.59375, "learning_rate": 4.6662634247167123e-05, "loss": 0.8875, "step": 11908 }, { "epoch": 0.1688936473441084, "grad_norm": 3.296875, "learning_rate": 4.66615188902361e-05, "loss": 0.9018, "step": 11910 }, { "epoch": 0.1689220089977346, "grad_norm": 3.3125, "learning_rate": 4.666040336029308e-05, "loss": 0.8483, "step": 11912 }, { "epoch": 0.16895037065136081, "grad_norm": 3.0, "learning_rate": 4.665928765734697e-05, "loss": 0.813, "step": 11914 }, { "epoch": 0.16897873230498706, "grad_norm": 3.0625, "learning_rate": 4.665817178140669e-05, "loss": 0.8794, "step": 11916 }, { "epoch": 0.16900709395861327, "grad_norm": 2.90625, "learning_rate": 4.665705573248115e-05, "loss": 0.8425, "step": 11918 }, { "epoch": 0.16903545561223948, "grad_norm": 3.09375, "learning_rate": 4.665593951057925e-05, "loss": 0.8495, "step": 11920 }, { "epoch": 0.1690638172658657, "grad_norm": 3.09375, "learning_rate": 4.665482311570992e-05, "loss": 0.8202, "step": 11922 }, { "epoch": 0.1690921789194919, "grad_norm": 2.890625, "learning_rate": 4.665370654788208e-05, "loss": 0.8607, "step": 11924 }, { "epoch": 0.16912054057311812, "grad_norm": 3.3125, "learning_rate": 4.665258980710464e-05, "loss": 0.8583, "step": 11926 }, { "epoch": 0.16914890222674434, "grad_norm": 2.828125, "learning_rate": 4.6651472893386516e-05, "loss": 0.8575, "step": 11928 }, { "epoch": 0.16917726388037055, "grad_norm": 3.015625, "learning_rate": 4.6650355806736636e-05, "loss": 0.8092, "step": 11930 }, { "epoch": 0.16920562553399676, "grad_norm": 2.84375, "learning_rate": 4.664923854716392e-05, "loss": 0.8544, "step": 11932 }, { "epoch": 0.16923398718762298, "grad_norm": 3.03125, "learning_rate": 4.664812111467729e-05, "loss": 0.8056, "step": 11934 }, { "epoch": 0.1692623488412492, "grad_norm": 3.078125, "learning_rate": 4.664700350928567e-05, "loss": 0.8264, "step": 11936 }, { "epoch": 0.1692907104948754, "grad_norm": 3.296875, "learning_rate": 4.6645885730998e-05, "loss": 0.8973, "step": 11938 }, { "epoch": 0.16931907214850161, "grad_norm": 3.046875, "learning_rate": 4.664476777982319e-05, "loss": 0.8087, "step": 11940 }, { "epoch": 0.16934743380212783, "grad_norm": 2.640625, "learning_rate": 4.664364965577018e-05, "loss": 0.8382, "step": 11942 }, { "epoch": 0.16937579545575404, "grad_norm": 3.21875, "learning_rate": 4.664253135884789e-05, "loss": 0.8228, "step": 11944 }, { "epoch": 0.16940415710938025, "grad_norm": 3.09375, "learning_rate": 4.664141288906526e-05, "loss": 0.8257, "step": 11946 }, { "epoch": 0.16943251876300647, "grad_norm": 2.953125, "learning_rate": 4.6640294246431224e-05, "loss": 0.8633, "step": 11948 }, { "epoch": 0.16946088041663268, "grad_norm": 2.703125, "learning_rate": 4.663917543095472e-05, "loss": 0.7782, "step": 11950 }, { "epoch": 0.1694892420702589, "grad_norm": 3.109375, "learning_rate": 4.663805644264467e-05, "loss": 0.8466, "step": 11952 }, { "epoch": 0.1695176037238851, "grad_norm": 3.03125, "learning_rate": 4.663693728151002e-05, "loss": 0.8613, "step": 11954 }, { "epoch": 0.16954596537751135, "grad_norm": 2.984375, "learning_rate": 4.663581794755971e-05, "loss": 0.8326, "step": 11956 }, { "epoch": 0.16957432703113756, "grad_norm": 2.84375, "learning_rate": 4.6634698440802685e-05, "loss": 0.846, "step": 11958 }, { "epoch": 0.16960268868476377, "grad_norm": 2.96875, "learning_rate": 4.663357876124787e-05, "loss": 0.882, "step": 11960 }, { "epoch": 0.16963105033839, "grad_norm": 3.109375, "learning_rate": 4.663245890890423e-05, "loss": 0.8149, "step": 11962 }, { "epoch": 0.1696594119920162, "grad_norm": 2.84375, "learning_rate": 4.6631338883780685e-05, "loss": 0.8539, "step": 11964 }, { "epoch": 0.16968777364564241, "grad_norm": 3.390625, "learning_rate": 4.66302186858862e-05, "loss": 0.8202, "step": 11966 }, { "epoch": 0.16971613529926863, "grad_norm": 2.75, "learning_rate": 4.662909831522972e-05, "loss": 0.8443, "step": 11968 }, { "epoch": 0.16974449695289484, "grad_norm": 3.25, "learning_rate": 4.662797777182018e-05, "loss": 0.8539, "step": 11970 }, { "epoch": 0.16977285860652105, "grad_norm": 2.875, "learning_rate": 4.6626857055666546e-05, "loss": 0.827, "step": 11972 }, { "epoch": 0.16980122026014727, "grad_norm": 3.125, "learning_rate": 4.662573616677775e-05, "loss": 0.7641, "step": 11974 }, { "epoch": 0.16982958191377348, "grad_norm": 3.171875, "learning_rate": 4.662461510516278e-05, "loss": 0.8039, "step": 11976 }, { "epoch": 0.1698579435673997, "grad_norm": 3.234375, "learning_rate": 4.662349387083055e-05, "loss": 0.8898, "step": 11978 }, { "epoch": 0.1698863052210259, "grad_norm": 3.5625, "learning_rate": 4.662237246379004e-05, "loss": 0.8806, "step": 11980 }, { "epoch": 0.16991466687465212, "grad_norm": 2.953125, "learning_rate": 4.6621250884050195e-05, "loss": 0.7659, "step": 11982 }, { "epoch": 0.16994302852827833, "grad_norm": 3.234375, "learning_rate": 4.662012913161997e-05, "loss": 0.8905, "step": 11984 }, { "epoch": 0.16997139018190455, "grad_norm": 2.90625, "learning_rate": 4.6619007206508335e-05, "loss": 0.8382, "step": 11986 }, { "epoch": 0.16999975183553076, "grad_norm": 2.984375, "learning_rate": 4.661788510872426e-05, "loss": 0.8666, "step": 11988 }, { "epoch": 0.17002811348915697, "grad_norm": 3.125, "learning_rate": 4.6616762838276687e-05, "loss": 0.816, "step": 11990 }, { "epoch": 0.1700564751427832, "grad_norm": 3.140625, "learning_rate": 4.661564039517458e-05, "loss": 0.801, "step": 11992 }, { "epoch": 0.17008483679640943, "grad_norm": 3.0, "learning_rate": 4.6614517779426917e-05, "loss": 0.8468, "step": 11994 }, { "epoch": 0.17011319845003564, "grad_norm": 2.890625, "learning_rate": 4.661339499104266e-05, "loss": 0.91, "step": 11996 }, { "epoch": 0.17014156010366185, "grad_norm": 3.265625, "learning_rate": 4.661227203003077e-05, "loss": 0.8211, "step": 11998 }, { "epoch": 0.17016992175728807, "grad_norm": 2.84375, "learning_rate": 4.661114889640023e-05, "loss": 0.8023, "step": 12000 }, { "epoch": 0.17019828341091428, "grad_norm": 2.765625, "learning_rate": 4.661002559016e-05, "loss": 0.8685, "step": 12002 }, { "epoch": 0.1702266450645405, "grad_norm": 3.109375, "learning_rate": 4.6608902111319054e-05, "loss": 0.861, "step": 12004 }, { "epoch": 0.1702550067181667, "grad_norm": 3.21875, "learning_rate": 4.6607778459886365e-05, "loss": 0.8641, "step": 12006 }, { "epoch": 0.17028336837179292, "grad_norm": 2.8125, "learning_rate": 4.66066546358709e-05, "loss": 0.8237, "step": 12008 }, { "epoch": 0.17031173002541913, "grad_norm": 2.609375, "learning_rate": 4.660553063928166e-05, "loss": 0.8554, "step": 12010 }, { "epoch": 0.17034009167904535, "grad_norm": 2.96875, "learning_rate": 4.660440647012759e-05, "loss": 0.8075, "step": 12012 }, { "epoch": 0.17036845333267156, "grad_norm": 3.078125, "learning_rate": 4.660328212841769e-05, "loss": 0.8381, "step": 12014 }, { "epoch": 0.17039681498629777, "grad_norm": 3.453125, "learning_rate": 4.660215761416093e-05, "loss": 0.8715, "step": 12016 }, { "epoch": 0.170425176639924, "grad_norm": 2.984375, "learning_rate": 4.660103292736631e-05, "loss": 0.8527, "step": 12018 }, { "epoch": 0.1704535382935502, "grad_norm": 3.46875, "learning_rate": 4.659990806804279e-05, "loss": 0.8506, "step": 12020 }, { "epoch": 0.1704818999471764, "grad_norm": 3.125, "learning_rate": 4.659878303619937e-05, "loss": 0.8983, "step": 12022 }, { "epoch": 0.17051026160080263, "grad_norm": 3.46875, "learning_rate": 4.659765783184502e-05, "loss": 0.8089, "step": 12024 }, { "epoch": 0.17053862325442884, "grad_norm": 3.046875, "learning_rate": 4.659653245498874e-05, "loss": 0.8553, "step": 12026 }, { "epoch": 0.17056698490805505, "grad_norm": 3.0625, "learning_rate": 4.659540690563952e-05, "loss": 0.8351, "step": 12028 }, { "epoch": 0.17059534656168127, "grad_norm": 3.015625, "learning_rate": 4.659428118380633e-05, "loss": 0.8277, "step": 12030 }, { "epoch": 0.1706237082153075, "grad_norm": 2.890625, "learning_rate": 4.659315528949819e-05, "loss": 0.837, "step": 12032 }, { "epoch": 0.17065206986893372, "grad_norm": 3.09375, "learning_rate": 4.659202922272408e-05, "loss": 0.8581, "step": 12034 }, { "epoch": 0.17068043152255993, "grad_norm": 3.09375, "learning_rate": 4.659090298349298e-05, "loss": 0.834, "step": 12036 }, { "epoch": 0.17070879317618615, "grad_norm": 2.90625, "learning_rate": 4.6589776571813904e-05, "loss": 0.8573, "step": 12038 }, { "epoch": 0.17073715482981236, "grad_norm": 3.34375, "learning_rate": 4.6588649987695843e-05, "loss": 0.9004, "step": 12040 }, { "epoch": 0.17076551648343857, "grad_norm": 2.84375, "learning_rate": 4.658752323114779e-05, "loss": 0.8496, "step": 12042 }, { "epoch": 0.1707938781370648, "grad_norm": 3.0625, "learning_rate": 4.658639630217876e-05, "loss": 0.8618, "step": 12044 }, { "epoch": 0.170822239790691, "grad_norm": 3.03125, "learning_rate": 4.658526920079773e-05, "loss": 0.8557, "step": 12046 }, { "epoch": 0.1708506014443172, "grad_norm": 3.0625, "learning_rate": 4.658414192701373e-05, "loss": 0.8539, "step": 12048 }, { "epoch": 0.17087896309794343, "grad_norm": 3.4375, "learning_rate": 4.658301448083573e-05, "loss": 0.8704, "step": 12050 }, { "epoch": 0.17090732475156964, "grad_norm": 3.375, "learning_rate": 4.658188686227277e-05, "loss": 0.8164, "step": 12052 }, { "epoch": 0.17093568640519585, "grad_norm": 3.265625, "learning_rate": 4.658075907133383e-05, "loss": 0.8432, "step": 12054 }, { "epoch": 0.17096404805882207, "grad_norm": 3.34375, "learning_rate": 4.6579631108027934e-05, "loss": 0.8348, "step": 12056 }, { "epoch": 0.17099240971244828, "grad_norm": 3.046875, "learning_rate": 4.657850297236408e-05, "loss": 0.8454, "step": 12058 }, { "epoch": 0.1710207713660745, "grad_norm": 3.203125, "learning_rate": 4.6577374664351286e-05, "loss": 0.8803, "step": 12060 }, { "epoch": 0.1710491330197007, "grad_norm": 2.796875, "learning_rate": 4.6576246183998564e-05, "loss": 0.8578, "step": 12062 }, { "epoch": 0.17107749467332692, "grad_norm": 3.0625, "learning_rate": 4.6575117531314915e-05, "loss": 0.7894, "step": 12064 }, { "epoch": 0.17110585632695313, "grad_norm": 2.890625, "learning_rate": 4.657398870630937e-05, "loss": 0.8139, "step": 12066 }, { "epoch": 0.17113421798057935, "grad_norm": 3.015625, "learning_rate": 4.657285970899094e-05, "loss": 0.8054, "step": 12068 }, { "epoch": 0.17116257963420559, "grad_norm": 2.984375, "learning_rate": 4.657173053936864e-05, "loss": 0.8545, "step": 12070 }, { "epoch": 0.1711909412878318, "grad_norm": 2.8125, "learning_rate": 4.657060119745149e-05, "loss": 0.7978, "step": 12072 }, { "epoch": 0.171219302941458, "grad_norm": 2.8125, "learning_rate": 4.6569471683248504e-05, "loss": 0.8334, "step": 12074 }, { "epoch": 0.17124766459508423, "grad_norm": 2.921875, "learning_rate": 4.656834199676872e-05, "loss": 0.8115, "step": 12076 }, { "epoch": 0.17127602624871044, "grad_norm": 3.296875, "learning_rate": 4.6567212138021135e-05, "loss": 0.854, "step": 12078 }, { "epoch": 0.17130438790233665, "grad_norm": 3.390625, "learning_rate": 4.65660821070148e-05, "loss": 0.8539, "step": 12080 }, { "epoch": 0.17133274955596287, "grad_norm": 2.828125, "learning_rate": 4.656495190375872e-05, "loss": 0.8037, "step": 12082 }, { "epoch": 0.17136111120958908, "grad_norm": 3.328125, "learning_rate": 4.656382152826194e-05, "loss": 0.8373, "step": 12084 }, { "epoch": 0.1713894728632153, "grad_norm": 2.875, "learning_rate": 4.656269098053347e-05, "loss": 0.8812, "step": 12086 }, { "epoch": 0.1714178345168415, "grad_norm": 3.203125, "learning_rate": 4.6561560260582356e-05, "loss": 0.8568, "step": 12088 }, { "epoch": 0.17144619617046772, "grad_norm": 2.90625, "learning_rate": 4.656042936841761e-05, "loss": 0.8384, "step": 12090 }, { "epoch": 0.17147455782409393, "grad_norm": 3.0625, "learning_rate": 4.655929830404829e-05, "loss": 0.7781, "step": 12092 }, { "epoch": 0.17150291947772014, "grad_norm": 3.078125, "learning_rate": 4.6558167067483414e-05, "loss": 0.8153, "step": 12094 }, { "epoch": 0.17153128113134636, "grad_norm": 3.125, "learning_rate": 4.655703565873202e-05, "loss": 0.8655, "step": 12096 }, { "epoch": 0.17155964278497257, "grad_norm": 3.296875, "learning_rate": 4.655590407780314e-05, "loss": 0.91, "step": 12098 }, { "epoch": 0.17158800443859878, "grad_norm": 3.15625, "learning_rate": 4.655477232470582e-05, "loss": 0.8305, "step": 12100 }, { "epoch": 0.171616366092225, "grad_norm": 3.40625, "learning_rate": 4.655364039944909e-05, "loss": 0.8632, "step": 12102 }, { "epoch": 0.1716447277458512, "grad_norm": 2.9375, "learning_rate": 4.6552508302042e-05, "loss": 0.833, "step": 12104 }, { "epoch": 0.17167308939947742, "grad_norm": 3.046875, "learning_rate": 4.655137603249359e-05, "loss": 0.8381, "step": 12106 }, { "epoch": 0.17170145105310364, "grad_norm": 3.09375, "learning_rate": 4.6550243590812905e-05, "loss": 0.9034, "step": 12108 }, { "epoch": 0.17172981270672988, "grad_norm": 3.0, "learning_rate": 4.654911097700898e-05, "loss": 0.8615, "step": 12110 }, { "epoch": 0.1717581743603561, "grad_norm": 3.3125, "learning_rate": 4.654797819109087e-05, "loss": 0.8611, "step": 12112 }, { "epoch": 0.1717865360139823, "grad_norm": 3.0625, "learning_rate": 4.6546845233067623e-05, "loss": 0.8215, "step": 12114 }, { "epoch": 0.17181489766760852, "grad_norm": 3.234375, "learning_rate": 4.654571210294829e-05, "loss": 0.8661, "step": 12116 }, { "epoch": 0.17184325932123473, "grad_norm": 2.921875, "learning_rate": 4.6544578800741914e-05, "loss": 0.8596, "step": 12118 }, { "epoch": 0.17187162097486094, "grad_norm": 2.921875, "learning_rate": 4.6543445326457545e-05, "loss": 0.8393, "step": 12120 }, { "epoch": 0.17189998262848716, "grad_norm": 3.234375, "learning_rate": 4.654231168010425e-05, "loss": 0.8367, "step": 12122 }, { "epoch": 0.17192834428211337, "grad_norm": 3.0625, "learning_rate": 4.654117786169107e-05, "loss": 0.8503, "step": 12124 }, { "epoch": 0.17195670593573958, "grad_norm": 2.84375, "learning_rate": 4.6540043871227064e-05, "loss": 0.8454, "step": 12126 }, { "epoch": 0.1719850675893658, "grad_norm": 3.1875, "learning_rate": 4.653890970872129e-05, "loss": 0.8781, "step": 12128 }, { "epoch": 0.172013429242992, "grad_norm": 2.9375, "learning_rate": 4.6537775374182813e-05, "loss": 0.8093, "step": 12130 }, { "epoch": 0.17204179089661822, "grad_norm": 3.15625, "learning_rate": 4.6536640867620686e-05, "loss": 0.8489, "step": 12132 }, { "epoch": 0.17207015255024444, "grad_norm": 2.953125, "learning_rate": 4.653550618904396e-05, "loss": 0.8283, "step": 12134 }, { "epoch": 0.17209851420387065, "grad_norm": 3.28125, "learning_rate": 4.653437133846172e-05, "loss": 0.9028, "step": 12136 }, { "epoch": 0.17212687585749686, "grad_norm": 3.1875, "learning_rate": 4.653323631588302e-05, "loss": 0.8341, "step": 12138 }, { "epoch": 0.17215523751112308, "grad_norm": 2.8125, "learning_rate": 4.653210112131693e-05, "loss": 0.8365, "step": 12140 }, { "epoch": 0.1721835991647493, "grad_norm": 3.34375, "learning_rate": 4.65309657547725e-05, "loss": 0.8941, "step": 12142 }, { "epoch": 0.1722119608183755, "grad_norm": 3.734375, "learning_rate": 4.652983021625883e-05, "loss": 0.8741, "step": 12144 }, { "epoch": 0.17224032247200172, "grad_norm": 2.90625, "learning_rate": 4.652869450578495e-05, "loss": 0.821, "step": 12146 }, { "epoch": 0.17226868412562796, "grad_norm": 3.1875, "learning_rate": 4.652755862335996e-05, "loss": 0.9094, "step": 12148 }, { "epoch": 0.17229704577925417, "grad_norm": 3.09375, "learning_rate": 4.652642256899292e-05, "loss": 0.8, "step": 12150 }, { "epoch": 0.17232540743288038, "grad_norm": 3.34375, "learning_rate": 4.652528634269291e-05, "loss": 0.8423, "step": 12152 }, { "epoch": 0.1723537690865066, "grad_norm": 2.796875, "learning_rate": 4.6524149944469e-05, "loss": 0.849, "step": 12154 }, { "epoch": 0.1723821307401328, "grad_norm": 3.28125, "learning_rate": 4.652301337433027e-05, "loss": 0.9289, "step": 12156 }, { "epoch": 0.17241049239375902, "grad_norm": 3.1875, "learning_rate": 4.6521876632285796e-05, "loss": 0.8313, "step": 12158 }, { "epoch": 0.17243885404738524, "grad_norm": 3.21875, "learning_rate": 4.6520739718344665e-05, "loss": 0.8727, "step": 12160 }, { "epoch": 0.17246721570101145, "grad_norm": 3.1875, "learning_rate": 4.651960263251594e-05, "loss": 0.8212, "step": 12162 }, { "epoch": 0.17249557735463766, "grad_norm": 3.03125, "learning_rate": 4.6518465374808714e-05, "loss": 0.8639, "step": 12164 }, { "epoch": 0.17252393900826388, "grad_norm": 3.25, "learning_rate": 4.6517327945232075e-05, "loss": 0.8575, "step": 12166 }, { "epoch": 0.1725523006618901, "grad_norm": 3.359375, "learning_rate": 4.65161903437951e-05, "loss": 0.8691, "step": 12168 }, { "epoch": 0.1725806623155163, "grad_norm": 3.234375, "learning_rate": 4.651505257050688e-05, "loss": 0.8713, "step": 12170 }, { "epoch": 0.17260902396914252, "grad_norm": 3.359375, "learning_rate": 4.65139146253765e-05, "loss": 0.8117, "step": 12172 }, { "epoch": 0.17263738562276873, "grad_norm": 3.234375, "learning_rate": 4.651277650841305e-05, "loss": 0.873, "step": 12174 }, { "epoch": 0.17266574727639494, "grad_norm": 2.75, "learning_rate": 4.651163821962561e-05, "loss": 0.867, "step": 12176 }, { "epoch": 0.17269410893002116, "grad_norm": 3.09375, "learning_rate": 4.651049975902328e-05, "loss": 0.8184, "step": 12178 }, { "epoch": 0.17272247058364737, "grad_norm": 2.875, "learning_rate": 4.650936112661517e-05, "loss": 0.8054, "step": 12180 }, { "epoch": 0.17275083223727358, "grad_norm": 2.78125, "learning_rate": 4.650822232241034e-05, "loss": 0.8438, "step": 12182 }, { "epoch": 0.1727791938908998, "grad_norm": 3.0625, "learning_rate": 4.6507083346417916e-05, "loss": 0.7824, "step": 12184 }, { "epoch": 0.17280755554452604, "grad_norm": 2.921875, "learning_rate": 4.650594419864698e-05, "loss": 0.7867, "step": 12186 }, { "epoch": 0.17283591719815225, "grad_norm": 3.046875, "learning_rate": 4.6504804879106636e-05, "loss": 0.8093, "step": 12188 }, { "epoch": 0.17286427885177846, "grad_norm": 3.0, "learning_rate": 4.6503665387805975e-05, "loss": 0.869, "step": 12190 }, { "epoch": 0.17289264050540468, "grad_norm": 2.84375, "learning_rate": 4.650252572475411e-05, "loss": 0.8349, "step": 12192 }, { "epoch": 0.1729210021590309, "grad_norm": 3.09375, "learning_rate": 4.650138588996013e-05, "loss": 0.8225, "step": 12194 }, { "epoch": 0.1729493638126571, "grad_norm": 2.984375, "learning_rate": 4.650024588343315e-05, "loss": 0.8305, "step": 12196 }, { "epoch": 0.17297772546628332, "grad_norm": 2.78125, "learning_rate": 4.6499105705182275e-05, "loss": 0.8393, "step": 12198 }, { "epoch": 0.17300608711990953, "grad_norm": 2.84375, "learning_rate": 4.649796535521661e-05, "loss": 0.8002, "step": 12200 }, { "epoch": 0.17303444877353574, "grad_norm": 3.3125, "learning_rate": 4.649682483354525e-05, "loss": 0.8751, "step": 12202 }, { "epoch": 0.17306281042716196, "grad_norm": 2.921875, "learning_rate": 4.649568414017733e-05, "loss": 0.805, "step": 12204 }, { "epoch": 0.17309117208078817, "grad_norm": 3.25, "learning_rate": 4.649454327512194e-05, "loss": 0.822, "step": 12206 }, { "epoch": 0.17311953373441438, "grad_norm": 3.140625, "learning_rate": 4.6493402238388204e-05, "loss": 0.8436, "step": 12208 }, { "epoch": 0.1731478953880406, "grad_norm": 3.3125, "learning_rate": 4.6492261029985226e-05, "loss": 0.9227, "step": 12210 }, { "epoch": 0.1731762570416668, "grad_norm": 2.875, "learning_rate": 4.6491119649922124e-05, "loss": 0.8329, "step": 12212 }, { "epoch": 0.17320461869529302, "grad_norm": 3.125, "learning_rate": 4.648997809820802e-05, "loss": 0.8423, "step": 12214 }, { "epoch": 0.17323298034891924, "grad_norm": 3.140625, "learning_rate": 4.648883637485203e-05, "loss": 0.8236, "step": 12216 }, { "epoch": 0.17326134200254545, "grad_norm": 3.1875, "learning_rate": 4.648769447986326e-05, "loss": 0.8733, "step": 12218 }, { "epoch": 0.17328970365617166, "grad_norm": 2.9375, "learning_rate": 4.648655241325085e-05, "loss": 0.8388, "step": 12220 }, { "epoch": 0.17331806530979788, "grad_norm": 3.0625, "learning_rate": 4.648541017502392e-05, "loss": 0.8709, "step": 12222 }, { "epoch": 0.17334642696342412, "grad_norm": 3.015625, "learning_rate": 4.648426776519157e-05, "loss": 0.8962, "step": 12224 }, { "epoch": 0.17337478861705033, "grad_norm": 3.234375, "learning_rate": 4.648312518376295e-05, "loss": 0.8839, "step": 12226 }, { "epoch": 0.17340315027067654, "grad_norm": 2.703125, "learning_rate": 4.648198243074717e-05, "loss": 0.8691, "step": 12228 }, { "epoch": 0.17343151192430276, "grad_norm": 3.140625, "learning_rate": 4.6480839506153364e-05, "loss": 0.8455, "step": 12230 }, { "epoch": 0.17345987357792897, "grad_norm": 3.5625, "learning_rate": 4.647969640999066e-05, "loss": 0.855, "step": 12232 }, { "epoch": 0.17348823523155518, "grad_norm": 2.84375, "learning_rate": 4.64785531422682e-05, "loss": 0.9109, "step": 12234 }, { "epoch": 0.1735165968851814, "grad_norm": 2.90625, "learning_rate": 4.647740970299509e-05, "loss": 0.8638, "step": 12236 }, { "epoch": 0.1735449585388076, "grad_norm": 3.0625, "learning_rate": 4.647626609218048e-05, "loss": 0.8612, "step": 12238 }, { "epoch": 0.17357332019243382, "grad_norm": 2.859375, "learning_rate": 4.6475122309833504e-05, "loss": 0.8732, "step": 12240 }, { "epoch": 0.17360168184606004, "grad_norm": 3.5, "learning_rate": 4.647397835596329e-05, "loss": 0.8411, "step": 12242 }, { "epoch": 0.17363004349968625, "grad_norm": 2.890625, "learning_rate": 4.6472834230578977e-05, "loss": 0.8358, "step": 12244 }, { "epoch": 0.17365840515331246, "grad_norm": 3.078125, "learning_rate": 4.6471689933689704e-05, "loss": 0.8974, "step": 12246 }, { "epoch": 0.17368676680693867, "grad_norm": 3.25, "learning_rate": 4.647054546530462e-05, "loss": 0.8865, "step": 12248 }, { "epoch": 0.1737151284605649, "grad_norm": 2.875, "learning_rate": 4.646940082543285e-05, "loss": 0.7892, "step": 12250 }, { "epoch": 0.1737434901141911, "grad_norm": 3.28125, "learning_rate": 4.6468256014083546e-05, "loss": 0.8558, "step": 12252 }, { "epoch": 0.17377185176781731, "grad_norm": 3.015625, "learning_rate": 4.646711103126584e-05, "loss": 0.8111, "step": 12254 }, { "epoch": 0.17380021342144353, "grad_norm": 3.03125, "learning_rate": 4.64659658769889e-05, "loss": 0.9268, "step": 12256 }, { "epoch": 0.17382857507506974, "grad_norm": 2.65625, "learning_rate": 4.646482055126185e-05, "loss": 0.8588, "step": 12258 }, { "epoch": 0.17385693672869595, "grad_norm": 3.125, "learning_rate": 4.646367505409385e-05, "loss": 0.8747, "step": 12260 }, { "epoch": 0.17388529838232217, "grad_norm": 3.5, "learning_rate": 4.646252938549405e-05, "loss": 0.8417, "step": 12262 }, { "epoch": 0.1739136600359484, "grad_norm": 2.890625, "learning_rate": 4.646138354547159e-05, "loss": 0.8216, "step": 12264 }, { "epoch": 0.17394202168957462, "grad_norm": 3.21875, "learning_rate": 4.6460237534035626e-05, "loss": 0.8357, "step": 12266 }, { "epoch": 0.17397038334320084, "grad_norm": 3.0, "learning_rate": 4.6459091351195316e-05, "loss": 0.7958, "step": 12268 }, { "epoch": 0.17399874499682705, "grad_norm": 3.40625, "learning_rate": 4.645794499695981e-05, "loss": 0.8411, "step": 12270 }, { "epoch": 0.17402710665045326, "grad_norm": 3.203125, "learning_rate": 4.645679847133827e-05, "loss": 0.8601, "step": 12272 }, { "epoch": 0.17405546830407947, "grad_norm": 2.921875, "learning_rate": 4.645565177433985e-05, "loss": 0.8439, "step": 12274 }, { "epoch": 0.1740838299577057, "grad_norm": 3.265625, "learning_rate": 4.645450490597371e-05, "loss": 0.8395, "step": 12276 }, { "epoch": 0.1741121916113319, "grad_norm": 3.28125, "learning_rate": 4.6453357866249e-05, "loss": 0.8503, "step": 12278 }, { "epoch": 0.17414055326495811, "grad_norm": 3.71875, "learning_rate": 4.64522106551749e-05, "loss": 0.8124, "step": 12280 }, { "epoch": 0.17416891491858433, "grad_norm": 3.046875, "learning_rate": 4.645106327276056e-05, "loss": 0.8141, "step": 12282 }, { "epoch": 0.17419727657221054, "grad_norm": 3.1875, "learning_rate": 4.6449915719015144e-05, "loss": 0.7862, "step": 12284 }, { "epoch": 0.17422563822583675, "grad_norm": 3.359375, "learning_rate": 4.6448767993947815e-05, "loss": 0.8373, "step": 12286 }, { "epoch": 0.17425399987946297, "grad_norm": 2.921875, "learning_rate": 4.6447620097567754e-05, "loss": 0.8462, "step": 12288 }, { "epoch": 0.17428236153308918, "grad_norm": 3.125, "learning_rate": 4.644647202988412e-05, "loss": 0.902, "step": 12290 }, { "epoch": 0.1743107231867154, "grad_norm": 2.921875, "learning_rate": 4.644532379090608e-05, "loss": 0.7617, "step": 12292 }, { "epoch": 0.1743390848403416, "grad_norm": 3.265625, "learning_rate": 4.6444175380642807e-05, "loss": 0.8468, "step": 12294 }, { "epoch": 0.17436744649396782, "grad_norm": 3.296875, "learning_rate": 4.6443026799103486e-05, "loss": 0.818, "step": 12296 }, { "epoch": 0.17439580814759403, "grad_norm": 3.09375, "learning_rate": 4.644187804629727e-05, "loss": 0.9021, "step": 12298 }, { "epoch": 0.17442416980122025, "grad_norm": 3.015625, "learning_rate": 4.644072912223334e-05, "loss": 0.8118, "step": 12300 }, { "epoch": 0.1744525314548465, "grad_norm": 3.078125, "learning_rate": 4.643958002692088e-05, "loss": 0.8486, "step": 12302 }, { "epoch": 0.1744808931084727, "grad_norm": 2.96875, "learning_rate": 4.6438430760369064e-05, "loss": 0.8269, "step": 12304 }, { "epoch": 0.17450925476209891, "grad_norm": 2.90625, "learning_rate": 4.643728132258708e-05, "loss": 0.8216, "step": 12306 }, { "epoch": 0.17453761641572513, "grad_norm": 3.0, "learning_rate": 4.643613171358409e-05, "loss": 0.8519, "step": 12308 }, { "epoch": 0.17456597806935134, "grad_norm": 3.21875, "learning_rate": 4.6434981933369294e-05, "loss": 0.8493, "step": 12310 }, { "epoch": 0.17459433972297755, "grad_norm": 2.953125, "learning_rate": 4.643383198195186e-05, "loss": 0.8004, "step": 12312 }, { "epoch": 0.17462270137660377, "grad_norm": 3.03125, "learning_rate": 4.643268185934099e-05, "loss": 0.7851, "step": 12314 }, { "epoch": 0.17465106303022998, "grad_norm": 3.1875, "learning_rate": 4.643153156554585e-05, "loss": 0.8573, "step": 12316 }, { "epoch": 0.1746794246838562, "grad_norm": 3.3125, "learning_rate": 4.643038110057564e-05, "loss": 0.8299, "step": 12318 }, { "epoch": 0.1747077863374824, "grad_norm": 3.078125, "learning_rate": 4.6429230464439544e-05, "loss": 0.8255, "step": 12320 }, { "epoch": 0.17473614799110862, "grad_norm": 3.234375, "learning_rate": 4.642807965714676e-05, "loss": 0.8639, "step": 12322 }, { "epoch": 0.17476450964473483, "grad_norm": 3.140625, "learning_rate": 4.642692867870648e-05, "loss": 0.8382, "step": 12324 }, { "epoch": 0.17479287129836105, "grad_norm": 3.296875, "learning_rate": 4.6425777529127885e-05, "loss": 0.8247, "step": 12326 }, { "epoch": 0.17482123295198726, "grad_norm": 2.921875, "learning_rate": 4.6424626208420175e-05, "loss": 0.8779, "step": 12328 }, { "epoch": 0.17484959460561347, "grad_norm": 3.0625, "learning_rate": 4.6423474716592546e-05, "loss": 0.8107, "step": 12330 }, { "epoch": 0.1748779562592397, "grad_norm": 3.125, "learning_rate": 4.6422323053654205e-05, "loss": 0.8488, "step": 12332 }, { "epoch": 0.1749063179128659, "grad_norm": 2.921875, "learning_rate": 4.642117121961432e-05, "loss": 0.8343, "step": 12334 }, { "epoch": 0.1749346795664921, "grad_norm": 2.96875, "learning_rate": 4.642001921448214e-05, "loss": 0.8628, "step": 12336 }, { "epoch": 0.17496304122011833, "grad_norm": 3.59375, "learning_rate": 4.6418867038266814e-05, "loss": 0.8656, "step": 12338 }, { "epoch": 0.17499140287374457, "grad_norm": 3.15625, "learning_rate": 4.6417714690977575e-05, "loss": 0.8346, "step": 12340 }, { "epoch": 0.17501976452737078, "grad_norm": 3.015625, "learning_rate": 4.641656217262362e-05, "loss": 0.8389, "step": 12342 }, { "epoch": 0.175048126180997, "grad_norm": 2.75, "learning_rate": 4.6415409483214164e-05, "loss": 0.836, "step": 12344 }, { "epoch": 0.1750764878346232, "grad_norm": 2.90625, "learning_rate": 4.6414256622758396e-05, "loss": 0.9025, "step": 12346 }, { "epoch": 0.17510484948824942, "grad_norm": 2.765625, "learning_rate": 4.641310359126553e-05, "loss": 0.8001, "step": 12348 }, { "epoch": 0.17513321114187563, "grad_norm": 2.984375, "learning_rate": 4.641195038874479e-05, "loss": 0.8434, "step": 12350 }, { "epoch": 0.17516157279550185, "grad_norm": 2.734375, "learning_rate": 4.641079701520535e-05, "loss": 0.8375, "step": 12352 }, { "epoch": 0.17518993444912806, "grad_norm": 3.078125, "learning_rate": 4.640964347065646e-05, "loss": 0.8381, "step": 12354 }, { "epoch": 0.17521829610275427, "grad_norm": 3.6875, "learning_rate": 4.640848975510732e-05, "loss": 0.8247, "step": 12356 }, { "epoch": 0.1752466577563805, "grad_norm": 3.265625, "learning_rate": 4.640733586856715e-05, "loss": 0.8165, "step": 12358 }, { "epoch": 0.1752750194100067, "grad_norm": 2.9375, "learning_rate": 4.640618181104516e-05, "loss": 0.8395, "step": 12360 }, { "epoch": 0.1753033810636329, "grad_norm": 3.484375, "learning_rate": 4.6405027582550556e-05, "loss": 0.8736, "step": 12362 }, { "epoch": 0.17533174271725913, "grad_norm": 3.15625, "learning_rate": 4.640387318309258e-05, "loss": 0.8534, "step": 12364 }, { "epoch": 0.17536010437088534, "grad_norm": 3.1875, "learning_rate": 4.640271861268043e-05, "loss": 0.8289, "step": 12366 }, { "epoch": 0.17538846602451155, "grad_norm": 2.703125, "learning_rate": 4.640156387132335e-05, "loss": 0.7691, "step": 12368 }, { "epoch": 0.17541682767813777, "grad_norm": 3.03125, "learning_rate": 4.640040895903054e-05, "loss": 0.8178, "step": 12370 }, { "epoch": 0.17544518933176398, "grad_norm": 3.359375, "learning_rate": 4.639925387581125e-05, "loss": 0.8038, "step": 12372 }, { "epoch": 0.1754735509853902, "grad_norm": 2.953125, "learning_rate": 4.639809862167468e-05, "loss": 0.8212, "step": 12374 }, { "epoch": 0.1755019126390164, "grad_norm": 2.984375, "learning_rate": 4.639694319663008e-05, "loss": 0.8392, "step": 12376 }, { "epoch": 0.17553027429264265, "grad_norm": 2.984375, "learning_rate": 4.639578760068666e-05, "loss": 0.7977, "step": 12378 }, { "epoch": 0.17555863594626886, "grad_norm": 2.890625, "learning_rate": 4.6394631833853666e-05, "loss": 0.869, "step": 12380 }, { "epoch": 0.17558699759989507, "grad_norm": 3.453125, "learning_rate": 4.6393475896140306e-05, "loss": 0.8748, "step": 12382 }, { "epoch": 0.17561535925352129, "grad_norm": 3.203125, "learning_rate": 4.639231978755584e-05, "loss": 0.8879, "step": 12384 }, { "epoch": 0.1756437209071475, "grad_norm": 2.8125, "learning_rate": 4.639116350810948e-05, "loss": 0.8267, "step": 12386 }, { "epoch": 0.1756720825607737, "grad_norm": 3.5, "learning_rate": 4.639000705781048e-05, "loss": 0.8497, "step": 12388 }, { "epoch": 0.17570044421439993, "grad_norm": 3.5, "learning_rate": 4.6388850436668065e-05, "loss": 0.8637, "step": 12390 }, { "epoch": 0.17572880586802614, "grad_norm": 2.828125, "learning_rate": 4.6387693644691464e-05, "loss": 0.8515, "step": 12392 }, { "epoch": 0.17575716752165235, "grad_norm": 3.21875, "learning_rate": 4.6386536681889934e-05, "loss": 0.7947, "step": 12394 }, { "epoch": 0.17578552917527857, "grad_norm": 3.125, "learning_rate": 4.638537954827271e-05, "loss": 0.8897, "step": 12396 }, { "epoch": 0.17581389082890478, "grad_norm": 3.171875, "learning_rate": 4.638422224384904e-05, "loss": 0.8315, "step": 12398 }, { "epoch": 0.175842252482531, "grad_norm": 2.78125, "learning_rate": 4.638306476862815e-05, "loss": 0.847, "step": 12400 }, { "epoch": 0.1758706141361572, "grad_norm": 2.96875, "learning_rate": 4.63819071226193e-05, "loss": 0.8284, "step": 12402 }, { "epoch": 0.17589897578978342, "grad_norm": 3.078125, "learning_rate": 4.638074930583173e-05, "loss": 0.8503, "step": 12404 }, { "epoch": 0.17592733744340963, "grad_norm": 3.078125, "learning_rate": 4.6379591318274684e-05, "loss": 0.8233, "step": 12406 }, { "epoch": 0.17595569909703584, "grad_norm": 3.8125, "learning_rate": 4.637843315995742e-05, "loss": 0.8389, "step": 12408 }, { "epoch": 0.17598406075066206, "grad_norm": 2.90625, "learning_rate": 4.637727483088919e-05, "loss": 0.8146, "step": 12410 }, { "epoch": 0.17601242240428827, "grad_norm": 3.015625, "learning_rate": 4.6376116331079235e-05, "loss": 0.8279, "step": 12412 }, { "epoch": 0.17604078405791448, "grad_norm": 3.0625, "learning_rate": 4.6374957660536816e-05, "loss": 0.8128, "step": 12414 }, { "epoch": 0.1760691457115407, "grad_norm": 3.65625, "learning_rate": 4.6373798819271186e-05, "loss": 0.8043, "step": 12416 }, { "epoch": 0.17609750736516694, "grad_norm": 3.1875, "learning_rate": 4.637263980729159e-05, "loss": 0.8207, "step": 12418 }, { "epoch": 0.17612586901879315, "grad_norm": 3.015625, "learning_rate": 4.63714806246073e-05, "loss": 0.8652, "step": 12420 }, { "epoch": 0.17615423067241937, "grad_norm": 3.0625, "learning_rate": 4.637032127122757e-05, "loss": 0.8387, "step": 12422 }, { "epoch": 0.17618259232604558, "grad_norm": 2.78125, "learning_rate": 4.636916174716166e-05, "loss": 0.8219, "step": 12424 }, { "epoch": 0.1762109539796718, "grad_norm": 3.203125, "learning_rate": 4.636800205241882e-05, "loss": 0.8478, "step": 12426 }, { "epoch": 0.176239315633298, "grad_norm": 2.84375, "learning_rate": 4.6366842187008333e-05, "loss": 0.8115, "step": 12428 }, { "epoch": 0.17626767728692422, "grad_norm": 3.3125, "learning_rate": 4.636568215093945e-05, "loss": 0.8798, "step": 12430 }, { "epoch": 0.17629603894055043, "grad_norm": 3.34375, "learning_rate": 4.636452194422144e-05, "loss": 0.8187, "step": 12432 }, { "epoch": 0.17632440059417664, "grad_norm": 3.25, "learning_rate": 4.636336156686356e-05, "loss": 0.8794, "step": 12434 }, { "epoch": 0.17635276224780286, "grad_norm": 3.1875, "learning_rate": 4.63622010188751e-05, "loss": 0.8367, "step": 12436 }, { "epoch": 0.17638112390142907, "grad_norm": 3.015625, "learning_rate": 4.63610403002653e-05, "loss": 0.8112, "step": 12438 }, { "epoch": 0.17640948555505528, "grad_norm": 2.921875, "learning_rate": 4.635987941104346e-05, "loss": 0.8639, "step": 12440 }, { "epoch": 0.1764378472086815, "grad_norm": 3.4375, "learning_rate": 4.635871835121883e-05, "loss": 0.8322, "step": 12442 }, { "epoch": 0.1764662088623077, "grad_norm": 2.84375, "learning_rate": 4.63575571208007e-05, "loss": 0.8443, "step": 12444 }, { "epoch": 0.17649457051593392, "grad_norm": 3.203125, "learning_rate": 4.635639571979832e-05, "loss": 0.8351, "step": 12446 }, { "epoch": 0.17652293216956014, "grad_norm": 3.046875, "learning_rate": 4.6355234148221e-05, "loss": 0.8675, "step": 12448 }, { "epoch": 0.17655129382318635, "grad_norm": 2.734375, "learning_rate": 4.635407240607799e-05, "loss": 0.7643, "step": 12450 }, { "epoch": 0.17657965547681256, "grad_norm": 2.859375, "learning_rate": 4.635291049337859e-05, "loss": 0.8354, "step": 12452 }, { "epoch": 0.17660801713043878, "grad_norm": 2.84375, "learning_rate": 4.6351748410132055e-05, "loss": 0.8055, "step": 12454 }, { "epoch": 0.17663637878406502, "grad_norm": 2.734375, "learning_rate": 4.63505861563477e-05, "loss": 0.8122, "step": 12456 }, { "epoch": 0.17666474043769123, "grad_norm": 3.171875, "learning_rate": 4.634942373203477e-05, "loss": 0.8549, "step": 12458 }, { "epoch": 0.17669310209131744, "grad_norm": 3.125, "learning_rate": 4.634826113720258e-05, "loss": 0.8179, "step": 12460 }, { "epoch": 0.17672146374494366, "grad_norm": 3.03125, "learning_rate": 4.6347098371860396e-05, "loss": 0.8378, "step": 12462 }, { "epoch": 0.17674982539856987, "grad_norm": 2.84375, "learning_rate": 4.634593543601752e-05, "loss": 0.8412, "step": 12464 }, { "epoch": 0.17677818705219608, "grad_norm": 3.484375, "learning_rate": 4.634477232968324e-05, "loss": 0.8585, "step": 12466 }, { "epoch": 0.1768065487058223, "grad_norm": 3.375, "learning_rate": 4.634360905286683e-05, "loss": 0.8448, "step": 12468 }, { "epoch": 0.1768349103594485, "grad_norm": 3.125, "learning_rate": 4.6342445605577595e-05, "loss": 0.87, "step": 12470 }, { "epoch": 0.17686327201307472, "grad_norm": 3.171875, "learning_rate": 4.6341281987824817e-05, "loss": 0.8332, "step": 12472 }, { "epoch": 0.17689163366670094, "grad_norm": 3.703125, "learning_rate": 4.6340118199617805e-05, "loss": 0.8791, "step": 12474 }, { "epoch": 0.17691999532032715, "grad_norm": 2.71875, "learning_rate": 4.6338954240965836e-05, "loss": 0.8119, "step": 12476 }, { "epoch": 0.17694835697395336, "grad_norm": 3.125, "learning_rate": 4.633779011187823e-05, "loss": 0.8894, "step": 12478 }, { "epoch": 0.17697671862757958, "grad_norm": 2.890625, "learning_rate": 4.633662581236426e-05, "loss": 0.8229, "step": 12480 }, { "epoch": 0.1770050802812058, "grad_norm": 3.328125, "learning_rate": 4.633546134243324e-05, "loss": 0.8509, "step": 12482 }, { "epoch": 0.177033441934832, "grad_norm": 2.890625, "learning_rate": 4.633429670209447e-05, "loss": 0.7919, "step": 12484 }, { "epoch": 0.17706180358845822, "grad_norm": 3.109375, "learning_rate": 4.633313189135724e-05, "loss": 0.9022, "step": 12486 }, { "epoch": 0.17709016524208443, "grad_norm": 3.140625, "learning_rate": 4.633196691023087e-05, "loss": 0.9055, "step": 12488 }, { "epoch": 0.17711852689571064, "grad_norm": 3.0, "learning_rate": 4.6330801758724656e-05, "loss": 0.8362, "step": 12490 }, { "epoch": 0.17714688854933686, "grad_norm": 3.015625, "learning_rate": 4.632963643684791e-05, "loss": 0.8349, "step": 12492 }, { "epoch": 0.1771752502029631, "grad_norm": 3.375, "learning_rate": 4.632847094460992e-05, "loss": 0.8317, "step": 12494 }, { "epoch": 0.1772036118565893, "grad_norm": 3.21875, "learning_rate": 4.6327305282020026e-05, "loss": 0.8622, "step": 12496 }, { "epoch": 0.17723197351021552, "grad_norm": 3.390625, "learning_rate": 4.6326139449087514e-05, "loss": 0.8526, "step": 12498 }, { "epoch": 0.17726033516384174, "grad_norm": 2.96875, "learning_rate": 4.6324973445821705e-05, "loss": 0.8261, "step": 12500 }, { "epoch": 0.17728869681746795, "grad_norm": 2.75, "learning_rate": 4.6323807272231915e-05, "loss": 0.848, "step": 12502 }, { "epoch": 0.17731705847109416, "grad_norm": 3.015625, "learning_rate": 4.6322640928327455e-05, "loss": 0.8271, "step": 12504 }, { "epoch": 0.17734542012472038, "grad_norm": 3.15625, "learning_rate": 4.6321474414117636e-05, "loss": 0.84, "step": 12506 }, { "epoch": 0.1773737817783466, "grad_norm": 3.125, "learning_rate": 4.6320307729611775e-05, "loss": 0.8298, "step": 12508 }, { "epoch": 0.1774021434319728, "grad_norm": 3.0625, "learning_rate": 4.63191408748192e-05, "loss": 0.8465, "step": 12510 }, { "epoch": 0.17743050508559902, "grad_norm": 3.203125, "learning_rate": 4.631797384974922e-05, "loss": 0.7893, "step": 12512 }, { "epoch": 0.17745886673922523, "grad_norm": 3.0625, "learning_rate": 4.631680665441116e-05, "loss": 0.8321, "step": 12514 }, { "epoch": 0.17748722839285144, "grad_norm": 3.265625, "learning_rate": 4.631563928881435e-05, "loss": 0.8563, "step": 12516 }, { "epoch": 0.17751559004647766, "grad_norm": 3.1875, "learning_rate": 4.63144717529681e-05, "loss": 0.8354, "step": 12518 }, { "epoch": 0.17754395170010387, "grad_norm": 3.203125, "learning_rate": 4.6313304046881756e-05, "loss": 0.8669, "step": 12520 }, { "epoch": 0.17757231335373008, "grad_norm": 3.171875, "learning_rate": 4.631213617056462e-05, "loss": 0.8339, "step": 12522 }, { "epoch": 0.1776006750073563, "grad_norm": 3.390625, "learning_rate": 4.631096812402603e-05, "loss": 0.8774, "step": 12524 }, { "epoch": 0.1776290366609825, "grad_norm": 2.828125, "learning_rate": 4.630979990727532e-05, "loss": 0.8019, "step": 12526 }, { "epoch": 0.17765739831460872, "grad_norm": 3.265625, "learning_rate": 4.630863152032181e-05, "loss": 0.8873, "step": 12528 }, { "epoch": 0.17768575996823494, "grad_norm": 3.015625, "learning_rate": 4.6307462963174845e-05, "loss": 0.8402, "step": 12530 }, { "epoch": 0.17771412162186118, "grad_norm": 3.203125, "learning_rate": 4.630629423584376e-05, "loss": 0.8487, "step": 12532 }, { "epoch": 0.1777424832754874, "grad_norm": 2.765625, "learning_rate": 4.630512533833787e-05, "loss": 0.8458, "step": 12534 }, { "epoch": 0.1777708449291136, "grad_norm": 2.90625, "learning_rate": 4.630395627066653e-05, "loss": 0.822, "step": 12536 }, { "epoch": 0.17779920658273982, "grad_norm": 2.90625, "learning_rate": 4.630278703283906e-05, "loss": 0.849, "step": 12538 }, { "epoch": 0.17782756823636603, "grad_norm": 2.859375, "learning_rate": 4.6301617624864815e-05, "loss": 0.8411, "step": 12540 }, { "epoch": 0.17785592988999224, "grad_norm": 3.078125, "learning_rate": 4.630044804675313e-05, "loss": 0.8556, "step": 12542 }, { "epoch": 0.17788429154361846, "grad_norm": 2.828125, "learning_rate": 4.629927829851334e-05, "loss": 0.8855, "step": 12544 }, { "epoch": 0.17791265319724467, "grad_norm": 2.953125, "learning_rate": 4.62981083801548e-05, "loss": 0.8526, "step": 12546 }, { "epoch": 0.17794101485087088, "grad_norm": 3.140625, "learning_rate": 4.6296938291686845e-05, "loss": 0.8358, "step": 12548 }, { "epoch": 0.1779693765044971, "grad_norm": 2.765625, "learning_rate": 4.629576803311882e-05, "loss": 0.8437, "step": 12550 }, { "epoch": 0.1779977381581233, "grad_norm": 3.078125, "learning_rate": 4.6294597604460086e-05, "loss": 0.8169, "step": 12552 }, { "epoch": 0.17802609981174952, "grad_norm": 3.046875, "learning_rate": 4.629342700571998e-05, "loss": 0.8293, "step": 12554 }, { "epoch": 0.17805446146537574, "grad_norm": 3.0625, "learning_rate": 4.629225623690784e-05, "loss": 0.8989, "step": 12556 }, { "epoch": 0.17808282311900195, "grad_norm": 2.671875, "learning_rate": 4.6291085298033035e-05, "loss": 0.8461, "step": 12558 }, { "epoch": 0.17811118477262816, "grad_norm": 3.21875, "learning_rate": 4.6289914189104914e-05, "loss": 0.8541, "step": 12560 }, { "epoch": 0.17813954642625437, "grad_norm": 2.890625, "learning_rate": 4.6288742910132834e-05, "loss": 0.8325, "step": 12562 }, { "epoch": 0.1781679080798806, "grad_norm": 3.234375, "learning_rate": 4.6287571461126134e-05, "loss": 0.8721, "step": 12564 }, { "epoch": 0.1781962697335068, "grad_norm": 3.15625, "learning_rate": 4.6286399842094185e-05, "loss": 0.8826, "step": 12566 }, { "epoch": 0.17822463138713301, "grad_norm": 3.109375, "learning_rate": 4.628522805304634e-05, "loss": 0.832, "step": 12568 }, { "epoch": 0.17825299304075923, "grad_norm": 3.078125, "learning_rate": 4.628405609399196e-05, "loss": 0.8381, "step": 12570 }, { "epoch": 0.17828135469438547, "grad_norm": 2.96875, "learning_rate": 4.62828839649404e-05, "loss": 0.8607, "step": 12572 }, { "epoch": 0.17830971634801168, "grad_norm": 3.078125, "learning_rate": 4.628171166590103e-05, "loss": 0.8604, "step": 12574 }, { "epoch": 0.1783380780016379, "grad_norm": 3.34375, "learning_rate": 4.628053919688321e-05, "loss": 0.8196, "step": 12576 }, { "epoch": 0.1783664396552641, "grad_norm": 2.96875, "learning_rate": 4.6279366557896296e-05, "loss": 0.818, "step": 12578 }, { "epoch": 0.17839480130889032, "grad_norm": 3.0, "learning_rate": 4.627819374894967e-05, "loss": 0.8581, "step": 12580 }, { "epoch": 0.17842316296251653, "grad_norm": 3.40625, "learning_rate": 4.6277020770052695e-05, "loss": 0.8901, "step": 12582 }, { "epoch": 0.17845152461614275, "grad_norm": 2.984375, "learning_rate": 4.627584762121473e-05, "loss": 0.832, "step": 12584 }, { "epoch": 0.17847988626976896, "grad_norm": 2.984375, "learning_rate": 4.627467430244515e-05, "loss": 0.8414, "step": 12586 }, { "epoch": 0.17850824792339517, "grad_norm": 3.1875, "learning_rate": 4.6273500813753324e-05, "loss": 0.8246, "step": 12588 }, { "epoch": 0.1785366095770214, "grad_norm": 2.875, "learning_rate": 4.6272327155148634e-05, "loss": 0.8164, "step": 12590 }, { "epoch": 0.1785649712306476, "grad_norm": 2.828125, "learning_rate": 4.627115332664045e-05, "loss": 0.8129, "step": 12592 }, { "epoch": 0.17859333288427381, "grad_norm": 2.859375, "learning_rate": 4.626997932823815e-05, "loss": 0.8421, "step": 12594 }, { "epoch": 0.17862169453790003, "grad_norm": 3.28125, "learning_rate": 4.6268805159951086e-05, "loss": 0.8196, "step": 12596 }, { "epoch": 0.17865005619152624, "grad_norm": 2.90625, "learning_rate": 4.6267630821788675e-05, "loss": 0.8847, "step": 12598 }, { "epoch": 0.17867841784515245, "grad_norm": 3.328125, "learning_rate": 4.626645631376027e-05, "loss": 0.9046, "step": 12600 }, { "epoch": 0.17870677949877867, "grad_norm": 3.71875, "learning_rate": 4.626528163587527e-05, "loss": 0.8962, "step": 12602 }, { "epoch": 0.17873514115240488, "grad_norm": 3.453125, "learning_rate": 4.6264106788143035e-05, "loss": 0.8703, "step": 12604 }, { "epoch": 0.1787635028060311, "grad_norm": 3.140625, "learning_rate": 4.626293177057296e-05, "loss": 0.8129, "step": 12606 }, { "epoch": 0.1787918644596573, "grad_norm": 3.1875, "learning_rate": 4.6261756583174443e-05, "loss": 0.8194, "step": 12608 }, { "epoch": 0.17882022611328355, "grad_norm": 3.0625, "learning_rate": 4.6260581225956844e-05, "loss": 0.8133, "step": 12610 }, { "epoch": 0.17884858776690976, "grad_norm": 2.796875, "learning_rate": 4.625940569892958e-05, "loss": 0.8832, "step": 12612 }, { "epoch": 0.17887694942053597, "grad_norm": 3.015625, "learning_rate": 4.625823000210201e-05, "loss": 0.8218, "step": 12614 }, { "epoch": 0.1789053110741622, "grad_norm": 2.84375, "learning_rate": 4.6257054135483546e-05, "loss": 0.844, "step": 12616 }, { "epoch": 0.1789336727277884, "grad_norm": 2.78125, "learning_rate": 4.6255878099083575e-05, "loss": 0.8746, "step": 12618 }, { "epoch": 0.17896203438141461, "grad_norm": 3.140625, "learning_rate": 4.625470189291148e-05, "loss": 0.839, "step": 12620 }, { "epoch": 0.17899039603504083, "grad_norm": 3.078125, "learning_rate": 4.625352551697667e-05, "loss": 0.8041, "step": 12622 }, { "epoch": 0.17901875768866704, "grad_norm": 3.21875, "learning_rate": 4.625234897128854e-05, "loss": 0.8834, "step": 12624 }, { "epoch": 0.17904711934229325, "grad_norm": 2.953125, "learning_rate": 4.6251172255856465e-05, "loss": 0.7874, "step": 12626 }, { "epoch": 0.17907548099591947, "grad_norm": 3.140625, "learning_rate": 4.624999537068987e-05, "loss": 0.8547, "step": 12628 }, { "epoch": 0.17910384264954568, "grad_norm": 2.96875, "learning_rate": 4.624881831579815e-05, "loss": 0.8339, "step": 12630 }, { "epoch": 0.1791322043031719, "grad_norm": 3.34375, "learning_rate": 4.624764109119069e-05, "loss": 0.8424, "step": 12632 }, { "epoch": 0.1791605659567981, "grad_norm": 3.28125, "learning_rate": 4.624646369687691e-05, "loss": 0.8683, "step": 12634 }, { "epoch": 0.17918892761042432, "grad_norm": 3.0625, "learning_rate": 4.62452861328662e-05, "loss": 0.8303, "step": 12636 }, { "epoch": 0.17921728926405053, "grad_norm": 2.6875, "learning_rate": 4.624410839916798e-05, "loss": 0.8176, "step": 12638 }, { "epoch": 0.17924565091767675, "grad_norm": 3.078125, "learning_rate": 4.6242930495791646e-05, "loss": 0.8482, "step": 12640 }, { "epoch": 0.17927401257130296, "grad_norm": 2.921875, "learning_rate": 4.624175242274661e-05, "loss": 0.8685, "step": 12642 }, { "epoch": 0.17930237422492917, "grad_norm": 3.390625, "learning_rate": 4.624057418004228e-05, "loss": 0.8555, "step": 12644 }, { "epoch": 0.1793307358785554, "grad_norm": 3.625, "learning_rate": 4.623939576768807e-05, "loss": 0.8978, "step": 12646 }, { "epoch": 0.17935909753218163, "grad_norm": 3.1875, "learning_rate": 4.623821718569338e-05, "loss": 0.8696, "step": 12648 }, { "epoch": 0.17938745918580784, "grad_norm": 3.109375, "learning_rate": 4.623703843406764e-05, "loss": 0.8355, "step": 12650 }, { "epoch": 0.17941582083943405, "grad_norm": 3.015625, "learning_rate": 4.623585951282026e-05, "loss": 0.8115, "step": 12652 }, { "epoch": 0.17944418249306027, "grad_norm": 3.03125, "learning_rate": 4.6234680421960655e-05, "loss": 0.8517, "step": 12654 }, { "epoch": 0.17947254414668648, "grad_norm": 3.15625, "learning_rate": 4.623350116149823e-05, "loss": 0.8207, "step": 12656 }, { "epoch": 0.1795009058003127, "grad_norm": 2.828125, "learning_rate": 4.623232173144242e-05, "loss": 0.8364, "step": 12658 }, { "epoch": 0.1795292674539389, "grad_norm": 2.9375, "learning_rate": 4.623114213180264e-05, "loss": 0.8285, "step": 12660 }, { "epoch": 0.17955762910756512, "grad_norm": 3.359375, "learning_rate": 4.622996236258832e-05, "loss": 0.8542, "step": 12662 }, { "epoch": 0.17958599076119133, "grad_norm": 3.078125, "learning_rate": 4.622878242380887e-05, "loss": 0.8826, "step": 12664 }, { "epoch": 0.17961435241481755, "grad_norm": 3.390625, "learning_rate": 4.622760231547371e-05, "loss": 0.8536, "step": 12666 }, { "epoch": 0.17964271406844376, "grad_norm": 2.90625, "learning_rate": 4.6226422037592286e-05, "loss": 0.8177, "step": 12668 }, { "epoch": 0.17967107572206997, "grad_norm": 2.71875, "learning_rate": 4.6225241590174005e-05, "loss": 0.7825, "step": 12670 }, { "epoch": 0.17969943737569619, "grad_norm": 2.71875, "learning_rate": 4.6224060973228314e-05, "loss": 0.7484, "step": 12672 }, { "epoch": 0.1797277990293224, "grad_norm": 2.96875, "learning_rate": 4.622288018676462e-05, "loss": 0.8645, "step": 12674 }, { "epoch": 0.1797561606829486, "grad_norm": 3.296875, "learning_rate": 4.622169923079237e-05, "loss": 0.8663, "step": 12676 }, { "epoch": 0.17978452233657483, "grad_norm": 3.09375, "learning_rate": 4.6220518105320996e-05, "loss": 0.86, "step": 12678 }, { "epoch": 0.17981288399020104, "grad_norm": 3.046875, "learning_rate": 4.621933681035992e-05, "loss": 0.8226, "step": 12680 }, { "epoch": 0.17984124564382725, "grad_norm": 2.640625, "learning_rate": 4.62181553459186e-05, "loss": 0.8311, "step": 12682 }, { "epoch": 0.17986960729745347, "grad_norm": 3.140625, "learning_rate": 4.621697371200645e-05, "loss": 0.8647, "step": 12684 }, { "epoch": 0.1798979689510797, "grad_norm": 3.546875, "learning_rate": 4.621579190863292e-05, "loss": 0.8611, "step": 12686 }, { "epoch": 0.17992633060470592, "grad_norm": 2.875, "learning_rate": 4.621460993580744e-05, "loss": 0.832, "step": 12688 }, { "epoch": 0.17995469225833213, "grad_norm": 2.859375, "learning_rate": 4.6213427793539457e-05, "loss": 0.7857, "step": 12690 }, { "epoch": 0.17998305391195835, "grad_norm": 3.15625, "learning_rate": 4.621224548183841e-05, "loss": 0.8165, "step": 12692 }, { "epoch": 0.18001141556558456, "grad_norm": 3.5625, "learning_rate": 4.621106300071374e-05, "loss": 0.8272, "step": 12694 }, { "epoch": 0.18003977721921077, "grad_norm": 2.859375, "learning_rate": 4.6209880350174904e-05, "loss": 0.7935, "step": 12696 }, { "epoch": 0.18006813887283699, "grad_norm": 3.25, "learning_rate": 4.620869753023133e-05, "loss": 0.8143, "step": 12698 }, { "epoch": 0.1800965005264632, "grad_norm": 3.234375, "learning_rate": 4.6207514540892485e-05, "loss": 0.8784, "step": 12700 }, { "epoch": 0.1801248621800894, "grad_norm": 2.78125, "learning_rate": 4.62063313821678e-05, "loss": 0.8044, "step": 12702 }, { "epoch": 0.18015322383371563, "grad_norm": 3.25, "learning_rate": 4.620514805406673e-05, "loss": 0.8564, "step": 12704 }, { "epoch": 0.18018158548734184, "grad_norm": 2.640625, "learning_rate": 4.6203964556598734e-05, "loss": 0.813, "step": 12706 }, { "epoch": 0.18020994714096805, "grad_norm": 3.09375, "learning_rate": 4.620278088977325e-05, "loss": 0.8727, "step": 12708 }, { "epoch": 0.18023830879459427, "grad_norm": 3.140625, "learning_rate": 4.620159705359975e-05, "loss": 0.8759, "step": 12710 }, { "epoch": 0.18026667044822048, "grad_norm": 2.875, "learning_rate": 4.620041304808767e-05, "loss": 0.8334, "step": 12712 }, { "epoch": 0.1802950321018467, "grad_norm": 2.859375, "learning_rate": 4.619922887324649e-05, "loss": 0.7911, "step": 12714 }, { "epoch": 0.1803233937554729, "grad_norm": 3.203125, "learning_rate": 4.619804452908565e-05, "loss": 0.839, "step": 12716 }, { "epoch": 0.18035175540909912, "grad_norm": 3.234375, "learning_rate": 4.619686001561461e-05, "loss": 0.7925, "step": 12718 }, { "epoch": 0.18038011706272533, "grad_norm": 3.015625, "learning_rate": 4.619567533284284e-05, "loss": 0.8259, "step": 12720 }, { "epoch": 0.18040847871635154, "grad_norm": 3.03125, "learning_rate": 4.619449048077979e-05, "loss": 0.855, "step": 12722 }, { "epoch": 0.18043684036997776, "grad_norm": 2.75, "learning_rate": 4.619330545943493e-05, "loss": 0.8324, "step": 12724 }, { "epoch": 0.180465202023604, "grad_norm": 3.078125, "learning_rate": 4.619212026881773e-05, "loss": 0.8099, "step": 12726 }, { "epoch": 0.1804935636772302, "grad_norm": 3.109375, "learning_rate": 4.619093490893766e-05, "loss": 0.8235, "step": 12728 }, { "epoch": 0.18052192533085643, "grad_norm": 3.03125, "learning_rate": 4.618974937980417e-05, "loss": 0.7999, "step": 12730 }, { "epoch": 0.18055028698448264, "grad_norm": 3.0625, "learning_rate": 4.618856368142674e-05, "loss": 0.8385, "step": 12732 }, { "epoch": 0.18057864863810885, "grad_norm": 3.0, "learning_rate": 4.6187377813814844e-05, "loss": 0.8238, "step": 12734 }, { "epoch": 0.18060701029173507, "grad_norm": 2.796875, "learning_rate": 4.6186191776977946e-05, "loss": 0.791, "step": 12736 }, { "epoch": 0.18063537194536128, "grad_norm": 2.859375, "learning_rate": 4.6185005570925515e-05, "loss": 0.8129, "step": 12738 }, { "epoch": 0.1806637335989875, "grad_norm": 3.0625, "learning_rate": 4.618381919566703e-05, "loss": 0.8641, "step": 12740 }, { "epoch": 0.1806920952526137, "grad_norm": 3.109375, "learning_rate": 4.6182632651211976e-05, "loss": 0.8208, "step": 12742 }, { "epoch": 0.18072045690623992, "grad_norm": 3.25, "learning_rate": 4.618144593756982e-05, "loss": 0.8637, "step": 12744 }, { "epoch": 0.18074881855986613, "grad_norm": 2.96875, "learning_rate": 4.618025905475004e-05, "loss": 0.8398, "step": 12746 }, { "epoch": 0.18077718021349234, "grad_norm": 3.125, "learning_rate": 4.6179072002762123e-05, "loss": 0.8371, "step": 12748 }, { "epoch": 0.18080554186711856, "grad_norm": 3.078125, "learning_rate": 4.617788478161554e-05, "loss": 0.814, "step": 12750 }, { "epoch": 0.18083390352074477, "grad_norm": 3.0625, "learning_rate": 4.617669739131979e-05, "loss": 0.8517, "step": 12752 }, { "epoch": 0.18086226517437098, "grad_norm": 3.375, "learning_rate": 4.6175509831884336e-05, "loss": 0.827, "step": 12754 }, { "epoch": 0.1808906268279972, "grad_norm": 3.578125, "learning_rate": 4.617432210331867e-05, "loss": 0.7785, "step": 12756 }, { "epoch": 0.1809189884816234, "grad_norm": 2.828125, "learning_rate": 4.6173134205632284e-05, "loss": 0.805, "step": 12758 }, { "epoch": 0.18094735013524962, "grad_norm": 2.96875, "learning_rate": 4.6171946138834667e-05, "loss": 0.7827, "step": 12760 }, { "epoch": 0.18097571178887584, "grad_norm": 2.96875, "learning_rate": 4.6170757902935296e-05, "loss": 0.8323, "step": 12762 }, { "epoch": 0.18100407344250208, "grad_norm": 3.046875, "learning_rate": 4.6169569497943674e-05, "loss": 0.8319, "step": 12764 }, { "epoch": 0.1810324350961283, "grad_norm": 3.15625, "learning_rate": 4.616838092386929e-05, "loss": 0.8897, "step": 12766 }, { "epoch": 0.1810607967497545, "grad_norm": 3.25, "learning_rate": 4.616719218072163e-05, "loss": 0.8245, "step": 12768 }, { "epoch": 0.18108915840338072, "grad_norm": 2.96875, "learning_rate": 4.61660032685102e-05, "loss": 0.8604, "step": 12770 }, { "epoch": 0.18111752005700693, "grad_norm": 3.46875, "learning_rate": 4.616481418724449e-05, "loss": 0.9098, "step": 12772 }, { "epoch": 0.18114588171063314, "grad_norm": 3.15625, "learning_rate": 4.6163624936933994e-05, "loss": 0.8468, "step": 12774 }, { "epoch": 0.18117424336425936, "grad_norm": 3.203125, "learning_rate": 4.6162435517588216e-05, "loss": 0.884, "step": 12776 }, { "epoch": 0.18120260501788557, "grad_norm": 3.40625, "learning_rate": 4.6161245929216655e-05, "loss": 0.875, "step": 12778 }, { "epoch": 0.18123096667151178, "grad_norm": 3.0625, "learning_rate": 4.616005617182881e-05, "loss": 0.7976, "step": 12780 }, { "epoch": 0.181259328325138, "grad_norm": 3.25, "learning_rate": 4.615886624543418e-05, "loss": 0.8438, "step": 12782 }, { "epoch": 0.1812876899787642, "grad_norm": 2.90625, "learning_rate": 4.6157676150042286e-05, "loss": 0.8343, "step": 12784 }, { "epoch": 0.18131605163239042, "grad_norm": 2.921875, "learning_rate": 4.6156485885662606e-05, "loss": 0.8782, "step": 12786 }, { "epoch": 0.18134441328601664, "grad_norm": 3.375, "learning_rate": 4.615529545230467e-05, "loss": 0.8824, "step": 12788 }, { "epoch": 0.18137277493964285, "grad_norm": 3.15625, "learning_rate": 4.615410484997798e-05, "loss": 0.8476, "step": 12790 }, { "epoch": 0.18140113659326906, "grad_norm": 2.8125, "learning_rate": 4.6152914078692046e-05, "loss": 0.8412, "step": 12792 }, { "epoch": 0.18142949824689528, "grad_norm": 3.265625, "learning_rate": 4.615172313845637e-05, "loss": 0.8553, "step": 12794 }, { "epoch": 0.1814578599005215, "grad_norm": 3.5, "learning_rate": 4.615053202928047e-05, "loss": 0.8119, "step": 12796 }, { "epoch": 0.1814862215541477, "grad_norm": 3.0625, "learning_rate": 4.614934075117386e-05, "loss": 0.8376, "step": 12798 }, { "epoch": 0.18151458320777392, "grad_norm": 2.984375, "learning_rate": 4.614814930414606e-05, "loss": 0.8349, "step": 12800 }, { "epoch": 0.18154294486140016, "grad_norm": 3.1875, "learning_rate": 4.6146957688206585e-05, "loss": 0.8453, "step": 12802 }, { "epoch": 0.18157130651502637, "grad_norm": 3.015625, "learning_rate": 4.614576590336493e-05, "loss": 0.861, "step": 12804 }, { "epoch": 0.18159966816865258, "grad_norm": 3.296875, "learning_rate": 4.614457394963065e-05, "loss": 0.8354, "step": 12806 }, { "epoch": 0.1816280298222788, "grad_norm": 3.046875, "learning_rate": 4.6143381827013246e-05, "loss": 0.8669, "step": 12808 }, { "epoch": 0.181656391475905, "grad_norm": 3.125, "learning_rate": 4.614218953552223e-05, "loss": 0.8659, "step": 12810 }, { "epoch": 0.18168475312953122, "grad_norm": 3.140625, "learning_rate": 4.614099707516715e-05, "loss": 0.8282, "step": 12812 }, { "epoch": 0.18171311478315744, "grad_norm": 3.609375, "learning_rate": 4.61398044459575e-05, "loss": 0.8183, "step": 12814 }, { "epoch": 0.18174147643678365, "grad_norm": 3.453125, "learning_rate": 4.613861164790283e-05, "loss": 0.8283, "step": 12816 }, { "epoch": 0.18176983809040986, "grad_norm": 2.828125, "learning_rate": 4.613741868101266e-05, "loss": 0.8666, "step": 12818 }, { "epoch": 0.18179819974403608, "grad_norm": 3.0625, "learning_rate": 4.613622554529652e-05, "loss": 0.8464, "step": 12820 }, { "epoch": 0.1818265613976623, "grad_norm": 3.328125, "learning_rate": 4.613503224076393e-05, "loss": 0.8237, "step": 12822 }, { "epoch": 0.1818549230512885, "grad_norm": 3.296875, "learning_rate": 4.613383876742443e-05, "loss": 0.8372, "step": 12824 }, { "epoch": 0.18188328470491472, "grad_norm": 3.046875, "learning_rate": 4.613264512528755e-05, "loss": 0.8469, "step": 12826 }, { "epoch": 0.18191164635854093, "grad_norm": 2.84375, "learning_rate": 4.613145131436283e-05, "loss": 0.8517, "step": 12828 }, { "epoch": 0.18194000801216714, "grad_norm": 3.25, "learning_rate": 4.6130257334659794e-05, "loss": 0.8702, "step": 12830 }, { "epoch": 0.18196836966579336, "grad_norm": 3.59375, "learning_rate": 4.612906318618798e-05, "loss": 0.8935, "step": 12832 }, { "epoch": 0.18199673131941957, "grad_norm": 3.03125, "learning_rate": 4.612786886895693e-05, "loss": 0.8124, "step": 12834 }, { "epoch": 0.18202509297304578, "grad_norm": 3.3125, "learning_rate": 4.6126674382976185e-05, "loss": 0.8502, "step": 12836 }, { "epoch": 0.182053454626672, "grad_norm": 3.015625, "learning_rate": 4.6125479728255286e-05, "loss": 0.8418, "step": 12838 }, { "epoch": 0.18208181628029824, "grad_norm": 3.421875, "learning_rate": 4.612428490480376e-05, "loss": 0.8009, "step": 12840 }, { "epoch": 0.18211017793392445, "grad_norm": 3.3125, "learning_rate": 4.612308991263118e-05, "loss": 0.8816, "step": 12842 }, { "epoch": 0.18213853958755066, "grad_norm": 3.078125, "learning_rate": 4.612189475174705e-05, "loss": 0.8444, "step": 12844 }, { "epoch": 0.18216690124117688, "grad_norm": 2.953125, "learning_rate": 4.6120699422160954e-05, "loss": 0.8561, "step": 12846 }, { "epoch": 0.1821952628948031, "grad_norm": 2.75, "learning_rate": 4.6119503923882416e-05, "loss": 0.8057, "step": 12848 }, { "epoch": 0.1822236245484293, "grad_norm": 3.109375, "learning_rate": 4.6118308256920994e-05, "loss": 0.8083, "step": 12850 }, { "epoch": 0.18225198620205552, "grad_norm": 2.953125, "learning_rate": 4.6117112421286235e-05, "loss": 0.8472, "step": 12852 }, { "epoch": 0.18228034785568173, "grad_norm": 2.96875, "learning_rate": 4.6115916416987686e-05, "loss": 0.8956, "step": 12854 }, { "epoch": 0.18230870950930794, "grad_norm": 2.890625, "learning_rate": 4.611472024403491e-05, "loss": 0.8338, "step": 12856 }, { "epoch": 0.18233707116293416, "grad_norm": 3.734375, "learning_rate": 4.611352390243745e-05, "loss": 0.8513, "step": 12858 }, { "epoch": 0.18236543281656037, "grad_norm": 3.359375, "learning_rate": 4.611232739220487e-05, "loss": 0.782, "step": 12860 }, { "epoch": 0.18239379447018658, "grad_norm": 2.984375, "learning_rate": 4.611113071334673e-05, "loss": 0.8203, "step": 12862 }, { "epoch": 0.1824221561238128, "grad_norm": 3.453125, "learning_rate": 4.610993386587257e-05, "loss": 0.7996, "step": 12864 }, { "epoch": 0.182450517777439, "grad_norm": 3.015625, "learning_rate": 4.610873684979196e-05, "loss": 0.8971, "step": 12866 }, { "epoch": 0.18247887943106522, "grad_norm": 3.21875, "learning_rate": 4.610753966511446e-05, "loss": 0.8564, "step": 12868 }, { "epoch": 0.18250724108469144, "grad_norm": 3.296875, "learning_rate": 4.610634231184964e-05, "loss": 0.8376, "step": 12870 }, { "epoch": 0.18253560273831765, "grad_norm": 3.28125, "learning_rate": 4.610514479000706e-05, "loss": 0.8547, "step": 12872 }, { "epoch": 0.18256396439194386, "grad_norm": 3.34375, "learning_rate": 4.610394709959627e-05, "loss": 0.8569, "step": 12874 }, { "epoch": 0.18259232604557007, "grad_norm": 3.359375, "learning_rate": 4.610274924062685e-05, "loss": 0.8305, "step": 12876 }, { "epoch": 0.1826206876991963, "grad_norm": 2.953125, "learning_rate": 4.6101551213108365e-05, "loss": 0.8287, "step": 12878 }, { "epoch": 0.18264904935282253, "grad_norm": 2.859375, "learning_rate": 4.610035301705038e-05, "loss": 0.8344, "step": 12880 }, { "epoch": 0.18267741100644874, "grad_norm": 3.125, "learning_rate": 4.6099154652462474e-05, "loss": 0.859, "step": 12882 }, { "epoch": 0.18270577266007496, "grad_norm": 3.296875, "learning_rate": 4.609795611935421e-05, "loss": 0.8725, "step": 12884 }, { "epoch": 0.18273413431370117, "grad_norm": 3.1875, "learning_rate": 4.609675741773516e-05, "loss": 0.8796, "step": 12886 }, { "epoch": 0.18276249596732738, "grad_norm": 3.265625, "learning_rate": 4.60955585476149e-05, "loss": 0.8429, "step": 12888 }, { "epoch": 0.1827908576209536, "grad_norm": 2.828125, "learning_rate": 4.6094359509003014e-05, "loss": 0.827, "step": 12890 }, { "epoch": 0.1828192192745798, "grad_norm": 3.0625, "learning_rate": 4.609316030190906e-05, "loss": 0.8486, "step": 12892 }, { "epoch": 0.18284758092820602, "grad_norm": 2.90625, "learning_rate": 4.6091960926342644e-05, "loss": 0.8078, "step": 12894 }, { "epoch": 0.18287594258183223, "grad_norm": 3.1875, "learning_rate": 4.609076138231332e-05, "loss": 0.8252, "step": 12896 }, { "epoch": 0.18290430423545845, "grad_norm": 3.0625, "learning_rate": 4.608956166983068e-05, "loss": 0.8628, "step": 12898 }, { "epoch": 0.18293266588908466, "grad_norm": 3.0625, "learning_rate": 4.60883617889043e-05, "loss": 0.8249, "step": 12900 }, { "epoch": 0.18296102754271087, "grad_norm": 3.0, "learning_rate": 4.608716173954377e-05, "loss": 0.8377, "step": 12902 }, { "epoch": 0.1829893891963371, "grad_norm": 3.0625, "learning_rate": 4.608596152175868e-05, "loss": 0.8572, "step": 12904 }, { "epoch": 0.1830177508499633, "grad_norm": 2.875, "learning_rate": 4.60847611355586e-05, "loss": 0.8088, "step": 12906 }, { "epoch": 0.18304611250358951, "grad_norm": 3.015625, "learning_rate": 4.608356058095313e-05, "loss": 0.8655, "step": 12908 }, { "epoch": 0.18307447415721573, "grad_norm": 3.0625, "learning_rate": 4.608235985795185e-05, "loss": 0.8257, "step": 12910 }, { "epoch": 0.18310283581084194, "grad_norm": 3.21875, "learning_rate": 4.608115896656437e-05, "loss": 0.8127, "step": 12912 }, { "epoch": 0.18313119746446815, "grad_norm": 3.140625, "learning_rate": 4.607995790680025e-05, "loss": 0.8644, "step": 12914 }, { "epoch": 0.18315955911809437, "grad_norm": 3.40625, "learning_rate": 4.607875667866911e-05, "loss": 0.851, "step": 12916 }, { "epoch": 0.1831879207717206, "grad_norm": 3.25, "learning_rate": 4.6077555282180527e-05, "loss": 0.8592, "step": 12918 }, { "epoch": 0.18321628242534682, "grad_norm": 2.671875, "learning_rate": 4.607635371734411e-05, "loss": 0.8004, "step": 12920 }, { "epoch": 0.18324464407897303, "grad_norm": 2.75, "learning_rate": 4.607515198416945e-05, "loss": 0.8097, "step": 12922 }, { "epoch": 0.18327300573259925, "grad_norm": 2.890625, "learning_rate": 4.607395008266615e-05, "loss": 0.8322, "step": 12924 }, { "epoch": 0.18330136738622546, "grad_norm": 3.625, "learning_rate": 4.60727480128438e-05, "loss": 0.8453, "step": 12926 }, { "epoch": 0.18332972903985167, "grad_norm": 3.125, "learning_rate": 4.6071545774712e-05, "loss": 0.8569, "step": 12928 }, { "epoch": 0.1833580906934779, "grad_norm": 3.0625, "learning_rate": 4.607034336828037e-05, "loss": 0.8407, "step": 12930 }, { "epoch": 0.1833864523471041, "grad_norm": 3.25, "learning_rate": 4.6069140793558495e-05, "loss": 0.8226, "step": 12932 }, { "epoch": 0.18341481400073031, "grad_norm": 2.9375, "learning_rate": 4.6067938050555985e-05, "loss": 0.869, "step": 12934 }, { "epoch": 0.18344317565435653, "grad_norm": 3.265625, "learning_rate": 4.606673513928245e-05, "loss": 0.8623, "step": 12936 }, { "epoch": 0.18347153730798274, "grad_norm": 3.1875, "learning_rate": 4.60655320597475e-05, "loss": 0.8535, "step": 12938 }, { "epoch": 0.18349989896160895, "grad_norm": 3.484375, "learning_rate": 4.606432881196074e-05, "loss": 0.8589, "step": 12940 }, { "epoch": 0.18352826061523517, "grad_norm": 2.921875, "learning_rate": 4.606312539593178e-05, "loss": 0.8367, "step": 12942 }, { "epoch": 0.18355662226886138, "grad_norm": 3.171875, "learning_rate": 4.606192181167023e-05, "loss": 0.8597, "step": 12944 }, { "epoch": 0.1835849839224876, "grad_norm": 3.625, "learning_rate": 4.606071805918571e-05, "loss": 0.8607, "step": 12946 }, { "epoch": 0.1836133455761138, "grad_norm": 3.21875, "learning_rate": 4.605951413848783e-05, "loss": 0.8627, "step": 12948 }, { "epoch": 0.18364170722974002, "grad_norm": 3.234375, "learning_rate": 4.6058310049586196e-05, "loss": 0.8562, "step": 12950 }, { "epoch": 0.18367006888336623, "grad_norm": 3.328125, "learning_rate": 4.6057105792490446e-05, "loss": 0.8422, "step": 12952 }, { "epoch": 0.18369843053699245, "grad_norm": 2.875, "learning_rate": 4.6055901367210185e-05, "loss": 0.7802, "step": 12954 }, { "epoch": 0.1837267921906187, "grad_norm": 3.0, "learning_rate": 4.6054696773755036e-05, "loss": 0.8314, "step": 12956 }, { "epoch": 0.1837551538442449, "grad_norm": 3.46875, "learning_rate": 4.6053492012134615e-05, "loss": 0.7939, "step": 12958 }, { "epoch": 0.1837835154978711, "grad_norm": 3.15625, "learning_rate": 4.6052287082358556e-05, "loss": 0.8382, "step": 12960 }, { "epoch": 0.18381187715149733, "grad_norm": 3.140625, "learning_rate": 4.605108198443647e-05, "loss": 0.8241, "step": 12962 }, { "epoch": 0.18384023880512354, "grad_norm": 3.40625, "learning_rate": 4.604987671837799e-05, "loss": 0.8548, "step": 12964 }, { "epoch": 0.18386860045874975, "grad_norm": 3.140625, "learning_rate": 4.604867128419273e-05, "loss": 0.8871, "step": 12966 }, { "epoch": 0.18389696211237597, "grad_norm": 3.125, "learning_rate": 4.604746568189034e-05, "loss": 0.8224, "step": 12968 }, { "epoch": 0.18392532376600218, "grad_norm": 3.6875, "learning_rate": 4.6046259911480446e-05, "loss": 0.862, "step": 12970 }, { "epoch": 0.1839536854196284, "grad_norm": 3.171875, "learning_rate": 4.6045053972972654e-05, "loss": 0.8243, "step": 12972 }, { "epoch": 0.1839820470732546, "grad_norm": 3.15625, "learning_rate": 4.604384786637662e-05, "loss": 0.813, "step": 12974 }, { "epoch": 0.18401040872688082, "grad_norm": 2.96875, "learning_rate": 4.604264159170197e-05, "loss": 0.8009, "step": 12976 }, { "epoch": 0.18403877038050703, "grad_norm": 3.140625, "learning_rate": 4.604143514895833e-05, "loss": 0.865, "step": 12978 }, { "epoch": 0.18406713203413325, "grad_norm": 3.1875, "learning_rate": 4.604022853815536e-05, "loss": 0.8174, "step": 12980 }, { "epoch": 0.18409549368775946, "grad_norm": 2.953125, "learning_rate": 4.603902175930267e-05, "loss": 0.8475, "step": 12982 }, { "epoch": 0.18412385534138567, "grad_norm": 3.515625, "learning_rate": 4.6037814812409906e-05, "loss": 0.8608, "step": 12984 }, { "epoch": 0.18415221699501189, "grad_norm": 2.984375, "learning_rate": 4.6036607697486724e-05, "loss": 0.8211, "step": 12986 }, { "epoch": 0.1841805786486381, "grad_norm": 3.15625, "learning_rate": 4.6035400414542745e-05, "loss": 0.8025, "step": 12988 }, { "epoch": 0.1842089403022643, "grad_norm": 3.109375, "learning_rate": 4.603419296358763e-05, "loss": 0.8281, "step": 12990 }, { "epoch": 0.18423730195589053, "grad_norm": 2.984375, "learning_rate": 4.6032985344631e-05, "loss": 0.829, "step": 12992 }, { "epoch": 0.18426566360951677, "grad_norm": 3.03125, "learning_rate": 4.6031777557682514e-05, "loss": 0.8284, "step": 12994 }, { "epoch": 0.18429402526314298, "grad_norm": 3.078125, "learning_rate": 4.6030569602751825e-05, "loss": 0.8627, "step": 12996 }, { "epoch": 0.1843223869167692, "grad_norm": 3.484375, "learning_rate": 4.6029361479848566e-05, "loss": 0.835, "step": 12998 }, { "epoch": 0.1843507485703954, "grad_norm": 3.203125, "learning_rate": 4.60281531889824e-05, "loss": 0.8228, "step": 13000 }, { "epoch": 0.18437911022402162, "grad_norm": 3.0625, "learning_rate": 4.602694473016297e-05, "loss": 0.8422, "step": 13002 }, { "epoch": 0.18440747187764783, "grad_norm": 2.96875, "learning_rate": 4.602573610339993e-05, "loss": 0.8327, "step": 13004 }, { "epoch": 0.18443583353127405, "grad_norm": 2.859375, "learning_rate": 4.602452730870293e-05, "loss": 0.8415, "step": 13006 }, { "epoch": 0.18446419518490026, "grad_norm": 3.0, "learning_rate": 4.6023318346081636e-05, "loss": 0.7983, "step": 13008 }, { "epoch": 0.18449255683852647, "grad_norm": 3.34375, "learning_rate": 4.602210921554569e-05, "loss": 0.8707, "step": 13010 }, { "epoch": 0.18452091849215269, "grad_norm": 3.265625, "learning_rate": 4.602089991710475e-05, "loss": 0.8581, "step": 13012 }, { "epoch": 0.1845492801457789, "grad_norm": 3.203125, "learning_rate": 4.6019690450768493e-05, "loss": 0.8545, "step": 13014 }, { "epoch": 0.1845776417994051, "grad_norm": 3.40625, "learning_rate": 4.601848081654656e-05, "loss": 0.8699, "step": 13016 }, { "epoch": 0.18460600345303133, "grad_norm": 3.421875, "learning_rate": 4.6017271014448625e-05, "loss": 0.8446, "step": 13018 }, { "epoch": 0.18463436510665754, "grad_norm": 3.203125, "learning_rate": 4.6016061044484335e-05, "loss": 0.8681, "step": 13020 }, { "epoch": 0.18466272676028375, "grad_norm": 3.3125, "learning_rate": 4.601485090666337e-05, "loss": 0.8604, "step": 13022 }, { "epoch": 0.18469108841390997, "grad_norm": 2.796875, "learning_rate": 4.601364060099539e-05, "loss": 0.854, "step": 13024 }, { "epoch": 0.18471945006753618, "grad_norm": 2.953125, "learning_rate": 4.6012430127490055e-05, "loss": 0.8177, "step": 13026 }, { "epoch": 0.1847478117211624, "grad_norm": 3.234375, "learning_rate": 4.601121948615704e-05, "loss": 0.8287, "step": 13028 }, { "epoch": 0.1847761733747886, "grad_norm": 3.15625, "learning_rate": 4.6010008677006015e-05, "loss": 0.8152, "step": 13030 }, { "epoch": 0.18480453502841482, "grad_norm": 3.03125, "learning_rate": 4.6008797700046647e-05, "loss": 0.8466, "step": 13032 }, { "epoch": 0.18483289668204106, "grad_norm": 2.84375, "learning_rate": 4.6007586555288615e-05, "loss": 0.8034, "step": 13034 }, { "epoch": 0.18486125833566727, "grad_norm": 2.734375, "learning_rate": 4.600637524274158e-05, "loss": 0.8526, "step": 13036 }, { "epoch": 0.18488961998929349, "grad_norm": 2.859375, "learning_rate": 4.600516376241524e-05, "loss": 0.8214, "step": 13038 }, { "epoch": 0.1849179816429197, "grad_norm": 3.28125, "learning_rate": 4.600395211431925e-05, "loss": 0.8014, "step": 13040 }, { "epoch": 0.1849463432965459, "grad_norm": 3.109375, "learning_rate": 4.600274029846329e-05, "loss": 0.8617, "step": 13042 }, { "epoch": 0.18497470495017213, "grad_norm": 2.6875, "learning_rate": 4.6001528314857034e-05, "loss": 0.7872, "step": 13044 }, { "epoch": 0.18500306660379834, "grad_norm": 3.109375, "learning_rate": 4.600031616351018e-05, "loss": 0.8183, "step": 13046 }, { "epoch": 0.18503142825742455, "grad_norm": 3.078125, "learning_rate": 4.59991038444324e-05, "loss": 0.802, "step": 13048 }, { "epoch": 0.18505978991105076, "grad_norm": 3.046875, "learning_rate": 4.5997891357633374e-05, "loss": 0.7783, "step": 13050 }, { "epoch": 0.18508815156467698, "grad_norm": 2.96875, "learning_rate": 4.5996678703122794e-05, "loss": 0.8105, "step": 13052 }, { "epoch": 0.1851165132183032, "grad_norm": 3.140625, "learning_rate": 4.5995465880910336e-05, "loss": 0.8528, "step": 13054 }, { "epoch": 0.1851448748719294, "grad_norm": 2.859375, "learning_rate": 4.59942528910057e-05, "loss": 0.8125, "step": 13056 }, { "epoch": 0.18517323652555562, "grad_norm": 2.734375, "learning_rate": 4.599303973341856e-05, "loss": 0.8686, "step": 13058 }, { "epoch": 0.18520159817918183, "grad_norm": 3.1875, "learning_rate": 4.5991826408158614e-05, "loss": 0.7863, "step": 13060 }, { "epoch": 0.18522995983280804, "grad_norm": 3.265625, "learning_rate": 4.5990612915235545e-05, "loss": 0.8668, "step": 13062 }, { "epoch": 0.18525832148643426, "grad_norm": 2.765625, "learning_rate": 4.598939925465905e-05, "loss": 0.8114, "step": 13064 }, { "epoch": 0.18528668314006047, "grad_norm": 3.265625, "learning_rate": 4.5988185426438826e-05, "loss": 0.8909, "step": 13066 }, { "epoch": 0.18531504479368668, "grad_norm": 3.09375, "learning_rate": 4.5986971430584556e-05, "loss": 0.8368, "step": 13068 }, { "epoch": 0.1853434064473129, "grad_norm": 3.375, "learning_rate": 4.598575726710596e-05, "loss": 0.8269, "step": 13070 }, { "epoch": 0.18537176810093914, "grad_norm": 3.421875, "learning_rate": 4.5984542936012716e-05, "loss": 0.7916, "step": 13072 }, { "epoch": 0.18540012975456535, "grad_norm": 2.921875, "learning_rate": 4.5983328437314526e-05, "loss": 0.8511, "step": 13074 }, { "epoch": 0.18542849140819156, "grad_norm": 2.875, "learning_rate": 4.5982113771021095e-05, "loss": 0.8668, "step": 13076 }, { "epoch": 0.18545685306181778, "grad_norm": 3.328125, "learning_rate": 4.598089893714212e-05, "loss": 0.8231, "step": 13078 }, { "epoch": 0.185485214715444, "grad_norm": 3.171875, "learning_rate": 4.597968393568731e-05, "loss": 0.8385, "step": 13080 }, { "epoch": 0.1855135763690702, "grad_norm": 3.25, "learning_rate": 4.597846876666635e-05, "loss": 0.8211, "step": 13082 }, { "epoch": 0.18554193802269642, "grad_norm": 3.140625, "learning_rate": 4.597725343008898e-05, "loss": 0.8548, "step": 13084 }, { "epoch": 0.18557029967632263, "grad_norm": 3.203125, "learning_rate": 4.5976037925964874e-05, "loss": 0.8151, "step": 13086 }, { "epoch": 0.18559866132994884, "grad_norm": 3.140625, "learning_rate": 4.597482225430376e-05, "loss": 0.8955, "step": 13088 }, { "epoch": 0.18562702298357506, "grad_norm": 3.25, "learning_rate": 4.597360641511534e-05, "loss": 0.8102, "step": 13090 }, { "epoch": 0.18565538463720127, "grad_norm": 3.0625, "learning_rate": 4.597239040840933e-05, "loss": 0.8371, "step": 13092 }, { "epoch": 0.18568374629082748, "grad_norm": 3.171875, "learning_rate": 4.5971174234195435e-05, "loss": 0.8388, "step": 13094 }, { "epoch": 0.1857121079444537, "grad_norm": 3.265625, "learning_rate": 4.596995789248337e-05, "loss": 0.8481, "step": 13096 }, { "epoch": 0.1857404695980799, "grad_norm": 2.90625, "learning_rate": 4.596874138328287e-05, "loss": 0.7828, "step": 13098 }, { "epoch": 0.18576883125170612, "grad_norm": 2.890625, "learning_rate": 4.5967524706603615e-05, "loss": 0.8554, "step": 13100 }, { "epoch": 0.18579719290533234, "grad_norm": 3.03125, "learning_rate": 4.5966307862455344e-05, "loss": 0.8659, "step": 13102 }, { "epoch": 0.18582555455895855, "grad_norm": 3.53125, "learning_rate": 4.596509085084778e-05, "loss": 0.848, "step": 13104 }, { "epoch": 0.18585391621258476, "grad_norm": 3.203125, "learning_rate": 4.596387367179063e-05, "loss": 0.8382, "step": 13106 }, { "epoch": 0.18588227786621098, "grad_norm": 2.953125, "learning_rate": 4.596265632529363e-05, "loss": 0.8609, "step": 13108 }, { "epoch": 0.18591063951983722, "grad_norm": 3.3125, "learning_rate": 4.5961438811366484e-05, "loss": 0.8494, "step": 13110 }, { "epoch": 0.18593900117346343, "grad_norm": 3.109375, "learning_rate": 4.5960221130018946e-05, "loss": 0.8323, "step": 13112 }, { "epoch": 0.18596736282708964, "grad_norm": 3.484375, "learning_rate": 4.595900328126071e-05, "loss": 0.8719, "step": 13114 }, { "epoch": 0.18599572448071586, "grad_norm": 3.03125, "learning_rate": 4.5957785265101514e-05, "loss": 0.8155, "step": 13116 }, { "epoch": 0.18602408613434207, "grad_norm": 2.984375, "learning_rate": 4.595656708155111e-05, "loss": 0.7966, "step": 13118 }, { "epoch": 0.18605244778796828, "grad_norm": 2.953125, "learning_rate": 4.595534873061919e-05, "loss": 0.8396, "step": 13120 }, { "epoch": 0.1860808094415945, "grad_norm": 2.640625, "learning_rate": 4.59541302123155e-05, "loss": 0.7848, "step": 13122 }, { "epoch": 0.1861091710952207, "grad_norm": 2.609375, "learning_rate": 4.595291152664978e-05, "loss": 0.8058, "step": 13124 }, { "epoch": 0.18613753274884692, "grad_norm": 3.109375, "learning_rate": 4.595169267363175e-05, "loss": 0.8243, "step": 13126 }, { "epoch": 0.18616589440247314, "grad_norm": 3.09375, "learning_rate": 4.5950473653271165e-05, "loss": 0.8273, "step": 13128 }, { "epoch": 0.18619425605609935, "grad_norm": 2.984375, "learning_rate": 4.594925446557774e-05, "loss": 0.8088, "step": 13130 }, { "epoch": 0.18622261770972556, "grad_norm": 2.96875, "learning_rate": 4.5948035110561236e-05, "loss": 0.805, "step": 13132 }, { "epoch": 0.18625097936335178, "grad_norm": 3.296875, "learning_rate": 4.594681558823136e-05, "loss": 0.8682, "step": 13134 }, { "epoch": 0.186279341016978, "grad_norm": 3.015625, "learning_rate": 4.594559589859789e-05, "loss": 0.8321, "step": 13136 }, { "epoch": 0.1863077026706042, "grad_norm": 2.84375, "learning_rate": 4.594437604167053e-05, "loss": 0.8459, "step": 13138 }, { "epoch": 0.18633606432423042, "grad_norm": 3.484375, "learning_rate": 4.5943156017459054e-05, "loss": 0.8633, "step": 13140 }, { "epoch": 0.18636442597785663, "grad_norm": 3.046875, "learning_rate": 4.594193582597319e-05, "loss": 0.8347, "step": 13142 }, { "epoch": 0.18639278763148284, "grad_norm": 2.953125, "learning_rate": 4.594071546722269e-05, "loss": 0.8528, "step": 13144 }, { "epoch": 0.18642114928510906, "grad_norm": 2.921875, "learning_rate": 4.59394949412173e-05, "loss": 0.7814, "step": 13146 }, { "epoch": 0.1864495109387353, "grad_norm": 3.0625, "learning_rate": 4.593827424796676e-05, "loss": 0.8238, "step": 13148 }, { "epoch": 0.1864778725923615, "grad_norm": 3.203125, "learning_rate": 4.5937053387480835e-05, "loss": 0.8767, "step": 13150 }, { "epoch": 0.18650623424598772, "grad_norm": 2.90625, "learning_rate": 4.593583235976926e-05, "loss": 0.8338, "step": 13152 }, { "epoch": 0.18653459589961394, "grad_norm": 2.984375, "learning_rate": 4.59346111648418e-05, "loss": 0.8379, "step": 13154 }, { "epoch": 0.18656295755324015, "grad_norm": 2.59375, "learning_rate": 4.593338980270821e-05, "loss": 0.8138, "step": 13156 }, { "epoch": 0.18659131920686636, "grad_norm": 2.875, "learning_rate": 4.593216827337823e-05, "loss": 0.8153, "step": 13158 }, { "epoch": 0.18661968086049258, "grad_norm": 3.109375, "learning_rate": 4.593094657686162e-05, "loss": 0.852, "step": 13160 }, { "epoch": 0.1866480425141188, "grad_norm": 3.453125, "learning_rate": 4.592972471316815e-05, "loss": 0.8778, "step": 13162 }, { "epoch": 0.186676404167745, "grad_norm": 2.84375, "learning_rate": 4.592850268230758e-05, "loss": 0.8269, "step": 13164 }, { "epoch": 0.18670476582137122, "grad_norm": 2.953125, "learning_rate": 4.592728048428966e-05, "loss": 0.8166, "step": 13166 }, { "epoch": 0.18673312747499743, "grad_norm": 3.15625, "learning_rate": 4.592605811912415e-05, "loss": 0.8613, "step": 13168 }, { "epoch": 0.18676148912862364, "grad_norm": 3.09375, "learning_rate": 4.592483558682083e-05, "loss": 0.829, "step": 13170 }, { "epoch": 0.18678985078224986, "grad_norm": 2.953125, "learning_rate": 4.592361288738945e-05, "loss": 0.8649, "step": 13172 }, { "epoch": 0.18681821243587607, "grad_norm": 2.921875, "learning_rate": 4.592239002083976e-05, "loss": 0.8067, "step": 13174 }, { "epoch": 0.18684657408950228, "grad_norm": 3.015625, "learning_rate": 4.5921166987181566e-05, "loss": 0.8051, "step": 13176 }, { "epoch": 0.1868749357431285, "grad_norm": 3.296875, "learning_rate": 4.59199437864246e-05, "loss": 0.8701, "step": 13178 }, { "epoch": 0.1869032973967547, "grad_norm": 2.859375, "learning_rate": 4.591872041857865e-05, "loss": 0.8331, "step": 13180 }, { "epoch": 0.18693165905038092, "grad_norm": 2.9375, "learning_rate": 4.591749688365349e-05, "loss": 0.855, "step": 13182 }, { "epoch": 0.18696002070400713, "grad_norm": 3.296875, "learning_rate": 4.591627318165889e-05, "loss": 0.8416, "step": 13184 }, { "epoch": 0.18698838235763335, "grad_norm": 2.734375, "learning_rate": 4.591504931260462e-05, "loss": 0.8072, "step": 13186 }, { "epoch": 0.1870167440112596, "grad_norm": 2.828125, "learning_rate": 4.591382527650046e-05, "loss": 0.8383, "step": 13188 }, { "epoch": 0.1870451056648858, "grad_norm": 2.59375, "learning_rate": 4.5912601073356164e-05, "loss": 0.8402, "step": 13190 }, { "epoch": 0.18707346731851202, "grad_norm": 2.828125, "learning_rate": 4.591137670318155e-05, "loss": 0.857, "step": 13192 }, { "epoch": 0.18710182897213823, "grad_norm": 2.78125, "learning_rate": 4.591015216598637e-05, "loss": 0.8345, "step": 13194 }, { "epoch": 0.18713019062576444, "grad_norm": 3.0, "learning_rate": 4.590892746178039e-05, "loss": 0.8868, "step": 13196 }, { "epoch": 0.18715855227939066, "grad_norm": 3.15625, "learning_rate": 4.5907702590573434e-05, "loss": 0.8614, "step": 13198 }, { "epoch": 0.18718691393301687, "grad_norm": 3.015625, "learning_rate": 4.590647755237526e-05, "loss": 0.7963, "step": 13200 }, { "epoch": 0.18721527558664308, "grad_norm": 2.921875, "learning_rate": 4.590525234719565e-05, "loss": 0.8295, "step": 13202 }, { "epoch": 0.1872436372402693, "grad_norm": 3.140625, "learning_rate": 4.5904026975044404e-05, "loss": 0.8039, "step": 13204 }, { "epoch": 0.1872719988938955, "grad_norm": 3.203125, "learning_rate": 4.5902801435931286e-05, "loss": 0.8509, "step": 13206 }, { "epoch": 0.18730036054752172, "grad_norm": 3.140625, "learning_rate": 4.590157572986611e-05, "loss": 0.8388, "step": 13208 }, { "epoch": 0.18732872220114793, "grad_norm": 3.40625, "learning_rate": 4.590034985685865e-05, "loss": 0.8909, "step": 13210 }, { "epoch": 0.18735708385477415, "grad_norm": 2.9375, "learning_rate": 4.58991238169187e-05, "loss": 0.8036, "step": 13212 }, { "epoch": 0.18738544550840036, "grad_norm": 3.0625, "learning_rate": 4.589789761005606e-05, "loss": 0.8134, "step": 13214 }, { "epoch": 0.18741380716202657, "grad_norm": 3.046875, "learning_rate": 4.589667123628051e-05, "loss": 0.8743, "step": 13216 }, { "epoch": 0.1874421688156528, "grad_norm": 3.46875, "learning_rate": 4.5895444695601855e-05, "loss": 0.8436, "step": 13218 }, { "epoch": 0.187470530469279, "grad_norm": 3.40625, "learning_rate": 4.5894217988029894e-05, "loss": 0.8654, "step": 13220 }, { "epoch": 0.18749889212290521, "grad_norm": 2.984375, "learning_rate": 4.589299111357441e-05, "loss": 0.8007, "step": 13222 }, { "epoch": 0.18752725377653143, "grad_norm": 2.9375, "learning_rate": 4.5891764072245224e-05, "loss": 0.8077, "step": 13224 }, { "epoch": 0.18755561543015767, "grad_norm": 3.265625, "learning_rate": 4.5890536864052125e-05, "loss": 0.7967, "step": 13226 }, { "epoch": 0.18758397708378388, "grad_norm": 2.984375, "learning_rate": 4.588930948900491e-05, "loss": 0.8254, "step": 13228 }, { "epoch": 0.1876123387374101, "grad_norm": 2.71875, "learning_rate": 4.5888081947113387e-05, "loss": 0.8194, "step": 13230 }, { "epoch": 0.1876407003910363, "grad_norm": 3.265625, "learning_rate": 4.5886854238387364e-05, "loss": 0.819, "step": 13232 }, { "epoch": 0.18766906204466252, "grad_norm": 3.125, "learning_rate": 4.588562636283664e-05, "loss": 0.8626, "step": 13234 }, { "epoch": 0.18769742369828873, "grad_norm": 2.90625, "learning_rate": 4.5884398320471025e-05, "loss": 0.8365, "step": 13236 }, { "epoch": 0.18772578535191495, "grad_norm": 3.375, "learning_rate": 4.5883170111300325e-05, "loss": 0.8916, "step": 13238 }, { "epoch": 0.18775414700554116, "grad_norm": 3.0625, "learning_rate": 4.588194173533436e-05, "loss": 0.8426, "step": 13240 }, { "epoch": 0.18778250865916737, "grad_norm": 3.359375, "learning_rate": 4.588071319258293e-05, "loss": 0.8109, "step": 13242 }, { "epoch": 0.1878108703127936, "grad_norm": 3.359375, "learning_rate": 4.5879484483055846e-05, "loss": 0.8049, "step": 13244 }, { "epoch": 0.1878392319664198, "grad_norm": 3.0625, "learning_rate": 4.5878255606762925e-05, "loss": 0.8489, "step": 13246 }, { "epoch": 0.18786759362004601, "grad_norm": 3.015625, "learning_rate": 4.587702656371399e-05, "loss": 0.864, "step": 13248 }, { "epoch": 0.18789595527367223, "grad_norm": 3.203125, "learning_rate": 4.587579735391885e-05, "loss": 0.8556, "step": 13250 }, { "epoch": 0.18792431692729844, "grad_norm": 2.96875, "learning_rate": 4.5874567977387326e-05, "loss": 0.7831, "step": 13252 }, { "epoch": 0.18795267858092465, "grad_norm": 3.21875, "learning_rate": 4.587333843412923e-05, "loss": 0.8611, "step": 13254 }, { "epoch": 0.18798104023455087, "grad_norm": 3.28125, "learning_rate": 4.587210872415439e-05, "loss": 0.8093, "step": 13256 }, { "epoch": 0.18800940188817708, "grad_norm": 3.03125, "learning_rate": 4.587087884747263e-05, "loss": 0.8137, "step": 13258 }, { "epoch": 0.1880377635418033, "grad_norm": 2.9375, "learning_rate": 4.586964880409377e-05, "loss": 0.8307, "step": 13260 }, { "epoch": 0.1880661251954295, "grad_norm": 3.453125, "learning_rate": 4.586841859402763e-05, "loss": 0.8623, "step": 13262 }, { "epoch": 0.18809448684905575, "grad_norm": 3.3125, "learning_rate": 4.586718821728403e-05, "loss": 0.8325, "step": 13264 }, { "epoch": 0.18812284850268196, "grad_norm": 2.9375, "learning_rate": 4.5865957673872816e-05, "loss": 0.8541, "step": 13266 }, { "epoch": 0.18815121015630817, "grad_norm": 3.546875, "learning_rate": 4.5864726963803804e-05, "loss": 0.8937, "step": 13268 }, { "epoch": 0.1881795718099344, "grad_norm": 2.84375, "learning_rate": 4.586349608708682e-05, "loss": 0.7934, "step": 13270 }, { "epoch": 0.1882079334635606, "grad_norm": 3.21875, "learning_rate": 4.586226504373171e-05, "loss": 0.8646, "step": 13272 }, { "epoch": 0.1882362951171868, "grad_norm": 2.6875, "learning_rate": 4.5861033833748296e-05, "loss": 0.8557, "step": 13274 }, { "epoch": 0.18826465677081303, "grad_norm": 2.8125, "learning_rate": 4.585980245714641e-05, "loss": 0.8433, "step": 13276 }, { "epoch": 0.18829301842443924, "grad_norm": 2.84375, "learning_rate": 4.585857091393589e-05, "loss": 0.886, "step": 13278 }, { "epoch": 0.18832138007806545, "grad_norm": 3.140625, "learning_rate": 4.585733920412658e-05, "loss": 0.7842, "step": 13280 }, { "epoch": 0.18834974173169167, "grad_norm": 2.890625, "learning_rate": 4.5856107327728305e-05, "loss": 0.8127, "step": 13282 }, { "epoch": 0.18837810338531788, "grad_norm": 3.28125, "learning_rate": 4.585487528475091e-05, "loss": 0.8308, "step": 13284 }, { "epoch": 0.1884064650389441, "grad_norm": 3.09375, "learning_rate": 4.5853643075204235e-05, "loss": 0.8094, "step": 13286 }, { "epoch": 0.1884348266925703, "grad_norm": 3.296875, "learning_rate": 4.585241069909812e-05, "loss": 0.8379, "step": 13288 }, { "epoch": 0.18846318834619652, "grad_norm": 3.0625, "learning_rate": 4.585117815644241e-05, "loss": 0.8627, "step": 13290 }, { "epoch": 0.18849154999982273, "grad_norm": 2.984375, "learning_rate": 4.584994544724695e-05, "loss": 0.8446, "step": 13292 }, { "epoch": 0.18851991165344895, "grad_norm": 2.9375, "learning_rate": 4.584871257152158e-05, "loss": 0.8755, "step": 13294 }, { "epoch": 0.18854827330707516, "grad_norm": 2.90625, "learning_rate": 4.584747952927617e-05, "loss": 0.8244, "step": 13296 }, { "epoch": 0.18857663496070137, "grad_norm": 3.109375, "learning_rate": 4.5846246320520534e-05, "loss": 0.8497, "step": 13298 }, { "epoch": 0.18860499661432759, "grad_norm": 2.9375, "learning_rate": 4.584501294526454e-05, "loss": 0.8321, "step": 13300 }, { "epoch": 0.18863335826795383, "grad_norm": 2.765625, "learning_rate": 4.584377940351804e-05, "loss": 0.8601, "step": 13302 }, { "epoch": 0.18866171992158004, "grad_norm": 3.09375, "learning_rate": 4.584254569529088e-05, "loss": 0.8426, "step": 13304 }, { "epoch": 0.18869008157520625, "grad_norm": 3.0, "learning_rate": 4.584131182059292e-05, "loss": 0.8461, "step": 13306 }, { "epoch": 0.18871844322883247, "grad_norm": 3.0, "learning_rate": 4.584007777943402e-05, "loss": 0.832, "step": 13308 }, { "epoch": 0.18874680488245868, "grad_norm": 2.984375, "learning_rate": 4.583884357182402e-05, "loss": 0.8127, "step": 13310 }, { "epoch": 0.1887751665360849, "grad_norm": 3.203125, "learning_rate": 4.583760919777279e-05, "loss": 0.85, "step": 13312 }, { "epoch": 0.1888035281897111, "grad_norm": 3.15625, "learning_rate": 4.5836374657290184e-05, "loss": 0.7969, "step": 13314 }, { "epoch": 0.18883188984333732, "grad_norm": 2.921875, "learning_rate": 4.583513995038606e-05, "loss": 0.8127, "step": 13316 }, { "epoch": 0.18886025149696353, "grad_norm": 2.90625, "learning_rate": 4.58339050770703e-05, "loss": 0.7802, "step": 13318 }, { "epoch": 0.18888861315058975, "grad_norm": 2.921875, "learning_rate": 4.583267003735273e-05, "loss": 0.85, "step": 13320 }, { "epoch": 0.18891697480421596, "grad_norm": 3.453125, "learning_rate": 4.583143483124324e-05, "loss": 0.9022, "step": 13322 }, { "epoch": 0.18894533645784217, "grad_norm": 3.171875, "learning_rate": 4.583019945875169e-05, "loss": 0.8715, "step": 13324 }, { "epoch": 0.18897369811146839, "grad_norm": 3.109375, "learning_rate": 4.5828963919887955e-05, "loss": 0.7876, "step": 13326 }, { "epoch": 0.1890020597650946, "grad_norm": 3.3125, "learning_rate": 4.582772821466189e-05, "loss": 0.7808, "step": 13328 }, { "epoch": 0.1890304214187208, "grad_norm": 3.15625, "learning_rate": 4.5826492343083374e-05, "loss": 0.9097, "step": 13330 }, { "epoch": 0.18905878307234703, "grad_norm": 3.0, "learning_rate": 4.582525630516227e-05, "loss": 0.7945, "step": 13332 }, { "epoch": 0.18908714472597324, "grad_norm": 2.9375, "learning_rate": 4.582402010090845e-05, "loss": 0.8638, "step": 13334 }, { "epoch": 0.18911550637959945, "grad_norm": 2.984375, "learning_rate": 4.582278373033181e-05, "loss": 0.8251, "step": 13336 }, { "epoch": 0.18914386803322566, "grad_norm": 3.3125, "learning_rate": 4.5821547193442196e-05, "loss": 0.9171, "step": 13338 }, { "epoch": 0.18917222968685188, "grad_norm": 3.0, "learning_rate": 4.5820310490249497e-05, "loss": 0.7947, "step": 13340 }, { "epoch": 0.18920059134047812, "grad_norm": 3.15625, "learning_rate": 4.581907362076359e-05, "loss": 0.8604, "step": 13342 }, { "epoch": 0.18922895299410433, "grad_norm": 3.28125, "learning_rate": 4.581783658499435e-05, "loss": 0.8572, "step": 13344 }, { "epoch": 0.18925731464773055, "grad_norm": 2.90625, "learning_rate": 4.581659938295166e-05, "loss": 0.8633, "step": 13346 }, { "epoch": 0.18928567630135676, "grad_norm": 3.21875, "learning_rate": 4.581536201464541e-05, "loss": 0.8596, "step": 13348 }, { "epoch": 0.18931403795498297, "grad_norm": 2.890625, "learning_rate": 4.5814124480085465e-05, "loss": 0.8283, "step": 13350 }, { "epoch": 0.18934239960860919, "grad_norm": 3.5625, "learning_rate": 4.581288677928173e-05, "loss": 0.8768, "step": 13352 }, { "epoch": 0.1893707612622354, "grad_norm": 3.40625, "learning_rate": 4.5811648912244074e-05, "loss": 0.859, "step": 13354 }, { "epoch": 0.1893991229158616, "grad_norm": 3.40625, "learning_rate": 4.581041087898239e-05, "loss": 0.8265, "step": 13356 }, { "epoch": 0.18942748456948783, "grad_norm": 3.0625, "learning_rate": 4.5809172679506566e-05, "loss": 0.8739, "step": 13358 }, { "epoch": 0.18945584622311404, "grad_norm": 3.046875, "learning_rate": 4.580793431382649e-05, "loss": 0.8043, "step": 13360 }, { "epoch": 0.18948420787674025, "grad_norm": 3.375, "learning_rate": 4.580669578195206e-05, "loss": 0.8729, "step": 13362 }, { "epoch": 0.18951256953036646, "grad_norm": 2.828125, "learning_rate": 4.580545708389317e-05, "loss": 0.8522, "step": 13364 }, { "epoch": 0.18954093118399268, "grad_norm": 3.0625, "learning_rate": 4.580421821965969e-05, "loss": 0.8505, "step": 13366 }, { "epoch": 0.1895692928376189, "grad_norm": 3.109375, "learning_rate": 4.580297918926154e-05, "loss": 0.841, "step": 13368 }, { "epoch": 0.1895976544912451, "grad_norm": 3.21875, "learning_rate": 4.5801739992708607e-05, "loss": 0.8498, "step": 13370 }, { "epoch": 0.18962601614487132, "grad_norm": 3.046875, "learning_rate": 4.580050063001079e-05, "loss": 0.834, "step": 13372 }, { "epoch": 0.18965437779849753, "grad_norm": 3.09375, "learning_rate": 4.5799261101177984e-05, "loss": 0.8731, "step": 13374 }, { "epoch": 0.18968273945212374, "grad_norm": 2.765625, "learning_rate": 4.579802140622009e-05, "loss": 0.8383, "step": 13376 }, { "epoch": 0.18971110110574996, "grad_norm": 3.078125, "learning_rate": 4.579678154514702e-05, "loss": 0.8743, "step": 13378 }, { "epoch": 0.1897394627593762, "grad_norm": 3.125, "learning_rate": 4.579554151796867e-05, "loss": 0.837, "step": 13380 }, { "epoch": 0.1897678244130024, "grad_norm": 3.296875, "learning_rate": 4.5794301324694934e-05, "loss": 0.8268, "step": 13382 }, { "epoch": 0.18979618606662862, "grad_norm": 3.09375, "learning_rate": 4.579306096533573e-05, "loss": 0.823, "step": 13384 }, { "epoch": 0.18982454772025484, "grad_norm": 3.078125, "learning_rate": 4.579182043990096e-05, "loss": 0.8011, "step": 13386 }, { "epoch": 0.18985290937388105, "grad_norm": 3.75, "learning_rate": 4.579057974840054e-05, "loss": 0.8512, "step": 13388 }, { "epoch": 0.18988127102750726, "grad_norm": 3.109375, "learning_rate": 4.578933889084436e-05, "loss": 0.8128, "step": 13390 }, { "epoch": 0.18990963268113348, "grad_norm": 2.875, "learning_rate": 4.5788097867242355e-05, "loss": 0.828, "step": 13392 }, { "epoch": 0.1899379943347597, "grad_norm": 3.125, "learning_rate": 4.5786856677604416e-05, "loss": 0.8378, "step": 13394 }, { "epoch": 0.1899663559883859, "grad_norm": 2.8125, "learning_rate": 4.578561532194048e-05, "loss": 0.8487, "step": 13396 }, { "epoch": 0.18999471764201212, "grad_norm": 3.09375, "learning_rate": 4.578437380026043e-05, "loss": 0.7965, "step": 13398 }, { "epoch": 0.19002307929563833, "grad_norm": 3.0625, "learning_rate": 4.578313211257421e-05, "loss": 0.8712, "step": 13400 }, { "epoch": 0.19005144094926454, "grad_norm": 3.125, "learning_rate": 4.578189025889173e-05, "loss": 0.8758, "step": 13402 }, { "epoch": 0.19007980260289076, "grad_norm": 2.671875, "learning_rate": 4.57806482392229e-05, "loss": 0.835, "step": 13404 }, { "epoch": 0.19010816425651697, "grad_norm": 3.015625, "learning_rate": 4.5779406053577653e-05, "loss": 0.8259, "step": 13406 }, { "epoch": 0.19013652591014318, "grad_norm": 3.59375, "learning_rate": 4.5778163701965904e-05, "loss": 0.8892, "step": 13408 }, { "epoch": 0.1901648875637694, "grad_norm": 3.03125, "learning_rate": 4.577692118439757e-05, "loss": 0.8482, "step": 13410 }, { "epoch": 0.1901932492173956, "grad_norm": 3.078125, "learning_rate": 4.577567850088258e-05, "loss": 0.8033, "step": 13412 }, { "epoch": 0.19022161087102182, "grad_norm": 3.765625, "learning_rate": 4.577443565143087e-05, "loss": 0.8294, "step": 13414 }, { "epoch": 0.19024997252464804, "grad_norm": 3.078125, "learning_rate": 4.577319263605235e-05, "loss": 0.8551, "step": 13416 }, { "epoch": 0.19027833417827428, "grad_norm": 2.875, "learning_rate": 4.5771949454756955e-05, "loss": 0.855, "step": 13418 }, { "epoch": 0.1903066958319005, "grad_norm": 3.125, "learning_rate": 4.577070610755461e-05, "loss": 0.8671, "step": 13420 }, { "epoch": 0.1903350574855267, "grad_norm": 2.890625, "learning_rate": 4.5769462594455256e-05, "loss": 0.9328, "step": 13422 }, { "epoch": 0.19036341913915292, "grad_norm": 3.203125, "learning_rate": 4.576821891546882e-05, "loss": 0.8645, "step": 13424 }, { "epoch": 0.19039178079277913, "grad_norm": 3.0, "learning_rate": 4.5766975070605227e-05, "loss": 0.838, "step": 13426 }, { "epoch": 0.19042014244640534, "grad_norm": 3.21875, "learning_rate": 4.576573105987442e-05, "loss": 0.8534, "step": 13428 }, { "epoch": 0.19044850410003156, "grad_norm": 3.015625, "learning_rate": 4.576448688328634e-05, "loss": 0.8261, "step": 13430 }, { "epoch": 0.19047686575365777, "grad_norm": 3.234375, "learning_rate": 4.576324254085092e-05, "loss": 0.8201, "step": 13432 }, { "epoch": 0.19050522740728398, "grad_norm": 3.421875, "learning_rate": 4.576199803257809e-05, "loss": 0.8686, "step": 13434 }, { "epoch": 0.1905335890609102, "grad_norm": 3.25, "learning_rate": 4.5760753358477796e-05, "loss": 0.8248, "step": 13436 }, { "epoch": 0.1905619507145364, "grad_norm": 2.859375, "learning_rate": 4.5759508518559984e-05, "loss": 0.8624, "step": 13438 }, { "epoch": 0.19059031236816262, "grad_norm": 2.984375, "learning_rate": 4.575826351283459e-05, "loss": 0.794, "step": 13440 }, { "epoch": 0.19061867402178884, "grad_norm": 3.21875, "learning_rate": 4.5757018341311565e-05, "loss": 0.8272, "step": 13442 }, { "epoch": 0.19064703567541505, "grad_norm": 3.265625, "learning_rate": 4.575577300400085e-05, "loss": 0.8793, "step": 13444 }, { "epoch": 0.19067539732904126, "grad_norm": 2.828125, "learning_rate": 4.5754527500912386e-05, "loss": 0.8291, "step": 13446 }, { "epoch": 0.19070375898266748, "grad_norm": 2.640625, "learning_rate": 4.5753281832056127e-05, "loss": 0.8333, "step": 13448 }, { "epoch": 0.1907321206362937, "grad_norm": 3.109375, "learning_rate": 4.5752035997442025e-05, "loss": 0.8739, "step": 13450 }, { "epoch": 0.1907604822899199, "grad_norm": 3.296875, "learning_rate": 4.5750789997080035e-05, "loss": 0.7886, "step": 13452 }, { "epoch": 0.19078884394354612, "grad_norm": 3.15625, "learning_rate": 4.574954383098009e-05, "loss": 0.8612, "step": 13454 }, { "epoch": 0.19081720559717236, "grad_norm": 2.984375, "learning_rate": 4.574829749915216e-05, "loss": 0.8282, "step": 13456 }, { "epoch": 0.19084556725079857, "grad_norm": 3.171875, "learning_rate": 4.574705100160619e-05, "loss": 0.885, "step": 13458 }, { "epoch": 0.19087392890442478, "grad_norm": 3.03125, "learning_rate": 4.574580433835214e-05, "loss": 0.7724, "step": 13460 }, { "epoch": 0.190902290558051, "grad_norm": 2.84375, "learning_rate": 4.574455750939997e-05, "loss": 0.8258, "step": 13462 }, { "epoch": 0.1909306522116772, "grad_norm": 2.65625, "learning_rate": 4.574331051475963e-05, "loss": 0.7976, "step": 13464 }, { "epoch": 0.19095901386530342, "grad_norm": 2.9375, "learning_rate": 4.574206335444109e-05, "loss": 0.8289, "step": 13466 }, { "epoch": 0.19098737551892964, "grad_norm": 3.109375, "learning_rate": 4.57408160284543e-05, "loss": 0.8247, "step": 13468 }, { "epoch": 0.19101573717255585, "grad_norm": 3.171875, "learning_rate": 4.573956853680924e-05, "loss": 0.8351, "step": 13470 }, { "epoch": 0.19104409882618206, "grad_norm": 3.078125, "learning_rate": 4.573832087951586e-05, "loss": 0.8374, "step": 13472 }, { "epoch": 0.19107246047980828, "grad_norm": 3.15625, "learning_rate": 4.5737073056584124e-05, "loss": 0.8595, "step": 13474 }, { "epoch": 0.1911008221334345, "grad_norm": 3.09375, "learning_rate": 4.5735825068024e-05, "loss": 0.7851, "step": 13476 }, { "epoch": 0.1911291837870607, "grad_norm": 3.140625, "learning_rate": 4.5734576913845465e-05, "loss": 0.8746, "step": 13478 }, { "epoch": 0.19115754544068692, "grad_norm": 3.234375, "learning_rate": 4.5733328594058475e-05, "loss": 0.7864, "step": 13480 }, { "epoch": 0.19118590709431313, "grad_norm": 3.59375, "learning_rate": 4.5732080108673007e-05, "loss": 0.8418, "step": 13482 }, { "epoch": 0.19121426874793934, "grad_norm": 3.34375, "learning_rate": 4.573083145769904e-05, "loss": 0.8358, "step": 13484 }, { "epoch": 0.19124263040156556, "grad_norm": 3.265625, "learning_rate": 4.572958264114653e-05, "loss": 0.8298, "step": 13486 }, { "epoch": 0.19127099205519177, "grad_norm": 3.453125, "learning_rate": 4.572833365902547e-05, "loss": 0.8836, "step": 13488 }, { "epoch": 0.19129935370881798, "grad_norm": 3.0625, "learning_rate": 4.572708451134582e-05, "loss": 0.8458, "step": 13490 }, { "epoch": 0.1913277153624442, "grad_norm": 3.28125, "learning_rate": 4.572583519811756e-05, "loss": 0.9055, "step": 13492 }, { "epoch": 0.1913560770160704, "grad_norm": 2.90625, "learning_rate": 4.572458571935068e-05, "loss": 0.771, "step": 13494 }, { "epoch": 0.19138443866969665, "grad_norm": 2.6875, "learning_rate": 4.5723336075055144e-05, "loss": 0.8415, "step": 13496 }, { "epoch": 0.19141280032332286, "grad_norm": 3.21875, "learning_rate": 4.572208626524095e-05, "loss": 0.8388, "step": 13498 }, { "epoch": 0.19144116197694908, "grad_norm": 3.578125, "learning_rate": 4.572083628991807e-05, "loss": 0.8264, "step": 13500 }, { "epoch": 0.1914695236305753, "grad_norm": 3.265625, "learning_rate": 4.571958614909648e-05, "loss": 0.8362, "step": 13502 }, { "epoch": 0.1914978852842015, "grad_norm": 3.140625, "learning_rate": 4.5718335842786176e-05, "loss": 0.7967, "step": 13504 }, { "epoch": 0.19152624693782772, "grad_norm": 3.109375, "learning_rate": 4.571708537099714e-05, "loss": 0.8293, "step": 13506 }, { "epoch": 0.19155460859145393, "grad_norm": 4.28125, "learning_rate": 4.571583473373937e-05, "loss": 0.8175, "step": 13508 }, { "epoch": 0.19158297024508014, "grad_norm": 3.203125, "learning_rate": 4.5714583931022835e-05, "loss": 0.8071, "step": 13510 }, { "epoch": 0.19161133189870636, "grad_norm": 3.25, "learning_rate": 4.571333296285755e-05, "loss": 0.8241, "step": 13512 }, { "epoch": 0.19163969355233257, "grad_norm": 2.984375, "learning_rate": 4.571208182925348e-05, "loss": 0.802, "step": 13514 }, { "epoch": 0.19166805520595878, "grad_norm": 3.125, "learning_rate": 4.571083053022064e-05, "loss": 0.8576, "step": 13516 }, { "epoch": 0.191696416859585, "grad_norm": 3.109375, "learning_rate": 4.5709579065769e-05, "loss": 0.8417, "step": 13518 }, { "epoch": 0.1917247785132112, "grad_norm": 3.578125, "learning_rate": 4.570832743590859e-05, "loss": 0.9205, "step": 13520 }, { "epoch": 0.19175314016683742, "grad_norm": 3.1875, "learning_rate": 4.570707564064938e-05, "loss": 0.8363, "step": 13522 }, { "epoch": 0.19178150182046363, "grad_norm": 3.28125, "learning_rate": 4.570582368000137e-05, "loss": 0.798, "step": 13524 }, { "epoch": 0.19180986347408985, "grad_norm": 3.1875, "learning_rate": 4.5704571553974574e-05, "loss": 0.8799, "step": 13526 }, { "epoch": 0.19183822512771606, "grad_norm": 2.9375, "learning_rate": 4.570331926257898e-05, "loss": 0.7609, "step": 13528 }, { "epoch": 0.19186658678134227, "grad_norm": 3.171875, "learning_rate": 4.5702066805824596e-05, "loss": 0.8474, "step": 13530 }, { "epoch": 0.1918949484349685, "grad_norm": 2.921875, "learning_rate": 4.570081418372142e-05, "loss": 0.8562, "step": 13532 }, { "epoch": 0.19192331008859473, "grad_norm": 2.875, "learning_rate": 4.5699561396279464e-05, "loss": 0.8165, "step": 13534 }, { "epoch": 0.19195167174222094, "grad_norm": 2.9375, "learning_rate": 4.569830844350873e-05, "loss": 0.8489, "step": 13536 }, { "epoch": 0.19198003339584715, "grad_norm": 3.515625, "learning_rate": 4.569705532541923e-05, "loss": 0.8313, "step": 13538 }, { "epoch": 0.19200839504947337, "grad_norm": 2.828125, "learning_rate": 4.5695802042020955e-05, "loss": 0.812, "step": 13540 }, { "epoch": 0.19203675670309958, "grad_norm": 3.25, "learning_rate": 4.569454859332394e-05, "loss": 0.839, "step": 13542 }, { "epoch": 0.1920651183567258, "grad_norm": 2.953125, "learning_rate": 4.5693294979338174e-05, "loss": 0.8769, "step": 13544 }, { "epoch": 0.192093480010352, "grad_norm": 3.09375, "learning_rate": 4.5692041200073696e-05, "loss": 0.8314, "step": 13546 }, { "epoch": 0.19212184166397822, "grad_norm": 3.203125, "learning_rate": 4.569078725554049e-05, "loss": 0.8544, "step": 13548 }, { "epoch": 0.19215020331760443, "grad_norm": 3.078125, "learning_rate": 4.56895331457486e-05, "loss": 0.8675, "step": 13550 }, { "epoch": 0.19217856497123065, "grad_norm": 3.09375, "learning_rate": 4.568827887070802e-05, "loss": 0.8087, "step": 13552 }, { "epoch": 0.19220692662485686, "grad_norm": 3.234375, "learning_rate": 4.568702443042878e-05, "loss": 0.82, "step": 13554 }, { "epoch": 0.19223528827848307, "grad_norm": 2.90625, "learning_rate": 4.56857698249209e-05, "loss": 0.8295, "step": 13556 }, { "epoch": 0.1922636499321093, "grad_norm": 3.1875, "learning_rate": 4.568451505419439e-05, "loss": 0.8292, "step": 13558 }, { "epoch": 0.1922920115857355, "grad_norm": 3.296875, "learning_rate": 4.568326011825927e-05, "loss": 0.8522, "step": 13560 }, { "epoch": 0.1923203732393617, "grad_norm": 3.28125, "learning_rate": 4.5682005017125584e-05, "loss": 0.8151, "step": 13562 }, { "epoch": 0.19234873489298793, "grad_norm": 3.15625, "learning_rate": 4.568074975080334e-05, "loss": 0.8472, "step": 13564 }, { "epoch": 0.19237709654661414, "grad_norm": 2.921875, "learning_rate": 4.5679494319302573e-05, "loss": 0.8505, "step": 13566 }, { "epoch": 0.19240545820024035, "grad_norm": 3.21875, "learning_rate": 4.567823872263331e-05, "loss": 0.7962, "step": 13568 }, { "epoch": 0.19243381985386657, "grad_norm": 2.640625, "learning_rate": 4.567698296080557e-05, "loss": 0.8168, "step": 13570 }, { "epoch": 0.1924621815074928, "grad_norm": 3.3125, "learning_rate": 4.5675727033829386e-05, "loss": 0.8491, "step": 13572 }, { "epoch": 0.19249054316111902, "grad_norm": 3.046875, "learning_rate": 4.567447094171479e-05, "loss": 0.8174, "step": 13574 }, { "epoch": 0.19251890481474523, "grad_norm": 3.28125, "learning_rate": 4.5673214684471814e-05, "loss": 0.869, "step": 13576 }, { "epoch": 0.19254726646837145, "grad_norm": 2.96875, "learning_rate": 4.56719582621105e-05, "loss": 0.868, "step": 13578 }, { "epoch": 0.19257562812199766, "grad_norm": 2.78125, "learning_rate": 4.567070167464087e-05, "loss": 0.8353, "step": 13580 }, { "epoch": 0.19260398977562387, "grad_norm": 3.5625, "learning_rate": 4.5669444922072965e-05, "loss": 0.848, "step": 13582 }, { "epoch": 0.1926323514292501, "grad_norm": 3.484375, "learning_rate": 4.5668188004416834e-05, "loss": 0.8841, "step": 13584 }, { "epoch": 0.1926607130828763, "grad_norm": 3.4375, "learning_rate": 4.5666930921682507e-05, "loss": 0.8356, "step": 13586 }, { "epoch": 0.1926890747365025, "grad_norm": 3.03125, "learning_rate": 4.566567367388002e-05, "loss": 0.8326, "step": 13588 }, { "epoch": 0.19271743639012873, "grad_norm": 3.171875, "learning_rate": 4.5664416261019416e-05, "loss": 0.8111, "step": 13590 }, { "epoch": 0.19274579804375494, "grad_norm": 2.984375, "learning_rate": 4.566315868311074e-05, "loss": 0.867, "step": 13592 }, { "epoch": 0.19277415969738115, "grad_norm": 2.96875, "learning_rate": 4.566190094016404e-05, "loss": 0.8006, "step": 13594 }, { "epoch": 0.19280252135100737, "grad_norm": 2.859375, "learning_rate": 4.5660643032189365e-05, "loss": 0.8367, "step": 13596 }, { "epoch": 0.19283088300463358, "grad_norm": 3.1875, "learning_rate": 4.565938495919675e-05, "loss": 0.7735, "step": 13598 }, { "epoch": 0.1928592446582598, "grad_norm": 3.375, "learning_rate": 4.565812672119625e-05, "loss": 0.9016, "step": 13600 }, { "epoch": 0.192887606311886, "grad_norm": 2.90625, "learning_rate": 4.5656868318197914e-05, "loss": 0.8102, "step": 13602 }, { "epoch": 0.19291596796551222, "grad_norm": 3.296875, "learning_rate": 4.565560975021179e-05, "loss": 0.8075, "step": 13604 }, { "epoch": 0.19294432961913843, "grad_norm": 2.96875, "learning_rate": 4.565435101724794e-05, "loss": 0.8305, "step": 13606 }, { "epoch": 0.19297269127276465, "grad_norm": 3.109375, "learning_rate": 4.565309211931641e-05, "loss": 0.8243, "step": 13608 }, { "epoch": 0.1930010529263909, "grad_norm": 3.15625, "learning_rate": 4.5651833056427254e-05, "loss": 0.8478, "step": 13610 }, { "epoch": 0.1930294145800171, "grad_norm": 2.765625, "learning_rate": 4.5650573828590525e-05, "loss": 0.8312, "step": 13612 }, { "epoch": 0.1930577762336433, "grad_norm": 3.015625, "learning_rate": 4.5649314435816295e-05, "loss": 0.9103, "step": 13614 }, { "epoch": 0.19308613788726953, "grad_norm": 3.1875, "learning_rate": 4.5648054878114606e-05, "loss": 0.8661, "step": 13616 }, { "epoch": 0.19311449954089574, "grad_norm": 3.203125, "learning_rate": 4.564679515549553e-05, "loss": 0.8761, "step": 13618 }, { "epoch": 0.19314286119452195, "grad_norm": 3.109375, "learning_rate": 4.5645535267969116e-05, "loss": 0.7999, "step": 13620 }, { "epoch": 0.19317122284814817, "grad_norm": 2.796875, "learning_rate": 4.564427521554544e-05, "loss": 0.8543, "step": 13622 }, { "epoch": 0.19319958450177438, "grad_norm": 2.734375, "learning_rate": 4.564301499823456e-05, "loss": 0.8345, "step": 13624 }, { "epoch": 0.1932279461554006, "grad_norm": 2.828125, "learning_rate": 4.564175461604654e-05, "loss": 0.8269, "step": 13626 }, { "epoch": 0.1932563078090268, "grad_norm": 3.4375, "learning_rate": 4.564049406899146e-05, "loss": 0.9168, "step": 13628 }, { "epoch": 0.19328466946265302, "grad_norm": 3.21875, "learning_rate": 4.563923335707936e-05, "loss": 0.8391, "step": 13630 }, { "epoch": 0.19331303111627923, "grad_norm": 3.203125, "learning_rate": 4.563797248032034e-05, "loss": 0.8275, "step": 13632 }, { "epoch": 0.19334139276990545, "grad_norm": 3.125, "learning_rate": 4.563671143872445e-05, "loss": 0.8301, "step": 13634 }, { "epoch": 0.19336975442353166, "grad_norm": 3.09375, "learning_rate": 4.563545023230177e-05, "loss": 0.8992, "step": 13636 }, { "epoch": 0.19339811607715787, "grad_norm": 3.09375, "learning_rate": 4.563418886106238e-05, "loss": 0.8563, "step": 13638 }, { "epoch": 0.19342647773078409, "grad_norm": 3.234375, "learning_rate": 4.563292732501634e-05, "loss": 0.8374, "step": 13640 }, { "epoch": 0.1934548393844103, "grad_norm": 3.0, "learning_rate": 4.563166562417374e-05, "loss": 0.83, "step": 13642 }, { "epoch": 0.1934832010380365, "grad_norm": 2.796875, "learning_rate": 4.5630403758544646e-05, "loss": 0.8717, "step": 13644 }, { "epoch": 0.19351156269166273, "grad_norm": 3.09375, "learning_rate": 4.562914172813913e-05, "loss": 0.8788, "step": 13646 }, { "epoch": 0.19353992434528894, "grad_norm": 3.328125, "learning_rate": 4.56278795329673e-05, "loss": 0.8499, "step": 13648 }, { "epoch": 0.19356828599891518, "grad_norm": 3.234375, "learning_rate": 4.562661717303921e-05, "loss": 0.7898, "step": 13650 }, { "epoch": 0.1935966476525414, "grad_norm": 3.40625, "learning_rate": 4.562535464836496e-05, "loss": 0.873, "step": 13652 }, { "epoch": 0.1936250093061676, "grad_norm": 3.375, "learning_rate": 4.562409195895463e-05, "loss": 0.8257, "step": 13654 }, { "epoch": 0.19365337095979382, "grad_norm": 3.265625, "learning_rate": 4.5622829104818287e-05, "loss": 0.8552, "step": 13656 }, { "epoch": 0.19368173261342003, "grad_norm": 3.3125, "learning_rate": 4.562156608596604e-05, "loss": 0.8391, "step": 13658 }, { "epoch": 0.19371009426704625, "grad_norm": 2.8125, "learning_rate": 4.562030290240798e-05, "loss": 0.7875, "step": 13660 }, { "epoch": 0.19373845592067246, "grad_norm": 2.671875, "learning_rate": 4.561903955415417e-05, "loss": 0.796, "step": 13662 }, { "epoch": 0.19376681757429867, "grad_norm": 3.078125, "learning_rate": 4.561777604121472e-05, "loss": 0.8196, "step": 13664 }, { "epoch": 0.19379517922792489, "grad_norm": 3.078125, "learning_rate": 4.561651236359972e-05, "loss": 0.8133, "step": 13666 }, { "epoch": 0.1938235408815511, "grad_norm": 2.90625, "learning_rate": 4.5615248521319256e-05, "loss": 0.8728, "step": 13668 }, { "epoch": 0.1938519025351773, "grad_norm": 3.078125, "learning_rate": 4.5613984514383434e-05, "loss": 0.8194, "step": 13670 }, { "epoch": 0.19388026418880352, "grad_norm": 2.890625, "learning_rate": 4.561272034280234e-05, "loss": 0.812, "step": 13672 }, { "epoch": 0.19390862584242974, "grad_norm": 3.234375, "learning_rate": 4.5611456006586076e-05, "loss": 0.866, "step": 13674 }, { "epoch": 0.19393698749605595, "grad_norm": 3.03125, "learning_rate": 4.561019150574473e-05, "loss": 0.8266, "step": 13676 }, { "epoch": 0.19396534914968216, "grad_norm": 3.296875, "learning_rate": 4.560892684028841e-05, "loss": 0.8297, "step": 13678 }, { "epoch": 0.19399371080330838, "grad_norm": 2.90625, "learning_rate": 4.560766201022723e-05, "loss": 0.8646, "step": 13680 }, { "epoch": 0.1940220724569346, "grad_norm": 3.078125, "learning_rate": 4.560639701557127e-05, "loss": 0.8831, "step": 13682 }, { "epoch": 0.1940504341105608, "grad_norm": 2.9375, "learning_rate": 4.560513185633064e-05, "loss": 0.8065, "step": 13684 }, { "epoch": 0.19407879576418702, "grad_norm": 2.875, "learning_rate": 4.560386653251545e-05, "loss": 0.8051, "step": 13686 }, { "epoch": 0.19410715741781326, "grad_norm": 3.0, "learning_rate": 4.560260104413581e-05, "loss": 0.8603, "step": 13688 }, { "epoch": 0.19413551907143947, "grad_norm": 2.984375, "learning_rate": 4.560133539120181e-05, "loss": 0.8588, "step": 13690 }, { "epoch": 0.19416388072506569, "grad_norm": 3.078125, "learning_rate": 4.5600069573723577e-05, "loss": 0.8204, "step": 13692 }, { "epoch": 0.1941922423786919, "grad_norm": 2.796875, "learning_rate": 4.559880359171121e-05, "loss": 0.8171, "step": 13694 }, { "epoch": 0.1942206040323181, "grad_norm": 3.15625, "learning_rate": 4.5597537445174835e-05, "loss": 0.7763, "step": 13696 }, { "epoch": 0.19424896568594432, "grad_norm": 3.109375, "learning_rate": 4.559627113412454e-05, "loss": 0.8849, "step": 13698 }, { "epoch": 0.19427732733957054, "grad_norm": 3.078125, "learning_rate": 4.559500465857046e-05, "loss": 0.8396, "step": 13700 }, { "epoch": 0.19430568899319675, "grad_norm": 2.90625, "learning_rate": 4.559373801852271e-05, "loss": 0.8361, "step": 13702 }, { "epoch": 0.19433405064682296, "grad_norm": 2.921875, "learning_rate": 4.55924712139914e-05, "loss": 0.7922, "step": 13704 }, { "epoch": 0.19436241230044918, "grad_norm": 2.703125, "learning_rate": 4.5591204244986644e-05, "loss": 0.8029, "step": 13706 }, { "epoch": 0.1943907739540754, "grad_norm": 3.140625, "learning_rate": 4.558993711151856e-05, "loss": 0.8688, "step": 13708 }, { "epoch": 0.1944191356077016, "grad_norm": 2.9375, "learning_rate": 4.5588669813597285e-05, "loss": 0.8106, "step": 13710 }, { "epoch": 0.19444749726132782, "grad_norm": 2.953125, "learning_rate": 4.558740235123292e-05, "loss": 0.7667, "step": 13712 }, { "epoch": 0.19447585891495403, "grad_norm": 3.359375, "learning_rate": 4.5586134724435614e-05, "loss": 0.8804, "step": 13714 }, { "epoch": 0.19450422056858024, "grad_norm": 2.953125, "learning_rate": 4.5584866933215465e-05, "loss": 0.8249, "step": 13716 }, { "epoch": 0.19453258222220646, "grad_norm": 3.234375, "learning_rate": 4.558359897758262e-05, "loss": 0.861, "step": 13718 }, { "epoch": 0.19456094387583267, "grad_norm": 3.09375, "learning_rate": 4.558233085754719e-05, "loss": 0.8596, "step": 13720 }, { "epoch": 0.19458930552945888, "grad_norm": 3.609375, "learning_rate": 4.558106257311932e-05, "loss": 0.8829, "step": 13722 }, { "epoch": 0.1946176671830851, "grad_norm": 3.203125, "learning_rate": 4.5579794124309116e-05, "loss": 0.8759, "step": 13724 }, { "epoch": 0.19464602883671134, "grad_norm": 3.09375, "learning_rate": 4.557852551112673e-05, "loss": 0.829, "step": 13726 }, { "epoch": 0.19467439049033755, "grad_norm": 3.015625, "learning_rate": 4.557725673358229e-05, "loss": 0.8493, "step": 13728 }, { "epoch": 0.19470275214396376, "grad_norm": 3.21875, "learning_rate": 4.557598779168593e-05, "loss": 0.8296, "step": 13730 }, { "epoch": 0.19473111379758998, "grad_norm": 2.953125, "learning_rate": 4.5574718685447784e-05, "loss": 0.7857, "step": 13732 }, { "epoch": 0.1947594754512162, "grad_norm": 3.34375, "learning_rate": 4.557344941487798e-05, "loss": 0.8104, "step": 13734 }, { "epoch": 0.1947878371048424, "grad_norm": 3.109375, "learning_rate": 4.557217997998666e-05, "loss": 0.8318, "step": 13736 }, { "epoch": 0.19481619875846862, "grad_norm": 3.21875, "learning_rate": 4.557091038078398e-05, "loss": 0.8497, "step": 13738 }, { "epoch": 0.19484456041209483, "grad_norm": 3.203125, "learning_rate": 4.556964061728006e-05, "loss": 0.8762, "step": 13740 }, { "epoch": 0.19487292206572104, "grad_norm": 3.15625, "learning_rate": 4.556837068948505e-05, "loss": 0.8858, "step": 13742 }, { "epoch": 0.19490128371934726, "grad_norm": 3.09375, "learning_rate": 4.5567100597409085e-05, "loss": 0.7808, "step": 13744 }, { "epoch": 0.19492964537297347, "grad_norm": 2.875, "learning_rate": 4.556583034106232e-05, "loss": 0.8354, "step": 13746 }, { "epoch": 0.19495800702659968, "grad_norm": 3.265625, "learning_rate": 4.55645599204549e-05, "loss": 0.8572, "step": 13748 }, { "epoch": 0.1949863686802259, "grad_norm": 3.0, "learning_rate": 4.556328933559696e-05, "loss": 0.8329, "step": 13750 }, { "epoch": 0.1950147303338521, "grad_norm": 3.203125, "learning_rate": 4.556201858649867e-05, "loss": 0.8736, "step": 13752 }, { "epoch": 0.19504309198747832, "grad_norm": 3.28125, "learning_rate": 4.556074767317016e-05, "loss": 0.8634, "step": 13754 }, { "epoch": 0.19507145364110454, "grad_norm": 2.84375, "learning_rate": 4.555947659562158e-05, "loss": 0.8357, "step": 13756 }, { "epoch": 0.19509981529473075, "grad_norm": 2.90625, "learning_rate": 4.555820535386309e-05, "loss": 0.8555, "step": 13758 }, { "epoch": 0.19512817694835696, "grad_norm": 3.171875, "learning_rate": 4.555693394790485e-05, "loss": 0.8446, "step": 13760 }, { "epoch": 0.19515653860198318, "grad_norm": 3.21875, "learning_rate": 4.5555662377757e-05, "loss": 0.8843, "step": 13762 }, { "epoch": 0.19518490025560942, "grad_norm": 3.203125, "learning_rate": 4.555439064342971e-05, "loss": 0.8879, "step": 13764 }, { "epoch": 0.19521326190923563, "grad_norm": 3.09375, "learning_rate": 4.555311874493313e-05, "loss": 0.8431, "step": 13766 }, { "epoch": 0.19524162356286184, "grad_norm": 3.078125, "learning_rate": 4.5551846682277415e-05, "loss": 0.8254, "step": 13768 }, { "epoch": 0.19526998521648806, "grad_norm": 3.125, "learning_rate": 4.5550574455472736e-05, "loss": 0.7993, "step": 13770 }, { "epoch": 0.19529834687011427, "grad_norm": 3.09375, "learning_rate": 4.554930206452924e-05, "loss": 0.8603, "step": 13772 }, { "epoch": 0.19532670852374048, "grad_norm": 2.84375, "learning_rate": 4.5548029509457105e-05, "loss": 0.7865, "step": 13774 }, { "epoch": 0.1953550701773667, "grad_norm": 3.484375, "learning_rate": 4.554675679026649e-05, "loss": 0.8493, "step": 13776 }, { "epoch": 0.1953834318309929, "grad_norm": 2.859375, "learning_rate": 4.554548390696755e-05, "loss": 0.8078, "step": 13778 }, { "epoch": 0.19541179348461912, "grad_norm": 3.0625, "learning_rate": 4.554421085957046e-05, "loss": 0.859, "step": 13780 }, { "epoch": 0.19544015513824534, "grad_norm": 2.921875, "learning_rate": 4.5542937648085394e-05, "loss": 0.8576, "step": 13782 }, { "epoch": 0.19546851679187155, "grad_norm": 3.21875, "learning_rate": 4.554166427252251e-05, "loss": 0.871, "step": 13784 }, { "epoch": 0.19549687844549776, "grad_norm": 3.359375, "learning_rate": 4.5540390732891986e-05, "loss": 0.8773, "step": 13786 }, { "epoch": 0.19552524009912398, "grad_norm": 2.71875, "learning_rate": 4.553911702920399e-05, "loss": 0.8572, "step": 13788 }, { "epoch": 0.1955536017527502, "grad_norm": 3.71875, "learning_rate": 4.55378431614687e-05, "loss": 0.8114, "step": 13790 }, { "epoch": 0.1955819634063764, "grad_norm": 3.359375, "learning_rate": 4.553656912969628e-05, "loss": 0.8716, "step": 13792 }, { "epoch": 0.19561032506000262, "grad_norm": 3.1875, "learning_rate": 4.553529493389691e-05, "loss": 0.8173, "step": 13794 }, { "epoch": 0.19563868671362883, "grad_norm": 2.78125, "learning_rate": 4.553402057408078e-05, "loss": 0.8162, "step": 13796 }, { "epoch": 0.19566704836725504, "grad_norm": 2.921875, "learning_rate": 4.553274605025805e-05, "loss": 0.886, "step": 13798 }, { "epoch": 0.19569541002088126, "grad_norm": 3.28125, "learning_rate": 4.553147136243892e-05, "loss": 0.8876, "step": 13800 }, { "epoch": 0.19572377167450747, "grad_norm": 2.765625, "learning_rate": 4.553019651063354e-05, "loss": 0.8142, "step": 13802 }, { "epoch": 0.1957521333281337, "grad_norm": 2.890625, "learning_rate": 4.552892149485212e-05, "loss": 0.7927, "step": 13804 }, { "epoch": 0.19578049498175992, "grad_norm": 3.21875, "learning_rate": 4.5527646315104835e-05, "loss": 0.8089, "step": 13806 }, { "epoch": 0.19580885663538614, "grad_norm": 3.203125, "learning_rate": 4.552637097140187e-05, "loss": 0.8527, "step": 13808 }, { "epoch": 0.19583721828901235, "grad_norm": 3.046875, "learning_rate": 4.552509546375341e-05, "loss": 0.8326, "step": 13810 }, { "epoch": 0.19586557994263856, "grad_norm": 3.015625, "learning_rate": 4.5523819792169646e-05, "loss": 0.8095, "step": 13812 }, { "epoch": 0.19589394159626478, "grad_norm": 2.984375, "learning_rate": 4.552254395666077e-05, "loss": 0.8843, "step": 13814 }, { "epoch": 0.195922303249891, "grad_norm": 3.03125, "learning_rate": 4.552126795723695e-05, "loss": 0.8622, "step": 13816 }, { "epoch": 0.1959506649035172, "grad_norm": 3.0625, "learning_rate": 4.551999179390841e-05, "loss": 0.8287, "step": 13818 }, { "epoch": 0.19597902655714342, "grad_norm": 2.75, "learning_rate": 4.5518715466685314e-05, "loss": 0.826, "step": 13820 }, { "epoch": 0.19600738821076963, "grad_norm": 3.171875, "learning_rate": 4.551743897557788e-05, "loss": 0.8949, "step": 13822 }, { "epoch": 0.19603574986439584, "grad_norm": 3.046875, "learning_rate": 4.5516162320596284e-05, "loss": 0.8615, "step": 13824 }, { "epoch": 0.19606411151802206, "grad_norm": 3.125, "learning_rate": 4.551488550175074e-05, "loss": 0.8313, "step": 13826 }, { "epoch": 0.19609247317164827, "grad_norm": 3.140625, "learning_rate": 4.551360851905143e-05, "loss": 0.8419, "step": 13828 }, { "epoch": 0.19612083482527448, "grad_norm": 3.59375, "learning_rate": 4.551233137250856e-05, "loss": 0.8335, "step": 13830 }, { "epoch": 0.1961491964789007, "grad_norm": 3.046875, "learning_rate": 4.551105406213233e-05, "loss": 0.8553, "step": 13832 }, { "epoch": 0.1961775581325269, "grad_norm": 3.203125, "learning_rate": 4.550977658793295e-05, "loss": 0.8536, "step": 13834 }, { "epoch": 0.19620591978615312, "grad_norm": 3.234375, "learning_rate": 4.550849894992061e-05, "loss": 0.858, "step": 13836 }, { "epoch": 0.19623428143977933, "grad_norm": 2.78125, "learning_rate": 4.550722114810553e-05, "loss": 0.8121, "step": 13838 }, { "epoch": 0.19626264309340555, "grad_norm": 3.34375, "learning_rate": 4.55059431824979e-05, "loss": 0.861, "step": 13840 }, { "epoch": 0.1962910047470318, "grad_norm": 3.234375, "learning_rate": 4.550466505310793e-05, "loss": 0.8931, "step": 13842 }, { "epoch": 0.196319366400658, "grad_norm": 2.828125, "learning_rate": 4.550338675994584e-05, "loss": 0.8087, "step": 13844 }, { "epoch": 0.19634772805428422, "grad_norm": 3.03125, "learning_rate": 4.550210830302183e-05, "loss": 0.8787, "step": 13846 }, { "epoch": 0.19637608970791043, "grad_norm": 3.328125, "learning_rate": 4.5500829682346116e-05, "loss": 0.8898, "step": 13848 }, { "epoch": 0.19640445136153664, "grad_norm": 3.015625, "learning_rate": 4.54995508979289e-05, "loss": 0.8772, "step": 13850 }, { "epoch": 0.19643281301516285, "grad_norm": 2.984375, "learning_rate": 4.5498271949780414e-05, "loss": 0.7862, "step": 13852 }, { "epoch": 0.19646117466878907, "grad_norm": 3.015625, "learning_rate": 4.5496992837910856e-05, "loss": 0.8032, "step": 13854 }, { "epoch": 0.19648953632241528, "grad_norm": 2.703125, "learning_rate": 4.549571356233045e-05, "loss": 0.8171, "step": 13856 }, { "epoch": 0.1965178979760415, "grad_norm": 3.171875, "learning_rate": 4.5494434123049414e-05, "loss": 0.8677, "step": 13858 }, { "epoch": 0.1965462596296677, "grad_norm": 3.0, "learning_rate": 4.5493154520077966e-05, "loss": 0.8341, "step": 13860 }, { "epoch": 0.19657462128329392, "grad_norm": 3.171875, "learning_rate": 4.549187475342632e-05, "loss": 0.8545, "step": 13862 }, { "epoch": 0.19660298293692013, "grad_norm": 2.796875, "learning_rate": 4.549059482310471e-05, "loss": 0.8293, "step": 13864 }, { "epoch": 0.19663134459054635, "grad_norm": 3.453125, "learning_rate": 4.548931472912335e-05, "loss": 0.8482, "step": 13866 }, { "epoch": 0.19665970624417256, "grad_norm": 2.8125, "learning_rate": 4.548803447149247e-05, "loss": 0.8486, "step": 13868 }, { "epoch": 0.19668806789779877, "grad_norm": 3.0625, "learning_rate": 4.548675405022229e-05, "loss": 0.8538, "step": 13870 }, { "epoch": 0.196716429551425, "grad_norm": 3.375, "learning_rate": 4.5485473465323035e-05, "loss": 0.8593, "step": 13872 }, { "epoch": 0.1967447912050512, "grad_norm": 2.734375, "learning_rate": 4.5484192716804944e-05, "loss": 0.8004, "step": 13874 }, { "epoch": 0.1967731528586774, "grad_norm": 3.28125, "learning_rate": 4.548291180467824e-05, "loss": 0.8399, "step": 13876 }, { "epoch": 0.19680151451230363, "grad_norm": 3.28125, "learning_rate": 4.5481630728953136e-05, "loss": 0.8608, "step": 13878 }, { "epoch": 0.19682987616592987, "grad_norm": 2.875, "learning_rate": 4.5480349489639904e-05, "loss": 0.7961, "step": 13880 }, { "epoch": 0.19685823781955608, "grad_norm": 3.109375, "learning_rate": 4.5479068086748746e-05, "loss": 0.8559, "step": 13882 }, { "epoch": 0.1968865994731823, "grad_norm": 3.078125, "learning_rate": 4.54777865202899e-05, "loss": 0.8349, "step": 13884 }, { "epoch": 0.1969149611268085, "grad_norm": 2.953125, "learning_rate": 4.547650479027361e-05, "loss": 0.8214, "step": 13886 }, { "epoch": 0.19694332278043472, "grad_norm": 3.109375, "learning_rate": 4.547522289671011e-05, "loss": 0.8419, "step": 13888 }, { "epoch": 0.19697168443406093, "grad_norm": 3.671875, "learning_rate": 4.547394083960964e-05, "loss": 0.8859, "step": 13890 }, { "epoch": 0.19700004608768715, "grad_norm": 3.234375, "learning_rate": 4.5472658618982446e-05, "loss": 0.8351, "step": 13892 }, { "epoch": 0.19702840774131336, "grad_norm": 2.921875, "learning_rate": 4.5471376234838754e-05, "loss": 0.7962, "step": 13894 }, { "epoch": 0.19705676939493957, "grad_norm": 3.046875, "learning_rate": 4.5470093687188815e-05, "loss": 0.8598, "step": 13896 }, { "epoch": 0.1970851310485658, "grad_norm": 2.859375, "learning_rate": 4.546881097604288e-05, "loss": 0.8238, "step": 13898 }, { "epoch": 0.197113492702192, "grad_norm": 3.125, "learning_rate": 4.546752810141117e-05, "loss": 0.8107, "step": 13900 }, { "epoch": 0.1971418543558182, "grad_norm": 3.234375, "learning_rate": 4.546624506330396e-05, "loss": 0.8252, "step": 13902 }, { "epoch": 0.19717021600944443, "grad_norm": 3.625, "learning_rate": 4.546496186173148e-05, "loss": 0.7961, "step": 13904 }, { "epoch": 0.19719857766307064, "grad_norm": 2.90625, "learning_rate": 4.546367849670399e-05, "loss": 0.8548, "step": 13906 }, { "epoch": 0.19722693931669685, "grad_norm": 2.875, "learning_rate": 4.546239496823174e-05, "loss": 0.8434, "step": 13908 }, { "epoch": 0.19725530097032307, "grad_norm": 2.90625, "learning_rate": 4.5461111276324964e-05, "loss": 0.8626, "step": 13910 }, { "epoch": 0.19728366262394928, "grad_norm": 2.953125, "learning_rate": 4.545982742099394e-05, "loss": 0.8576, "step": 13912 }, { "epoch": 0.1973120242775755, "grad_norm": 3.15625, "learning_rate": 4.5458543402248895e-05, "loss": 0.8932, "step": 13914 }, { "epoch": 0.1973403859312017, "grad_norm": 3.015625, "learning_rate": 4.545725922010011e-05, "loss": 0.8027, "step": 13916 }, { "epoch": 0.19736874758482795, "grad_norm": 3.078125, "learning_rate": 4.545597487455783e-05, "loss": 0.8015, "step": 13918 }, { "epoch": 0.19739710923845416, "grad_norm": 2.96875, "learning_rate": 4.545469036563231e-05, "loss": 0.8285, "step": 13920 }, { "epoch": 0.19742547089208037, "grad_norm": 3.0625, "learning_rate": 4.545340569333382e-05, "loss": 0.8165, "step": 13922 }, { "epoch": 0.1974538325457066, "grad_norm": 3.40625, "learning_rate": 4.545212085767261e-05, "loss": 0.8747, "step": 13924 }, { "epoch": 0.1974821941993328, "grad_norm": 3.203125, "learning_rate": 4.5450835858658946e-05, "loss": 0.7792, "step": 13926 }, { "epoch": 0.197510555852959, "grad_norm": 3.046875, "learning_rate": 4.544955069630309e-05, "loss": 0.8908, "step": 13928 }, { "epoch": 0.19753891750658523, "grad_norm": 2.984375, "learning_rate": 4.54482653706153e-05, "loss": 0.887, "step": 13930 }, { "epoch": 0.19756727916021144, "grad_norm": 3.328125, "learning_rate": 4.5446979881605874e-05, "loss": 0.8407, "step": 13932 }, { "epoch": 0.19759564081383765, "grad_norm": 3.03125, "learning_rate": 4.5445694229285044e-05, "loss": 0.7915, "step": 13934 }, { "epoch": 0.19762400246746387, "grad_norm": 3.078125, "learning_rate": 4.5444408413663085e-05, "loss": 0.8103, "step": 13936 }, { "epoch": 0.19765236412109008, "grad_norm": 2.859375, "learning_rate": 4.5443122434750284e-05, "loss": 0.8414, "step": 13938 }, { "epoch": 0.1976807257747163, "grad_norm": 3.125, "learning_rate": 4.5441836292556894e-05, "loss": 0.8512, "step": 13940 }, { "epoch": 0.1977090874283425, "grad_norm": 3.1875, "learning_rate": 4.544054998709319e-05, "loss": 0.8175, "step": 13942 }, { "epoch": 0.19773744908196872, "grad_norm": 3.171875, "learning_rate": 4.5439263518369455e-05, "loss": 0.8902, "step": 13944 }, { "epoch": 0.19776581073559493, "grad_norm": 2.984375, "learning_rate": 4.543797688639596e-05, "loss": 0.8388, "step": 13946 }, { "epoch": 0.19779417238922115, "grad_norm": 3.3125, "learning_rate": 4.543669009118298e-05, "loss": 0.8277, "step": 13948 }, { "epoch": 0.19782253404284736, "grad_norm": 3.234375, "learning_rate": 4.5435403132740787e-05, "loss": 0.877, "step": 13950 }, { "epoch": 0.19785089569647357, "grad_norm": 3.796875, "learning_rate": 4.5434116011079675e-05, "loss": 0.8509, "step": 13952 }, { "epoch": 0.19787925735009979, "grad_norm": 3.078125, "learning_rate": 4.543282872620991e-05, "loss": 0.8072, "step": 13954 }, { "epoch": 0.197907619003726, "grad_norm": 3.265625, "learning_rate": 4.543154127814178e-05, "loss": 0.831, "step": 13956 }, { "epoch": 0.19793598065735224, "grad_norm": 3.234375, "learning_rate": 4.543025366688557e-05, "loss": 0.7641, "step": 13958 }, { "epoch": 0.19796434231097845, "grad_norm": 3.15625, "learning_rate": 4.542896589245156e-05, "loss": 0.8415, "step": 13960 }, { "epoch": 0.19799270396460467, "grad_norm": 3.484375, "learning_rate": 4.542767795485003e-05, "loss": 0.8234, "step": 13962 }, { "epoch": 0.19802106561823088, "grad_norm": 3.078125, "learning_rate": 4.542638985409128e-05, "loss": 0.8215, "step": 13964 }, { "epoch": 0.1980494272718571, "grad_norm": 3.046875, "learning_rate": 4.542510159018559e-05, "loss": 0.8773, "step": 13966 }, { "epoch": 0.1980777889254833, "grad_norm": 3.21875, "learning_rate": 4.5423813163143254e-05, "loss": 0.8258, "step": 13968 }, { "epoch": 0.19810615057910952, "grad_norm": 3.171875, "learning_rate": 4.5422524572974556e-05, "loss": 0.8509, "step": 13970 }, { "epoch": 0.19813451223273573, "grad_norm": 2.75, "learning_rate": 4.5421235819689796e-05, "loss": 0.8229, "step": 13972 }, { "epoch": 0.19816287388636195, "grad_norm": 2.875, "learning_rate": 4.541994690329926e-05, "loss": 0.8346, "step": 13974 }, { "epoch": 0.19819123553998816, "grad_norm": 3.078125, "learning_rate": 4.541865782381325e-05, "loss": 0.825, "step": 13976 }, { "epoch": 0.19821959719361437, "grad_norm": 2.71875, "learning_rate": 4.5417368581242054e-05, "loss": 0.7776, "step": 13978 }, { "epoch": 0.19824795884724059, "grad_norm": 2.921875, "learning_rate": 4.541607917559597e-05, "loss": 0.837, "step": 13980 }, { "epoch": 0.1982763205008668, "grad_norm": 3.09375, "learning_rate": 4.541478960688531e-05, "loss": 0.8399, "step": 13982 }, { "epoch": 0.198304682154493, "grad_norm": 3.0, "learning_rate": 4.5413499875120355e-05, "loss": 0.8735, "step": 13984 }, { "epoch": 0.19833304380811922, "grad_norm": 3.046875, "learning_rate": 4.541220998031142e-05, "loss": 0.8168, "step": 13986 }, { "epoch": 0.19836140546174544, "grad_norm": 3.046875, "learning_rate": 4.5410919922468795e-05, "loss": 0.8119, "step": 13988 }, { "epoch": 0.19838976711537165, "grad_norm": 2.984375, "learning_rate": 4.540962970160281e-05, "loss": 0.8397, "step": 13990 }, { "epoch": 0.19841812876899786, "grad_norm": 3.203125, "learning_rate": 4.540833931772373e-05, "loss": 0.8275, "step": 13992 }, { "epoch": 0.19844649042262408, "grad_norm": 2.734375, "learning_rate": 4.540704877084189e-05, "loss": 0.8753, "step": 13994 }, { "epoch": 0.19847485207625032, "grad_norm": 3.3125, "learning_rate": 4.54057580609676e-05, "loss": 0.833, "step": 13996 }, { "epoch": 0.19850321372987653, "grad_norm": 2.8125, "learning_rate": 4.540446718811114e-05, "loss": 0.8535, "step": 13998 }, { "epoch": 0.19853157538350275, "grad_norm": 3.375, "learning_rate": 4.5403176152282856e-05, "loss": 0.8728, "step": 14000 } ], "logging_steps": 2, "max_steps": 70517, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.860275745291921e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }