diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,15145 +1,16580 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 2.0, + "epoch": 1.9998306376492505, "eval_steps": 500, - "global_step": 10780, + "global_step": 11808, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "grad_norm": 0.361328125, - "learning_rate": 3.710575139146568e-07, - "loss": 2.6427, + "grad_norm": 0.484375, + "learning_rate": 1.6934801016088062e-07, + "loss": 2.5162, "step": 1 }, { "epoch": 0.0, - "grad_norm": 0.34765625, - "learning_rate": 1.855287569573284e-06, - "loss": 2.6431, + "grad_norm": 0.40234375, + "learning_rate": 8.46740050804403e-07, + "loss": 2.6069, "step": 5 }, { "epoch": 0.0, - "grad_norm": 0.380859375, - "learning_rate": 3.710575139146568e-06, - "loss": 2.6904, + "grad_norm": 0.515625, + "learning_rate": 1.693480101608806e-06, + "loss": 2.5858, "step": 10 }, { "epoch": 0.0, - "grad_norm": 0.369140625, - "learning_rate": 5.565862708719852e-06, - "loss": 2.643, + "grad_norm": 0.5859375, + "learning_rate": 2.5402201524132094e-06, + "loss": 2.5739, "step": 15 }, { "epoch": 0.0, - "grad_norm": 0.359375, - "learning_rate": 7.421150278293136e-06, - "loss": 2.6806, + "grad_norm": 0.404296875, + "learning_rate": 3.386960203217612e-06, + "loss": 2.588, "step": 20 }, { "epoch": 0.0, - "grad_norm": 0.337890625, - "learning_rate": 9.276437847866419e-06, - "loss": 2.6886, + "grad_norm": 0.46484375, + "learning_rate": 4.233700254022015e-06, + "loss": 2.5688, "step": 25 }, { "epoch": 0.01, - "grad_norm": 0.40234375, - "learning_rate": 1.1131725417439704e-05, - "loss": 2.6287, + "grad_norm": 0.5859375, + "learning_rate": 5.080440304826419e-06, + "loss": 2.5694, "step": 30 }, { "epoch": 0.01, - "grad_norm": 0.36328125, - "learning_rate": 1.2987012987012986e-05, - "loss": 2.5784, + "grad_norm": 0.77734375, + "learning_rate": 5.927180355630822e-06, + "loss": 2.5357, "step": 35 }, { "epoch": 0.01, - "grad_norm": 0.376953125, - "learning_rate": 1.4842300556586271e-05, - "loss": 2.5996, + "grad_norm": 0.44921875, + "learning_rate": 6.773920406435224e-06, + "loss": 2.5469, "step": 40 }, { "epoch": 0.01, - "grad_norm": 0.3046875, - "learning_rate": 1.6697588126159555e-05, - "loss": 2.5966, + "grad_norm": 0.5234375, + "learning_rate": 7.620660457239629e-06, + "loss": 2.5776, "step": 45 }, { "epoch": 0.01, - "grad_norm": 0.31640625, - "learning_rate": 1.8552875695732837e-05, - "loss": 2.5816, + "grad_norm": 0.55859375, + "learning_rate": 8.46740050804403e-06, + "loss": 2.5495, "step": 50 }, { "epoch": 0.01, - "grad_norm": 0.314453125, - "learning_rate": 2.0408163265306123e-05, - "loss": 2.5522, + "grad_norm": 0.57421875, + "learning_rate": 9.314140558848434e-06, + "loss": 2.489, "step": 55 }, { "epoch": 0.01, - "grad_norm": 0.3046875, - "learning_rate": 2.2263450834879408e-05, - "loss": 2.5633, + "grad_norm": 0.427734375, + "learning_rate": 1.0160880609652838e-05, + "loss": 2.5028, "step": 60 }, { "epoch": 0.01, - "grad_norm": 0.328125, - "learning_rate": 2.4118738404452693e-05, - "loss": 2.5333, + "grad_norm": 0.37890625, + "learning_rate": 1.1007620660457241e-05, + "loss": 2.5345, "step": 65 }, { "epoch": 0.01, - "grad_norm": 0.26171875, - "learning_rate": 2.5974025974025972e-05, - "loss": 2.4653, + "grad_norm": 0.353515625, + "learning_rate": 1.1854360711261643e-05, + "loss": 2.4845, "step": 70 }, { "epoch": 0.01, - "grad_norm": 0.2431640625, - "learning_rate": 2.782931354359926e-05, - "loss": 2.4254, + "grad_norm": 0.28515625, + "learning_rate": 1.2701100762066045e-05, + "loss": 2.4452, "step": 75 }, { "epoch": 0.01, - "grad_norm": 0.2333984375, - "learning_rate": 2.9684601113172543e-05, - "loss": 2.4339, + "grad_norm": 0.267578125, + "learning_rate": 1.3547840812870449e-05, + "loss": 2.455, "step": 80 }, { - "epoch": 0.02, - "grad_norm": 0.248046875, - "learning_rate": 3.1539888682745825e-05, - "loss": 2.4517, + "epoch": 0.01, + "grad_norm": 0.2421875, + "learning_rate": 1.4394580863674852e-05, + "loss": 2.5327, "step": 85 }, { "epoch": 0.02, - "grad_norm": 0.2138671875, - "learning_rate": 3.339517625231911e-05, - "loss": 2.4258, + "grad_norm": 0.2265625, + "learning_rate": 1.5241320914479258e-05, + "loss": 2.4376, "step": 90 }, { "epoch": 0.02, - "grad_norm": 0.220703125, - "learning_rate": 3.5250463821892396e-05, - "loss": 2.4284, + "grad_norm": 0.2275390625, + "learning_rate": 1.608806096528366e-05, + "loss": 2.4626, "step": 95 }, { "epoch": 0.02, - "grad_norm": 0.23046875, - "learning_rate": 3.7105751391465674e-05, - "loss": 2.3756, + "grad_norm": 0.2265625, + "learning_rate": 1.693480101608806e-05, + "loss": 2.4267, "step": 100 }, { "epoch": 0.02, - "grad_norm": 0.21484375, - "learning_rate": 3.8961038961038966e-05, - "loss": 2.3719, + "grad_norm": 0.2177734375, + "learning_rate": 1.7781541066892467e-05, + "loss": 2.4574, "step": 105 }, { "epoch": 0.02, - "grad_norm": 0.1845703125, - "learning_rate": 4.0816326530612245e-05, - "loss": 2.3452, + "grad_norm": 0.2333984375, + "learning_rate": 1.862828111769687e-05, + "loss": 2.3983, "step": 110 }, { "epoch": 0.02, - "grad_norm": 0.203125, - "learning_rate": 4.267161410018553e-05, - "loss": 2.389, + "grad_norm": 0.2421875, + "learning_rate": 1.947502116850127e-05, + "loss": 2.4354, "step": 115 }, { "epoch": 0.02, - "grad_norm": 0.2080078125, - "learning_rate": 4.4526901669758816e-05, - "loss": 2.3233, + "grad_norm": 0.232421875, + "learning_rate": 2.0321761219305676e-05, + "loss": 2.4131, "step": 120 }, { "epoch": 0.02, - "grad_norm": 0.201171875, - "learning_rate": 4.6382189239332094e-05, - "loss": 2.3368, + "grad_norm": 0.2080078125, + "learning_rate": 2.1168501270110077e-05, + "loss": 2.4226, "step": 125 }, { "epoch": 0.02, - "grad_norm": 0.1826171875, - "learning_rate": 4.823747680890539e-05, - "loss": 2.2901, + "grad_norm": 0.208984375, + "learning_rate": 2.2015241320914483e-05, + "loss": 2.4228, "step": 130 }, { - "epoch": 0.03, - "grad_norm": 0.212890625, - "learning_rate": 5.0092764378478665e-05, - "loss": 2.3386, + "epoch": 0.02, + "grad_norm": 0.2060546875, + "learning_rate": 2.286198137171888e-05, + "loss": 2.3872, "step": 135 }, { - "epoch": 0.03, - "grad_norm": 0.1943359375, - "learning_rate": 5.1948051948051944e-05, - "loss": 2.3245, + "epoch": 0.02, + "grad_norm": 0.1904296875, + "learning_rate": 2.3708721422523286e-05, + "loss": 2.3907, "step": 140 }, { - "epoch": 0.03, - "grad_norm": 0.1943359375, - "learning_rate": 5.380333951762524e-05, - "loss": 2.3099, + "epoch": 0.02, + "grad_norm": 0.2001953125, + "learning_rate": 2.455546147332769e-05, + "loss": 2.3826, "step": 145 }, { "epoch": 0.03, - "grad_norm": 0.1787109375, - "learning_rate": 5.565862708719852e-05, - "loss": 2.2899, + "grad_norm": 0.2119140625, + "learning_rate": 2.540220152413209e-05, + "loss": 2.3294, "step": 150 }, { "epoch": 0.03, - "grad_norm": 0.1796875, - "learning_rate": 5.751391465677181e-05, - "loss": 2.2857, + "grad_norm": 0.197265625, + "learning_rate": 2.62489415749365e-05, + "loss": 2.3485, "step": 155 }, { "epoch": 0.03, - "grad_norm": 0.185546875, - "learning_rate": 5.9369202226345086e-05, - "loss": 2.2968, + "grad_norm": 0.2236328125, + "learning_rate": 2.7095681625740897e-05, + "loss": 2.3282, "step": 160 }, { "epoch": 0.03, - "grad_norm": 0.197265625, - "learning_rate": 6.122448979591838e-05, - "loss": 2.2657, + "grad_norm": 0.216796875, + "learning_rate": 2.79424216765453e-05, + "loss": 2.3451, "step": 165 }, { "epoch": 0.03, - "grad_norm": 0.1884765625, - "learning_rate": 6.307977736549165e-05, - "loss": 2.3, + "grad_norm": 0.19921875, + "learning_rate": 2.8789161727349705e-05, + "loss": 2.2971, "step": 170 }, { "epoch": 0.03, - "grad_norm": 0.1875, - "learning_rate": 6.493506493506494e-05, - "loss": 2.2807, + "grad_norm": 0.21484375, + "learning_rate": 2.9635901778154106e-05, + "loss": 2.3093, "step": 175 }, { "epoch": 0.03, - "grad_norm": 0.208984375, - "learning_rate": 6.679035250463822e-05, - "loss": 2.257, + "grad_norm": 0.232421875, + "learning_rate": 3.0482641828958515e-05, + "loss": 2.2861, "step": 180 }, { "epoch": 0.03, - "grad_norm": 0.1962890625, - "learning_rate": 6.86456400742115e-05, - "loss": 2.2708, + "grad_norm": 0.205078125, + "learning_rate": 3.132938187976292e-05, + "loss": 2.3381, "step": 185 }, { - "epoch": 0.04, - "grad_norm": 0.1787109375, - "learning_rate": 7.050092764378479e-05, - "loss": 2.273, + "epoch": 0.03, + "grad_norm": 0.1845703125, + "learning_rate": 3.217612193056732e-05, + "loss": 2.269, "step": 190 }, { - "epoch": 0.04, - "grad_norm": 0.1875, - "learning_rate": 7.235621521335806e-05, - "loss": 2.2603, + "epoch": 0.03, + "grad_norm": 0.185546875, + "learning_rate": 3.302286198137172e-05, + "loss": 2.3178, "step": 195 }, { - "epoch": 0.04, - "grad_norm": 0.1904296875, - "learning_rate": 7.421150278293135e-05, - "loss": 2.2514, + "epoch": 0.03, + "grad_norm": 0.181640625, + "learning_rate": 3.386960203217612e-05, + "loss": 2.2231, "step": 200 }, { - "epoch": 0.04, - "grad_norm": 0.2119140625, - "learning_rate": 7.606679035250465e-05, - "loss": 2.2908, + "epoch": 0.03, + "grad_norm": 0.173828125, + "learning_rate": 3.4716342082980524e-05, + "loss": 2.2515, "step": 205 }, { "epoch": 0.04, - "grad_norm": 0.1767578125, - "learning_rate": 7.792207792207793e-05, - "loss": 2.2708, + "grad_norm": 0.1865234375, + "learning_rate": 3.556308213378493e-05, + "loss": 2.3124, "step": 210 }, { "epoch": 0.04, - "grad_norm": 0.1982421875, - "learning_rate": 7.977736549165122e-05, - "loss": 2.2279, + "grad_norm": 0.1826171875, + "learning_rate": 3.6409822184589335e-05, + "loss": 2.2382, "step": 215 }, { "epoch": 0.04, - "grad_norm": 0.19921875, - "learning_rate": 8.163265306122449e-05, - "loss": 2.2627, + "grad_norm": 0.1884765625, + "learning_rate": 3.725656223539374e-05, + "loss": 2.2292, "step": 220 }, { "epoch": 0.04, - "grad_norm": 0.2099609375, - "learning_rate": 8.348794063079778e-05, - "loss": 2.2642, + "grad_norm": 0.1650390625, + "learning_rate": 3.810330228619814e-05, + "loss": 2.2462, "step": 225 }, { "epoch": 0.04, - "grad_norm": 0.2041015625, - "learning_rate": 8.534322820037106e-05, - "loss": 2.2373, + "grad_norm": 0.171875, + "learning_rate": 3.895004233700254e-05, + "loss": 2.2408, "step": 230 }, { "epoch": 0.04, - "grad_norm": 0.2001953125, - "learning_rate": 8.719851576994435e-05, - "loss": 2.2572, + "grad_norm": 0.1884765625, + "learning_rate": 3.979678238780695e-05, + "loss": 2.2485, "step": 235 }, { "epoch": 0.04, - "grad_norm": 0.1953125, - "learning_rate": 8.905380333951763e-05, - "loss": 2.2105, + "grad_norm": 0.1884765625, + "learning_rate": 4.064352243861135e-05, + "loss": 2.319, "step": 240 }, { - "epoch": 0.05, - "grad_norm": 0.1953125, - "learning_rate": 9.090909090909092e-05, - "loss": 2.2398, + "epoch": 0.04, + "grad_norm": 0.1904296875, + "learning_rate": 4.1490262489415746e-05, + "loss": 2.2125, "step": 245 }, { - "epoch": 0.05, - "grad_norm": 0.203125, - "learning_rate": 9.276437847866419e-05, - "loss": 2.2448, + "epoch": 0.04, + "grad_norm": 0.1826171875, + "learning_rate": 4.2337002540220155e-05, + "loss": 2.2627, "step": 250 }, { - "epoch": 0.05, - "grad_norm": 0.203125, - "learning_rate": 9.461966604823747e-05, - "loss": 2.2113, + "epoch": 0.04, + "grad_norm": 0.1826171875, + "learning_rate": 4.318374259102456e-05, + "loss": 2.2665, "step": 255 }, { - "epoch": 0.05, - "grad_norm": 0.2021484375, - "learning_rate": 9.647495361781077e-05, - "loss": 2.2631, + "epoch": 0.04, + "grad_norm": 0.1904296875, + "learning_rate": 4.4030482641828965e-05, + "loss": 2.2375, "step": 260 }, { - "epoch": 0.05, - "grad_norm": 0.19921875, - "learning_rate": 9.833024118738406e-05, - "loss": 2.2716, + "epoch": 0.04, + "grad_norm": 0.1884765625, + "learning_rate": 4.487722269263336e-05, + "loss": 2.2342, "step": 265 }, { "epoch": 0.05, - "grad_norm": 0.201171875, - "learning_rate": 0.00010018552875695733, - "loss": 2.2368, + "grad_norm": 0.189453125, + "learning_rate": 4.572396274343776e-05, + "loss": 2.2752, "step": 270 }, { "epoch": 0.05, - "grad_norm": 0.1982421875, - "learning_rate": 0.00010204081632653062, - "loss": 2.2297, + "grad_norm": 0.18359375, + "learning_rate": 4.657070279424217e-05, + "loss": 2.2579, "step": 275 }, { "epoch": 0.05, - "grad_norm": 0.201171875, - "learning_rate": 0.00010389610389610389, - "loss": 2.2706, + "grad_norm": 0.181640625, + "learning_rate": 4.741744284504657e-05, + "loss": 2.2639, "step": 280 }, { "epoch": 0.05, - "grad_norm": 0.205078125, - "learning_rate": 0.00010575139146567719, - "loss": 2.254, + "grad_norm": 0.1953125, + "learning_rate": 4.8264182895850975e-05, + "loss": 2.2554, "step": 285 }, { "epoch": 0.05, - "grad_norm": 0.1962890625, - "learning_rate": 0.00010760667903525049, - "loss": 2.2576, + "grad_norm": 0.185546875, + "learning_rate": 4.911092294665538e-05, + "loss": 2.2714, "step": 290 }, { "epoch": 0.05, - "grad_norm": 0.2060546875, - "learning_rate": 0.00010946196660482376, - "loss": 2.2456, + "grad_norm": 0.18359375, + "learning_rate": 4.995766299745978e-05, + "loss": 2.2491, "step": 295 }, { - "epoch": 0.06, - "grad_norm": 0.19921875, - "learning_rate": 0.00011131725417439704, - "loss": 2.2665, + "epoch": 0.05, + "grad_norm": 0.1806640625, + "learning_rate": 5.080440304826418e-05, + "loss": 2.2583, "step": 300 }, { - "epoch": 0.06, - "grad_norm": 0.2080078125, - "learning_rate": 0.00011317254174397031, - "loss": 2.2529, + "epoch": 0.05, + "grad_norm": 0.19921875, + "learning_rate": 5.165114309906859e-05, + "loss": 2.2448, "step": 305 }, { - "epoch": 0.06, - "grad_norm": 0.2119140625, - "learning_rate": 0.00011502782931354361, - "loss": 2.2427, + "epoch": 0.05, + "grad_norm": 0.19140625, + "learning_rate": 5.2497883149873e-05, + "loss": 2.2126, "step": 310 }, { - "epoch": 0.06, - "grad_norm": 0.201171875, - "learning_rate": 0.00011688311688311689, - "loss": 2.2489, + "epoch": 0.05, + "grad_norm": 0.1845703125, + "learning_rate": 5.334462320067739e-05, + "loss": 2.2718, "step": 315 }, { - "epoch": 0.06, - "grad_norm": 0.203125, - "learning_rate": 0.00011873840445269017, - "loss": 2.2153, + "epoch": 0.05, + "grad_norm": 0.1943359375, + "learning_rate": 5.4191363251481795e-05, + "loss": 2.2236, "step": 320 }, { "epoch": 0.06, - "grad_norm": 0.193359375, - "learning_rate": 0.00012059369202226344, - "loss": 2.2204, + "grad_norm": 0.1943359375, + "learning_rate": 5.5038103302286203e-05, + "loss": 2.2657, "step": 325 }, { "epoch": 0.06, - "grad_norm": 0.1962890625, - "learning_rate": 0.00012244897959183676, - "loss": 2.2397, + "grad_norm": 0.1943359375, + "learning_rate": 5.58848433530906e-05, + "loss": 2.2426, "step": 330 }, { "epoch": 0.06, - "grad_norm": 0.19921875, - "learning_rate": 0.00012430426716141001, - "loss": 2.2586, + "grad_norm": 0.193359375, + "learning_rate": 5.673158340389501e-05, + "loss": 2.2843, "step": 335 }, { "epoch": 0.06, - "grad_norm": 0.1943359375, - "learning_rate": 0.0001261595547309833, - "loss": 2.2376, + "grad_norm": 0.193359375, + "learning_rate": 5.757832345469941e-05, + "loss": 2.2423, "step": 340 }, { "epoch": 0.06, "grad_norm": 0.197265625, - "learning_rate": 0.0001280148423005566, - "loss": 2.1859, + "learning_rate": 5.842506350550381e-05, + "loss": 2.2573, "step": 345 }, { "epoch": 0.06, - "grad_norm": 0.216796875, - "learning_rate": 0.00012987012987012987, - "loss": 2.2297, + "grad_norm": 0.2099609375, + "learning_rate": 5.927180355630821e-05, + "loss": 2.2623, "step": 350 }, { - "epoch": 0.07, - "grad_norm": 0.203125, - "learning_rate": 0.00013172541743970318, - "loss": 2.2543, + "epoch": 0.06, + "grad_norm": 0.1953125, + "learning_rate": 6.011854360711262e-05, + "loss": 2.2224, "step": 355 }, { - "epoch": 0.07, - "grad_norm": 0.2041015625, - "learning_rate": 0.00013358070500927644, - "loss": 2.2614, + "epoch": 0.06, + "grad_norm": 0.2001953125, + "learning_rate": 6.096528365791703e-05, + "loss": 2.227, "step": 360 }, { - "epoch": 0.07, - "grad_norm": 0.1953125, - "learning_rate": 0.00013543599257884973, - "loss": 2.2152, + "epoch": 0.06, + "grad_norm": 0.1982421875, + "learning_rate": 6.181202370872143e-05, + "loss": 2.2246, "step": 365 }, { - "epoch": 0.07, - "grad_norm": 0.2080078125, - "learning_rate": 0.000137291280148423, - "loss": 2.2712, + "epoch": 0.06, + "grad_norm": 0.1962890625, + "learning_rate": 6.265876375952583e-05, + "loss": 2.2499, "step": 370 }, { - "epoch": 0.07, - "grad_norm": 0.185546875, - "learning_rate": 0.0001391465677179963, - "loss": 2.2363, + "epoch": 0.06, + "grad_norm": 0.2001953125, + "learning_rate": 6.350550381033024e-05, + "loss": 2.2423, "step": 375 }, { - "epoch": 0.07, - "grad_norm": 0.205078125, - "learning_rate": 0.00014100185528756958, - "loss": 2.2156, + "epoch": 0.06, + "grad_norm": 0.1962890625, + "learning_rate": 6.435224386113464e-05, + "loss": 2.2377, "step": 380 }, { "epoch": 0.07, - "grad_norm": 0.1953125, - "learning_rate": 0.00014285714285714287, - "loss": 2.2721, + "grad_norm": 0.2060546875, + "learning_rate": 6.519898391193903e-05, + "loss": 2.2515, "step": 385 }, { "epoch": 0.07, - "grad_norm": 0.1923828125, - "learning_rate": 0.00014471243042671613, - "loss": 2.2148, + "grad_norm": 0.2021484375, + "learning_rate": 6.604572396274344e-05, + "loss": 2.2243, "step": 390 }, { "epoch": 0.07, - "grad_norm": 0.1943359375, - "learning_rate": 0.00014656771799628944, - "loss": 2.1739, + "grad_norm": 0.197265625, + "learning_rate": 6.689246401354784e-05, + "loss": 2.2758, "step": 395 }, { "epoch": 0.07, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001484230055658627, - "loss": 2.2193, + "grad_norm": 0.2099609375, + "learning_rate": 6.773920406435225e-05, + "loss": 2.2412, "step": 400 }, { - "epoch": 0.08, - "grad_norm": 0.1923828125, - "learning_rate": 0.000150278293135436, - "loss": 2.1901, + "epoch": 0.07, + "grad_norm": 0.1982421875, + "learning_rate": 6.858594411515665e-05, + "loss": 2.2574, "step": 405 }, { - "epoch": 0.08, - "grad_norm": 0.1982421875, - "learning_rate": 0.0001521335807050093, - "loss": 2.2271, + "epoch": 0.07, + "grad_norm": 0.1943359375, + "learning_rate": 6.943268416596105e-05, + "loss": 2.2604, "step": 410 }, { - "epoch": 0.08, - "grad_norm": 0.197265625, - "learning_rate": 0.00015398886827458255, + "epoch": 0.07, + "grad_norm": 0.2265625, + "learning_rate": 7.027942421676546e-05, "loss": 2.2502, "step": 415 }, { - "epoch": 0.08, - "grad_norm": 0.19921875, - "learning_rate": 0.00015584415584415587, - "loss": 2.2148, + "epoch": 0.07, + "grad_norm": 0.1962890625, + "learning_rate": 7.112616426756987e-05, + "loss": 2.2565, "step": 420 }, { - "epoch": 0.08, + "epoch": 0.07, "grad_norm": 0.197265625, - "learning_rate": 0.00015769944341372912, - "loss": 2.1976, + "learning_rate": 7.197290431837426e-05, + "loss": 2.2028, "step": 425 }, { - "epoch": 0.08, - "grad_norm": 0.1943359375, - "learning_rate": 0.00015955473098330244, - "loss": 2.1779, + "epoch": 0.07, + "grad_norm": 0.2001953125, + "learning_rate": 7.281964436917867e-05, + "loss": 2.2372, "step": 430 }, { - "epoch": 0.08, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001614100185528757, - "loss": 2.2163, + "epoch": 0.07, + "grad_norm": 0.2080078125, + "learning_rate": 7.366638441998307e-05, + "loss": 2.2608, "step": 435 }, { - "epoch": 0.08, - "grad_norm": 0.1962890625, - "learning_rate": 0.00016326530612244898, - "loss": 2.2278, + "epoch": 0.07, + "grad_norm": 0.205078125, + "learning_rate": 7.451312447078747e-05, + "loss": 2.2387, "step": 440 }, { "epoch": 0.08, - "grad_norm": 0.1904296875, - "learning_rate": 0.00016512059369202227, - "loss": 2.2018, + "grad_norm": 0.212890625, + "learning_rate": 7.535986452159187e-05, + "loss": 2.2552, "step": 445 }, { "epoch": 0.08, - "grad_norm": 0.1884765625, - "learning_rate": 0.00016697588126159555, - "loss": 2.2086, + "grad_norm": 0.205078125, + "learning_rate": 7.620660457239628e-05, + "loss": 2.183, "step": 450 }, { "epoch": 0.08, - "grad_norm": 0.185546875, - "learning_rate": 0.00016883116883116884, - "loss": 2.2486, + "grad_norm": 0.220703125, + "learning_rate": 7.705334462320069e-05, + "loss": 2.2255, "step": 455 }, { - "epoch": 0.09, - "grad_norm": 0.1923828125, - "learning_rate": 0.00017068645640074212, - "loss": 2.2295, + "epoch": 0.08, + "grad_norm": 0.2001953125, + "learning_rate": 7.790008467400508e-05, + "loss": 2.2245, "step": 460 }, { - "epoch": 0.09, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001725417439703154, - "loss": 2.2318, + "epoch": 0.08, + "grad_norm": 0.2275390625, + "learning_rate": 7.874682472480949e-05, + "loss": 2.2154, "step": 465 }, { - "epoch": 0.09, - "grad_norm": 0.1865234375, - "learning_rate": 0.0001743970315398887, - "loss": 2.2289, + "epoch": 0.08, + "grad_norm": 0.203125, + "learning_rate": 7.95935647756139e-05, + "loss": 2.2057, "step": 470 }, { - "epoch": 0.09, - "grad_norm": 0.1953125, - "learning_rate": 0.00017625231910946198, - "loss": 2.2378, + "epoch": 0.08, + "grad_norm": 0.2001953125, + "learning_rate": 8.04403048264183e-05, + "loss": 2.2309, "step": 475 }, { - "epoch": 0.09, - "grad_norm": 0.18359375, - "learning_rate": 0.00017810760667903526, - "loss": 2.1831, + "epoch": 0.08, + "grad_norm": 0.2041015625, + "learning_rate": 8.12870448772227e-05, + "loss": 2.2176, "step": 480 }, { - "epoch": 0.09, - "grad_norm": 0.2041015625, - "learning_rate": 0.00017996289424860855, - "loss": 2.2105, + "epoch": 0.08, + "grad_norm": 0.2060546875, + "learning_rate": 8.21337849280271e-05, + "loss": 2.2527, "step": 485 }, { - "epoch": 0.09, - "grad_norm": 0.1845703125, - "learning_rate": 0.00018181818181818183, - "loss": 2.2156, + "epoch": 0.08, + "grad_norm": 0.205078125, + "learning_rate": 8.298052497883149e-05, + "loss": 2.2488, "step": 490 }, { - "epoch": 0.09, - "grad_norm": 0.1884765625, - "learning_rate": 0.00018367346938775512, - "loss": 2.2045, + "epoch": 0.08, + "grad_norm": 0.21484375, + "learning_rate": 8.38272650296359e-05, + "loss": 2.2384, "step": 495 }, { - "epoch": 0.09, - "grad_norm": 0.197265625, - "learning_rate": 0.00018552875695732838, - "loss": 2.2298, + "epoch": 0.08, + "grad_norm": 0.203125, + "learning_rate": 8.467400508044031e-05, + "loss": 2.246, "step": 500 }, { "epoch": 0.09, - "grad_norm": 0.1884765625, - "learning_rate": 0.0001873840445269017, - "loss": 2.2342, + "grad_norm": 0.2060546875, + "learning_rate": 8.552074513124472e-05, + "loss": 2.2167, "step": 505 }, { "epoch": 0.09, - "grad_norm": 0.1953125, - "learning_rate": 0.00018923933209647495, - "loss": 2.2121, + "grad_norm": 0.2109375, + "learning_rate": 8.636748518204911e-05, + "loss": 2.2183, "step": 510 }, { - "epoch": 0.1, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019109461966604823, - "loss": 2.2169, + "epoch": 0.09, + "grad_norm": 0.208984375, + "learning_rate": 8.721422523285352e-05, + "loss": 2.2027, "step": 515 }, { - "epoch": 0.1, - "grad_norm": 0.1796875, - "learning_rate": 0.00019294990723562155, - "loss": 2.2174, + "epoch": 0.09, + "grad_norm": 0.251953125, + "learning_rate": 8.806096528365793e-05, + "loss": 2.1894, "step": 520 }, { - "epoch": 0.1, - "grad_norm": 0.1826171875, - "learning_rate": 0.0001948051948051948, - "loss": 2.2328, + "epoch": 0.09, + "grad_norm": 0.2080078125, + "learning_rate": 8.890770533446233e-05, + "loss": 2.2201, "step": 525 }, { - "epoch": 0.1, - "grad_norm": 0.18359375, - "learning_rate": 0.00019666048237476812, - "loss": 2.2081, + "epoch": 0.09, + "grad_norm": 0.2138671875, + "learning_rate": 8.975444538526672e-05, + "loss": 2.2116, "step": 530 }, { - "epoch": 0.1, - "grad_norm": 0.189453125, - "learning_rate": 0.00019851576994434138, - "loss": 2.2411, + "epoch": 0.09, + "grad_norm": 0.2041015625, + "learning_rate": 9.060118543607113e-05, + "loss": 2.2211, "step": 535 }, { - "epoch": 0.1, - "grad_norm": 0.173828125, - "learning_rate": 0.00020037105751391466, - "loss": 2.208, + "epoch": 0.09, + "grad_norm": 0.203125, + "learning_rate": 9.144792548687552e-05, + "loss": 2.2107, "step": 540 }, { - "epoch": 0.1, - "grad_norm": 0.1796875, - "learning_rate": 0.00020222634508348795, - "loss": 2.1872, + "epoch": 0.09, + "grad_norm": 0.2041015625, + "learning_rate": 9.229466553767993e-05, + "loss": 2.2446, "step": 545 }, { - "epoch": 0.1, - "grad_norm": 0.185546875, - "learning_rate": 0.00020408163265306123, - "loss": 2.1829, + "epoch": 0.09, + "grad_norm": 0.2080078125, + "learning_rate": 9.314140558848434e-05, + "loss": 2.2244, "step": 550 }, { - "epoch": 0.1, - "grad_norm": 0.197265625, - "learning_rate": 0.00020593692022263454, - "loss": 2.2194, + "epoch": 0.09, + "grad_norm": 0.208984375, + "learning_rate": 9.398814563928874e-05, + "loss": 2.2381, "step": 555 }, { - "epoch": 0.1, - "grad_norm": 0.185546875, - "learning_rate": 0.00020779220779220778, - "loss": 2.2283, + "epoch": 0.09, + "grad_norm": 0.2099609375, + "learning_rate": 9.483488569009315e-05, + "loss": 2.2091, "step": 560 }, { "epoch": 0.1, - "grad_norm": 0.1884765625, - "learning_rate": 0.0002096474953617811, - "loss": 2.2119, + "grad_norm": 0.2236328125, + "learning_rate": 9.568162574089755e-05, + "loss": 2.2132, "step": 565 }, { - "epoch": 0.11, - "grad_norm": 0.1826171875, - "learning_rate": 0.00021150278293135437, - "loss": 2.2196, + "epoch": 0.1, + "grad_norm": 0.205078125, + "learning_rate": 9.652836579170195e-05, + "loss": 2.2323, "step": 570 }, { - "epoch": 0.11, - "grad_norm": 0.189453125, - "learning_rate": 0.00021335807050092766, - "loss": 2.2378, + "epoch": 0.1, + "grad_norm": 0.2138671875, + "learning_rate": 9.737510584250636e-05, + "loss": 2.2082, "step": 575 }, { - "epoch": 0.11, - "grad_norm": 0.1845703125, - "learning_rate": 0.00021521335807050097, - "loss": 2.2361, + "epoch": 0.1, + "grad_norm": 0.2099609375, + "learning_rate": 9.822184589331075e-05, + "loss": 2.2041, "step": 580 }, { - "epoch": 0.11, - "grad_norm": 0.185546875, - "learning_rate": 0.0002170686456400742, - "loss": 2.2253, + "epoch": 0.1, + "grad_norm": 0.2138671875, + "learning_rate": 9.906858594411516e-05, + "loss": 2.2193, "step": 585 }, { - "epoch": 0.11, - "grad_norm": 0.173828125, - "learning_rate": 0.00021892393320964752, - "loss": 2.2111, + "epoch": 0.1, + "grad_norm": 0.203125, + "learning_rate": 9.991532599491956e-05, + "loss": 2.2349, "step": 590 }, { - "epoch": 0.11, - "grad_norm": 0.1796875, - "learning_rate": 0.0002207792207792208, - "loss": 2.2417, + "epoch": 0.1, + "grad_norm": 0.201171875, + "learning_rate": 0.00010076206604572395, + "loss": 2.2278, "step": 595 }, { - "epoch": 0.11, - "grad_norm": 0.1806640625, - "learning_rate": 0.00022263450834879409, - "loss": 2.1814, + "epoch": 0.1, + "grad_norm": 0.2138671875, + "learning_rate": 0.00010160880609652836, + "loss": 2.2127, "step": 600 }, { - "epoch": 0.11, - "grad_norm": 0.177734375, - "learning_rate": 0.00022448979591836734, - "loss": 2.248, + "epoch": 0.1, + "grad_norm": 0.21875, + "learning_rate": 0.00010245554614733277, + "loss": 2.2367, "step": 605 }, { - "epoch": 0.11, - "grad_norm": 0.177734375, - "learning_rate": 0.00022634508348794063, - "loss": 2.191, + "epoch": 0.1, + "grad_norm": 0.2119140625, + "learning_rate": 0.00010330228619813718, + "loss": 2.2172, "step": 610 }, { - "epoch": 0.11, - "grad_norm": 0.1787109375, - "learning_rate": 0.00022820037105751392, - "loss": 2.2258, + "epoch": 0.1, + "grad_norm": 0.2197265625, + "learning_rate": 0.00010414902624894159, + "loss": 2.222, "step": 615 }, { - "epoch": 0.12, - "grad_norm": 0.181640625, - "learning_rate": 0.00023005565862708723, - "loss": 2.226, + "epoch": 0.11, + "grad_norm": 0.1982421875, + "learning_rate": 0.000104995766299746, + "loss": 2.1955, "step": 620 }, { - "epoch": 0.12, - "grad_norm": 0.189453125, - "learning_rate": 0.00023191094619666046, - "loss": 2.231, + "epoch": 0.11, + "grad_norm": 0.203125, + "learning_rate": 0.00010584250635055039, + "loss": 2.2568, "step": 625 }, { - "epoch": 0.12, - "grad_norm": 0.1826171875, - "learning_rate": 0.00023376623376623377, - "loss": 2.2119, + "epoch": 0.11, + "grad_norm": 0.21484375, + "learning_rate": 0.00010668924640135479, + "loss": 2.2051, "step": 630 }, { - "epoch": 0.12, - "grad_norm": 0.173828125, - "learning_rate": 0.00023562152133580706, - "loss": 2.2206, + "epoch": 0.11, + "grad_norm": 0.2158203125, + "learning_rate": 0.00010753598645215918, + "loss": 2.2461, "step": 635 }, { - "epoch": 0.12, - "grad_norm": 0.177734375, - "learning_rate": 0.00023747680890538034, - "loss": 2.2083, + "epoch": 0.11, + "grad_norm": 0.208984375, + "learning_rate": 0.00010838272650296359, + "loss": 2.2322, "step": 640 }, { - "epoch": 0.12, - "grad_norm": 0.1748046875, - "learning_rate": 0.00023933209647495365, - "loss": 2.2206, + "epoch": 0.11, + "grad_norm": 0.2138671875, + "learning_rate": 0.000109229466553768, + "loss": 2.2358, "step": 645 }, { - "epoch": 0.12, - "grad_norm": 0.1728515625, - "learning_rate": 0.00024118738404452689, - "loss": 2.1931, + "epoch": 0.11, + "grad_norm": 0.2001953125, + "learning_rate": 0.00011007620660457241, + "loss": 2.1646, "step": 650 }, { - "epoch": 0.12, - "grad_norm": 0.1767578125, - "learning_rate": 0.0002430426716141002, - "loss": 2.1753, + "epoch": 0.11, + "grad_norm": 0.2216796875, + "learning_rate": 0.00011092294665537682, + "loss": 2.2039, "step": 655 }, { - "epoch": 0.12, - "grad_norm": 0.1796875, - "learning_rate": 0.0002448979591836735, - "loss": 2.1885, + "epoch": 0.11, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001117696867061812, + "loss": 2.1748, "step": 660 }, { - "epoch": 0.12, - "grad_norm": 0.181640625, - "learning_rate": 0.0002467532467532468, - "loss": 2.2343, + "epoch": 0.11, + "grad_norm": 0.205078125, + "learning_rate": 0.0001126164267569856, + "loss": 2.249, "step": 665 }, { - "epoch": 0.12, - "grad_norm": 0.1865234375, - "learning_rate": 0.00024860853432282003, - "loss": 2.2425, + "epoch": 0.11, + "grad_norm": 0.201171875, + "learning_rate": 0.00011346316680779001, + "loss": 2.2732, "step": 670 }, { - "epoch": 0.13, - "grad_norm": 0.1796875, - "learning_rate": 0.0002504638218923933, - "loss": 2.1929, + "epoch": 0.11, + "grad_norm": 0.2109375, + "learning_rate": 0.00011430990685859442, + "loss": 2.2496, "step": 675 }, { - "epoch": 0.13, - "grad_norm": 0.1767578125, - "learning_rate": 0.0002523191094619666, - "loss": 2.2569, + "epoch": 0.12, + "grad_norm": 0.201171875, + "learning_rate": 0.00011515664690939882, + "loss": 2.2412, "step": 680 }, { - "epoch": 0.13, - "grad_norm": 0.173828125, - "learning_rate": 0.00025417439703153994, - "loss": 2.2311, + "epoch": 0.12, + "grad_norm": 0.1982421875, + "learning_rate": 0.00011600338696020323, + "loss": 2.1998, "step": 685 }, { - "epoch": 0.13, - "grad_norm": 0.181640625, - "learning_rate": 0.0002560296846011132, - "loss": 2.2218, + "epoch": 0.12, + "grad_norm": 0.2099609375, + "learning_rate": 0.00011685012701100762, + "loss": 2.202, "step": 690 }, { - "epoch": 0.13, - "grad_norm": 0.1708984375, - "learning_rate": 0.00025788497217068645, - "loss": 2.2148, + "epoch": 0.12, + "grad_norm": 0.2001953125, + "learning_rate": 0.00011769686706181202, + "loss": 2.208, "step": 695 }, { - "epoch": 0.13, - "grad_norm": 0.1796875, - "learning_rate": 0.00025974025974025974, - "loss": 2.1973, + "epoch": 0.12, + "grad_norm": 0.2021484375, + "learning_rate": 0.00011854360711261643, + "loss": 2.212, "step": 700 }, { - "epoch": 0.13, - "grad_norm": 0.1826171875, - "learning_rate": 0.000261595547309833, - "loss": 2.197, + "epoch": 0.12, + "grad_norm": 0.2041015625, + "learning_rate": 0.00011939034716342083, + "loss": 2.2409, "step": 705 }, { - "epoch": 0.13, - "grad_norm": 0.1796875, - "learning_rate": 0.00026345083487940637, - "loss": 2.2127, + "epoch": 0.12, + "grad_norm": 0.19921875, + "learning_rate": 0.00012023708721422524, + "loss": 2.2069, "step": 710 }, { - "epoch": 0.13, - "grad_norm": 0.169921875, - "learning_rate": 0.0002653061224489796, - "loss": 2.1964, + "epoch": 0.12, + "grad_norm": 0.2197265625, + "learning_rate": 0.00012108382726502965, + "loss": 2.2205, "step": 715 }, { - "epoch": 0.13, - "grad_norm": 0.171875, - "learning_rate": 0.0002671614100185529, - "loss": 2.2008, + "epoch": 0.12, + "grad_norm": 0.1982421875, + "learning_rate": 0.00012193056731583406, + "loss": 2.2525, "step": 720 }, { - "epoch": 0.13, - "grad_norm": 0.185546875, - "learning_rate": 0.00026901669758812617, - "loss": 2.1945, + "epoch": 0.12, + "grad_norm": 0.20703125, + "learning_rate": 0.00012277730736663843, + "loss": 2.2261, "step": 725 }, { - "epoch": 0.14, - "grad_norm": 0.173828125, - "learning_rate": 0.00027087198515769945, - "loss": 2.1954, + "epoch": 0.12, + "grad_norm": 0.1962890625, + "learning_rate": 0.00012362404741744285, + "loss": 2.1868, "step": 730 }, { - "epoch": 0.14, - "grad_norm": 0.1748046875, - "learning_rate": 0.00027272727272727274, - "loss": 2.2016, + "epoch": 0.12, + "grad_norm": 0.2041015625, + "learning_rate": 0.00012447078746824725, + "loss": 2.2363, "step": 735 }, { - "epoch": 0.14, - "grad_norm": 0.1787109375, - "learning_rate": 0.000274582560296846, - "loss": 2.257, + "epoch": 0.13, + "grad_norm": 0.220703125, + "learning_rate": 0.00012531752751905167, + "loss": 2.1955, "step": 740 }, { - "epoch": 0.14, - "grad_norm": 0.1767578125, - "learning_rate": 0.0002764378478664193, - "loss": 2.2091, + "epoch": 0.13, + "grad_norm": 0.2021484375, + "learning_rate": 0.00012616426756985606, + "loss": 2.1964, "step": 745 }, { - "epoch": 0.14, - "grad_norm": 0.173828125, - "learning_rate": 0.0002782931354359926, - "loss": 2.2242, + "epoch": 0.13, + "grad_norm": 0.1982421875, + "learning_rate": 0.00012701100762066049, + "loss": 2.2456, "step": 750 }, { - "epoch": 0.14, - "grad_norm": 0.1748046875, - "learning_rate": 0.0002801484230055659, - "loss": 2.2002, + "epoch": 0.13, + "grad_norm": 0.2041015625, + "learning_rate": 0.00012785774767146485, + "loss": 2.1887, "step": 755 }, { - "epoch": 0.14, - "grad_norm": 0.1767578125, - "learning_rate": 0.00028200371057513916, - "loss": 2.2441, + "epoch": 0.13, + "grad_norm": 0.21484375, + "learning_rate": 0.00012870448772226928, + "loss": 2.2023, "step": 760 }, { - "epoch": 0.14, - "grad_norm": 0.171875, - "learning_rate": 0.00028385899814471245, - "loss": 2.1937, + "epoch": 0.13, + "grad_norm": 0.197265625, + "learning_rate": 0.00012955122777307367, + "loss": 2.21, "step": 765 }, { - "epoch": 0.14, - "grad_norm": 0.173828125, - "learning_rate": 0.00028571428571428574, - "loss": 2.2132, + "epoch": 0.13, + "grad_norm": 0.20703125, + "learning_rate": 0.00013039796782387807, + "loss": 2.1852, "step": 770 }, { - "epoch": 0.14, - "grad_norm": 0.1708984375, - "learning_rate": 0.000287569573283859, - "loss": 2.2231, + "epoch": 0.13, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001312447078746825, + "loss": 2.1978, "step": 775 }, { - "epoch": 0.14, - "grad_norm": 0.177734375, - "learning_rate": 0.00028942486085343225, - "loss": 2.2246, + "epoch": 0.13, + "grad_norm": 0.197265625, + "learning_rate": 0.00013209144792548688, + "loss": 2.2205, "step": 780 }, { - "epoch": 0.15, - "grad_norm": 0.1787109375, - "learning_rate": 0.0002912801484230056, - "loss": 2.2031, + "epoch": 0.13, + "grad_norm": 0.2099609375, + "learning_rate": 0.0001329381879762913, + "loss": 2.179, "step": 785 }, { - "epoch": 0.15, - "grad_norm": 0.1728515625, - "learning_rate": 0.0002931354359925789, - "loss": 2.2084, + "epoch": 0.13, + "grad_norm": 0.203125, + "learning_rate": 0.00013378492802709567, + "loss": 2.2183, "step": 790 }, { - "epoch": 0.15, - "grad_norm": 0.171875, - "learning_rate": 0.00029499072356215216, - "loss": 2.2021, + "epoch": 0.13, + "grad_norm": 0.203125, + "learning_rate": 0.0001346316680779001, + "loss": 2.1874, "step": 795 }, { - "epoch": 0.15, - "grad_norm": 0.1748046875, - "learning_rate": 0.0002968460111317254, - "loss": 2.1771, + "epoch": 0.14, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001354784081287045, + "loss": 2.2123, "step": 800 }, { - "epoch": 0.15, - "grad_norm": 0.1796875, - "learning_rate": 0.0002987012987012987, - "loss": 2.2068, + "epoch": 0.14, + "grad_norm": 0.205078125, + "learning_rate": 0.0001363251481795089, + "loss": 2.2362, "step": 805 }, { - "epoch": 0.15, - "grad_norm": 0.1689453125, - "learning_rate": 0.000300556586270872, - "loss": 2.1828, + "epoch": 0.14, + "grad_norm": 0.201171875, + "learning_rate": 0.0001371718882303133, + "loss": 2.2316, "step": 810 }, { - "epoch": 0.15, - "grad_norm": 0.1708984375, - "learning_rate": 0.0003024118738404453, - "loss": 2.2225, + "epoch": 0.14, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001380186282811177, + "loss": 2.1873, "step": 815 }, { - "epoch": 0.15, - "grad_norm": 0.1767578125, - "learning_rate": 0.0003042671614100186, - "loss": 2.2214, + "epoch": 0.14, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001388653683319221, + "loss": 2.2417, "step": 820 }, { - "epoch": 0.15, - "grad_norm": 0.1689453125, - "learning_rate": 0.0003061224489795918, - "loss": 2.2285, + "epoch": 0.14, + "grad_norm": 0.1953125, + "learning_rate": 0.0001397121083827265, + "loss": 2.1944, "step": 825 }, { - "epoch": 0.15, - "grad_norm": 0.1875, - "learning_rate": 0.0003079777365491651, - "loss": 2.1673, + "epoch": 0.14, + "grad_norm": 0.1982421875, + "learning_rate": 0.00014055884843353092, + "loss": 2.1983, "step": 830 }, { - "epoch": 0.15, - "grad_norm": 0.173828125, - "learning_rate": 0.00030983302411873845, - "loss": 2.2316, + "epoch": 0.14, + "grad_norm": 0.1953125, + "learning_rate": 0.0001414055884843353, + "loss": 2.2246, "step": 835 }, { - "epoch": 0.16, - "grad_norm": 0.1787109375, - "learning_rate": 0.00031168831168831173, - "loss": 2.211, + "epoch": 0.14, + "grad_norm": 0.193359375, + "learning_rate": 0.00014225232853513973, + "loss": 2.2173, "step": 840 }, { - "epoch": 0.16, - "grad_norm": 0.1875, - "learning_rate": 0.00031354359925788496, - "loss": 2.2433, + "epoch": 0.14, + "grad_norm": 0.201171875, + "learning_rate": 0.00014309906858594413, + "loss": 2.1354, "step": 845 }, { - "epoch": 0.16, - "grad_norm": 0.1728515625, - "learning_rate": 0.00031539888682745825, - "loss": 2.2117, + "epoch": 0.14, + "grad_norm": 0.2021484375, + "learning_rate": 0.00014394580863674852, + "loss": 2.2328, "step": 850 }, { - "epoch": 0.16, - "grad_norm": 0.1748046875, - "learning_rate": 0.00031725417439703153, - "loss": 2.2104, + "epoch": 0.14, + "grad_norm": 0.1953125, + "learning_rate": 0.00014479254868755292, + "loss": 2.215, "step": 855 }, { - "epoch": 0.16, - "grad_norm": 0.181640625, - "learning_rate": 0.0003191094619666049, - "loss": 2.2543, + "epoch": 0.15, + "grad_norm": 0.2060546875, + "learning_rate": 0.00014563928873835734, + "loss": 2.214, "step": 860 }, { - "epoch": 0.16, - "grad_norm": 0.1728515625, - "learning_rate": 0.00032096474953617816, - "loss": 2.202, + "epoch": 0.15, + "grad_norm": 0.212890625, + "learning_rate": 0.00014648602878916173, + "loss": 2.1958, "step": 865 }, { - "epoch": 0.16, - "grad_norm": 0.1748046875, - "learning_rate": 0.0003228200371057514, - "loss": 2.2002, + "epoch": 0.15, + "grad_norm": 0.2041015625, + "learning_rate": 0.00014733276883996613, + "loss": 2.1809, "step": 870 }, { - "epoch": 0.16, - "grad_norm": 0.169921875, - "learning_rate": 0.0003246753246753247, - "loss": 2.2332, + "epoch": 0.15, + "grad_norm": 0.2001953125, + "learning_rate": 0.00014817950889077055, + "loss": 2.264, "step": 875 }, { - "epoch": 0.16, - "grad_norm": 0.1767578125, - "learning_rate": 0.00032653061224489796, - "loss": 2.1782, + "epoch": 0.15, + "grad_norm": 0.19921875, + "learning_rate": 0.00014902624894157495, + "loss": 2.2147, "step": 880 }, { - "epoch": 0.16, - "grad_norm": 0.169921875, - "learning_rate": 0.0003283858998144713, - "loss": 2.2428, + "epoch": 0.15, + "grad_norm": 0.193359375, + "learning_rate": 0.00014987298899237934, + "loss": 2.1813, "step": 885 }, { - "epoch": 0.17, - "grad_norm": 0.171875, - "learning_rate": 0.00033024118738404453, - "loss": 2.2064, + "epoch": 0.15, + "grad_norm": 0.2001953125, + "learning_rate": 0.00015071972904318374, + "loss": 2.1541, "step": 890 }, { - "epoch": 0.17, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003320964749536178, - "loss": 2.2444, + "epoch": 0.15, + "grad_norm": 0.205078125, + "learning_rate": 0.00015156646909398816, + "loss": 2.1661, "step": 895 }, { - "epoch": 0.17, - "grad_norm": 0.181640625, - "learning_rate": 0.0003339517625231911, - "loss": 2.2078, + "epoch": 0.15, + "grad_norm": 0.2060546875, + "learning_rate": 0.00015241320914479255, + "loss": 2.2126, "step": 900 }, { - "epoch": 0.17, - "grad_norm": 0.169921875, - "learning_rate": 0.0003358070500927644, - "loss": 2.2068, + "epoch": 0.15, + "grad_norm": 0.19921875, + "learning_rate": 0.00015325994919559695, + "loss": 2.192, "step": 905 }, { - "epoch": 0.17, - "grad_norm": 0.1796875, - "learning_rate": 0.00033766233766233767, - "loss": 2.1913, + "epoch": 0.15, + "grad_norm": 0.1982421875, + "learning_rate": 0.00015410668924640137, + "loss": 2.2546, "step": 910 }, { - "epoch": 0.17, - "grad_norm": 0.1767578125, - "learning_rate": 0.00033951762523191096, - "loss": 2.2534, + "epoch": 0.15, + "grad_norm": 0.197265625, + "learning_rate": 0.00015495342929720577, + "loss": 2.1839, "step": 915 }, { - "epoch": 0.17, - "grad_norm": 0.17578125, - "learning_rate": 0.00034137291280148424, - "loss": 2.1718, + "epoch": 0.16, + "grad_norm": 0.2041015625, + "learning_rate": 0.00015580016934801016, + "loss": 2.2013, "step": 920 }, { - "epoch": 0.17, - "grad_norm": 0.1796875, - "learning_rate": 0.00034322820037105753, - "loss": 2.2004, + "epoch": 0.16, + "grad_norm": 0.19140625, + "learning_rate": 0.00015664690939881456, + "loss": 2.2374, "step": 925 }, { - "epoch": 0.17, - "grad_norm": 0.16796875, - "learning_rate": 0.0003450834879406308, - "loss": 2.2119, + "epoch": 0.16, + "grad_norm": 0.19921875, + "learning_rate": 0.00015749364944961898, + "loss": 2.1984, "step": 930 }, { - "epoch": 0.17, - "grad_norm": 0.16796875, - "learning_rate": 0.0003469387755102041, - "loss": 2.2186, + "epoch": 0.16, + "grad_norm": 0.19921875, + "learning_rate": 0.00015834038950042337, + "loss": 2.171, "step": 935 }, { - "epoch": 0.17, - "grad_norm": 0.1787109375, - "learning_rate": 0.0003487940630797774, - "loss": 2.2306, + "epoch": 0.16, + "grad_norm": 0.19140625, + "learning_rate": 0.0001591871295512278, + "loss": 2.199, "step": 940 }, { - "epoch": 0.18, - "grad_norm": 0.173828125, - "learning_rate": 0.00035064935064935067, - "loss": 2.234, + "epoch": 0.16, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001600338696020322, + "loss": 2.2011, "step": 945 }, { - "epoch": 0.18, - "grad_norm": 0.1748046875, - "learning_rate": 0.00035250463821892396, - "loss": 2.1975, + "epoch": 0.16, + "grad_norm": 0.2109375, + "learning_rate": 0.0001608806096528366, + "loss": 2.2099, "step": 950 }, { - "epoch": 0.18, - "grad_norm": 0.177734375, - "learning_rate": 0.00035435992578849724, - "loss": 2.2149, + "epoch": 0.16, + "grad_norm": 0.1982421875, + "learning_rate": 0.00016172734970364098, + "loss": 2.203, "step": 955 }, { - "epoch": 0.18, - "grad_norm": 0.17578125, - "learning_rate": 0.0003562152133580705, - "loss": 2.1836, + "epoch": 0.16, + "grad_norm": 0.197265625, + "learning_rate": 0.0001625740897544454, + "loss": 2.2233, "step": 960 }, { - "epoch": 0.18, - "grad_norm": 0.1728515625, - "learning_rate": 0.0003580705009276438, - "loss": 2.1872, + "epoch": 0.16, + "grad_norm": 0.19921875, + "learning_rate": 0.0001634208298052498, + "loss": 2.2154, "step": 965 }, { - "epoch": 0.18, - "grad_norm": 0.17578125, - "learning_rate": 0.0003599257884972171, - "loss": 2.1991, + "epoch": 0.16, + "grad_norm": 0.197265625, + "learning_rate": 0.0001642675698560542, + "loss": 2.1916, "step": 970 }, { - "epoch": 0.18, - "grad_norm": 0.181640625, - "learning_rate": 0.0003617810760667904, - "loss": 2.2465, + "epoch": 0.17, + "grad_norm": 0.193359375, + "learning_rate": 0.00016511430990685862, + "loss": 2.2565, "step": 975 }, { - "epoch": 0.18, - "grad_norm": 0.166015625, - "learning_rate": 0.00036363636363636367, - "loss": 2.2186, + "epoch": 0.17, + "grad_norm": 0.1875, + "learning_rate": 0.00016596104995766298, + "loss": 2.2449, "step": 980 }, { - "epoch": 0.18, - "grad_norm": 0.1728515625, - "learning_rate": 0.00036549165120593695, - "loss": 2.2318, + "epoch": 0.17, + "grad_norm": 0.19140625, + "learning_rate": 0.0001668077900084674, + "loss": 2.2073, "step": 985 }, { - "epoch": 0.18, - "grad_norm": 0.173828125, - "learning_rate": 0.00036734693877551024, - "loss": 2.1858, + "epoch": 0.17, + "grad_norm": 0.1884765625, + "learning_rate": 0.0001676545300592718, + "loss": 2.2011, "step": 990 }, { - "epoch": 0.18, - "grad_norm": 0.169921875, - "learning_rate": 0.0003692022263450835, - "loss": 2.1947, + "epoch": 0.17, + "grad_norm": 0.193359375, + "learning_rate": 0.00016850127011007622, + "loss": 2.2025, "step": 995 }, { - "epoch": 0.19, - "grad_norm": 0.1787109375, - "learning_rate": 0.00037105751391465676, - "loss": 2.2203, + "epoch": 0.17, + "grad_norm": 0.1865234375, + "learning_rate": 0.00016934801016088062, + "loss": 2.2179, "step": 1000 }, { - "epoch": 0.19, - "grad_norm": 0.1884765625, - "learning_rate": 0.00037291280148423004, - "loss": 2.2217, + "epoch": 0.17, + "grad_norm": 0.197265625, + "learning_rate": 0.00017019475021168501, + "loss": 2.2083, "step": 1005 }, { - "epoch": 0.19, - "grad_norm": 0.1826171875, - "learning_rate": 0.0003747680890538034, - "loss": 2.2001, + "epoch": 0.17, + "grad_norm": 0.1865234375, + "learning_rate": 0.00017104149026248944, + "loss": 2.1914, "step": 1010 }, { - "epoch": 0.19, - "grad_norm": 0.1748046875, - "learning_rate": 0.00037662337662337667, - "loss": 2.2013, + "epoch": 0.17, + "grad_norm": 0.1962890625, + "learning_rate": 0.00017188823031329383, + "loss": 2.2375, "step": 1015 }, { - "epoch": 0.19, - "grad_norm": 0.1748046875, - "learning_rate": 0.0003784786641929499, - "loss": 2.2198, + "epoch": 0.17, + "grad_norm": 0.193359375, + "learning_rate": 0.00017273497036409823, + "loss": 2.2289, "step": 1020 }, { - "epoch": 0.19, - "grad_norm": 0.1767578125, - "learning_rate": 0.0003803339517625232, - "loss": 2.2236, + "epoch": 0.17, + "grad_norm": 0.1904296875, + "learning_rate": 0.00017358171041490262, + "loss": 2.2088, "step": 1025 }, { - "epoch": 0.19, - "grad_norm": 0.1787109375, - "learning_rate": 0.00038218923933209647, - "loss": 2.2227, + "epoch": 0.17, + "grad_norm": 0.193359375, + "learning_rate": 0.00017442845046570704, + "loss": 2.1675, "step": 1030 }, { - "epoch": 0.19, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003840445269016698, - "loss": 2.2275, + "epoch": 0.18, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017527519051651144, + "loss": 2.2139, "step": 1035 }, { - "epoch": 0.19, - "grad_norm": 0.1796875, - "learning_rate": 0.0003858998144712431, - "loss": 2.2241, + "epoch": 0.18, + "grad_norm": 0.1845703125, + "learning_rate": 0.00017612193056731586, + "loss": 2.1872, "step": 1040 }, { - "epoch": 0.19, - "grad_norm": 0.1708984375, - "learning_rate": 0.0003877551020408163, - "loss": 2.2066, + "epoch": 0.18, + "grad_norm": 0.189453125, + "learning_rate": 0.00017696867061812023, + "loss": 2.1982, "step": 1045 }, { - "epoch": 0.19, - "grad_norm": 0.1787109375, - "learning_rate": 0.0003896103896103896, - "loss": 2.1842, + "epoch": 0.18, + "grad_norm": 0.1953125, + "learning_rate": 0.00017781541066892465, + "loss": 2.2075, "step": 1050 }, { - "epoch": 0.2, - "grad_norm": 0.1748046875, - "learning_rate": 0.0003914656771799629, - "loss": 2.1279, + "epoch": 0.18, + "grad_norm": 0.19921875, + "learning_rate": 0.00017866215071972905, + "loss": 2.2447, "step": 1055 }, { - "epoch": 0.2, - "grad_norm": 0.1748046875, - "learning_rate": 0.00039332096474953624, - "loss": 2.1712, + "epoch": 0.18, + "grad_norm": 0.193359375, + "learning_rate": 0.00017950889077053344, + "loss": 2.1948, "step": 1060 }, { - "epoch": 0.2, - "grad_norm": 0.1904296875, - "learning_rate": 0.00039517625231910947, - "loss": 2.1972, - "step": 1065 + "epoch": 0.18, + "grad_norm": 0.205078125, + "learning_rate": 0.00018035563082133786, + "loss": 2.2087, + "step": 1065 }, { - "epoch": 0.2, - "grad_norm": 0.173828125, - "learning_rate": 0.00039703153988868275, - "loss": 2.2082, + "epoch": 0.18, + "grad_norm": 0.20703125, + "learning_rate": 0.00018120237087214226, + "loss": 2.2168, "step": 1070 }, { - "epoch": 0.2, - "grad_norm": 0.1767578125, - "learning_rate": 0.00039888682745825604, - "loss": 2.21, + "epoch": 0.18, + "grad_norm": 0.2080078125, + "learning_rate": 0.00018204911092294668, + "loss": 2.2122, "step": 1075 }, { - "epoch": 0.2, - "grad_norm": 0.1728515625, - "learning_rate": 0.0003999999580591546, - "loss": 2.2126, + "epoch": 0.18, + "grad_norm": 0.189453125, + "learning_rate": 0.00018289585097375105, + "loss": 2.2347, "step": 1080 }, { - "epoch": 0.2, - "grad_norm": 0.1806640625, - "learning_rate": 0.00039999948622484506, - "loss": 2.1857, + "epoch": 0.18, + "grad_norm": 0.1962890625, + "learning_rate": 0.00018374259102455547, + "loss": 2.2307, "step": 1085 }, { - "epoch": 0.2, - "grad_norm": 0.1748046875, - "learning_rate": 0.0003999984901314102, - "loss": 2.1972, + "epoch": 0.18, + "grad_norm": 0.18359375, + "learning_rate": 0.00018458933107535987, + "loss": 2.1836, "step": 1090 }, { - "epoch": 0.2, - "grad_norm": 0.17578125, - "learning_rate": 0.0003999969697814611, - "loss": 2.1948, + "epoch": 0.19, + "grad_norm": 0.1953125, + "learning_rate": 0.0001854360711261643, + "loss": 2.1929, "step": 1095 }, { - "epoch": 0.2, - "grad_norm": 0.1708984375, - "learning_rate": 0.00039999492517898294, - "loss": 2.2061, + "epoch": 0.19, + "grad_norm": 0.189453125, + "learning_rate": 0.00018628281117696868, + "loss": 2.1908, "step": 1100 }, { - "epoch": 0.21, - "grad_norm": 0.177734375, - "learning_rate": 0.00039999235632933523, - "loss": 2.2225, + "epoch": 0.19, + "grad_norm": 0.18359375, + "learning_rate": 0.00018712955122777308, + "loss": 2.2123, "step": 1105 }, { - "epoch": 0.21, - "grad_norm": 0.1748046875, - "learning_rate": 0.0003999892632392519, - "loss": 2.2043, + "epoch": 0.19, + "grad_norm": 0.1875, + "learning_rate": 0.00018797629127857747, + "loss": 2.1282, "step": 1110 }, { - "epoch": 0.21, - "grad_norm": 0.1728515625, - "learning_rate": 0.00039998564591684063, - "loss": 2.1753, + "epoch": 0.19, + "grad_norm": 0.1953125, + "learning_rate": 0.0001888230313293819, + "loss": 2.1387, "step": 1115 }, { - "epoch": 0.21, - "grad_norm": 0.1806640625, - "learning_rate": 0.00039998150437158366, - "loss": 2.1665, + "epoch": 0.19, + "grad_norm": 0.193359375, + "learning_rate": 0.0001896697713801863, + "loss": 2.2153, "step": 1120 }, { - "epoch": 0.21, - "grad_norm": 0.181640625, - "learning_rate": 0.00039997683861433723, - "loss": 2.2314, + "epoch": 0.19, + "grad_norm": 0.1875, + "learning_rate": 0.0001905165114309907, + "loss": 2.1932, "step": 1125 }, { - "epoch": 0.21, - "grad_norm": 0.1728515625, - "learning_rate": 0.0003999716486573316, - "loss": 2.1757, + "epoch": 0.19, + "grad_norm": 0.1923828125, + "learning_rate": 0.0001913632514817951, + "loss": 2.1695, "step": 1130 }, { - "epoch": 0.21, - "grad_norm": 0.1728515625, - "learning_rate": 0.0003999659345141714, - "loss": 2.2133, + "epoch": 0.19, + "grad_norm": 0.1865234375, + "learning_rate": 0.0001922099915325995, + "loss": 2.1855, "step": 1135 }, { - "epoch": 0.21, - "grad_norm": 0.18359375, - "learning_rate": 0.0003999596961998349, - "loss": 2.204, + "epoch": 0.19, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001930567315834039, + "loss": 2.2053, "step": 1140 }, { - "epoch": 0.21, - "grad_norm": 0.1826171875, - "learning_rate": 0.0003999529337306748, - "loss": 2.1927, + "epoch": 0.19, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001939034716342083, + "loss": 2.2082, "step": 1145 }, { - "epoch": 0.21, - "grad_norm": 0.1787109375, - "learning_rate": 0.0003999456471244174, - "loss": 2.2271, + "epoch": 0.19, + "grad_norm": 0.1962890625, + "learning_rate": 0.00019475021168501272, + "loss": 2.1713, "step": 1150 }, { - "epoch": 0.21, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039993783640016327, - "loss": 2.1898, + "epoch": 0.2, + "grad_norm": 0.1865234375, + "learning_rate": 0.0001955969517358171, + "loss": 2.1904, "step": 1155 }, { - "epoch": 0.22, - "grad_norm": 0.171875, - "learning_rate": 0.0003999295015783866, - "loss": 2.1957, + "epoch": 0.2, + "grad_norm": 0.1953125, + "learning_rate": 0.0001964436917866215, + "loss": 2.235, "step": 1160 }, { - "epoch": 0.22, - "grad_norm": 0.1806640625, - "learning_rate": 0.00039992064268093544, - "loss": 2.1883, + "epoch": 0.2, + "grad_norm": 0.1982421875, + "learning_rate": 0.00019729043183742593, + "loss": 2.1879, "step": 1165 }, { - "epoch": 0.22, - "grad_norm": 0.1767578125, - "learning_rate": 0.00039991125973103174, - "loss": 2.2178, + "epoch": 0.2, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019813717188823032, + "loss": 2.2022, "step": 1170 }, { - "epoch": 0.22, - "grad_norm": 0.1796875, - "learning_rate": 0.00039990135275327096, - "loss": 2.2018, + "epoch": 0.2, + "grad_norm": 0.1953125, + "learning_rate": 0.00019898391193903472, + "loss": 2.1715, "step": 1175 }, { - "epoch": 0.22, - "grad_norm": 0.1787109375, - "learning_rate": 0.0003998909217736223, - "loss": 2.2102, + "epoch": 0.2, + "grad_norm": 0.189453125, + "learning_rate": 0.00019983065198983911, + "loss": 2.2363, "step": 1180 }, { - "epoch": 0.22, - "grad_norm": 0.1845703125, - "learning_rate": 0.0003998799668194285, - "loss": 2.1726, + "epoch": 0.2, + "grad_norm": 0.197265625, + "learning_rate": 0.00019999993008532863, + "loss": 2.1869, "step": 1185 }, { - "epoch": 0.22, - "grad_norm": 0.1865234375, - "learning_rate": 0.0003998684879194059, - "loss": 2.2063, + "epoch": 0.2, + "grad_norm": 0.2001953125, + "learning_rate": 0.00019999964605714373, + "loss": 2.1716, "step": 1190 }, { - "epoch": 0.22, - "grad_norm": 0.1748046875, - "learning_rate": 0.0003998564851036441, - "loss": 2.2208, + "epoch": 0.2, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019999914354639845, + "loss": 2.1728, "step": 1195 }, { - "epoch": 0.22, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039984395840360603, - "loss": 2.1942, + "epoch": 0.2, + "grad_norm": 0.1845703125, + "learning_rate": 0.00019999842255419064, + "loss": 2.1702, "step": 1200 }, { - "epoch": 0.22, - "grad_norm": 0.1796875, - "learning_rate": 0.0003998309078521281, - "loss": 2.2295, + "epoch": 0.2, + "grad_norm": 0.1845703125, + "learning_rate": 0.0001999974830820956, + "loss": 2.1999, "step": 1205 }, { - "epoch": 0.22, - "grad_norm": 0.173828125, - "learning_rate": 0.00039981733348341966, - "loss": 2.1758, + "epoch": 0.2, + "grad_norm": 0.193359375, + "learning_rate": 0.00019999632513216587, + "loss": 2.2017, "step": 1210 }, { - "epoch": 0.23, - "grad_norm": 0.17578125, - "learning_rate": 0.00039980323533306327, - "loss": 2.2272, + "epoch": 0.21, + "grad_norm": 0.1865234375, + "learning_rate": 0.00019999494870693142, + "loss": 2.1771, "step": 1215 }, { - "epoch": 0.23, - "grad_norm": 0.1767578125, - "learning_rate": 0.00039978861343801446, - "loss": 2.1481, + "epoch": 0.21, + "grad_norm": 0.1875, + "learning_rate": 0.00019999335380939948, + "loss": 2.2625, "step": 1220 }, { - "epoch": 0.23, - "grad_norm": 0.1796875, - "learning_rate": 0.0003997734678366016, - "loss": 2.2496, + "epoch": 0.21, + "grad_norm": 0.1875, + "learning_rate": 0.00019999154044305465, + "loss": 2.1442, "step": 1225 }, { - "epoch": 0.23, - "grad_norm": 0.1708984375, - "learning_rate": 0.00039975779856852596, - "loss": 2.1578, + "epoch": 0.21, + "grad_norm": 0.1875, + "learning_rate": 0.00019998950861185885, + "loss": 2.1751, "step": 1230 }, { - "epoch": 0.23, - "grad_norm": 0.1787109375, - "learning_rate": 0.0003997416056748613, - "loss": 2.1974, + "epoch": 0.21, + "grad_norm": 0.185546875, + "learning_rate": 0.00019998725832025125, + "loss": 2.1923, "step": 1235 }, { - "epoch": 0.23, - "grad_norm": 0.177734375, - "learning_rate": 0.0003997248891980542, - "loss": 2.195, + "epoch": 0.21, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001999847895731484, + "loss": 2.1393, "step": 1240 }, { - "epoch": 0.23, - "grad_norm": 0.1806640625, - "learning_rate": 0.00039970764918192356, - "loss": 2.1602, + "epoch": 0.21, + "grad_norm": 0.1875, + "learning_rate": 0.0001999821023759441, + "loss": 2.1904, "step": 1245 }, { - "epoch": 0.23, - "grad_norm": 0.1806640625, - "learning_rate": 0.0003996898856716607, - "loss": 2.1616, + "epoch": 0.21, + "grad_norm": 0.1865234375, + "learning_rate": 0.00019997919673450938, + "loss": 2.1866, "step": 1250 }, { - "epoch": 0.23, - "grad_norm": 0.17578125, - "learning_rate": 0.00039967159871382915, - "loss": 2.2104, + "epoch": 0.21, + "grad_norm": 0.19140625, + "learning_rate": 0.00019997607265519264, + "loss": 2.1876, "step": 1255 }, { - "epoch": 0.23, - "grad_norm": 0.18359375, - "learning_rate": 0.0003996527883563645, - "loss": 2.2346, + "epoch": 0.21, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019997273014481942, + "loss": 2.1773, "step": 1260 }, { - "epoch": 0.23, - "grad_norm": 0.1767578125, - "learning_rate": 0.0003996334546485744, - "loss": 2.1678, + "epoch": 0.21, + "grad_norm": 0.185546875, + "learning_rate": 0.0001999691692106926, + "loss": 2.2087, "step": 1265 }, { - "epoch": 0.24, - "grad_norm": 0.1767578125, - "learning_rate": 0.00039961359764113845, - "loss": 2.2538, + "epoch": 0.22, + "grad_norm": 0.1826171875, + "learning_rate": 0.00019996538986059221, + "loss": 2.1703, "step": 1270 }, { - "epoch": 0.24, - "grad_norm": 0.171875, - "learning_rate": 0.00039959321738610777, - "loss": 2.1512, + "epoch": 0.22, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001999613921027755, + "loss": 2.1993, "step": 1275 }, { - "epoch": 0.24, - "grad_norm": 0.1796875, - "learning_rate": 0.0003995723139369052, - "loss": 2.1887, + "epoch": 0.22, + "grad_norm": 0.189453125, + "learning_rate": 0.0001999571759459769, + "loss": 2.1809, "step": 1280 }, { - "epoch": 0.24, - "grad_norm": 0.18359375, - "learning_rate": 0.00039955088734832485, - "loss": 2.2243, + "epoch": 0.22, + "grad_norm": 0.1875, + "learning_rate": 0.000199952741399408, + "loss": 2.1823, "step": 1285 }, { - "epoch": 0.24, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039952893767653257, - "loss": 2.182, + "epoch": 0.22, + "grad_norm": 0.18359375, + "learning_rate": 0.00019994808847275755, + "loss": 2.1903, "step": 1290 }, { - "epoch": 0.24, - "grad_norm": 0.173828125, - "learning_rate": 0.0003995064649790649, - "loss": 2.2166, + "epoch": 0.22, + "grad_norm": 0.185546875, + "learning_rate": 0.00019994321717619143, + "loss": 2.1749, "step": 1295 }, { - "epoch": 0.24, - "grad_norm": 0.1708984375, - "learning_rate": 0.00039948346931482963, - "loss": 2.2192, + "epoch": 0.22, + "grad_norm": 0.1845703125, + "learning_rate": 0.0001999381275203526, + "loss": 2.1858, "step": 1300 }, { - "epoch": 0.24, - "grad_norm": 0.17578125, - "learning_rate": 0.0003994599507441053, - "loss": 2.2279, + "epoch": 0.22, + "grad_norm": 0.185546875, + "learning_rate": 0.00019993281951636113, + "loss": 2.1789, "step": 1305 }, { - "epoch": 0.24, - "grad_norm": 0.1787109375, - "learning_rate": 0.00039943590932854124, - "loss": 2.1521, + "epoch": 0.22, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019992729317581408, + "loss": 2.1869, "step": 1310 }, { - "epoch": 0.24, - "grad_norm": 0.1806640625, - "learning_rate": 0.00039941134513115734, - "loss": 2.1969, + "epoch": 0.22, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019992154851078563, + "loss": 2.2076, "step": 1315 }, { - "epoch": 0.24, - "grad_norm": 0.1767578125, - "learning_rate": 0.00039938625821634365, - "loss": 2.1979, + "epoch": 0.22, + "grad_norm": 0.1865234375, + "learning_rate": 0.0001999155855338269, + "loss": 2.2026, "step": 1320 }, { - "epoch": 0.25, - "grad_norm": 0.18359375, - "learning_rate": 0.00039936064864986063, - "loss": 2.1844, + "epoch": 0.22, + "grad_norm": 0.193359375, + "learning_rate": 0.00019990940425796604, + "loss": 2.1573, "step": 1325 }, { - "epoch": 0.25, - "grad_norm": 0.1767578125, - "learning_rate": 0.00039933451649883865, - "loss": 2.2703, + "epoch": 0.23, + "grad_norm": 0.1865234375, + "learning_rate": 0.000199903004696708, + "loss": 2.1708, "step": 1330 }, { - "epoch": 0.25, - "grad_norm": 0.1787109375, - "learning_rate": 0.0003993078618317781, - "loss": 2.218, + "epoch": 0.23, + "grad_norm": 0.1865234375, + "learning_rate": 0.00019989638686403484, + "loss": 2.2057, "step": 1335 }, { - "epoch": 0.25, - "grad_norm": 0.1796875, - "learning_rate": 0.00039928068471854875, - "loss": 2.1867, + "epoch": 0.23, + "grad_norm": 0.1884765625, + "learning_rate": 0.0001998895507744054, + "loss": 2.201, "step": 1340 }, { - "epoch": 0.25, - "grad_norm": 0.1767578125, - "learning_rate": 0.00039925298523039017, - "loss": 2.1961, + "epoch": 0.23, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019988249644275542, + "loss": 2.2209, "step": 1345 }, { - "epoch": 0.25, - "grad_norm": 0.1787109375, - "learning_rate": 0.000399224763439911, - "loss": 2.1803, + "epoch": 0.23, + "grad_norm": 0.1796875, + "learning_rate": 0.0001998752238844974, + "loss": 2.2228, "step": 1350 }, { - "epoch": 0.25, - "grad_norm": 0.1787109375, - "learning_rate": 0.0003991960194210892, - "loss": 2.2029, + "epoch": 0.23, + "grad_norm": 0.189453125, + "learning_rate": 0.00019986773311552069, + "loss": 2.1913, "step": 1355 }, { - "epoch": 0.25, - "grad_norm": 0.1748046875, - "learning_rate": 0.0003991667532492714, - "loss": 2.1686, + "epoch": 0.23, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019986002415219137, + "loss": 2.1614, "step": 1360 }, { - "epoch": 0.25, - "grad_norm": 0.1787109375, - "learning_rate": 0.0003991369650011731, - "loss": 2.18, + "epoch": 0.23, + "grad_norm": 0.1875, + "learning_rate": 0.00019985209701135222, + "loss": 2.1918, "step": 1365 }, { - "epoch": 0.25, - "grad_norm": 0.1826171875, - "learning_rate": 0.0003991066547548785, - "loss": 2.1813, + "epoch": 0.23, + "grad_norm": 0.19140625, + "learning_rate": 0.00019984395171032278, + "loss": 2.1789, "step": 1370 }, { - "epoch": 0.26, - "grad_norm": 0.17578125, - "learning_rate": 0.00039907582258983965, - "loss": 2.1894, + "epoch": 0.23, + "grad_norm": 0.193359375, + "learning_rate": 0.0001998355882668991, + "loss": 2.1664, "step": 1375 }, { - "epoch": 0.26, - "grad_norm": 0.1767578125, - "learning_rate": 0.00039904446858687713, - "loss": 2.2217, + "epoch": 0.23, + "grad_norm": 0.19921875, + "learning_rate": 0.00019982700669935396, + "loss": 2.2059, "step": 1380 }, { - "epoch": 0.26, - "grad_norm": 0.1806640625, - "learning_rate": 0.0003990125928281793, - "loss": 2.1899, + "epoch": 0.23, + "grad_norm": 0.185546875, + "learning_rate": 0.00019981820702643662, + "loss": 2.1638, "step": 1385 }, { - "epoch": 0.26, - "grad_norm": 0.1767578125, - "learning_rate": 0.00039898019539730197, - "loss": 2.202, + "epoch": 0.24, + "grad_norm": 0.189453125, + "learning_rate": 0.00019980918926737294, + "loss": 2.2125, "step": 1390 }, { - "epoch": 0.26, - "grad_norm": 0.177734375, - "learning_rate": 0.0003989472763791688, - "loss": 2.1604, + "epoch": 0.24, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001997999534418652, + "loss": 2.1583, "step": 1395 }, { - "epoch": 0.26, - "grad_norm": 0.181640625, - "learning_rate": 0.00039891383586007043, - "loss": 2.1963, + "epoch": 0.24, + "grad_norm": 0.1806640625, + "learning_rate": 0.00019979049957009212, + "loss": 2.1899, "step": 1400 }, { - "epoch": 0.26, - "grad_norm": 0.1943359375, - "learning_rate": 0.00039887987392766454, - "loss": 2.1794, + "epoch": 0.24, + "grad_norm": 0.1845703125, + "learning_rate": 0.00019978082767270884, + "loss": 2.2027, "step": 1405 }, { - "epoch": 0.26, - "grad_norm": 0.17578125, - "learning_rate": 0.0003988453906709756, - "loss": 2.1875, + "epoch": 0.24, + "grad_norm": 0.18359375, + "learning_rate": 0.0001997709377708469, + "loss": 2.2096, "step": 1410 }, { - "epoch": 0.26, - "grad_norm": 0.185546875, - "learning_rate": 0.0003988103861803948, - "loss": 2.1734, + "epoch": 0.24, + "grad_norm": 0.1845703125, + "learning_rate": 0.000199760829886114, + "loss": 2.1597, "step": 1415 }, { - "epoch": 0.26, - "grad_norm": 0.1748046875, - "learning_rate": 0.0003987748605476793, - "loss": 2.1798, + "epoch": 0.24, + "grad_norm": 0.1953125, + "learning_rate": 0.00019975050404059426, + "loss": 2.1986, "step": 1420 }, { - "epoch": 0.26, - "grad_norm": 0.177734375, - "learning_rate": 0.0003987388138659526, - "loss": 2.2137, + "epoch": 0.24, + "grad_norm": 0.193359375, + "learning_rate": 0.00019973996025684788, + "loss": 2.2003, "step": 1425 }, { - "epoch": 0.27, - "grad_norm": 0.1865234375, - "learning_rate": 0.000398702246229704, - "loss": 2.1868, + "epoch": 0.24, + "grad_norm": 0.189453125, + "learning_rate": 0.00019972919855791132, + "loss": 2.1415, "step": 1430 }, { - "epoch": 0.27, - "grad_norm": 0.177734375, - "learning_rate": 0.00039866515773478826, - "loss": 2.1732, + "epoch": 0.24, + "grad_norm": 0.1865234375, + "learning_rate": 0.00019971821896729703, + "loss": 2.1862, "step": 1435 }, { - "epoch": 0.27, - "grad_norm": 0.1796875, - "learning_rate": 0.00039862754847842563, - "loss": 2.1935, + "epoch": 0.24, + "grad_norm": 0.1875, + "learning_rate": 0.00019970702150899365, + "loss": 2.1944, "step": 1440 }, { - "epoch": 0.27, - "grad_norm": 0.181640625, - "learning_rate": 0.0003985894185592012, - "loss": 2.1878, + "epoch": 0.24, + "grad_norm": 0.193359375, + "learning_rate": 0.00019969560620746571, + "loss": 2.2099, "step": 1445 }, { - "epoch": 0.27, - "grad_norm": 0.185546875, - "learning_rate": 0.00039855076807706523, - "loss": 2.1381, + "epoch": 0.25, + "grad_norm": 0.1865234375, + "learning_rate": 0.00019968397308765375, + "loss": 2.2194, "step": 1450 }, { - "epoch": 0.27, - "grad_norm": 0.1796875, - "learning_rate": 0.0003985115971333321, - "loss": 2.1796, + "epoch": 0.25, + "grad_norm": 0.1875, + "learning_rate": 0.00019967212217497426, + "loss": 2.2112, "step": 1455 }, { - "epoch": 0.27, - "grad_norm": 0.1796875, - "learning_rate": 0.0003984719058306808, - "loss": 2.2372, + "epoch": 0.25, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019966005349531942, + "loss": 2.1745, "step": 1460 }, { - "epoch": 0.27, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039843169427315425, - "loss": 2.2127, + "epoch": 0.25, + "grad_norm": 0.1904296875, + "learning_rate": 0.00019964776707505734, + "loss": 2.1624, "step": 1465 }, { - "epoch": 0.27, - "grad_norm": 0.1796875, - "learning_rate": 0.0003983909625661591, - "loss": 2.1515, + "epoch": 0.25, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001996352629410318, + "loss": 2.1977, "step": 1470 }, { - "epoch": 0.27, - "grad_norm": 0.1845703125, - "learning_rate": 0.0003983497108164654, - "loss": 2.2013, + "epoch": 0.25, + "grad_norm": 0.181640625, + "learning_rate": 0.00019962254112056223, + "loss": 2.2192, "step": 1475 }, { - "epoch": 0.27, - "grad_norm": 0.17578125, - "learning_rate": 0.0003983079391322065, - "loss": 2.1768, + "epoch": 0.25, + "grad_norm": 0.189453125, + "learning_rate": 0.00019960960164144368, + "loss": 2.1652, "step": 1480 }, { - "epoch": 0.28, - "grad_norm": 0.1796875, - "learning_rate": 0.0003982656476228787, - "loss": 2.2009, + "epoch": 0.25, + "grad_norm": 0.1982421875, + "learning_rate": 0.00019959644453194678, + "loss": 2.1841, "step": 1485 }, { - "epoch": 0.28, - "grad_norm": 0.1767578125, - "learning_rate": 0.0003982228363993406, - "loss": 2.24, + "epoch": 0.25, + "grad_norm": 0.19140625, + "learning_rate": 0.00019958306982081761, + "loss": 2.2137, "step": 1490 }, { - "epoch": 0.28, - "grad_norm": 0.181640625, - "learning_rate": 0.0003981795055738137, - "loss": 2.2217, + "epoch": 0.25, + "grad_norm": 0.18359375, + "learning_rate": 0.00019956947753727765, + "loss": 2.1878, "step": 1495 }, { - "epoch": 0.28, - "grad_norm": 0.1767578125, - "learning_rate": 0.00039813565525988084, - "loss": 2.1766, + "epoch": 0.25, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019955566771102384, + "loss": 2.148, "step": 1500 }, { - "epoch": 0.28, - "grad_norm": 0.1875, - "learning_rate": 0.00039809128557248726, - "loss": 2.1778, + "epoch": 0.25, + "grad_norm": 0.1923828125, + "learning_rate": 0.0001995416403722283, + "loss": 2.2543, "step": 1505 }, { - "epoch": 0.28, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039804639662793914, - "loss": 2.2053, + "epoch": 0.26, + "grad_norm": 0.1865234375, + "learning_rate": 0.00019952739555153848, + "loss": 2.1969, "step": 1510 }, { - "epoch": 0.28, - "grad_norm": 0.1826171875, - "learning_rate": 0.000398000988543904, - "loss": 2.2288, + "epoch": 0.26, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001995129332800769, + "loss": 2.2019, "step": 1515 }, { - "epoch": 0.28, - "grad_norm": 0.181640625, - "learning_rate": 0.00039795506143941017, - "loss": 2.1998, + "epoch": 0.26, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019949825358944113, + "loss": 2.1805, "step": 1520 }, { - "epoch": 0.28, - "grad_norm": 0.1787109375, - "learning_rate": 0.0003979086154348465, - "loss": 2.2061, + "epoch": 0.26, + "grad_norm": 0.19140625, + "learning_rate": 0.00019948335651170403, + "loss": 2.1349, "step": 1525 }, { - "epoch": 0.28, - "grad_norm": 0.1796875, - "learning_rate": 0.00039786165065196205, - "loss": 2.1373, + "epoch": 0.26, + "grad_norm": 0.193359375, + "learning_rate": 0.00019946824207941308, + "loss": 2.1884, "step": 1530 }, { - "epoch": 0.28, - "grad_norm": 0.181640625, - "learning_rate": 0.00039781416721386566, - "loss": 2.2074, + "epoch": 0.26, + "grad_norm": 0.19140625, + "learning_rate": 0.00019945291032559087, + "loss": 2.1758, "step": 1535 }, { - "epoch": 0.29, - "grad_norm": 0.1796875, - "learning_rate": 0.0003977661652450257, - "loss": 2.2157, + "epoch": 0.26, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001994373612837347, + "loss": 2.2044, "step": 1540 }, { - "epoch": 0.29, - "grad_norm": 0.1884765625, - "learning_rate": 0.00039771764487127, - "loss": 2.2054, + "epoch": 0.26, + "grad_norm": 0.18359375, + "learning_rate": 0.00019942159498781667, + "loss": 2.1701, "step": 1545 }, { - "epoch": 0.29, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039766860621978504, - "loss": 2.1908, + "epoch": 0.26, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019940561147228347, + "loss": 2.1771, "step": 1550 }, { - "epoch": 0.29, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039761904941911603, - "loss": 2.1985, + "epoch": 0.26, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001993894107720564, + "loss": 2.1836, "step": 1555 }, { - "epoch": 0.29, - "grad_norm": 0.1875, - "learning_rate": 0.0003975689745991662, - "loss": 2.1675, + "epoch": 0.26, + "grad_norm": 0.1865234375, + "learning_rate": 0.00019937299292253137, + "loss": 2.1649, "step": 1560 }, { - "epoch": 0.29, - "grad_norm": 0.185546875, - "learning_rate": 0.0003975183818911969, - "loss": 2.178, + "epoch": 0.27, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019935635795957857, + "loss": 2.1816, "step": 1565 }, { - "epoch": 0.29, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039746727142782686, - "loss": 2.1903, + "epoch": 0.27, + "grad_norm": 0.193359375, + "learning_rate": 0.00019933950591954265, + "loss": 2.189, "step": 1570 }, { - "epoch": 0.29, - "grad_norm": 0.181640625, - "learning_rate": 0.0003974156433430321, - "loss": 2.2173, + "epoch": 0.27, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001993224368392425, + "loss": 2.2155, "step": 1575 }, { - "epoch": 0.29, - "grad_norm": 0.1796875, - "learning_rate": 0.0003973634977721454, - "loss": 2.235, + "epoch": 0.27, + "grad_norm": 0.1845703125, + "learning_rate": 0.00019930515075597123, + "loss": 2.1719, "step": 1580 }, { - "epoch": 0.29, - "grad_norm": 0.1796875, - "learning_rate": 0.00039731083485185605, - "loss": 2.159, + "epoch": 0.27, + "grad_norm": 0.185546875, + "learning_rate": 0.00019928764770749604, + "loss": 2.1808, "step": 1585 }, { - "epoch": 0.29, - "grad_norm": 0.1806640625, - "learning_rate": 0.0003972576547202096, - "loss": 2.2094, + "epoch": 0.27, + "grad_norm": 0.1904296875, + "learning_rate": 0.00019926992773205816, + "loss": 2.1824, "step": 1590 }, { - "epoch": 0.3, - "grad_norm": 0.181640625, - "learning_rate": 0.0003972039575166071, - "loss": 2.2054, + "epoch": 0.27, + "grad_norm": 0.1845703125, + "learning_rate": 0.00019925199086837282, + "loss": 2.1842, "step": 1595 }, { - "epoch": 0.3, - "grad_norm": 0.1875, - "learning_rate": 0.0003971497433818053, - "loss": 2.166, + "epoch": 0.27, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019923383715562902, + "loss": 2.1908, "step": 1600 }, { - "epoch": 0.3, - "grad_norm": 0.1865234375, - "learning_rate": 0.00039709501245791575, - "loss": 2.216, + "epoch": 0.27, + "grad_norm": 0.18359375, + "learning_rate": 0.00019921546663348964, + "loss": 2.2098, "step": 1605 }, { - "epoch": 0.3, - "grad_norm": 0.1865234375, - "learning_rate": 0.0003970397648884048, - "loss": 2.1989, + "epoch": 0.27, + "grad_norm": 0.19140625, + "learning_rate": 0.00019919687934209123, + "loss": 2.1821, "step": 1610 }, { - "epoch": 0.3, - "grad_norm": 0.1806640625, - "learning_rate": 0.000396984000818093, - "loss": 2.159, + "epoch": 0.27, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001991780753220439, + "loss": 2.1931, "step": 1615 }, { - "epoch": 0.3, - "grad_norm": 0.1796875, - "learning_rate": 0.00039692772039315484, - "loss": 2.2085, + "epoch": 0.27, + "grad_norm": 0.2001953125, + "learning_rate": 0.00019915905461443125, + "loss": 2.2284, "step": 1620 }, { - "epoch": 0.3, - "grad_norm": 0.1845703125, - "learning_rate": 0.0003968709237611183, - "loss": 2.1399, + "epoch": 0.28, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019913981726081046, + "loss": 2.1604, "step": 1625 }, { - "epoch": 0.3, - "grad_norm": 0.18359375, - "learning_rate": 0.00039681361107086463, - "loss": 2.1798, + "epoch": 0.28, + "grad_norm": 0.1826171875, + "learning_rate": 0.00019912036330321185, + "loss": 2.2391, "step": 1630 }, { - "epoch": 0.3, - "grad_norm": 0.1787109375, - "learning_rate": 0.0003967557824726276, - "loss": 2.1858, + "epoch": 0.28, + "grad_norm": 0.185546875, + "learning_rate": 0.0001991006927841391, + "loss": 2.1986, "step": 1635 }, { - "epoch": 0.3, - "grad_norm": 0.181640625, - "learning_rate": 0.00039669743811799354, - "loss": 2.1729, + "epoch": 0.28, + "grad_norm": 0.19140625, + "learning_rate": 0.00019908080574656905, + "loss": 2.2385, "step": 1640 }, { - "epoch": 0.31, - "grad_norm": 0.1865234375, - "learning_rate": 0.0003966385781599006, - "loss": 2.2067, + "epoch": 0.28, + "grad_norm": 0.1845703125, + "learning_rate": 0.00019906070223395153, + "loss": 2.1974, "step": 1645 }, { - "epoch": 0.31, - "grad_norm": 0.1806640625, - "learning_rate": 0.00039657920275263856, - "loss": 2.1663, + "epoch": 0.28, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019904038229020935, + "loss": 2.1889, "step": 1650 }, { - "epoch": 0.31, - "grad_norm": 0.185546875, - "learning_rate": 0.00039651931205184824, - "loss": 2.1732, + "epoch": 0.28, + "grad_norm": 0.189453125, + "learning_rate": 0.00019901984595973823, + "loss": 2.1733, "step": 1655 }, { - "epoch": 0.31, - "grad_norm": 0.1875, - "learning_rate": 0.00039645890621452137, - "loss": 2.1602, + "epoch": 0.28, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019899909328740666, + "loss": 2.1783, "step": 1660 }, { - "epoch": 0.31, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003963979853989999, - "loss": 2.2557, + "epoch": 0.28, + "grad_norm": 0.189453125, + "learning_rate": 0.00019897812431855569, + "loss": 2.1863, "step": 1665 }, { - "epoch": 0.31, - "grad_norm": 0.1953125, - "learning_rate": 0.00039633654976497563, - "loss": 2.149, + "epoch": 0.28, + "grad_norm": 0.197265625, + "learning_rate": 0.00019895693909899908, + "loss": 2.1418, "step": 1670 }, { - "epoch": 0.31, - "grad_norm": 0.1806640625, - "learning_rate": 0.00039627459947349, - "loss": 2.172, + "epoch": 0.28, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019893553767502299, + "loss": 2.1798, "step": 1675 }, { - "epoch": 0.31, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039621213468693343, - "loss": 2.1722, + "epoch": 0.28, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019891392009338597, + "loss": 2.2185, "step": 1680 }, { - "epoch": 0.31, - "grad_norm": 0.1875, - "learning_rate": 0.000396149155569045, - "loss": 2.177, + "epoch": 0.29, + "grad_norm": 0.185546875, + "learning_rate": 0.0001988920864013188, + "loss": 2.2217, "step": 1685 }, { - "epoch": 0.31, - "grad_norm": 0.181640625, - "learning_rate": 0.00039608566228491204, - "loss": 2.1349, + "epoch": 0.29, + "grad_norm": 0.189453125, + "learning_rate": 0.00019887003664652452, + "loss": 2.19, "step": 1690 }, { - "epoch": 0.31, - "grad_norm": 0.1865234375, - "learning_rate": 0.00039602165500096973, - "loss": 2.2292, + "epoch": 0.29, + "grad_norm": 0.19140625, + "learning_rate": 0.0001988477708771781, + "loss": 2.2109, "step": 1695 }, { - "epoch": 0.32, - "grad_norm": 0.1845703125, - "learning_rate": 0.00039595713388500037, - "loss": 2.1504, + "epoch": 0.29, + "grad_norm": 0.19921875, + "learning_rate": 0.00019882528914192657, + "loss": 2.1982, "step": 1700 }, { - "epoch": 0.32, + "epoch": 0.29, "grad_norm": 0.1875, - "learning_rate": 0.00039589209910613336, - "loss": 2.153, + "learning_rate": 0.0001988025914898888, + "loss": 2.1367, "step": 1705 }, { - "epoch": 0.32, - "grad_norm": 0.18359375, - "learning_rate": 0.00039582655083484454, - "loss": 2.2195, + "epoch": 0.29, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001987796779706553, + "loss": 2.1894, "step": 1710 }, { - "epoch": 0.32, - "grad_norm": 0.1787109375, - "learning_rate": 0.00039576048924295576, - "loss": 2.1511, + "epoch": 0.29, + "grad_norm": 0.1962890625, + "learning_rate": 0.00019875654863428838, + "loss": 2.1371, "step": 1715 }, { - "epoch": 0.32, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003956939145036344, - "loss": 2.1825, + "epoch": 0.29, + "grad_norm": 0.197265625, + "learning_rate": 0.00019873320353132174, + "loss": 2.1592, "step": 1720 }, { - "epoch": 0.32, - "grad_norm": 0.18359375, - "learning_rate": 0.000395626826791393, - "loss": 2.2075, + "epoch": 0.29, + "grad_norm": 0.19921875, + "learning_rate": 0.00019870964271276055, + "loss": 2.1695, "step": 1725 }, { - "epoch": 0.32, + "epoch": 0.29, "grad_norm": 0.18359375, - "learning_rate": 0.00039555922628208874, - "loss": 2.2107, + "learning_rate": 0.00019868586623008125, + "loss": 2.1658, "step": 1730 }, { - "epoch": 0.32, - "grad_norm": 0.1845703125, - "learning_rate": 0.00039549111315292294, - "loss": 2.2175, + "epoch": 0.29, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019866187413523153, + "loss": 2.1584, "step": 1735 }, { - "epoch": 0.32, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039542248758244077, - "loss": 2.1779, + "epoch": 0.29, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019863766648063006, + "loss": 2.2071, "step": 1740 }, { - "epoch": 0.32, - "grad_norm": 0.1953125, - "learning_rate": 0.0003953533497505306, - "loss": 2.1802, + "epoch": 0.3, + "grad_norm": 0.19140625, + "learning_rate": 0.00019861324331916662, + "loss": 2.2012, "step": 1745 }, { - "epoch": 0.32, - "grad_norm": 0.1787109375, - "learning_rate": 0.00039528369983842356, - "loss": 2.1587, + "epoch": 0.3, + "grad_norm": 0.193359375, + "learning_rate": 0.00019858860470420167, + "loss": 2.2062, "step": 1750 }, { - "epoch": 0.33, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003952135380286931, - "loss": 2.1503, + "epoch": 0.3, + "grad_norm": 0.1845703125, + "learning_rate": 0.00019856375068956651, + "loss": 2.1877, "step": 1755 }, { - "epoch": 0.33, - "grad_norm": 0.18359375, - "learning_rate": 0.00039514286450525457, - "loss": 2.2139, + "epoch": 0.3, + "grad_norm": 0.1923828125, + "learning_rate": 0.000198538681329563, + "loss": 2.1791, "step": 1760 }, { - "epoch": 0.33, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003950716794533647, - "loss": 2.1928, + "epoch": 0.3, + "grad_norm": 0.2021484375, + "learning_rate": 0.00019851339667896354, + "loss": 2.155, "step": 1765 }, { - "epoch": 0.33, - "grad_norm": 0.1787109375, - "learning_rate": 0.000394999983059621, - "loss": 2.1583, + "epoch": 0.3, + "grad_norm": 0.193359375, + "learning_rate": 0.00019848789679301085, + "loss": 2.1589, "step": 1770 }, { - "epoch": 0.33, - "grad_norm": 0.177734375, - "learning_rate": 0.00039492777551196134, - "loss": 2.2097, + "epoch": 0.3, + "grad_norm": 0.1904296875, + "learning_rate": 0.00019846218172741794, + "loss": 2.1752, "step": 1775 }, { - "epoch": 0.33, - "grad_norm": 0.1845703125, - "learning_rate": 0.00039485505699966356, - "loss": 2.175, + "epoch": 0.3, + "grad_norm": 0.185546875, + "learning_rate": 0.00019843625153836798, + "loss": 2.2145, "step": 1780 }, { - "epoch": 0.33, - "grad_norm": 0.18359375, - "learning_rate": 0.00039478182771334494, - "loss": 2.1728, + "epoch": 0.3, + "grad_norm": 0.1826171875, + "learning_rate": 0.00019841010628251406, + "loss": 2.166, "step": 1785 }, { - "epoch": 0.33, - "grad_norm": 0.18359375, - "learning_rate": 0.0003947080878449615, - "loss": 2.1756, + "epoch": 0.3, + "grad_norm": 0.1875, + "learning_rate": 0.00019838374601697923, + "loss": 2.2264, "step": 1790 }, { - "epoch": 0.33, - "grad_norm": 0.1826171875, - "learning_rate": 0.0003946338375878078, - "loss": 2.2138, + "epoch": 0.3, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019835717079935624, + "loss": 2.1749, "step": 1795 }, { - "epoch": 0.33, - "grad_norm": 0.1884765625, - "learning_rate": 0.00039455907713651614, - "loss": 2.1444, + "epoch": 0.3, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019833038068770757, + "loss": 2.1778, "step": 1800 }, { - "epoch": 0.33, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003944838066870563, - "loss": 2.1863, + "epoch": 0.31, + "grad_norm": 0.193359375, + "learning_rate": 0.00019830337574056514, + "loss": 2.1967, "step": 1805 }, { - "epoch": 0.34, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039440802643673486, - "loss": 2.1562, + "epoch": 0.31, + "grad_norm": 0.19140625, + "learning_rate": 0.00019827615601693022, + "loss": 2.1804, "step": 1810 }, { - "epoch": 0.34, - "grad_norm": 0.1845703125, - "learning_rate": 0.00039433173658419483, - "loss": 2.2186, + "epoch": 0.31, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019824872157627339, + "loss": 2.2043, "step": 1815 }, { - "epoch": 0.34, - "grad_norm": 0.18359375, - "learning_rate": 0.0003942549373294149, - "loss": 2.1931, + "epoch": 0.31, + "grad_norm": 0.1953125, + "learning_rate": 0.00019822107247853435, + "loss": 2.1591, "step": 1820 }, { - "epoch": 0.34, - "grad_norm": 0.1796875, - "learning_rate": 0.00039417762887370924, - "loss": 2.1274, + "epoch": 0.31, + "grad_norm": 0.193359375, + "learning_rate": 0.00019819320878412174, + "loss": 2.1763, "step": 1825 }, { - "epoch": 0.34, - "grad_norm": 0.1845703125, - "learning_rate": 0.0003940998114197266, - "loss": 2.2013, + "epoch": 0.31, + "grad_norm": 0.1865234375, + "learning_rate": 0.00019816513055391307, + "loss": 2.1789, "step": 1830 }, { - "epoch": 0.34, - "grad_norm": 0.1787109375, - "learning_rate": 0.0003940214851714501, - "loss": 2.177, + "epoch": 0.31, + "grad_norm": 0.189453125, + "learning_rate": 0.00019813683784925467, + "loss": 2.2, "step": 1835 }, { - "epoch": 0.34, - "grad_norm": 0.1875, - "learning_rate": 0.0003939426503341965, - "loss": 2.1978, + "epoch": 0.31, + "grad_norm": 0.1904296875, + "learning_rate": 0.00019810833073196133, + "loss": 2.1581, "step": 1840 }, { - "epoch": 0.34, - "grad_norm": 0.185546875, - "learning_rate": 0.0003938633071146158, - "loss": 2.2461, + "epoch": 0.31, + "grad_norm": 0.1904296875, + "learning_rate": 0.00019807960926431634, + "loss": 2.2085, "step": 1845 }, { - "epoch": 0.34, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039378345572069044, - "loss": 2.1986, + "epoch": 0.31, + "grad_norm": 0.1875, + "learning_rate": 0.00019805067350907134, + "loss": 2.1584, "step": 1850 }, { - "epoch": 0.34, - "grad_norm": 0.1826171875, - "learning_rate": 0.00039370309636173513, - "loss": 2.1906, + "epoch": 0.31, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019802152352944616, + "loss": 2.2049, "step": 1855 }, { - "epoch": 0.35, - "grad_norm": 0.1845703125, - "learning_rate": 0.00039362222924839614, - "loss": 2.1805, + "epoch": 0.32, + "grad_norm": 0.1923828125, + "learning_rate": 0.0001979921593891286, + "loss": 2.1572, "step": 1860 }, { - "epoch": 0.35, - "grad_norm": 0.1845703125, - "learning_rate": 0.00039354085459265055, - "loss": 2.1797, + "epoch": 0.32, + "grad_norm": 0.197265625, + "learning_rate": 0.00019796258115227443, + "loss": 2.2329, "step": 1865 }, { - "epoch": 0.35, - "grad_norm": 0.1865234375, - "learning_rate": 0.0003934589726078059, - "loss": 2.1593, + "epoch": 0.32, + "grad_norm": 0.189453125, + "learning_rate": 0.00019793278888350716, + "loss": 2.1925, "step": 1870 }, { - "epoch": 0.35, - "grad_norm": 0.1845703125, - "learning_rate": 0.00039337658350849973, - "loss": 2.1646, + "epoch": 0.32, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019790278264791795, + "loss": 2.1534, "step": 1875 }, { - "epoch": 0.35, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003932936875106986, - "loss": 2.215, + "epoch": 0.32, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019787256251106543, + "loss": 2.1437, "step": 1880 }, { - "epoch": 0.35, - "grad_norm": 0.1865234375, - "learning_rate": 0.00039321028483169817, - "loss": 2.1932, + "epoch": 0.32, + "grad_norm": 0.19140625, + "learning_rate": 0.00019784212853897552, + "loss": 2.193, "step": 1885 }, { - "epoch": 0.35, - "grad_norm": 0.181640625, - "learning_rate": 0.00039312637569012207, - "loss": 2.1654, + "epoch": 0.32, + "grad_norm": 0.1923828125, + "learning_rate": 0.0001978114807981414, + "loss": 2.1838, "step": 1890 }, { - "epoch": 0.35, - "grad_norm": 0.1806640625, - "learning_rate": 0.0003930419603059214, - "loss": 2.1686, + "epoch": 0.32, + "grad_norm": 0.1884765625, + "learning_rate": 0.0001977806193555233, + "loss": 2.1925, "step": 1895 }, { - "epoch": 0.35, - "grad_norm": 0.1845703125, - "learning_rate": 0.00039295703890037444, - "loss": 2.1788, + "epoch": 0.32, + "grad_norm": 0.189453125, + "learning_rate": 0.00019774954427854833, + "loss": 2.1709, "step": 1900 }, { - "epoch": 0.35, - "grad_norm": 0.1865234375, - "learning_rate": 0.00039287161169608597, - "loss": 2.2414, + "epoch": 0.32, + "grad_norm": 0.1875, + "learning_rate": 0.0001977182556351103, + "loss": 2.1448, "step": 1905 }, { - "epoch": 0.35, - "grad_norm": 0.185546875, - "learning_rate": 0.0003927856789169865, - "loss": 2.2244, + "epoch": 0.32, + "grad_norm": 0.19921875, + "learning_rate": 0.0001976867534935697, + "loss": 2.2003, "step": 1910 }, { - "epoch": 0.36, - "grad_norm": 0.189453125, - "learning_rate": 0.0003926992407883317, - "loss": 2.1363, + "epoch": 0.32, + "grad_norm": 0.2060546875, + "learning_rate": 0.00019765503792275354, + "loss": 2.1616, "step": 1915 }, { - "epoch": 0.36, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003926122975367022, - "loss": 2.204, + "epoch": 0.33, + "grad_norm": 0.2001953125, + "learning_rate": 0.0001976231089919549, + "loss": 2.171, "step": 1920 }, { - "epoch": 0.36, - "grad_norm": 0.18359375, - "learning_rate": 0.0003925248493900024, - "loss": 2.181, + "epoch": 0.33, + "grad_norm": 0.19140625, + "learning_rate": 0.00019759096677093334, + "loss": 2.1726, "step": 1925 }, { - "epoch": 0.36, - "grad_norm": 0.1953125, - "learning_rate": 0.00039243689657746046, - "loss": 2.1653, + "epoch": 0.33, + "grad_norm": 0.189453125, + "learning_rate": 0.00019755861132991412, + "loss": 2.1745, "step": 1930 }, { - "epoch": 0.36, - "grad_norm": 0.181640625, - "learning_rate": 0.0003923484393296273, - "loss": 2.1717, + "epoch": 0.33, + "grad_norm": 0.1953125, + "learning_rate": 0.0001975260427395886, + "loss": 2.1956, "step": 1935 }, { - "epoch": 0.36, - "grad_norm": 0.1962890625, - "learning_rate": 0.000392259477878376, - "loss": 2.1734, + "epoch": 0.33, + "grad_norm": 0.19140625, + "learning_rate": 0.00019749326107111362, + "loss": 2.2004, "step": 1940 }, { - "epoch": 0.36, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003921700124569015, - "loss": 2.1973, + "epoch": 0.33, + "grad_norm": 0.1904296875, + "learning_rate": 0.00019746026639611174, + "loss": 2.1805, "step": 1945 }, { - "epoch": 0.36, - "grad_norm": 0.189453125, - "learning_rate": 0.0003920800432997197, - "loss": 2.189, + "epoch": 0.33, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019742705878667075, + "loss": 2.2056, "step": 1950 }, { - "epoch": 0.36, - "grad_norm": 0.2080078125, - "learning_rate": 0.000391989570642667, - "loss": 2.2058, + "epoch": 0.33, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001973936383153438, + "loss": 2.1754, "step": 1955 }, { - "epoch": 0.36, - "grad_norm": 0.181640625, - "learning_rate": 0.0003918985947228995, - "loss": 2.174, + "epoch": 0.33, + "grad_norm": 0.1875, + "learning_rate": 0.00019736000505514908, + "loss": 2.1286, "step": 1960 }, { - "epoch": 0.36, - "grad_norm": 0.1923828125, - "learning_rate": 0.00039180711577889264, - "loss": 2.2542, + "epoch": 0.33, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001973261590795696, + "loss": 2.1644, "step": 1965 }, { - "epoch": 0.37, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003917151340504405, - "loss": 2.1754, + "epoch": 0.33, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019729210046255316, + "loss": 2.2054, "step": 1970 }, { - "epoch": 0.37, - "grad_norm": 0.193359375, - "learning_rate": 0.0003916226497786548, - "loss": 2.2225, + "epoch": 0.33, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001972578292785122, + "loss": 2.2077, "step": 1975 }, { - "epoch": 0.37, - "grad_norm": 0.181640625, - "learning_rate": 0.0003915296632059649, - "loss": 2.1529, + "epoch": 0.34, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019722334560232354, + "loss": 2.1545, "step": 1980 }, { - "epoch": 0.37, - "grad_norm": 0.1875, - "learning_rate": 0.00039143617457611674, - "loss": 2.1837, + "epoch": 0.34, + "grad_norm": 0.1845703125, + "learning_rate": 0.00019718864950932826, + "loss": 2.1974, "step": 1985 }, { - "epoch": 0.37, - "grad_norm": 0.1845703125, - "learning_rate": 0.0003913421841341723, - "loss": 2.1773, + "epoch": 0.34, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019715374107533157, + "loss": 2.1435, "step": 1990 }, { - "epoch": 0.37, + "epoch": 0.34, "grad_norm": 0.1904296875, - "learning_rate": 0.00039124769212650883, - "loss": 2.1852, + "learning_rate": 0.00019711862037660253, + "loss": 2.195, "step": 1995 }, { - "epoch": 0.37, - "grad_norm": 0.1953125, - "learning_rate": 0.0003911526988008185, - "loss": 2.2199, + "epoch": 0.34, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019708328748987403, + "loss": 2.2048, "step": 2000 }, { - "epoch": 0.37, - "grad_norm": 0.1904296875, - "learning_rate": 0.00039105720440610765, - "loss": 2.2312, + "epoch": 0.34, + "grad_norm": 0.1845703125, + "learning_rate": 0.00019704774249234256, + "loss": 2.2101, "step": 2005 }, { - "epoch": 0.37, - "grad_norm": 0.1904296875, - "learning_rate": 0.00039096120919269577, - "loss": 2.2002, + "epoch": 0.34, + "grad_norm": 0.1962890625, + "learning_rate": 0.00019701198546166803, + "loss": 2.2184, "step": 2010 }, { - "epoch": 0.37, - "grad_norm": 0.1826171875, - "learning_rate": 0.0003908647134122156, - "loss": 2.1313, + "epoch": 0.34, + "grad_norm": 0.19140625, + "learning_rate": 0.0001969760164759735, + "loss": 2.1553, "step": 2015 }, { - "epoch": 0.37, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003907677173176115, - "loss": 2.1836, + "epoch": 0.34, + "grad_norm": 0.1953125, + "learning_rate": 0.0001969398356138453, + "loss": 2.1782, "step": 2020 }, { - "epoch": 0.38, + "epoch": 0.34, "grad_norm": 0.1923828125, - "learning_rate": 0.00039067022116313964, - "loss": 2.2171, + "learning_rate": 0.00019690344295433256, + "loss": 2.1714, "step": 2025 }, { - "epoch": 0.38, - "grad_norm": 0.1845703125, - "learning_rate": 0.000390572225204367, - "loss": 2.1877, + "epoch": 0.34, + "grad_norm": 0.1904296875, + "learning_rate": 0.00019686683857694716, + "loss": 2.1662, "step": 2030 }, { - "epoch": 0.38, - "grad_norm": 0.18359375, - "learning_rate": 0.00039047372969817044, - "loss": 2.1615, + "epoch": 0.34, + "grad_norm": 0.193359375, + "learning_rate": 0.0001968300225616636, + "loss": 2.1654, "step": 2035 }, { - "epoch": 0.38, - "grad_norm": 0.1865234375, - "learning_rate": 0.00039037473490273673, - "loss": 2.1992, + "epoch": 0.35, + "grad_norm": 0.1904296875, + "learning_rate": 0.00019679299498891873, + "loss": 2.2053, "step": 2040 }, { - "epoch": 0.38, + "epoch": 0.35, "grad_norm": 0.189453125, - "learning_rate": 0.0003902752410775609, - "loss": 2.1588, + "learning_rate": 0.00019675575593961156, + "loss": 2.1423, "step": 2045 }, { - "epoch": 0.38, - "grad_norm": 0.19140625, - "learning_rate": 0.00039017524848344653, - "loss": 2.1624, + "epoch": 0.35, + "grad_norm": 0.205078125, + "learning_rate": 0.0001967183054951033, + "loss": 2.1537, "step": 2050 }, { - "epoch": 0.38, - "grad_norm": 0.185546875, - "learning_rate": 0.0003900747573825044, - "loss": 2.1364, + "epoch": 0.35, + "grad_norm": 0.19140625, + "learning_rate": 0.00019668064373721685, + "loss": 2.2083, "step": 2055 }, { - "epoch": 0.38, - "grad_norm": 0.185546875, - "learning_rate": 0.00038997376803815196, - "loss": 2.1946, + "epoch": 0.35, + "grad_norm": 0.1875, + "learning_rate": 0.00019664277074823693, + "loss": 2.164, "step": 2060 }, { - "epoch": 0.38, - "grad_norm": 0.1845703125, - "learning_rate": 0.0003898722807151129, - "loss": 2.1941, + "epoch": 0.35, + "grad_norm": 0.189453125, + "learning_rate": 0.0001966046866109097, + "loss": 2.1678, "step": 2065 }, { - "epoch": 0.38, - "grad_norm": 0.197265625, - "learning_rate": 0.0003897702956794163, - "loss": 2.217, + "epoch": 0.35, + "grad_norm": 0.19140625, + "learning_rate": 0.00019656639140844262, + "loss": 2.2032, "step": 2070 }, { - "epoch": 0.38, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003896678131983956, - "loss": 2.1456, + "epoch": 0.35, + "grad_norm": 0.193359375, + "learning_rate": 0.00019652788522450437, + "loss": 2.2068, "step": 2075 }, { - "epoch": 0.39, - "grad_norm": 0.2177734375, - "learning_rate": 0.0003895648335406884, - "loss": 2.1496, + "epoch": 0.35, + "grad_norm": 0.1875, + "learning_rate": 0.00019648916814322446, + "loss": 2.1622, "step": 2080 }, { - "epoch": 0.39, - "grad_norm": 0.197265625, - "learning_rate": 0.0003894613569762356, - "loss": 2.1576, + "epoch": 0.35, + "grad_norm": 0.19140625, + "learning_rate": 0.00019645024024919337, + "loss": 2.2037, "step": 2085 }, { - "epoch": 0.39, - "grad_norm": 0.181640625, - "learning_rate": 0.00038935738377628045, - "loss": 2.1978, + "epoch": 0.35, + "grad_norm": 0.1826171875, + "learning_rate": 0.00019641110162746202, + "loss": 2.1631, "step": 2090 }, { - "epoch": 0.39, - "grad_norm": 0.19140625, - "learning_rate": 0.00038925291421336824, - "loss": 2.221, + "epoch": 0.35, + "grad_norm": 0.19921875, + "learning_rate": 0.00019637175236354175, + "loss": 2.2035, "step": 2095 }, { - "epoch": 0.39, - "grad_norm": 0.1845703125, - "learning_rate": 0.0003891479485613452, - "loss": 2.1936, + "epoch": 0.36, + "grad_norm": 0.2021484375, + "learning_rate": 0.00019633219254340417, + "loss": 2.1476, "step": 2100 }, { - "epoch": 0.39, - "grad_norm": 0.189453125, - "learning_rate": 0.00038904248709535817, - "loss": 2.2115, + "epoch": 0.36, + "grad_norm": 0.1953125, + "learning_rate": 0.00019629242225348086, + "loss": 2.1799, "step": 2105 }, { - "epoch": 0.39, - "grad_norm": 0.189453125, - "learning_rate": 0.0003889365300918534, - "loss": 2.1994, + "epoch": 0.36, + "grad_norm": 0.19140625, + "learning_rate": 0.00019625244158066332, + "loss": 2.2112, "step": 2110 }, { - "epoch": 0.39, - "grad_norm": 0.1865234375, - "learning_rate": 0.00038883007782857627, - "loss": 2.2152, + "epoch": 0.36, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001962122506123026, + "loss": 2.1967, "step": 2115 }, { - "epoch": 0.39, - "grad_norm": 0.1884765625, - "learning_rate": 0.00038872313058457044, - "loss": 2.1625, + "epoch": 0.36, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019617184943620936, + "loss": 2.1841, "step": 2120 }, { - "epoch": 0.39, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003886156886401768, - "loss": 2.1987, + "epoch": 0.36, + "grad_norm": 0.1875, + "learning_rate": 0.00019613123814065335, + "loss": 2.2235, "step": 2125 }, { - "epoch": 0.4, - "grad_norm": 0.1845703125, - "learning_rate": 0.0003885077522770334, - "loss": 2.1692, + "epoch": 0.36, + "grad_norm": 0.189453125, + "learning_rate": 0.00019609041681436354, + "loss": 2.1743, "step": 2130 }, { - "epoch": 0.4, - "grad_norm": 0.1875, - "learning_rate": 0.00038839932177807385, - "loss": 2.1872, + "epoch": 0.36, + "grad_norm": 0.1904296875, + "learning_rate": 0.00019604938554652765, + "loss": 2.1865, "step": 2135 }, { - "epoch": 0.4, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003882903974275275, - "loss": 2.2051, + "epoch": 0.36, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019600814442679226, + "loss": 2.143, "step": 2140 }, { - "epoch": 0.4, - "grad_norm": 0.18359375, - "learning_rate": 0.00038818097951091776, - "loss": 2.2039, + "epoch": 0.36, + "grad_norm": 0.205078125, + "learning_rate": 0.00019596669354526224, + "loss": 2.2324, "step": 2145 }, { - "epoch": 0.4, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003880710683150622, - "loss": 2.1854, + "epoch": 0.36, + "grad_norm": 0.1982421875, + "learning_rate": 0.00019592503299250096, + "loss": 2.2198, "step": 2150 }, { - "epoch": 0.4, - "grad_norm": 0.1806640625, - "learning_rate": 0.0003879606641280714, - "loss": 2.2216, + "epoch": 0.36, + "grad_norm": 0.193359375, + "learning_rate": 0.0001958831628595297, + "loss": 2.1736, "step": 2155 }, { - "epoch": 0.4, - "grad_norm": 0.1865234375, - "learning_rate": 0.00038784976723934796, - "loss": 2.169, + "epoch": 0.37, + "grad_norm": 0.189453125, + "learning_rate": 0.00019584108323782777, + "loss": 2.1709, "step": 2160 }, { - "epoch": 0.4, - "grad_norm": 0.1923828125, - "learning_rate": 0.00038773837793958625, - "loss": 2.202, + "epoch": 0.37, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001957987942193321, + "loss": 2.1806, "step": 2165 }, { - "epoch": 0.4, - "grad_norm": 0.1865234375, - "learning_rate": 0.0003876264965207712, - "loss": 2.1729, + "epoch": 0.37, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019575629589643718, + "loss": 2.1568, "step": 2170 }, { - "epoch": 0.4, + "epoch": 0.37, "grad_norm": 0.1845703125, - "learning_rate": 0.00038751412327617794, - "loss": 2.1744, + "learning_rate": 0.00019571358836199476, + "loss": 2.1647, "step": 2175 }, { - "epoch": 0.4, - "grad_norm": 0.1875, - "learning_rate": 0.0003874012585003707, - "loss": 2.1703, + "epoch": 0.37, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019567067170931366, + "loss": 2.2088, "step": 2180 }, { - "epoch": 0.41, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003872879024892021, - "loss": 2.2233, + "epoch": 0.37, + "grad_norm": 0.19140625, + "learning_rate": 0.00019562754603215962, + "loss": 2.1749, "step": 2185 }, { - "epoch": 0.41, - "grad_norm": 0.189453125, - "learning_rate": 0.00038717405553981266, - "loss": 2.2082, + "epoch": 0.37, + "grad_norm": 0.197265625, + "learning_rate": 0.00019558421142475507, + "loss": 2.1569, "step": 2190 }, { - "epoch": 0.41, - "grad_norm": 0.1904296875, - "learning_rate": 0.00038705971795062954, - "loss": 2.1604, + "epoch": 0.37, + "grad_norm": 0.193359375, + "learning_rate": 0.0001955406679817789, + "loss": 2.1758, "step": 2195 }, { - "epoch": 0.41, - "grad_norm": 0.1884765625, - "learning_rate": 0.00038694489002136625, - "loss": 2.1782, + "epoch": 0.37, + "grad_norm": 0.193359375, + "learning_rate": 0.00019549691579836626, + "loss": 2.2226, "step": 2200 }, { - "epoch": 0.41, - "grad_norm": 0.1826171875, - "learning_rate": 0.00038682957205302137, - "loss": 2.1704, + "epoch": 0.37, + "grad_norm": 0.201171875, + "learning_rate": 0.00019545295497010843, + "loss": 2.1599, "step": 2205 }, { - "epoch": 0.41, - "grad_norm": 0.1904296875, - "learning_rate": 0.00038671376434787824, - "loss": 2.1866, + "epoch": 0.37, + "grad_norm": 0.19921875, + "learning_rate": 0.0001954087855930524, + "loss": 2.1589, "step": 2210 }, { - "epoch": 0.41, - "grad_norm": 0.189453125, - "learning_rate": 0.0003865974672095039, - "loss": 2.1453, + "epoch": 0.38, + "grad_norm": 0.1923828125, + "learning_rate": 0.000195364407763701, + "loss": 2.1723, "step": 2215 }, { - "epoch": 0.41, - "grad_norm": 0.1845703125, - "learning_rate": 0.00038648068094274823, - "loss": 2.183, + "epoch": 0.38, + "grad_norm": 0.19140625, + "learning_rate": 0.00019531982157901232, + "loss": 2.1533, "step": 2220 }, { - "epoch": 0.41, - "grad_norm": 0.1845703125, - "learning_rate": 0.0003863634058537434, - "loss": 2.1609, + "epoch": 0.38, + "grad_norm": 0.193359375, + "learning_rate": 0.00019527502713639975, + "loss": 2.1804, "step": 2225 }, { - "epoch": 0.41, - "grad_norm": 0.1875, - "learning_rate": 0.00038624564224990285, - "loss": 2.2011, + "epoch": 0.38, + "grad_norm": 0.2021484375, + "learning_rate": 0.00019523002453373175, + "loss": 2.163, "step": 2230 }, { - "epoch": 0.41, - "grad_norm": 0.1953125, - "learning_rate": 0.0003861273904399207, - "loss": 2.1987, + "epoch": 0.38, + "grad_norm": 0.193359375, + "learning_rate": 0.0001951848138693314, + "loss": 2.1807, "step": 2235 }, { - "epoch": 0.42, - "grad_norm": 0.1865234375, - "learning_rate": 0.0003860086507337705, - "loss": 2.147, + "epoch": 0.38, + "grad_norm": 0.2001953125, + "learning_rate": 0.00019513939524197656, + "loss": 2.1523, "step": 2240 }, { - "epoch": 0.42, - "grad_norm": 0.1943359375, - "learning_rate": 0.00038588942344270504, - "loss": 2.2012, + "epoch": 0.38, + "grad_norm": 0.1875, + "learning_rate": 0.0001950937687508993, + "loss": 2.1963, "step": 2245 }, { - "epoch": 0.42, - "grad_norm": 0.1865234375, - "learning_rate": 0.00038576970887925515, - "loss": 2.1498, + "epoch": 0.38, + "grad_norm": 0.2001953125, + "learning_rate": 0.00019504793449578593, + "loss": 2.171, "step": 2250 }, { - "epoch": 0.42, - "grad_norm": 0.189453125, - "learning_rate": 0.0003856495073572289, - "loss": 2.1827, + "epoch": 0.38, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019500189257677666, + "loss": 2.1529, "step": 2255 }, { - "epoch": 0.42, - "grad_norm": 0.189453125, - "learning_rate": 0.0003855288191917106, - "loss": 2.2141, + "epoch": 0.38, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001949556430944654, + "loss": 2.1683, "step": 2260 }, { - "epoch": 0.42, + "epoch": 0.38, "grad_norm": 0.1904296875, - "learning_rate": 0.00038540764469906073, - "loss": 2.1551, + "learning_rate": 0.00019490918614989956, + "loss": 2.1611, "step": 2265 }, { - "epoch": 0.42, - "grad_norm": 0.1943359375, - "learning_rate": 0.00038528598419691404, - "loss": 2.1988, + "epoch": 0.38, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019486252184457977, + "loss": 2.1865, "step": 2270 }, { - "epoch": 0.42, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003851638380041796, - "loss": 2.2131, + "epoch": 0.39, + "grad_norm": 0.1904296875, + "learning_rate": 0.00019481565028045986, + "loss": 2.1827, "step": 2275 }, { - "epoch": 0.42, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003850412064410396, - "loss": 2.2368, + "epoch": 0.39, + "grad_norm": 0.19140625, + "learning_rate": 0.00019476857155994635, + "loss": 2.1502, "step": 2280 }, { - "epoch": 0.42, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003849180898289482, - "loss": 2.2131, + "epoch": 0.39, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019472128578589833, + "loss": 2.1553, "step": 2285 }, { - "epoch": 0.42, - "grad_norm": 0.189453125, - "learning_rate": 0.00038479448849063145, - "loss": 2.1753, + "epoch": 0.39, + "grad_norm": 0.1962890625, + "learning_rate": 0.00019467379306162746, + "loss": 2.2209, "step": 2290 }, { - "epoch": 0.43, - "grad_norm": 0.1826171875, - "learning_rate": 0.0003846704027500859, - "loss": 2.125, + "epoch": 0.39, + "grad_norm": 0.19140625, + "learning_rate": 0.0001946260934908973, + "loss": 2.202, "step": 2295 }, { - "epoch": 0.43, - "grad_norm": 0.1962890625, - "learning_rate": 0.00038454583293257754, - "loss": 2.1785, + "epoch": 0.39, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019457818717792357, + "loss": 2.1814, "step": 2300 }, { - "epoch": 0.43, + "epoch": 0.39, "grad_norm": 0.19140625, - "learning_rate": 0.0003844207793646417, - "loss": 2.1774, + "learning_rate": 0.0001945300742273735, + "loss": 2.1992, "step": 2305 }, { - "epoch": 0.43, - "grad_norm": 0.18359375, - "learning_rate": 0.0003842952423740815, - "loss": 2.201, + "epoch": 0.39, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019448175474436592, + "loss": 2.1637, "step": 2310 }, { - "epoch": 0.43, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003841692222899675, - "loss": 2.1697, + "epoch": 0.39, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019443322883447078, + "loss": 2.1961, "step": 2315 }, { - "epoch": 0.43, - "grad_norm": 0.1962890625, - "learning_rate": 0.00038404271944263635, - "loss": 2.2196, + "epoch": 0.39, + "grad_norm": 0.1904296875, + "learning_rate": 0.00019438449660370922, + "loss": 2.1988, "step": 2320 }, { - "epoch": 0.43, - "grad_norm": 0.189453125, - "learning_rate": 0.0003839157341636903, - "loss": 2.1834, + "epoch": 0.39, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019433555815855292, + "loss": 2.1567, "step": 2325 }, { - "epoch": 0.43, - "grad_norm": 0.19140625, - "learning_rate": 0.0003837882667859961, - "loss": 2.168, - "step": 2330 - }, + "epoch": 0.39, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001942864136059243, + "loss": 2.1266, + "step": 2330 + }, { - "epoch": 0.43, - "grad_norm": 0.2001953125, - "learning_rate": 0.0003836603176436842, - "loss": 2.2153, + "epoch": 0.4, + "grad_norm": 0.19921875, + "learning_rate": 0.000194237063053196, + "loss": 2.1608, "step": 2335 }, { - "epoch": 0.43, - "grad_norm": 0.1845703125, - "learning_rate": 0.00038353188707214826, - "loss": 2.1761, + "epoch": 0.4, + "grad_norm": 0.189453125, + "learning_rate": 0.00019418750660819074, + "loss": 2.1657, "step": 2340 }, { - "epoch": 0.44, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003834029754080435, - "loss": 2.1737, + "epoch": 0.4, + "grad_norm": 0.189453125, + "learning_rate": 0.0001941377443791811, + "loss": 2.1726, "step": 2345 }, { - "epoch": 0.44, - "grad_norm": 0.1865234375, - "learning_rate": 0.00038327358298928624, - "loss": 2.152, + "epoch": 0.4, + "grad_norm": 0.193359375, + "learning_rate": 0.00019408777647488928, + "loss": 2.2402, "step": 2350 }, { - "epoch": 0.44, - "grad_norm": 0.1962890625, - "learning_rate": 0.00038314371015505327, - "loss": 2.2155, + "epoch": 0.4, + "grad_norm": 0.197265625, + "learning_rate": 0.00019403760300448677, + "loss": 2.1513, "step": 2355 }, { - "epoch": 0.44, - "grad_norm": 0.1943359375, - "learning_rate": 0.00038301335724578057, - "loss": 2.196, + "epoch": 0.4, + "grad_norm": 0.193359375, + "learning_rate": 0.0001939872240775943, + "loss": 2.1811, "step": 2360 }, { - "epoch": 0.44, - "grad_norm": 0.197265625, - "learning_rate": 0.00038288252460316253, - "loss": 2.1863, + "epoch": 0.4, + "grad_norm": 0.2001953125, + "learning_rate": 0.0001939366398042814, + "loss": 2.1592, "step": 2365 }, { - "epoch": 0.44, - "grad_norm": 0.1875, - "learning_rate": 0.000382751212570151, - "loss": 2.1775, + "epoch": 0.4, + "grad_norm": 0.2060546875, + "learning_rate": 0.00019388585029506627, + "loss": 2.1665, "step": 2370 }, { - "epoch": 0.44, - "grad_norm": 0.1865234375, - "learning_rate": 0.0003826194214909545, - "loss": 2.185, + "epoch": 0.4, + "grad_norm": 0.193359375, + "learning_rate": 0.00019383485566091554, + "loss": 2.1636, "step": 2375 }, { - "epoch": 0.44, - "grad_norm": 0.19921875, - "learning_rate": 0.00038248715171103744, - "loss": 2.1951, + "epoch": 0.4, + "grad_norm": 0.1884765625, + "learning_rate": 0.000193783656013244, + "loss": 2.144, "step": 2380 }, { - "epoch": 0.44, - "grad_norm": 0.1953125, - "learning_rate": 0.0003823544035771187, - "loss": 2.143, + "epoch": 0.4, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001937322514639143, + "loss": 2.1331, "step": 2385 }, { - "epoch": 0.44, - "grad_norm": 0.19140625, - "learning_rate": 0.0003822211774371715, - "loss": 2.2101, + "epoch": 0.4, + "grad_norm": 0.1904296875, + "learning_rate": 0.00019368064212523686, + "loss": 2.1441, "step": 2390 }, { - "epoch": 0.44, - "grad_norm": 0.1904296875, - "learning_rate": 0.00038208747364042167, - "loss": 2.1411, + "epoch": 0.41, + "grad_norm": 0.1953125, + "learning_rate": 0.0001936288281099694, + "loss": 2.2009, "step": 2395 }, { - "epoch": 0.45, - "grad_norm": 0.2001953125, - "learning_rate": 0.00038195329253734735, - "loss": 2.1732, + "epoch": 0.41, + "grad_norm": 0.19140625, + "learning_rate": 0.00019357680953131703, + "loss": 2.1558, "step": 2400 }, { - "epoch": 0.45, - "grad_norm": 0.1875, - "learning_rate": 0.0003818186344796778, - "loss": 2.1737, + "epoch": 0.41, + "grad_norm": 0.1865234375, + "learning_rate": 0.0001935245865029316, + "loss": 2.1831, "step": 2405 }, { - "epoch": 0.45, - "grad_norm": 0.193359375, - "learning_rate": 0.00038168349982039244, - "loss": 2.185, + "epoch": 0.41, + "grad_norm": 0.19921875, + "learning_rate": 0.00019347215913891175, + "loss": 2.1691, "step": 2410 }, { - "epoch": 0.45, - "grad_norm": 0.1865234375, - "learning_rate": 0.0003815478889137201, - "loss": 2.1498, + "epoch": 0.41, + "grad_norm": 0.1923828125, + "learning_rate": 0.00019341952755380252, + "loss": 2.1821, "step": 2415 }, { - "epoch": 0.45, - "grad_norm": 0.1865234375, - "learning_rate": 0.000381411802115138, - "loss": 2.1966, + "epoch": 0.41, + "grad_norm": 0.197265625, + "learning_rate": 0.00019336669186259515, + "loss": 2.1822, "step": 2420 }, { - "epoch": 0.45, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003812752397813708, - "loss": 2.1869, + "epoch": 0.41, + "grad_norm": 0.19140625, + "learning_rate": 0.00019331365218072682, + "loss": 2.2013, "step": 2425 }, { - "epoch": 0.45, - "grad_norm": 0.1865234375, - "learning_rate": 0.00038113820227038967, - "loss": 2.1542, + "epoch": 0.41, + "grad_norm": 0.189453125, + "learning_rate": 0.0001932604086240804, + "loss": 2.2009, "step": 2430 }, { - "epoch": 0.45, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003810006899414113, - "loss": 2.1937, + "epoch": 0.41, + "grad_norm": 0.2021484375, + "learning_rate": 0.00019320696130898418, + "loss": 2.0917, "step": 2435 }, { - "epoch": 0.45, - "grad_norm": 0.1962890625, - "learning_rate": 0.00038086270315489703, - "loss": 2.178, + "epoch": 0.41, + "grad_norm": 0.185546875, + "learning_rate": 0.00019315331035221162, + "loss": 2.1562, "step": 2440 }, { - "epoch": 0.45, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003807242422725521, - "loss": 2.199, + "epoch": 0.41, + "grad_norm": 0.2001953125, + "learning_rate": 0.00019309945587098117, + "loss": 2.1827, "step": 2445 }, { - "epoch": 0.45, - "grad_norm": 0.193359375, - "learning_rate": 0.0003805853076573243, - "loss": 2.1818, + "epoch": 0.41, + "grad_norm": 0.19921875, + "learning_rate": 0.00019304539798295587, + "loss": 2.1584, "step": 2450 }, { - "epoch": 0.46, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003804458996734032, - "loss": 2.1872, + "epoch": 0.42, + "grad_norm": 0.197265625, + "learning_rate": 0.0001929911368062432, + "loss": 2.158, "step": 2455 }, { - "epoch": 0.46, - "grad_norm": 0.1953125, - "learning_rate": 0.0003803060186862193, - "loss": 2.1838, + "epoch": 0.42, + "grad_norm": 0.193359375, + "learning_rate": 0.00019293667245939475, + "loss": 2.171, "step": 2460 }, { - "epoch": 0.46, - "grad_norm": 0.19140625, - "learning_rate": 0.000380165665062443, - "loss": 2.188, + "epoch": 0.42, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001928820050614061, + "loss": 2.1782, "step": 2465 }, { - "epoch": 0.46, - "grad_norm": 0.1875, - "learning_rate": 0.0003800248391699836, - "loss": 2.1769, + "epoch": 0.42, + "grad_norm": 0.1962890625, + "learning_rate": 0.00019282713473171633, + "loss": 2.2018, "step": 2470 }, { - "epoch": 0.46, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003798835413779883, - "loss": 2.1981, + "epoch": 0.42, + "grad_norm": 0.193359375, + "learning_rate": 0.00019277206159020805, + "loss": 2.1583, "step": 2475 }, { - "epoch": 0.46, - "grad_norm": 0.1953125, - "learning_rate": 0.0003797417720568413, - "loss": 2.1709, + "epoch": 0.42, + "grad_norm": 0.2138671875, + "learning_rate": 0.00019271678575720683, + "loss": 2.1846, "step": 2480 }, { - "epoch": 0.46, - "grad_norm": 0.1865234375, - "learning_rate": 0.0003795995315781629, - "loss": 2.1286, + "epoch": 0.42, + "grad_norm": 0.1953125, + "learning_rate": 0.00019266130735348118, + "loss": 2.1489, "step": 2485 }, { - "epoch": 0.46, - "grad_norm": 0.201171875, - "learning_rate": 0.00037945682031480845, - "loss": 2.174, + "epoch": 0.42, + "grad_norm": 0.203125, + "learning_rate": 0.0001926056265002422, + "loss": 2.1503, "step": 2490 }, { - "epoch": 0.46, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003793136386408673, - "loss": 2.2026, + "epoch": 0.42, + "grad_norm": 0.1884765625, + "learning_rate": 0.00019254974331914322, + "loss": 2.1489, "step": 2495 }, { - "epoch": 0.46, - "grad_norm": 0.1884765625, - "learning_rate": 0.00037916998693166183, - "loss": 2.1502, + "epoch": 0.42, + "grad_norm": 0.2001953125, + "learning_rate": 0.00019249365793227966, + "loss": 2.2092, "step": 2500 }, { - "epoch": 0.46, - "grad_norm": 0.1884765625, - "learning_rate": 0.00037902586556374666, - "loss": 2.2136, + "epoch": 0.42, + "grad_norm": 0.197265625, + "learning_rate": 0.0001924373704621888, + "loss": 2.1788, "step": 2505 }, { - "epoch": 0.47, - "grad_norm": 0.1923828125, - "learning_rate": 0.00037888127491490754, - "loss": 2.1598, + "epoch": 0.43, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001923808810318494, + "loss": 2.1331, "step": 2510 }, { - "epoch": 0.47, - "grad_norm": 0.1923828125, - "learning_rate": 0.00037873621536416017, - "loss": 2.1932, + "epoch": 0.43, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019232418976468133, + "loss": 2.1295, "step": 2515 }, { - "epoch": 0.47, - "grad_norm": 0.189453125, - "learning_rate": 0.00037859068729174955, - "loss": 2.1698, + "epoch": 0.43, + "grad_norm": 0.1953125, + "learning_rate": 0.0001922672967845457, + "loss": 2.1849, "step": 2520 }, { - "epoch": 0.47, - "grad_norm": 0.19140625, - "learning_rate": 0.00037844469107914874, - "loss": 2.1692, + "epoch": 0.43, + "grad_norm": 0.1953125, + "learning_rate": 0.00019221020221574413, + "loss": 2.1991, "step": 2525 }, { - "epoch": 0.47, - "grad_norm": 0.1875, - "learning_rate": 0.0003782982271090579, - "loss": 2.2203, + "epoch": 0.43, + "grad_norm": 0.193359375, + "learning_rate": 0.00019215290618301875, + "loss": 2.1679, "step": 2530 }, { - "epoch": 0.47, - "grad_norm": 0.189453125, - "learning_rate": 0.00037815129576540356, - "loss": 2.1922, + "epoch": 0.43, + "grad_norm": 0.1953125, + "learning_rate": 0.00019209540881155176, + "loss": 2.1439, "step": 2535 }, { - "epoch": 0.47, - "grad_norm": 0.197265625, - "learning_rate": 0.000378003897433337, - "loss": 2.1406, + "epoch": 0.43, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019203771022696547, + "loss": 2.1732, "step": 2540 }, { - "epoch": 0.47, - "grad_norm": 0.1875, - "learning_rate": 0.00037785603249923386, - "loss": 2.1636, + "epoch": 0.43, + "grad_norm": 0.19140625, + "learning_rate": 0.00019197981055532156, + "loss": 2.1724, "step": 2545 }, { - "epoch": 0.47, - "grad_norm": 0.185546875, - "learning_rate": 0.00037770770135069293, - "loss": 2.1878, + "epoch": 0.43, + "grad_norm": 0.1953125, + "learning_rate": 0.00019192170992312125, + "loss": 2.1703, "step": 2550 }, { - "epoch": 0.47, - "grad_norm": 0.1826171875, - "learning_rate": 0.000377558904376535, - "loss": 2.1672, + "epoch": 0.43, + "grad_norm": 0.1953125, + "learning_rate": 0.00019186340845730467, + "loss": 2.1369, "step": 2555 }, { - "epoch": 0.47, + "epoch": 0.43, "grad_norm": 0.193359375, - "learning_rate": 0.0003774096419668018, - "loss": 2.1447, + "learning_rate": 0.00019180490628525082, + "loss": 2.1925, "step": 2560 }, { - "epoch": 0.48, - "grad_norm": 0.1953125, - "learning_rate": 0.0003772599145127553, - "loss": 2.1738, + "epoch": 0.43, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019174620353477724, + "loss": 2.1806, "step": 2565 }, { - "epoch": 0.48, - "grad_norm": 0.1904296875, - "learning_rate": 0.00037710972240687654, - "loss": 2.127, + "epoch": 0.44, + "grad_norm": 0.1923828125, + "learning_rate": 0.0001916873003341396, + "loss": 2.1636, "step": 2570 }, { - "epoch": 0.48, - "grad_norm": 0.1865234375, - "learning_rate": 0.00037695906604286427, - "loss": 2.1835, + "epoch": 0.44, + "grad_norm": 0.1923828125, + "learning_rate": 0.0001916281968120316, + "loss": 2.1723, "step": 2575 }, { - "epoch": 0.48, - "grad_norm": 0.19140625, - "learning_rate": 0.0003768079458156344, - "loss": 2.2312, + "epoch": 0.44, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001915688930975846, + "loss": 2.1838, "step": 2580 }, { - "epoch": 0.48, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003766563621213189, - "loss": 2.1855, + "epoch": 0.44, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001915093893203673, + "loss": 2.1576, "step": 2585 }, { - "epoch": 0.48, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003765043153572643, - "loss": 2.1818, + "epoch": 0.44, + "grad_norm": 0.189453125, + "learning_rate": 0.00019144968561038558, + "loss": 2.1672, "step": 2590 }, { - "epoch": 0.48, - "grad_norm": 0.189453125, - "learning_rate": 0.0003763518059220311, - "loss": 2.1717, + "epoch": 0.44, + "grad_norm": 0.197265625, + "learning_rate": 0.00019138978209808208, + "loss": 2.1246, "step": 2595 }, { - "epoch": 0.48, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003761988342153929, - "loss": 2.1862, + "epoch": 0.44, + "grad_norm": 0.19921875, + "learning_rate": 0.00019132967891433595, + "loss": 2.1887, "step": 2600 }, { - "epoch": 0.48, - "grad_norm": 0.1875, - "learning_rate": 0.0003760454006383345, - "loss": 2.1539, + "epoch": 0.44, + "grad_norm": 0.201171875, + "learning_rate": 0.00019126937619046267, + "loss": 2.2243, "step": 2605 }, { - "epoch": 0.48, - "grad_norm": 0.1875, - "learning_rate": 0.0003758915055930519, - "loss": 2.1678, + "epoch": 0.44, + "grad_norm": 0.2021484375, + "learning_rate": 0.00019120887405821361, + "loss": 2.1627, "step": 2610 }, { - "epoch": 0.49, + "epoch": 0.44, "grad_norm": 0.19140625, - "learning_rate": 0.00037573714948295044, - "loss": 2.1442, + "learning_rate": 0.00019114817264977588, + "loss": 2.1638, "step": 2615 }, { - "epoch": 0.49, - "grad_norm": 0.1962890625, - "learning_rate": 0.00037558233271264423, - "loss": 2.2111, + "epoch": 0.44, + "grad_norm": 0.2021484375, + "learning_rate": 0.00019108727209777196, + "loss": 2.1382, "step": 2620 }, { - "epoch": 0.49, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003754270556879547, - "loss": 2.1631, + "epoch": 0.44, + "grad_norm": 0.197265625, + "learning_rate": 0.00019102617253525934, + "loss": 2.1539, "step": 2625 }, { - "epoch": 0.49, + "epoch": 0.45, "grad_norm": 0.189453125, - "learning_rate": 0.0003752713188159101, - "loss": 2.1998, + "learning_rate": 0.00019096487409573043, + "loss": 2.1688, "step": 2630 }, { - "epoch": 0.49, - "grad_norm": 0.1904296875, - "learning_rate": 0.00037511512250474363, - "loss": 2.1674, + "epoch": 0.45, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019090337691311207, + "loss": 2.1974, "step": 2635 }, { - "epoch": 0.49, - "grad_norm": 0.1884765625, - "learning_rate": 0.00037495846716389323, - "loss": 2.1769, + "epoch": 0.45, + "grad_norm": 0.1904296875, + "learning_rate": 0.0001908416811217654, + "loss": 2.178, "step": 2640 }, { - "epoch": 0.49, - "grad_norm": 0.19140625, - "learning_rate": 0.0003748013532039998, - "loss": 2.1797, + "epoch": 0.45, + "grad_norm": 0.1953125, + "learning_rate": 0.0001907797868564854, + "loss": 2.1297, "step": 2645 }, { - "epoch": 0.49, - "grad_norm": 0.193359375, - "learning_rate": 0.00037464378103690656, - "loss": 2.1771, + "epoch": 0.45, + "grad_norm": 0.19140625, + "learning_rate": 0.00019071769425250075, + "loss": 2.161, "step": 2650 }, { - "epoch": 0.49, - "grad_norm": 0.1904296875, - "learning_rate": 0.00037448575107565786, - "loss": 2.1965, + "epoch": 0.45, + "grad_norm": 0.1982421875, + "learning_rate": 0.00019065540344547342, + "loss": 2.1568, "step": 2655 }, { - "epoch": 0.49, - "grad_norm": 0.1884765625, - "learning_rate": 0.000374327263734498, - "loss": 2.176, + "epoch": 0.45, + "grad_norm": 0.2080078125, + "learning_rate": 0.00019059291457149846, + "loss": 2.2083, "step": 2660 }, { - "epoch": 0.49, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003741683194288701, - "loss": 2.2076, + "epoch": 0.45, + "grad_norm": 0.2021484375, + "learning_rate": 0.00019053022776710363, + "loss": 2.1752, "step": 2665 }, { - "epoch": 0.5, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003740089185754154, - "loss": 2.1803, + "epoch": 0.45, + "grad_norm": 0.189453125, + "learning_rate": 0.0001904673431692492, + "loss": 2.145, "step": 2670 }, { - "epoch": 0.5, - "grad_norm": 0.2001953125, - "learning_rate": 0.0003738490615919716, - "loss": 2.1576, + "epoch": 0.45, + "grad_norm": 0.19140625, + "learning_rate": 0.00019040426091532743, + "loss": 2.1651, "step": 2675 }, { - "epoch": 0.5, - "grad_norm": 0.2021484375, - "learning_rate": 0.0003736887488975723, - "loss": 2.2038, + "epoch": 0.45, + "grad_norm": 0.1943359375, + "learning_rate": 0.00019034098114316264, + "loss": 2.2082, "step": 2680 }, { - "epoch": 0.5, - "grad_norm": 0.19140625, - "learning_rate": 0.00037352798091244547, - "loss": 2.1394, + "epoch": 0.45, + "grad_norm": 0.1982421875, + "learning_rate": 0.00019027750399101053, + "loss": 2.1772, "step": 2685 }, { - "epoch": 0.5, - "grad_norm": 0.1953125, - "learning_rate": 0.0003733667580580127, - "loss": 2.2167, + "epoch": 0.46, + "grad_norm": 0.201171875, + "learning_rate": 0.00019021382959755808, + "loss": 2.2035, "step": 2690 }, { - "epoch": 0.5, - "grad_norm": 0.189453125, - "learning_rate": 0.00037320508075688776, - "loss": 2.1758, + "epoch": 0.46, + "grad_norm": 0.19140625, + "learning_rate": 0.00019014995810192332, + "loss": 2.1952, "step": 2695 }, { - "epoch": 0.5, - "grad_norm": 0.193359375, - "learning_rate": 0.0003730429494328757, - "loss": 2.1592, + "epoch": 0.46, + "grad_norm": 0.1953125, + "learning_rate": 0.0001900858896436547, + "loss": 2.0956, "step": 2700 }, { - "epoch": 0.5, - "grad_norm": 0.1865234375, - "learning_rate": 0.0003728803645109719, - "loss": 2.1819, + "epoch": 0.46, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001900216243627312, + "loss": 2.1508, "step": 2705 }, { - "epoch": 0.5, - "grad_norm": 0.1923828125, - "learning_rate": 0.00037271732641736043, - "loss": 2.2038, + "epoch": 0.46, + "grad_norm": 0.2021484375, + "learning_rate": 0.00018995716239956175, + "loss": 2.2125, "step": 2710 }, { - "epoch": 0.5, - "grad_norm": 0.197265625, - "learning_rate": 0.0003725538355794135, - "loss": 2.191, + "epoch": 0.46, + "grad_norm": 0.193359375, + "learning_rate": 0.00018989250389498497, + "loss": 2.15, "step": 2715 }, { - "epoch": 0.5, - "grad_norm": 0.2001953125, - "learning_rate": 0.00037238989242569003, - "loss": 2.1797, + "epoch": 0.46, + "grad_norm": 0.1953125, + "learning_rate": 0.0001898276489902689, + "loss": 2.1861, "step": 2720 }, { - "epoch": 0.51, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003722254973859346, - "loss": 2.1828, + "epoch": 0.46, + "grad_norm": 0.1904296875, + "learning_rate": 0.00018976259782711074, + "loss": 2.1673, "step": 2725 }, { - "epoch": 0.51, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003720606508910763, - "loss": 2.1772, + "epoch": 0.46, + "grad_norm": 0.197265625, + "learning_rate": 0.00018969735054763645, + "loss": 2.1716, "step": 2730 }, { - "epoch": 0.51, - "grad_norm": 0.189453125, - "learning_rate": 0.00037189535337322767, - "loss": 2.1698, + "epoch": 0.46, + "grad_norm": 0.1923828125, + "learning_rate": 0.0001896319072944004, + "loss": 2.186, "step": 2735 }, { - "epoch": 0.51, - "grad_norm": 0.2080078125, - "learning_rate": 0.0003717296052656835, - "loss": 2.1274, + "epoch": 0.46, + "grad_norm": 0.197265625, + "learning_rate": 0.00018956626821038522, + "loss": 2.2132, "step": 2740 }, { - "epoch": 0.51, - "grad_norm": 0.189453125, - "learning_rate": 0.0003715634070029196, - "loss": 2.1501, + "epoch": 0.46, + "grad_norm": 0.197265625, + "learning_rate": 0.00018950043343900138, + "loss": 2.1679, "step": 2745 }, { - "epoch": 0.51, - "grad_norm": 0.1875, - "learning_rate": 0.0003713967590205919, - "loss": 2.1519, + "epoch": 0.47, + "grad_norm": 0.1953125, + "learning_rate": 0.0001894344031240869, + "loss": 2.169, "step": 2750 }, { - "epoch": 0.51, - "grad_norm": 0.1884765625, - "learning_rate": 0.00037122966175553524, - "loss": 2.1676, + "epoch": 0.47, + "grad_norm": 0.1923828125, + "learning_rate": 0.00018936817740990692, + "loss": 2.1564, "step": 2755 }, { - "epoch": 0.51, - "grad_norm": 0.2119140625, - "learning_rate": 0.000371062115645762, - "loss": 2.1744, + "epoch": 0.47, + "grad_norm": 0.197265625, + "learning_rate": 0.00018930175644115373, + "loss": 2.1463, "step": 2760 }, { - "epoch": 0.51, - "grad_norm": 0.1923828125, - "learning_rate": 0.00037089412113046116, - "loss": 2.1807, + "epoch": 0.47, + "grad_norm": 0.193359375, + "learning_rate": 0.00018923514036294598, + "loss": 2.1655, "step": 2765 }, { - "epoch": 0.51, - "grad_norm": 0.1904296875, - "learning_rate": 0.00037072567864999723, - "loss": 2.183, + "epoch": 0.47, + "grad_norm": 0.2041015625, + "learning_rate": 0.00018916832932082872, + "loss": 2.1705, "step": 2770 }, { - "epoch": 0.51, - "grad_norm": 0.189453125, - "learning_rate": 0.00037055678864590874, - "loss": 2.1767, + "epoch": 0.47, + "grad_norm": 0.203125, + "learning_rate": 0.00018910132346077295, + "loss": 2.1628, "step": 2775 }, { - "epoch": 0.52, - "grad_norm": 0.1865234375, - "learning_rate": 0.00037038745156090766, - "loss": 2.1606, + "epoch": 0.47, + "grad_norm": 0.19921875, + "learning_rate": 0.0001890341229291753, + "loss": 2.1291, "step": 2780 }, { - "epoch": 0.52, - "grad_norm": 0.1845703125, - "learning_rate": 0.0003702176678388775, - "loss": 2.149, + "epoch": 0.47, + "grad_norm": 0.1943359375, + "learning_rate": 0.00018896672787285774, + "loss": 2.1664, "step": 2785 }, { - "epoch": 0.52, - "grad_norm": 0.185546875, - "learning_rate": 0.0003700474379248728, - "loss": 2.2374, + "epoch": 0.47, + "grad_norm": 0.1943359375, + "learning_rate": 0.00018889913843906725, + "loss": 2.1971, "step": 2790 }, { - "epoch": 0.52, - "grad_norm": 0.189453125, - "learning_rate": 0.0003698767622651178, - "loss": 2.1584, + "epoch": 0.47, + "grad_norm": 0.201171875, + "learning_rate": 0.00018883135477547542, + "loss": 2.1711, "step": 2795 }, { - "epoch": 0.52, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003697056413070047, - "loss": 2.1679, - "step": 2800 + "epoch": 0.47, + "grad_norm": 0.19921875, + "learning_rate": 0.0001887633770301783, + "loss": 2.169, + "step": 2800 }, { - "epoch": 0.52, - "grad_norm": 0.19140625, - "learning_rate": 0.0003695340754990935, - "loss": 2.1748, + "epoch": 0.48, + "grad_norm": 0.1962890625, + "learning_rate": 0.00018869520535169597, + "loss": 2.1618, "step": 2805 }, { - "epoch": 0.52, - "grad_norm": 0.189453125, - "learning_rate": 0.00036936206529110995, - "loss": 2.2067, + "epoch": 0.48, + "grad_norm": 0.1904296875, + "learning_rate": 0.00018862683988897212, + "loss": 2.1426, "step": 2810 }, { - "epoch": 0.52, - "grad_norm": 0.1875, - "learning_rate": 0.0003691896111339449, - "loss": 2.1876, + "epoch": 0.48, + "grad_norm": 0.2001953125, + "learning_rate": 0.0001885582807913739, + "loss": 2.1659, "step": 2815 }, { - "epoch": 0.52, - "grad_norm": 0.1884765625, - "learning_rate": 0.00036901671347965275, - "loss": 2.1719, + "epoch": 0.48, + "grad_norm": 0.19921875, + "learning_rate": 0.00018848952820869154, + "loss": 2.1803, "step": 2820 }, { - "epoch": 0.52, - "grad_norm": 0.189453125, - "learning_rate": 0.0003688433727814506, - "loss": 2.1687, + "epoch": 0.48, + "grad_norm": 0.1923828125, + "learning_rate": 0.00018842058229113796, + "loss": 2.1246, "step": 2825 }, { - "epoch": 0.53, - "grad_norm": 0.1904296875, - "learning_rate": 0.00036866958949371677, - "loss": 2.1702, + "epoch": 0.48, + "grad_norm": 0.19921875, + "learning_rate": 0.00018835144318934854, + "loss": 2.167, "step": 2830 }, { - "epoch": 0.53, - "grad_norm": 0.19140625, - "learning_rate": 0.0003684953640719899, - "loss": 2.1444, + "epoch": 0.48, + "grad_norm": 0.201171875, + "learning_rate": 0.0001882821110543806, + "loss": 2.1674, "step": 2835 }, { - "epoch": 0.53, - "grad_norm": 0.203125, - "learning_rate": 0.0003683206969729673, - "loss": 2.2099, + "epoch": 0.48, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001882125860377134, + "loss": 2.147, "step": 2840 }, { - "epoch": 0.53, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003681455886545045, - "loss": 2.1858, + "epoch": 0.48, + "grad_norm": 0.2060546875, + "learning_rate": 0.00018814286829124747, + "loss": 2.1274, "step": 2845 }, { - "epoch": 0.53, - "grad_norm": 0.1904296875, - "learning_rate": 0.00036797003957561315, - "loss": 2.1791, + "epoch": 0.48, + "grad_norm": 0.1923828125, + "learning_rate": 0.00018807295796730445, + "loss": 2.1769, "step": 2850 }, { - "epoch": 0.53, - "grad_norm": 0.19140625, - "learning_rate": 0.0003677940501964606, - "loss": 2.1658, + "epoch": 0.48, + "grad_norm": 0.19921875, + "learning_rate": 0.00018800285521862679, + "loss": 2.1788, "step": 2855 }, { - "epoch": 0.53, - "grad_norm": 0.1953125, - "learning_rate": 0.0003676176209783681, - "loss": 2.1968, + "epoch": 0.48, + "grad_norm": 0.1962890625, + "learning_rate": 0.00018793256019837727, + "loss": 2.1786, "step": 2860 }, { - "epoch": 0.53, - "grad_norm": 0.1884765625, - "learning_rate": 0.00036744075238381017, - "loss": 2.1729, + "epoch": 0.49, + "grad_norm": 0.197265625, + "learning_rate": 0.00018786207306013882, + "loss": 2.1968, "step": 2865 }, { - "epoch": 0.53, - "grad_norm": 0.1962890625, - "learning_rate": 0.00036726344487641267, - "loss": 2.1772, + "epoch": 0.49, + "grad_norm": 0.203125, + "learning_rate": 0.00018779139395791407, + "loss": 2.1675, "step": 2870 }, { - "epoch": 0.53, - "grad_norm": 0.1923828125, - "learning_rate": 0.00036708569892095227, - "loss": 2.1572, + "epoch": 0.49, + "grad_norm": 0.1962890625, + "learning_rate": 0.00018772052304612507, + "loss": 2.1596, "step": 2875 }, { - "epoch": 0.53, - "grad_norm": 0.193359375, - "learning_rate": 0.00036690751498335487, - "loss": 2.1488, + "epoch": 0.49, + "grad_norm": 0.2041015625, + "learning_rate": 0.000187649460479613, + "loss": 2.1348, "step": 2880 }, { - "epoch": 0.54, - "grad_norm": 0.1953125, - "learning_rate": 0.0003667288935306944, - "loss": 2.1995, + "epoch": 0.49, + "grad_norm": 0.193359375, + "learning_rate": 0.0001875782064136377, + "loss": 2.1215, "step": 2885 }, { - "epoch": 0.54, - "grad_norm": 0.1953125, - "learning_rate": 0.0003665498350311918, - "loss": 2.1854, + "epoch": 0.49, + "grad_norm": 0.19921875, + "learning_rate": 0.00018750676100387742, + "loss": 2.2065, "step": 2890 }, { - "epoch": 0.54, - "grad_norm": 0.2021484375, - "learning_rate": 0.00036637033995421347, - "loss": 2.1867, + "epoch": 0.49, + "grad_norm": 0.1943359375, + "learning_rate": 0.00018743512440642845, + "loss": 2.1686, "step": 2895 }, { - "epoch": 0.54, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003661904087702702, - "loss": 2.1902, + "epoch": 0.49, + "grad_norm": 0.203125, + "learning_rate": 0.00018736329677780487, + "loss": 2.1854, "step": 2900 }, { - "epoch": 0.54, - "grad_norm": 0.189453125, - "learning_rate": 0.0003660100419510161, - "loss": 2.1654, + "epoch": 0.49, + "grad_norm": 0.1904296875, + "learning_rate": 0.00018729127827493805, + "loss": 2.1674, "step": 2905 }, { - "epoch": 0.54, + "epoch": 0.49, "grad_norm": 0.1953125, - "learning_rate": 0.00036582923996924724, - "loss": 2.1987, + "learning_rate": 0.0001872190690551764, + "loss": 2.1876, "step": 2910 }, { - "epoch": 0.54, - "grad_norm": 0.193359375, - "learning_rate": 0.0003656480032989001, - "loss": 2.1442, + "epoch": 0.49, + "grad_norm": 0.1982421875, + "learning_rate": 0.00018714666927628504, + "loss": 2.1409, "step": 2915 }, { - "epoch": 0.54, - "grad_norm": 0.1923828125, - "learning_rate": 0.00036546633241505094, - "loss": 2.1855, + "epoch": 0.49, + "grad_norm": 0.197265625, + "learning_rate": 0.00018707407909644542, + "loss": 2.1408, "step": 2920 }, { - "epoch": 0.54, - "grad_norm": 0.1904296875, - "learning_rate": 0.000365284227793914, - "loss": 2.2243, + "epoch": 0.5, + "grad_norm": 0.205078125, + "learning_rate": 0.00018700129867425504, + "loss": 2.2294, "step": 2925 }, { - "epoch": 0.54, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003651016899128406, - "loss": 2.1589, + "epoch": 0.5, + "grad_norm": 0.212890625, + "learning_rate": 0.0001869283281687269, + "loss": 2.1731, "step": 2930 }, { - "epoch": 0.54, - "grad_norm": 0.1943359375, - "learning_rate": 0.00036491871925031755, - "loss": 2.1643, + "epoch": 0.5, + "grad_norm": 0.1982421875, + "learning_rate": 0.00018685516773928943, + "loss": 2.1667, "step": 2935 }, { - "epoch": 0.55, - "grad_norm": 0.193359375, - "learning_rate": 0.0003647353162859666, - "loss": 2.1953, + "epoch": 0.5, + "grad_norm": 0.1982421875, + "learning_rate": 0.00018678181754578602, + "loss": 2.1689, "step": 2940 }, { - "epoch": 0.55, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003645514815005421, - "loss": 2.1838, + "epoch": 0.5, + "grad_norm": 0.1982421875, + "learning_rate": 0.00018670827774847456, + "loss": 2.2155, "step": 2945 }, { - "epoch": 0.55, - "grad_norm": 0.1865234375, - "learning_rate": 0.0003643672153759307, - "loss": 2.2055, + "epoch": 0.5, + "grad_norm": 0.2158203125, + "learning_rate": 0.00018663454850802728, + "loss": 2.1756, "step": 2950 }, { - "epoch": 0.55, - "grad_norm": 0.1923828125, - "learning_rate": 0.00036418251839514956, - "loss": 2.1623, + "epoch": 0.5, + "grad_norm": 0.19921875, + "learning_rate": 0.0001865606299855303, + "loss": 2.1609, "step": 2955 }, { - "epoch": 0.55, - "grad_norm": 0.18359375, - "learning_rate": 0.00036399739104234544, - "loss": 2.1533, + "epoch": 0.5, + "grad_norm": 0.19140625, + "learning_rate": 0.0001864865223424832, + "loss": 2.1553, "step": 2960 }, { - "epoch": 0.55, - "grad_norm": 0.2001953125, - "learning_rate": 0.00036381183380279305, - "loss": 2.1831, + "epoch": 0.5, + "grad_norm": 0.193359375, + "learning_rate": 0.0001864122257407989, + "loss": 2.1826, "step": 2965 }, { - "epoch": 0.55, - "grad_norm": 0.1962890625, - "learning_rate": 0.00036362584716289405, - "loss": 2.1655, + "epoch": 0.5, + "grad_norm": 0.1884765625, + "learning_rate": 0.00018633774034280306, + "loss": 2.1677, "step": 2970 }, { - "epoch": 0.55, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003634394316101756, - "loss": 2.1324, + "epoch": 0.5, + "grad_norm": 0.203125, + "learning_rate": 0.00018626306631123386, + "loss": 2.156, "step": 2975 }, { - "epoch": 0.55, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003632525876332892, - "loss": 2.2055, + "epoch": 0.5, + "grad_norm": 0.19921875, + "learning_rate": 0.00018618820380924165, + "loss": 2.1514, "step": 2980 }, { - "epoch": 0.55, - "grad_norm": 0.1982421875, - "learning_rate": 0.00036306531572200944, - "loss": 2.1625, + "epoch": 0.51, + "grad_norm": 0.2041015625, + "learning_rate": 0.00018611315300038847, + "loss": 2.1479, "step": 2985 }, { - "epoch": 0.55, - "grad_norm": 0.19921875, - "learning_rate": 0.00036287761636723275, - "loss": 2.1718, + "epoch": 0.51, + "grad_norm": 0.201171875, + "learning_rate": 0.00018603791404864784, + "loss": 2.1405, "step": 2990 }, { - "epoch": 0.56, - "grad_norm": 0.1923828125, - "learning_rate": 0.00036268949006097566, - "loss": 2.192, + "epoch": 0.51, + "grad_norm": 0.212890625, + "learning_rate": 0.00018596248711840436, + "loss": 2.1531, "step": 2995 }, { - "epoch": 0.56, - "grad_norm": 0.1904296875, - "learning_rate": 0.00036250093729637433, - "loss": 2.1969, + "epoch": 0.51, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001858868723744533, + "loss": 2.1746, "step": 3000 }, { - "epoch": 0.56, - "grad_norm": 0.1923828125, - "learning_rate": 0.00036231195856768235, - "loss": 2.1664, + "epoch": 0.51, + "grad_norm": 0.1953125, + "learning_rate": 0.00018581106998200023, + "loss": 2.1487, "step": 3005 }, { - "epoch": 0.56, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003621225543702703, - "loss": 2.1199, + "epoch": 0.51, + "grad_norm": 0.21484375, + "learning_rate": 0.00018573508010666078, + "loss": 2.2017, "step": 3010 }, { - "epoch": 0.56, - "grad_norm": 0.193359375, - "learning_rate": 0.00036193272520062376, - "loss": 2.211, + "epoch": 0.51, + "grad_norm": 0.1962890625, + "learning_rate": 0.00018565890291446014, + "loss": 2.1301, "step": 3015 }, { - "epoch": 0.56, - "grad_norm": 0.1904296875, - "learning_rate": 0.00036174247155634233, - "loss": 2.1411, + "epoch": 0.51, + "grad_norm": 0.1982421875, + "learning_rate": 0.00018558253857183277, + "loss": 2.139, "step": 3020 }, { - "epoch": 0.56, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003615517939361385, - "loss": 2.1303, + "epoch": 0.51, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001855059872456221, + "loss": 2.1775, "step": 3025 }, { - "epoch": 0.56, - "grad_norm": 0.1923828125, - "learning_rate": 0.00036136069283983577, - "loss": 2.1731, + "epoch": 0.51, + "grad_norm": 0.1962890625, + "learning_rate": 0.00018542924910307996, + "loss": 2.1787, "step": 3030 }, { - "epoch": 0.56, - "grad_norm": 0.193359375, - "learning_rate": 0.00036116916876836804, - "loss": 2.1537, + "epoch": 0.51, + "grad_norm": 0.1923828125, + "learning_rate": 0.0001853523243118664, + "loss": 2.14, "step": 3035 }, { - "epoch": 0.56, - "grad_norm": 0.193359375, - "learning_rate": 0.00036097722222377775, - "loss": 2.2165, + "epoch": 0.51, + "grad_norm": 0.203125, + "learning_rate": 0.00018527521304004932, + "loss": 2.1609, "step": 3040 }, { - "epoch": 0.56, - "grad_norm": 0.1953125, - "learning_rate": 0.00036078485370921476, - "loss": 2.1514, + "epoch": 0.52, + "grad_norm": 0.1943359375, + "learning_rate": 0.00018519791545610392, + "loss": 2.1944, "step": 3045 }, { - "epoch": 0.57, - "grad_norm": 0.193359375, - "learning_rate": 0.00036059206372893523, - "loss": 2.1642, + "epoch": 0.52, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001851204317289126, + "loss": 2.1888, "step": 3050 }, { - "epoch": 0.57, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003603988527883, - "loss": 2.1888, + "epoch": 0.52, + "grad_norm": 0.1953125, + "learning_rate": 0.00018504276202776438, + "loss": 2.1624, "step": 3055 }, { - "epoch": 0.57, - "grad_norm": 0.2041015625, - "learning_rate": 0.00036020522139377327, - "loss": 2.1869, + "epoch": 0.52, + "grad_norm": 0.19921875, + "learning_rate": 0.00018496490652235455, + "loss": 2.1327, "step": 3060 }, { - "epoch": 0.57, - "grad_norm": 0.19140625, - "learning_rate": 0.00036001117005292154, - "loss": 2.1677, + "epoch": 0.52, + "grad_norm": 0.2001953125, + "learning_rate": 0.00018488686538278452, + "loss": 2.154, "step": 3065 }, { - "epoch": 0.57, - "grad_norm": 0.189453125, - "learning_rate": 0.000359816699274412, - "loss": 2.167, + "epoch": 0.52, + "grad_norm": 0.1875, + "learning_rate": 0.0001848086387795611, + "loss": 2.1481, "step": 3070 }, { - "epoch": 0.57, - "grad_norm": 0.193359375, - "learning_rate": 0.00035962180956801133, - "loss": 2.1305, + "epoch": 0.52, + "grad_norm": 0.2001953125, + "learning_rate": 0.0001847302268835964, + "loss": 2.1466, "step": 3075 }, { - "epoch": 0.57, - "grad_norm": 0.189453125, - "learning_rate": 0.0003594265014445845, - "loss": 2.1672, + "epoch": 0.52, + "grad_norm": 0.197265625, + "learning_rate": 0.00018465162986620737, + "loss": 2.1797, "step": 3080 }, { - "epoch": 0.57, - "grad_norm": 0.189453125, - "learning_rate": 0.00035923077541609314, - "loss": 2.1662, + "epoch": 0.52, + "grad_norm": 0.19921875, + "learning_rate": 0.00018457284789911532, + "loss": 2.1701, "step": 3085 }, { - "epoch": 0.57, + "epoch": 0.52, "grad_norm": 0.1962890625, - "learning_rate": 0.0003590346319955942, - "loss": 2.2116, + "learning_rate": 0.00018449388115444578, + "loss": 2.1868, "step": 3090 }, { - "epoch": 0.57, - "grad_norm": 0.1875, - "learning_rate": 0.000358838071697239, - "loss": 2.1816, + "epoch": 0.52, + "grad_norm": 0.1982421875, + "learning_rate": 0.00018441472980472795, + "loss": 2.1842, "step": 3095 }, { - "epoch": 0.58, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003586410950362715, - "loss": 2.182, + "epoch": 0.53, + "grad_norm": 0.1982421875, + "learning_rate": 0.00018433539402289427, + "loss": 2.1489, "step": 3100 }, { - "epoch": 0.58, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003584437025290271, - "loss": 2.1423, + "epoch": 0.53, + "grad_norm": 0.2041015625, + "learning_rate": 0.00018425587398228021, + "loss": 2.1478, "step": 3105 }, { - "epoch": 0.58, - "grad_norm": 0.19140625, - "learning_rate": 0.00035824589469293127, - "loss": 2.1577, + "epoch": 0.53, + "grad_norm": 0.2021484375, + "learning_rate": 0.00018417616985662386, + "loss": 2.166, "step": 3110 }, { - "epoch": 0.58, + "epoch": 0.53, "grad_norm": 0.189453125, - "learning_rate": 0.00035804767204649805, - "loss": 2.2046, + "learning_rate": 0.0001840962818200654, + "loss": 2.1668, "step": 3115 }, { - "epoch": 0.58, + "epoch": 0.53, "grad_norm": 0.2001953125, - "learning_rate": 0.00035784903510932905, - "loss": 2.2101, + "learning_rate": 0.0001840162100471469, + "loss": 2.1575, "step": 3120 }, { - "epoch": 0.58, - "grad_norm": 0.1943359375, - "learning_rate": 0.00035764998440211167, - "loss": 2.1624, + "epoch": 0.53, + "grad_norm": 0.2060546875, + "learning_rate": 0.00018393595471281182, + "loss": 2.1573, "step": 3125 }, { - "epoch": 0.58, - "grad_norm": 0.19140625, - "learning_rate": 0.00035745052044661803, - "loss": 2.213, + "epoch": 0.53, + "grad_norm": 0.1982421875, + "learning_rate": 0.00018385551599240472, + "loss": 2.2463, "step": 3130 }, { - "epoch": 0.58, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003572506437657035, - "loss": 2.197, + "epoch": 0.53, + "grad_norm": 0.1923828125, + "learning_rate": 0.00018377489406167077, + "loss": 2.1743, "step": 3135 }, { - "epoch": 0.58, - "grad_norm": 0.1875, - "learning_rate": 0.00035705035488330523, - "loss": 2.1877, + "epoch": 0.53, + "grad_norm": 0.201171875, + "learning_rate": 0.00018369408909675543, + "loss": 2.1865, "step": 3140 }, { - "epoch": 0.58, - "grad_norm": 0.1923828125, - "learning_rate": 0.00035684965432444094, - "loss": 2.1717, + "epoch": 0.53, + "grad_norm": 0.2041015625, + "learning_rate": 0.00018361310127420417, + "loss": 2.1548, "step": 3145 }, { - "epoch": 0.58, - "grad_norm": 0.193359375, - "learning_rate": 0.00035664854261520753, - "loss": 2.1887, + "epoch": 0.53, + "grad_norm": 0.197265625, + "learning_rate": 0.00018353193077096178, + "loss": 2.1521, "step": 3150 }, { - "epoch": 0.59, - "grad_norm": 0.19140625, - "learning_rate": 0.00035644702028277955, - "loss": 2.1668, + "epoch": 0.53, + "grad_norm": 0.2021484375, + "learning_rate": 0.00018345057776437233, + "loss": 2.1347, "step": 3155 }, { - "epoch": 0.59, - "grad_norm": 0.20703125, - "learning_rate": 0.00035624508785540805, - "loss": 2.1618, + "epoch": 0.54, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001833690424321786, + "loss": 2.176, "step": 3160 }, { - "epoch": 0.59, - "grad_norm": 0.1962890625, - "learning_rate": 0.00035604274586241886, - "loss": 2.2285, + "epoch": 0.54, + "grad_norm": 0.2001953125, + "learning_rate": 0.00018328732495252167, + "loss": 2.1327, "step": 3165 }, { - "epoch": 0.59, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003558399948342116, - "loss": 2.2208, + "epoch": 0.54, + "grad_norm": 0.203125, + "learning_rate": 0.00018320542550394065, + "loss": 2.1949, "step": 3170 }, { - "epoch": 0.59, - "grad_norm": 0.193359375, - "learning_rate": 0.00035563683530225797, - "loss": 2.197, + "epoch": 0.54, + "grad_norm": 0.19921875, + "learning_rate": 0.00018312334426537214, + "loss": 2.1317, "step": 3175 }, { - "epoch": 0.59, - "grad_norm": 0.19140625, - "learning_rate": 0.0003554332677991006, - "loss": 2.2029, + "epoch": 0.54, + "grad_norm": 0.2001953125, + "learning_rate": 0.00018304108141615, + "loss": 2.1799, "step": 3180 }, { - "epoch": 0.59, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003552292928583514, - "loss": 2.1402, + "epoch": 0.54, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001829586371360048, + "loss": 2.214, "step": 3185 }, { - "epoch": 0.59, - "grad_norm": 0.2001953125, - "learning_rate": 0.0003550249110146904, - "loss": 2.2078, + "epoch": 0.54, + "grad_norm": 0.20703125, + "learning_rate": 0.00018287601160506362, + "loss": 2.1478, "step": 3190 }, { - "epoch": 0.59, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003548201228038642, - "loss": 2.1611, + "epoch": 0.54, + "grad_norm": 0.2001953125, + "learning_rate": 0.00018279320500384942, + "loss": 2.1804, "step": 3195 }, { - "epoch": 0.59, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003546149287626846, - "loss": 2.1795, + "epoch": 0.54, + "grad_norm": 0.19921875, + "learning_rate": 0.00018271021751328084, + "loss": 2.1779, "step": 3200 }, { - "epoch": 0.59, - "grad_norm": 0.2041015625, - "learning_rate": 0.00035440932942902727, - "loss": 2.1683, + "epoch": 0.54, + "grad_norm": 0.19921875, + "learning_rate": 0.00018262704931467174, + "loss": 2.1433, "step": 3205 }, { - "epoch": 0.6, + "epoch": 0.54, "grad_norm": 0.201171875, - "learning_rate": 0.00035420332534183023, - "loss": 2.1783, + "learning_rate": 0.00018254370058973072, + "loss": 2.1722, "step": 3210 }, { - "epoch": 0.6, - "grad_norm": 0.1953125, - "learning_rate": 0.0003539969170410924, - "loss": 2.1756, + "epoch": 0.54, + "grad_norm": 0.193359375, + "learning_rate": 0.0001824601715205609, + "loss": 2.133, "step": 3215 }, { - "epoch": 0.6, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003537901050678724, - "loss": 2.1829, + "epoch": 0.55, + "grad_norm": 0.205078125, + "learning_rate": 0.00018237646228965937, + "loss": 2.1461, "step": 3220 }, { - "epoch": 0.6, - "grad_norm": 0.1953125, - "learning_rate": 0.000353582889964287, - "loss": 2.1947, + "epoch": 0.55, + "grad_norm": 0.197265625, + "learning_rate": 0.0001822925730799168, + "loss": 2.1887, "step": 3225 }, { - "epoch": 0.6, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003533752722735096, - "loss": 2.1792, + "epoch": 0.55, + "grad_norm": 0.193359375, + "learning_rate": 0.00018220850407461717, + "loss": 2.1615, "step": 3230 }, { - "epoch": 0.6, - "grad_norm": 0.2001953125, - "learning_rate": 0.00035316725253976887, - "loss": 2.1858, + "epoch": 0.55, + "grad_norm": 0.1923828125, + "learning_rate": 0.0001821242554574373, + "loss": 2.1579, "step": 3235 }, { - "epoch": 0.6, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003529588313083474, - "loss": 2.1128, + "epoch": 0.55, + "grad_norm": 0.208984375, + "learning_rate": 0.00018203982741244628, + "loss": 2.1899, "step": 3240 }, { - "epoch": 0.6, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003527500091255805, - "loss": 2.2005, + "epoch": 0.55, + "grad_norm": 0.208984375, + "learning_rate": 0.00018195522012410536, + "loss": 2.1738, "step": 3245 }, { - "epoch": 0.6, - "grad_norm": 0.185546875, - "learning_rate": 0.000352540786538854, - "loss": 2.1451, + "epoch": 0.55, + "grad_norm": 0.21484375, + "learning_rate": 0.00018187043377726735, + "loss": 2.1169, "step": 3250 }, { - "epoch": 0.6, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003523311640966037, - "loss": 2.1474, + "epoch": 0.55, + "grad_norm": 0.2041015625, + "learning_rate": 0.0001817854685571763, + "loss": 2.1394, "step": 3255 }, { - "epoch": 0.6, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003521211423483133, - "loss": 2.1799, + "epoch": 0.55, + "grad_norm": 0.19921875, + "learning_rate": 0.00018170032464946708, + "loss": 2.1765, "step": 3260 }, { - "epoch": 0.61, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003519107218445134, - "loss": 2.1957, + "epoch": 0.55, + "grad_norm": 0.19921875, + "learning_rate": 0.0001816150022401649, + "loss": 2.174, "step": 3265 }, { - "epoch": 0.61, - "grad_norm": 0.19140625, - "learning_rate": 0.00035169990313677974, - "loss": 2.178, + "epoch": 0.55, + "grad_norm": 0.1962890625, + "learning_rate": 0.00018152950151568504, + "loss": 2.1572, "step": 3270 }, { - "epoch": 0.61, - "grad_norm": 0.193359375, - "learning_rate": 0.00035148868677773186, - "loss": 2.2025, + "epoch": 0.55, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001814438226628323, + "loss": 2.1492, "step": 3275 }, { - "epoch": 0.61, - "grad_norm": 0.197265625, - "learning_rate": 0.00035127707332103175, - "loss": 2.1991, + "epoch": 0.56, + "grad_norm": 0.2021484375, + "learning_rate": 0.00018135796586880068, + "loss": 2.1177, "step": 3280 }, { - "epoch": 0.61, - "grad_norm": 0.197265625, - "learning_rate": 0.00035106506332138217, - "loss": 2.1969, + "epoch": 0.56, + "grad_norm": 0.19921875, + "learning_rate": 0.000181271931321173, + "loss": 2.1699, "step": 3285 }, { - "epoch": 0.61, - "grad_norm": 0.1884765625, - "learning_rate": 0.00035085265733452554, - "loss": 2.1569, + "epoch": 0.56, + "grad_norm": 0.1943359375, + "learning_rate": 0.0001811857192079204, + "loss": 2.1318, "step": 3290 }, { - "epoch": 0.61, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003506398559172421, - "loss": 2.2137, + "epoch": 0.56, + "grad_norm": 0.197265625, + "learning_rate": 0.000181099329717402, + "loss": 2.1738, "step": 3295 }, { - "epoch": 0.61, - "grad_norm": 0.201171875, - "learning_rate": 0.0003504266596273488, - "loss": 2.1768, + "epoch": 0.56, + "grad_norm": 0.1953125, + "learning_rate": 0.00018101276303836438, + "loss": 2.1476, "step": 3300 }, { - "epoch": 0.61, - "grad_norm": 0.2021484375, - "learning_rate": 0.00035021306902369745, - "loss": 2.2186, + "epoch": 0.56, + "grad_norm": 0.1943359375, + "learning_rate": 0.00018092601935994137, + "loss": 2.1671, "step": 3305 }, { - "epoch": 0.61, - "grad_norm": 0.1953125, - "learning_rate": 0.0003499990846661737, - "loss": 2.1161, + "epoch": 0.56, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001808390988716534, + "loss": 2.1648, "step": 3310 }, { - "epoch": 0.62, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003497847071156952, - "loss": 2.1391, + "epoch": 0.56, + "grad_norm": 0.197265625, + "learning_rate": 0.0001807520017634073, + "loss": 2.1088, "step": 3315 }, { - "epoch": 0.62, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003495699369342104, - "loss": 2.1398, + "epoch": 0.56, + "grad_norm": 0.1982421875, + "learning_rate": 0.00018066472822549567, + "loss": 2.154, "step": 3320 }, { - "epoch": 0.62, - "grad_norm": 0.189453125, - "learning_rate": 0.0003493547746846968, - "loss": 2.1858, + "epoch": 0.56, + "grad_norm": 0.208984375, + "learning_rate": 0.00018057727844859672, + "loss": 2.1864, "step": 3325 }, { - "epoch": 0.62, - "grad_norm": 0.189453125, - "learning_rate": 0.0003491392209311597, - "loss": 2.159, + "epoch": 0.56, + "grad_norm": 0.205078125, + "learning_rate": 0.00018048965262377358, + "loss": 2.1863, "step": 3330 }, { - "epoch": 0.62, - "grad_norm": 0.1943359375, - "learning_rate": 0.00034892327623863077, - "loss": 2.1631, + "epoch": 0.56, + "grad_norm": 0.19921875, + "learning_rate": 0.00018040185094247413, + "loss": 2.1081, "step": 3335 }, { - "epoch": 0.62, - "grad_norm": 0.2021484375, - "learning_rate": 0.0003487069411731663, - "loss": 2.1614, + "epoch": 0.57, + "grad_norm": 0.203125, + "learning_rate": 0.00018031387359653035, + "loss": 2.1357, "step": 3340 }, { - "epoch": 0.62, - "grad_norm": 0.1943359375, - "learning_rate": 0.00034849021630184587, - "loss": 2.1665, + "epoch": 0.57, + "grad_norm": 0.19921875, + "learning_rate": 0.00018022572077815808, + "loss": 2.1908, "step": 3345 }, { - "epoch": 0.62, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003482731021927709, - "loss": 2.1872, + "epoch": 0.57, + "grad_norm": 0.1943359375, + "learning_rate": 0.00018013739267995659, + "loss": 2.1439, "step": 3350 }, { - "epoch": 0.62, - "grad_norm": 0.193359375, - "learning_rate": 0.0003480555994150631, - "loss": 2.1781, + "epoch": 0.57, + "grad_norm": 0.2021484375, + "learning_rate": 0.00018004888949490802, + "loss": 2.2017, "step": 3355 }, { - "epoch": 0.62, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003478377085388631, - "loss": 2.1907, + "epoch": 0.57, + "grad_norm": 0.19921875, + "learning_rate": 0.00017996021141637709, + "loss": 2.1607, "step": 3360 }, { - "epoch": 0.62, - "grad_norm": 0.1865234375, - "learning_rate": 0.00034761943013532874, - "loss": 2.1636, + "epoch": 0.57, + "grad_norm": 0.2080078125, + "learning_rate": 0.00017987135863811062, + "loss": 2.1538, "step": 3365 }, { - "epoch": 0.63, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003474007647766336, - "loss": 2.1246, + "epoch": 0.57, + "grad_norm": 0.19921875, + "learning_rate": 0.0001797823313542371, + "loss": 2.1318, "step": 3370 }, { - "epoch": 0.63, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003471817130359659, - "loss": 2.1438, + "epoch": 0.57, + "grad_norm": 0.203125, + "learning_rate": 0.00017969312975926632, + "loss": 2.1433, "step": 3375 }, { - "epoch": 0.63, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003469622754875263, - "loss": 2.1563, + "epoch": 0.57, + "grad_norm": 0.1923828125, + "learning_rate": 0.0001796037540480889, + "loss": 2.1633, "step": 3380 }, { - "epoch": 0.63, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003467424527065271, - "loss": 2.1809, + "epoch": 0.57, + "grad_norm": 0.2177734375, + "learning_rate": 0.0001795142044159759, + "loss": 2.1587, "step": 3385 }, { - "epoch": 0.63, - "grad_norm": 0.1943359375, - "learning_rate": 0.00034652224526919014, - "loss": 2.1731, + "epoch": 0.57, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001794244810585783, + "loss": 2.1575, "step": 3390 }, { - "epoch": 0.63, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003463016537527458, - "loss": 2.1673, + "epoch": 0.57, + "grad_norm": 0.2041015625, + "learning_rate": 0.00017933458417192672, + "loss": 2.1543, "step": 3395 }, { - "epoch": 0.63, - "grad_norm": 0.1962890625, - "learning_rate": 0.00034608067873543116, - "loss": 2.1539, + "epoch": 0.58, + "grad_norm": 0.19921875, + "learning_rate": 0.00017924451395243086, + "loss": 2.1969, "step": 3400 }, { - "epoch": 0.63, - "grad_norm": 0.19140625, - "learning_rate": 0.0003458593207964885, - "loss": 2.1642, + "epoch": 0.58, + "grad_norm": 0.19921875, + "learning_rate": 0.00017915427059687908, + "loss": 2.1322, "step": 3405 }, { - "epoch": 0.63, - "grad_norm": 0.197265625, - "learning_rate": 0.0003456375805161638, - "loss": 2.1938, + "epoch": 0.58, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017906385430243817, + "loss": 2.1745, "step": 3410 }, { - "epoch": 0.63, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003454154584757056, - "loss": 2.1743, + "epoch": 0.58, + "grad_norm": 0.205078125, + "learning_rate": 0.0001789732652666526, + "loss": 2.1668, "step": 3415 }, { - "epoch": 0.63, - "grad_norm": 0.1875, - "learning_rate": 0.0003451929552573629, - "loss": 2.1621, + "epoch": 0.58, + "grad_norm": 0.193359375, + "learning_rate": 0.00017888250368744437, + "loss": 2.1606, "step": 3420 }, { - "epoch": 0.64, + "epoch": 0.58, "grad_norm": 0.1962890625, - "learning_rate": 0.00034497007144438367, - "loss": 2.1694, + "learning_rate": 0.00017879156976311234, + "loss": 2.1449, "step": 3425 }, { - "epoch": 0.64, + "epoch": 0.58, "grad_norm": 0.2041015625, - "learning_rate": 0.00034474680762101406, - "loss": 2.1555, + "learning_rate": 0.000178700463692332, + "loss": 2.1682, "step": 3430 }, { - "epoch": 0.64, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003445231643724959, - "loss": 2.1856, + "epoch": 0.58, + "grad_norm": 0.203125, + "learning_rate": 0.00017860918567415496, + "loss": 2.1207, "step": 3435 }, { - "epoch": 0.64, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003442991422850658, - "loss": 2.1824, + "epoch": 0.58, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017851773590800844, + "loss": 2.1677, "step": 3440 }, { - "epoch": 0.64, - "grad_norm": 0.19921875, - "learning_rate": 0.0003440747419459534, - "loss": 2.1838, + "epoch": 0.58, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017842611459369497, + "loss": 2.1592, "step": 3445 }, { - "epoch": 0.64, - "grad_norm": 0.19140625, - "learning_rate": 0.0003438499639433798, - "loss": 2.1467, + "epoch": 0.58, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001783343219313918, + "loss": 2.1685, "step": 3450 }, { - "epoch": 0.64, - "grad_norm": 0.19140625, - "learning_rate": 0.00034362480886655615, - "loss": 2.1592, + "epoch": 0.59, + "grad_norm": 0.228515625, + "learning_rate": 0.0001782423581216507, + "loss": 2.1355, "step": 3455 }, { - "epoch": 0.64, - "grad_norm": 0.1943359375, - "learning_rate": 0.000343399277305682, - "loss": 2.1921, + "epoch": 0.59, + "grad_norm": 0.20703125, + "learning_rate": 0.00017815022336539716, + "loss": 2.1463, "step": 3460 }, { - "epoch": 0.64, - "grad_norm": 0.19140625, - "learning_rate": 0.0003431733698519437, - "loss": 2.1951, + "epoch": 0.59, + "grad_norm": 0.1982421875, + "learning_rate": 0.00017805791786393028, + "loss": 2.1927, "step": 3465 }, { - "epoch": 0.64, - "grad_norm": 0.189453125, - "learning_rate": 0.0003429470870975131, - "loss": 2.225, + "epoch": 0.59, + "grad_norm": 0.201171875, + "learning_rate": 0.00017796544181892228, + "loss": 2.1393, "step": 3470 }, { - "epoch": 0.64, - "grad_norm": 0.1943359375, - "learning_rate": 0.00034272042963554554, - "loss": 2.185, + "epoch": 0.59, + "grad_norm": 0.2041015625, + "learning_rate": 0.00017787279543241783, + "loss": 2.1723, "step": 3475 }, { - "epoch": 0.65, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003424933980601789, - "loss": 2.1617, + "epoch": 0.59, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017777997890683385, + "loss": 2.1761, "step": 3480 }, { - "epoch": 0.65, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003422659929665316, - "loss": 2.1954, + "epoch": 0.59, + "grad_norm": 0.2021484375, + "learning_rate": 0.00017768699244495904, + "loss": 2.1744, "step": 3485 }, { - "epoch": 0.65, - "grad_norm": 0.1923828125, - "learning_rate": 0.00034203821495070103, - "loss": 2.1607, + "epoch": 0.59, + "grad_norm": 0.205078125, + "learning_rate": 0.00017759383624995321, + "loss": 2.1923, "step": 3490 }, { - "epoch": 0.65, + "epoch": 0.59, "grad_norm": 0.1962890625, - "learning_rate": 0.0003418100646097624, - "loss": 2.1595, + "learning_rate": 0.00017750051052534724, + "loss": 2.1148, "step": 3495 }, { - "epoch": 0.65, - "grad_norm": 0.189453125, - "learning_rate": 0.00034158154254176654, - "loss": 2.1473, + "epoch": 0.59, + "grad_norm": 0.201171875, + "learning_rate": 0.0001774070154750422, + "loss": 2.1625, "step": 3500 }, { - "epoch": 0.65, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003413526493457391, - "loss": 2.2123, + "epoch": 0.59, + "grad_norm": 0.205078125, + "learning_rate": 0.00017731335130330927, + "loss": 2.1456, "step": 3505 }, { - "epoch": 0.65, - "grad_norm": 0.189453125, - "learning_rate": 0.0003411233856216781, - "loss": 2.1788, + "epoch": 0.59, + "grad_norm": 0.1953125, + "learning_rate": 0.00017721951821478898, + "loss": 2.1667, "step": 3510 }, { - "epoch": 0.65, - "grad_norm": 0.1943359375, - "learning_rate": 0.00034089375197055336, - "loss": 2.1447, + "epoch": 0.6, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017712551641449099, + "loss": 2.2208, "step": 3515 }, { - "epoch": 0.65, - "grad_norm": 0.197265625, - "learning_rate": 0.0003406637489943039, - "loss": 2.1855, + "epoch": 0.6, + "grad_norm": 0.1953125, + "learning_rate": 0.00017703134610779362, + "loss": 2.1765, "step": 3520 }, { - "epoch": 0.65, - "grad_norm": 0.197265625, - "learning_rate": 0.0003404333772958372, - "loss": 2.1916, + "epoch": 0.6, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017693700750044328, + "loss": 2.176, "step": 3525 }, { - "epoch": 0.65, - "grad_norm": 0.19140625, - "learning_rate": 0.00034020263747902715, - "loss": 2.1985, + "epoch": 0.6, + "grad_norm": 0.1953125, + "learning_rate": 0.0001768425007985541, + "loss": 2.1524, "step": 3530 }, { - "epoch": 0.66, - "grad_norm": 0.1943359375, - "learning_rate": 0.00033997153014871237, - "loss": 2.2015, + "epoch": 0.6, + "grad_norm": 0.197265625, + "learning_rate": 0.00017674782620860744, + "loss": 2.1427, "step": 3535 }, { - "epoch": 0.66, - "grad_norm": 0.197265625, - "learning_rate": 0.0003397400559106953, - "loss": 2.1476, + "epoch": 0.6, + "grad_norm": 0.1943359375, + "learning_rate": 0.00017665298393745152, + "loss": 2.1892, "step": 3540 }, { - "epoch": 0.66, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003395082153717397, - "loss": 2.159, + "epoch": 0.6, + "grad_norm": 0.197265625, + "learning_rate": 0.00017655797419230095, + "loss": 2.1542, "step": 3545 }, { - "epoch": 0.66, - "grad_norm": 0.197265625, - "learning_rate": 0.00033927600913956986, - "loss": 2.19, + "epoch": 0.6, + "grad_norm": 0.203125, + "learning_rate": 0.00017646279718073611, + "loss": 2.1891, "step": 3550 }, { - "epoch": 0.66, - "grad_norm": 0.1953125, - "learning_rate": 0.0003390434378228685, - "loss": 2.1506, + "epoch": 0.6, + "grad_norm": 0.2109375, + "learning_rate": 0.00017636745311070296, + "loss": 2.1905, "step": 3555 }, { - "epoch": 0.66, - "grad_norm": 0.1923828125, - "learning_rate": 0.00033881050203127527, - "loss": 2.1569, + "epoch": 0.6, + "grad_norm": 0.197265625, + "learning_rate": 0.00017627194219051238, + "loss": 2.164, "step": 3560 }, { - "epoch": 0.66, - "grad_norm": 0.203125, - "learning_rate": 0.0003385772023753855, - "loss": 2.1574, + "epoch": 0.6, + "grad_norm": 0.19921875, + "learning_rate": 0.0001761762646288398, + "loss": 2.1426, "step": 3565 }, { - "epoch": 0.66, - "grad_norm": 0.1953125, - "learning_rate": 0.000338343539466748, - "loss": 2.1559, + "epoch": 0.6, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001760804206347248, + "loss": 2.1479, "step": 3570 }, { - "epoch": 0.66, - "grad_norm": 0.19921875, - "learning_rate": 0.00033810951391786384, - "loss": 2.2004, + "epoch": 0.61, + "grad_norm": 0.205078125, + "learning_rate": 0.00017598441041757047, + "loss": 2.1541, "step": 3575 }, { - "epoch": 0.66, - "grad_norm": 0.1904296875, - "learning_rate": 0.00033787512634218483, - "loss": 2.1497, + "epoch": 0.61, + "grad_norm": 0.19921875, + "learning_rate": 0.00017588823418714314, + "loss": 2.1498, "step": 3580 }, { - "epoch": 0.67, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003376403773541117, - "loss": 2.1442, + "epoch": 0.61, + "grad_norm": 0.205078125, + "learning_rate": 0.00017579189215357187, + "loss": 2.1466, "step": 3585 }, { - "epoch": 0.67, - "grad_norm": 0.19140625, - "learning_rate": 0.0003374052675689925, - "loss": 2.1432, + "epoch": 0.61, + "grad_norm": 0.203125, + "learning_rate": 0.00017569538452734797, + "loss": 2.1702, "step": 3590 }, { - "epoch": 0.67, - "grad_norm": 0.1962890625, - "learning_rate": 0.000337169797603121, - "loss": 2.1708, + "epoch": 0.61, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017559871151932448, + "loss": 2.1822, "step": 3595 }, { - "epoch": 0.67, - "grad_norm": 0.19140625, - "learning_rate": 0.00033693396807373536, - "loss": 2.1471, + "epoch": 0.61, + "grad_norm": 0.201171875, + "learning_rate": 0.0001755018733407158, + "loss": 2.1712, "step": 3600 }, { - "epoch": 0.67, - "grad_norm": 0.193359375, - "learning_rate": 0.00033669777959901583, - "loss": 2.1216, + "epoch": 0.61, + "grad_norm": 0.19921875, + "learning_rate": 0.00017540487020309726, + "loss": 2.1469, "step": 3605 }, { - "epoch": 0.67, - "grad_norm": 0.1943359375, - "learning_rate": 0.000336461232798084, - "loss": 2.1942, + "epoch": 0.61, + "grad_norm": 0.20703125, + "learning_rate": 0.0001753077023184045, + "loss": 2.1792, "step": 3610 }, { - "epoch": 0.67, - "grad_norm": 0.19140625, - "learning_rate": 0.0003362243282910005, - "loss": 2.198, + "epoch": 0.61, + "grad_norm": 0.19921875, + "learning_rate": 0.00017521036989893318, + "loss": 2.1561, "step": 3615 }, { - "epoch": 0.67, - "grad_norm": 0.205078125, - "learning_rate": 0.0003359870666987637, - "loss": 2.1837, + "epoch": 0.61, + "grad_norm": 0.203125, + "learning_rate": 0.00017511287315733837, + "loss": 2.164, "step": 3620 }, { - "epoch": 0.67, - "grad_norm": 0.197265625, - "learning_rate": 0.0003357494486433078, - "loss": 2.1701, + "epoch": 0.61, + "grad_norm": 0.2021484375, + "learning_rate": 0.00017501521230663429, + "loss": 2.1957, "step": 3625 }, { - "epoch": 0.67, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003355114747475019, - "loss": 2.1814, + "epoch": 0.61, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017491738756019357, + "loss": 2.147, "step": 3630 }, { - "epoch": 0.67, - "grad_norm": 0.1884765625, - "learning_rate": 0.00033527314563514725, - "loss": 2.1656, + "epoch": 0.62, + "grad_norm": 0.197265625, + "learning_rate": 0.00017481939913174696, + "loss": 2.1521, "step": 3635 }, { - "epoch": 0.68, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003350344619309767, - "loss": 2.196, + "epoch": 0.62, + "grad_norm": 0.21875, + "learning_rate": 0.00017472124723538288, + "loss": 2.1614, "step": 3640 }, { - "epoch": 0.68, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003347954242606522, + "epoch": 0.62, + "grad_norm": 0.203125, + "learning_rate": 0.00017462293208554683, "loss": 2.1617, "step": 3645 }, { - "epoch": 0.68, - "grad_norm": 0.1982421875, - "learning_rate": 0.000334556033250764, - "loss": 2.1825, + "epoch": 0.62, + "grad_norm": 0.2080078125, + "learning_rate": 0.00017452445389704106, + "loss": 2.1826, "step": 3650 }, { - "epoch": 0.68, - "grad_norm": 0.1923828125, - "learning_rate": 0.00033431628952882813, - "loss": 2.132, + "epoch": 0.62, + "grad_norm": 0.2041015625, + "learning_rate": 0.00017442581288502397, + "loss": 2.1398, "step": 3655 }, { - "epoch": 0.68, - "grad_norm": 0.193359375, - "learning_rate": 0.00033407619372328545, - "loss": 2.1444, + "epoch": 0.62, + "grad_norm": 0.205078125, + "learning_rate": 0.00017432700926500977, + "loss": 2.1396, "step": 3660 }, { - "epoch": 0.68, - "grad_norm": 0.197265625, - "learning_rate": 0.00033383574646349973, - "loss": 2.1785, + "epoch": 0.62, + "grad_norm": 0.20703125, + "learning_rate": 0.00017422804325286788, + "loss": 2.133, "step": 3665 }, { - "epoch": 0.68, - "grad_norm": 0.197265625, - "learning_rate": 0.0003335949483797558, - "loss": 2.117, + "epoch": 0.62, + "grad_norm": 0.2041015625, + "learning_rate": 0.0001741289150648225, + "loss": 2.2083, "step": 3670 }, { - "epoch": 0.68, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003333538001032583, - "loss": 2.1793, + "epoch": 0.62, + "grad_norm": 0.2080078125, + "learning_rate": 0.00017402962491745228, + "loss": 2.1073, "step": 3675 }, { - "epoch": 0.68, - "grad_norm": 0.201171875, - "learning_rate": 0.00033311230226612987, - "loss": 2.1872, + "epoch": 0.62, + "grad_norm": 0.205078125, + "learning_rate": 0.00017393017302768963, + "loss": 2.2294, "step": 3680 }, { - "epoch": 0.68, - "grad_norm": 0.1923828125, - "learning_rate": 0.00033287045550140924, - "loss": 2.1664, + "epoch": 0.62, + "grad_norm": 0.1982421875, + "learning_rate": 0.00017383055961282028, + "loss": 2.1668, "step": 3685 }, { - "epoch": 0.68, - "grad_norm": 0.19140625, - "learning_rate": 0.00033262826044304995, - "loss": 2.1627, + "epoch": 0.62, + "grad_norm": 0.203125, + "learning_rate": 0.00017373078489048302, + "loss": 2.1639, "step": 3690 }, { - "epoch": 0.69, - "grad_norm": 0.201171875, - "learning_rate": 0.00033238571772591845, - "loss": 2.1571, + "epoch": 0.63, + "grad_norm": 0.19140625, + "learning_rate": 0.00017363084907866895, + "loss": 2.1752, "step": 3695 }, { - "epoch": 0.69, - "grad_norm": 0.19140625, - "learning_rate": 0.00033214282798579256, - "loss": 2.1433, + "epoch": 0.63, + "grad_norm": 0.201171875, + "learning_rate": 0.00017353075239572117, + "loss": 2.1609, "step": 3700 }, { - "epoch": 0.69, - "grad_norm": 0.189453125, - "learning_rate": 0.00033189959185935973, - "loss": 2.127, + "epoch": 0.63, + "grad_norm": 0.2021484375, + "learning_rate": 0.00017343049506033425, + "loss": 2.1996, "step": 3705 }, { - "epoch": 0.69, - "grad_norm": 0.2001953125, - "learning_rate": 0.00033165600998421544, - "loss": 2.1733, + "epoch": 0.63, + "grad_norm": 0.1982421875, + "learning_rate": 0.00017333007729155377, + "loss": 2.1319, "step": 3710 }, { - "epoch": 0.69, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003314120829988613, - "loss": 2.1758, + "epoch": 0.63, + "grad_norm": 0.208984375, + "learning_rate": 0.00017322949930877583, + "loss": 2.1314, "step": 3715 }, { - "epoch": 0.69, - "grad_norm": 0.19921875, - "learning_rate": 0.0003311678115427039, - "loss": 2.17, + "epoch": 0.63, + "grad_norm": 0.2021484375, + "learning_rate": 0.00017312876133174655, + "loss": 2.1361, "step": 3720 }, { - "epoch": 0.69, + "epoch": 0.63, "grad_norm": 0.2041015625, - "learning_rate": 0.0003309231962560524, - "loss": 2.167, + "learning_rate": 0.00017302786358056155, + "loss": 2.1442, "step": 3725 }, { - "epoch": 0.69, - "grad_norm": 0.197265625, - "learning_rate": 0.0003306782377801175, - "loss": 2.172, + "epoch": 0.63, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017292680627566568, + "loss": 2.1606, "step": 3730 }, { - "epoch": 0.69, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003304329367570094, - "loss": 2.1651, + "epoch": 0.63, + "grad_norm": 0.2021484375, + "learning_rate": 0.00017282558963785234, + "loss": 2.1756, "step": 3735 }, { - "epoch": 0.69, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003301872938297362, - "loss": 2.153, + "epoch": 0.63, + "grad_norm": 0.2099609375, + "learning_rate": 0.0001727242138882629, + "loss": 2.1586, "step": 3740 }, { - "epoch": 0.69, - "grad_norm": 0.203125, - "learning_rate": 0.0003299413096422025, - "loss": 2.1491, + "epoch": 0.63, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017262267924838658, + "loss": 2.148, "step": 3745 }, { - "epoch": 0.7, - "grad_norm": 0.2099609375, - "learning_rate": 0.00032969498483920704, - "loss": 2.1947, + "epoch": 0.64, + "grad_norm": 0.1982421875, + "learning_rate": 0.0001725209859400596, + "loss": 2.1351, "step": 3750 }, { - "epoch": 0.7, - "grad_norm": 0.1923828125, - "learning_rate": 0.00032944832006644165, - "loss": 2.1276, + "epoch": 0.64, + "grad_norm": 0.2041015625, + "learning_rate": 0.0001724191341854649, + "loss": 2.1536, "step": 3755 }, { - "epoch": 0.7, + "epoch": 0.64, "grad_norm": 0.2001953125, - "learning_rate": 0.0003292013159704893, - "loss": 2.1548, + "learning_rate": 0.00017231712420713157, + "loss": 2.15, "step": 3760 }, { - "epoch": 0.7, - "grad_norm": 0.1953125, - "learning_rate": 0.00032895397319882236, - "loss": 2.1466, + "epoch": 0.64, + "grad_norm": 0.197265625, + "learning_rate": 0.00017221495622793444, + "loss": 2.1419, "step": 3765 }, { - "epoch": 0.7, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003287062923998011, - "loss": 2.1091, + "epoch": 0.64, + "grad_norm": 0.19921875, + "learning_rate": 0.00017211263047109353, + "loss": 2.1394, "step": 3770 }, { - "epoch": 0.7, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003284582742226717, - "loss": 2.1843, + "epoch": 0.64, + "grad_norm": 0.208984375, + "learning_rate": 0.00017201014716017348, + "loss": 2.2241, "step": 3775 }, { - "epoch": 0.7, - "grad_norm": 0.201171875, - "learning_rate": 0.00032820991931756493, - "loss": 2.2426, + "epoch": 0.64, + "grad_norm": 0.1982421875, + "learning_rate": 0.00017190750651908336, + "loss": 2.1367, "step": 3780 }, { - "epoch": 0.7, - "grad_norm": 0.1904296875, - "learning_rate": 0.00032796122833549394, - "loss": 2.1489, + "epoch": 0.64, + "grad_norm": 0.1982421875, + "learning_rate": 0.00017180470877207576, + "loss": 2.1249, "step": 3785 }, { - "epoch": 0.7, - "grad_norm": 0.189453125, - "learning_rate": 0.000327712201928353, - "loss": 2.1309, + "epoch": 0.64, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001717017541437467, + "loss": 2.1624, "step": 3790 }, { - "epoch": 0.7, - "grad_norm": 0.1884765625, - "learning_rate": 0.00032746284074891573, - "loss": 2.1768, + "epoch": 0.64, + "grad_norm": 0.1943359375, + "learning_rate": 0.00017159864285903488, + "loss": 2.1353, "step": 3795 }, { - "epoch": 0.71, - "grad_norm": 0.189453125, - "learning_rate": 0.0003272131454508331, - "loss": 2.1846, + "epoch": 0.64, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017149537514322123, + "loss": 2.1452, "step": 3800 }, { - "epoch": 0.71, - "grad_norm": 0.2001953125, - "learning_rate": 0.000326963116688632, - "loss": 2.2, + "epoch": 0.64, + "grad_norm": 0.201171875, + "learning_rate": 0.0001713919512219285, + "loss": 2.1418, "step": 3805 }, { - "epoch": 0.71, - "grad_norm": 0.1904296875, - "learning_rate": 0.00032671275511771337, - "loss": 2.1608, + "epoch": 0.65, + "grad_norm": 0.2060546875, + "learning_rate": 0.00017128837132112076, + "loss": 2.1446, "step": 3810 }, { - "epoch": 0.71, - "grad_norm": 0.193359375, - "learning_rate": 0.00032646206139435067, - "loss": 2.1848, + "epoch": 0.65, + "grad_norm": 0.2041015625, + "learning_rate": 0.00017118463566710284, + "loss": 2.1525, "step": 3815 }, { - "epoch": 0.71, - "grad_norm": 0.1923828125, - "learning_rate": 0.00032621103617568785, - "loss": 2.1613, + "epoch": 0.65, + "grad_norm": 0.2041015625, + "learning_rate": 0.00017108074448651976, + "loss": 2.1617, "step": 3820 }, { - "epoch": 0.71, - "grad_norm": 0.19140625, - "learning_rate": 0.00032595968011973805, - "loss": 2.1394, + "epoch": 0.65, + "grad_norm": 0.201171875, + "learning_rate": 0.00017097669800635653, + "loss": 2.1856, "step": 3825 }, { - "epoch": 0.71, - "grad_norm": 0.1943359375, - "learning_rate": 0.00032570799388538133, - "loss": 2.1516, + "epoch": 0.65, + "grad_norm": 0.205078125, + "learning_rate": 0.00017087249645393734, + "loss": 2.1432, "step": 3830 }, { - "epoch": 0.71, + "epoch": 0.65, "grad_norm": 0.2041015625, - "learning_rate": 0.00032545597813236356, - "loss": 2.214, + "learning_rate": 0.00017076814005692522, + "loss": 2.1366, "step": 3835 }, { - "epoch": 0.71, - "grad_norm": 0.1884765625, - "learning_rate": 0.0003252036335212941, - "loss": 2.1682, + "epoch": 0.65, + "grad_norm": 0.203125, + "learning_rate": 0.0001706636290433215, + "loss": 2.1779, "step": 3840 }, { - "epoch": 0.71, - "grad_norm": 0.197265625, - "learning_rate": 0.0003249509607136446, - "loss": 2.1702, + "epoch": 0.65, + "grad_norm": 0.2001953125, + "learning_rate": 0.00017055896364146528, + "loss": 2.1282, "step": 3845 }, { - "epoch": 0.71, - "grad_norm": 0.1943359375, - "learning_rate": 0.00032469796037174674, - "loss": 2.2195, + "epoch": 0.65, + "grad_norm": 0.197265625, + "learning_rate": 0.00017045414408003312, + "loss": 2.1785, "step": 3850 }, { - "epoch": 0.72, - "grad_norm": 0.1904296875, - "learning_rate": 0.00032444463315879103, - "loss": 2.0988, + "epoch": 0.65, + "grad_norm": 0.2080078125, + "learning_rate": 0.00017034917058803822, + "loss": 2.1825, "step": 3855 }, { - "epoch": 0.72, + "epoch": 0.65, "grad_norm": 0.19921875, - "learning_rate": 0.0003241909797388246, - "loss": 2.1504, + "learning_rate": 0.00017024404339483016, + "loss": 2.1743, "step": 3860 }, { - "epoch": 0.72, - "grad_norm": 0.1982421875, - "learning_rate": 0.00032393700077674987, - "loss": 2.1767, + "epoch": 0.65, + "grad_norm": 0.201171875, + "learning_rate": 0.00017013876273009438, + "loss": 2.1668, "step": 3865 }, { - "epoch": 0.72, - "grad_norm": 0.201171875, - "learning_rate": 0.0003236826969383224, - "loss": 2.1628, + "epoch": 0.66, + "grad_norm": 0.197265625, + "learning_rate": 0.00017003332882385155, + "loss": 2.1435, "step": 3870 }, { - "epoch": 0.72, - "grad_norm": 0.201171875, - "learning_rate": 0.0003234280688901495, - "loss": 2.2204, + "epoch": 0.66, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001699277419064572, + "loss": 2.1453, "step": 3875 }, { - "epoch": 0.72, - "grad_norm": 0.197265625, - "learning_rate": 0.00032317311729968825, - "loss": 2.1606, + "epoch": 0.66, + "grad_norm": 0.201171875, + "learning_rate": 0.00016982200220860114, + "loss": 2.1617, "step": 3880 }, { - "epoch": 0.72, - "grad_norm": 0.2001953125, - "learning_rate": 0.0003229178428352438, - "loss": 2.1721, + "epoch": 0.66, + "grad_norm": 0.197265625, + "learning_rate": 0.00016971610996130703, + "loss": 2.1807, "step": 3885 }, { - "epoch": 0.72, - "grad_norm": 0.1923828125, - "learning_rate": 0.00032266224616596785, - "loss": 2.1741, + "epoch": 0.66, + "grad_norm": 0.201171875, + "learning_rate": 0.0001696100653959317, + "loss": 2.1619, "step": 3890 }, { - "epoch": 0.72, - "grad_norm": 0.2080078125, - "learning_rate": 0.0003224063279618564, - "loss": 2.1784, + "epoch": 0.66, + "grad_norm": 0.203125, + "learning_rate": 0.0001695038687441649, + "loss": 2.1842, "step": 3895 }, { - "epoch": 0.72, - "grad_norm": 0.1953125, - "learning_rate": 0.00032215008889374853, - "loss": 2.1944, + "epoch": 0.66, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001693975202380286, + "loss": 2.1851, "step": 3900 }, { - "epoch": 0.72, - "grad_norm": 0.1962890625, - "learning_rate": 0.00032189352963332425, - "loss": 2.1608, + "epoch": 0.66, + "grad_norm": 0.2099609375, + "learning_rate": 0.0001692910201098766, + "loss": 2.1786, "step": 3905 }, { - "epoch": 0.73, - "grad_norm": 0.1953125, - "learning_rate": 0.0003216366508531031, - "loss": 2.1662, + "epoch": 0.66, + "grad_norm": 0.1982421875, + "learning_rate": 0.00016918436859239387, + "loss": 2.1408, "step": 3910 }, { - "epoch": 0.73, - "grad_norm": 0.2060546875, - "learning_rate": 0.00032137945322644184, - "loss": 2.1546, + "epoch": 0.66, + "grad_norm": 0.205078125, + "learning_rate": 0.00016907756591859628, + "loss": 2.1447, "step": 3915 }, { - "epoch": 0.73, - "grad_norm": 0.1962890625, - "learning_rate": 0.00032112193742753333, - "loss": 2.1698, + "epoch": 0.66, + "grad_norm": 0.2041015625, + "learning_rate": 0.00016897061232182977, + "loss": 2.1707, "step": 3920 }, { - "epoch": 0.73, - "grad_norm": 0.203125, - "learning_rate": 0.0003208641041314043, - "loss": 2.2374, + "epoch": 0.66, + "grad_norm": 0.205078125, + "learning_rate": 0.0001688635080357702, + "loss": 2.1928, "step": 3925 }, { - "epoch": 0.73, - "grad_norm": 0.1953125, - "learning_rate": 0.0003206059540139139, - "loss": 2.1985, + "epoch": 0.67, + "grad_norm": 0.2177734375, + "learning_rate": 0.00016875625329442257, + "loss": 2.169, "step": 3930 }, { - "epoch": 0.73, - "grad_norm": 0.2041015625, - "learning_rate": 0.0003203474877517514, - "loss": 2.1759, + "epoch": 0.67, + "grad_norm": 0.2001953125, + "learning_rate": 0.0001686488483321206, + "loss": 2.1886, "step": 3935 }, { - "epoch": 0.73, - "grad_norm": 0.1943359375, - "learning_rate": 0.00032008870602243523, - "loss": 2.1489, + "epoch": 0.67, + "grad_norm": 0.205078125, + "learning_rate": 0.00016854129338352624, + "loss": 2.2059, "step": 3940 }, { - "epoch": 0.73, - "grad_norm": 0.2021484375, - "learning_rate": 0.0003198296095043104, - "loss": 2.1875, + "epoch": 0.67, + "grad_norm": 0.2060546875, + "learning_rate": 0.00016843358868362914, + "loss": 2.1918, "step": 3945 }, { - "epoch": 0.73, - "grad_norm": 0.19921875, - "learning_rate": 0.00031957019887654737, - "loss": 2.1403, + "epoch": 0.67, + "grad_norm": 0.1962890625, + "learning_rate": 0.0001683257344677461, + "loss": 2.1218, "step": 3950 }, { - "epoch": 0.73, - "grad_norm": 0.2001953125, - "learning_rate": 0.0003193104748191398, - "loss": 2.1555, + "epoch": 0.67, + "grad_norm": 0.205078125, + "learning_rate": 0.0001682177309715206, + "loss": 2.1635, "step": 3955 }, { - "epoch": 0.73, - "grad_norm": 0.201171875, - "learning_rate": 0.0003190504380129029, - "loss": 2.178, + "epoch": 0.67, + "grad_norm": 0.2099609375, + "learning_rate": 0.0001681095784309223, + "loss": 2.1986, "step": 3960 }, { - "epoch": 0.74, + "epoch": 0.67, "grad_norm": 0.19921875, - "learning_rate": 0.0003187900891394718, - "loss": 2.1751, + "learning_rate": 0.00016800127708224648, + "loss": 2.101, "step": 3965 }, { - "epoch": 0.74, - "grad_norm": 0.1953125, - "learning_rate": 0.0003185294288812996, - "loss": 2.1864, + "epoch": 0.67, + "grad_norm": 0.197265625, + "learning_rate": 0.0001678928271621135, + "loss": 2.1343, "step": 3970 }, { - "epoch": 0.74, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003182684579216557, - "loss": 2.1756, + "epoch": 0.67, + "grad_norm": 0.205078125, + "learning_rate": 0.0001677842289074684, + "loss": 2.1897, "step": 3975 }, { - "epoch": 0.74, - "grad_norm": 0.201171875, - "learning_rate": 0.0003180071769446238, - "loss": 2.1778, - "step": 3980 + "epoch": 0.67, + "grad_norm": 0.19921875, + "learning_rate": 0.00016767548255558023, + "loss": 2.1349, + "step": 3980 }, { - "epoch": 0.74, - "grad_norm": 0.208984375, - "learning_rate": 0.00031774558663510044, - "loss": 2.1806, + "epoch": 0.67, + "grad_norm": 0.2109375, + "learning_rate": 0.0001675665883440417, + "loss": 2.1386, "step": 3985 }, { - "epoch": 0.74, - "grad_norm": 0.197265625, - "learning_rate": 0.00031748368767879284, - "loss": 2.1518, + "epoch": 0.68, + "grad_norm": 0.2021484375, + "learning_rate": 0.00016745754651076848, + "loss": 2.1427, "step": 3990 }, { - "epoch": 0.74, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003172214807622175, - "loss": 2.1643, + "epoch": 0.68, + "grad_norm": 0.2099609375, + "learning_rate": 0.00016734835729399877, + "loss": 2.1637, "step": 3995 }, { - "epoch": 0.74, - "grad_norm": 0.197265625, - "learning_rate": 0.00031695896657269785, - "loss": 2.1502, + "epoch": 0.68, + "grad_norm": 0.2099609375, + "learning_rate": 0.0001672390209322929, + "loss": 2.1205, "step": 4000 }, { - "epoch": 0.74, - "grad_norm": 0.1982421875, - "learning_rate": 0.00031669614579836307, - "loss": 2.1605, + "epoch": 0.68, + "grad_norm": 0.203125, + "learning_rate": 0.0001671295376645325, + "loss": 2.1365, "step": 4005 }, { - "epoch": 0.74, - "grad_norm": 0.197265625, - "learning_rate": 0.00031643301912814576, - "loss": 2.1922, + "epoch": 0.68, + "grad_norm": 0.205078125, + "learning_rate": 0.0001670199077299203, + "loss": 2.1636, "step": 4010 }, { - "epoch": 0.74, - "grad_norm": 0.193359375, - "learning_rate": 0.00031616958725178056, - "loss": 2.146, + "epoch": 0.68, + "grad_norm": 0.2021484375, + "learning_rate": 0.00016691013136797947, + "loss": 2.1305, "step": 4015 }, { - "epoch": 0.75, - "grad_norm": 0.19140625, - "learning_rate": 0.0003159058508598021, - "loss": 2.1565, + "epoch": 0.68, + "grad_norm": 0.2041015625, + "learning_rate": 0.00016680020881855301, + "loss": 2.1441, "step": 4020 }, { - "epoch": 0.75, - "grad_norm": 0.193359375, - "learning_rate": 0.00031564181064354315, - "loss": 2.141, + "epoch": 0.68, + "grad_norm": 0.205078125, + "learning_rate": 0.0001666901403218034, + "loss": 2.1418, "step": 4025 }, { - "epoch": 0.75, - "grad_norm": 0.2041015625, - "learning_rate": 0.000315377467295133, - "loss": 2.1504, + "epoch": 0.68, + "grad_norm": 0.197265625, + "learning_rate": 0.000166579926118212, + "loss": 2.15, "step": 4030 }, { - "epoch": 0.75, - "grad_norm": 0.19921875, - "learning_rate": 0.0003151128215074954, - "loss": 2.1534, + "epoch": 0.68, + "grad_norm": 0.205078125, + "learning_rate": 0.00016646956644857837, + "loss": 2.1799, "step": 4035 }, { - "epoch": 0.75, - "grad_norm": 0.19921875, - "learning_rate": 0.0003148478739743472, - "loss": 2.1279, + "epoch": 0.68, + "grad_norm": 0.2119140625, + "learning_rate": 0.0001663590615540201, + "loss": 2.1624, "step": 4040 }, { - "epoch": 0.75, - "grad_norm": 0.208984375, - "learning_rate": 0.0003145826253901957, - "loss": 2.1695, + "epoch": 0.69, + "grad_norm": 0.205078125, + "learning_rate": 0.00016624841167597193, + "loss": 2.1176, "step": 4045 }, { - "epoch": 0.75, - "grad_norm": 0.1953125, - "learning_rate": 0.00031431707645033775, - "loss": 2.2067, + "epoch": 0.69, + "grad_norm": 0.203125, + "learning_rate": 0.00016613761705618538, + "loss": 2.1614, "step": 4050 }, { - "epoch": 0.75, - "grad_norm": 0.201171875, - "learning_rate": 0.00031405122785085757, - "loss": 2.1689, + "epoch": 0.69, + "grad_norm": 0.203125, + "learning_rate": 0.0001660266779367283, + "loss": 2.1349, "step": 4055 }, { - "epoch": 0.75, - "grad_norm": 0.1953125, - "learning_rate": 0.0003137850802886245, - "loss": 2.1577, + "epoch": 0.69, + "grad_norm": 0.208984375, + "learning_rate": 0.00016591559455998408, + "loss": 2.1229, "step": 4060 }, { - "epoch": 0.75, - "grad_norm": 0.1904296875, - "learning_rate": 0.00031351863446129187, - "loss": 2.1237, + "epoch": 0.69, + "grad_norm": 0.203125, + "learning_rate": 0.0001658043671686514, + "loss": 2.1506, "step": 4065 }, { - "epoch": 0.76, - "grad_norm": 0.1953125, - "learning_rate": 0.0003132518910672948, - "loss": 2.247, + "epoch": 0.69, + "grad_norm": 0.208984375, + "learning_rate": 0.00016569299600574365, + "loss": 2.1233, "step": 4070 }, { - "epoch": 0.76, - "grad_norm": 0.201171875, - "learning_rate": 0.00031298485080584823, - "loss": 2.1987, + "epoch": 0.69, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001655814813145882, + "loss": 2.151, "step": 4075 }, { - "epoch": 0.76, - "grad_norm": 0.197265625, - "learning_rate": 0.0003127175143769456, - "loss": 2.1932, + "epoch": 0.69, + "grad_norm": 0.2119140625, + "learning_rate": 0.00016546982333882608, + "loss": 2.1246, "step": 4080 }, { - "epoch": 0.76, - "grad_norm": 0.1904296875, - "learning_rate": 0.00031244988248135645, - "loss": 2.1826, + "epoch": 0.69, + "grad_norm": 0.2041015625, + "learning_rate": 0.00016535802232241133, + "loss": 2.1752, "step": 4085 }, { - "epoch": 0.76, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003121819558206249, - "loss": 2.1388, + "epoch": 0.69, + "grad_norm": 0.203125, + "learning_rate": 0.0001652460785096106, + "loss": 2.1706, "step": 4090 }, { - "epoch": 0.76, - "grad_norm": 0.2001953125, - "learning_rate": 0.00031191373509706767, - "loss": 2.1869, + "epoch": 0.69, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001651339921450024, + "loss": 2.1159, "step": 4095 }, { - "epoch": 0.76, - "grad_norm": 0.193359375, - "learning_rate": 0.00031164522101377254, - "loss": 2.1403, + "epoch": 0.69, + "grad_norm": 0.21484375, + "learning_rate": 0.0001650217634734768, + "loss": 2.1378, "step": 4100 }, { - "epoch": 0.76, - "grad_norm": 0.203125, - "learning_rate": 0.000311376414274596, - "loss": 2.176, + "epoch": 0.7, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001649093927402347, + "loss": 2.1695, "step": 4105 }, { - "epoch": 0.76, - "grad_norm": 0.1943359375, - "learning_rate": 0.00031110731558416175, - "loss": 2.184, + "epoch": 0.7, + "grad_norm": 0.208984375, + "learning_rate": 0.00016479688019078748, + "loss": 2.1548, "step": 4110 }, { - "epoch": 0.76, - "grad_norm": 0.19921875, - "learning_rate": 0.0003108379256478589, - "loss": 2.1698, + "epoch": 0.7, + "grad_norm": 0.201171875, + "learning_rate": 0.00016468422607095626, + "loss": 2.1457, "step": 4115 }, { - "epoch": 0.76, - "grad_norm": 0.20703125, - "learning_rate": 0.00031056824517183986, - "loss": 2.1796, + "epoch": 0.7, + "grad_norm": 0.2001953125, + "learning_rate": 0.00016457143062687153, + "loss": 2.1345, "step": 4120 }, { - "epoch": 0.77, - "grad_norm": 0.2001953125, - "learning_rate": 0.0003102982748630188, - "loss": 2.196, + "epoch": 0.7, + "grad_norm": 0.2021484375, + "learning_rate": 0.00016445849410497257, + "loss": 2.1505, "step": 4125 }, { - "epoch": 0.77, - "grad_norm": 0.2041015625, - "learning_rate": 0.00031002801542906943, - "loss": 2.165, + "epoch": 0.7, + "grad_norm": 0.2060546875, + "learning_rate": 0.00016434541675200678, + "loss": 2.1584, "step": 4130 }, { - "epoch": 0.77, - "grad_norm": 0.2080078125, - "learning_rate": 0.00030975746757842354, - "loss": 2.1812, + "epoch": 0.7, + "grad_norm": 0.20703125, + "learning_rate": 0.00016423219881502946, + "loss": 2.1538, "step": 4135 }, { - "epoch": 0.77, - "grad_norm": 0.1943359375, - "learning_rate": 0.00030948663202026873, - "loss": 2.2209, + "epoch": 0.7, + "grad_norm": 0.19921875, + "learning_rate": 0.00016411884054140277, + "loss": 2.1481, "step": 4140 }, { - "epoch": 0.77, - "grad_norm": 0.1982421875, - "learning_rate": 0.00030921550946454694, - "loss": 2.1573, + "epoch": 0.7, + "grad_norm": 0.2080078125, + "learning_rate": 0.00016400534217879574, + "loss": 2.1452, "step": 4145 }, { - "epoch": 0.77, - "grad_norm": 0.19140625, - "learning_rate": 0.0003089441006219524, - "loss": 2.1966, + "epoch": 0.7, + "grad_norm": 0.208984375, + "learning_rate": 0.0001638917039751834, + "loss": 2.1736, "step": 4150 }, { - "epoch": 0.77, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003086724062039297, - "loss": 2.1905, + "epoch": 0.7, + "grad_norm": 0.2021484375, + "learning_rate": 0.00016377792617884625, + "loss": 2.1551, "step": 4155 }, { - "epoch": 0.77, - "grad_norm": 0.2001953125, - "learning_rate": 0.00030840042692267214, - "loss": 2.1838, + "epoch": 0.7, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001636640090383698, + "loss": 2.1443, "step": 4160 }, { - "epoch": 0.77, - "grad_norm": 0.2177734375, - "learning_rate": 0.00030812816349111953, - "loss": 2.1487, + "epoch": 0.71, + "grad_norm": 0.2080078125, + "learning_rate": 0.00016354995280264402, + "loss": 2.1875, "step": 4165 }, { - "epoch": 0.77, - "grad_norm": 0.1943359375, - "learning_rate": 0.00030785561662295674, - "loss": 2.1751, + "epoch": 0.71, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001634357577208628, + "loss": 2.1689, "step": 4170 }, { - "epoch": 0.77, - "grad_norm": 0.1962890625, - "learning_rate": 0.0003075827870326115, - "loss": 2.1752, + "epoch": 0.71, + "grad_norm": 0.2001953125, + "learning_rate": 0.00016332142404252333, + "loss": 2.1903, "step": 4175 }, { - "epoch": 0.78, - "grad_norm": 0.1923828125, - "learning_rate": 0.0003073096754352526, - "loss": 2.1844, + "epoch": 0.71, + "grad_norm": 0.2041015625, + "learning_rate": 0.00016320695201742566, + "loss": 2.1529, "step": 4180 }, { - "epoch": 0.78, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003070362825467882, - "loss": 2.1858, + "epoch": 0.71, + "grad_norm": 0.201171875, + "learning_rate": 0.0001630923418956721, + "loss": 2.1493, "step": 4185 }, { - "epoch": 0.78, - "grad_norm": 0.1904296875, - "learning_rate": 0.00030676260908386367, - "loss": 2.1701, + "epoch": 0.71, + "grad_norm": 0.20703125, + "learning_rate": 0.00016297759392766667, + "loss": 2.1718, "step": 4190 }, { - "epoch": 0.78, - "grad_norm": 0.19921875, - "learning_rate": 0.00030648865576385993, - "loss": 2.1694, + "epoch": 0.71, + "grad_norm": 0.20703125, + "learning_rate": 0.0001628627083641145, + "loss": 2.155, "step": 4195 }, { - "epoch": 0.78, - "grad_norm": 0.1943359375, - "learning_rate": 0.00030621442330489143, - "loss": 2.1362, + "epoch": 0.71, + "grad_norm": 0.1953125, + "learning_rate": 0.00016274768545602143, + "loss": 2.1576, "step": 4200 }, { - "epoch": 0.78, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003059399124258044, - "loss": 2.1407, + "epoch": 0.71, + "grad_norm": 0.205078125, + "learning_rate": 0.00016263252545469338, + "loss": 2.1408, "step": 4205 }, { - "epoch": 0.78, - "grad_norm": 0.1962890625, - "learning_rate": 0.00030566512384617485, - "loss": 2.1593, + "epoch": 0.71, + "grad_norm": 0.20703125, + "learning_rate": 0.0001625172286117357, + "loss": 2.1832, "step": 4210 }, { - "epoch": 0.78, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003053900582863068, - "loss": 2.1579, + "epoch": 0.71, + "grad_norm": 0.203125, + "learning_rate": 0.00016240179517905282, + "loss": 2.1728, "step": 4215 }, { - "epoch": 0.78, - "grad_norm": 0.1953125, - "learning_rate": 0.0003051147164672301, - "loss": 2.2069, + "epoch": 0.71, + "grad_norm": 0.2001953125, + "learning_rate": 0.00016228622540884755, + "loss": 2.1633, "step": 4220 }, { - "epoch": 0.78, - "grad_norm": 0.19921875, - "learning_rate": 0.000304839099110699, - "loss": 2.1785, + "epoch": 0.72, + "grad_norm": 0.2060546875, + "learning_rate": 0.00016217051955362056, + "loss": 2.1659, "step": 4225 }, { - "epoch": 0.78, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003045632069391899, - "loss": 2.1446, + "epoch": 0.72, + "grad_norm": 0.203125, + "learning_rate": 0.00016205467786616984, + "loss": 2.174, "step": 4230 }, { - "epoch": 0.79, - "grad_norm": 0.193359375, - "learning_rate": 0.0003042870406758996, - "loss": 2.1558, + "epoch": 0.72, + "grad_norm": 0.2138671875, + "learning_rate": 0.0001619387005995902, + "loss": 2.2027, "step": 4235 }, { - "epoch": 0.79, - "grad_norm": 0.2109375, - "learning_rate": 0.0003040106010447435, - "loss": 2.1839, + "epoch": 0.72, + "grad_norm": 0.2080078125, + "learning_rate": 0.00016182258800727267, + "loss": 2.1338, "step": 4240 }, { - "epoch": 0.79, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003037338887703532, - "loss": 2.1597, + "epoch": 0.72, + "grad_norm": 0.2080078125, + "learning_rate": 0.00016170634034290383, + "loss": 2.1211, "step": 4245 }, { - "epoch": 0.79, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003034569045780755, - "loss": 2.146, + "epoch": 0.72, + "grad_norm": 0.2021484375, + "learning_rate": 0.00016158995786046552, + "loss": 2.1571, "step": 4250 }, { - "epoch": 0.79, - "grad_norm": 0.1904296875, - "learning_rate": 0.0003031796491939695, - "loss": 2.1618, + "epoch": 0.72, + "grad_norm": 0.2041015625, + "learning_rate": 0.00016147344081423402, + "loss": 2.1354, "step": 4255 }, { - "epoch": 0.79, - "grad_norm": 0.19140625, - "learning_rate": 0.0003029021233448056, - "loss": 2.1484, + "epoch": 0.72, + "grad_norm": 0.2080078125, + "learning_rate": 0.0001613567894587797, + "loss": 2.2071, "step": 4260 }, { - "epoch": 0.79, - "grad_norm": 0.1953125, - "learning_rate": 0.0003026243277580628, - "loss": 2.1625, + "epoch": 0.72, + "grad_norm": 0.20703125, + "learning_rate": 0.00016124000404896632, + "loss": 2.1344, "step": 4265 }, { - "epoch": 0.79, - "grad_norm": 0.1943359375, - "learning_rate": 0.0003023462631619274, - "loss": 2.1474, + "epoch": 0.72, + "grad_norm": 0.2060546875, + "learning_rate": 0.00016112308483995052, + "loss": 2.1864, "step": 4270 }, { - "epoch": 0.79, - "grad_norm": 0.193359375, - "learning_rate": 0.0003020679302852908, - "loss": 2.1733, + "epoch": 0.72, + "grad_norm": 0.203125, + "learning_rate": 0.0001610060320871813, + "loss": 2.1774, "step": 4275 }, { - "epoch": 0.79, - "grad_norm": 0.203125, - "learning_rate": 0.00030178932985774757, - "loss": 2.1369, + "epoch": 0.72, + "grad_norm": 0.2001953125, + "learning_rate": 0.0001608888460463994, + "loss": 2.1568, "step": 4280 }, { - "epoch": 0.79, - "grad_norm": 0.2021484375, - "learning_rate": 0.00030151046260959376, - "loss": 2.1609, + "epoch": 0.73, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001607715269736368, + "loss": 2.1271, "step": 4285 }, { - "epoch": 0.8, - "grad_norm": 0.197265625, - "learning_rate": 0.0003012313292718246, - "loss": 2.1482, + "epoch": 0.73, + "grad_norm": 0.203125, + "learning_rate": 0.00016065407512521612, + "loss": 2.1558, "step": 4290 }, { - "epoch": 0.8, - "grad_norm": 0.1982421875, - "learning_rate": 0.00030095193057613306, - "loss": 2.1889, + "epoch": 0.73, + "grad_norm": 0.2080078125, + "learning_rate": 0.0001605364907577501, + "loss": 2.1547, "step": 4295 }, { - "epoch": 0.8, - "grad_norm": 0.1953125, - "learning_rate": 0.0003006722672549076, - "loss": 2.1691, + "epoch": 0.73, + "grad_norm": 0.2080078125, + "learning_rate": 0.00016041877412814094, + "loss": 2.1729, "step": 4300 }, { - "epoch": 0.8, - "grad_norm": 0.197265625, - "learning_rate": 0.0003003923400412304, - "loss": 2.1902, + "epoch": 0.73, + "grad_norm": 0.2021484375, + "learning_rate": 0.00016030092549357988, + "loss": 2.191, "step": 4305 }, { - "epoch": 0.8, - "grad_norm": 0.1982421875, - "learning_rate": 0.0003001121496688752, - "loss": 2.1412, + "epoch": 0.73, + "grad_norm": 0.2001953125, + "learning_rate": 0.00016018294511154654, + "loss": 2.1488, "step": 4310 }, { - "epoch": 0.8, - "grad_norm": 0.1982421875, - "learning_rate": 0.00029983169687230576, - "loss": 2.2059, + "epoch": 0.73, + "grad_norm": 0.203125, + "learning_rate": 0.00016006483323980844, + "loss": 2.1452, "step": 4315 }, { - "epoch": 0.8, - "grad_norm": 0.19921875, - "learning_rate": 0.0002995509823866736, - "loss": 2.1333, + "epoch": 0.73, + "grad_norm": 0.205078125, + "learning_rate": 0.0001599465901364202, + "loss": 2.1807, "step": 4320 }, { - "epoch": 0.8, - "grad_norm": 0.1982421875, - "learning_rate": 0.00029927000694781637, - "loss": 2.159, + "epoch": 0.73, + "grad_norm": 0.2041015625, + "learning_rate": 0.00015982821605972346, + "loss": 2.1537, "step": 4325 }, { - "epoch": 0.8, - "grad_norm": 0.203125, - "learning_rate": 0.0002989887712922555, - "loss": 2.1684, + "epoch": 0.73, + "grad_norm": 0.1982421875, + "learning_rate": 0.00015970971126834575, + "loss": 2.1796, "step": 4330 }, { - "epoch": 0.8, - "grad_norm": 0.193359375, - "learning_rate": 0.0002987072761571948, - "loss": 2.1985, + "epoch": 0.73, + "grad_norm": 0.205078125, + "learning_rate": 0.00015959107602120032, + "loss": 2.1339, "step": 4335 }, { - "epoch": 0.81, - "grad_norm": 0.201171875, - "learning_rate": 0.0002984255222805182, - "loss": 2.1664, + "epoch": 0.74, + "grad_norm": 0.2021484375, + "learning_rate": 0.00015947231057748535, + "loss": 2.1562, "step": 4340 }, { - "epoch": 0.81, - "grad_norm": 0.193359375, - "learning_rate": 0.0002981435104007876, - "loss": 2.1755, + "epoch": 0.74, + "grad_norm": 0.2119140625, + "learning_rate": 0.00015935341519668367, + "loss": 2.1585, "step": 4345 }, { - "epoch": 0.81, - "grad_norm": 0.1943359375, - "learning_rate": 0.0002978612412572416, - "loss": 2.1208, + "epoch": 0.74, + "grad_norm": 0.203125, + "learning_rate": 0.00015923439013856174, + "loss": 2.1133, "step": 4350 }, { - "epoch": 0.81, - "grad_norm": 0.2021484375, - "learning_rate": 0.0002975787155897927, - "loss": 2.1604, + "epoch": 0.74, + "grad_norm": 0.2001953125, + "learning_rate": 0.00015911523566316954, + "loss": 2.176, "step": 4355 }, { - "epoch": 0.81, - "grad_norm": 0.19921875, - "learning_rate": 0.00029729593413902643, - "loss": 2.1972, + "epoch": 0.74, + "grad_norm": 0.2041015625, + "learning_rate": 0.00015899595203083976, + "loss": 2.1195, "step": 4360 }, { - "epoch": 0.81, - "grad_norm": 0.1962890625, - "learning_rate": 0.00029701289764619824, - "loss": 2.179, + "epoch": 0.74, + "grad_norm": 0.2021484375, + "learning_rate": 0.00015887653950218722, + "loss": 2.1538, "step": 4365 }, { - "epoch": 0.81, - "grad_norm": 0.1962890625, - "learning_rate": 0.0002967296068532325, - "loss": 2.1741, + "epoch": 0.74, + "grad_norm": 0.2060546875, + "learning_rate": 0.00015875699833810839, + "loss": 2.1617, "step": 4370 }, { - "epoch": 0.81, - "grad_norm": 0.193359375, - "learning_rate": 0.0002964460625027199, - "loss": 2.1915, + "epoch": 0.74, + "grad_norm": 0.2021484375, + "learning_rate": 0.00015863732879978082, + "loss": 2.1945, "step": 4375 }, { - "epoch": 0.81, - "grad_norm": 0.2001953125, - "learning_rate": 0.00029616226533791607, - "loss": 2.1776, + "epoch": 0.74, + "grad_norm": 0.2158203125, + "learning_rate": 0.00015851753114866251, + "loss": 2.1321, "step": 4380 }, { - "epoch": 0.81, - "grad_norm": 0.19921875, - "learning_rate": 0.000295878216102739, - "loss": 2.1422, + "epoch": 0.74, + "grad_norm": 0.208984375, + "learning_rate": 0.0001583976056464913, + "loss": 2.1336, "step": 4385 }, { - "epoch": 0.81, - "grad_norm": 0.1953125, - "learning_rate": 0.0002955939155417678, - "loss": 2.1374, + "epoch": 0.74, + "grad_norm": 0.2041015625, + "learning_rate": 0.00015827755255528448, + "loss": 2.1547, "step": 4390 }, { - "epoch": 0.82, - "grad_norm": 0.1923828125, - "learning_rate": 0.0002953093644002402, - "loss": 2.1876, + "epoch": 0.74, + "grad_norm": 0.205078125, + "learning_rate": 0.000158157372137338, + "loss": 2.1544, "step": 4395 }, { - "epoch": 0.82, - "grad_norm": 0.1953125, - "learning_rate": 0.0002950245634240506, - "loss": 2.1888, + "epoch": 0.75, + "grad_norm": 0.203125, + "learning_rate": 0.00015803706465522614, + "loss": 2.1145, "step": 4400 }, { - "epoch": 0.82, - "grad_norm": 0.193359375, - "learning_rate": 0.00029473951335974856, - "loss": 2.1611, + "epoch": 0.75, + "grad_norm": 0.205078125, + "learning_rate": 0.00015791663037180057, + "loss": 2.1527, "step": 4405 }, { - "epoch": 0.82, - "grad_norm": 0.197265625, - "learning_rate": 0.0002944542149545366, - "loss": 2.1848, + "epoch": 0.75, + "grad_norm": 0.208984375, + "learning_rate": 0.0001577960695501902, + "loss": 2.1787, "step": 4410 }, { - "epoch": 0.82, - "grad_norm": 0.1953125, - "learning_rate": 0.0002941686689562679, - "loss": 2.1544, + "epoch": 0.75, + "grad_norm": 0.205078125, + "learning_rate": 0.00015767538245380037, + "loss": 2.1749, "step": 4415 }, { - "epoch": 0.82, - "grad_norm": 0.1943359375, - "learning_rate": 0.00029388287611344506, - "loss": 2.1715, + "epoch": 0.75, + "grad_norm": 0.2041015625, + "learning_rate": 0.00015755456934631222, + "loss": 2.1307, "step": 4420 }, { - "epoch": 0.82, - "grad_norm": 0.19921875, - "learning_rate": 0.0002935968371752174, - "loss": 2.1448, + "epoch": 0.75, + "grad_norm": 0.2021484375, + "learning_rate": 0.00015743363049168223, + "loss": 2.1711, "step": 4425 }, { - "epoch": 0.82, - "grad_norm": 0.1962890625, - "learning_rate": 0.0002933105528913795, - "loss": 2.1661, + "epoch": 0.75, + "grad_norm": 0.201171875, + "learning_rate": 0.00015731256615414166, + "loss": 2.1446, "step": 4430 }, { - "epoch": 0.82, - "grad_norm": 0.2060546875, - "learning_rate": 0.00029302402401236904, - "loss": 2.1847, + "epoch": 0.75, + "grad_norm": 0.2041015625, + "learning_rate": 0.00015719137659819593, + "loss": 2.1615, "step": 4435 }, { - "epoch": 0.82, - "grad_norm": 0.1953125, - "learning_rate": 0.00029273725128926484, - "loss": 2.1442, + "epoch": 0.75, + "grad_norm": 0.203125, + "learning_rate": 0.00015707006208862402, + "loss": 2.1711, "step": 4440 }, { - "epoch": 0.82, - "grad_norm": 0.193359375, - "learning_rate": 0.00029245023547378493, - "loss": 2.1806, + "epoch": 0.75, + "grad_norm": 0.20703125, + "learning_rate": 0.0001569486228904779, + "loss": 2.1336, "step": 4445 }, { - "epoch": 0.83, - "grad_norm": 0.201171875, - "learning_rate": 0.0002921629773182845, - "loss": 2.1519, + "epoch": 0.75, + "grad_norm": 0.2099609375, + "learning_rate": 0.000156827059269082, + "loss": 2.1397, "step": 4450 }, { - "epoch": 0.83, - "grad_norm": 0.1962890625, - "learning_rate": 0.0002918754775757541, - "loss": 2.1223, + "epoch": 0.75, + "grad_norm": 0.2041015625, + "learning_rate": 0.00015670537149003257, + "loss": 2.1769, "step": 4455 }, { - "epoch": 0.83, - "grad_norm": 0.189453125, - "learning_rate": 0.0002915877369998174, - "loss": 2.1435, + "epoch": 0.76, + "grad_norm": 0.212890625, + "learning_rate": 0.0001565835598191971, + "loss": 2.14, "step": 4460 }, { - "epoch": 0.83, - "grad_norm": 0.1884765625, - "learning_rate": 0.0002912997563447296, - "loss": 2.1613, + "epoch": 0.76, + "grad_norm": 0.208984375, + "learning_rate": 0.00015646162452271378, + "loss": 2.1609, "step": 4465 }, { - "epoch": 0.83, - "grad_norm": 0.1923828125, - "learning_rate": 0.0002910115363653749, - "loss": 2.1497, + "epoch": 0.76, + "grad_norm": 0.208984375, + "learning_rate": 0.00015633956586699096, + "loss": 2.1562, "step": 4470 }, { - "epoch": 0.83, - "grad_norm": 0.1953125, - "learning_rate": 0.000290723077817265, - "loss": 2.1734, + "epoch": 0.76, + "grad_norm": 0.2021484375, + "learning_rate": 0.00015621738411870643, + "loss": 2.1282, "step": 4475 }, { - "epoch": 0.83, - "grad_norm": 0.2001953125, - "learning_rate": 0.00029043438145653715, - "loss": 2.1742, + "epoch": 0.76, + "grad_norm": 0.20703125, + "learning_rate": 0.00015609507954480697, + "loss": 2.1813, "step": 4480 }, { - "epoch": 0.83, - "grad_norm": 0.1982421875, - "learning_rate": 0.0002901454480399517, - "loss": 2.1759, + "epoch": 0.76, + "grad_norm": 0.216796875, + "learning_rate": 0.00015597265241250763, + "loss": 2.1393, "step": 4485 }, { - "epoch": 0.83, - "grad_norm": 0.1943359375, - "learning_rate": 0.00028985627832489044, - "loss": 2.1722, + "epoch": 0.76, + "grad_norm": 0.208984375, + "learning_rate": 0.00015585010298929138, + "loss": 2.1257, "step": 4490 }, { - "epoch": 0.83, - "grad_norm": 0.1923828125, - "learning_rate": 0.0002895668730693548, - "loss": 2.1574, + "epoch": 0.76, + "grad_norm": 0.203125, + "learning_rate": 0.00015572743154290824, + "loss": 2.1303, "step": 4495 }, { - "epoch": 0.83, - "grad_norm": 0.1962890625, - "learning_rate": 0.0002892772330319633, - "loss": 2.1523, + "epoch": 0.76, + "grad_norm": 0.2041015625, + "learning_rate": 0.00015560463834137482, + "loss": 2.1328, "step": 4500 }, { - "epoch": 0.84, - "grad_norm": 0.19921875, - "learning_rate": 0.0002889873589719501, - "loss": 2.1746, + "epoch": 0.76, + "grad_norm": 0.2041015625, + "learning_rate": 0.0001554817236529739, + "loss": 2.1419, "step": 4505 }, { - "epoch": 0.84, - "grad_norm": 0.193359375, - "learning_rate": 0.0002886972516491627, - "loss": 2.1642, + "epoch": 0.76, + "grad_norm": 0.2021484375, + "learning_rate": 0.00015535868774625353, + "loss": 2.1534, "step": 4510 }, { - "epoch": 0.84, - "grad_norm": 0.203125, - "learning_rate": 0.0002884069118240602, - "loss": 2.1204, + "epoch": 0.76, + "grad_norm": 0.2041015625, + "learning_rate": 0.00015523553089002667, + "loss": 2.1393, "step": 4515 }, { - "epoch": 0.84, - "grad_norm": 0.193359375, - "learning_rate": 0.0002881163402577111, - "loss": 2.1641, + "epoch": 0.77, + "grad_norm": 0.2080078125, + "learning_rate": 0.0001551122533533705, + "loss": 2.145, "step": 4520 }, { - "epoch": 0.84, - "grad_norm": 0.20703125, - "learning_rate": 0.00028782553771179123, - "loss": 2.1502, + "epoch": 0.77, + "grad_norm": 0.2109375, + "learning_rate": 0.00015498885540562597, + "loss": 2.1604, "step": 4525 }, { - "epoch": 0.84, - "grad_norm": 0.1953125, - "learning_rate": 0.000287534504948582, - "loss": 2.1846, + "epoch": 0.77, + "grad_norm": 0.1982421875, + "learning_rate": 0.000154865337316397, + "loss": 2.137, "step": 4530 }, { - "epoch": 0.84, - "grad_norm": 0.2001953125, - "learning_rate": 0.00028724324273096837, - "loss": 2.2048, + "epoch": 0.77, + "grad_norm": 0.2099609375, + "learning_rate": 0.00015474169935554994, + "loss": 2.1242, "step": 4535 }, { - "epoch": 0.84, + "epoch": 0.77, "grad_norm": 0.208984375, - "learning_rate": 0.0002869517518224366, - "loss": 2.1565, + "learning_rate": 0.00015461794179321323, + "loss": 2.2208, "step": 4540 }, { - "epoch": 0.84, - "grad_norm": 0.2001953125, - "learning_rate": 0.0002866600329870725, - "loss": 2.1622, + "epoch": 0.77, + "grad_norm": 0.203125, + "learning_rate": 0.0001544940648997765, + "loss": 2.1613, "step": 4545 }, { - "epoch": 0.84, - "grad_norm": 0.1943359375, - "learning_rate": 0.00028636808698955933, - "loss": 2.1589, + "epoch": 0.77, + "grad_norm": 0.20703125, + "learning_rate": 0.00015437006894589007, + "loss": 2.1307, "step": 4550 }, { - "epoch": 0.85, - "grad_norm": 0.1953125, - "learning_rate": 0.00028607591459517596, - "loss": 2.1666, + "epoch": 0.77, + "grad_norm": 0.205078125, + "learning_rate": 0.00015424595420246448, + "loss": 2.1235, "step": 4555 }, { - "epoch": 0.85, - "grad_norm": 0.1962890625, - "learning_rate": 0.0002857835165697944, - "loss": 2.1468, + "epoch": 0.77, + "grad_norm": 0.203125, + "learning_rate": 0.00015412172094066975, + "loss": 2.1515, "step": 4560 }, { - "epoch": 0.85, - "grad_norm": 0.1982421875, - "learning_rate": 0.0002854908936798783, - "loss": 2.1915, + "epoch": 0.77, + "grad_norm": 0.2041015625, + "learning_rate": 0.00015399736943193487, + "loss": 2.1534, "step": 4565 }, { - "epoch": 0.85, - "grad_norm": 0.19921875, - "learning_rate": 0.00028519804669248084, - "loss": 2.2123, + "epoch": 0.77, + "grad_norm": 0.203125, + "learning_rate": 0.0001538728999479471, + "loss": 2.1222, "step": 4570 }, { - "epoch": 0.85, - "grad_norm": 0.197265625, - "learning_rate": 0.0002849049763752424, - "loss": 2.1614, + "epoch": 0.77, + "grad_norm": 0.201171875, + "learning_rate": 0.00015374831276065157, + "loss": 2.2067, "step": 4575 }, { - "epoch": 0.85, - "grad_norm": 0.2060546875, - "learning_rate": 0.00028461168349638903, - "loss": 2.1514, + "epoch": 0.78, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001536236081422505, + "loss": 2.1823, "step": 4580 }, { - "epoch": 0.85, - "grad_norm": 0.2001953125, - "learning_rate": 0.00028431816882473, - "loss": 2.1957, + "epoch": 0.78, + "grad_norm": 0.203125, + "learning_rate": 0.00015349878636520273, + "loss": 2.1067, "step": 4585 }, { - "epoch": 0.85, - "grad_norm": 0.19140625, - "learning_rate": 0.00028402443312965596, - "loss": 2.172, + "epoch": 0.78, + "grad_norm": 0.20703125, + "learning_rate": 0.00015337384770222295, + "loss": 2.1536, "step": 4590 }, { - "epoch": 0.85, - "grad_norm": 0.19921875, - "learning_rate": 0.000283730477181137, - "loss": 2.1331, + "epoch": 0.78, + "grad_norm": 0.2080078125, + "learning_rate": 0.00015324879242628145, + "loss": 2.149, "step": 4595 }, { - "epoch": 0.85, - "grad_norm": 0.1982421875, - "learning_rate": 0.0002834363017497205, - "loss": 2.1357, + "epoch": 0.78, + "grad_norm": 0.208984375, + "learning_rate": 0.00015312362081060308, + "loss": 2.1436, "step": 4600 }, { - "epoch": 0.85, - "grad_norm": 0.2001953125, - "learning_rate": 0.0002831419076065293, - "loss": 2.1649, + "epoch": 0.78, + "grad_norm": 0.2080078125, + "learning_rate": 0.00015299833312866696, + "loss": 2.1826, "step": 4605 }, { - "epoch": 0.86, - "grad_norm": 0.193359375, - "learning_rate": 0.0002828472955232595, - "loss": 2.1565, + "epoch": 0.78, + "grad_norm": 0.19921875, + "learning_rate": 0.0001528729296542058, + "loss": 2.1255, "step": 4610 }, { - "epoch": 0.86, + "epoch": 0.78, "grad_norm": 0.2021484375, - "learning_rate": 0.00028255246627217824, - "loss": 2.1964, + "learning_rate": 0.00015274741066120535, + "loss": 2.1766, "step": 4615 }, { - "epoch": 0.86, - "grad_norm": 0.201171875, - "learning_rate": 0.00028225742062612237, - "loss": 2.1533, + "epoch": 0.78, + "grad_norm": 0.2197265625, + "learning_rate": 0.0001526217764239036, + "loss": 2.1777, "step": 4620 }, { - "epoch": 0.86, - "grad_norm": 0.2021484375, - "learning_rate": 0.00028196215935849555, - "loss": 2.1384, + "epoch": 0.78, + "grad_norm": 0.201171875, + "learning_rate": 0.00015249602721679047, + "loss": 2.1478, "step": 4625 }, { - "epoch": 0.86, - "grad_norm": 0.2021484375, - "learning_rate": 0.00028166668324326695, - "loss": 2.1632, + "epoch": 0.78, + "grad_norm": 0.2109375, + "learning_rate": 0.00015237016331460702, + "loss": 2.1484, "step": 4630 }, { - "epoch": 0.86, - "grad_norm": 0.185546875, - "learning_rate": 0.0002813709930549688, - "loss": 2.1184, + "epoch": 0.78, + "grad_norm": 0.2001953125, + "learning_rate": 0.00015224418499234488, + "loss": 2.1994, "step": 4635 }, { - "epoch": 0.86, - "grad_norm": 0.197265625, - "learning_rate": 0.0002810750895686944, - "loss": 2.1813, + "epoch": 0.79, + "grad_norm": 0.2041015625, + "learning_rate": 0.00015211809252524568, + "loss": 2.1161, "step": 4640 }, { - "epoch": 0.86, - "grad_norm": 0.197265625, - "learning_rate": 0.0002807789735600964, - "loss": 2.1603, + "epoch": 0.79, + "grad_norm": 0.232421875, + "learning_rate": 0.00015199188618880049, + "loss": 2.1493, "step": 4645 }, { - "epoch": 0.86, - "grad_norm": 0.197265625, - "learning_rate": 0.00028048264580538435, - "loss": 2.1534, + "epoch": 0.79, + "grad_norm": 0.208984375, + "learning_rate": 0.0001518655662587491, + "loss": 2.1431, "step": 4650 }, { - "epoch": 0.86, - "grad_norm": 0.1962890625, - "learning_rate": 0.00028018610708132274, - "loss": 2.191, + "epoch": 0.79, + "grad_norm": 0.203125, + "learning_rate": 0.0001517391330110795, + "loss": 2.1434, "step": 4655 }, { - "epoch": 0.86, - "grad_norm": 0.205078125, - "learning_rate": 0.0002798893581652295, - "loss": 2.1827, + "epoch": 0.79, + "grad_norm": 0.2080078125, + "learning_rate": 0.00015161258672202724, + "loss": 2.1252, "step": 4660 }, { - "epoch": 0.87, - "grad_norm": 0.1962890625, - "learning_rate": 0.0002795923998349729, - "loss": 2.1759, + "epoch": 0.79, + "grad_norm": 0.203125, + "learning_rate": 0.0001514859276680749, + "loss": 2.1591, "step": 4665 }, { - "epoch": 0.87, + "epoch": 0.79, "grad_norm": 0.1982421875, - "learning_rate": 0.0002792952328689709, - "loss": 2.15, + "learning_rate": 0.0001513591561259514, + "loss": 2.1137, "step": 4670 }, { - "epoch": 0.87, - "grad_norm": 0.203125, - "learning_rate": 0.0002789978580461877, - "loss": 2.1464, + "epoch": 0.79, + "grad_norm": 0.2060546875, + "learning_rate": 0.00015123227237263148, + "loss": 2.1636, "step": 4675 }, { - "epoch": 0.87, - "grad_norm": 0.193359375, - "learning_rate": 0.0002787002761461328, - "loss": 2.1575, + "epoch": 0.79, + "grad_norm": 0.2080078125, + "learning_rate": 0.00015110527668533486, + "loss": 2.1489, "step": 4680 }, { - "epoch": 0.87, - "grad_norm": 0.1962890625, - "learning_rate": 0.00027840248794885826, - "loss": 2.1241, + "epoch": 0.79, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001509781693415261, + "loss": 2.1301, "step": 4685 }, { - "epoch": 0.87, - "grad_norm": 0.2060546875, - "learning_rate": 0.0002781044942349569, - "loss": 2.1715, + "epoch": 0.79, + "grad_norm": 0.203125, + "learning_rate": 0.00015085095061891348, + "loss": 2.1761, "step": 4690 }, { - "epoch": 0.87, - "grad_norm": 0.1943359375, - "learning_rate": 0.00027780629578556045, - "loss": 2.1551, + "epoch": 0.8, + "grad_norm": 0.203125, + "learning_rate": 0.0001507236207954487, + "loss": 2.2051, "step": 4695 }, { - "epoch": 0.87, - "grad_norm": 0.1982421875, - "learning_rate": 0.0002775078933823372, - "loss": 2.1945, + "epoch": 0.8, + "grad_norm": 0.203125, + "learning_rate": 0.00015059618014932625, + "loss": 2.1436, "step": 4700 }, { - "epoch": 0.87, - "grad_norm": 0.201171875, - "learning_rate": 0.00027720928780749, - "loss": 2.1742, + "epoch": 0.8, + "grad_norm": 0.2119140625, + "learning_rate": 0.00015046862895898267, + "loss": 2.1323, "step": 4705 }, { - "epoch": 0.87, - "grad_norm": 0.2001953125, - "learning_rate": 0.0002769104798437546, - "loss": 2.1535, + "epoch": 0.8, + "grad_norm": 0.2060546875, + "learning_rate": 0.00015034096750309609, + "loss": 2.1837, "step": 4710 }, { - "epoch": 0.87, - "grad_norm": 0.2001953125, - "learning_rate": 0.00027661147027439664, - "loss": 2.1172, + "epoch": 0.8, + "grad_norm": 0.205078125, + "learning_rate": 0.00015021319606058544, + "loss": 2.1374, "step": 4715 }, { - "epoch": 0.88, - "grad_norm": 0.1943359375, - "learning_rate": 0.00027631225988321084, - "loss": 2.1983, + "epoch": 0.8, + "grad_norm": 0.208984375, + "learning_rate": 0.00015008531491061012, + "loss": 2.1646, "step": 4720 }, { - "epoch": 0.88, - "grad_norm": 0.1962890625, - "learning_rate": 0.0002760128494545181, - "loss": 2.1574, + "epoch": 0.8, + "grad_norm": 0.21484375, + "learning_rate": 0.00014995732433256906, + "loss": 2.1726, "step": 4725 }, { - "epoch": 0.88, - "grad_norm": 0.2001953125, - "learning_rate": 0.0002757132397731636, - "loss": 2.1594, + "epoch": 0.8, + "grad_norm": 0.208984375, + "learning_rate": 0.00014982922460610038, + "loss": 2.1688, "step": 4730 }, { - "epoch": 0.88, - "grad_norm": 0.1943359375, - "learning_rate": 0.00027541343162451495, - "loss": 2.1266, + "epoch": 0.8, + "grad_norm": 0.205078125, + "learning_rate": 0.00014970101601108059, + "loss": 2.1733, "step": 4735 }, { - "epoch": 0.88, - "grad_norm": 0.20703125, - "learning_rate": 0.00027511342579446, - "loss": 2.1707, + "epoch": 0.8, + "grad_norm": 0.2109375, + "learning_rate": 0.00014957269882762416, + "loss": 2.1179, "step": 4740 }, { - "epoch": 0.88, - "grad_norm": 0.1953125, - "learning_rate": 0.0002748132230694047, - "loss": 2.1271, + "epoch": 0.8, + "grad_norm": 0.21484375, + "learning_rate": 0.0001494442733360827, + "loss": 2.1872, "step": 4745 }, { - "epoch": 0.88, - "grad_norm": 0.205078125, - "learning_rate": 0.0002745128242362711, - "loss": 2.1612, + "epoch": 0.8, + "grad_norm": 0.2109375, + "learning_rate": 0.00014931573981704453, + "loss": 2.1705, "step": 4750 }, { - "epoch": 0.88, - "grad_norm": 0.2021484375, - "learning_rate": 0.00027421223008249545, - "loss": 2.1779, + "epoch": 0.81, + "grad_norm": 0.2119140625, + "learning_rate": 0.00014918709855133396, + "loss": 2.1283, "step": 4755 }, { - "epoch": 0.88, - "grad_norm": 0.2041015625, - "learning_rate": 0.00027391144139602596, - "loss": 2.1914, + "epoch": 0.81, + "grad_norm": 0.208984375, + "learning_rate": 0.00014905834982001075, + "loss": 2.1372, "step": 4760 }, { - "epoch": 0.88, - "grad_norm": 0.2001953125, - "learning_rate": 0.00027361045896532053, - "loss": 2.1352, + "epoch": 0.81, + "grad_norm": 0.212890625, + "learning_rate": 0.00014892949390436934, + "loss": 2.161, "step": 4765 }, { - "epoch": 0.88, - "grad_norm": 0.197265625, - "learning_rate": 0.0002733092835793454, - "loss": 2.1487, + "epoch": 0.81, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001488005310859385, + "loss": 2.1579, "step": 4770 }, { - "epoch": 0.89, - "grad_norm": 0.2001953125, - "learning_rate": 0.0002730079160275721, - "loss": 2.1512, + "epoch": 0.81, + "grad_norm": 0.212890625, + "learning_rate": 0.0001486714616464805, + "loss": 2.1786, "step": 4775 }, { - "epoch": 0.89, - "grad_norm": 0.2041015625, - "learning_rate": 0.00027270635709997616, - "loss": 2.1593, + "epoch": 0.81, + "grad_norm": 0.20703125, + "learning_rate": 0.0001485422858679905, + "loss": 2.1798, "step": 4780 }, { - "epoch": 0.89, - "grad_norm": 0.1962890625, - "learning_rate": 0.0002724046075870348, - "loss": 2.1361, + "epoch": 0.81, + "grad_norm": 0.205078125, + "learning_rate": 0.0001484130040326961, + "loss": 2.1244, "step": 4785 }, { - "epoch": 0.89, - "grad_norm": 0.197265625, - "learning_rate": 0.0002721026682797245, - "loss": 2.1535, + "epoch": 0.81, + "grad_norm": 0.2080078125, + "learning_rate": 0.0001482836164230565, + "loss": 2.1467, "step": 4790 }, { - "epoch": 0.89, - "grad_norm": 0.2021484375, - "learning_rate": 0.0002718005399695197, - "loss": 2.1602, + "epoch": 0.81, + "grad_norm": 0.205078125, + "learning_rate": 0.00014815412332176212, + "loss": 2.1469, "step": 4795 }, { - "epoch": 0.89, - "grad_norm": 0.19921875, - "learning_rate": 0.00027149822344839006, - "loss": 2.1593, + "epoch": 0.81, + "grad_norm": 0.212890625, + "learning_rate": 0.00014802452501173384, + "loss": 2.1511, "step": 4800 }, { - "epoch": 0.89, - "grad_norm": 0.193359375, - "learning_rate": 0.00027119571950879847, - "loss": 2.1754, + "epoch": 0.81, + "grad_norm": 0.2060546875, + "learning_rate": 0.00014789482177612225, + "loss": 2.1176, "step": 4805 }, { - "epoch": 0.89, - "grad_norm": 0.1953125, - "learning_rate": 0.00027089302894369924, - "loss": 2.1668, + "epoch": 0.81, + "grad_norm": 0.21484375, + "learning_rate": 0.00014776501389830737, + "loss": 2.1606, "step": 4810 }, { - "epoch": 0.89, - "grad_norm": 0.197265625, - "learning_rate": 0.00027059015254653586, - "loss": 2.1684, + "epoch": 0.82, + "grad_norm": 0.212890625, + "learning_rate": 0.00014763510166189783, + "loss": 2.1423, "step": 4815 }, { - "epoch": 0.89, - "grad_norm": 0.1962890625, - "learning_rate": 0.000270287091111239, - "loss": 2.1496, + "epoch": 0.82, + "grad_norm": 0.216796875, + "learning_rate": 0.00014750508535073012, + "loss": 2.166, "step": 4820 }, { - "epoch": 0.9, - "grad_norm": 0.1962890625, - "learning_rate": 0.00026998384543222434, - "loss": 2.1385, + "epoch": 0.82, + "grad_norm": 0.2080078125, + "learning_rate": 0.00014737496524886828, + "loss": 2.1404, "step": 4825 }, { - "epoch": 0.9, - "grad_norm": 0.201171875, - "learning_rate": 0.0002696804163043904, - "loss": 2.1392, + "epoch": 0.82, + "grad_norm": 0.2021484375, + "learning_rate": 0.00014724474164060298, + "loss": 2.1461, "step": 4830 }, { - "epoch": 0.9, - "grad_norm": 0.212890625, - "learning_rate": 0.00026937680452311674, - "loss": 2.2032, + "epoch": 0.82, + "grad_norm": 0.2080078125, + "learning_rate": 0.00014711441481045115, + "loss": 2.1584, "step": 4835 }, { - "epoch": 0.9, - "grad_norm": 0.201171875, - "learning_rate": 0.0002690730108842615, - "loss": 2.1713, + "epoch": 0.82, + "grad_norm": 0.2099609375, + "learning_rate": 0.00014698398504315522, + "loss": 2.1381, "step": 4840 }, { - "epoch": 0.9, - "grad_norm": 0.19921875, - "learning_rate": 0.0002687690361841599, - "loss": 2.1981, + "epoch": 0.82, + "grad_norm": 0.2109375, + "learning_rate": 0.00014685345262368242, + "loss": 2.1385, "step": 4845 }, { - "epoch": 0.9, - "grad_norm": 0.201171875, - "learning_rate": 0.00026846488121962136, - "loss": 2.1998, + "epoch": 0.82, + "grad_norm": 0.203125, + "learning_rate": 0.00014672281783722438, + "loss": 2.187, "step": 4850 }, { - "epoch": 0.9, - "grad_norm": 0.1943359375, - "learning_rate": 0.00026816054678792807, - "loss": 2.1832, + "epoch": 0.82, + "grad_norm": 0.20703125, + "learning_rate": 0.00014659208096919635, + "loss": 2.2096, "step": 4855 }, { - "epoch": 0.9, - "grad_norm": 0.197265625, - "learning_rate": 0.00026785603368683253, - "loss": 2.137, + "epoch": 0.82, + "grad_norm": 0.2060546875, + "learning_rate": 0.00014646124230523652, + "loss": 2.1409, "step": 4860 }, { - "epoch": 0.9, - "grad_norm": 0.1982421875, - "learning_rate": 0.0002675513427145558, - "loss": 2.1652, + "epoch": 0.82, + "grad_norm": 0.2080078125, + "learning_rate": 0.00014633030213120568, + "loss": 2.1633, "step": 4865 }, { - "epoch": 0.9, - "grad_norm": 0.201171875, - "learning_rate": 0.00026724647466978493, - "loss": 2.164, + "epoch": 0.82, + "grad_norm": 0.2060546875, + "learning_rate": 0.00014619926073318617, + "loss": 2.1757, "step": 4870 }, { - "epoch": 0.9, - "grad_norm": 0.1904296875, - "learning_rate": 0.0002669414303516712, - "loss": 2.1208, + "epoch": 0.83, + "grad_norm": 0.2021484375, + "learning_rate": 0.00014606811839748172, + "loss": 2.1703, "step": 4875 }, { - "epoch": 0.91, - "grad_norm": 0.197265625, - "learning_rate": 0.0002666362105598281, - "loss": 2.1669, + "epoch": 0.83, + "grad_norm": 0.2158203125, + "learning_rate": 0.00014593687541061636, + "loss": 2.1715, "step": 4880 }, { - "epoch": 0.91, - "grad_norm": 0.1982421875, - "learning_rate": 0.00026633081609432895, - "loss": 2.1483, + "epoch": 0.83, + "grad_norm": 0.2109375, + "learning_rate": 0.00014580553205933422, + "loss": 2.1174, "step": 4885 }, { - "epoch": 0.91, - "grad_norm": 0.1962890625, - "learning_rate": 0.000266025247755705, - "loss": 2.1685, + "epoch": 0.83, + "grad_norm": 0.2119140625, + "learning_rate": 0.00014567408863059864, + "loss": 2.1588, "step": 4890 }, { - "epoch": 0.91, - "grad_norm": 0.1953125, - "learning_rate": 0.0002657195063449432, - "loss": 2.1387, + "epoch": 0.83, + "grad_norm": 0.20703125, + "learning_rate": 0.00014554254541159154, + "loss": 2.1533, "step": 4895 }, { - "epoch": 0.91, - "grad_norm": 0.1962890625, - "learning_rate": 0.00026541359266348436, - "loss": 2.1927, - "step": 4900 - }, + "epoch": 0.83, + "grad_norm": 0.2109375, + "learning_rate": 0.00014541090268971297, + "loss": 2.1168, + "step": 4900 + }, { - "epoch": 0.91, - "grad_norm": 0.2021484375, - "learning_rate": 0.0002651075075132206, - "loss": 2.1684, + "epoch": 0.83, + "grad_norm": 0.203125, + "learning_rate": 0.00014527916075258036, + "loss": 2.1413, "step": 4905 }, { - "epoch": 0.91, - "grad_norm": 0.2021484375, - "learning_rate": 0.0002648012516964937, + "epoch": 0.83, + "grad_norm": 0.205078125, + "learning_rate": 0.00014514731988802786, "loss": 2.1658, "step": 4910 }, { - "epoch": 0.91, - "grad_norm": 0.1982421875, - "learning_rate": 0.0002644948260160928, - "loss": 2.1713, + "epoch": 0.83, + "grad_norm": 0.2060546875, + "learning_rate": 0.00014501538038410574, + "loss": 2.1561, "step": 4915 }, { - "epoch": 0.91, - "grad_norm": 0.1962890625, - "learning_rate": 0.00026418823127525215, - "loss": 2.1661, + "epoch": 0.83, + "grad_norm": 0.2041015625, + "learning_rate": 0.00014488334252907992, + "loss": 2.1379, "step": 4920 }, { - "epoch": 0.91, - "grad_norm": 0.2138671875, - "learning_rate": 0.0002638814682776494, - "loss": 2.2177, + "epoch": 0.83, + "grad_norm": 0.2060546875, + "learning_rate": 0.00014475120661143107, + "loss": 2.131, "step": 4925 }, { - "epoch": 0.91, - "grad_norm": 0.1962890625, - "learning_rate": 0.000263574537827403, - "loss": 2.145, + "epoch": 0.83, + "grad_norm": 0.19921875, + "learning_rate": 0.00014461897291985411, + "loss": 2.1684, "step": 4930 }, { - "epoch": 0.92, - "grad_norm": 0.1904296875, - "learning_rate": 0.00026326744072907056, - "loss": 2.1322, + "epoch": 0.84, + "grad_norm": 0.2109375, + "learning_rate": 0.00014448664174325764, + "loss": 2.1809, "step": 4935 }, { - "epoch": 0.92, - "grad_norm": 0.2099609375, - "learning_rate": 0.00026296017778764633, - "loss": 2.1891, + "epoch": 0.84, + "grad_norm": 0.21875, + "learning_rate": 0.00014435421337076327, + "loss": 2.1414, "step": 4940 }, { - "epoch": 0.92, - "grad_norm": 0.2041015625, - "learning_rate": 0.00026265274980855947, - "loss": 2.2206, + "epoch": 0.84, + "grad_norm": 0.205078125, + "learning_rate": 0.00014422168809170486, + "loss": 2.1749, "step": 4945 }, { - "epoch": 0.92, - "grad_norm": 0.2041015625, - "learning_rate": 0.00026234515759767166, - "loss": 2.1822, + "epoch": 0.84, + "grad_norm": 0.2158203125, + "learning_rate": 0.00014408906619562808, + "loss": 2.1698, "step": 4950 }, { - "epoch": 0.92, - "grad_norm": 0.205078125, - "learning_rate": 0.00026203740196127504, - "loss": 2.1469, + "epoch": 0.84, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001439563479722897, + "loss": 2.1361, "step": 4955 }, { - "epoch": 0.92, - "grad_norm": 0.193359375, - "learning_rate": 0.0002617294837060902, - "loss": 2.1701, + "epoch": 0.84, + "grad_norm": 0.2099609375, + "learning_rate": 0.00014382353371165685, + "loss": 2.1304, "step": 4960 }, { - "epoch": 0.92, - "grad_norm": 0.201171875, - "learning_rate": 0.000261421403639264, - "loss": 2.1456, + "epoch": 0.84, + "grad_norm": 0.2109375, + "learning_rate": 0.00014369062370390667, + "loss": 2.1559, "step": 4965 }, { - "epoch": 0.92, - "grad_norm": 0.2041015625, - "learning_rate": 0.00026111316256836745, - "loss": 2.1366, + "epoch": 0.84, + "grad_norm": 0.208984375, + "learning_rate": 0.00014355761823942525, + "loss": 2.1343, "step": 4970 }, { - "epoch": 0.92, - "grad_norm": 0.205078125, - "learning_rate": 0.0002608047613013936, - "loss": 2.1981, + "epoch": 0.84, + "grad_norm": 0.208984375, + "learning_rate": 0.0001434245176088074, + "loss": 2.1623, "step": 4975 }, { - "epoch": 0.92, - "grad_norm": 0.197265625, - "learning_rate": 0.0002604962006467555, - "loss": 2.1793, + "epoch": 0.84, + "grad_norm": 0.20703125, + "learning_rate": 0.00014329132210285587, + "loss": 2.1498, "step": 4980 }, { - "epoch": 0.92, - "grad_norm": 0.1953125, - "learning_rate": 0.0002601874814132837, - "loss": 2.1473, + "epoch": 0.84, + "grad_norm": 0.2138671875, + "learning_rate": 0.00014315803201258058, + "loss": 2.1251, "step": 4985 }, { - "epoch": 0.93, - "grad_norm": 0.1923828125, - "learning_rate": 0.0002598786044102249, - "loss": 2.1915, + "epoch": 0.85, + "grad_norm": 0.2001953125, + "learning_rate": 0.00014302464762919819, + "loss": 2.1, "step": 4990 }, { - "epoch": 0.93, - "grad_norm": 0.1904296875, - "learning_rate": 0.0002595695704472389, - "loss": 2.1568, + "epoch": 0.85, + "grad_norm": 0.2138671875, + "learning_rate": 0.00014289116924413132, + "loss": 2.1501, "step": 4995 }, { - "epoch": 0.93, - "grad_norm": 0.1982421875, - "learning_rate": 0.0002592603803343973, - "loss": 2.157, + "epoch": 0.85, + "grad_norm": 0.2001953125, + "learning_rate": 0.00014275759714900806, + "loss": 2.135, "step": 5000 }, { - "epoch": 0.93, - "grad_norm": 0.201171875, - "learning_rate": 0.00025895103488218085, - "loss": 2.1811, + "epoch": 0.85, + "grad_norm": 0.2138671875, + "learning_rate": 0.0001426239316356611, + "loss": 2.1379, "step": 5005 }, { - "epoch": 0.93, - "grad_norm": 0.1943359375, - "learning_rate": 0.0002586415349014775, - "loss": 2.1387, + "epoch": 0.85, + "grad_norm": 0.208984375, + "learning_rate": 0.00014249017299612735, + "loss": 2.1039, "step": 5010 }, { - "epoch": 0.93, - "grad_norm": 0.19140625, - "learning_rate": 0.0002583318812035803, - "loss": 2.1382, + "epoch": 0.85, + "grad_norm": 0.2060546875, + "learning_rate": 0.00014235632152264716, + "loss": 2.1887, "step": 5015 }, { - "epoch": 0.93, - "grad_norm": 0.2080078125, - "learning_rate": 0.0002580220746001852, - "loss": 2.1861, + "epoch": 0.85, + "grad_norm": 0.208984375, + "learning_rate": 0.00014222237750766365, + "loss": 2.1571, "step": 5020 }, { - "epoch": 0.93, - "grad_norm": 0.2001953125, - "learning_rate": 0.000257712115903389, - "loss": 2.1506, + "epoch": 0.85, + "grad_norm": 0.216796875, + "learning_rate": 0.0001420883412438222, + "loss": 2.1553, "step": 5025 }, { - "epoch": 0.93, - "grad_norm": 0.19921875, - "learning_rate": 0.00025740200592568713, - "loss": 2.1404, + "epoch": 0.85, + "grad_norm": 0.20703125, + "learning_rate": 0.00014195421302396968, + "loss": 2.1225, "step": 5030 }, { - "epoch": 0.93, - "grad_norm": 0.197265625, - "learning_rate": 0.00025709174547997157, - "loss": 2.1181, + "epoch": 0.85, + "grad_norm": 0.2109375, + "learning_rate": 0.00014181999314115393, + "loss": 2.151, "step": 5035 }, { - "epoch": 0.94, - "grad_norm": 0.1953125, - "learning_rate": 0.0002567813353795288, - "loss": 2.1433, + "epoch": 0.85, + "grad_norm": 0.205078125, + "learning_rate": 0.000141685681888623, + "loss": 2.1276, "step": 5040 }, { - "epoch": 0.94, - "grad_norm": 0.1962890625, - "learning_rate": 0.00025647077643803763, - "loss": 2.1527, + "epoch": 0.85, + "grad_norm": 0.20703125, + "learning_rate": 0.0001415512795598246, + "loss": 2.162, "step": 5045 }, { - "epoch": 0.94, - "grad_norm": 0.2021484375, - "learning_rate": 0.00025616006946956683, - "loss": 2.1853, + "epoch": 0.86, + "grad_norm": 0.205078125, + "learning_rate": 0.00014141678644840542, + "loss": 2.1619, "step": 5050 }, { - "epoch": 0.94, - "grad_norm": 0.203125, - "learning_rate": 0.0002558492152885735, - "loss": 2.1775, + "epoch": 0.86, + "grad_norm": 0.212890625, + "learning_rate": 0.0001412822028482105, + "loss": 2.1493, "step": 5055 }, { - "epoch": 0.94, - "grad_norm": 0.2080078125, - "learning_rate": 0.00025553821470990007, - "loss": 2.1644, + "epoch": 0.86, + "grad_norm": 0.1982421875, + "learning_rate": 0.00014114752905328257, + "loss": 2.1496, "step": 5060 }, { - "epoch": 0.94, - "grad_norm": 0.1962890625, - "learning_rate": 0.00025522706854877344, - "loss": 2.1959, + "epoch": 0.86, + "grad_norm": 0.2099609375, + "learning_rate": 0.00014101276535786138, + "loss": 2.1648, "step": 5065 }, { - "epoch": 0.94, - "grad_norm": 0.1943359375, - "learning_rate": 0.0002549157776208016, - "loss": 2.2071, + "epoch": 0.86, + "grad_norm": 0.208984375, + "learning_rate": 0.00014087791205638324, + "loss": 2.1168, "step": 5070 }, { - "epoch": 0.94, - "grad_norm": 0.205078125, - "learning_rate": 0.0002546043427419723, - "loss": 2.1704, + "epoch": 0.86, + "grad_norm": 0.2109375, + "learning_rate": 0.00014074296944348007, + "loss": 2.1447, "step": 5075 }, { - "epoch": 0.94, - "grad_norm": 0.1962890625, - "learning_rate": 0.0002542927647286505, - "loss": 2.1489, + "epoch": 0.86, + "grad_norm": 0.216796875, + "learning_rate": 0.000140607937813979, + "loss": 2.1338, "step": 5080 }, { - "epoch": 0.94, - "grad_norm": 0.201171875, - "learning_rate": 0.00025398104439757624, - "loss": 2.1703, + "epoch": 0.86, + "grad_norm": 0.212890625, + "learning_rate": 0.00014047281746290167, + "loss": 2.1485, "step": 5085 }, { - "epoch": 0.94, - "grad_norm": 0.1982421875, - "learning_rate": 0.0002536691825658629, - "loss": 2.1398, + "epoch": 0.86, + "grad_norm": 0.2060546875, + "learning_rate": 0.00014033760868546345, + "loss": 2.1682, "step": 5090 }, { - "epoch": 0.95, - "grad_norm": 0.1904296875, - "learning_rate": 0.00025335718005099474, - "loss": 2.1478, + "epoch": 0.86, + "grad_norm": 0.2109375, + "learning_rate": 0.00014020231177707307, + "loss": 2.1584, "step": 5095 }, { - "epoch": 0.95, - "grad_norm": 0.1904296875, - "learning_rate": 0.0002530450376708244, - "loss": 2.137, + "epoch": 0.86, + "grad_norm": 0.2109375, + "learning_rate": 0.00014006692703333171, + "loss": 2.1144, "step": 5100 }, { - "epoch": 0.95, - "grad_norm": 0.19921875, - "learning_rate": 0.00025273275624357165, - "loss": 2.1979, + "epoch": 0.86, + "grad_norm": 0.201171875, + "learning_rate": 0.00013993145475003243, + "loss": 2.1796, "step": 5105 }, { - "epoch": 0.95, - "grad_norm": 0.1962890625, - "learning_rate": 0.00025242033658782044, - "loss": 2.1232, + "epoch": 0.87, + "grad_norm": 0.205078125, + "learning_rate": 0.00013979589522315959, + "loss": 2.1514, "step": 5110 }, { - "epoch": 0.95, - "grad_norm": 0.2041015625, - "learning_rate": 0.00025210777952251723, - "loss": 2.1871, + "epoch": 0.87, + "grad_norm": 0.203125, + "learning_rate": 0.00013966024874888821, + "loss": 2.101, "step": 5115 }, { - "epoch": 0.95, - "grad_norm": 0.2041015625, - "learning_rate": 0.00025179508586696856, - "loss": 2.1422, + "epoch": 0.87, + "grad_norm": 0.2109375, + "learning_rate": 0.0001395245156235832, + "loss": 2.1363, "step": 5120 }, { - "epoch": 0.95, - "grad_norm": 0.205078125, - "learning_rate": 0.000251482256440839, - "loss": 2.1773, + "epoch": 0.87, + "grad_norm": 0.20703125, + "learning_rate": 0.00013938869614379883, + "loss": 2.145, "step": 5125 }, { - "epoch": 0.95, - "grad_norm": 0.2099609375, - "learning_rate": 0.0002511692920641491, - "loss": 2.167, + "epoch": 0.87, + "grad_norm": 0.2041015625, + "learning_rate": 0.000139252790606278, + "loss": 2.1134, "step": 5130 }, { - "epoch": 0.95, - "grad_norm": 0.201171875, - "learning_rate": 0.0002508561935572731, - "loss": 2.1768, + "epoch": 0.87, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001391167993079517, + "loss": 2.1702, "step": 5135 }, { - "epoch": 0.95, + "epoch": 0.87, "grad_norm": 0.205078125, - "learning_rate": 0.0002505429617409369, - "loss": 2.1799, + "learning_rate": 0.00013898072254593823, + "loss": 2.1752, "step": 5140 }, { - "epoch": 0.95, - "grad_norm": 0.205078125, - "learning_rate": 0.0002502295974362158, - "loss": 2.153, + "epoch": 0.87, + "grad_norm": 0.2060546875, + "learning_rate": 0.00013884456061754265, + "loss": 2.1614, "step": 5145 }, { - "epoch": 0.96, - "grad_norm": 0.1962890625, - "learning_rate": 0.0002499161014645324, - "loss": 2.1172, + "epoch": 0.87, + "grad_norm": 0.201171875, + "learning_rate": 0.00013870831382025602, + "loss": 2.1189, "step": 5150 }, { - "epoch": 0.96, - "grad_norm": 0.2021484375, - "learning_rate": 0.00024960247464765443, - "loss": 2.206, + "epoch": 0.87, + "grad_norm": 0.205078125, + "learning_rate": 0.00013857198245175497, + "loss": 2.1356, "step": 5155 }, { - "epoch": 0.96, - "grad_norm": 0.2001953125, - "learning_rate": 0.0002492887178076924, - "loss": 2.1683, + "epoch": 0.87, + "grad_norm": 0.20703125, + "learning_rate": 0.00013843556680990078, + "loss": 2.1685, "step": 5160 }, { - "epoch": 0.96, - "grad_norm": 0.197265625, - "learning_rate": 0.0002489748317670982, - "loss": 2.1484, + "epoch": 0.87, + "grad_norm": 0.205078125, + "learning_rate": 0.00013829906719273885, + "loss": 2.1494, "step": 5165 }, { - "epoch": 0.96, - "grad_norm": 0.201171875, - "learning_rate": 0.0002486608173486618, - "loss": 2.1443, + "epoch": 0.88, + "grad_norm": 0.2109375, + "learning_rate": 0.0001381624838984982, + "loss": 2.1311, "step": 5170 }, { - "epoch": 0.96, - "grad_norm": 0.197265625, - "learning_rate": 0.00024834667537550995, - "loss": 2.16, + "epoch": 0.88, + "grad_norm": 0.2158203125, + "learning_rate": 0.00013802581722559048, + "loss": 2.1802, "step": 5175 }, { - "epoch": 0.96, - "grad_norm": 0.19921875, - "learning_rate": 0.0002480324066711039, - "loss": 2.1605, + "epoch": 0.88, + "grad_norm": 0.2109375, + "learning_rate": 0.0001378890674726096, + "loss": 2.1341, "step": 5180 }, { - "epoch": 0.96, - "grad_norm": 0.19921875, - "learning_rate": 0.0002477180120592368, - "loss": 2.1522, + "epoch": 0.88, + "grad_norm": 0.2109375, + "learning_rate": 0.000137752234938331, + "loss": 2.1643, "step": 5185 }, { - "epoch": 0.96, - "grad_norm": 0.2001953125, - "learning_rate": 0.0002474034923640321, - "loss": 2.1217, + "epoch": 0.88, + "grad_norm": 0.20703125, + "learning_rate": 0.00013761531992171095, + "loss": 2.1469, "step": 5190 }, { - "epoch": 0.96, - "grad_norm": 0.1982421875, - "learning_rate": 0.00024708884840994095, - "loss": 2.2046, + "epoch": 0.88, + "grad_norm": 0.2099609375, + "learning_rate": 0.00013747832272188596, + "loss": 2.1857, "step": 5195 }, { - "epoch": 0.96, - "grad_norm": 0.2001953125, - "learning_rate": 0.00024677408102174027, - "loss": 2.1732, + "epoch": 0.88, + "grad_norm": 0.2138671875, + "learning_rate": 0.00013734124363817208, + "loss": 2.1803, "step": 5200 }, { - "epoch": 0.97, - "grad_norm": 0.2021484375, - "learning_rate": 0.00024645919102453065, - "loss": 2.1298, + "epoch": 0.88, + "grad_norm": 0.2099609375, + "learning_rate": 0.0001372040829700642, + "loss": 2.1367, "step": 5205 }, { - "epoch": 0.97, - "grad_norm": 0.19140625, - "learning_rate": 0.00024614417924373385, - "loss": 2.1284, + "epoch": 0.88, + "grad_norm": 0.2021484375, + "learning_rate": 0.00013706684101723562, + "loss": 2.1175, "step": 5210 }, { - "epoch": 0.97, - "grad_norm": 0.203125, - "learning_rate": 0.0002458290465050911, - "loss": 2.1746, + "epoch": 0.88, + "grad_norm": 0.2060546875, + "learning_rate": 0.00013692951807953708, + "loss": 2.1463, "step": 5215 }, { - "epoch": 0.97, - "grad_norm": 0.2001953125, - "learning_rate": 0.0002455137936346606, - "loss": 2.1432, + "epoch": 0.88, + "grad_norm": 0.2138671875, + "learning_rate": 0.00013679211445699632, + "loss": 2.1948, "step": 5220 }, { - "epoch": 0.97, - "grad_norm": 0.197265625, - "learning_rate": 0.0002451984214588153, - "loss": 2.1479, + "epoch": 0.88, + "grad_norm": 0.20703125, + "learning_rate": 0.0001366546304498173, + "loss": 2.1677, "step": 5225 }, { - "epoch": 0.97, - "grad_norm": 0.1982421875, - "learning_rate": 0.0002448829308042412, - "loss": 2.1422, + "epoch": 0.89, + "grad_norm": 0.208984375, + "learning_rate": 0.00013651706635837976, + "loss": 2.1749, "step": 5230 }, { - "epoch": 0.97, - "grad_norm": 0.1953125, - "learning_rate": 0.0002445673224979347, - "loss": 2.1339, + "epoch": 0.89, + "grad_norm": 0.2041015625, + "learning_rate": 0.00013637942248323828, + "loss": 2.1, "step": 5235 }, { - "epoch": 0.97, - "grad_norm": 0.1982421875, - "learning_rate": 0.0002442515973672005, - "loss": 2.1753, + "epoch": 0.89, + "grad_norm": 0.205078125, + "learning_rate": 0.00013624169912512173, + "loss": 2.1391, "step": 5240 }, { - "epoch": 0.97, - "grad_norm": 0.1953125, - "learning_rate": 0.0002439357562396496, - "loss": 2.1239, + "epoch": 0.89, + "grad_norm": 0.20703125, + "learning_rate": 0.00013610389658493276, + "loss": 2.1248, "step": 5245 }, { - "epoch": 0.97, - "grad_norm": 0.1982421875, - "learning_rate": 0.00024361979994319715, - "loss": 2.1903, + "epoch": 0.89, + "grad_norm": 0.2099609375, + "learning_rate": 0.00013596601516374697, + "loss": 2.1287, "step": 5250 }, { - "epoch": 0.97, - "grad_norm": 0.2021484375, - "learning_rate": 0.00024330372930606018, - "loss": 2.1487, + "epoch": 0.89, + "grad_norm": 0.2080078125, + "learning_rate": 0.00013582805516281217, + "loss": 2.1049, "step": 5255 }, { - "epoch": 0.98, - "grad_norm": 0.201171875, - "learning_rate": 0.00024298754515675533, - "loss": 2.1711, + "epoch": 0.89, + "grad_norm": 0.21484375, + "learning_rate": 0.0001356900168835481, + "loss": 2.1764, "step": 5260 }, { - "epoch": 0.98, - "grad_norm": 0.197265625, - "learning_rate": 0.0002426712483240969, - "loss": 2.1315, + "epoch": 0.89, + "grad_norm": 0.2392578125, + "learning_rate": 0.00013555190062754534, + "loss": 2.163, "step": 5265 }, { - "epoch": 0.98, - "grad_norm": 0.201171875, - "learning_rate": 0.00024235483963719453, - "loss": 2.191, + "epoch": 0.89, + "grad_norm": 0.2099609375, + "learning_rate": 0.00013541370669656487, + "loss": 2.1276, "step": 5270 }, { - "epoch": 0.98, - "grad_norm": 0.20703125, - "learning_rate": 0.00024203831992545096, - "loss": 2.1441, + "epoch": 0.89, + "grad_norm": 0.2109375, + "learning_rate": 0.00013527543539253742, + "loss": 2.1712, "step": 5275 }, { - "epoch": 0.98, - "grad_norm": 0.2021484375, - "learning_rate": 0.0002417216900185602, - "loss": 2.1895, + "epoch": 0.89, + "grad_norm": 0.208984375, + "learning_rate": 0.00013513708701756277, + "loss": 2.1763, "step": 5280 }, { - "epoch": 0.98, + "epoch": 0.9, "grad_norm": 0.2021484375, - "learning_rate": 0.00024140495074650487, - "loss": 2.2274, + "learning_rate": 0.000134998661873909, + "loss": 2.1273, "step": 5285 }, { - "epoch": 0.98, - "grad_norm": 0.197265625, - "learning_rate": 0.00024108810293955438, - "loss": 2.1568, + "epoch": 0.9, + "grad_norm": 0.2099609375, + "learning_rate": 0.00013486016026401202, + "loss": 2.1176, "step": 5290 }, { - "epoch": 0.98, - "grad_norm": 0.201171875, - "learning_rate": 0.0002407711474282627, - "loss": 2.1499, + "epoch": 0.9, + "grad_norm": 0.203125, + "learning_rate": 0.00013472158249047477, + "loss": 2.1221, "step": 5295 }, { - "epoch": 0.98, - "grad_norm": 0.2021484375, - "learning_rate": 0.00024045408504346606, - "loss": 2.1909, + "epoch": 0.9, + "grad_norm": 0.2138671875, + "learning_rate": 0.00013458292885606656, + "loss": 2.1508, "step": 5300 }, { - "epoch": 0.98, - "grad_norm": 0.1962890625, - "learning_rate": 0.00024013691661628074, - "loss": 2.1499, + "epoch": 0.9, + "grad_norm": 0.20703125, + "learning_rate": 0.00013444419966372252, + "loss": 2.1472, "step": 5305 }, { - "epoch": 0.99, - "grad_norm": 0.2041015625, - "learning_rate": 0.00023981964297810118, - "loss": 2.1664, + "epoch": 0.9, + "grad_norm": 0.21875, + "learning_rate": 0.00013430539521654277, + "loss": 2.2225, "step": 5310 }, { - "epoch": 0.99, - "grad_norm": 0.197265625, - "learning_rate": 0.00023950226496059743, - "loss": 2.1702, + "epoch": 0.9, + "grad_norm": 0.2021484375, + "learning_rate": 0.0001341665158177919, + "loss": 2.1097, "step": 5315 }, { - "epoch": 0.99, - "grad_norm": 0.1982421875, - "learning_rate": 0.00023918478339571335, - "loss": 2.1252, + "epoch": 0.9, + "grad_norm": 0.205078125, + "learning_rate": 0.00013402756177089827, + "loss": 2.1191, "step": 5320 }, { - "epoch": 0.99, - "grad_norm": 0.2001953125, - "learning_rate": 0.00023886719911566404, - "loss": 2.1412, + "epoch": 0.9, + "grad_norm": 0.2119140625, + "learning_rate": 0.0001338885333794533, + "loss": 2.1713, "step": 5325 }, { - "epoch": 0.99, - "grad_norm": 0.203125, - "learning_rate": 0.0002385495129529339, - "loss": 2.1895, + "epoch": 0.9, + "grad_norm": 0.205078125, + "learning_rate": 0.00013374943094721084, + "loss": 2.1795, "step": 5330 }, { - "epoch": 0.99, - "grad_norm": 0.1943359375, - "learning_rate": 0.0002382317257402745, - "loss": 2.1669, + "epoch": 0.9, + "grad_norm": 0.2138671875, + "learning_rate": 0.00013361025477808656, + "loss": 2.1675, "step": 5335 }, { - "epoch": 0.99, - "grad_norm": 0.1982421875, - "learning_rate": 0.0002379138383107021, - "loss": 2.1341, + "epoch": 0.9, + "grad_norm": 0.2099609375, + "learning_rate": 0.00013347100517615716, + "loss": 2.1828, "step": 5340 }, { - "epoch": 0.99, - "grad_norm": 0.1943359375, - "learning_rate": 0.0002375958514974959, - "loss": 2.1688, + "epoch": 0.91, + "grad_norm": 0.2099609375, + "learning_rate": 0.0001333316824456598, + "loss": 2.1384, "step": 5345 }, { - "epoch": 0.99, - "grad_norm": 0.2001953125, - "learning_rate": 0.00023727776613419543, - "loss": 2.1949, + "epoch": 0.91, + "grad_norm": 0.2119140625, + "learning_rate": 0.00013319228689099154, + "loss": 2.1835, "step": 5350 }, { - "epoch": 0.99, - "grad_norm": 0.1953125, - "learning_rate": 0.00023695958305459854, - "loss": 2.1693, + "epoch": 0.91, + "grad_norm": 0.205078125, + "learning_rate": 0.00013305281881670827, + "loss": 2.1461, "step": 5355 }, { - "epoch": 0.99, - "grad_norm": 0.203125, - "learning_rate": 0.0002366413030927594, - "loss": 2.1514, + "epoch": 0.91, + "grad_norm": 0.208984375, + "learning_rate": 0.00013291327852752458, + "loss": 2.1473, "step": 5360 }, { - "epoch": 1.0, - "grad_norm": 0.2021484375, - "learning_rate": 0.00023632292708298587, - "loss": 2.1296, + "epoch": 0.91, + "grad_norm": 0.22265625, + "learning_rate": 0.00013277366632831271, + "loss": 2.1584, "step": 5365 }, { - "epoch": 1.0, - "grad_norm": 0.201171875, - "learning_rate": 0.00023600445585983791, - "loss": 2.1054, - "step": 5370 + "epoch": 0.91, + "grad_norm": 0.21484375, + "learning_rate": 0.0001326339825241021, + "loss": 2.1336, + "step": 5370 }, { - "epoch": 1.0, - "grad_norm": 0.19921875, - "learning_rate": 0.0002356858902581248, - "loss": 2.1498, + "epoch": 0.91, + "grad_norm": 0.205078125, + "learning_rate": 0.00013249422742007852, + "loss": 2.1211, "step": 5375 }, { - "epoch": 1.0, - "grad_norm": 0.1943359375, - "learning_rate": 0.00023536723111290328, - "loss": 2.1251, + "epoch": 0.91, + "grad_norm": 0.2060546875, + "learning_rate": 0.00013235440132158366, + "loss": 2.1066, "step": 5380 }, { - "epoch": 1.0, - "grad_norm": 0.1953125, - "learning_rate": 0.0002350484792594754, - "loss": 2.1655, + "epoch": 0.91, + "grad_norm": 0.212890625, + "learning_rate": 0.00013221450453411413, + "loss": 2.1636, "step": 5385 }, { - "epoch": 1.0, - "grad_norm": 0.421875, - "learning_rate": 0.00023472963553338613, - "loss": 2.1002, - "step": 5390 - }, - { - "epoch": 1.0, - "eval_loss": 2.155019521713257, - "eval_runtime": 171.4248, - "eval_samples_per_second": 28.286, - "eval_steps_per_second": 3.541, + "epoch": 0.91, + "grad_norm": 0.20703125, + "learning_rate": 0.00013207453736332117, + "loss": 2.1426, "step": 5390 }, { - "epoch": 1.0, - "grad_norm": 0.2001953125, - "learning_rate": 0.00023441070077042118, - "loss": 2.0676, + "epoch": 0.91, + "grad_norm": 0.2138671875, + "learning_rate": 0.0001319345001150097, + "loss": 2.178, "step": 5395 }, { - "epoch": 1.0, - "grad_norm": 0.201171875, - "learning_rate": 0.0002340916758066051, - "loss": 2.0957, + "epoch": 0.91, + "grad_norm": 0.205078125, + "learning_rate": 0.0001317943930951378, + "loss": 2.1224, "step": 5400 }, { - "epoch": 1.0, - "grad_norm": 0.205078125, - "learning_rate": 0.00023377256147819872, - "loss": 2.0928, + "epoch": 0.92, + "grad_norm": 0.2216796875, + "learning_rate": 0.00013165421660981592, + "loss": 2.1353, "step": 5405 }, { - "epoch": 1.0, - "grad_norm": 0.2041015625, - "learning_rate": 0.00023345335862169723, - "loss": 2.1057, + "epoch": 0.92, + "grad_norm": 0.2119140625, + "learning_rate": 0.00013151397096530635, + "loss": 2.1219, "step": 5410 }, { - "epoch": 1.0, - "grad_norm": 0.2021484375, - "learning_rate": 0.00023313406807382782, - "loss": 2.0921, + "epoch": 0.92, + "grad_norm": 0.2158203125, + "learning_rate": 0.0001313736564680224, + "loss": 2.1358, "step": 5415 }, { - "epoch": 1.01, - "grad_norm": 0.19921875, - "learning_rate": 0.0002328146906715476, - "loss": 2.0752, + "epoch": 0.92, + "grad_norm": 0.208984375, + "learning_rate": 0.0001312332734245279, + "loss": 2.1331, "step": 5420 }, { - "epoch": 1.01, - "grad_norm": 0.201171875, - "learning_rate": 0.00023249522725204123, - "loss": 2.0937, + "epoch": 0.92, + "grad_norm": 0.2060546875, + "learning_rate": 0.0001310928221415364, + "loss": 2.126, "step": 5425 }, { - "epoch": 1.01, - "grad_norm": 0.20703125, - "learning_rate": 0.0002321756786527189, - "loss": 2.09, + "epoch": 0.92, + "grad_norm": 0.2119140625, + "learning_rate": 0.00013095230292591055, + "loss": 2.1418, "step": 5430 }, { - "epoch": 1.01, - "grad_norm": 0.2021484375, - "learning_rate": 0.00023185604571121418, - "loss": 2.1037, + "epoch": 0.92, + "grad_norm": 0.2099609375, + "learning_rate": 0.0001308117160846614, + "loss": 2.1893, "step": 5435 }, { - "epoch": 1.01, - "grad_norm": 0.20703125, - "learning_rate": 0.00023153632926538166, - "loss": 2.1509, + "epoch": 0.92, + "grad_norm": 0.208984375, + "learning_rate": 0.0001306710619249478, + "loss": 2.1608, "step": 5440 }, { - "epoch": 1.01, - "grad_norm": 0.201171875, - "learning_rate": 0.0002312165301532948, - "loss": 2.1312, + "epoch": 0.92, + "grad_norm": 0.2041015625, + "learning_rate": 0.00013053034075407555, + "loss": 2.1653, "step": 5445 }, { - "epoch": 1.01, - "grad_norm": 0.20703125, - "learning_rate": 0.00023089664921324373, - "loss": 2.1174, + "epoch": 0.92, + "grad_norm": 0.2109375, + "learning_rate": 0.00013038955287949708, + "loss": 2.141, "step": 5450 }, { - "epoch": 1.01, - "grad_norm": 0.201171875, - "learning_rate": 0.00023057668728373315, - "loss": 2.1352, + "epoch": 0.92, + "grad_norm": 0.212890625, + "learning_rate": 0.00013024869860881036, + "loss": 2.145, "step": 5455 }, { - "epoch": 1.01, - "grad_norm": 0.2041015625, - "learning_rate": 0.00023025664520348005, - "loss": 2.1088, + "epoch": 0.92, + "grad_norm": 0.2099609375, + "learning_rate": 0.00013010777824975852, + "loss": 2.1504, "step": 5460 }, { - "epoch": 1.01, - "grad_norm": 0.203125, - "learning_rate": 0.00022993652381141138, - "loss": 2.1271, + "epoch": 0.93, + "grad_norm": 0.2119140625, + "learning_rate": 0.00012996679211022908, + "loss": 2.1545, "step": 5465 }, { - "epoch": 1.01, - "grad_norm": 0.212890625, - "learning_rate": 0.00022961632394666222, - "loss": 2.1281, + "epoch": 0.93, + "grad_norm": 0.2109375, + "learning_rate": 0.00012982574049825324, + "loss": 2.118, "step": 5470 }, { - "epoch": 1.02, - "grad_norm": 0.2041015625, - "learning_rate": 0.0002292960464485732, - "loss": 2.1294, + "epoch": 0.93, + "grad_norm": 0.203125, + "learning_rate": 0.00012968462372200517, + "loss": 2.1523, "step": 5475 }, { - "epoch": 1.02, - "grad_norm": 0.2041015625, - "learning_rate": 0.00022897569215668843, + "epoch": 0.93, + "grad_norm": 0.2109375, + "learning_rate": 0.00012954344208980167, "loss": 2.1289, "step": 5480 }, { - "epoch": 1.02, - "grad_norm": 0.2080078125, - "learning_rate": 0.00022865526191075347, - "loss": 2.0997, + "epoch": 0.93, + "grad_norm": 0.2138671875, + "learning_rate": 0.000129402195910101, + "loss": 2.15, "step": 5485 }, { - "epoch": 1.02, - "grad_norm": 0.20703125, - "learning_rate": 0.00022833475655071274, - "loss": 2.1106, + "epoch": 0.93, + "grad_norm": 0.2158203125, + "learning_rate": 0.00012926088549150246, + "loss": 2.1693, "step": 5490 }, { - "epoch": 1.02, - "grad_norm": 0.21484375, - "learning_rate": 0.0002280141769167078, - "loss": 2.1005, + "epoch": 0.93, + "grad_norm": 0.2041015625, + "learning_rate": 0.00012911951114274588, + "loss": 2.1559, "step": 5495 }, { - "epoch": 1.02, - "grad_norm": 0.203125, - "learning_rate": 0.0002276935238490748, - "loss": 2.0904, + "epoch": 0.93, + "grad_norm": 0.2158203125, + "learning_rate": 0.0001289780731727106, + "loss": 2.1352, "step": 5500 }, { - "epoch": 1.02, - "grad_norm": 0.203125, - "learning_rate": 0.00022737279818834237, - "loss": 2.114, + "epoch": 0.93, + "grad_norm": 0.2109375, + "learning_rate": 0.00012883657189041495, + "loss": 2.1314, "step": 5505 }, { - "epoch": 1.02, + "epoch": 0.93, "grad_norm": 0.208984375, - "learning_rate": 0.0002270520007752294, - "loss": 2.1366, + "learning_rate": 0.00012869500760501572, + "loss": 2.1777, "step": 5510 }, { - "epoch": 1.02, - "grad_norm": 0.216796875, - "learning_rate": 0.00022673113245064296, - "loss": 2.1823, + "epoch": 0.93, + "grad_norm": 0.2109375, + "learning_rate": 0.00012855338062580732, + "loss": 2.1191, "step": 5515 }, { - "epoch": 1.02, - "grad_norm": 0.2080078125, - "learning_rate": 0.0002264101940556759, - "loss": 2.1347, + "epoch": 0.93, + "grad_norm": 0.2099609375, + "learning_rate": 0.000128411691262221, + "loss": 2.1499, "step": 5520 }, { - "epoch": 1.03, - "grad_norm": 0.2138671875, - "learning_rate": 0.00022608918643160486, - "loss": 2.1263, + "epoch": 0.94, + "grad_norm": 0.212890625, + "learning_rate": 0.00012826993982382448, + "loss": 2.1447, "step": 5525 }, { - "epoch": 1.03, - "grad_norm": 0.2041015625, - "learning_rate": 0.0002257681104198878, - "loss": 2.132, + "epoch": 0.94, + "grad_norm": 0.2099609375, + "learning_rate": 0.00012812812662032098, + "loss": 2.1268, "step": 5530 }, { - "epoch": 1.03, - "grad_norm": 0.208984375, - "learning_rate": 0.00022544696686216208, - "loss": 2.1122, + "epoch": 0.94, + "grad_norm": 0.2119140625, + "learning_rate": 0.0001279862519615487, + "loss": 2.15, "step": 5535 }, { - "epoch": 1.03, - "grad_norm": 0.20703125, - "learning_rate": 0.00022512575660024205, - "loss": 2.1077, + "epoch": 0.94, + "grad_norm": 0.205078125, + "learning_rate": 0.0001278443161574802, + "loss": 2.1364, "step": 5540 }, { - "epoch": 1.03, - "grad_norm": 0.2099609375, - "learning_rate": 0.00022480448047611695, - "loss": 2.1157, + "epoch": 0.94, + "grad_norm": 0.203125, + "learning_rate": 0.00012770231951822144, + "loss": 2.1466, "step": 5545 }, { - "epoch": 1.03, - "grad_norm": 0.2041015625, - "learning_rate": 0.0002244831393319486, - "loss": 2.0735, + "epoch": 0.94, + "grad_norm": 0.208984375, + "learning_rate": 0.00012756026235401154, + "loss": 2.1302, "step": 5550 }, { - "epoch": 1.03, - "grad_norm": 0.2080078125, - "learning_rate": 0.00022416173401006932, - "loss": 2.1343, + "epoch": 0.94, + "grad_norm": 0.2099609375, + "learning_rate": 0.00012741814497522165, + "loss": 2.1373, "step": 5555 }, { - "epoch": 1.03, - "grad_norm": 0.2060546875, - "learning_rate": 0.00022384026535297963, - "loss": 2.1022, + "epoch": 0.94, + "grad_norm": 0.212890625, + "learning_rate": 0.0001272759676923546, + "loss": 2.1432, "step": 5560 }, { - "epoch": 1.03, - "grad_norm": 0.208984375, - "learning_rate": 0.00022351873420334615, - "loss": 2.0955, + "epoch": 0.94, + "grad_norm": 0.2060546875, + "learning_rate": 0.00012713373081604397, + "loss": 2.128, "step": 5565 }, { - "epoch": 1.03, - "grad_norm": 0.220703125, - "learning_rate": 0.0002231971414039991, - "loss": 2.13, + "epoch": 0.94, + "grad_norm": 0.208984375, + "learning_rate": 0.00012699143465705378, + "loss": 2.1319, "step": 5570 }, { - "epoch": 1.03, - "grad_norm": 0.201171875, - "learning_rate": 0.0002228754877979306, - "loss": 2.0688, + "epoch": 0.94, + "grad_norm": 0.2041015625, + "learning_rate": 0.0001268490795262773, + "loss": 2.1415, "step": 5575 }, { - "epoch": 1.04, - "grad_norm": 0.2119140625, - "learning_rate": 0.0002225537742282919, - "loss": 2.1275, + "epoch": 0.95, + "grad_norm": 0.2109375, + "learning_rate": 0.00012670666573473676, + "loss": 2.21, "step": 5580 }, { - "epoch": 1.04, - "grad_norm": 0.2109375, - "learning_rate": 0.0002222320015383916, - "loss": 2.0992, + "epoch": 0.95, + "grad_norm": 0.20703125, + "learning_rate": 0.00012656419359358261, + "loss": 2.1752, "step": 5585 }, { - "epoch": 1.04, - "grad_norm": 0.2119140625, - "learning_rate": 0.00022191017057169318, - "loss": 2.1206, + "epoch": 0.95, + "grad_norm": 0.2080078125, + "learning_rate": 0.00012642166341409277, + "loss": 2.1218, "step": 5590 }, { - "epoch": 1.04, - "grad_norm": 0.2099609375, - "learning_rate": 0.0002215882821718129, - "loss": 2.148, + "epoch": 0.95, + "grad_norm": 0.2041015625, + "learning_rate": 0.00012627907550767187, + "loss": 2.1361, "step": 5595 }, { - "epoch": 1.04, - "grad_norm": 0.2158203125, - "learning_rate": 0.0002212663371825176, - "loss": 2.1268, + "epoch": 0.95, + "grad_norm": 0.20703125, + "learning_rate": 0.0001261364301858507, + "loss": 2.1305, "step": 5600 }, { - "epoch": 1.04, - "grad_norm": 0.21875, - "learning_rate": 0.00022094433644772248, - "loss": 2.0779, + "epoch": 0.95, + "grad_norm": 0.2041015625, + "learning_rate": 0.00012599372776028557, + "loss": 2.1319, "step": 5605 }, { - "epoch": 1.04, - "grad_norm": 0.20703125, - "learning_rate": 0.00022062228081148874, - "loss": 2.1113, + "epoch": 0.95, + "grad_norm": 0.208984375, + "learning_rate": 0.0001258509685427575, + "loss": 2.1559, "step": 5610 }, { - "epoch": 1.04, - "grad_norm": 0.208984375, - "learning_rate": 0.00022030017111802165, - "loss": 2.1148, + "epoch": 0.95, + "grad_norm": 0.212890625, + "learning_rate": 0.00012570815284517153, + "loss": 2.1181, "step": 5615 }, { - "epoch": 1.04, - "grad_norm": 0.203125, - "learning_rate": 0.00021997800821166807, - "loss": 2.1307, + "epoch": 0.95, + "grad_norm": 0.21875, + "learning_rate": 0.00012556528097955617, + "loss": 2.1424, "step": 5620 }, { - "epoch": 1.04, - "grad_norm": 0.212890625, - "learning_rate": 0.00021965579293691442, - "loss": 2.1319, + "epoch": 0.95, + "grad_norm": 0.2216796875, + "learning_rate": 0.00012542235325806267, + "loss": 2.1025, "step": 5625 }, { - "epoch": 1.04, + "epoch": 0.95, "grad_norm": 0.2099609375, - "learning_rate": 0.00021933352613838435, - "loss": 2.1281, + "learning_rate": 0.00012527936999296428, + "loss": 2.2013, "step": 5630 }, { - "epoch": 1.05, - "grad_norm": 0.21484375, - "learning_rate": 0.00021901120866083651, - "loss": 2.1421, + "epoch": 0.95, + "grad_norm": 0.205078125, + "learning_rate": 0.00012513633149665557, + "loss": 2.1427, "step": 5635 }, { - "epoch": 1.05, - "grad_norm": 0.2216796875, - "learning_rate": 0.00021868884134916265, - "loss": 2.1655, + "epoch": 0.96, + "grad_norm": 0.2119140625, + "learning_rate": 0.00012499323808165183, + "loss": 2.1794, "step": 5640 }, { - "epoch": 1.05, - "grad_norm": 0.2119140625, - "learning_rate": 0.00021836642504838473, - "loss": 2.1082, + "epoch": 0.96, + "grad_norm": 0.205078125, + "learning_rate": 0.00012485009006058835, + "loss": 2.1722, "step": 5645 }, { - "epoch": 1.05, - "grad_norm": 0.2119140625, - "learning_rate": 0.00021804396060365355, - "loss": 2.1059, + "epoch": 0.96, + "grad_norm": 0.2177734375, + "learning_rate": 0.00012470688774621964, + "loss": 2.1241, "step": 5650 }, { - "epoch": 1.05, - "grad_norm": 0.2109375, - "learning_rate": 0.00021772144886024583, - "loss": 2.1674, + "epoch": 0.96, + "grad_norm": 0.21484375, + "learning_rate": 0.00012456363145141894, + "loss": 2.1439, "step": 5655 }, { - "epoch": 1.05, - "grad_norm": 0.2109375, - "learning_rate": 0.00021739889066356232, - "loss": 2.1147, + "epoch": 0.96, + "grad_norm": 0.2060546875, + "learning_rate": 0.00012442032148917738, + "loss": 2.1363, "step": 5660 }, { - "epoch": 1.05, - "grad_norm": 0.2060546875, - "learning_rate": 0.00021707628685912572, - "loss": 2.12, + "epoch": 0.96, + "grad_norm": 0.212890625, + "learning_rate": 0.00012427695817260329, + "loss": 2.1426, "step": 5665 }, { - "epoch": 1.05, - "grad_norm": 0.22265625, - "learning_rate": 0.00021675363829257803, - "loss": 2.0997, + "epoch": 0.96, + "grad_norm": 0.2119140625, + "learning_rate": 0.0001241335418149217, + "loss": 2.1132, "step": 5670 }, { - "epoch": 1.05, - "grad_norm": 0.205078125, - "learning_rate": 0.00021643094580967874, - "loss": 2.0945, + "epoch": 0.96, + "grad_norm": 0.208984375, + "learning_rate": 0.00012399007272947341, + "loss": 2.1441, "step": 5675 }, { - "epoch": 1.05, - "grad_norm": 0.208984375, - "learning_rate": 0.00021610821025630243, - "loss": 2.1583, + "epoch": 0.96, + "grad_norm": 0.2158203125, + "learning_rate": 0.00012384655122971445, + "loss": 2.1381, "step": 5680 }, { - "epoch": 1.05, - "grad_norm": 0.208984375, - "learning_rate": 0.00021578543247843647, - "loss": 2.1116, + "epoch": 0.96, + "grad_norm": 0.2119140625, + "learning_rate": 0.00012370297762921538, + "loss": 2.1614, "step": 5685 }, { - "epoch": 1.06, + "epoch": 0.96, "grad_norm": 0.2060546875, - "learning_rate": 0.00021546261332217918, - "loss": 2.0938, + "learning_rate": 0.0001235593522416606, + "loss": 2.1412, "step": 5690 }, { - "epoch": 1.06, - "grad_norm": 0.2080078125, - "learning_rate": 0.00021513975363373703, - "loss": 2.086, + "epoch": 0.96, + "grad_norm": 0.2138671875, + "learning_rate": 0.00012341567538084764, + "loss": 2.1509, "step": 5695 }, { - "epoch": 1.06, - "grad_norm": 0.2177734375, - "learning_rate": 0.00021481685425942302, - "loss": 2.1388, + "epoch": 0.97, + "grad_norm": 0.22265625, + "learning_rate": 0.00012327194736068653, + "loss": 2.1336, "step": 5700 }, { - "epoch": 1.06, - "grad_norm": 0.2099609375, - "learning_rate": 0.00021449391604565392, - "loss": 2.0998, + "epoch": 0.97, + "grad_norm": 0.212890625, + "learning_rate": 0.00012312816849519899, + "loss": 2.1298, "step": 5705 }, { - "epoch": 1.06, - "grad_norm": 0.2109375, - "learning_rate": 0.00021417093983894844, - "loss": 2.1114, + "epoch": 0.97, + "grad_norm": 0.216796875, + "learning_rate": 0.00012298433909851785, + "loss": 2.189, "step": 5710 }, { - "epoch": 1.06, - "grad_norm": 0.216796875, - "learning_rate": 0.0002138479264859249, - "loss": 2.1571, + "epoch": 0.97, + "grad_norm": 0.2216796875, + "learning_rate": 0.00012284045948488648, + "loss": 2.1234, "step": 5715 }, { - "epoch": 1.06, - "grad_norm": 0.2119140625, - "learning_rate": 0.000213524876833299, - "loss": 2.1065, + "epoch": 0.97, + "grad_norm": 0.2041015625, + "learning_rate": 0.00012269652996865776, + "loss": 2.1426, "step": 5720 }, { - "epoch": 1.06, - "grad_norm": 0.20703125, - "learning_rate": 0.00021320179172788155, - "loss": 2.1273, + "epoch": 0.97, + "grad_norm": 0.2177734375, + "learning_rate": 0.00012255255086429372, + "loss": 2.2038, "step": 5725 }, { - "epoch": 1.06, - "grad_norm": 0.2216796875, - "learning_rate": 0.0002128786720165763, - "loss": 2.1065, + "epoch": 0.97, + "grad_norm": 0.2177734375, + "learning_rate": 0.00012240852248636473, + "loss": 2.1829, "step": 5730 }, { - "epoch": 1.06, - "grad_norm": 0.205078125, - "learning_rate": 0.00021255551854637762, - "loss": 2.1065, + "epoch": 0.97, + "grad_norm": 0.2099609375, + "learning_rate": 0.00012226444514954878, + "loss": 2.1347, "step": 5735 }, { - "epoch": 1.06, - "grad_norm": 0.216796875, - "learning_rate": 0.00021223233216436857, - "loss": 2.0759, + "epoch": 0.97, + "grad_norm": 0.2041015625, + "learning_rate": 0.00012212031916863082, + "loss": 2.1792, "step": 5740 }, { - "epoch": 1.07, - "grad_norm": 0.21875, - "learning_rate": 0.0002119091137177183, - "loss": 2.0995, + "epoch": 0.97, + "grad_norm": 0.2080078125, + "learning_rate": 0.0001219761448585021, + "loss": 2.1241, "step": 5745 }, { - "epoch": 1.07, - "grad_norm": 0.2099609375, - "learning_rate": 0.00021158586405368017, - "loss": 2.1513, + "epoch": 0.97, + "grad_norm": 0.21484375, + "learning_rate": 0.00012183192253415952, + "loss": 2.1887, "step": 5750 }, { - "epoch": 1.07, - "grad_norm": 0.2158203125, - "learning_rate": 0.0002112625840195893, - "loss": 2.1142, + "epoch": 0.97, + "grad_norm": 0.208984375, + "learning_rate": 0.00012168765251070473, + "loss": 2.1419, "step": 5755 }, { - "epoch": 1.07, - "grad_norm": 0.216796875, - "learning_rate": 0.0002109392744628603, - "loss": 2.1145, + "epoch": 0.98, + "grad_norm": 0.2119140625, + "learning_rate": 0.00012154333510334375, + "loss": 2.1507, "step": 5760 }, { - "epoch": 1.07, - "grad_norm": 0.212890625, - "learning_rate": 0.00021061593623098533, - "loss": 2.1038, + "epoch": 0.98, + "grad_norm": 0.2138671875, + "learning_rate": 0.00012139897062738606, + "loss": 2.1603, "step": 5765 }, { - "epoch": 1.07, - "grad_norm": 0.2041015625, - "learning_rate": 0.00021029257017153162, - "loss": 2.1134, + "epoch": 0.98, + "grad_norm": 0.2119140625, + "learning_rate": 0.00012125455939824393, + "loss": 2.1563, "step": 5770 }, { - "epoch": 1.07, - "grad_norm": 0.2109375, - "learning_rate": 0.00020996917713213945, - "loss": 2.0712, + "epoch": 0.98, + "grad_norm": 0.2119140625, + "learning_rate": 0.0001211101017314319, + "loss": 2.1201, "step": 5775 }, { - "epoch": 1.07, + "epoch": 0.98, "grad_norm": 0.2158203125, - "learning_rate": 0.00020964575796051974, - "loss": 2.1444, + "learning_rate": 0.00012096559794256581, + "loss": 2.1329, "step": 5780 }, { - "epoch": 1.07, - "grad_norm": 0.2041015625, - "learning_rate": 0.00020932231350445188, - "loss": 2.1248, + "epoch": 0.98, + "grad_norm": 0.208984375, + "learning_rate": 0.00012082104834736244, + "loss": 2.1179, "step": 5785 }, { - "epoch": 1.07, - "grad_norm": 0.2109375, - "learning_rate": 0.0002089988446117817, - "loss": 2.1342, + "epoch": 0.98, + "grad_norm": 0.208984375, + "learning_rate": 0.0001206764532616385, + "loss": 2.1557, "step": 5790 }, { - "epoch": 1.08, - "grad_norm": 0.208984375, - "learning_rate": 0.00020867535213041883, - "loss": 2.1064, + "epoch": 0.98, + "grad_norm": 0.2080078125, + "learning_rate": 0.00012053181300131022, + "loss": 2.1671, "step": 5795 }, { - "epoch": 1.08, - "grad_norm": 0.2109375, - "learning_rate": 0.00020835183690833496, - "loss": 2.1467, + "epoch": 0.98, + "grad_norm": 0.2138671875, + "learning_rate": 0.00012038712788239236, + "loss": 2.1472, "step": 5800 }, { - "epoch": 1.08, - "grad_norm": 0.220703125, - "learning_rate": 0.00020802829979356134, - "loss": 2.093, + "epoch": 0.98, + "grad_norm": 0.2158203125, + "learning_rate": 0.00012024239822099792, + "loss": 2.1443, "step": 5805 }, { - "epoch": 1.08, - "grad_norm": 0.203125, - "learning_rate": 0.0002077047416341864, - "loss": 2.0891, + "epoch": 0.98, + "grad_norm": 0.212890625, + "learning_rate": 0.000120097624333337, + "loss": 2.1556, "step": 5810 }, { - "epoch": 1.08, - "grad_norm": 0.208984375, - "learning_rate": 0.00020738116327835413, - "loss": 2.1186, + "epoch": 0.98, + "grad_norm": 0.21484375, + "learning_rate": 0.00011995280653571641, + "loss": 2.122, "step": 5815 }, { - "epoch": 1.08, - "grad_norm": 0.2158203125, - "learning_rate": 0.00020705756557426108, - "loss": 2.1244, + "epoch": 0.99, + "grad_norm": 0.212890625, + "learning_rate": 0.00011980794514453897, + "loss": 2.0965, "step": 5820 }, { - "epoch": 1.08, - "grad_norm": 0.2109375, - "learning_rate": 0.00020673394937015477, - "loss": 2.1047, + "epoch": 0.99, + "grad_norm": 0.205078125, + "learning_rate": 0.00011966304047630263, + "loss": 2.1735, "step": 5825 }, { - "epoch": 1.08, - "grad_norm": 0.212890625, - "learning_rate": 0.0002064103155143311, - "loss": 2.0977, + "epoch": 0.99, + "grad_norm": 0.21484375, + "learning_rate": 0.00011951809284759993, + "loss": 2.1382, "step": 5830 }, { - "epoch": 1.08, - "grad_norm": 0.2119140625, - "learning_rate": 0.00020608666485513215, - "loss": 2.1302, + "epoch": 0.99, + "grad_norm": 0.2138671875, + "learning_rate": 0.00011937310257511732, + "loss": 2.1571, "step": 5835 }, { - "epoch": 1.08, - "grad_norm": 0.21484375, - "learning_rate": 0.00020576299824094432, - "loss": 2.1351, + "epoch": 0.99, + "grad_norm": 0.2041015625, + "learning_rate": 0.00011922806997563435, + "loss": 2.1056, "step": 5840 }, { - "epoch": 1.08, - "grad_norm": 0.2177734375, - "learning_rate": 0.00020543931652019555, - "loss": 2.0831, + "epoch": 0.99, + "grad_norm": 0.2099609375, + "learning_rate": 0.0001190829953660231, + "loss": 2.1016, "step": 5845 }, { - "epoch": 1.09, - "grad_norm": 0.20703125, - "learning_rate": 0.00020511562054135354, - "loss": 2.1272, + "epoch": 0.99, + "grad_norm": 0.212890625, + "learning_rate": 0.00011893787906324738, + "loss": 2.1113, "step": 5850 }, { - "epoch": 1.09, - "grad_norm": 0.2119140625, - "learning_rate": 0.0002047919111529234, - "loss": 2.1544, + "epoch": 0.99, + "grad_norm": 0.2080078125, + "learning_rate": 0.0001187927213843622, + "loss": 2.1492, "step": 5855 }, { - "epoch": 1.09, + "epoch": 0.99, "grad_norm": 0.2119140625, - "learning_rate": 0.0002044681892034452, - "loss": 2.135, + "learning_rate": 0.0001186475226465128, + "loss": 2.1852, "step": 5860 }, { - "epoch": 1.09, + "epoch": 0.99, "grad_norm": 0.21484375, - "learning_rate": 0.00020414445554149208, - "loss": 2.1514, + "learning_rate": 0.00011850228316693428, + "loss": 2.1586, "step": 5865 }, { - "epoch": 1.09, + "epoch": 0.99, "grad_norm": 0.2080078125, - "learning_rate": 0.00020382071101566788, - "loss": 2.067, + "learning_rate": 0.00011835700326295067, + "loss": 2.156, "step": 5870 }, { - "epoch": 1.09, - "grad_norm": 0.2197265625, - "learning_rate": 0.00020349695647460485, - "loss": 2.1237, + "epoch": 1.0, + "grad_norm": 0.212890625, + "learning_rate": 0.00011821168325197436, + "loss": 2.1291, "step": 5875 }, { - "epoch": 1.09, - "grad_norm": 0.2119140625, - "learning_rate": 0.00020317319276696161, - "loss": 2.1594, + "epoch": 1.0, + "grad_norm": 0.220703125, + "learning_rate": 0.00011806632345150538, + "loss": 2.1686, "step": 5880 }, { - "epoch": 1.09, - "grad_norm": 0.2119140625, - "learning_rate": 0.00020284942074142066, - "loss": 2.1181, + "epoch": 1.0, + "grad_norm": 0.2099609375, + "learning_rate": 0.00011792092417913063, + "loss": 2.1898, "step": 5885 }, { - "epoch": 1.09, - "grad_norm": 0.208984375, - "learning_rate": 0.0002025256412466864, - "loss": 2.1467, + "epoch": 1.0, + "grad_norm": 0.2109375, + "learning_rate": 0.0001177754857525233, + "loss": 2.1896, "step": 5890 }, { - "epoch": 1.09, - "grad_norm": 0.2158203125, - "learning_rate": 0.00020220185513148277, - "loss": 2.1358, + "epoch": 1.0, + "grad_norm": 0.2021484375, + "learning_rate": 0.00011763000848944212, + "loss": 2.1315, "step": 5895 }, { - "epoch": 1.09, - "grad_norm": 0.21484375, - "learning_rate": 0.00020187806324455104, - "loss": 2.1006, + "epoch": 1.0, + "grad_norm": 0.205078125, + "learning_rate": 0.00011748449270773066, + "loss": 2.1473, "step": 5900 }, { - "epoch": 1.1, - "grad_norm": 0.2177734375, - "learning_rate": 0.00020155426643464773, - "loss": 2.1606, + "epoch": 1.0, + "eval_loss": 2.1470842361450195, + "eval_runtime": 161.6105, + "eval_samples_per_second": 16.441, + "eval_steps_per_second": 2.061, + "step": 5904 + }, + { + "epoch": 1.0, + "grad_norm": 0.2197265625, + "learning_rate": 0.00011733893872531664, + "loss": 2.1194, "step": 5905 }, { - "epoch": 1.1, - "grad_norm": 0.21484375, - "learning_rate": 0.00020123046555054215, - "loss": 2.1072, + "epoch": 1.0, + "grad_norm": 0.208984375, + "learning_rate": 0.00011719334686021129, + "loss": 2.1479, "step": 5910 }, { - "epoch": 1.1, - "grad_norm": 0.2158203125, - "learning_rate": 0.00020090666144101436, - "loss": 2.0934, + "epoch": 1.0, + "grad_norm": 0.2099609375, + "learning_rate": 0.00011704771743050851, + "loss": 2.1207, "step": 5915 }, { - "epoch": 1.1, - "grad_norm": 0.2099609375, - "learning_rate": 0.00020058285495485275, - "loss": 2.086, + "epoch": 1.0, + "grad_norm": 0.2109375, + "learning_rate": 0.00011690205075438438, + "loss": 2.157, "step": 5920 }, { - "epoch": 1.1, - "grad_norm": 0.21484375, - "learning_rate": 0.00020025904694085202, - "loss": 2.1406, + "epoch": 1.0, + "grad_norm": 0.2158203125, + "learning_rate": 0.00011675634715009631, + "loss": 2.1193, "step": 5925 }, { - "epoch": 1.1, - "grad_norm": 0.220703125, - "learning_rate": 0.00019993523824781104, - "loss": 2.1658, + "epoch": 1.0, + "grad_norm": 0.2119140625, + "learning_rate": 0.00011661060693598233, + "loss": 2.1361, "step": 5930 }, { - "epoch": 1.1, - "grad_norm": 0.21484375, - "learning_rate": 0.0001996114297245301, - "loss": 2.0964, + "epoch": 1.01, + "grad_norm": 0.2109375, + "learning_rate": 0.00011646483043046063, + "loss": 2.124, "step": 5935 }, { - "epoch": 1.1, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001992876222198094, - "loss": 2.1084, + "epoch": 1.01, + "grad_norm": 0.208984375, + "learning_rate": 0.00011631901795202849, + "loss": 2.1033, "step": 5940 }, { - "epoch": 1.1, - "grad_norm": 0.2099609375, - "learning_rate": 0.00019896381658244622, - "loss": 2.0951, + "epoch": 1.01, + "grad_norm": 0.212890625, + "learning_rate": 0.0001161731698192619, + "loss": 2.1761, "step": 5945 }, { - "epoch": 1.1, - "grad_norm": 0.21484375, - "learning_rate": 0.00019864001366123307, - "loss": 2.1141, + "epoch": 1.01, + "grad_norm": 0.2177734375, + "learning_rate": 0.0001160272863508147, + "loss": 2.1375, "step": 5950 }, { - "epoch": 1.1, - "grad_norm": 0.2138671875, - "learning_rate": 0.00019831621430495532, - "loss": 2.0948, + "epoch": 1.01, + "grad_norm": 0.220703125, + "learning_rate": 0.00011588136786541802, + "loss": 2.1671, "step": 5955 }, { - "epoch": 1.11, + "epoch": 1.01, "grad_norm": 0.21484375, - "learning_rate": 0.00019799241936238908, - "loss": 2.1407, + "learning_rate": 0.00011573541468187936, + "loss": 2.1324, "step": 5960 }, { - "epoch": 1.11, - "grad_norm": 0.2177734375, - "learning_rate": 0.00019766862968229865, - "loss": 2.1298, + "epoch": 1.01, + "grad_norm": 0.216796875, + "learning_rate": 0.00011558942711908212, + "loss": 2.1454, "step": 5965 }, { - "epoch": 1.11, - "grad_norm": 0.2216796875, - "learning_rate": 0.00019734484611343467, - "loss": 2.1166, + "epoch": 1.01, + "grad_norm": 0.2099609375, + "learning_rate": 0.00011544340549598482, + "loss": 2.1152, "step": 5970 }, { - "epoch": 1.11, - "grad_norm": 0.2119140625, - "learning_rate": 0.00019702106950453193, - "loss": 2.0888, + "epoch": 1.01, + "grad_norm": 0.212890625, + "learning_rate": 0.00011529735013162036, + "loss": 2.1125, "step": 5975 }, { - "epoch": 1.11, - "grad_norm": 0.2158203125, - "learning_rate": 0.00019669730070430663, - "loss": 2.12, + "epoch": 1.01, + "grad_norm": 0.2109375, + "learning_rate": 0.00011515126134509533, + "loss": 2.1649, "step": 5980 }, { - "epoch": 1.11, - "grad_norm": 0.2060546875, - "learning_rate": 0.0001963735405614549, - "loss": 2.1181, + "epoch": 1.01, + "grad_norm": 0.2109375, + "learning_rate": 0.00011500513945558947, + "loss": 2.1339, "step": 5985 }, { - "epoch": 1.11, - "grad_norm": 0.2109375, - "learning_rate": 0.00019604978992464976, - "loss": 2.0892, + "epoch": 1.01, + "grad_norm": 0.22265625, + "learning_rate": 0.00011485898478235464, + "loss": 2.1462, "step": 5990 }, { - "epoch": 1.11, - "grad_norm": 0.21484375, - "learning_rate": 0.00019572604964253972, - "loss": 2.1449, + "epoch": 1.02, + "grad_norm": 0.216796875, + "learning_rate": 0.00011471279764471452, + "loss": 2.1817, "step": 5995 }, { - "epoch": 1.11, - "grad_norm": 0.2109375, - "learning_rate": 0.00019540232056374578, - "loss": 2.0995, + "epoch": 1.02, + "grad_norm": 0.21484375, + "learning_rate": 0.00011456657836206366, + "loss": 2.1261, "step": 6000 }, { - "epoch": 1.11, - "grad_norm": 0.216796875, - "learning_rate": 0.0001950786035368598, - "loss": 2.138, + "epoch": 1.02, + "grad_norm": 0.22265625, + "learning_rate": 0.00011442032725386675, + "loss": 2.1029, "step": 6005 }, { - "epoch": 1.12, - "grad_norm": 0.20703125, - "learning_rate": 0.00019475489941044204, - "loss": 2.1263, + "epoch": 1.02, + "grad_norm": 0.2158203125, + "learning_rate": 0.00011427404463965814, + "loss": 2.1269, "step": 6010 }, { - "epoch": 1.12, - "grad_norm": 0.2060546875, - "learning_rate": 0.00019443120903301871, - "loss": 2.1197, + "epoch": 1.02, + "grad_norm": 0.20703125, + "learning_rate": 0.00011412773083904094, + "loss": 2.116, "step": 6015 }, { - "epoch": 1.12, - "grad_norm": 0.22265625, - "learning_rate": 0.00019410753325308042, - "loss": 2.0856, + "epoch": 1.02, + "grad_norm": 0.216796875, + "learning_rate": 0.00011398138617168642, + "loss": 2.1198, "step": 6020 }, { - "epoch": 1.12, - "grad_norm": 0.2177734375, - "learning_rate": 0.00019378387291907909, - "loss": 2.1015, + "epoch": 1.02, + "grad_norm": 0.21875, + "learning_rate": 0.0001138350109573333, + "loss": 2.1262, "step": 6025 }, { - "epoch": 1.12, - "grad_norm": 0.208984375, - "learning_rate": 0.0001934602288794263, - "loss": 2.0782, + "epoch": 1.02, + "grad_norm": 0.21484375, + "learning_rate": 0.00011368860551578702, + "loss": 2.1268, "step": 6030 }, { - "epoch": 1.12, - "grad_norm": 0.21875, - "learning_rate": 0.00019313660198249107, - "loss": 2.1392, + "epoch": 1.02, + "grad_norm": 0.2138671875, + "learning_rate": 0.00011354217016691905, + "loss": 2.157, "step": 6035 }, { - "epoch": 1.12, + "epoch": 1.02, "grad_norm": 0.212890625, - "learning_rate": 0.00019281299307659713, - "loss": 2.0854, + "learning_rate": 0.0001133957052306663, + "loss": 2.1132, "step": 6040 }, { - "epoch": 1.12, - "grad_norm": 0.2099609375, - "learning_rate": 0.00019248940301002155, - "loss": 2.1192, + "epoch": 1.02, + "grad_norm": 0.2158203125, + "learning_rate": 0.00011324921102703015, + "loss": 2.1324, "step": 6045 }, { - "epoch": 1.12, - "grad_norm": 0.21484375, - "learning_rate": 0.00019216583263099147, - "loss": 2.1065, + "epoch": 1.02, + "grad_norm": 0.2099609375, + "learning_rate": 0.00011310268787607603, + "loss": 2.1372, "step": 6050 }, { - "epoch": 1.12, - "grad_norm": 0.2197265625, - "learning_rate": 0.0001918422827876829, - "loss": 2.0768, + "epoch": 1.03, + "grad_norm": 0.224609375, + "learning_rate": 0.00011295613609793267, + "loss": 2.1227, "step": 6055 }, { - "epoch": 1.12, - "grad_norm": 0.2080078125, - "learning_rate": 0.00019151875432821773, - "loss": 2.0851, + "epoch": 1.03, + "grad_norm": 0.2119140625, + "learning_rate": 0.00011280955601279127, + "loss": 2.1311, "step": 6060 }, { - "epoch": 1.13, - "grad_norm": 0.2138671875, - "learning_rate": 0.00019119524810066175, - "loss": 2.1027, + "epoch": 1.03, + "grad_norm": 0.2109375, + "learning_rate": 0.0001126629479409048, + "loss": 2.1219, "step": 6065 }, { - "epoch": 1.13, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001908717649530228, - "loss": 2.1008, + "epoch": 1.03, + "grad_norm": 0.216796875, + "learning_rate": 0.00011251631220258753, + "loss": 2.0692, "step": 6070 }, { - "epoch": 1.13, - "grad_norm": 0.220703125, - "learning_rate": 0.0001905483057332479, - "loss": 2.0943, + "epoch": 1.03, + "grad_norm": 0.2177734375, + "learning_rate": 0.00011236964911821413, + "loss": 2.1236, "step": 6075 }, { - "epoch": 1.13, - "grad_norm": 0.2138671875, - "learning_rate": 0.00019022487128922148, - "loss": 2.1203, + "epoch": 1.03, + "grad_norm": 0.2119140625, + "learning_rate": 0.00011222295900821896, + "loss": 2.1425, "step": 6080 }, { - "epoch": 1.13, - "grad_norm": 0.2177734375, - "learning_rate": 0.0001899014624687631, - "loss": 2.1122, + "epoch": 1.03, + "grad_norm": 0.21484375, + "learning_rate": 0.00011207624219309544, + "loss": 2.1312, "step": 6085 }, { - "epoch": 1.13, - "grad_norm": 0.2099609375, - "learning_rate": 0.00018957808011962486, - "loss": 2.1437, + "epoch": 1.03, + "grad_norm": 0.2275390625, + "learning_rate": 0.00011192949899339544, + "loss": 2.1528, "step": 6090 }, { - "epoch": 1.13, - "grad_norm": 0.2099609375, - "learning_rate": 0.00018925472508948992, - "loss": 2.073, + "epoch": 1.03, + "grad_norm": 0.2119140625, + "learning_rate": 0.00011178272972972833, + "loss": 2.1495, "step": 6095 }, { - "epoch": 1.13, - "grad_norm": 0.2099609375, - "learning_rate": 0.00018893139822596938, - "loss": 2.1426, + "epoch": 1.03, + "grad_norm": 0.212890625, + "learning_rate": 0.00011163593472276048, + "loss": 2.1504, "step": 6100 }, { - "epoch": 1.13, - "grad_norm": 0.21484375, - "learning_rate": 0.00018860810037660085, - "loss": 2.0974, + "epoch": 1.03, + "grad_norm": 0.2099609375, + "learning_rate": 0.00011148911429321456, + "loss": 2.0733, "step": 6105 }, { - "epoch": 1.13, - "grad_norm": 0.2109375, - "learning_rate": 0.00018828483238884564, - "loss": 2.1141, + "epoch": 1.03, + "grad_norm": 0.21875, + "learning_rate": 0.00011134226876186871, + "loss": 2.0977, "step": 6110 }, { - "epoch": 1.13, - "grad_norm": 0.212890625, - "learning_rate": 0.00018796159511008702, - "loss": 2.1222, + "epoch": 1.04, + "grad_norm": 0.216796875, + "learning_rate": 0.00011119539844955595, + "loss": 2.1138, "step": 6115 }, { - "epoch": 1.14, - "grad_norm": 0.2138671875, - "learning_rate": 0.00018763838938762756, - "loss": 2.1244, + "epoch": 1.04, + "grad_norm": 0.2177734375, + "learning_rate": 0.00011104850367716344, + "loss": 2.1027, "step": 6120 }, { - "epoch": 1.14, - "grad_norm": 0.2177734375, - "learning_rate": 0.0001873152160686871, - "loss": 2.141, + "epoch": 1.04, + "grad_norm": 0.216796875, + "learning_rate": 0.00011090158476563175, + "loss": 2.1559, "step": 6125 }, { - "epoch": 1.14, - "grad_norm": 0.216796875, - "learning_rate": 0.00018699207600040077, - "loss": 2.0931, + "epoch": 1.04, + "grad_norm": 0.212890625, + "learning_rate": 0.00011075464203595427, + "loss": 2.1822, "step": 6130 }, { - "epoch": 1.14, - "grad_norm": 0.2099609375, - "learning_rate": 0.00018666897002981626, - "loss": 2.104, + "epoch": 1.04, + "grad_norm": 0.2177734375, + "learning_rate": 0.00011060767580917634, + "loss": 2.1362, "step": 6135 }, { - "epoch": 1.14, - "grad_norm": 0.212890625, - "learning_rate": 0.00018634589900389217, - "loss": 2.0929, + "epoch": 1.04, + "grad_norm": 0.2138671875, + "learning_rate": 0.00011046068640639464, + "loss": 2.1073, "step": 6140 }, { - "epoch": 1.14, - "grad_norm": 0.2119140625, - "learning_rate": 0.00018602286376949515, - "loss": 2.08, + "epoch": 1.04, + "grad_norm": 0.22265625, + "learning_rate": 0.00011031367414875658, + "loss": 2.1463, "step": 6145 }, { - "epoch": 1.14, - "grad_norm": 0.2255859375, - "learning_rate": 0.00018569986517339844, - "loss": 2.0757, + "epoch": 1.04, + "grad_norm": 0.224609375, + "learning_rate": 0.0001101666393574594, + "loss": 2.1327, "step": 6150 }, { - "epoch": 1.14, - "grad_norm": 0.205078125, - "learning_rate": 0.00018537690406227888, - "loss": 2.1296, + "epoch": 1.04, + "grad_norm": 0.2119140625, + "learning_rate": 0.00011001958235374963, + "loss": 2.1137, "step": 6155 }, { - "epoch": 1.14, - "grad_norm": 0.2177734375, - "learning_rate": 0.00018505398128271515, - "loss": 2.113, + "epoch": 1.04, + "grad_norm": 0.216796875, + "learning_rate": 0.0001098725034589223, + "loss": 2.1158, "step": 6160 }, { - "epoch": 1.14, - "grad_norm": 0.21484375, - "learning_rate": 0.0001847310976811856, - "loss": 2.1245, + "epoch": 1.04, + "grad_norm": 0.2197265625, + "learning_rate": 0.00010972540299432033, + "loss": 2.1705, "step": 6165 }, { - "epoch": 1.14, - "grad_norm": 0.21484375, - "learning_rate": 0.00018440825410406575, - "loss": 2.1231, + "epoch": 1.04, + "grad_norm": 0.228515625, + "learning_rate": 0.00010957828128133363, + "loss": 2.1357, "step": 6170 }, { - "epoch": 1.15, - "grad_norm": 0.2109375, - "learning_rate": 0.00018408545139762627, - "loss": 2.1119, + "epoch": 1.05, + "grad_norm": 0.2158203125, + "learning_rate": 0.00010943113864139868, + "loss": 2.1597, "step": 6175 }, { - "epoch": 1.15, - "grad_norm": 0.2119140625, - "learning_rate": 0.00018376269040803057, - "loss": 2.1047, + "epoch": 1.05, + "grad_norm": 0.2265625, + "learning_rate": 0.00010928397539599766, + "loss": 2.1408, "step": 6180 }, { - "epoch": 1.15, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001834399719813328, - "loss": 2.0886, + "epoch": 1.05, + "grad_norm": 0.220703125, + "learning_rate": 0.00010913679186665766, + "loss": 2.1515, "step": 6185 }, { - "epoch": 1.15, - "grad_norm": 0.208984375, - "learning_rate": 0.00018311729696347562, - "loss": 2.1106, + "epoch": 1.05, + "grad_norm": 0.224609375, + "learning_rate": 0.00010898958837495021, + "loss": 2.1577, "step": 6190 }, { - "epoch": 1.15, - "grad_norm": 0.21484375, - "learning_rate": 0.0001827946662002877, - "loss": 2.0957, + "epoch": 1.05, + "grad_norm": 0.224609375, + "learning_rate": 0.00010884236524249039, + "loss": 2.1414, "step": 6195 }, { - "epoch": 1.15, - "grad_norm": 0.212890625, - "learning_rate": 0.000182472080537482, - "loss": 2.1069, + "epoch": 1.05, + "grad_norm": 0.21875, + "learning_rate": 0.0001086951227909362, + "loss": 2.0958, "step": 6200 }, { - "epoch": 1.15, + "epoch": 1.05, "grad_norm": 0.216796875, - "learning_rate": 0.00018214954082065282, - "loss": 2.1129, + "learning_rate": 0.00010854786134198786, + "loss": 2.0813, "step": 6205 }, { - "epoch": 1.15, + "epoch": 1.05, "grad_norm": 0.2197265625, - "learning_rate": 0.00018182704789527452, - "loss": 2.147, + "learning_rate": 0.00010840058121738712, + "loss": 2.107, "step": 6210 }, { - "epoch": 1.15, - "grad_norm": 0.2138671875, - "learning_rate": 0.00018150460260669846, - "loss": 2.1113, + "epoch": 1.05, + "grad_norm": 0.2177734375, + "learning_rate": 0.00010825328273891646, + "loss": 2.1572, "step": 6215 }, { - "epoch": 1.15, - "grad_norm": 0.21484375, - "learning_rate": 0.0001811822058001512, - "loss": 2.0983, + "epoch": 1.05, + "grad_norm": 0.220703125, + "learning_rate": 0.00010810596622839854, + "loss": 2.1621, "step": 6220 }, { - "epoch": 1.15, - "grad_norm": 0.2158203125, - "learning_rate": 0.00018085985832073237, - "loss": 2.1165, + "epoch": 1.05, + "grad_norm": 0.2197265625, + "learning_rate": 0.00010795863200769538, + "loss": 2.1263, "step": 6225 }, { - "epoch": 1.16, - "grad_norm": 0.212890625, - "learning_rate": 0.00018053756101341206, - "loss": 2.0929, + "epoch": 1.06, + "grad_norm": 0.216796875, + "learning_rate": 0.00010781128039870769, + "loss": 2.113, "step": 6230 }, { - "epoch": 1.16, - "grad_norm": 0.2236328125, - "learning_rate": 0.000180215314723029, - "loss": 2.1439, + "epoch": 1.06, + "grad_norm": 0.216796875, + "learning_rate": 0.0001076639117233742, + "loss": 2.1485, "step": 6235 }, { - "epoch": 1.16, - "grad_norm": 0.2158203125, - "learning_rate": 0.0001798931202942882, - "loss": 2.1208, + "epoch": 1.06, + "grad_norm": 0.2255859375, + "learning_rate": 0.00010751652630367086, + "loss": 2.0961, "step": 6240 }, { - "epoch": 1.16, - "grad_norm": 0.2080078125, - "learning_rate": 0.0001795709785717586, - "loss": 2.1192, + "epoch": 1.06, + "grad_norm": 0.21875, + "learning_rate": 0.0001073691244616103, + "loss": 2.1367, "step": 6245 }, { - "epoch": 1.16, - "grad_norm": 0.2109375, - "learning_rate": 0.00017924889039987117, - "loss": 2.1322, + "epoch": 1.06, + "grad_norm": 0.23828125, + "learning_rate": 0.00010722170651924091, + "loss": 2.1195, "step": 6250 }, { - "epoch": 1.16, - "grad_norm": 0.216796875, - "learning_rate": 0.00017892685662291622, - "loss": 2.1014, + "epoch": 1.06, + "grad_norm": 0.2294921875, + "learning_rate": 0.00010707427279864637, + "loss": 2.1521, "step": 6255 }, { - "epoch": 1.16, - "grad_norm": 0.2109375, - "learning_rate": 0.0001786048780850418, - "loss": 2.1299, + "epoch": 1.06, + "grad_norm": 0.21875, + "learning_rate": 0.00010692682362194481, + "loss": 2.1207, "step": 6260 }, { - "epoch": 1.16, - "grad_norm": 0.2119140625, - "learning_rate": 0.00017828295563025091, - "loss": 2.0805, + "epoch": 1.06, + "grad_norm": 0.2197265625, + "learning_rate": 0.00010677935931128807, + "loss": 2.1476, "step": 6265 }, { - "epoch": 1.16, - "grad_norm": 0.220703125, - "learning_rate": 0.00017796109010239977, - "loss": 2.1155, + "epoch": 1.06, + "grad_norm": 0.21875, + "learning_rate": 0.0001066318801888611, + "loss": 2.0966, "step": 6270 }, { - "epoch": 1.16, - "grad_norm": 0.2119140625, - "learning_rate": 0.00017763928234519518, - "loss": 2.1166, + "epoch": 1.06, + "grad_norm": 0.224609375, + "learning_rate": 0.00010648438657688123, + "loss": 2.1013, "step": 6275 }, { - "epoch": 1.17, - "grad_norm": 0.2197265625, - "learning_rate": 0.0001773175332021925, - "loss": 2.1122, + "epoch": 1.06, + "grad_norm": 0.2216796875, + "learning_rate": 0.00010633687879759738, + "loss": 2.1487, "step": 6280 }, { - "epoch": 1.17, - "grad_norm": 0.21484375, - "learning_rate": 0.00017699584351679363, - "loss": 2.1151, + "epoch": 1.06, + "grad_norm": 0.2197265625, + "learning_rate": 0.00010618935717328944, + "loss": 2.1477, "step": 6285 }, { - "epoch": 1.17, - "grad_norm": 0.21484375, - "learning_rate": 0.0001766742141322444, - "loss": 2.1076, + "epoch": 1.07, + "grad_norm": 0.2236328125, + "learning_rate": 0.00010604182202626765, + "loss": 2.1778, "step": 6290 }, { - "epoch": 1.17, - "grad_norm": 0.216796875, - "learning_rate": 0.00017635264589163275, - "loss": 2.1442, + "epoch": 1.07, + "grad_norm": 0.234375, + "learning_rate": 0.0001058942736788717, + "loss": 2.1494, "step": 6295 }, { - "epoch": 1.17, - "grad_norm": 0.2158203125, - "learning_rate": 0.0001760311396378863, - "loss": 2.1386, + "epoch": 1.07, + "grad_norm": 0.2138671875, + "learning_rate": 0.00010574671245347005, + "loss": 2.1321, "step": 6300 }, { - "epoch": 1.17, + "epoch": 1.07, "grad_norm": 0.216796875, - "learning_rate": 0.00017570969621377003, - "loss": 2.1099, + "learning_rate": 0.00010559913867245952, + "loss": 2.1529, "step": 6305 }, { - "epoch": 1.17, - "grad_norm": 0.216796875, - "learning_rate": 0.00017538831646188443, - "loss": 2.1427, - "step": 6310 + "epoch": 1.07, + "grad_norm": 0.21875, + "learning_rate": 0.00010545155265826414, + "loss": 2.1089, + "step": 6310 }, { - "epoch": 1.17, - "grad_norm": 0.2177734375, - "learning_rate": 0.00017506700122466297, - "loss": 2.1288, + "epoch": 1.07, + "grad_norm": 0.22265625, + "learning_rate": 0.00010530395473333477, + "loss": 2.1105, "step": 6315 }, { - "epoch": 1.17, - "grad_norm": 0.220703125, - "learning_rate": 0.00017474575134437007, - "loss": 2.1197, + "epoch": 1.07, + "grad_norm": 0.2197265625, + "learning_rate": 0.00010515634522014828, + "loss": 2.0971, "step": 6320 }, { - "epoch": 1.17, - "grad_norm": 0.220703125, - "learning_rate": 0.00017442456766309877, - "loss": 2.0971, + "epoch": 1.07, + "grad_norm": 0.2236328125, + "learning_rate": 0.00010500872444120686, + "loss": 2.1279, "step": 6325 }, { - "epoch": 1.17, - "grad_norm": 0.216796875, - "learning_rate": 0.00017410345102276872, - "loss": 2.1008, + "epoch": 1.07, + "grad_norm": 0.234375, + "learning_rate": 0.0001048610927190373, + "loss": 2.1178, "step": 6330 }, { - "epoch": 1.18, - "grad_norm": 0.212890625, - "learning_rate": 0.0001737824022651236, - "loss": 2.102, + "epoch": 1.07, + "grad_norm": 0.2197265625, + "learning_rate": 0.00010471345037619032, + "loss": 2.1238, "step": 6335 }, { - "epoch": 1.18, - "grad_norm": 0.2197265625, - "learning_rate": 0.00017346142223172926, - "loss": 2.1308, + "epoch": 1.07, + "grad_norm": 0.2236328125, + "learning_rate": 0.0001045657977352398, + "loss": 2.1127, "step": 6340 }, { - "epoch": 1.18, - "grad_norm": 0.212890625, - "learning_rate": 0.0001731405117639715, - "loss": 2.1104, + "epoch": 1.07, + "grad_norm": 0.2177734375, + "learning_rate": 0.0001044181351187822, + "loss": 2.1348, "step": 6345 }, { - "epoch": 1.18, - "grad_norm": 0.2138671875, - "learning_rate": 0.0001728196717030536, - "loss": 2.0971, + "epoch": 1.08, + "grad_norm": 0.224609375, + "learning_rate": 0.00010427046284943572, + "loss": 2.1502, "step": 6350 }, { - "epoch": 1.18, - "grad_norm": 0.21875, - "learning_rate": 0.00017249890288999453, - "loss": 2.0961, + "epoch": 1.08, + "grad_norm": 0.2255859375, + "learning_rate": 0.0001041227812498396, + "loss": 2.1288, "step": 6355 }, { - "epoch": 1.18, + "epoch": 1.08, "grad_norm": 0.216796875, - "learning_rate": 0.00017217820616562615, - "loss": 2.0973, + "learning_rate": 0.00010397509064265359, + "loss": 2.1578, "step": 6360 }, { - "epoch": 1.18, - "grad_norm": 0.2158203125, - "learning_rate": 0.00017185758237059172, - "loss": 2.1169, + "epoch": 1.08, + "grad_norm": 0.2265625, + "learning_rate": 0.00010382739135055703, + "loss": 2.1656, "step": 6365 }, { - "epoch": 1.18, - "grad_norm": 0.2119140625, - "learning_rate": 0.000171537032345343, - "loss": 2.1253, + "epoch": 1.08, + "grad_norm": 0.220703125, + "learning_rate": 0.00010367968369624825, + "loss": 2.1261, "step": 6370 }, { - "epoch": 1.18, - "grad_norm": 0.2119140625, - "learning_rate": 0.00017121655693013856, - "loss": 2.1127, + "epoch": 1.08, + "grad_norm": 0.21875, + "learning_rate": 0.00010353196800244382, + "loss": 2.1418, "step": 6375 }, { - "epoch": 1.18, - "grad_norm": 0.2138671875, - "learning_rate": 0.0001708961569650414, - "loss": 2.0914, + "epoch": 1.08, + "grad_norm": 0.21875, + "learning_rate": 0.00010338424459187801, + "loss": 2.1163, "step": 6380 }, { - "epoch": 1.18, - "grad_norm": 0.2158203125, - "learning_rate": 0.00017057583328991668, - "loss": 2.0782, + "epoch": 1.08, + "grad_norm": 0.220703125, + "learning_rate": 0.00010323651378730179, + "loss": 2.114, "step": 6385 }, { - "epoch": 1.19, - "grad_norm": 0.212890625, - "learning_rate": 0.00017025558674442972, - "loss": 2.1294, + "epoch": 1.08, + "grad_norm": 0.2265625, + "learning_rate": 0.0001030887759114823, + "loss": 2.0651, "step": 6390 }, { - "epoch": 1.19, - "grad_norm": 0.2119140625, - "learning_rate": 0.00016993541816804334, - "loss": 2.1139, + "epoch": 1.08, + "grad_norm": 0.21484375, + "learning_rate": 0.00010294103128720227, + "loss": 2.1278, "step": 6395 }, { - "epoch": 1.19, - "grad_norm": 0.216796875, - "learning_rate": 0.0001696153284000163, - "loss": 2.0751, + "epoch": 1.08, + "grad_norm": 0.224609375, + "learning_rate": 0.00010279328023725905, + "loss": 2.1356, "step": 6400 }, { - "epoch": 1.19, - "grad_norm": 0.2177734375, - "learning_rate": 0.00016929531827940066, - "loss": 2.11, + "epoch": 1.08, + "grad_norm": 0.2197265625, + "learning_rate": 0.00010264552308446403, + "loss": 2.1141, "step": 6405 }, { - "epoch": 1.19, - "grad_norm": 0.21875, - "learning_rate": 0.00016897538864503968, - "loss": 2.1733, + "epoch": 1.09, + "grad_norm": 0.220703125, + "learning_rate": 0.00010249776015164197, + "loss": 2.0926, "step": 6410 }, { - "epoch": 1.19, - "grad_norm": 0.2158203125, - "learning_rate": 0.00016865554033556574, - "loss": 2.1227, + "epoch": 1.09, + "grad_norm": 0.224609375, + "learning_rate": 0.00010234999176163026, + "loss": 2.1441, "step": 6415 }, { - "epoch": 1.19, - "grad_norm": 0.2138671875, - "learning_rate": 0.00016833577418939785, - "loss": 2.1076, + "epoch": 1.09, + "grad_norm": 0.224609375, + "learning_rate": 0.00010220221823727822, + "loss": 2.15, "step": 6420 }, { - "epoch": 1.19, - "grad_norm": 0.2197265625, - "learning_rate": 0.00016801609104473986, - "loss": 2.113, + "epoch": 1.09, + "grad_norm": 0.2177734375, + "learning_rate": 0.00010205443990144636, + "loss": 2.126, "step": 6425 }, { - "epoch": 1.19, - "grad_norm": 0.22265625, - "learning_rate": 0.0001676964917395779, - "loss": 2.1067, + "epoch": 1.09, + "grad_norm": 0.2265625, + "learning_rate": 0.0001019066570770057, + "loss": 2.1257, "step": 6430 }, { - "epoch": 1.19, - "grad_norm": 0.2158203125, - "learning_rate": 0.00016737697711167836, - "loss": 2.094, + "epoch": 1.09, + "grad_norm": 0.228515625, + "learning_rate": 0.00010175887008683712, + "loss": 2.1286, "step": 6435 }, { - "epoch": 1.19, - "grad_norm": 0.2158203125, - "learning_rate": 0.00016705754799858585, - "loss": 2.0662, + "epoch": 1.09, + "grad_norm": 0.2255859375, + "learning_rate": 0.00010161107925383054, + "loss": 2.1173, "step": 6440 }, { - "epoch": 1.2, - "grad_norm": 0.220703125, - "learning_rate": 0.0001667382052376204, - "loss": 2.1005, + "epoch": 1.09, + "grad_norm": 0.21875, + "learning_rate": 0.00010146328490088428, + "loss": 2.1478, "step": 6445 }, { - "epoch": 1.2, - "grad_norm": 0.212890625, - "learning_rate": 0.00016641894966587618, - "loss": 2.1408, + "epoch": 1.09, + "grad_norm": 0.2216796875, + "learning_rate": 0.00010131548735090437, + "loss": 2.134, "step": 6450 }, { - "epoch": 1.2, - "grad_norm": 0.2158203125, - "learning_rate": 0.00016609978212021843, - "loss": 2.104, + "epoch": 1.09, + "grad_norm": 0.22265625, + "learning_rate": 0.00010116768692680387, + "loss": 2.1342, "step": 6455 }, { - "epoch": 1.2, - "grad_norm": 0.21484375, - "learning_rate": 0.00016578070343728181, - "loss": 2.1289, + "epoch": 1.09, + "grad_norm": 0.2294921875, + "learning_rate": 0.00010101988395150203, + "loss": 2.1318, "step": 6460 }, { - "epoch": 1.2, - "grad_norm": 0.2119140625, - "learning_rate": 0.00016546171445346811, - "loss": 2.0986, + "epoch": 1.09, + "grad_norm": 0.2216796875, + "learning_rate": 0.00010087207874792374, + "loss": 2.1647, "step": 6465 }, { - "epoch": 1.2, - "grad_norm": 0.2158203125, - "learning_rate": 0.00016514281600494378, - "loss": 2.1056, + "epoch": 1.1, + "grad_norm": 0.224609375, + "learning_rate": 0.00010072427163899874, + "loss": 2.1257, "step": 6470 }, { - "epoch": 1.2, - "grad_norm": 0.2177734375, - "learning_rate": 0.0001648240089276382, - "loss": 2.1461, + "epoch": 1.1, + "grad_norm": 0.2158203125, + "learning_rate": 0.0001005764629476609, + "loss": 2.1106, "step": 6475 }, { - "epoch": 1.2, - "grad_norm": 0.2216796875, - "learning_rate": 0.00016450529405724097, - "loss": 2.0915, + "epoch": 1.1, + "grad_norm": 0.224609375, + "learning_rate": 0.0001004286529968476, + "loss": 2.1002, "step": 6480 }, { - "epoch": 1.2, - "grad_norm": 0.2138671875, - "learning_rate": 0.00016418667222920029, - "loss": 2.086, + "epoch": 1.1, + "grad_norm": 0.22265625, + "learning_rate": 0.00010028084210949895, + "loss": 2.1074, "step": 6485 }, { - "epoch": 1.2, - "grad_norm": 0.220703125, - "learning_rate": 0.00016386814427872025, - "loss": 2.1414, + "epoch": 1.1, + "grad_norm": 0.2294921875, + "learning_rate": 0.00010013303060855708, + "loss": 2.0886, "step": 6490 }, { - "epoch": 1.21, - "grad_norm": 0.224609375, - "learning_rate": 0.00016354971104075888, - "loss": 2.128, + "epoch": 1.1, + "grad_norm": 0.2255859375, + "learning_rate": 9.998521881696551e-05, + "loss": 2.0777, "step": 6495 }, { - "epoch": 1.21, - "grad_norm": 0.2138671875, - "learning_rate": 0.000163231373350026, - "loss": 2.1093, + "epoch": 1.1, + "grad_norm": 0.2216796875, + "learning_rate": 9.98374070576684e-05, + "loss": 2.1192, "step": 6500 }, { - "epoch": 1.21, - "grad_norm": 0.212890625, - "learning_rate": 0.00016291313204098092, - "loss": 2.1172, + "epoch": 1.1, + "grad_norm": 0.2080078125, + "learning_rate": 9.968959565360973e-05, + "loss": 2.103, "step": 6505 }, { - "epoch": 1.21, - "grad_norm": 0.2177734375, - "learning_rate": 0.00016259498794783043, - "loss": 2.1398, + "epoch": 1.1, + "grad_norm": 0.2138671875, + "learning_rate": 9.954178492773278e-05, + "loss": 2.1614, "step": 6510 }, { - "epoch": 1.21, + "epoch": 1.1, "grad_norm": 0.2197265625, - "learning_rate": 0.00016227694190452626, - "loss": 2.1342, + "learning_rate": 9.939397520297949e-05, + "loss": 2.1397, "step": 6515 }, { - "epoch": 1.21, - "grad_norm": 0.2158203125, - "learning_rate": 0.00016195899474476345, - "loss": 2.1023, + "epoch": 1.1, + "grad_norm": 0.2216796875, + "learning_rate": 9.924616680228933e-05, + "loss": 2.0756, "step": 6520 }, { - "epoch": 1.21, - "grad_norm": 0.220703125, - "learning_rate": 0.00016164114730197744, - "loss": 2.0859, + "epoch": 1.11, + "grad_norm": 0.2216796875, + "learning_rate": 9.909836004859908e-05, + "loss": 2.1093, "step": 6525 }, { - "epoch": 1.21, - "grad_norm": 0.2138671875, - "learning_rate": 0.00016132340040934254, - "loss": 2.1153, + "epoch": 1.11, + "grad_norm": 0.21875, + "learning_rate": 9.895055526484184e-05, + "loss": 2.1218, "step": 6530 }, { - "epoch": 1.21, - "grad_norm": 0.21875, - "learning_rate": 0.00016100575489976947, - "loss": 2.1159, + "epoch": 1.11, + "grad_norm": 0.224609375, + "learning_rate": 9.880275277394644e-05, + "loss": 2.1829, "step": 6535 }, { - "epoch": 1.21, - "grad_norm": 0.2119140625, - "learning_rate": 0.00016068821160590308, - "loss": 2.08, - "step": 6540 + "epoch": 1.11, + "grad_norm": 0.2177734375, + "learning_rate": 9.865495289883672e-05, + "loss": 2.1078, + "step": 6540 }, { - "epoch": 1.21, - "grad_norm": 0.2119140625, - "learning_rate": 0.00016037077136012054, - "loss": 2.1129, + "epoch": 1.11, + "grad_norm": 0.2294921875, + "learning_rate": 9.850715596243073e-05, + "loss": 2.1234, "step": 6545 }, { - "epoch": 1.22, - "grad_norm": 0.224609375, - "learning_rate": 0.0001600534349945285, - "loss": 2.1458, + "epoch": 1.11, + "grad_norm": 0.220703125, + "learning_rate": 9.835936228764014e-05, + "loss": 2.0701, "step": 6550 }, { - "epoch": 1.22, - "grad_norm": 0.2197265625, - "learning_rate": 0.00015973620334096159, - "loss": 2.1361, + "epoch": 1.11, + "grad_norm": 0.2216796875, + "learning_rate": 9.821157219736955e-05, + "loss": 2.1111, "step": 6555 }, { - "epoch": 1.22, - "grad_norm": 0.2138671875, - "learning_rate": 0.00015941907723097994, - "loss": 2.1387, + "epoch": 1.11, + "grad_norm": 0.2197265625, + "learning_rate": 9.806378601451563e-05, + "loss": 2.1091, "step": 6560 }, { - "epoch": 1.22, - "grad_norm": 0.220703125, - "learning_rate": 0.0001591020574958669, - "loss": 2.1254, + "epoch": 1.11, + "grad_norm": 0.22265625, + "learning_rate": 9.791600406196656e-05, + "loss": 2.1229, "step": 6565 }, { - "epoch": 1.22, - "grad_norm": 0.21875, - "learning_rate": 0.00015878514496662715, - "loss": 2.1272, + "epoch": 1.11, + "grad_norm": 0.216796875, + "learning_rate": 9.776822666260133e-05, + "loss": 2.1289, "step": 6570 }, { - "epoch": 1.22, - "grad_norm": 0.216796875, - "learning_rate": 0.0001584683404739841, - "loss": 2.1448, + "epoch": 1.11, + "grad_norm": 0.2177734375, + "learning_rate": 9.762045413928884e-05, + "loss": 2.0959, "step": 6575 }, { - "epoch": 1.22, - "grad_norm": 0.2255859375, - "learning_rate": 0.00015815164484837832, - "loss": 2.1431, + "epoch": 1.11, + "grad_norm": 0.216796875, + "learning_rate": 9.747268681488749e-05, + "loss": 2.1405, "step": 6580 }, { - "epoch": 1.22, - "grad_norm": 0.2177734375, - "learning_rate": 0.00015783505891996466, - "loss": 2.1139, + "epoch": 1.12, + "grad_norm": 0.2197265625, + "learning_rate": 9.732492501224426e-05, + "loss": 2.1203, "step": 6585 }, { - "epoch": 1.22, - "grad_norm": 0.2138671875, - "learning_rate": 0.00015751858351861054, - "loss": 2.1558, + "epoch": 1.12, + "grad_norm": 0.2265625, + "learning_rate": 9.717716905419403e-05, + "loss": 2.1509, "step": 6590 }, { - "epoch": 1.22, - "grad_norm": 0.21875, - "learning_rate": 0.0001572022194738938, - "loss": 2.1249, + "epoch": 1.12, + "grad_norm": 0.22265625, + "learning_rate": 9.702941926355897e-05, + "loss": 2.1252, "step": 6595 }, { - "epoch": 1.22, - "grad_norm": 0.2138671875, - "learning_rate": 0.00015688596761510005, - "loss": 2.1071, + "epoch": 1.12, + "grad_norm": 0.2158203125, + "learning_rate": 9.688167596314772e-05, + "loss": 2.1211, "step": 6600 }, { - "epoch": 1.23, + "epoch": 1.12, "grad_norm": 0.21875, - "learning_rate": 0.00015656982877122134, - "loss": 2.0894, + "learning_rate": 9.673393947575477e-05, + "loss": 2.1291, "step": 6605 }, { - "epoch": 1.23, - "grad_norm": 0.212890625, - "learning_rate": 0.0001562538037709529, - "loss": 2.1351, + "epoch": 1.12, + "grad_norm": 0.2216796875, + "learning_rate": 9.658621012415974e-05, + "loss": 2.1686, "step": 6610 }, { - "epoch": 1.23, - "grad_norm": 0.2138671875, - "learning_rate": 0.00015593789344269188, - "loss": 2.1364, + "epoch": 1.12, + "grad_norm": 0.2197265625, + "learning_rate": 9.643848823112664e-05, + "loss": 2.1454, "step": 6615 }, { - "epoch": 1.23, - "grad_norm": 0.2197265625, - "learning_rate": 0.00015562209861453487, - "loss": 2.078, + "epoch": 1.12, + "grad_norm": 0.21875, + "learning_rate": 9.629077411940318e-05, + "loss": 2.1243, "step": 6620 }, { - "epoch": 1.23, - "grad_norm": 0.212890625, - "learning_rate": 0.00015530642011427542, - "loss": 2.1231, + "epoch": 1.12, + "grad_norm": 0.2177734375, + "learning_rate": 9.614306811172009e-05, + "loss": 2.1075, "step": 6625 }, { - "epoch": 1.23, - "grad_norm": 0.21484375, - "learning_rate": 0.00015499085876940255, - "loss": 2.1232, + "epoch": 1.12, + "grad_norm": 0.2177734375, + "learning_rate": 9.599537053079037e-05, + "loss": 2.1105, "step": 6630 }, { - "epoch": 1.23, - "grad_norm": 0.21875, - "learning_rate": 0.00015467541540709772, - "loss": 2.1206, + "epoch": 1.12, + "grad_norm": 0.224609375, + "learning_rate": 9.58476816993086e-05, + "loss": 2.1203, "step": 6635 }, { - "epoch": 1.23, - "grad_norm": 0.216796875, - "learning_rate": 0.00015436009085423354, - "loss": 2.0971, + "epoch": 1.12, + "grad_norm": 0.2255859375, + "learning_rate": 9.570000193995028e-05, + "loss": 2.1075, "step": 6640 }, { - "epoch": 1.23, - "grad_norm": 0.220703125, - "learning_rate": 0.000154044885937371, - "loss": 2.0972, + "epoch": 1.13, + "grad_norm": 0.228515625, + "learning_rate": 9.555233157537109e-05, + "loss": 2.1306, "step": 6645 }, { - "epoch": 1.23, - "grad_norm": 0.2177734375, - "learning_rate": 0.0001537298014827573, - "loss": 2.1167, + "epoch": 1.13, + "grad_norm": 0.21484375, + "learning_rate": 9.540467092820614e-05, + "loss": 2.1238, "step": 6650 }, { - "epoch": 1.23, - "grad_norm": 0.2099609375, - "learning_rate": 0.00015341483831632434, - "loss": 2.0823, + "epoch": 1.13, + "grad_norm": 0.2255859375, + "learning_rate": 9.525702032106933e-05, + "loss": 2.1468, "step": 6655 }, { - "epoch": 1.24, - "grad_norm": 0.2177734375, - "learning_rate": 0.00015309999726368555, - "loss": 2.138, + "epoch": 1.13, + "grad_norm": 0.2236328125, + "learning_rate": 9.510938007655264e-05, + "loss": 2.1477, "step": 6660 }, { - "epoch": 1.24, - "grad_norm": 0.2197265625, - "learning_rate": 0.0001527852791501347, - "loss": 2.1009, + "epoch": 1.13, + "grad_norm": 0.22265625, + "learning_rate": 9.496175051722542e-05, + "loss": 2.1205, "step": 6665 }, { - "epoch": 1.24, - "grad_norm": 0.2197265625, - "learning_rate": 0.00015247068480064307, - "loss": 2.0907, + "epoch": 1.13, + "grad_norm": 0.2158203125, + "learning_rate": 9.481413196563362e-05, + "loss": 2.1107, "step": 6670 }, { - "epoch": 1.24, - "grad_norm": 0.21875, - "learning_rate": 0.00015215621503985758, - "loss": 2.0969, + "epoch": 1.13, + "grad_norm": 0.216796875, + "learning_rate": 9.466652474429915e-05, + "loss": 2.1116, "step": 6675 }, { - "epoch": 1.24, - "grad_norm": 0.2158203125, - "learning_rate": 0.00015184187069209858, - "loss": 2.1056, + "epoch": 1.13, + "grad_norm": 0.2236328125, + "learning_rate": 9.451892917571927e-05, + "loss": 2.1433, "step": 6680 }, { - "epoch": 1.24, - "grad_norm": 0.21484375, - "learning_rate": 0.00015152765258135754, - "loss": 2.0957, + "epoch": 1.13, + "grad_norm": 0.2158203125, + "learning_rate": 9.437134558236562e-05, + "loss": 2.1305, "step": 6685 }, { - "epoch": 1.24, - "grad_norm": 0.2216796875, - "learning_rate": 0.00015121356153129526, - "loss": 2.1561, + "epoch": 1.13, + "grad_norm": 0.2333984375, + "learning_rate": 9.42237742866838e-05, + "loss": 2.141, "step": 6690 }, { - "epoch": 1.24, - "grad_norm": 0.2158203125, - "learning_rate": 0.00015089959836523927, - "loss": 2.1067, + "epoch": 1.13, + "grad_norm": 0.22265625, + "learning_rate": 9.407621561109251e-05, + "loss": 2.0987, "step": 6695 }, { - "epoch": 1.24, - "grad_norm": 0.212890625, - "learning_rate": 0.00015058576390618205, - "loss": 2.1244, + "epoch": 1.13, + "grad_norm": 0.22265625, + "learning_rate": 9.392866987798277e-05, + "loss": 2.1598, "step": 6700 }, { - "epoch": 1.24, - "grad_norm": 0.21484375, - "learning_rate": 0.00015027205897677855, - "loss": 2.1364, + "epoch": 1.14, + "grad_norm": 0.21875, + "learning_rate": 9.378113740971754e-05, + "loss": 2.1487, "step": 6705 }, { - "epoch": 1.24, - "grad_norm": 0.2138671875, - "learning_rate": 0.00014995848439934418, - "loss": 2.0978, + "epoch": 1.14, + "grad_norm": 0.2158203125, + "learning_rate": 9.363361852863058e-05, + "loss": 2.1104, "step": 6710 }, { - "epoch": 1.25, - "grad_norm": 0.21484375, - "learning_rate": 0.00014964504099585283, - "loss": 2.1216, + "epoch": 1.14, + "grad_norm": 0.2216796875, + "learning_rate": 9.348611355702608e-05, + "loss": 2.1171, "step": 6715 }, { - "epoch": 1.25, - "grad_norm": 0.21484375, - "learning_rate": 0.00014933172958793436, - "loss": 2.0968, + "epoch": 1.14, + "grad_norm": 0.2177734375, + "learning_rate": 9.333862281717788e-05, + "loss": 2.1482, "step": 6720 }, { - "epoch": 1.25, - "grad_norm": 0.2119140625, - "learning_rate": 0.00014901855099687275, - "loss": 2.0871, + "epoch": 1.14, + "grad_norm": 0.21484375, + "learning_rate": 9.31911466313286e-05, + "loss": 2.0912, "step": 6725 }, { - "epoch": 1.25, - "grad_norm": 0.22265625, - "learning_rate": 0.00014870550604360373, - "loss": 2.1265, + "epoch": 1.14, + "grad_norm": 0.2158203125, + "learning_rate": 9.304368532168912e-05, + "loss": 2.0972, "step": 6730 }, { - "epoch": 1.25, - "grad_norm": 0.2138671875, - "learning_rate": 0.0001483925955487129, - "loss": 2.0967, + "epoch": 1.14, + "grad_norm": 0.2236328125, + "learning_rate": 9.28962392104379e-05, + "loss": 2.1025, "step": 6735 }, { - "epoch": 1.25, - "grad_norm": 0.220703125, - "learning_rate": 0.00014807982033243313, - "loss": 2.1202, + "epoch": 1.14, + "grad_norm": 0.224609375, + "learning_rate": 9.274880861972005e-05, + "loss": 2.0854, "step": 6740 }, { - "epoch": 1.25, - "grad_norm": 0.2099609375, - "learning_rate": 0.00014776718121464283, - "loss": 2.0886, + "epoch": 1.14, + "grad_norm": 0.224609375, + "learning_rate": 9.260139387164684e-05, + "loss": 2.1208, "step": 6745 }, { - "epoch": 1.25, - "grad_norm": 0.224609375, - "learning_rate": 0.00014745467901486377, - "loss": 2.112, + "epoch": 1.14, + "grad_norm": 0.22265625, + "learning_rate": 9.245399528829501e-05, + "loss": 2.1269, "step": 6750 }, { - "epoch": 1.25, - "grad_norm": 0.2138671875, - "learning_rate": 0.00014714231455225862, - "loss": 2.1163, + "epoch": 1.14, + "grad_norm": 0.21875, + "learning_rate": 9.230661319170578e-05, + "loss": 2.0986, "step": 6755 }, { - "epoch": 1.25, + "epoch": 1.14, "grad_norm": 0.2236328125, - "learning_rate": 0.00014683008864562917, - "loss": 2.1304, + "learning_rate": 9.215924790388451e-05, + "loss": 2.1067, "step": 6760 }, { - "epoch": 1.26, - "grad_norm": 0.212890625, - "learning_rate": 0.00014651800211341385, - "loss": 2.0945, + "epoch": 1.15, + "grad_norm": 0.2275390625, + "learning_rate": 9.201189974679986e-05, + "loss": 2.1029, "step": 6765 }, { - "epoch": 1.26, - "grad_norm": 0.216796875, - "learning_rate": 0.0001462060557736858, - "loss": 2.1094, - "step": 6770 - }, + "epoch": 1.15, + "grad_norm": 0.2197265625, + "learning_rate": 9.186456904238292e-05, + "loss": 2.1548, + "step": 6770 + }, { - "epoch": 1.26, - "grad_norm": 0.21875, - "learning_rate": 0.00014589425044415075, - "loss": 2.0669, + "epoch": 1.15, + "grad_norm": 0.220703125, + "learning_rate": 9.171725611252676e-05, + "loss": 2.1147, "step": 6775 }, { - "epoch": 1.26, - "grad_norm": 0.224609375, - "learning_rate": 0.0001455825869421447, - "loss": 2.1211, + "epoch": 1.15, + "grad_norm": 0.2236328125, + "learning_rate": 9.156996127908555e-05, + "loss": 2.1242, "step": 6780 }, { - "epoch": 1.26, - "grad_norm": 0.2138671875, - "learning_rate": 0.00014527106608463206, - "loss": 2.1007, + "epoch": 1.15, + "grad_norm": 0.23046875, + "learning_rate": 9.142268486387398e-05, + "loss": 2.0846, "step": 6785 }, { - "epoch": 1.26, - "grad_norm": 0.22265625, - "learning_rate": 0.000144959688688203, - "loss": 2.1065, + "epoch": 1.15, + "grad_norm": 0.23828125, + "learning_rate": 9.127542718866646e-05, + "loss": 2.1363, "step": 6790 }, { - "epoch": 1.26, - "grad_norm": 0.216796875, - "learning_rate": 0.00014464845556907196, - "loss": 2.089, + "epoch": 1.15, + "grad_norm": 0.220703125, + "learning_rate": 9.112818857519647e-05, + "loss": 2.1028, "step": 6795 }, { - "epoch": 1.26, - "grad_norm": 0.2138671875, - "learning_rate": 0.000144337367543075, - "loss": 2.14, + "epoch": 1.15, + "grad_norm": 0.21875, + "learning_rate": 9.098096934515583e-05, + "loss": 2.1668, "step": 6800 }, { - "epoch": 1.26, - "grad_norm": 0.2197265625, - "learning_rate": 0.00014402642542566782, - "loss": 2.1439, + "epoch": 1.15, + "grad_norm": 0.2236328125, + "learning_rate": 9.083376982019406e-05, + "loss": 2.1371, "step": 6805 }, { - "epoch": 1.26, - "grad_norm": 0.20703125, - "learning_rate": 0.00014371563003192392, - "loss": 2.0581, + "epoch": 1.15, + "grad_norm": 0.2255859375, + "learning_rate": 9.068659032191753e-05, + "loss": 2.1092, "step": 6810 }, { - "epoch": 1.26, + "epoch": 1.15, "grad_norm": 0.21875, - "learning_rate": 0.0001434049821765318, - "loss": 2.1186, + "learning_rate": 9.053943117188896e-05, + "loss": 2.1803, "step": 6815 }, { - "epoch": 1.27, - "grad_norm": 0.2197265625, - "learning_rate": 0.00014309448267379353, - "loss": 2.1526, + "epoch": 1.16, + "grad_norm": 0.2255859375, + "learning_rate": 9.039229269162656e-05, + "loss": 2.1319, "step": 6820 }, { - "epoch": 1.27, - "grad_norm": 0.2138671875, - "learning_rate": 0.00014278413233762214, - "loss": 2.071, + "epoch": 1.16, + "grad_norm": 0.2236328125, + "learning_rate": 9.024517520260339e-05, + "loss": 2.1312, "step": 6825 }, { - "epoch": 1.27, - "grad_norm": 0.21484375, - "learning_rate": 0.00014247393198153974, - "loss": 2.1107, + "epoch": 1.16, + "grad_norm": 0.2255859375, + "learning_rate": 9.009807902624662e-05, + "loss": 2.1224, "step": 6830 }, { - "epoch": 1.27, - "grad_norm": 0.2177734375, - "learning_rate": 0.0001421638824186753, - "loss": 2.1339, + "epoch": 1.16, + "grad_norm": 0.2197265625, + "learning_rate": 8.99510044839369e-05, + "loss": 2.1172, "step": 6835 }, { - "epoch": 1.27, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001418539844617623, - "loss": 2.1236, + "epoch": 1.16, + "grad_norm": 0.21484375, + "learning_rate": 8.980395189700758e-05, + "loss": 2.1406, "step": 6840 }, { - "epoch": 1.27, + "epoch": 1.16, "grad_norm": 0.22265625, - "learning_rate": 0.00014154423892313712, - "loss": 2.1086, + "learning_rate": 8.965692158674408e-05, + "loss": 2.1704, "step": 6845 }, { - "epoch": 1.27, + "epoch": 1.16, "grad_norm": 0.2216796875, - "learning_rate": 0.00014123464661473646, - "loss": 2.0773, + "learning_rate": 8.950991387438308e-05, + "loss": 2.0968, "step": 6850 }, { - "epoch": 1.27, - "grad_norm": 0.220703125, - "learning_rate": 0.00014092520834809534, - "loss": 2.1404, + "epoch": 1.16, + "grad_norm": 0.25, + "learning_rate": 8.936292908111197e-05, + "loss": 2.1551, "step": 6855 }, { - "epoch": 1.27, - "grad_norm": 0.2177734375, - "learning_rate": 0.0001406159249343451, - "loss": 2.0654, + "epoch": 1.16, + "grad_norm": 0.2236328125, + "learning_rate": 8.921596752806802e-05, + "loss": 2.134, "step": 6860 }, { - "epoch": 1.27, - "grad_norm": 0.21875, - "learning_rate": 0.0001403067971842109, - "loss": 2.0919, + "epoch": 1.16, + "grad_norm": 0.2236328125, + "learning_rate": 8.906902953633771e-05, + "loss": 2.1215, "step": 6865 }, { - "epoch": 1.27, - "grad_norm": 0.228515625, - "learning_rate": 0.00013999782590801022, - "loss": 2.0763, + "epoch": 1.16, + "grad_norm": 0.216796875, + "learning_rate": 8.892211542695607e-05, + "loss": 2.1057, "step": 6870 }, { - "epoch": 1.28, - "grad_norm": 0.2109375, - "learning_rate": 0.0001396890119156501, - "loss": 2.109, + "epoch": 1.16, + "grad_norm": 0.2265625, + "learning_rate": 8.877522552090598e-05, + "loss": 2.14, "step": 6875 }, { - "epoch": 1.28, - "grad_norm": 0.2294921875, - "learning_rate": 0.00013938035601662545, - "loss": 2.117, + "epoch": 1.17, + "grad_norm": 0.2177734375, + "learning_rate": 8.862836013911735e-05, + "loss": 2.0927, "step": 6880 }, { - "epoch": 1.28, - "grad_norm": 0.2275390625, - "learning_rate": 0.00013907185902001663, - "loss": 2.1378, + "epoch": 1.17, + "grad_norm": 0.2255859375, + "learning_rate": 8.848151960246663e-05, + "loss": 2.1415, "step": 6885 }, { - "epoch": 1.28, - "grad_norm": 0.216796875, - "learning_rate": 0.00013876352173448764, - "loss": 2.1208, + "epoch": 1.17, + "grad_norm": 0.21875, + "learning_rate": 8.833470423177578e-05, + "loss": 2.1684, "step": 6890 }, { - "epoch": 1.28, - "grad_norm": 0.208984375, - "learning_rate": 0.00013845534496828368, - "loss": 2.1245, + "epoch": 1.17, + "grad_norm": 0.21875, + "learning_rate": 8.818791434781208e-05, + "loss": 2.1264, "step": 6895 }, { - "epoch": 1.28, - "grad_norm": 0.2177734375, - "learning_rate": 0.00013814732952922918, - "loss": 2.1393, + "epoch": 1.17, + "grad_norm": 0.220703125, + "learning_rate": 8.804115027128692e-05, + "loss": 2.0953, "step": 6900 }, { - "epoch": 1.28, - "grad_norm": 0.22265625, - "learning_rate": 0.00013783947622472587, - "loss": 2.1493, + "epoch": 1.17, + "grad_norm": 0.234375, + "learning_rate": 8.789441232285524e-05, + "loss": 2.1234, "step": 6905 }, { - "epoch": 1.28, - "grad_norm": 0.21875, - "learning_rate": 0.0001375317858617502, - "loss": 2.1436, + "epoch": 1.17, + "grad_norm": 0.2236328125, + "learning_rate": 8.774770082311512e-05, + "loss": 2.1046, "step": 6910 }, { - "epoch": 1.28, - "grad_norm": 0.2158203125, - "learning_rate": 0.0001372242592468518, - "loss": 2.1092, + "epoch": 1.17, + "grad_norm": 0.2265625, + "learning_rate": 8.760101609260673e-05, + "loss": 2.1038, "step": 6915 }, { - "epoch": 1.28, + "epoch": 1.17, "grad_norm": 0.21875, - "learning_rate": 0.00013691689718615079, - "loss": 2.1213, + "learning_rate": 8.745435845181168e-05, + "loss": 2.1039, "step": 6920 }, { - "epoch": 1.28, - "grad_norm": 0.2119140625, - "learning_rate": 0.00013660970048533607, - "loss": 2.0764, + "epoch": 1.17, + "grad_norm": 0.2236328125, + "learning_rate": 8.730772822115252e-05, + "loss": 2.1259, "step": 6925 }, { - "epoch": 1.29, - "grad_norm": 0.2119140625, - "learning_rate": 0.00013630266994966314, - "loss": 2.124, + "epoch": 1.17, + "grad_norm": 0.216796875, + "learning_rate": 8.716112572099193e-05, + "loss": 2.0748, "step": 6930 }, { - "epoch": 1.29, - "grad_norm": 0.2197265625, - "learning_rate": 0.0001359958063839518, - "loss": 2.1211, + "epoch": 1.17, + "grad_norm": 0.2275390625, + "learning_rate": 8.701455127163181e-05, + "loss": 2.1662, "step": 6935 }, { - "epoch": 1.29, + "epoch": 1.18, "grad_norm": 0.224609375, - "learning_rate": 0.00013568911059258436, - "loss": 2.0699, + "learning_rate": 8.686800519331298e-05, + "loss": 2.0985, "step": 6940 }, { - "epoch": 1.29, - "grad_norm": 0.2138671875, - "learning_rate": 0.00013538258337950302, - "loss": 2.0943, + "epoch": 1.18, + "grad_norm": 0.2265625, + "learning_rate": 8.672148780621423e-05, + "loss": 2.1478, "step": 6945 }, { - "epoch": 1.29, - "grad_norm": 0.2177734375, - "learning_rate": 0.0001350762255482085, - "loss": 2.1063, + "epoch": 1.18, + "grad_norm": 0.22265625, + "learning_rate": 8.657499943045153e-05, + "loss": 2.1141, "step": 6950 }, { - "epoch": 1.29, - "grad_norm": 0.21875, - "learning_rate": 0.0001347700379017572, - "loss": 2.1212, + "epoch": 1.18, + "grad_norm": 0.224609375, + "learning_rate": 8.642854038607769e-05, + "loss": 2.1271, "step": 6955 }, { - "epoch": 1.29, - "grad_norm": 0.2138671875, - "learning_rate": 0.00013446402124275947, - "loss": 2.0913, + "epoch": 1.18, + "grad_norm": 0.2197265625, + "learning_rate": 8.628211099308119e-05, + "loss": 2.1009, "step": 6960 }, { - "epoch": 1.29, - "grad_norm": 0.224609375, - "learning_rate": 0.00013415817637337768, - "loss": 2.1277, + "epoch": 1.18, + "grad_norm": 0.216796875, + "learning_rate": 8.61357115713859e-05, + "loss": 2.1326, "step": 6965 }, { - "epoch": 1.29, - "grad_norm": 0.224609375, - "learning_rate": 0.00013385250409532343, - "loss": 2.1266, + "epoch": 1.18, + "grad_norm": 0.2236328125, + "learning_rate": 8.598934244085022e-05, + "loss": 2.1111, "step": 6970 }, { - "epoch": 1.29, + "epoch": 1.18, "grad_norm": 0.22265625, - "learning_rate": 0.0001335470052098565, - "loss": 2.0951, + "learning_rate": 8.584300392126621e-05, + "loss": 2.1095, "step": 6975 }, { - "epoch": 1.29, - "grad_norm": 0.2177734375, - "learning_rate": 0.00013324168051778162, - "loss": 2.1073, + "epoch": 1.18, + "grad_norm": 0.2236328125, + "learning_rate": 8.569669633235917e-05, + "loss": 2.1472, "step": 6980 }, { - "epoch": 1.3, - "grad_norm": 0.2138671875, - "learning_rate": 0.00013293653081944716, - "loss": 2.1286, + "epoch": 1.18, + "grad_norm": 0.216796875, + "learning_rate": 8.555041999378687e-05, + "loss": 2.1558, "step": 6985 }, { - "epoch": 1.3, - "grad_norm": 0.21484375, - "learning_rate": 0.00013263155691474286, - "loss": 2.0843, + "epoch": 1.18, + "grad_norm": 0.2216796875, + "learning_rate": 8.540417522513864e-05, + "loss": 2.0741, "step": 6990 }, { - "epoch": 1.3, - "grad_norm": 0.21875, - "learning_rate": 0.0001323267596030973, - "loss": 2.1228, + "epoch": 1.18, + "grad_norm": 0.220703125, + "learning_rate": 8.525796234593493e-05, + "loss": 2.1253, "step": 6995 }, { - "epoch": 1.3, - "grad_norm": 0.2216796875, - "learning_rate": 0.0001320221396834767, - "loss": 2.126, + "epoch": 1.19, + "grad_norm": 0.22265625, + "learning_rate": 8.511178167562662e-05, + "loss": 2.1103, "step": 7000 }, { - "epoch": 1.3, + "epoch": 1.19, "grad_norm": 0.2177734375, - "learning_rate": 0.0001317176979543817, - "loss": 2.0865, + "learning_rate": 8.496563353359398e-05, + "loss": 2.1059, "step": 7005 }, { - "epoch": 1.3, - "grad_norm": 0.21875, - "learning_rate": 0.00013141343521384634, - "loss": 2.1284, + "epoch": 1.19, + "grad_norm": 0.220703125, + "learning_rate": 8.481951823914642e-05, + "loss": 2.0873, "step": 7010 }, { - "epoch": 1.3, + "epoch": 1.19, "grad_norm": 0.228515625, - "learning_rate": 0.00013110935225943516, - "loss": 2.0905, + "learning_rate": 8.467343611152147e-05, + "loss": 2.1031, "step": 7015 }, { - "epoch": 1.3, - "grad_norm": 0.21484375, - "learning_rate": 0.0001308054498882414, - "loss": 2.0852, + "epoch": 1.19, + "grad_norm": 0.310546875, + "learning_rate": 8.452738746988425e-05, + "loss": 2.1256, "step": 7020 }, { - "epoch": 1.3, - "grad_norm": 0.212890625, - "learning_rate": 0.00013050172889688536, - "loss": 2.1067, + "epoch": 1.19, + "grad_norm": 0.21484375, + "learning_rate": 8.43813726333267e-05, + "loss": 2.0955, "step": 7025 }, { - "epoch": 1.3, - "grad_norm": 0.21484375, - "learning_rate": 0.00013019819008151128, - "loss": 2.0757, + "epoch": 1.19, + "grad_norm": 0.224609375, + "learning_rate": 8.42353919208669e-05, + "loss": 2.1246, "step": 7030 }, { - "epoch": 1.31, - "grad_norm": 0.2138671875, - "learning_rate": 0.0001298948342377864, - "loss": 2.1301, + "epoch": 1.19, + "grad_norm": 0.2275390625, + "learning_rate": 8.408944565144838e-05, + "loss": 2.1745, "step": 7035 }, { - "epoch": 1.31, - "grad_norm": 0.2177734375, - "learning_rate": 0.00012959166216089797, - "loss": 2.0895, + "epoch": 1.19, + "grad_norm": 0.220703125, + "learning_rate": 8.394353414393943e-05, + "loss": 2.1093, "step": 7040 }, { - "epoch": 1.31, - "grad_norm": 0.21484375, - "learning_rate": 0.00012928867464555177, - "loss": 2.0816, + "epoch": 1.19, + "grad_norm": 0.234375, + "learning_rate": 8.379765771713233e-05, + "loss": 2.1581, "step": 7045 }, { - "epoch": 1.31, - "grad_norm": 0.216796875, - "learning_rate": 0.0001289858724859697, - "loss": 2.1101, + "epoch": 1.19, + "grad_norm": 0.21875, + "learning_rate": 8.365181668974279e-05, + "loss": 2.0691, "step": 7050 }, { - "epoch": 1.31, - "grad_norm": 0.21875, - "learning_rate": 0.0001286832564758876, - "loss": 2.1058, + "epoch": 1.19, + "grad_norm": 0.216796875, + "learning_rate": 8.350601138040917e-05, + "loss": 2.1291, "step": 7055 }, { - "epoch": 1.31, - "grad_norm": 0.2177734375, - "learning_rate": 0.00012838082740855371, - "loss": 2.1263, + "epoch": 1.2, + "grad_norm": 0.21875, + "learning_rate": 8.336024210769172e-05, + "loss": 2.1567, "step": 7060 }, { - "epoch": 1.31, - "grad_norm": 0.212890625, - "learning_rate": 0.00012807858607672597, - "loss": 2.0942, + "epoch": 1.2, + "grad_norm": 0.22265625, + "learning_rate": 8.321450919007207e-05, + "loss": 2.147, "step": 7065 }, { - "epoch": 1.31, - "grad_norm": 0.2216796875, - "learning_rate": 0.00012777653327267037, - "loss": 2.1127, + "epoch": 1.2, + "grad_norm": 0.2236328125, + "learning_rate": 8.30688129459523e-05, + "loss": 2.1118, "step": 7070 }, { - "epoch": 1.31, - "grad_norm": 0.220703125, - "learning_rate": 0.00012747466978815862, - "loss": 2.0978, + "epoch": 1.2, + "grad_norm": 0.2216796875, + "learning_rate": 8.292315369365442e-05, + "loss": 2.0944, "step": 7075 }, { - "epoch": 1.31, - "grad_norm": 0.2197265625, - "learning_rate": 0.00012717299641446612, - "loss": 2.1021, + "epoch": 1.2, + "grad_norm": 0.2333984375, + "learning_rate": 8.27775317514197e-05, + "loss": 2.0953, "step": 7080 }, { - "epoch": 1.31, - "grad_norm": 0.21484375, - "learning_rate": 0.00012687151394237005, - "loss": 2.1147, + "epoch": 1.2, + "grad_norm": 0.228515625, + "learning_rate": 8.263194743740769e-05, + "loss": 2.1687, "step": 7085 }, { - "epoch": 1.32, - "grad_norm": 0.2119140625, - "learning_rate": 0.00012657022316214705, - "loss": 2.0918, + "epoch": 1.2, + "grad_norm": 0.220703125, + "learning_rate": 8.248640106969595e-05, + "loss": 2.1272, "step": 7090 }, { - "epoch": 1.32, - "grad_norm": 0.216796875, - "learning_rate": 0.00012626912486357147, - "loss": 2.1089, + "epoch": 1.2, + "grad_norm": 0.2216796875, + "learning_rate": 8.234089296627903e-05, + "loss": 2.1588, "step": 7095 }, { - "epoch": 1.32, - "grad_norm": 0.216796875, - "learning_rate": 0.00012596821983591289, - "loss": 2.1268, + "epoch": 1.2, + "grad_norm": 0.2265625, + "learning_rate": 8.219542344506784e-05, + "loss": 2.1068, "step": 7100 }, { - "epoch": 1.32, - "grad_norm": 0.21875, - "learning_rate": 0.0001256675088679345, - "loss": 2.1026, + "epoch": 1.2, + "grad_norm": 0.2275390625, + "learning_rate": 8.204999282388903e-05, + "loss": 2.1186, "step": 7105 }, { - "epoch": 1.32, - "grad_norm": 0.2177734375, - "learning_rate": 0.00012536699274789059, - "loss": 2.0627, + "epoch": 1.2, + "grad_norm": 0.2236328125, + "learning_rate": 8.190460142048434e-05, + "loss": 2.0982, "step": 7110 }, { - "epoch": 1.32, - "grad_norm": 0.2255859375, - "learning_rate": 0.0001250666722635247, - "loss": 2.0675, + "epoch": 1.21, + "grad_norm": 0.224609375, + "learning_rate": 8.175924955250971e-05, + "loss": 2.09, "step": 7115 }, { - "epoch": 1.32, - "grad_norm": 0.2197265625, - "learning_rate": 0.00012476654820206773, - "loss": 2.1035, + "epoch": 1.21, + "grad_norm": 0.2216796875, + "learning_rate": 8.161393753753474e-05, + "loss": 2.1447, "step": 7120 }, { - "epoch": 1.32, - "grad_norm": 0.224609375, - "learning_rate": 0.0001244666213502355, - "loss": 2.1104, + "epoch": 1.21, + "grad_norm": 0.22265625, + "learning_rate": 8.146866569304199e-05, + "loss": 2.0919, "step": 7125 }, { - "epoch": 1.32, - "grad_norm": 0.21875, - "learning_rate": 0.00012416689249422714, - "loss": 2.0844, + "epoch": 1.21, + "grad_norm": 0.2236328125, + "learning_rate": 8.13234343364262e-05, + "loss": 2.1315, "step": 7130 }, { - "epoch": 1.32, - "grad_norm": 0.22265625, - "learning_rate": 0.00012386736241972246, - "loss": 2.1405, + "epoch": 1.21, + "grad_norm": 0.2255859375, + "learning_rate": 8.117824378499374e-05, + "loss": 2.1157, "step": 7135 }, { - "epoch": 1.32, - "grad_norm": 0.2109375, - "learning_rate": 0.00012356803191188034, - "loss": 2.1525, + "epoch": 1.21, + "grad_norm": 0.22265625, + "learning_rate": 8.103309435596165e-05, + "loss": 2.1379, "step": 7140 }, { - "epoch": 1.33, - "grad_norm": 0.216796875, - "learning_rate": 0.0001232689017553366, - "loss": 2.1443, + "epoch": 1.21, + "grad_norm": 0.2265625, + "learning_rate": 8.088798636645733e-05, + "loss": 2.1274, "step": 7145 }, { - "epoch": 1.33, - "grad_norm": 0.216796875, - "learning_rate": 0.00012296997273420184, - "loss": 2.1317, + "epoch": 1.21, + "grad_norm": 0.228515625, + "learning_rate": 8.074292013351759e-05, + "loss": 2.1492, "step": 7150 }, { - "epoch": 1.33, - "grad_norm": 0.224609375, - "learning_rate": 0.00012267124563205946, - "loss": 2.0839, + "epoch": 1.21, + "grad_norm": 0.22265625, + "learning_rate": 8.059789597408785e-05, + "loss": 2.1494, "step": 7155 }, { - "epoch": 1.33, - "grad_norm": 0.2177734375, - "learning_rate": 0.0001223727212319633, - "loss": 2.0955, + "epoch": 1.21, + "grad_norm": 0.22265625, + "learning_rate": 8.045291420502182e-05, + "loss": 2.1487, "step": 7160 }, { - "epoch": 1.33, - "grad_norm": 0.2236328125, - "learning_rate": 0.00012207440031643637, - "loss": 2.1137, + "epoch": 1.21, + "grad_norm": 0.220703125, + "learning_rate": 8.030797514308052e-05, + "loss": 2.1566, "step": 7165 }, { - "epoch": 1.33, - "grad_norm": 0.2294921875, - "learning_rate": 0.0001217762836674678, - "loss": 2.0972, + "epoch": 1.21, + "grad_norm": 0.2197265625, + "learning_rate": 8.016307910493153e-05, + "loss": 2.0946, "step": 7170 }, { - "epoch": 1.33, - "grad_norm": 0.21875, - "learning_rate": 0.00012147837206651148, - "loss": 2.1198, + "epoch": 1.22, + "grad_norm": 0.2216796875, + "learning_rate": 8.001822640714865e-05, + "loss": 2.11, "step": 7175 }, { - "epoch": 1.33, - "grad_norm": 0.2197265625, - "learning_rate": 0.00012118066629448388, - "loss": 2.0673, + "epoch": 1.22, + "grad_norm": 0.22265625, + "learning_rate": 7.987341736621089e-05, + "loss": 2.1462, "step": 7180 }, { - "epoch": 1.33, - "grad_norm": 0.2177734375, - "learning_rate": 0.00012088316713176166, - "loss": 2.1093, + "epoch": 1.22, + "grad_norm": 0.2255859375, + "learning_rate": 7.972865229850176e-05, + "loss": 2.0978, "step": 7185 }, { - "epoch": 1.33, - "grad_norm": 0.21484375, - "learning_rate": 0.00012058587535818036, - "loss": 2.0892, + "epoch": 1.22, + "grad_norm": 0.2177734375, + "learning_rate": 7.958393152030894e-05, + "loss": 2.1292, "step": 7190 }, { - "epoch": 1.33, - "grad_norm": 0.21875, - "learning_rate": 0.00012028879175303137, - "loss": 2.1102, + "epoch": 1.22, + "grad_norm": 0.2294921875, + "learning_rate": 7.943925534782311e-05, + "loss": 2.1581, "step": 7195 }, { - "epoch": 1.34, - "grad_norm": 0.220703125, - "learning_rate": 0.00011999191709506072, - "loss": 2.1176, + "epoch": 1.22, + "grad_norm": 0.22265625, + "learning_rate": 7.929462409713762e-05, + "loss": 2.1376, "step": 7200 }, { - "epoch": 1.34, - "grad_norm": 0.21875, - "learning_rate": 0.00011969525216246673, - "loss": 2.0999, + "epoch": 1.22, + "grad_norm": 0.2265625, + "learning_rate": 7.915003808424771e-05, + "loss": 2.1427, "step": 7205 }, { - "epoch": 1.34, - "grad_norm": 0.212890625, - "learning_rate": 0.00011939879773289768, - "loss": 2.0787, + "epoch": 1.22, + "grad_norm": 0.2197265625, + "learning_rate": 7.900549762504963e-05, + "loss": 2.1218, "step": 7210 }, { - "epoch": 1.34, - "grad_norm": 0.2265625, - "learning_rate": 0.00011910255458345055, - "loss": 2.1507, + "epoch": 1.22, + "grad_norm": 0.2275390625, + "learning_rate": 7.886100303534022e-05, + "loss": 2.1444, "step": 7215 }, { - "epoch": 1.34, - "grad_norm": 0.2216796875, - "learning_rate": 0.00011880652349066798, - "loss": 2.1248, + "epoch": 1.22, + "grad_norm": 0.2255859375, + "learning_rate": 7.871655463081615e-05, + "loss": 2.1039, "step": 7220 }, { - "epoch": 1.34, - "grad_norm": 0.2236328125, - "learning_rate": 0.00011851070523053707, - "loss": 2.0911, + "epoch": 1.22, + "grad_norm": 0.224609375, + "learning_rate": 7.8572152727073e-05, + "loss": 2.0897, "step": 7225 }, { - "epoch": 1.34, - "grad_norm": 0.2138671875, - "learning_rate": 0.00011821510057848695, - "loss": 2.1235, + "epoch": 1.22, + "grad_norm": 0.2265625, + "learning_rate": 7.842779763960493e-05, + "loss": 2.0614, "step": 7230 }, { - "epoch": 1.34, - "grad_norm": 0.2138671875, - "learning_rate": 0.00011791971030938662, - "loss": 2.1035, + "epoch": 1.23, + "grad_norm": 0.2177734375, + "learning_rate": 7.828348968380374e-05, + "loss": 2.1025, "step": 7235 }, { - "epoch": 1.34, - "grad_norm": 0.21875, - "learning_rate": 0.00011762453519754357, - "loss": 2.1228, + "epoch": 1.23, + "grad_norm": 0.2138671875, + "learning_rate": 7.813922917495824e-05, + "loss": 2.1359, "step": 7240 }, { - "epoch": 1.34, - "grad_norm": 0.212890625, - "learning_rate": 0.00011732957601670076, - "loss": 2.0994, + "epoch": 1.23, + "grad_norm": 0.228515625, + "learning_rate": 7.799501642825364e-05, + "loss": 2.1166, "step": 7245 }, { - "epoch": 1.35, - "grad_norm": 0.2197265625, - "learning_rate": 0.00011703483354003553, - "loss": 2.1336, + "epoch": 1.23, + "grad_norm": 0.234375, + "learning_rate": 7.785085175877071e-05, + "loss": 2.1249, "step": 7250 }, { - "epoch": 1.35, - "grad_norm": 0.21875, - "learning_rate": 0.00011674030854015696, - "loss": 2.108, + "epoch": 1.23, + "grad_norm": 0.22265625, + "learning_rate": 7.770673548148524e-05, + "loss": 2.1482, "step": 7255 }, { - "epoch": 1.35, - "grad_norm": 0.220703125, - "learning_rate": 0.00011644600178910421, - "loss": 2.1178, + "epoch": 1.23, + "grad_norm": 0.228515625, + "learning_rate": 7.756266791126731e-05, + "loss": 2.1217, "step": 7260 }, { - "epoch": 1.35, - "grad_norm": 0.21875, - "learning_rate": 0.0001161519140583442, - "loss": 2.1162, + "epoch": 1.23, + "grad_norm": 0.23046875, + "learning_rate": 7.74186493628805e-05, + "loss": 2.0725, "step": 7265 }, { - "epoch": 1.35, - "grad_norm": 0.2119140625, - "learning_rate": 0.0001158580461187698, - "loss": 2.0816, + "epoch": 1.23, + "grad_norm": 0.220703125, + "learning_rate": 7.727468015098135e-05, + "loss": 2.1105, "step": 7270 }, { - "epoch": 1.35, - "grad_norm": 0.21484375, - "learning_rate": 0.00011556439874069773, - "loss": 2.1076, + "epoch": 1.23, + "grad_norm": 0.2265625, + "learning_rate": 7.713076059011864e-05, + "loss": 2.0842, "step": 7275 }, { - "epoch": 1.35, - "grad_norm": 0.216796875, - "learning_rate": 0.00011527097269386655, - "loss": 2.1465, + "epoch": 1.23, + "grad_norm": 0.2275390625, + "learning_rate": 7.698689099473254e-05, + "loss": 2.1156, "step": 7280 }, { - "epoch": 1.35, - "grad_norm": 0.2216796875, - "learning_rate": 0.00011497776874743471, - "loss": 2.1064, + "epoch": 1.23, + "grad_norm": 0.228515625, + "learning_rate": 7.68430716791542e-05, + "loss": 2.171, "step": 7285 }, { - "epoch": 1.35, - "grad_norm": 0.2158203125, - "learning_rate": 0.00011468478766997832, - "loss": 2.0849, + "epoch": 1.23, + "grad_norm": 0.2314453125, + "learning_rate": 7.669930295760486e-05, + "loss": 2.1235, "step": 7290 }, { - "epoch": 1.35, - "grad_norm": 0.208984375, - "learning_rate": 0.00011439203022948935, - "loss": 2.1352, + "epoch": 1.24, + "grad_norm": 0.2265625, + "learning_rate": 7.655558514419518e-05, + "loss": 2.1518, "step": 7295 }, { - "epoch": 1.35, - "grad_norm": 0.2177734375, - "learning_rate": 0.00011409949719337376, - "loss": 2.1146, + "epoch": 1.24, + "grad_norm": 0.2265625, + "learning_rate": 7.641191855292464e-05, + "loss": 2.0936, "step": 7300 }, { - "epoch": 1.36, - "grad_norm": 0.220703125, - "learning_rate": 0.00011380718932844882, - "loss": 2.1264, + "epoch": 1.24, + "grad_norm": 0.23046875, + "learning_rate": 7.626830349768084e-05, + "loss": 2.1468, "step": 7305 }, { - "epoch": 1.36, - "grad_norm": 0.216796875, - "learning_rate": 0.00011351510740094205, - "loss": 2.1164, + "epoch": 1.24, + "grad_norm": 0.2236328125, + "learning_rate": 7.612474029223866e-05, + "loss": 2.1352, "step": 7310 }, { - "epoch": 1.36, - "grad_norm": 0.220703125, - "learning_rate": 0.00011322325217648839, - "loss": 2.104, + "epoch": 1.24, + "grad_norm": 0.2314453125, + "learning_rate": 7.598122925025985e-05, + "loss": 2.1398, "step": 7315 }, { - "epoch": 1.36, - "grad_norm": 0.2138671875, - "learning_rate": 0.00011293162442012866, - "loss": 2.0908, + "epoch": 1.24, + "grad_norm": 0.220703125, + "learning_rate": 7.583777068529209e-05, + "loss": 2.1497, "step": 7320 }, { - "epoch": 1.36, - "grad_norm": 0.2158203125, - "learning_rate": 0.00011264022489630737, - "loss": 2.1208, + "epoch": 1.24, + "grad_norm": 0.234375, + "learning_rate": 7.569436491076842e-05, + "loss": 2.1127, "step": 7325 }, { - "epoch": 1.36, - "grad_norm": 0.2177734375, - "learning_rate": 0.00011234905436887078, - "loss": 2.1089, + "epoch": 1.24, + "grad_norm": 0.23046875, + "learning_rate": 7.55510122400066e-05, + "loss": 2.1145, "step": 7330 }, { - "epoch": 1.36, - "grad_norm": 0.22265625, - "learning_rate": 0.0001120581136010649, - "loss": 2.1016, + "epoch": 1.24, + "grad_norm": 0.2353515625, + "learning_rate": 7.540771298620826e-05, + "loss": 2.1487, "step": 7335 }, { - "epoch": 1.36, - "grad_norm": 0.21875, - "learning_rate": 0.00011176740335553333, - "loss": 2.1079, + "epoch": 1.24, + "grad_norm": 0.22265625, + "learning_rate": 7.526446746245843e-05, + "loss": 2.122, "step": 7340 }, { - "epoch": 1.36, - "grad_norm": 0.2197265625, - "learning_rate": 0.00011147692439431572, - "loss": 2.1365, + "epoch": 1.24, + "grad_norm": 0.22265625, + "learning_rate": 7.512127598172471e-05, + "loss": 2.131, "step": 7345 }, { - "epoch": 1.36, - "grad_norm": 0.2119140625, - "learning_rate": 0.00011118667747884517, - "loss": 2.0991, + "epoch": 1.24, + "grad_norm": 0.22265625, + "learning_rate": 7.497813885685661e-05, + "loss": 2.1383, "step": 7350 }, { - "epoch": 1.36, - "grad_norm": 0.2275390625, - "learning_rate": 0.0001108966633699466, - "loss": 2.1581, + "epoch": 1.25, + "grad_norm": 0.2314453125, + "learning_rate": 7.483505640058488e-05, + "loss": 2.1283, "step": 7355 }, { - "epoch": 1.37, - "grad_norm": 0.216796875, - "learning_rate": 0.00011060688282783469, - "loss": 2.1253, + "epoch": 1.25, + "grad_norm": 0.2333984375, + "learning_rate": 7.469202892552088e-05, + "loss": 2.1387, "step": 7360 }, { - "epoch": 1.37, - "grad_norm": 0.2158203125, - "learning_rate": 0.00011031733661211184, - "loss": 2.0904, + "epoch": 1.25, + "grad_norm": 0.2275390625, + "learning_rate": 7.454905674415575e-05, + "loss": 2.1122, "step": 7365 }, { - "epoch": 1.37, - "grad_norm": 0.216796875, - "learning_rate": 0.00011002802548176623, - "loss": 2.0793, + "epoch": 1.25, + "grad_norm": 0.232421875, + "learning_rate": 7.440614016885996e-05, + "loss": 2.121, "step": 7370 }, { - "epoch": 1.37, - "grad_norm": 0.2138671875, - "learning_rate": 0.00010973895019516974, - "loss": 2.1235, + "epoch": 1.25, + "grad_norm": 0.2236328125, + "learning_rate": 7.426327951188227e-05, + "loss": 2.1497, "step": 7375 }, { - "epoch": 1.37, - "grad_norm": 0.21875, - "learning_rate": 0.00010945011151007634, - "loss": 2.1639, + "epoch": 1.25, + "grad_norm": 0.224609375, + "learning_rate": 7.412047508534953e-05, + "loss": 2.1219, "step": 7380 }, { - "epoch": 1.37, - "grad_norm": 0.21484375, - "learning_rate": 0.00010916151018361929, - "loss": 2.106, + "epoch": 1.25, + "grad_norm": 0.232421875, + "learning_rate": 7.397772720126561e-05, + "loss": 2.1193, "step": 7385 }, { - "epoch": 1.37, - "grad_norm": 0.2197265625, - "learning_rate": 0.00010887314697230997, - "loss": 2.1463, + "epoch": 1.25, + "grad_norm": 0.228515625, + "learning_rate": 7.383503617151075e-05, + "loss": 2.0977, "step": 7390 }, { - "epoch": 1.37, - "grad_norm": 0.2158203125, - "learning_rate": 0.0001085850226320357, - "loss": 2.1063, + "epoch": 1.25, + "grad_norm": 0.2265625, + "learning_rate": 7.369240230784115e-05, + "loss": 2.112, "step": 7395 }, { - "epoch": 1.37, - "grad_norm": 0.216796875, - "learning_rate": 0.00010829713791805738, - "loss": 2.1395, + "epoch": 1.25, + "grad_norm": 0.23046875, + "learning_rate": 7.354982592188803e-05, + "loss": 2.1423, "step": 7400 }, { - "epoch": 1.37, - "grad_norm": 0.2138671875, - "learning_rate": 0.00010800949358500794, - "loss": 2.1373, + "epoch": 1.25, + "grad_norm": 0.22265625, + "learning_rate": 7.340730732515696e-05, + "loss": 2.1275, "step": 7405 }, { - "epoch": 1.37, - "grad_norm": 0.2216796875, - "learning_rate": 0.00010772209038689003, - "loss": 2.1083, + "epoch": 1.25, + "grad_norm": 0.23046875, + "learning_rate": 7.326484682902739e-05, + "loss": 2.1446, "step": 7410 }, { - "epoch": 1.38, - "grad_norm": 0.216796875, - "learning_rate": 0.00010743492907707455, - "loss": 2.1363, + "epoch": 1.26, + "grad_norm": 0.240234375, + "learning_rate": 7.312244474475178e-05, + "loss": 2.1214, "step": 7415 }, { - "epoch": 1.38, - "grad_norm": 0.216796875, - "learning_rate": 0.00010714801040829796, - "loss": 2.1342, + "epoch": 1.26, + "grad_norm": 0.2275390625, + "learning_rate": 7.298010138345485e-05, + "loss": 2.131, "step": 7420 }, { - "epoch": 1.38, - "grad_norm": 0.2216796875, - "learning_rate": 0.00010686133513266079, - "loss": 2.1208, + "epoch": 1.26, + "grad_norm": 0.2158203125, + "learning_rate": 7.283781705613323e-05, + "loss": 2.07, "step": 7425 }, { - "epoch": 1.38, - "grad_norm": 0.2138671875, - "learning_rate": 0.00010657490400162584, - "loss": 2.118, + "epoch": 1.26, + "grad_norm": 0.2265625, + "learning_rate": 7.26955920736544e-05, + "loss": 2.115, "step": 7430 }, { - "epoch": 1.38, - "grad_norm": 0.2177734375, - "learning_rate": 0.00010628871776601542, - "loss": 2.1088, + "epoch": 1.26, + "grad_norm": 0.2333984375, + "learning_rate": 7.255342674675625e-05, + "loss": 2.0861, "step": 7435 }, { - "epoch": 1.38, - "grad_norm": 0.2197265625, - "learning_rate": 0.00010600277717601042, - "loss": 2.1007, + "epoch": 1.26, + "grad_norm": 0.228515625, + "learning_rate": 7.241132138604634e-05, + "loss": 2.1605, "step": 7440 }, { - "epoch": 1.38, - "grad_norm": 0.220703125, - "learning_rate": 0.00010571708298114751, + "epoch": 1.26, + "grad_norm": 0.232421875, + "learning_rate": 7.226927630200117e-05, "loss": 2.1492, "step": 7445 }, { - "epoch": 1.38, - "grad_norm": 0.22265625, - "learning_rate": 0.00010543163593031753, - "loss": 2.1433, + "epoch": 1.26, + "grad_norm": 0.228515625, + "learning_rate": 7.212729180496563e-05, + "loss": 2.1075, "step": 7450 }, { - "epoch": 1.38, - "grad_norm": 0.212890625, - "learning_rate": 0.00010514643677176354, - "loss": 2.1069, + "epoch": 1.26, + "grad_norm": 0.232421875, + "learning_rate": 7.198536820515214e-05, + "loss": 2.1189, "step": 7455 }, { - "epoch": 1.38, - "grad_norm": 0.2197265625, - "learning_rate": 0.00010486148625307868, - "loss": 2.1463, + "epoch": 1.26, + "grad_norm": 0.2275390625, + "learning_rate": 7.18435058126401e-05, + "loss": 2.0948, "step": 7460 }, { - "epoch": 1.38, - "grad_norm": 0.216796875, - "learning_rate": 0.00010457678512120463, - "loss": 2.0955, + "epoch": 1.26, + "grad_norm": 0.2197265625, + "learning_rate": 7.170170493737522e-05, + "loss": 2.1453, "step": 7465 }, { - "epoch": 1.39, - "grad_norm": 0.2294921875, - "learning_rate": 0.00010429233412242887, - "loss": 2.0892, + "epoch": 1.27, + "grad_norm": 0.2255859375, + "learning_rate": 7.155996588916883e-05, + "loss": 2.1092, "step": 7470 }, { - "epoch": 1.39, - "grad_norm": 0.2099609375, - "learning_rate": 0.00010400813400238368, - "loss": 2.0786, + "epoch": 1.27, + "grad_norm": 0.2265625, + "learning_rate": 7.141828897769701e-05, + "loss": 2.1437, "step": 7475 }, { - "epoch": 1.39, - "grad_norm": 0.2158203125, - "learning_rate": 0.00010372418550604351, - "loss": 2.1339, + "epoch": 1.27, + "grad_norm": 0.2255859375, + "learning_rate": 7.127667451250031e-05, + "loss": 2.1338, "step": 7480 }, { - "epoch": 1.39, - "grad_norm": 0.21875, - "learning_rate": 0.00010344048937772297, - "loss": 2.1183, + "epoch": 1.27, + "grad_norm": 0.23046875, + "learning_rate": 7.113512280298264e-05, + "loss": 2.1306, "step": 7485 }, { - "epoch": 1.39, - "grad_norm": 0.2255859375, - "learning_rate": 0.00010315704636107564, - "loss": 2.1244, + "epoch": 1.27, + "grad_norm": 0.2236328125, + "learning_rate": 7.099363415841097e-05, + "loss": 2.1019, "step": 7490 }, { - "epoch": 1.39, - "grad_norm": 0.216796875, - "learning_rate": 0.0001028738571990913, - "loss": 2.0664, + "epoch": 1.27, + "grad_norm": 0.2255859375, + "learning_rate": 7.085220888791439e-05, + "loss": 2.0879, "step": 7495 }, { - "epoch": 1.39, - "grad_norm": 0.216796875, - "learning_rate": 0.0001025909226340944, - "loss": 2.1083, + "epoch": 1.27, + "grad_norm": 0.2314453125, + "learning_rate": 7.071084730048352e-05, + "loss": 2.1013, "step": 7500 }, { - "epoch": 1.39, - "grad_norm": 0.216796875, - "learning_rate": 0.0001023082434077419, - "loss": 2.1205, + "epoch": 1.27, + "grad_norm": 0.2314453125, + "learning_rate": 7.056954970496988e-05, + "loss": 2.1492, "step": 7505 }, { - "epoch": 1.39, - "grad_norm": 0.2138671875, - "learning_rate": 0.00010202582026102153, - "loss": 2.1116, + "epoch": 1.27, + "grad_norm": 0.248046875, + "learning_rate": 7.042831641008518e-05, + "loss": 2.1336, "step": 7510 }, { - "epoch": 1.39, - "grad_norm": 0.2236328125, - "learning_rate": 0.00010174365393424992, - "loss": 2.1348, + "epoch": 1.27, + "grad_norm": 0.22265625, + "learning_rate": 7.028714772440061e-05, + "loss": 2.1679, "step": 7515 }, { - "epoch": 1.4, - "grad_norm": 0.2177734375, - "learning_rate": 0.00010146174516707005, - "loss": 2.1105, + "epoch": 1.27, + "grad_norm": 0.224609375, + "learning_rate": 7.014604395634623e-05, + "loss": 2.122, "step": 7520 }, { - "epoch": 1.4, - "grad_norm": 0.21875, - "learning_rate": 0.00010118009469845027, - "loss": 2.1106, + "epoch": 1.27, + "grad_norm": 0.2255859375, + "learning_rate": 7.000500541421028e-05, + "loss": 2.1175, "step": 7525 }, { - "epoch": 1.4, - "grad_norm": 0.216796875, - "learning_rate": 0.00010089870326668154, - "loss": 2.1576, + "epoch": 1.28, + "grad_norm": 0.2216796875, + "learning_rate": 6.986403240613844e-05, + "loss": 2.1061, "step": 7530 }, { - "epoch": 1.4, - "grad_norm": 0.21875, - "learning_rate": 0.00010061757160937586, - "loss": 2.1146, + "epoch": 1.28, + "grad_norm": 0.22265625, + "learning_rate": 6.972312524013323e-05, + "loss": 2.1216, "step": 7535 }, { - "epoch": 1.4, - "grad_norm": 0.2177734375, - "learning_rate": 0.00010033670046346433, - "loss": 2.0823, + "epoch": 1.28, + "grad_norm": 0.2265625, + "learning_rate": 6.958228422405335e-05, + "loss": 2.1641, "step": 7540 }, { - "epoch": 1.4, - "grad_norm": 0.216796875, - "learning_rate": 0.00010005609056519513, - "loss": 2.0989, + "epoch": 1.28, + "grad_norm": 0.2265625, + "learning_rate": 6.944150966561294e-05, + "loss": 2.1464, "step": 7545 }, { - "epoch": 1.4, - "grad_norm": 0.2255859375, - "learning_rate": 9.97757426501317e-05, - "loss": 2.1095, + "epoch": 1.28, + "grad_norm": 0.2177734375, + "learning_rate": 6.930080187238095e-05, + "loss": 2.121, "step": 7550 }, { - "epoch": 1.4, - "grad_norm": 0.2177734375, - "learning_rate": 9.949565745315055e-05, - "loss": 2.1397, + "epoch": 1.28, + "grad_norm": 0.23046875, + "learning_rate": 6.916016115178043e-05, + "loss": 2.1218, "step": 7555 }, { - "epoch": 1.4, - "grad_norm": 0.2158203125, - "learning_rate": 9.921583570843986e-05, - "loss": 2.0911, + "epoch": 1.28, + "grad_norm": 0.236328125, + "learning_rate": 6.901958781108794e-05, + "loss": 2.1254, "step": 7560 }, { - "epoch": 1.4, - "grad_norm": 0.220703125, - "learning_rate": 9.893627814949693e-05, - "loss": 2.1112, + "epoch": 1.28, + "grad_norm": 0.224609375, + "learning_rate": 6.887908215743282e-05, + "loss": 2.1073, "step": 7565 }, { - "epoch": 1.4, - "grad_norm": 0.2216796875, - "learning_rate": 9.865698550912667e-05, - "loss": 2.0844, + "epoch": 1.28, + "grad_norm": 0.23046875, + "learning_rate": 6.873864449779646e-05, + "loss": 2.1278, "step": 7570 }, { - "epoch": 1.41, - "grad_norm": 0.216796875, - "learning_rate": 9.837795851943954e-05, - "loss": 2.1082, + "epoch": 1.28, + "grad_norm": 0.220703125, + "learning_rate": 6.859827513901178e-05, + "loss": 2.0982, "step": 7575 }, { - "epoch": 1.41, - "grad_norm": 0.2138671875, - "learning_rate": 9.809919791184963e-05, - "loss": 2.1108, + "epoch": 1.28, + "grad_norm": 0.2216796875, + "learning_rate": 6.845797438776241e-05, + "loss": 2.1182, "step": 7580 }, { - "epoch": 1.41, - "grad_norm": 0.216796875, - "learning_rate": 9.782070441707276e-05, - "loss": 2.1201, + "epoch": 1.28, + "grad_norm": 0.2255859375, + "learning_rate": 6.831774255058212e-05, + "loss": 2.1411, "step": 7585 }, { - "epoch": 1.41, - "grad_norm": 0.212890625, - "learning_rate": 9.754247876512457e-05, - "loss": 2.098, + "epoch": 1.29, + "grad_norm": 0.2265625, + "learning_rate": 6.81775799338541e-05, + "loss": 2.149, "step": 7590 }, { - "epoch": 1.41, - "grad_norm": 0.2236328125, - "learning_rate": 9.726452168531879e-05, - "loss": 2.1319, + "epoch": 1.29, + "grad_norm": 0.2216796875, + "learning_rate": 6.803748684381031e-05, + "loss": 2.1647, "step": 7595 }, { - "epoch": 1.41, - "grad_norm": 0.2265625, - "learning_rate": 9.698683390626476e-05, - "loss": 2.145, + "epoch": 1.29, + "grad_norm": 0.2294921875, + "learning_rate": 6.78974635865308e-05, + "loss": 2.0988, "step": 7600 }, { - "epoch": 1.41, - "grad_norm": 0.2216796875, - "learning_rate": 9.67094161558661e-05, - "loss": 2.1141, + "epoch": 1.29, + "grad_norm": 0.224609375, + "learning_rate": 6.775751046794308e-05, + "loss": 2.0968, "step": 7605 }, { - "epoch": 1.41, - "grad_norm": 0.216796875, - "learning_rate": 9.64322691613188e-05, - "loss": 2.0464, + "epoch": 1.29, + "grad_norm": 0.22265625, + "learning_rate": 6.761762779382131e-05, + "loss": 2.1752, "step": 7610 }, { - "epoch": 1.41, - "grad_norm": 0.21875, - "learning_rate": 9.615539364910881e-05, - "loss": 2.1155, + "epoch": 1.29, + "grad_norm": 0.23046875, + "learning_rate": 6.747781586978589e-05, + "loss": 2.1605, "step": 7615 }, { - "epoch": 1.41, - "grad_norm": 0.22265625, - "learning_rate": 9.587879034501062e-05, - "loss": 2.1043, + "epoch": 1.29, + "grad_norm": 0.2294921875, + "learning_rate": 6.73380750013026e-05, + "loss": 2.1392, "step": 7620 }, { - "epoch": 1.41, - "grad_norm": 0.228515625, - "learning_rate": 9.5602459974085e-05, - "loss": 2.1154, + "epoch": 1.29, + "grad_norm": 0.2275390625, + "learning_rate": 6.719840549368183e-05, + "loss": 2.1066, "step": 7625 }, { - "epoch": 1.42, - "grad_norm": 0.2138671875, - "learning_rate": 9.532640326067763e-05, - "loss": 2.0965, + "epoch": 1.29, + "grad_norm": 0.2373046875, + "learning_rate": 6.705880765207825e-05, + "loss": 2.0877, "step": 7630 }, { - "epoch": 1.42, - "grad_norm": 0.2216796875, - "learning_rate": 9.505062092841644e-05, - "loss": 2.0938, + "epoch": 1.29, + "grad_norm": 0.2265625, + "learning_rate": 6.691928178148995e-05, + "loss": 2.11, "step": 7635 }, { - "epoch": 1.42, - "grad_norm": 0.2236328125, - "learning_rate": 9.477511370021026e-05, - "loss": 2.0867, + "epoch": 1.29, + "grad_norm": 0.23828125, + "learning_rate": 6.677982818675758e-05, + "loss": 2.1526, "step": 7640 }, { - "epoch": 1.42, - "grad_norm": 0.2158203125, - "learning_rate": 9.44998822982471e-05, - "loss": 2.0947, + "epoch": 1.29, + "grad_norm": 0.23046875, + "learning_rate": 6.664044717256402e-05, + "loss": 2.0917, "step": 7645 }, { - "epoch": 1.42, - "grad_norm": 0.216796875, - "learning_rate": 9.422492744399137e-05, - "loss": 2.1152, + "epoch": 1.3, + "grad_norm": 0.2255859375, + "learning_rate": 6.650113904343366e-05, + "loss": 2.1098, "step": 7650 }, { - "epoch": 1.42, - "grad_norm": 0.2255859375, - "learning_rate": 9.395024985818309e-05, - "loss": 2.0936, + "epoch": 1.3, + "grad_norm": 0.2275390625, + "learning_rate": 6.636190410373143e-05, + "loss": 2.128, "step": 7655 }, { - "epoch": 1.42, - "grad_norm": 0.212890625, - "learning_rate": 9.367585026083518e-05, - "loss": 2.1191, + "epoch": 1.3, + "grad_norm": 0.2236328125, + "learning_rate": 6.622274265766253e-05, + "loss": 2.1164, "step": 7660 }, { - "epoch": 1.42, - "grad_norm": 0.2197265625, - "learning_rate": 9.340172937123201e-05, - "loss": 2.1362, + "epoch": 1.3, + "grad_norm": 0.2265625, + "learning_rate": 6.608365500927148e-05, + "loss": 2.0702, "step": 7665 }, { - "epoch": 1.42, - "grad_norm": 0.2197265625, - "learning_rate": 9.312788790792728e-05, - "loss": 2.1423, + "epoch": 1.3, + "grad_norm": 0.2373046875, + "learning_rate": 6.594464146244165e-05, + "loss": 2.1779, "step": 7670 }, { - "epoch": 1.42, - "grad_norm": 0.21875, - "learning_rate": 9.285432658874216e-05, - "loss": 2.1184, + "epoch": 1.3, + "grad_norm": 0.2294921875, + "learning_rate": 6.580570232089449e-05, + "loss": 2.0749, "step": 7675 }, { - "epoch": 1.42, - "grad_norm": 0.21484375, - "learning_rate": 9.258104613076385e-05, - "loss": 2.1006, + "epoch": 1.3, + "grad_norm": 0.234375, + "learning_rate": 6.56668378881888e-05, + "loss": 2.1277, "step": 7680 }, { - "epoch": 1.43, - "grad_norm": 0.2216796875, - "learning_rate": 9.230804725034274e-05, - "loss": 2.0857, + "epoch": 1.3, + "grad_norm": 0.2197265625, + "learning_rate": 6.552804846772026e-05, + "loss": 2.1526, "step": 7685 }, { - "epoch": 1.43, - "grad_norm": 0.2275390625, - "learning_rate": 9.203533066309168e-05, - "loss": 2.1165, + "epoch": 1.3, + "grad_norm": 0.21875, + "learning_rate": 6.538933436272065e-05, + "loss": 2.1642, "step": 7690 }, { - "epoch": 1.43, - "grad_norm": 0.228515625, - "learning_rate": 9.176289708388329e-05, - "loss": 2.1702, + "epoch": 1.3, + "grad_norm": 0.224609375, + "learning_rate": 6.525069587625712e-05, + "loss": 2.1258, "step": 7695 }, { - "epoch": 1.43, - "grad_norm": 0.2275390625, - "learning_rate": 9.149074722684814e-05, - "loss": 2.1102, + "epoch": 1.3, + "grad_norm": 0.2314453125, + "learning_rate": 6.511213331123168e-05, + "loss": 2.1485, "step": 7700 }, { - "epoch": 1.43, - "grad_norm": 0.2177734375, - "learning_rate": 9.121888180537348e-05, - "loss": 2.084, + "epoch": 1.3, + "grad_norm": 0.2265625, + "learning_rate": 6.497364697038047e-05, + "loss": 2.11, "step": 7705 }, { - "epoch": 1.43, - "grad_norm": 0.2138671875, - "learning_rate": 9.094730153210076e-05, - "loss": 2.1463, + "epoch": 1.31, + "grad_norm": 0.2255859375, + "learning_rate": 6.483523715627301e-05, + "loss": 2.1677, "step": 7710 }, { - "epoch": 1.43, - "grad_norm": 0.2177734375, - "learning_rate": 9.067600711892396e-05, - "loss": 2.1166, + "epoch": 1.31, + "grad_norm": 0.2265625, + "learning_rate": 6.469690417131171e-05, + "loss": 2.1133, "step": 7715 }, { - "epoch": 1.43, - "grad_norm": 0.220703125, - "learning_rate": 9.04049992769877e-05, - "loss": 2.1383, + "epoch": 1.31, + "grad_norm": 0.228515625, + "learning_rate": 6.455864831773108e-05, + "loss": 2.1201, "step": 7720 }, { - "epoch": 1.43, - "grad_norm": 0.216796875, - "learning_rate": 9.013427871668562e-05, - "loss": 2.11, + "epoch": 1.31, + "grad_norm": 0.2216796875, + "learning_rate": 6.442046989759712e-05, + "loss": 2.0895, "step": 7725 }, { - "epoch": 1.43, - "grad_norm": 0.2177734375, - "learning_rate": 8.986384614765817e-05, - "loss": 2.1083, + "epoch": 1.31, + "grad_norm": 0.22265625, + "learning_rate": 6.428236921280666e-05, + "loss": 2.0973, "step": 7730 }, { - "epoch": 1.44, - "grad_norm": 0.21875, - "learning_rate": 8.959370227879067e-05, - "loss": 2.1375, + "epoch": 1.31, + "grad_norm": 0.2314453125, + "learning_rate": 6.414434656508665e-05, + "loss": 2.1285, "step": 7735 }, { - "epoch": 1.44, - "grad_norm": 0.2177734375, - "learning_rate": 8.932384781821208e-05, - "loss": 2.111, + "epoch": 1.31, + "grad_norm": 0.228515625, + "learning_rate": 6.400640225599358e-05, + "loss": 2.1108, "step": 7740 }, { - "epoch": 1.44, - "grad_norm": 0.2177734375, - "learning_rate": 8.905428347329245e-05, - "loss": 2.1026, + "epoch": 1.31, + "grad_norm": 0.2236328125, + "learning_rate": 6.386853658691281e-05, + "loss": 2.1164, "step": 7745 }, { - "epoch": 1.44, - "grad_norm": 0.21875, - "learning_rate": 8.878500995064148e-05, - "loss": 2.1506, + "epoch": 1.31, + "grad_norm": 0.23046875, + "learning_rate": 6.373074985905781e-05, + "loss": 2.1695, "step": 7750 }, { - "epoch": 1.44, - "grad_norm": 0.2177734375, - "learning_rate": 8.851602795610646e-05, - "loss": 2.1432, + "epoch": 1.31, + "grad_norm": 0.224609375, + "learning_rate": 6.359304237346961e-05, + "loss": 2.0999, "step": 7755 }, { - "epoch": 1.44, - "grad_norm": 0.2158203125, - "learning_rate": 8.824733819477051e-05, - "loss": 2.0726, + "epoch": 1.31, + "grad_norm": 0.2275390625, + "learning_rate": 6.345541443101616e-05, + "loss": 2.1377, "step": 7760 }, { - "epoch": 1.44, - "grad_norm": 0.2109375, - "learning_rate": 8.797894137095077e-05, - "loss": 2.0853, + "epoch": 1.32, + "grad_norm": 0.228515625, + "learning_rate": 6.331786633239154e-05, + "loss": 2.113, "step": 7765 }, { - "epoch": 1.44, - "grad_norm": 0.2236328125, - "learning_rate": 8.77108381881964e-05, - "loss": 2.1539, + "epoch": 1.32, + "grad_norm": 0.2265625, + "learning_rate": 6.318039837811542e-05, + "loss": 2.1612, "step": 7770 }, { - "epoch": 1.44, - "grad_norm": 0.220703125, - "learning_rate": 8.744302934928701e-05, - "loss": 2.0791, + "epoch": 1.32, + "grad_norm": 0.2314453125, + "learning_rate": 6.304301086853243e-05, + "loss": 2.1783, "step": 7775 }, { - "epoch": 1.44, - "grad_norm": 0.2119140625, - "learning_rate": 8.717551555623051e-05, - "loss": 2.0752, + "epoch": 1.32, + "grad_norm": 0.2333984375, + "learning_rate": 6.290570410381129e-05, + "loss": 2.1309, "step": 7780 }, { - "epoch": 1.44, - "grad_norm": 0.220703125, - "learning_rate": 8.690829751026141e-05, - "loss": 2.1238, + "epoch": 1.32, + "grad_norm": 0.2353515625, + "learning_rate": 6.276847838394446e-05, + "loss": 2.0939, "step": 7785 }, { - "epoch": 1.45, - "grad_norm": 0.2333984375, - "learning_rate": 8.664137591183901e-05, - "loss": 2.1237, + "epoch": 1.32, + "grad_norm": 0.2353515625, + "learning_rate": 6.263133400874725e-05, + "loss": 2.1013, "step": 7790 }, { - "epoch": 1.45, - "grad_norm": 0.2119140625, - "learning_rate": 8.637475146064554e-05, - "loss": 2.1183, + "epoch": 1.32, + "grad_norm": 0.2216796875, + "learning_rate": 6.249427127785724e-05, + "loss": 2.1307, "step": 7795 }, { - "epoch": 1.45, - "grad_norm": 0.220703125, - "learning_rate": 8.610842485558428e-05, - "loss": 2.1416, + "epoch": 1.32, + "grad_norm": 0.2294921875, + "learning_rate": 6.235729049073371e-05, + "loss": 2.0944, "step": 7800 }, { - "epoch": 1.45, + "epoch": 1.32, "grad_norm": 0.2255859375, - "learning_rate": 8.584239679477775e-05, - "loss": 2.1213, + "learning_rate": 6.222039194665678e-05, + "loss": 2.0731, "step": 7805 }, { - "epoch": 1.45, - "grad_norm": 0.2177734375, - "learning_rate": 8.557666797556612e-05, - "loss": 2.1557, + "epoch": 1.32, + "grad_norm": 0.224609375, + "learning_rate": 6.2083575944727e-05, + "loss": 2.1293, "step": 7810 }, { - "epoch": 1.45, - "grad_norm": 0.228515625, - "learning_rate": 8.531123909450476e-05, - "loss": 2.0979, + "epoch": 1.32, + "grad_norm": 0.22265625, + "learning_rate": 6.194684278386455e-05, + "loss": 2.1658, "step": 7815 }, { - "epoch": 1.45, - "grad_norm": 0.21875, - "learning_rate": 8.504611084736305e-05, - "loss": 2.1238, + "epoch": 1.32, + "grad_norm": 0.2236328125, + "learning_rate": 6.18101927628085e-05, + "loss": 2.0989, "step": 7820 }, { - "epoch": 1.45, - "grad_norm": 0.2138671875, - "learning_rate": 8.478128392912239e-05, - "loss": 2.1331, + "epoch": 1.33, + "grad_norm": 0.2255859375, + "learning_rate": 6.167362618011648e-05, + "loss": 2.1014, "step": 7825 }, { - "epoch": 1.45, - "grad_norm": 0.2119140625, - "learning_rate": 8.451675903397416e-05, - "loss": 2.114, + "epoch": 1.33, + "grad_norm": 0.2275390625, + "learning_rate": 6.153714333416372e-05, + "loss": 2.1117, "step": 7830 }, { - "epoch": 1.45, - "grad_norm": 0.21875, - "learning_rate": 8.425253685531809e-05, - "loss": 2.152, + "epoch": 1.33, + "grad_norm": 0.2294921875, + "learning_rate": 6.140074452314236e-05, + "loss": 2.0891, "step": 7835 }, { - "epoch": 1.45, - "grad_norm": 0.2109375, - "learning_rate": 8.39886180857604e-05, - "loss": 2.1003, + "epoch": 1.33, + "grad_norm": 0.2265625, + "learning_rate": 6.126443004506122e-05, + "loss": 2.0833, "step": 7840 }, { - "epoch": 1.46, - "grad_norm": 0.220703125, - "learning_rate": 8.372500341711199e-05, - "loss": 2.1093, + "epoch": 1.33, + "grad_norm": 0.228515625, + "learning_rate": 6.112820019774461e-05, + "loss": 2.1394, "step": 7845 }, { - "epoch": 1.46, - "grad_norm": 0.212890625, - "learning_rate": 8.346169354038657e-05, - "loss": 2.1132, + "epoch": 1.33, + "grad_norm": 0.224609375, + "learning_rate": 6.099205527883207e-05, + "loss": 2.1298, "step": 7850 }, { - "epoch": 1.46, - "grad_norm": 0.2099609375, - "learning_rate": 8.319868914579894e-05, - "loss": 2.0899, + "epoch": 1.33, + "grad_norm": 0.224609375, + "learning_rate": 6.0855995585777616e-05, + "loss": 2.1394, "step": 7855 }, { - "epoch": 1.46, - "grad_norm": 0.2236328125, - "learning_rate": 8.293599092276332e-05, - "loss": 2.1106, + "epoch": 1.33, + "grad_norm": 0.2265625, + "learning_rate": 6.072002141584891e-05, + "loss": 2.1386, "step": 7860 }, { - "epoch": 1.46, - "grad_norm": 0.224609375, - "learning_rate": 8.267359955989087e-05, - "loss": 2.1119, + "epoch": 1.33, + "grad_norm": 0.2265625, + "learning_rate": 6.058413306612689e-05, + "loss": 2.1127, "step": 7865 }, { - "epoch": 1.46, - "grad_norm": 0.2197265625, - "learning_rate": 8.241151574498896e-05, - "loss": 2.1188, + "epoch": 1.33, + "grad_norm": 0.2333984375, + "learning_rate": 6.044833083350503e-05, + "loss": 2.1322, "step": 7870 }, { - "epoch": 1.46, - "grad_norm": 0.220703125, - "learning_rate": 8.214974016505838e-05, - "loss": 2.0889, + "epoch": 1.33, + "grad_norm": 0.2177734375, + "learning_rate": 6.0312615014688436e-05, + "loss": 2.1384, "step": 7875 }, { - "epoch": 1.46, - "grad_norm": 0.216796875, - "learning_rate": 8.188827350629213e-05, - "loss": 2.1212, + "epoch": 1.33, + "grad_norm": 0.2236328125, + "learning_rate": 6.017698590619362e-05, + "loss": 2.1268, "step": 7880 }, { - "epoch": 1.46, - "grad_norm": 0.2236328125, - "learning_rate": 8.162711645407335e-05, - "loss": 2.111, + "epoch": 1.34, + "grad_norm": 0.224609375, + "learning_rate": 6.004144380434763e-05, + "loss": 2.129, "step": 7885 }, { - "epoch": 1.46, - "grad_norm": 0.2177734375, - "learning_rate": 8.136626969297365e-05, - "loss": 2.1206, + "epoch": 1.34, + "grad_norm": 0.2255859375, + "learning_rate": 5.9905989005287277e-05, + "loss": 2.1109, "step": 7890 }, { - "epoch": 1.46, - "grad_norm": 0.2138671875, - "learning_rate": 8.110573390675125e-05, - "loss": 2.0869, + "epoch": 1.34, + "grad_norm": 0.228515625, + "learning_rate": 5.977062180495876e-05, + "loss": 2.1361, "step": 7895 }, { - "epoch": 1.47, - "grad_norm": 0.2265625, - "learning_rate": 8.084550977834915e-05, - "loss": 2.1158, + "epoch": 1.34, + "grad_norm": 0.2216796875, + "learning_rate": 5.96353424991169e-05, + "loss": 2.1141, "step": 7900 }, { - "epoch": 1.47, + "epoch": 1.34, "grad_norm": 0.220703125, - "learning_rate": 8.058559798989362e-05, - "loss": 2.1113, + "learning_rate": 5.950015138332434e-05, + "loss": 2.1336, "step": 7905 }, { - "epoch": 1.47, - "grad_norm": 0.220703125, - "learning_rate": 8.032599922269206e-05, - "loss": 2.1123, + "epoch": 1.34, + "grad_norm": 0.2275390625, + "learning_rate": 5.9365048752951225e-05, + "loss": 2.1268, "step": 7910 }, { - "epoch": 1.47, - "grad_norm": 0.216796875, - "learning_rate": 8.006671415723108e-05, - "loss": 2.1223, + "epoch": 1.34, + "grad_norm": 0.23046875, + "learning_rate": 5.923003490317422e-05, + "loss": 2.1146, "step": 7915 }, { - "epoch": 1.47, - "grad_norm": 0.2177734375, - "learning_rate": 7.980774347317548e-05, - "loss": 2.0928, + "epoch": 1.34, + "grad_norm": 0.228515625, + "learning_rate": 5.9095110128976104e-05, + "loss": 2.1023, "step": 7920 }, { - "epoch": 1.47, - "grad_norm": 0.21875, - "learning_rate": 7.954908784936566e-05, - "loss": 2.0741, + "epoch": 1.34, + "grad_norm": 0.2333984375, + "learning_rate": 5.8960274725145056e-05, + "loss": 2.1159, "step": 7925 }, { - "epoch": 1.47, - "grad_norm": 0.21875, - "learning_rate": 7.929074796381618e-05, - "loss": 2.1044, + "epoch": 1.34, + "grad_norm": 0.23046875, + "learning_rate": 5.882552898627391e-05, + "loss": 2.0938, "step": 7930 }, { - "epoch": 1.47, - "grad_norm": 0.21484375, - "learning_rate": 7.903272449371395e-05, - "loss": 2.119, + "epoch": 1.34, + "grad_norm": 0.2294921875, + "learning_rate": 5.8690873206759675e-05, + "loss": 2.0999, "step": 7935 }, { - "epoch": 1.47, - "grad_norm": 0.2236328125, - "learning_rate": 7.87750181154167e-05, - "loss": 2.1355, + "epoch": 1.34, + "grad_norm": 0.2177734375, + "learning_rate": 5.8556307680802826e-05, + "loss": 2.0965, "step": 7940 }, { - "epoch": 1.47, - "grad_norm": 0.2177734375, - "learning_rate": 7.851762950445057e-05, - "loss": 2.133, + "epoch": 1.35, + "grad_norm": 0.224609375, + "learning_rate": 5.842183270240652e-05, + "loss": 2.0736, "step": 7945 }, { - "epoch": 1.47, - "grad_norm": 0.2216796875, - "learning_rate": 7.826055933550891e-05, - "loss": 2.1551, + "epoch": 1.35, + "grad_norm": 0.234375, + "learning_rate": 5.8287448565376215e-05, + "loss": 2.1204, "step": 7950 }, { - "epoch": 1.48, - "grad_norm": 0.2158203125, - "learning_rate": 7.800380828245051e-05, - "loss": 2.1216, + "epoch": 1.35, + "grad_norm": 0.2255859375, + "learning_rate": 5.8153155563318904e-05, + "loss": 2.1253, "step": 7955 }, { - "epoch": 1.48, - "grad_norm": 0.220703125, - "learning_rate": 7.774737701829747e-05, - "loss": 2.0914, - "step": 7960 - }, + "epoch": 1.35, + "grad_norm": 0.22265625, + "learning_rate": 5.801895398964234e-05, + "loss": 2.1087, + "step": 7960 + }, { - "epoch": 1.48, - "grad_norm": 0.2255859375, - "learning_rate": 7.749126621523363e-05, - "loss": 2.1388, + "epoch": 1.35, + "grad_norm": 0.2265625, + "learning_rate": 5.788484413755469e-05, + "loss": 2.0863, "step": 7965 }, { - "epoch": 1.48, - "grad_norm": 0.2236328125, - "learning_rate": 7.723547654460285e-05, - "loss": 2.0935, + "epoch": 1.35, + "grad_norm": 0.22265625, + "learning_rate": 5.7750826300063496e-05, + "loss": 2.1233, "step": 7970 }, { - "epoch": 1.48, - "grad_norm": 0.21484375, - "learning_rate": 7.698000867690724e-05, - "loss": 2.091, + "epoch": 1.35, + "grad_norm": 0.2294921875, + "learning_rate": 5.761690076997543e-05, + "loss": 2.1237, "step": 7975 }, { - "epoch": 1.48, - "grad_norm": 0.216796875, - "learning_rate": 7.672486328180529e-05, - "loss": 2.1153, + "epoch": 1.35, + "grad_norm": 0.2255859375, + "learning_rate": 5.7483067839895585e-05, + "loss": 2.1592, "step": 7980 }, { - "epoch": 1.48, - "grad_norm": 0.2265625, - "learning_rate": 7.647004102811013e-05, - "loss": 2.1339, + "epoch": 1.35, + "grad_norm": 0.22265625, + "learning_rate": 5.7349327802226474e-05, + "loss": 2.1362, "step": 7985 }, { - "epoch": 1.48, - "grad_norm": 0.216796875, - "learning_rate": 7.621554258378818e-05, - "loss": 2.1211, + "epoch": 1.35, + "grad_norm": 0.2197265625, + "learning_rate": 5.721568094916783e-05, + "loss": 2.106, "step": 7990 }, { - "epoch": 1.48, - "grad_norm": 0.2216796875, - "learning_rate": 7.596136861595651e-05, - "loss": 2.1512, + "epoch": 1.35, + "grad_norm": 0.2255859375, + "learning_rate": 5.7082127572715785e-05, + "loss": 2.1259, "step": 7995 }, { - "epoch": 1.48, - "grad_norm": 0.2216796875, - "learning_rate": 7.57075197908822e-05, - "loss": 2.1514, + "epoch": 1.35, + "grad_norm": 0.234375, + "learning_rate": 5.6948667964662136e-05, + "loss": 2.1102, "step": 8000 }, { - "epoch": 1.49, - "grad_norm": 0.212890625, - "learning_rate": 7.545399677397964e-05, - "loss": 2.1236, + "epoch": 1.36, + "grad_norm": 0.2265625, + "learning_rate": 5.6815302416593894e-05, + "loss": 2.1031, "step": 8005 }, { - "epoch": 1.49, - "grad_norm": 0.216796875, - "learning_rate": 7.520080022980935e-05, - "loss": 2.103, + "epoch": 1.36, + "grad_norm": 0.22265625, + "learning_rate": 5.668203121989266e-05, + "loss": 2.1164, "step": 8010 }, { - "epoch": 1.49, - "grad_norm": 0.2138671875, - "learning_rate": 7.494793082207605e-05, - "loss": 2.1457, + "epoch": 1.36, + "grad_norm": 0.23046875, + "learning_rate": 5.6548854665733674e-05, + "loss": 2.1152, "step": 8015 }, { - "epoch": 1.49, - "grad_norm": 0.2197265625, - "learning_rate": 7.46953892136268e-05, - "loss": 2.1162, + "epoch": 1.36, + "grad_norm": 0.224609375, + "learning_rate": 5.641577304508559e-05, + "loss": 2.1385, "step": 8020 }, { - "epoch": 1.49, - "grad_norm": 0.216796875, - "learning_rate": 7.444317606644973e-05, - "loss": 2.1181, + "epoch": 1.36, + "grad_norm": 0.2294921875, + "learning_rate": 5.6282786648709484e-05, + "loss": 2.111, "step": 8025 }, { - "epoch": 1.49, - "grad_norm": 0.220703125, - "learning_rate": 7.419129204167151e-05, - "loss": 2.1427, + "epoch": 1.36, + "grad_norm": 0.2314453125, + "learning_rate": 5.614989576715852e-05, + "loss": 2.1329, "step": 8030 }, { - "epoch": 1.49, - "grad_norm": 0.2236328125, - "learning_rate": 7.39397377995565e-05, - "loss": 2.1352, + "epoch": 1.36, + "grad_norm": 0.2216796875, + "learning_rate": 5.601710069077712e-05, + "loss": 2.1403, "step": 8035 }, { - "epoch": 1.49, - "grad_norm": 0.21875, - "learning_rate": 7.368851399950447e-05, - "loss": 2.0876, + "epoch": 1.36, + "grad_norm": 0.224609375, + "learning_rate": 5.58844017097004e-05, + "loss": 2.1114, "step": 8040 }, { - "epoch": 1.49, - "grad_norm": 0.2138671875, - "learning_rate": 7.343762130004872e-05, - "loss": 2.1071, + "epoch": 1.36, + "grad_norm": 0.228515625, + "learning_rate": 5.575179911385349e-05, + "loss": 2.1271, "step": 8045 }, { - "epoch": 1.49, - "grad_norm": 0.2216796875, - "learning_rate": 7.318706035885507e-05, - "loss": 2.0977, + "epoch": 1.36, + "grad_norm": 0.224609375, + "learning_rate": 5.561929319295104e-05, + "loss": 2.1481, "step": 8050 }, { - "epoch": 1.49, - "grad_norm": 0.216796875, - "learning_rate": 7.29368318327195e-05, - "loss": 2.1158, + "epoch": 1.36, + "grad_norm": 0.2314453125, + "learning_rate": 5.5486884236496303e-05, + "loss": 2.1358, "step": 8055 }, { - "epoch": 1.5, - "grad_norm": 0.21875, - "learning_rate": 7.268693637756658e-05, - "loss": 2.0564, + "epoch": 1.37, + "grad_norm": 0.228515625, + "learning_rate": 5.535457253378082e-05, + "loss": 2.137, "step": 8060 }, { - "epoch": 1.5, - "grad_norm": 0.21875, - "learning_rate": 7.243737464844787e-05, - "loss": 2.1404, + "epoch": 1.37, + "grad_norm": 0.2333984375, + "learning_rate": 5.522235837388362e-05, + "loss": 2.1403, "step": 8065 }, { - "epoch": 1.5, - "grad_norm": 0.2197265625, - "learning_rate": 7.218814729954005e-05, - "loss": 2.1203, + "epoch": 1.37, + "grad_norm": 0.2314453125, + "learning_rate": 5.5090242045670605e-05, + "loss": 2.1279, "step": 8070 }, { - "epoch": 1.5, - "grad_norm": 0.220703125, - "learning_rate": 7.193925498414357e-05, - "loss": 2.1112, + "epoch": 1.37, + "grad_norm": 0.2236328125, + "learning_rate": 5.495822383779392e-05, + "loss": 2.1185, "step": 8075 }, { - "epoch": 1.5, - "grad_norm": 0.2265625, - "learning_rate": 7.169069835468017e-05, - "loss": 2.1359, + "epoch": 1.37, + "grad_norm": 0.232421875, + "learning_rate": 5.48263040386914e-05, + "loss": 2.1356, "step": 8080 }, { - "epoch": 1.5, - "grad_norm": 0.2265625, - "learning_rate": 7.144247806269213e-05, - "loss": 2.0982, + "epoch": 1.37, + "grad_norm": 0.2255859375, + "learning_rate": 5.469448293658574e-05, + "loss": 2.1376, "step": 8085 }, { - "epoch": 1.5, - "grad_norm": 0.220703125, - "learning_rate": 7.119459475883983e-05, - "loss": 2.0748, + "epoch": 1.37, + "grad_norm": 0.22265625, + "learning_rate": 5.4562760819484125e-05, + "loss": 2.1191, "step": 8090 }, { - "epoch": 1.5, - "grad_norm": 0.2158203125, - "learning_rate": 7.094704909290036e-05, - "loss": 2.1213, + "epoch": 1.37, + "grad_norm": 0.224609375, + "learning_rate": 5.443113797517741e-05, + "loss": 2.0956, "step": 8095 }, { - "epoch": 1.5, - "grad_norm": 0.220703125, - "learning_rate": 7.069984171376571e-05, - "loss": 2.1405, + "epoch": 1.37, + "grad_norm": 0.2314453125, + "learning_rate": 5.4299614691239576e-05, + "loss": 2.1205, "step": 8100 }, { - "epoch": 1.5, - "grad_norm": 0.2197265625, - "learning_rate": 7.045297326944125e-05, - "loss": 2.1144, + "epoch": 1.37, + "grad_norm": 0.2353515625, + "learning_rate": 5.416819125502712e-05, + "loss": 2.1297, "step": 8105 }, { - "epoch": 1.5, - "grad_norm": 0.21875, - "learning_rate": 7.020644440704376e-05, - "loss": 2.0758, + "epoch": 1.37, + "grad_norm": 0.2255859375, + "learning_rate": 5.4036867953678286e-05, + "loss": 2.1068, "step": 8110 }, { - "epoch": 1.51, - "grad_norm": 0.2392578125, - "learning_rate": 6.996025577279986e-05, - "loss": 2.1157, + "epoch": 1.37, + "grad_norm": 0.2255859375, + "learning_rate": 5.390564507411261e-05, + "loss": 2.1027, "step": 8115 }, { - "epoch": 1.51, - "grad_norm": 0.2158203125, - "learning_rate": 6.971440801204454e-05, - "loss": 2.1075, + "epoch": 1.38, + "grad_norm": 0.232421875, + "learning_rate": 5.377452290303023e-05, + "loss": 2.1181, "step": 8120 }, { - "epoch": 1.51, + "epoch": 1.38, "grad_norm": 0.2255859375, - "learning_rate": 6.946890176921915e-05, - "loss": 2.1304, + "learning_rate": 5.364350172691124e-05, + "loss": 2.1774, "step": 8125 }, { - "epoch": 1.51, - "grad_norm": 0.220703125, - "learning_rate": 6.922373768786954e-05, - "loss": 2.1381, + "epoch": 1.38, + "grad_norm": 0.2294921875, + "learning_rate": 5.3512581832015075e-05, + "loss": 2.099, "step": 8130 }, { - "epoch": 1.51, - "grad_norm": 0.2138671875, - "learning_rate": 6.897891641064511e-05, - "loss": 2.078, + "epoch": 1.38, + "grad_norm": 0.22265625, + "learning_rate": 5.3381763504379914e-05, + "loss": 2.1234, "step": 8135 }, { - "epoch": 1.51, - "grad_norm": 0.2216796875, - "learning_rate": 6.873443857929638e-05, - "loss": 2.1653, + "epoch": 1.38, + "grad_norm": 0.23046875, + "learning_rate": 5.325104702982192e-05, + "loss": 2.1567, "step": 8140 }, { - "epoch": 1.51, - "grad_norm": 0.2265625, - "learning_rate": 6.849030483467367e-05, - "loss": 2.1208, + "epoch": 1.38, + "grad_norm": 0.2236328125, + "learning_rate": 5.3120432693934894e-05, + "loss": 2.149, "step": 8145 }, { - "epoch": 1.51, - "grad_norm": 0.21484375, - "learning_rate": 6.82465158167253e-05, - "loss": 2.0856, + "epoch": 1.38, + "grad_norm": 0.2275390625, + "learning_rate": 5.2989920782089265e-05, + "loss": 2.1027, "step": 8150 }, { - "epoch": 1.51, - "grad_norm": 0.220703125, - "learning_rate": 6.800307216449615e-05, - "loss": 2.1454, + "epoch": 1.38, + "grad_norm": 0.2275390625, + "learning_rate": 5.2859511579431944e-05, + "loss": 2.1403, "step": 8155 }, { - "epoch": 1.51, - "grad_norm": 0.2197265625, - "learning_rate": 6.775997451612548e-05, - "loss": 2.1249, + "epoch": 1.38, + "grad_norm": 0.232421875, + "learning_rate": 5.272920537088528e-05, + "loss": 2.1336, "step": 8160 }, { - "epoch": 1.51, - "grad_norm": 0.2197265625, - "learning_rate": 6.75172235088457e-05, - "loss": 2.1168, + "epoch": 1.38, + "grad_norm": 0.2265625, + "learning_rate": 5.259900244114655e-05, + "loss": 2.1591, "step": 8165 }, { - "epoch": 1.52, - "grad_norm": 0.2138671875, - "learning_rate": 6.727481977898076e-05, - "loss": 2.1398, + "epoch": 1.38, + "grad_norm": 0.2265625, + "learning_rate": 5.2468903074687506e-05, + "loss": 2.1639, "step": 8170 }, { - "epoch": 1.52, - "grad_norm": 0.2138671875, - "learning_rate": 6.703276396194404e-05, - "loss": 2.0943, + "epoch": 1.38, + "grad_norm": 0.2294921875, + "learning_rate": 5.233890755575361e-05, + "loss": 2.1787, "step": 8175 }, { - "epoch": 1.52, - "grad_norm": 0.2119140625, - "learning_rate": 6.679105669223704e-05, - "loss": 2.1336, + "epoch": 1.39, + "grad_norm": 0.2236328125, + "learning_rate": 5.22090161683633e-05, + "loss": 2.1079, "step": 8180 }, { - "epoch": 1.52, - "grad_norm": 0.224609375, - "learning_rate": 6.654969860344757e-05, - "loss": 2.0909, + "epoch": 1.39, + "grad_norm": 0.232421875, + "learning_rate": 5.207922919630771e-05, + "loss": 2.1277, "step": 8185 }, { - "epoch": 1.52, - "grad_norm": 0.2109375, - "learning_rate": 6.630869032824821e-05, - "loss": 2.1254, + "epoch": 1.39, + "grad_norm": 0.224609375, + "learning_rate": 5.194954692314975e-05, + "loss": 2.1226, "step": 8190 }, { - "epoch": 1.52, - "grad_norm": 0.2158203125, - "learning_rate": 6.606803249839448e-05, - "loss": 2.1044, + "epoch": 1.39, + "grad_norm": 0.2236328125, + "learning_rate": 5.1819969632223505e-05, + "loss": 2.1081, "step": 8195 }, { - "epoch": 1.52, - "grad_norm": 0.2216796875, - "learning_rate": 6.582772574472325e-05, - "loss": 2.1604, + "epoch": 1.39, + "grad_norm": 0.2236328125, + "learning_rate": 5.1690497606633824e-05, + "loss": 2.1174, "step": 8200 }, { - "epoch": 1.52, - "grad_norm": 0.2373046875, - "learning_rate": 6.558777069715138e-05, - "loss": 2.1012, + "epoch": 1.39, + "grad_norm": 0.2177734375, + "learning_rate": 5.156113112925543e-05, + "loss": 2.1002, "step": 8205 }, { - "epoch": 1.52, - "grad_norm": 0.21875, - "learning_rate": 6.534816798467338e-05, - "loss": 2.0951, + "epoch": 1.39, + "grad_norm": 0.2255859375, + "learning_rate": 5.1431870482732516e-05, + "loss": 2.1494, "step": 8210 }, { - "epoch": 1.52, - "grad_norm": 0.2177734375, - "learning_rate": 6.510891823536054e-05, - "loss": 2.0685, + "epoch": 1.39, + "grad_norm": 0.22265625, + "learning_rate": 5.1302715949478174e-05, + "loss": 2.1323, "step": 8215 }, { - "epoch": 1.53, - "grad_norm": 0.21484375, - "learning_rate": 6.487002207635877e-05, - "loss": 2.1161, + "epoch": 1.39, + "grad_norm": 0.220703125, + "learning_rate": 5.117366781167341e-05, + "loss": 2.15, "step": 8220 }, { - "epoch": 1.53, - "grad_norm": 0.2236328125, - "learning_rate": 6.463148013388713e-05, - "loss": 2.1549, + "epoch": 1.39, + "grad_norm": 0.2314453125, + "learning_rate": 5.104472635126695e-05, + "loss": 2.1167, "step": 8225 }, { - "epoch": 1.53, - "grad_norm": 0.21875, - "learning_rate": 6.439329303323616e-05, - "loss": 2.1182, + "epoch": 1.39, + "grad_norm": 0.2216796875, + "learning_rate": 5.091589184997441e-05, + "loss": 2.1579, "step": 8230 }, { - "epoch": 1.53, - "grad_norm": 0.2158203125, - "learning_rate": 6.415546139876629e-05, - "loss": 2.0894, + "epoch": 1.39, + "grad_norm": 0.2265625, + "learning_rate": 5.0787164589277645e-05, + "loss": 2.1174, "step": 8235 }, { - "epoch": 1.53, - "grad_norm": 0.216796875, - "learning_rate": 6.391798585390614e-05, - "loss": 2.1289, + "epoch": 1.4, + "grad_norm": 0.2265625, + "learning_rate": 5.0658544850424274e-05, + "loss": 2.1221, "step": 8240 }, { - "epoch": 1.53, - "grad_norm": 0.2119140625, - "learning_rate": 6.368086702115081e-05, - "loss": 2.0734, + "epoch": 1.4, + "grad_norm": 0.236328125, + "learning_rate": 5.053003291442707e-05, + "loss": 2.1003, "step": 8245 }, { - "epoch": 1.53, - "grad_norm": 0.216796875, - "learning_rate": 6.344410552206066e-05, - "loss": 2.1034, + "epoch": 1.4, + "grad_norm": 0.23046875, + "learning_rate": 5.0401629062063115e-05, + "loss": 2.1398, "step": 8250 }, { - "epoch": 1.53, - "grad_norm": 0.224609375, - "learning_rate": 6.320770197725911e-05, - "loss": 2.1449, + "epoch": 1.4, + "grad_norm": 0.2412109375, + "learning_rate": 5.027333357387345e-05, + "loss": 2.1235, "step": 8255 }, { - "epoch": 1.53, - "grad_norm": 0.2177734375, - "learning_rate": 6.297165700643117e-05, - "loss": 2.0802, + "epoch": 1.4, + "grad_norm": 0.224609375, + "learning_rate": 5.014514673016237e-05, + "loss": 2.1306, "step": 8260 }, { - "epoch": 1.53, - "grad_norm": 0.2158203125, - "learning_rate": 6.273597122832224e-05, - "loss": 2.1051, + "epoch": 1.4, + "grad_norm": 0.2236328125, + "learning_rate": 5.00170688109967e-05, + "loss": 2.1229, "step": 8265 }, { - "epoch": 1.53, - "grad_norm": 0.212890625, - "learning_rate": 6.250064526073598e-05, - "loss": 2.1103, + "epoch": 1.4, + "grad_norm": 0.228515625, + "learning_rate": 4.988910009620537e-05, + "loss": 2.1448, "step": 8270 }, { - "epoch": 1.54, + "epoch": 1.4, "grad_norm": 0.224609375, - "learning_rate": 6.226567972053286e-05, - "loss": 2.0815, + "learning_rate": 4.976124086537871e-05, + "loss": 2.1072, "step": 8275 }, { - "epoch": 1.54, - "grad_norm": 0.21875, - "learning_rate": 6.203107522362863e-05, - "loss": 2.1086, + "epoch": 1.4, + "grad_norm": 0.2265625, + "learning_rate": 4.9633491397867815e-05, + "loss": 2.0999, "step": 8280 }, { - "epoch": 1.54, - "grad_norm": 0.2177734375, - "learning_rate": 6.179683238499263e-05, - "loss": 2.0913, + "epoch": 1.4, + "grad_norm": 0.2275390625, + "learning_rate": 4.950585197278404e-05, + "loss": 2.1003, "step": 8285 }, { - "epoch": 1.54, - "grad_norm": 0.2197265625, - "learning_rate": 6.156295181864613e-05, - "loss": 2.085, + "epoch": 1.4, + "grad_norm": 0.220703125, + "learning_rate": 4.937832286899815e-05, + "loss": 2.0978, "step": 8290 }, { - "epoch": 1.54, - "grad_norm": 0.220703125, - "learning_rate": 6.132943413766077e-05, - "loss": 2.0987, + "epoch": 1.4, + "grad_norm": 0.2275390625, + "learning_rate": 4.925090436514004e-05, + "loss": 2.184, "step": 8295 }, { - "epoch": 1.54, - "grad_norm": 0.21875, - "learning_rate": 6.109627995415712e-05, - "loss": 2.1141, + "epoch": 1.41, + "grad_norm": 0.240234375, + "learning_rate": 4.91235967395979e-05, + "loss": 2.14, "step": 8300 }, { - "epoch": 1.54, - "grad_norm": 0.2158203125, - "learning_rate": 6.086348987930273e-05, - "loss": 2.1072, + "epoch": 1.41, + "grad_norm": 0.22265625, + "learning_rate": 4.8996400270517675e-05, + "loss": 2.1209, "step": 8305 }, { - "epoch": 1.54, - "grad_norm": 0.220703125, - "learning_rate": 6.0631064523310756e-05, - "loss": 2.1292, + "epoch": 1.41, + "grad_norm": 0.2255859375, + "learning_rate": 4.886931523580246e-05, + "loss": 2.1202, "step": 8310 }, { - "epoch": 1.54, - "grad_norm": 0.2216796875, - "learning_rate": 6.039900449543836e-05, - "loss": 2.1378, + "epoch": 1.41, + "grad_norm": 0.228515625, + "learning_rate": 4.87423419131119e-05, + "loss": 2.1826, "step": 8315 }, { - "epoch": 1.54, - "grad_norm": 0.21875, - "learning_rate": 6.016731040398502e-05, - "loss": 2.128, + "epoch": 1.41, + "grad_norm": 0.2294921875, + "learning_rate": 4.861548057986147e-05, + "loss": 2.1141, "step": 8320 }, { - "epoch": 1.54, - "grad_norm": 0.2158203125, - "learning_rate": 5.9935982856291005e-05, - "loss": 2.0829, + "epoch": 1.41, + "grad_norm": 0.228515625, + "learning_rate": 4.848873151322209e-05, + "loss": 2.1506, "step": 8325 }, { - "epoch": 1.55, - "grad_norm": 0.2138671875, - "learning_rate": 5.970502245873573e-05, - "loss": 2.0905, + "epoch": 1.41, + "grad_norm": 0.2275390625, + "learning_rate": 4.836209499011932e-05, + "loss": 2.1256, "step": 8330 }, { - "epoch": 1.55, - "grad_norm": 0.2216796875, - "learning_rate": 5.947442981673637e-05, - "loss": 2.1405, + "epoch": 1.41, + "grad_norm": 0.228515625, + "learning_rate": 4.823557128723288e-05, + "loss": 2.1182, "step": 8335 }, { - "epoch": 1.55, - "grad_norm": 0.22265625, - "learning_rate": 5.924420553474581e-05, - "loss": 2.122, + "epoch": 1.41, + "grad_norm": 0.2333984375, + "learning_rate": 4.810916068099601e-05, + "loss": 2.1319, "step": 8340 }, { - "epoch": 1.55, - "grad_norm": 0.2138671875, - "learning_rate": 5.90143502162515e-05, - "loss": 2.1289, + "epoch": 1.41, + "grad_norm": 0.2275390625, + "learning_rate": 4.798286344759475e-05, + "loss": 2.1291, "step": 8345 }, { - "epoch": 1.55, + "epoch": 1.41, "grad_norm": 0.2216796875, - "learning_rate": 5.8784864463773824e-05, - "loss": 2.1152, + "learning_rate": 4.7856679862967515e-05, + "loss": 2.0805, "step": 8350 }, { - "epoch": 1.55, - "grad_norm": 0.22265625, - "learning_rate": 5.85557488788643e-05, - "loss": 2.1293, + "epoch": 1.42, + "grad_norm": 0.2255859375, + "learning_rate": 4.773061020280443e-05, + "loss": 2.1223, "step": 8355 }, { - "epoch": 1.55, - "grad_norm": 0.21875, - "learning_rate": 5.832700406210414e-05, - "loss": 2.1264, + "epoch": 1.42, + "grad_norm": 0.2255859375, + "learning_rate": 4.760465474254667e-05, + "loss": 2.1401, "step": 8360 }, { - "epoch": 1.55, - "grad_norm": 0.21484375, - "learning_rate": 5.809863061310261e-05, - "loss": 2.0865, + "epoch": 1.42, + "grad_norm": 0.234375, + "learning_rate": 4.7478813757385954e-05, + "loss": 2.1489, "step": 8365 }, { - "epoch": 1.55, - "grad_norm": 0.2177734375, - "learning_rate": 5.7870629130495746e-05, - "loss": 2.0962, + "epoch": 1.42, + "grad_norm": 0.2294921875, + "learning_rate": 4.735308752226387e-05, + "loss": 2.1411, "step": 8370 }, { - "epoch": 1.55, - "grad_norm": 0.21875, - "learning_rate": 5.76430002119442e-05, - "loss": 2.123, + "epoch": 1.42, + "grad_norm": 0.23046875, + "learning_rate": 4.722747631187123e-05, + "loss": 2.1452, "step": 8375 }, { - "epoch": 1.55, - "grad_norm": 0.2158203125, - "learning_rate": 5.741574445413218e-05, - "loss": 2.105, + "epoch": 1.42, + "grad_norm": 0.2255859375, + "learning_rate": 4.710198040064767e-05, + "loss": 2.1107, "step": 8380 }, { - "epoch": 1.56, - "grad_norm": 0.2177734375, - "learning_rate": 5.718886245276589e-05, - "loss": 2.0846, + "epoch": 1.42, + "grad_norm": 0.2275390625, + "learning_rate": 4.697660006278073e-05, + "loss": 2.1218, "step": 8385 }, { - "epoch": 1.56, - "grad_norm": 0.2197265625, - "learning_rate": 5.6962354802571574e-05, - "loss": 2.0829, + "epoch": 1.42, + "grad_norm": 0.2265625, + "learning_rate": 4.6851335572205646e-05, + "loss": 2.1221, "step": 8390 }, { - "epoch": 1.56, - "grad_norm": 0.2158203125, - "learning_rate": 5.673622209729426e-05, - "loss": 2.1085, + "epoch": 1.42, + "grad_norm": 0.236328125, + "learning_rate": 4.6726187202604465e-05, + "loss": 2.148, "step": 8395 }, { - "epoch": 1.56, - "grad_norm": 0.2216796875, - "learning_rate": 5.651046492969616e-05, - "loss": 2.1053, + "epoch": 1.42, + "grad_norm": 0.2314453125, + "learning_rate": 4.6601155227405436e-05, + "loss": 2.1665, "step": 8400 }, { - "epoch": 1.56, - "grad_norm": 0.21484375, - "learning_rate": 5.628508389155507e-05, - "loss": 2.1162, + "epoch": 1.42, + "grad_norm": 0.23828125, + "learning_rate": 4.6476239919782636e-05, + "loss": 2.1232, "step": 8405 }, { - "epoch": 1.56, - "grad_norm": 0.224609375, - "learning_rate": 5.606007957366284e-05, - "loss": 2.1035, + "epoch": 1.42, + "grad_norm": 0.2265625, + "learning_rate": 4.635144155265523e-05, + "loss": 2.1338, "step": 8410 }, { - "epoch": 1.56, - "grad_norm": 0.2216796875, - "learning_rate": 5.583545256582374e-05, - "loss": 2.1368, + "epoch": 1.43, + "grad_norm": 0.220703125, + "learning_rate": 4.622676039868672e-05, + "loss": 2.1274, "step": 8415 }, { - "epoch": 1.56, - "grad_norm": 0.2294921875, - "learning_rate": 5.5611203456853267e-05, - "loss": 2.1249, + "epoch": 1.43, + "grad_norm": 0.21875, + "learning_rate": 4.6102196730284786e-05, + "loss": 2.1651, "step": 8420 }, { - "epoch": 1.56, - "grad_norm": 0.224609375, - "learning_rate": 5.538733283457591e-05, - "loss": 2.145, + "epoch": 1.43, + "grad_norm": 0.2333984375, + "learning_rate": 4.597775081960026e-05, + "loss": 2.1164, "step": 8425 }, { - "epoch": 1.56, - "grad_norm": 0.21875, - "learning_rate": 5.516384128582444e-05, - "loss": 2.1136, + "epoch": 1.43, + "grad_norm": 0.220703125, + "learning_rate": 4.585342293852666e-05, + "loss": 2.1234, "step": 8430 }, { - "epoch": 1.56, - "grad_norm": 0.2197265625, - "learning_rate": 5.4940729396437704e-05, - "loss": 2.117, + "epoch": 1.43, + "grad_norm": 0.234375, + "learning_rate": 4.572921335869974e-05, + "loss": 2.1105, "step": 8435 }, { - "epoch": 1.57, - "grad_norm": 0.224609375, - "learning_rate": 5.471799775125943e-05, - "loss": 2.1447, + "epoch": 1.43, + "grad_norm": 0.2265625, + "learning_rate": 4.560512235149668e-05, + "loss": 2.1434, "step": 8440 }, { - "epoch": 1.57, - "grad_norm": 0.2177734375, - "learning_rate": 5.4495646934136625e-05, - "loss": 2.1128, + "epoch": 1.43, + "grad_norm": 0.228515625, + "learning_rate": 4.5481150188035626e-05, + "loss": 2.0948, "step": 8445 }, { - "epoch": 1.57, - "grad_norm": 0.2177734375, - "learning_rate": 5.4273677527917966e-05, - "loss": 2.1114, + "epoch": 1.43, + "grad_norm": 0.23046875, + "learning_rate": 4.535729713917522e-05, + "loss": 2.1562, "step": 8450 }, { - "epoch": 1.57, - "grad_norm": 0.2197265625, - "learning_rate": 5.405209011445242e-05, - "loss": 2.1023, + "epoch": 1.43, + "grad_norm": 0.2255859375, + "learning_rate": 4.5233563475513616e-05, + "loss": 2.1353, "step": 8455 }, { - "epoch": 1.57, - "grad_norm": 0.2177734375, - "learning_rate": 5.383088527458753e-05, - "loss": 2.1129, + "epoch": 1.43, + "grad_norm": 0.2275390625, + "learning_rate": 4.510994946738829e-05, + "loss": 2.1399, "step": 8460 }, { - "epoch": 1.57, - "grad_norm": 0.216796875, - "learning_rate": 5.361006358816818e-05, - "loss": 2.1446, + "epoch": 1.43, + "grad_norm": 0.232421875, + "learning_rate": 4.498645538487528e-05, + "loss": 2.1196, "step": 8465 }, { - "epoch": 1.57, - "grad_norm": 0.2099609375, - "learning_rate": 5.338962563403478e-05, - "loss": 2.0962, + "epoch": 1.43, + "grad_norm": 0.224609375, + "learning_rate": 4.4863081497788506e-05, + "loss": 2.0936, "step": 8470 }, { - "epoch": 1.57, - "grad_norm": 0.2236328125, - "learning_rate": 5.3169571990021684e-05, - "loss": 2.0977, + "epoch": 1.44, + "grad_norm": 0.2333984375, + "learning_rate": 4.473982807567937e-05, + "loss": 2.1093, "step": 8475 }, { - "epoch": 1.57, - "grad_norm": 0.220703125, - "learning_rate": 5.294990323295621e-05, - "loss": 2.0961, + "epoch": 1.44, + "grad_norm": 0.2255859375, + "learning_rate": 4.4616695387836074e-05, + "loss": 2.1156, "step": 8480 }, { - "epoch": 1.57, - "grad_norm": 0.224609375, - "learning_rate": 5.273061993865651e-05, - "loss": 2.1561, + "epoch": 1.44, + "grad_norm": 0.2265625, + "learning_rate": 4.449368370328302e-05, + "loss": 2.106, "step": 8485 }, { - "epoch": 1.58, - "grad_norm": 0.224609375, - "learning_rate": 5.251172268193041e-05, - "loss": 2.15, + "epoch": 1.44, + "grad_norm": 0.2294921875, + "learning_rate": 4.4370793290780224e-05, + "loss": 2.1387, "step": 8490 }, { - "epoch": 1.58, + "epoch": 1.44, "grad_norm": 0.2236328125, - "learning_rate": 5.229321203657382e-05, - "loss": 2.1232, + "learning_rate": 4.42480244188228e-05, + "loss": 2.1202, "step": 8495 }, { - "epoch": 1.58, - "grad_norm": 0.220703125, - "learning_rate": 5.20750885753692e-05, - "loss": 2.1288, + "epoch": 1.44, + "grad_norm": 0.2353515625, + "learning_rate": 4.412537735564019e-05, + "loss": 2.1336, "step": 8500 }, { - "epoch": 1.58, - "grad_norm": 0.21875, - "learning_rate": 5.1857352870084086e-05, - "loss": 2.1265, + "epoch": 1.44, + "grad_norm": 0.2294921875, + "learning_rate": 4.4002852369195845e-05, + "loss": 2.1211, "step": 8505 }, { - "epoch": 1.58, - "grad_norm": 0.2216796875, - "learning_rate": 5.164000549146954e-05, - "loss": 2.1376, + "epoch": 1.44, + "grad_norm": 0.2265625, + "learning_rate": 4.3880449727186427e-05, + "loss": 2.1334, "step": 8510 }, { - "epoch": 1.58, - "grad_norm": 0.21875, - "learning_rate": 5.1423047009258904e-05, - "loss": 2.1105, + "epoch": 1.44, + "grad_norm": 0.234375, + "learning_rate": 4.375816969704131e-05, + "loss": 2.1229, "step": 8515 }, { - "epoch": 1.58, - "grad_norm": 0.2138671875, - "learning_rate": 5.120647799216587e-05, - "loss": 2.1161, + "epoch": 1.44, + "grad_norm": 0.22265625, + "learning_rate": 4.363601254592201e-05, + "loss": 2.1147, "step": 8520 }, { - "epoch": 1.58, - "grad_norm": 0.2177734375, - "learning_rate": 5.0990299007883304e-05, - "loss": 2.1267, + "epoch": 1.44, + "grad_norm": 0.224609375, + "learning_rate": 4.3513978540721477e-05, + "loss": 2.1554, "step": 8525 }, { - "epoch": 1.58, - "grad_norm": 0.2216796875, - "learning_rate": 5.077451062308174e-05, - "loss": 2.104, + "epoch": 1.44, + "grad_norm": 0.23046875, + "learning_rate": 4.339206794806371e-05, + "loss": 2.1565, "step": 8530 }, { - "epoch": 1.58, - "grad_norm": 0.216796875, - "learning_rate": 5.055911340340771e-05, - "loss": 2.0874, + "epoch": 1.45, + "grad_norm": 0.2236328125, + "learning_rate": 4.327028103430303e-05, + "loss": 2.1332, "step": 8535 }, { - "epoch": 1.58, + "epoch": 1.45, "grad_norm": 0.2255859375, - "learning_rate": 5.0344107913482516e-05, - "loss": 2.1245, + "learning_rate": 4.3148618065523546e-05, + "loss": 2.1234, "step": 8540 }, { - "epoch": 1.59, - "grad_norm": 0.2177734375, - "learning_rate": 5.012949471690045e-05, - "loss": 2.0561, + "epoch": 1.45, + "grad_norm": 0.2255859375, + "learning_rate": 4.3027079307538584e-05, + "loss": 2.1442, "step": 8545 }, { - "epoch": 1.59, - "grad_norm": 0.2216796875, - "learning_rate": 4.9915274376227805e-05, - "loss": 2.1394, + "epoch": 1.45, + "grad_norm": 0.248046875, + "learning_rate": 4.290566502589011e-05, + "loss": 2.1679, "step": 8550 }, { - "epoch": 1.59, - "grad_norm": 0.21875, - "learning_rate": 4.970144745300063e-05, - "loss": 2.1378, + "epoch": 1.45, + "grad_norm": 0.234375, + "learning_rate": 4.2784375485848e-05, + "loss": 2.113, "step": 8555 }, { - "epoch": 1.59, - "grad_norm": 0.2216796875, - "learning_rate": 4.948801450772409e-05, - "loss": 2.1072, + "epoch": 1.45, + "grad_norm": 0.2275390625, + "learning_rate": 4.266321095240973e-05, + "loss": 2.1225, "step": 8560 }, { - "epoch": 1.59, - "grad_norm": 0.2216796875, - "learning_rate": 4.9274976099870415e-05, - "loss": 2.1504, + "epoch": 1.45, + "grad_norm": 0.2275390625, + "learning_rate": 4.2542171690299605e-05, + "loss": 2.0962, "step": 8565 }, { - "epoch": 1.59, - "grad_norm": 0.220703125, - "learning_rate": 4.9062332787877705e-05, - "loss": 2.0927, + "epoch": 1.45, + "grad_norm": 0.2265625, + "learning_rate": 4.242125796396827e-05, + "loss": 2.1323, "step": 8570 }, { - "epoch": 1.59, - "grad_norm": 0.21484375, - "learning_rate": 4.885008512914837e-05, - "loss": 2.0769, + "epoch": 1.45, + "grad_norm": 0.228515625, + "learning_rate": 4.230047003759206e-05, + "loss": 2.1072, "step": 8575 }, { - "epoch": 1.59, - "grad_norm": 0.21484375, - "learning_rate": 4.863823368004763e-05, - "loss": 2.0962, + "epoch": 1.45, + "grad_norm": 0.228515625, + "learning_rate": 4.217980817507242e-05, + "loss": 2.132, "step": 8580 }, { - "epoch": 1.59, - "grad_norm": 0.220703125, - "learning_rate": 4.842677899590238e-05, - "loss": 2.0833, + "epoch": 1.45, + "grad_norm": 0.2314453125, + "learning_rate": 4.205927264003544e-05, + "loss": 2.1482, "step": 8585 }, { - "epoch": 1.59, - "grad_norm": 0.21484375, - "learning_rate": 4.8215721630999075e-05, - "loss": 2.0788, + "epoch": 1.45, + "grad_norm": 0.2275390625, + "learning_rate": 4.193886369583117e-05, + "loss": 2.1228, "step": 8590 }, { - "epoch": 1.59, - "grad_norm": 0.2197265625, - "learning_rate": 4.800506213858293e-05, - "loss": 2.0668, + "epoch": 1.46, + "grad_norm": 0.224609375, + "learning_rate": 4.1818581605533094e-05, + "loss": 2.1229, "step": 8595 }, { - "epoch": 1.6, - "grad_norm": 0.224609375, - "learning_rate": 4.779480107085632e-05, - "loss": 2.1316, + "epoch": 1.46, + "grad_norm": 0.2197265625, + "learning_rate": 4.1698426631937514e-05, + "loss": 2.0852, "step": 8600 }, { - "epoch": 1.6, - "grad_norm": 0.2158203125, - "learning_rate": 4.7584938978976845e-05, - "loss": 2.0993, + "epoch": 1.46, + "grad_norm": 0.2294921875, + "learning_rate": 4.157839903756308e-05, + "loss": 2.0963, "step": 8605 }, { - "epoch": 1.6, - "grad_norm": 0.216796875, - "learning_rate": 4.737547641305668e-05, - "loss": 2.083, + "epoch": 1.46, + "grad_norm": 0.2265625, + "learning_rate": 4.145849908464999e-05, + "loss": 2.1152, "step": 8610 }, { - "epoch": 1.6, - "grad_norm": 0.224609375, - "learning_rate": 4.716641392216048e-05, - "loss": 2.0819, + "epoch": 1.46, + "grad_norm": 0.23046875, + "learning_rate": 4.133872703515975e-05, + "loss": 2.1029, "step": 8615 }, { - "epoch": 1.6, - "grad_norm": 0.2216796875, - "learning_rate": 4.695775205430426e-05, - "loss": 2.0953, + "epoch": 1.46, + "grad_norm": 0.22265625, + "learning_rate": 4.121908315077421e-05, + "loss": 2.1612, "step": 8620 }, { - "epoch": 1.6, - "grad_norm": 0.21484375, - "learning_rate": 4.674949135645383e-05, - "loss": 2.1031, + "epoch": 1.46, + "grad_norm": 0.234375, + "learning_rate": 4.1099567692895426e-05, + "loss": 2.1364, "step": 8625 }, { - "epoch": 1.6, - "grad_norm": 0.2158203125, - "learning_rate": 4.654163237452345e-05, - "loss": 2.0836, + "epoch": 1.46, + "grad_norm": 0.2265625, + "learning_rate": 4.098018092264474e-05, + "loss": 2.0914, "step": 8630 }, { - "epoch": 1.6, - "grad_norm": 0.2138671875, - "learning_rate": 4.6334175653374476e-05, - "loss": 2.0672, + "epoch": 1.46, + "grad_norm": 0.21875, + "learning_rate": 4.08609231008623e-05, + "loss": 2.1178, "step": 8635 }, { - "epoch": 1.6, - "grad_norm": 0.216796875, - "learning_rate": 4.612712173681353e-05, - "loss": 2.0927, + "epoch": 1.46, + "grad_norm": 0.2353515625, + "learning_rate": 4.0741794488106585e-05, + "loss": 2.1975, "step": 8640 }, { - "epoch": 1.6, - "grad_norm": 0.2294921875, - "learning_rate": 4.592047116759164e-05, - "loss": 2.0775, + "epoch": 1.46, + "grad_norm": 0.2333984375, + "learning_rate": 4.0622795344653816e-05, + "loss": 2.1351, "step": 8645 }, { - "epoch": 1.6, - "grad_norm": 0.2119140625, - "learning_rate": 4.571422448740246e-05, - "loss": 2.1157, + "epoch": 1.46, + "grad_norm": 0.232421875, + "learning_rate": 4.05039259304972e-05, + "loss": 2.1472, "step": 8650 }, { - "epoch": 1.61, - "grad_norm": 0.2197265625, - "learning_rate": 4.550838223688074e-05, - "loss": 2.128, + "epoch": 1.47, + "grad_norm": 0.2294921875, + "learning_rate": 4.038518650534661e-05, + "loss": 2.1258, "step": 8655 }, { - "epoch": 1.61, - "grad_norm": 0.2236328125, - "learning_rate": 4.530294495560141e-05, - "loss": 2.1186, + "epoch": 1.47, + "grad_norm": 0.2255859375, + "learning_rate": 4.0266577328627996e-05, + "loss": 2.0783, "step": 8660 }, { - "epoch": 1.61, - "grad_norm": 0.216796875, - "learning_rate": 4.5097913182077656e-05, - "loss": 2.1303, + "epoch": 1.47, + "grad_norm": 0.232421875, + "learning_rate": 4.0148098659482537e-05, + "loss": 2.1506, "step": 8665 }, { - "epoch": 1.61, - "grad_norm": 0.2265625, - "learning_rate": 4.4893287453759755e-05, - "loss": 2.124, + "epoch": 1.47, + "grad_norm": 0.232421875, + "learning_rate": 4.002975075676641e-05, + "loss": 2.1108, "step": 8670 }, { - "epoch": 1.61, - "grad_norm": 0.2158203125, - "learning_rate": 4.4689068307033544e-05, - "loss": 2.1581, - "step": 8675 + "epoch": 1.47, + "grad_norm": 0.2255859375, + "learning_rate": 3.991153387905011e-05, + "loss": 2.1207, + "step": 8675 }, { - "epoch": 1.61, - "grad_norm": 0.2158203125, - "learning_rate": 4.4485256277219246e-05, - "loss": 2.1182, + "epoch": 1.47, + "grad_norm": 0.2333984375, + "learning_rate": 3.979344828461773e-05, + "loss": 2.1169, "step": 8680 }, { - "epoch": 1.61, - "grad_norm": 0.21875, - "learning_rate": 4.428185189856986e-05, - "loss": 2.0936, + "epoch": 1.47, + "grad_norm": 0.236328125, + "learning_rate": 3.967549423146665e-05, + "loss": 2.1205, "step": 8685 }, { - "epoch": 1.61, - "grad_norm": 0.2158203125, - "learning_rate": 4.4078855704269575e-05, - "loss": 2.0769, + "epoch": 1.47, + "grad_norm": 0.23046875, + "learning_rate": 3.955767197730681e-05, + "loss": 2.1345, "step": 8690 }, { - "epoch": 1.61, - "grad_norm": 0.2158203125, - "learning_rate": 4.387626822643294e-05, - "loss": 2.1441, + "epoch": 1.47, + "grad_norm": 0.2265625, + "learning_rate": 3.943998177956022e-05, + "loss": 2.1559, "step": 8695 }, { - "epoch": 1.61, - "grad_norm": 0.21484375, - "learning_rate": 4.3674089996102966e-05, - "loss": 2.1038, + "epoch": 1.47, + "grad_norm": 0.22265625, + "learning_rate": 3.932242389536036e-05, + "loss": 2.1094, "step": 8700 }, { - "epoch": 1.62, - "grad_norm": 0.2158203125, - "learning_rate": 4.347232154324992e-05, - "loss": 2.1426, + "epoch": 1.47, + "grad_norm": 0.2255859375, + "learning_rate": 3.9204998581551554e-05, + "loss": 2.1194, "step": 8705 }, { - "epoch": 1.62, - "grad_norm": 0.2138671875, - "learning_rate": 4.32709633967699e-05, - "loss": 2.0974, + "epoch": 1.48, + "grad_norm": 0.2265625, + "learning_rate": 3.908770609468858e-05, + "loss": 2.0894, "step": 8710 }, { - "epoch": 1.62, - "grad_norm": 0.220703125, - "learning_rate": 4.307001608448353e-05, - "loss": 2.0787, + "epoch": 1.48, + "grad_norm": 0.236328125, + "learning_rate": 3.897054669103597e-05, + "loss": 2.1092, "step": 8715 }, { - "epoch": 1.62, - "grad_norm": 0.212890625, - "learning_rate": 4.2869480133134435e-05, - "loss": 2.0808, + "epoch": 1.48, + "grad_norm": 0.2294921875, + "learning_rate": 3.885352062656749e-05, + "loss": 2.1491, "step": 8720 }, { - "epoch": 1.62, - "grad_norm": 0.224609375, - "learning_rate": 4.266935606838796e-05, - "loss": 2.1249, + "epoch": 1.48, + "grad_norm": 0.2255859375, + "learning_rate": 3.8736628156965594e-05, + "loss": 2.1457, "step": 8725 }, { - "epoch": 1.62, - "grad_norm": 0.2197265625, - "learning_rate": 4.246964441482986e-05, - "loss": 2.1447, + "epoch": 1.48, + "grad_norm": 0.2275390625, + "learning_rate": 3.861986953762088e-05, + "loss": 2.0965, "step": 8730 }, { - "epoch": 1.62, - "grad_norm": 0.2177734375, - "learning_rate": 4.2270345695964734e-05, - "loss": 2.1202, + "epoch": 1.48, + "grad_norm": 0.2265625, + "learning_rate": 3.850324502363141e-05, + "loss": 2.1247, "step": 8735 }, { - "epoch": 1.62, - "grad_norm": 0.2216796875, - "learning_rate": 4.207146043421477e-05, - "loss": 2.1282, + "epoch": 1.48, + "grad_norm": 0.2236328125, + "learning_rate": 3.838675486980232e-05, + "loss": 2.1692, "step": 8740 }, { - "epoch": 1.62, - "grad_norm": 0.21875, - "learning_rate": 4.1872989150918375e-05, - "loss": 2.1001, + "epoch": 1.48, + "grad_norm": 0.2236328125, + "learning_rate": 3.8270399330645216e-05, + "loss": 2.0999, "step": 8745 }, { - "epoch": 1.62, - "grad_norm": 0.220703125, - "learning_rate": 4.16749323663288e-05, - "loss": 2.1221, + "epoch": 1.48, + "grad_norm": 0.228515625, + "learning_rate": 3.815417866037753e-05, + "loss": 2.1126, "step": 8750 }, { - "epoch": 1.62, - "grad_norm": 0.21875, - "learning_rate": 4.147729059961278e-05, - "loss": 2.1309, + "epoch": 1.48, + "grad_norm": 0.2236328125, + "learning_rate": 3.80380931129221e-05, + "loss": 2.1166, "step": 8755 }, { - "epoch": 1.63, - "grad_norm": 0.216796875, - "learning_rate": 4.128006436884906e-05, - "loss": 2.152, + "epoch": 1.48, + "grad_norm": 0.21875, + "learning_rate": 3.792214294190643e-05, + "loss": 2.0955, "step": 8760 }, { - "epoch": 1.63, - "grad_norm": 0.2216796875, - "learning_rate": 4.1083254191027384e-05, - "loss": 2.1294, + "epoch": 1.48, + "grad_norm": 0.228515625, + "learning_rate": 3.7806328400662374e-05, + "loss": 2.1366, "step": 8765 }, { - "epoch": 1.63, - "grad_norm": 0.22265625, - "learning_rate": 4.088686058204656e-05, - "loss": 2.1232, + "epoch": 1.49, + "grad_norm": 0.2392578125, + "learning_rate": 3.769064974222537e-05, + "loss": 2.1004, "step": 8770 }, { - "epoch": 1.63, - "grad_norm": 0.2158203125, - "learning_rate": 4.069088405671375e-05, - "loss": 2.0924, + "epoch": 1.49, + "grad_norm": 0.2421875, + "learning_rate": 3.757510721933403e-05, + "loss": 2.1277, "step": 8775 }, { - "epoch": 1.63, - "grad_norm": 0.21875, - "learning_rate": 4.049532512874261e-05, - "loss": 2.1028, + "epoch": 1.49, + "grad_norm": 0.228515625, + "learning_rate": 3.74597010844295e-05, + "loss": 2.1272, "step": 8780 }, { - "epoch": 1.63, - "grad_norm": 0.2265625, - "learning_rate": 4.0300184310752265e-05, - "loss": 2.1369, + "epoch": 1.49, + "grad_norm": 0.2294921875, + "learning_rate": 3.734443158965499e-05, + "loss": 2.1392, "step": 8785 }, { - "epoch": 1.63, - "grad_norm": 0.2255859375, - "learning_rate": 4.0105462114265754e-05, - "loss": 2.0931, + "epoch": 1.49, + "grad_norm": 0.2275390625, + "learning_rate": 3.722929898685507e-05, + "loss": 2.1155, "step": 8790 }, { - "epoch": 1.63, - "grad_norm": 0.2236328125, - "learning_rate": 3.991115904970888e-05, - "loss": 2.1279, + "epoch": 1.49, + "grad_norm": 0.23046875, + "learning_rate": 3.71143035275753e-05, + "loss": 2.1582, "step": 8795 }, { - "epoch": 1.63, - "grad_norm": 0.21875, - "learning_rate": 3.97172756264087e-05, - "loss": 2.0991, + "epoch": 1.49, + "grad_norm": 0.2333984375, + "learning_rate": 3.699944546306162e-05, + "loss": 2.1508, "step": 8800 }, { - "epoch": 1.63, - "grad_norm": 0.220703125, - "learning_rate": 3.952381235259228e-05, - "loss": 2.1481, + "epoch": 1.49, + "grad_norm": 0.2265625, + "learning_rate": 3.6884725044259746e-05, + "loss": 2.1642, "step": 8805 }, { - "epoch": 1.63, - "grad_norm": 0.2158203125, - "learning_rate": 3.933076973538532e-05, - "loss": 2.0896, + "epoch": 1.49, + "grad_norm": 0.2294921875, + "learning_rate": 3.677014252181472e-05, + "loss": 2.0776, "step": 8810 }, { - "epoch": 1.64, - "grad_norm": 0.2236328125, - "learning_rate": 3.9138148280811014e-05, - "loss": 2.1299, + "epoch": 1.49, + "grad_norm": 0.228515625, + "learning_rate": 3.665569814607017e-05, + "loss": 2.1675, "step": 8815 }, { - "epoch": 1.64, - "grad_norm": 0.220703125, - "learning_rate": 3.894594849378828e-05, - "loss": 2.1204, + "epoch": 1.49, + "grad_norm": 0.2255859375, + "learning_rate": 3.6541392167068033e-05, + "loss": 2.1034, "step": 8820 }, { - "epoch": 1.64, - "grad_norm": 0.212890625, - "learning_rate": 3.8754170878131e-05, - "loss": 2.0644, + "epoch": 1.49, + "grad_norm": 0.2353515625, + "learning_rate": 3.642722483454781e-05, + "loss": 2.1548, "step": 8825 }, { - "epoch": 1.64, - "grad_norm": 0.21875, - "learning_rate": 3.856281593654623e-05, - "loss": 2.1133, + "epoch": 1.5, + "grad_norm": 0.2265625, + "learning_rate": 3.6313196397946106e-05, + "loss": 2.0931, "step": 8830 }, { - "epoch": 1.64, + "epoch": 1.5, "grad_norm": 0.2197265625, - "learning_rate": 3.8371884170633134e-05, - "loss": 2.0851, + "learning_rate": 3.619930710639604e-05, + "loss": 2.1602, "step": 8835 }, { - "epoch": 1.64, - "grad_norm": 0.212890625, - "learning_rate": 3.818137608088161e-05, - "loss": 2.1087, + "epoch": 1.5, + "grad_norm": 0.23046875, + "learning_rate": 3.608555720872678e-05, + "loss": 2.0739, "step": 8840 }, { - "epoch": 1.64, - "grad_norm": 0.21875, - "learning_rate": 3.7991292166670966e-05, - "loss": 2.1384, + "epoch": 1.5, + "grad_norm": 0.224609375, + "learning_rate": 3.597194695346282e-05, + "loss": 2.1437, "step": 8845 }, { - "epoch": 1.64, - "grad_norm": 0.2158203125, - "learning_rate": 3.780163292626859e-05, - "loss": 2.1075, + "epoch": 1.5, + "grad_norm": 0.2294921875, + "learning_rate": 3.5858476588823664e-05, + "loss": 2.1333, "step": 8850 }, { - "epoch": 1.64, - "grad_norm": 0.2158203125, - "learning_rate": 3.76123988568287e-05, - "loss": 2.0926, + "epoch": 1.5, + "grad_norm": 0.2470703125, + "learning_rate": 3.574514636272318e-05, + "loss": 2.1147, "step": 8855 }, { - "epoch": 1.64, - "grad_norm": 0.2177734375, - "learning_rate": 3.742359045439105e-05, - "loss": 2.1276, + "epoch": 1.5, + "grad_norm": 0.2255859375, + "learning_rate": 3.563195652276893e-05, + "loss": 2.1096, "step": 8860 }, { - "epoch": 1.64, - "grad_norm": 0.216796875, - "learning_rate": 3.723520821387958e-05, - "loss": 2.1149, + "epoch": 1.5, + "grad_norm": 0.2314453125, + "learning_rate": 3.551890731626197e-05, + "loss": 2.1184, "step": 8865 }, { - "epoch": 1.65, - "grad_norm": 0.2158203125, - "learning_rate": 3.704725262910094e-05, - "loss": 2.0676, + "epoch": 1.5, + "grad_norm": 0.224609375, + "learning_rate": 3.54059989901959e-05, + "loss": 2.1549, "step": 8870 }, { - "epoch": 1.65, - "grad_norm": 0.2275390625, - "learning_rate": 3.6859724192743704e-05, - "loss": 2.1569, + "epoch": 1.5, + "grad_norm": 0.228515625, + "learning_rate": 3.529323179125661e-05, + "loss": 2.1688, "step": 8875 }, { - "epoch": 1.65, - "grad_norm": 0.2138671875, - "learning_rate": 3.6672623396376584e-05, - "loss": 2.0912, + "epoch": 1.5, + "grad_norm": 0.2294921875, + "learning_rate": 3.518060596582167e-05, + "loss": 2.1652, "step": 8880 }, { - "epoch": 1.65, - "grad_norm": 0.224609375, - "learning_rate": 3.648595073044729e-05, - "loss": 2.0974, + "epoch": 1.5, + "grad_norm": 0.2255859375, + "learning_rate": 3.506812175995967e-05, + "loss": 2.1266, "step": 8885 }, { - "epoch": 1.65, - "grad_norm": 0.2177734375, - "learning_rate": 3.629970668428129e-05, - "loss": 2.1083, + "epoch": 1.51, + "grad_norm": 0.22265625, + "learning_rate": 3.4955779419429856e-05, + "loss": 2.1351, "step": 8890 }, { - "epoch": 1.65, - "grad_norm": 0.212890625, - "learning_rate": 3.611389174608068e-05, - "loss": 2.1057, + "epoch": 1.51, + "grad_norm": 0.22265625, + "learning_rate": 3.484357918968163e-05, + "loss": 2.1242, "step": 8895 }, { - "epoch": 1.65, - "grad_norm": 0.2177734375, - "learning_rate": 3.592850640292249e-05, - "loss": 2.0982, + "epoch": 1.51, + "grad_norm": 0.224609375, + "learning_rate": 3.4731521315853675e-05, + "loss": 2.1029, "step": 8900 }, { - "epoch": 1.65, - "grad_norm": 0.2236328125, - "learning_rate": 3.574355114075773e-05, - "loss": 2.0863, + "epoch": 1.51, + "grad_norm": 0.2353515625, + "learning_rate": 3.461960604277381e-05, + "loss": 2.129, "step": 8905 }, { - "epoch": 1.65, - "grad_norm": 0.220703125, - "learning_rate": 3.555902644441016e-05, - "loss": 2.1439, + "epoch": 1.51, + "grad_norm": 0.2236328125, + "learning_rate": 3.45078336149583e-05, + "loss": 2.1262, "step": 8910 }, { - "epoch": 1.65, - "grad_norm": 0.224609375, - "learning_rate": 3.5374932797574734e-05, - "loss": 2.1372, + "epoch": 1.51, + "grad_norm": 0.228515625, + "learning_rate": 3.439620427661119e-05, + "loss": 2.14, "step": 8915 }, { - "epoch": 1.65, - "grad_norm": 0.220703125, - "learning_rate": 3.5191270682816604e-05, - "loss": 2.1334, + "epoch": 1.51, + "grad_norm": 0.224609375, + "learning_rate": 3.4284718271624015e-05, + "loss": 2.1609, "step": 8920 }, { - "epoch": 1.66, - "grad_norm": 0.2177734375, - "learning_rate": 3.5008040581569634e-05, - "loss": 2.1059, + "epoch": 1.51, + "grad_norm": 0.2314453125, + "learning_rate": 3.417337584357512e-05, + "loss": 2.0996, "step": 8925 }, { - "epoch": 1.66, - "grad_norm": 0.220703125, - "learning_rate": 3.4825242974135474e-05, - "loss": 2.1059, + "epoch": 1.51, + "grad_norm": 0.22265625, + "learning_rate": 3.4062177235729145e-05, + "loss": 2.0893, "step": 8930 }, { - "epoch": 1.66, - "grad_norm": 0.2197265625, - "learning_rate": 3.464287833968176e-05, - "loss": 2.1372, + "epoch": 1.51, + "grad_norm": 0.2255859375, + "learning_rate": 3.3951122691036564e-05, + "loss": 2.1178, "step": 8935 }, { - "epoch": 1.66, - "grad_norm": 0.2177734375, - "learning_rate": 3.4460947156241376e-05, - "loss": 2.1008, + "epoch": 1.51, + "grad_norm": 0.2216796875, + "learning_rate": 3.384021245213297e-05, + "loss": 2.1169, "step": 8940 }, { - "epoch": 1.66, - "grad_norm": 0.2216796875, - "learning_rate": 3.427944990071108e-05, - "loss": 2.0944, + "epoch": 1.51, + "grad_norm": 0.22265625, + "learning_rate": 3.372944676133878e-05, + "loss": 2.1666, "step": 8945 }, { - "epoch": 1.66, - "grad_norm": 0.2197265625, - "learning_rate": 3.409838704884984e-05, - "loss": 2.1204, + "epoch": 1.52, + "grad_norm": 0.2294921875, + "learning_rate": 3.3618825860658576e-05, + "loss": 2.1317, "step": 8950 }, { - "epoch": 1.66, - "grad_norm": 0.2197265625, - "learning_rate": 3.391775907527834e-05, - "loss": 2.1195, + "epoch": 1.52, + "grad_norm": 0.228515625, + "learning_rate": 3.35083499917806e-05, + "loss": 2.1147, "step": 8955 }, { - "epoch": 1.66, - "grad_norm": 0.2265625, - "learning_rate": 3.373756645347703e-05, - "loss": 2.1051, + "epoch": 1.52, + "grad_norm": 0.224609375, + "learning_rate": 3.3398019396076184e-05, + "loss": 2.1252, "step": 8960 }, { - "epoch": 1.66, - "grad_norm": 0.2138671875, - "learning_rate": 3.355780965578526e-05, - "loss": 2.0931, + "epoch": 1.52, + "grad_norm": 0.224609375, + "learning_rate": 3.328783431459936e-05, + "loss": 2.0857, "step": 8965 }, { - "epoch": 1.66, - "grad_norm": 0.21484375, - "learning_rate": 3.337848915339994e-05, - "loss": 2.1239, + "epoch": 1.52, + "grad_norm": 0.228515625, + "learning_rate": 3.3177794988086074e-05, + "loss": 2.1214, "step": 8970 }, { - "epoch": 1.67, - "grad_norm": 0.2236328125, - "learning_rate": 3.31996054163743e-05, - "loss": 2.0869, + "epoch": 1.52, + "grad_norm": 0.234375, + "learning_rate": 3.306790165695396e-05, + "loss": 2.0711, "step": 8975 }, { - "epoch": 1.67, - "grad_norm": 0.21484375, - "learning_rate": 3.302115891361683e-05, - "loss": 2.082, + "epoch": 1.52, + "grad_norm": 0.232421875, + "learning_rate": 3.295815456130162e-05, + "loss": 2.1091, "step": 8980 }, { - "epoch": 1.67, - "grad_norm": 0.2216796875, - "learning_rate": 3.2843150112889564e-05, - "loss": 2.085, + "epoch": 1.52, + "grad_norm": 0.232421875, + "learning_rate": 3.2848553940908186e-05, + "loss": 2.134, "step": 8985 }, { - "epoch": 1.67, - "grad_norm": 0.2177734375, - "learning_rate": 3.266557948080757e-05, - "loss": 2.0749, + "epoch": 1.52, + "grad_norm": 0.224609375, + "learning_rate": 3.2739100035232776e-05, + "loss": 2.103, "step": 8990 }, { - "epoch": 1.67, - "grad_norm": 0.21875, - "learning_rate": 3.2488447482837146e-05, - "loss": 2.0904, + "epoch": 1.52, + "grad_norm": 0.2353515625, + "learning_rate": 3.262979308341385e-05, + "loss": 2.1696, "step": 8995 }, { - "epoch": 1.67, - "grad_norm": 0.212890625, - "learning_rate": 3.231175458329465e-05, - "loss": 2.1296, + "epoch": 1.52, + "grad_norm": 0.2275390625, + "learning_rate": 3.2520633324268924e-05, + "loss": 2.1352, "step": 9000 }, { - "epoch": 1.67, - "grad_norm": 0.216796875, - "learning_rate": 3.213550124534579e-05, - "loss": 2.1353, + "epoch": 1.53, + "grad_norm": 0.224609375, + "learning_rate": 3.2411620996293876e-05, + "loss": 2.1056, "step": 9005 }, { - "epoch": 1.67, - "grad_norm": 0.220703125, - "learning_rate": 3.1959687931003765e-05, - "loss": 2.1098, + "epoch": 1.53, + "grad_norm": 0.2294921875, + "learning_rate": 3.230275633766248e-05, + "loss": 2.1891, "step": 9010 }, { - "epoch": 1.67, - "grad_norm": 0.216796875, - "learning_rate": 3.178431510112845e-05, - "loss": 2.095, + "epoch": 1.53, + "grad_norm": 0.2275390625, + "learning_rate": 3.219403958622587e-05, + "loss": 2.1408, "step": 9015 }, { - "epoch": 1.67, - "grad_norm": 0.21484375, - "learning_rate": 3.160938321542506e-05, - "loss": 2.1216, + "epoch": 1.53, + "grad_norm": 0.228515625, + "learning_rate": 3.208547097951206e-05, + "loss": 2.1556, "step": 9020 }, { - "epoch": 1.67, - "grad_norm": 0.2177734375, - "learning_rate": 3.143489273244291e-05, - "loss": 2.1403, + "epoch": 1.53, + "grad_norm": 0.2265625, + "learning_rate": 3.197705075472529e-05, + "loss": 2.1061, "step": 9025 }, { - "epoch": 1.68, - "grad_norm": 0.216796875, - "learning_rate": 3.126084410957446e-05, - "loss": 2.0983, + "epoch": 1.53, + "grad_norm": 0.2294921875, + "learning_rate": 3.186877914874572e-05, + "loss": 2.1357, "step": 9030 }, { - "epoch": 1.68, - "grad_norm": 0.2158203125, - "learning_rate": 3.1087237803053584e-05, - "loss": 2.0944, + "epoch": 1.53, + "grad_norm": 0.2255859375, + "learning_rate": 3.1760656398128764e-05, + "loss": 2.1443, "step": 9035 }, { - "epoch": 1.68, - "grad_norm": 0.2197265625, - "learning_rate": 3.091407426795503e-05, - "loss": 2.1514, + "epoch": 1.53, + "grad_norm": 0.2275390625, + "learning_rate": 3.165268273910461e-05, + "loss": 2.0885, "step": 9040 }, { - "epoch": 1.68, - "grad_norm": 0.22265625, - "learning_rate": 3.0741353958192755e-05, - "loss": 2.0635, + "epoch": 1.53, + "grad_norm": 0.2255859375, + "learning_rate": 3.154485840757775e-05, + "loss": 2.0986, "step": 9045 }, { - "epoch": 1.68, - "grad_norm": 0.22265625, - "learning_rate": 3.0569077326518904e-05, - "loss": 2.104, + "epoch": 1.53, + "grad_norm": 0.2294921875, + "learning_rate": 3.14371836391263e-05, + "loss": 2.1272, "step": 9050 }, { - "epoch": 1.68, - "grad_norm": 0.2138671875, - "learning_rate": 3.0397244824522618e-05, - "loss": 2.0914, + "epoch": 1.53, + "grad_norm": 0.228515625, + "learning_rate": 3.1329658669001724e-05, + "loss": 2.1175, "step": 9055 }, { - "epoch": 1.68, - "grad_norm": 0.22265625, - "learning_rate": 3.0225856902628847e-05, - "loss": 2.1016, + "epoch": 1.53, + "grad_norm": 0.2314453125, + "learning_rate": 3.1222283732128186e-05, + "loss": 2.1358, "step": 9060 }, { - "epoch": 1.68, - "grad_norm": 0.2177734375, - "learning_rate": 3.0054914010097145e-05, - "loss": 2.0724, + "epoch": 1.54, + "grad_norm": 0.228515625, + "learning_rate": 3.111505906310194e-05, + "loss": 2.1075, "step": 9065 }, { - "epoch": 1.68, - "grad_norm": 0.2158203125, - "learning_rate": 2.9884416595020505e-05, - "loss": 2.1027, + "epoch": 1.54, + "grad_norm": 0.228515625, + "learning_rate": 3.100798489619111e-05, + "loss": 2.1126, "step": 9070 }, { - "epoch": 1.68, - "grad_norm": 0.216796875, - "learning_rate": 2.971436510432424e-05, - "loss": 2.1249, + "epoch": 1.54, + "grad_norm": 0.2236328125, + "learning_rate": 3.0901061465334905e-05, + "loss": 2.1089, "step": 9075 }, { - "epoch": 1.68, - "grad_norm": 0.2177734375, - "learning_rate": 2.9544759983764736e-05, - "loss": 2.1062, + "epoch": 1.54, + "grad_norm": 0.2294921875, + "learning_rate": 3.079428900414314e-05, + "loss": 2.1171, "step": 9080 }, { - "epoch": 1.69, - "grad_norm": 0.2197265625, - "learning_rate": 2.9375601677928254e-05, - "loss": 2.0937, + "epoch": 1.54, + "grad_norm": 0.224609375, + "learning_rate": 3.0687667745895876e-05, + "loss": 2.1315, "step": 9085 }, { - "epoch": 1.69, - "grad_norm": 0.2197265625, - "learning_rate": 2.9206890630229876e-05, - "loss": 2.1203, + "epoch": 1.54, + "grad_norm": 0.2294921875, + "learning_rate": 3.058119792354283e-05, + "loss": 2.1353, "step": 9090 }, { - "epoch": 1.69, - "grad_norm": 0.2255859375, - "learning_rate": 2.9038627282912268e-05, - "loss": 2.1202, + "epoch": 1.54, + "grad_norm": 0.2314453125, + "learning_rate": 3.0474879769702703e-05, + "loss": 2.1024, "step": 9095 }, { - "epoch": 1.69, - "grad_norm": 0.212890625, - "learning_rate": 2.887081207704454e-05, - "loss": 2.0738, + "epoch": 1.54, + "grad_norm": 0.23046875, + "learning_rate": 3.0368713516663093e-05, + "loss": 2.1436, "step": 9100 }, { - "epoch": 1.69, - "grad_norm": 0.220703125, - "learning_rate": 2.870344545252106e-05, - "loss": 2.1016, + "epoch": 1.54, + "grad_norm": 0.23046875, + "learning_rate": 3.0262699396379467e-05, + "loss": 2.157, "step": 9105 }, { - "epoch": 1.69, - "grad_norm": 0.2236328125, - "learning_rate": 2.8536527848060446e-05, - "loss": 2.0854, + "epoch": 1.54, + "grad_norm": 0.2275390625, + "learning_rate": 3.0156837640475046e-05, + "loss": 2.1166, "step": 9110 }, { - "epoch": 1.69, - "grad_norm": 0.21484375, - "learning_rate": 2.8370059701204122e-05, - "loss": 2.1124, + "epoch": 1.54, + "grad_norm": 0.2275390625, + "learning_rate": 3.0051128480240143e-05, + "loss": 2.1443, "step": 9115 }, { - "epoch": 1.69, - "grad_norm": 0.220703125, - "learning_rate": 2.820404144831541e-05, - "loss": 2.0718, + "epoch": 1.54, + "grad_norm": 0.2236328125, + "learning_rate": 2.9945572146631605e-05, + "loss": 2.1404, "step": 9120 }, { - "epoch": 1.69, - "grad_norm": 0.22265625, - "learning_rate": 2.8038473524578447e-05, - "loss": 2.0919, + "epoch": 1.55, + "grad_norm": 0.2255859375, + "learning_rate": 2.9840168870272413e-05, + "loss": 2.0834, "step": 9125 }, { - "epoch": 1.69, - "grad_norm": 0.2177734375, - "learning_rate": 2.787335636399675e-05, - "loss": 2.1262, + "epoch": 1.55, + "grad_norm": 0.2265625, + "learning_rate": 2.973491888145127e-05, + "loss": 2.1451, "step": 9130 }, { - "epoch": 1.69, - "grad_norm": 0.21484375, - "learning_rate": 2.7708690399392366e-05, - "loss": 2.1434, + "epoch": 1.55, + "grad_norm": 0.2294921875, + "learning_rate": 2.9629822410121754e-05, + "loss": 2.1062, "step": 9135 }, { - "epoch": 1.7, - "grad_norm": 0.216796875, - "learning_rate": 2.7544476062404557e-05, - "loss": 2.1098, + "epoch": 1.55, + "grad_norm": 0.22265625, + "learning_rate": 2.9524879685902173e-05, + "loss": 2.112, "step": 9140 }, { - "epoch": 1.7, - "grad_norm": 0.2197265625, - "learning_rate": 2.738071378348872e-05, - "loss": 2.1332, - "step": 9145 + "epoch": 1.55, + "grad_norm": 0.2255859375, + "learning_rate": 2.9420090938074917e-05, + "loss": 2.1231, + "step": 9145 }, { - "epoch": 1.7, - "grad_norm": 0.216796875, - "learning_rate": 2.7217403991915368e-05, - "loss": 2.1317, + "epoch": 1.55, + "grad_norm": 0.2333984375, + "learning_rate": 2.9315456395585884e-05, + "loss": 2.1256, "step": 9150 }, { - "epoch": 1.7, - "grad_norm": 0.2138671875, - "learning_rate": 2.7054547115768735e-05, - "loss": 2.0972, + "epoch": 1.55, + "grad_norm": 0.2275390625, + "learning_rate": 2.9210976287044144e-05, + "loss": 2.1237, "step": 9155 }, { - "epoch": 1.7, - "grad_norm": 0.2265625, - "learning_rate": 2.6892143581946116e-05, - "loss": 2.0682, + "epoch": 1.55, + "grad_norm": 0.2353515625, + "learning_rate": 2.9106650840721305e-05, + "loss": 2.1511, "step": 9160 }, { - "epoch": 1.7, - "grad_norm": 0.220703125, - "learning_rate": 2.673019381615609e-05, - "loss": 2.1369, + "epoch": 1.55, + "grad_norm": 0.21875, + "learning_rate": 2.9002480284551094e-05, + "loss": 2.1458, "step": 9165 }, { - "epoch": 1.7, - "grad_norm": 0.2177734375, - "learning_rate": 2.6568698242918055e-05, - "loss": 2.068, + "epoch": 1.55, + "grad_norm": 0.2255859375, + "learning_rate": 2.8898464846128837e-05, + "loss": 2.1324, "step": 9170 }, { - "epoch": 1.7, - "grad_norm": 0.2138671875, - "learning_rate": 2.640765728556074e-05, - "loss": 2.134, + "epoch": 1.55, + "grad_norm": 0.232421875, + "learning_rate": 2.8794604752710873e-05, + "loss": 2.1192, "step": 9175 }, { - "epoch": 1.7, - "grad_norm": 0.2177734375, - "learning_rate": 2.6247071366221175e-05, - "loss": 2.1242, + "epoch": 1.55, + "grad_norm": 0.23046875, + "learning_rate": 2.8690900231214224e-05, + "loss": 2.1224, "step": 9180 }, { - "epoch": 1.7, - "grad_norm": 0.224609375, - "learning_rate": 2.6086940905843606e-05, - "loss": 2.1113, + "epoch": 1.56, + "grad_norm": 0.2275390625, + "learning_rate": 2.8587351508215997e-05, + "loss": 2.1159, "step": 9185 }, { - "epoch": 1.71, - "grad_norm": 0.22265625, - "learning_rate": 2.5927266324178345e-05, - "loss": 2.151, + "epoch": 1.56, + "grad_norm": 0.2314453125, + "learning_rate": 2.8483958809952883e-05, + "loss": 2.1377, "step": 9190 }, { - "epoch": 1.71, - "grad_norm": 0.2265625, - "learning_rate": 2.5768048039780858e-05, - "loss": 2.1334, + "epoch": 1.56, + "grad_norm": 0.232421875, + "learning_rate": 2.838072236232069e-05, + "loss": 2.1342, "step": 9195 }, { - "epoch": 1.71, - "grad_norm": 0.2216796875, - "learning_rate": 2.5609286470010262e-05, - "loss": 2.0881, + "epoch": 1.56, + "grad_norm": 0.220703125, + "learning_rate": 2.8277642390873904e-05, + "loss": 2.1474, "step": 9200 }, { - "epoch": 1.71, - "grad_norm": 0.2158203125, - "learning_rate": 2.545098203102876e-05, - "loss": 2.0929, + "epoch": 1.56, + "grad_norm": 0.2236328125, + "learning_rate": 2.8174719120825e-05, + "loss": 2.0472, "step": 9205 }, { - "epoch": 1.71, - "grad_norm": 0.21484375, - "learning_rate": 2.529313513780016e-05, - "loss": 2.0808, + "epoch": 1.56, + "grad_norm": 0.228515625, + "learning_rate": 2.8071952777044208e-05, + "loss": 2.1405, "step": 9210 }, { - "epoch": 1.71, - "grad_norm": 0.2216796875, - "learning_rate": 2.513574620408874e-05, - "loss": 2.1191, + "epoch": 1.56, + "grad_norm": 0.240234375, + "learning_rate": 2.796934358405887e-05, + "loss": 2.135, "step": 9215 }, { - "epoch": 1.71, - "grad_norm": 0.2158203125, - "learning_rate": 2.4978815642458654e-05, - "loss": 2.0935, + "epoch": 1.56, + "grad_norm": 0.23046875, + "learning_rate": 2.786689176605295e-05, + "loss": 2.178, "step": 9220 }, { - "epoch": 1.71, - "grad_norm": 0.212890625, - "learning_rate": 2.482234386427227e-05, - "loss": 2.106, + "epoch": 1.56, + "grad_norm": 0.2265625, + "learning_rate": 2.7764597546866656e-05, + "loss": 2.1374, "step": 9225 }, { - "epoch": 1.71, - "grad_norm": 0.212890625, - "learning_rate": 2.4666331279689425e-05, - "loss": 2.1415, + "epoch": 1.56, + "grad_norm": 0.2275390625, + "learning_rate": 2.7662461149995723e-05, + "loss": 2.1224, "step": 9230 }, { - "epoch": 1.71, - "grad_norm": 0.2177734375, - "learning_rate": 2.4510778297666282e-05, - "loss": 2.1146, + "epoch": 1.56, + "grad_norm": 0.2265625, + "learning_rate": 2.7560482798591193e-05, + "loss": 2.0993, "step": 9235 }, { - "epoch": 1.71, - "grad_norm": 0.2177734375, - "learning_rate": 2.435568532595427e-05, - "loss": 2.1097, + "epoch": 1.56, + "grad_norm": 0.23046875, + "learning_rate": 2.745866271545876e-05, + "loss": 2.1677, "step": 9240 }, { - "epoch": 1.72, - "grad_norm": 0.2158203125, - "learning_rate": 2.4201052771099008e-05, - "loss": 2.1454, + "epoch": 1.57, + "grad_norm": 0.2314453125, + "learning_rate": 2.7357001123058358e-05, + "loss": 2.1336, "step": 9245 }, { - "epoch": 1.72, - "grad_norm": 0.220703125, - "learning_rate": 2.404688103843902e-05, - "loss": 2.1059, + "epoch": 1.57, + "grad_norm": 0.2314453125, + "learning_rate": 2.7255498243503607e-05, + "loss": 2.1442, "step": 9250 }, { - "epoch": 1.72, - "grad_norm": 0.2197265625, - "learning_rate": 2.389317053210518e-05, - "loss": 2.116, + "epoch": 1.57, + "grad_norm": 0.2275390625, + "learning_rate": 2.7154154298561407e-05, + "loss": 2.0766, "step": 9255 }, { - "epoch": 1.72, - "grad_norm": 0.2197265625, - "learning_rate": 2.3739921655019147e-05, - "loss": 2.0831, + "epoch": 1.57, + "grad_norm": 0.2275390625, + "learning_rate": 2.705296950965135e-05, + "loss": 2.1467, "step": 9260 }, { - "epoch": 1.72, - "grad_norm": 0.2177734375, - "learning_rate": 2.358713480889254e-05, - "loss": 2.1126, + "epoch": 1.57, + "grad_norm": 0.2333984375, + "learning_rate": 2.695194409784534e-05, + "loss": 2.1041, "step": 9265 }, { - "epoch": 1.72, - "grad_norm": 0.2216796875, - "learning_rate": 2.3434810394225927e-05, - "loss": 2.08, + "epoch": 1.57, + "grad_norm": 0.21875, + "learning_rate": 2.685107828386708e-05, + "loss": 2.0962, "step": 9270 }, { - "epoch": 1.72, - "grad_norm": 0.21484375, - "learning_rate": 2.3282948810307637e-05, - "loss": 2.0732, + "epoch": 1.57, + "grad_norm": 0.2236328125, + "learning_rate": 2.6750372288091563e-05, + "loss": 2.0952, "step": 9275 }, { - "epoch": 1.72, - "grad_norm": 0.2265625, - "learning_rate": 2.313155045521278e-05, - "loss": 2.1507, + "epoch": 1.57, + "grad_norm": 0.234375, + "learning_rate": 2.6649826330544624e-05, + "loss": 2.1158, "step": 9280 }, { - "epoch": 1.72, - "grad_norm": 0.2138671875, - "learning_rate": 2.2980615725802213e-05, - "loss": 2.1155, + "epoch": 1.57, + "grad_norm": 0.244140625, + "learning_rate": 2.6549440630902377e-05, + "loss": 2.0895, "step": 9285 }, { - "epoch": 1.72, - "grad_norm": 0.216796875, - "learning_rate": 2.283014501772154e-05, - "loss": 2.152, + "epoch": 1.57, + "grad_norm": 0.2265625, + "learning_rate": 2.644921540849087e-05, + "loss": 2.1119, "step": 9290 }, { - "epoch": 1.72, - "grad_norm": 0.2138671875, - "learning_rate": 2.268013872539998e-05, - "loss": 2.1221, + "epoch": 1.57, + "grad_norm": 0.2236328125, + "learning_rate": 2.6349150882285535e-05, + "loss": 2.1148, "step": 9295 }, { - "epoch": 1.73, - "grad_norm": 0.21875, - "learning_rate": 2.2530597242049378e-05, - "loss": 2.1292, + "epoch": 1.58, + "grad_norm": 0.2294921875, + "learning_rate": 2.6249247270910594e-05, + "loss": 2.0864, "step": 9300 }, { - "epoch": 1.73, - "grad_norm": 0.224609375, - "learning_rate": 2.238152095966315e-05, - "loss": 2.0971, + "epoch": 1.58, + "grad_norm": 0.2236328125, + "learning_rate": 2.614950479263889e-05, + "loss": 2.1098, "step": 9305 }, { - "epoch": 1.73, - "grad_norm": 0.2265625, - "learning_rate": 2.223291026901533e-05, - "loss": 2.1177, + "epoch": 1.58, + "grad_norm": 0.228515625, + "learning_rate": 2.6049923665391108e-05, + "loss": 2.1359, "step": 9310 }, { - "epoch": 1.73, - "grad_norm": 0.220703125, - "learning_rate": 2.208476555965946e-05, - "loss": 2.088, + "epoch": 1.58, + "grad_norm": 0.2294921875, + "learning_rate": 2.5950504106735353e-05, + "loss": 2.1003, "step": 9315 }, { - "epoch": 1.73, - "grad_norm": 0.2216796875, - "learning_rate": 2.1937087219927576e-05, - "loss": 2.1734, + "epoch": 1.58, + "grad_norm": 0.228515625, + "learning_rate": 2.5851246333886815e-05, + "loss": 2.1277, "step": 9320 }, { - "epoch": 1.73, - "grad_norm": 0.2158203125, - "learning_rate": 2.178987563692938e-05, - "loss": 2.1248, + "epoch": 1.58, + "grad_norm": 0.220703125, + "learning_rate": 2.5752150563707234e-05, + "loss": 2.0998, "step": 9325 }, { - "epoch": 1.73, - "grad_norm": 0.22265625, - "learning_rate": 2.1643131196550835e-05, - "loss": 2.1097, + "epoch": 1.58, + "grad_norm": 0.23046875, + "learning_rate": 2.5653217012704244e-05, + "loss": 2.1263, "step": 9330 }, { - "epoch": 1.73, - "grad_norm": 0.236328125, - "learning_rate": 2.1496854283453472e-05, - "loss": 2.0857, + "epoch": 1.58, + "grad_norm": 0.234375, + "learning_rate": 2.5554445897031286e-05, + "loss": 2.0996, "step": 9335 }, { - "epoch": 1.73, - "grad_norm": 0.212890625, - "learning_rate": 2.1351045281073412e-05, - "loss": 2.0775, + "epoch": 1.58, + "grad_norm": 0.23046875, + "learning_rate": 2.5455837432486707e-05, + "loss": 2.0911, "step": 9340 }, { - "epoch": 1.73, - "grad_norm": 0.22265625, - "learning_rate": 2.1205704571620076e-05, - "loss": 2.1096, + "epoch": 1.58, + "grad_norm": 0.228515625, + "learning_rate": 2.5357391834513588e-05, + "loss": 2.1413, "step": 9345 }, { - "epoch": 1.73, - "grad_norm": 0.2119140625, - "learning_rate": 2.1060832536075403e-05, - "loss": 2.1098, + "epoch": 1.58, + "grad_norm": 0.2275390625, + "learning_rate": 2.5259109318199194e-05, + "loss": 2.1594, "step": 9350 }, { - "epoch": 1.74, - "grad_norm": 0.216796875, - "learning_rate": 2.0916429554192818e-05, - "loss": 2.0878, + "epoch": 1.58, + "grad_norm": 0.2265625, + "learning_rate": 2.5160990098274373e-05, + "loss": 2.0828, "step": 9355 }, { - "epoch": 1.74, - "grad_norm": 0.216796875, - "learning_rate": 2.07724960044962e-05, - "loss": 2.138, + "epoch": 1.59, + "grad_norm": 0.2275390625, + "learning_rate": 2.5063034389113282e-05, + "loss": 2.1489, "step": 9360 }, { - "epoch": 1.74, - "grad_norm": 0.220703125, - "learning_rate": 2.0629032264278904e-05, - "loss": 2.1298, + "epoch": 1.59, + "grad_norm": 0.236328125, + "learning_rate": 2.4965242404732892e-05, + "loss": 2.1443, "step": 9365 }, { - "epoch": 1.74, - "grad_norm": 0.21484375, - "learning_rate": 2.0486038709602706e-05, - "loss": 2.0918, + "epoch": 1.59, + "grad_norm": 0.228515625, + "learning_rate": 2.48676143587923e-05, + "loss": 2.1547, "step": 9370 }, { - "epoch": 1.74, - "grad_norm": 0.220703125, - "learning_rate": 2.034351571529709e-05, - "loss": 2.1409, + "epoch": 1.59, + "grad_norm": 0.2255859375, + "learning_rate": 2.4770150464592566e-05, + "loss": 2.0968, "step": 9375 }, { - "epoch": 1.74, - "grad_norm": 0.2119140625, - "learning_rate": 2.0201463654957766e-05, - "loss": 2.1113, + "epoch": 1.59, + "grad_norm": 0.2255859375, + "learning_rate": 2.4672850935076065e-05, + "loss": 2.0872, "step": 9380 }, { - "epoch": 1.74, - "grad_norm": 0.22265625, - "learning_rate": 2.0059882900946227e-05, - "loss": 2.1025, + "epoch": 1.59, + "grad_norm": 0.228515625, + "learning_rate": 2.4575715982825997e-05, + "loss": 2.1518, "step": 9385 }, { - "epoch": 1.74, - "grad_norm": 0.21875, - "learning_rate": 1.9918773824388405e-05, - "loss": 2.0689, + "epoch": 1.59, + "grad_norm": 0.2216796875, + "learning_rate": 2.4478745820066084e-05, + "loss": 2.1032, "step": 9390 }, { - "epoch": 1.74, - "grad_norm": 0.224609375, - "learning_rate": 1.977813679517386e-05, - "loss": 2.1106, + "epoch": 1.59, + "grad_norm": 0.23828125, + "learning_rate": 2.4381940658659963e-05, + "loss": 2.102, "step": 9395 }, { - "epoch": 1.74, - "grad_norm": 0.21875, - "learning_rate": 1.96379721819548e-05, - "loss": 2.1136, + "epoch": 1.59, + "grad_norm": 0.2333984375, + "learning_rate": 2.4285300710110782e-05, + "loss": 2.1821, "step": 9400 }, { - "epoch": 1.74, - "grad_norm": 0.2216796875, - "learning_rate": 1.9498280352145004e-05, - "loss": 2.143, + "epoch": 1.59, + "grad_norm": 0.224609375, + "learning_rate": 2.4188826185560743e-05, + "loss": 2.0965, "step": 9405 }, { - "epoch": 1.75, - "grad_norm": 0.2275390625, - "learning_rate": 1.9359061671919032e-05, - "loss": 2.1268, + "epoch": 1.59, + "grad_norm": 0.2255859375, + "learning_rate": 2.409251729579055e-05, + "loss": 2.1287, "step": 9410 }, { - "epoch": 1.75, - "grad_norm": 0.21875, - "learning_rate": 1.9220316506211077e-05, - "loss": 2.0934, + "epoch": 1.59, + "grad_norm": 0.2314453125, + "learning_rate": 2.399637425121911e-05, + "loss": 2.1487, "step": 9415 }, { - "epoch": 1.75, - "grad_norm": 0.21484375, - "learning_rate": 1.9082045218714262e-05, - "loss": 2.1128, + "epoch": 1.6, + "grad_norm": 0.2177734375, + "learning_rate": 2.390039726190295e-05, + "loss": 2.1267, "step": 9420 }, { - "epoch": 1.75, - "grad_norm": 0.2236328125, - "learning_rate": 1.8944248171879453e-05, - "loss": 2.1024, + "epoch": 1.6, + "grad_norm": 0.2294921875, + "learning_rate": 2.380458653753579e-05, + "loss": 2.1301, "step": 9425 }, { - "epoch": 1.75, - "grad_norm": 0.21875, - "learning_rate": 1.8806925726914225e-05, - "loss": 2.122, + "epoch": 1.6, + "grad_norm": 0.2314453125, + "learning_rate": 2.370894228744809e-05, + "loss": 2.1212, "step": 9430 }, { - "epoch": 1.75, - "grad_norm": 0.2138671875, - "learning_rate": 1.8670078243782353e-05, - "loss": 2.0871, + "epoch": 1.6, + "grad_norm": 0.216796875, + "learning_rate": 2.3613464720606637e-05, + "loss": 2.0878, "step": 9435 }, { - "epoch": 1.75, - "grad_norm": 0.21875, - "learning_rate": 1.853370608120244e-05, - "loss": 2.0643, + "epoch": 1.6, + "grad_norm": 0.2265625, + "learning_rate": 2.351815404561394e-05, + "loss": 2.1501, "step": 9440 }, { - "epoch": 1.75, - "grad_norm": 0.2197265625, - "learning_rate": 1.839780959664714e-05, - "loss": 2.1121, + "epoch": 1.6, + "grad_norm": 0.224609375, + "learning_rate": 2.3423010470707972e-05, + "loss": 2.1325, "step": 9445 }, { - "epoch": 1.75, - "grad_norm": 0.21875, - "learning_rate": 1.8262389146342217e-05, - "loss": 2.0914, + "epoch": 1.6, + "grad_norm": 0.2294921875, + "learning_rate": 2.3328034203761582e-05, + "loss": 2.1175, "step": 9450 }, { - "epoch": 1.75, - "grad_norm": 0.220703125, - "learning_rate": 1.8127445085265716e-05, - "loss": 2.1003, + "epoch": 1.6, + "grad_norm": 0.228515625, + "learning_rate": 2.323322545228208e-05, + "loss": 2.1565, "step": 9455 }, { - "epoch": 1.76, - "grad_norm": 0.2197265625, - "learning_rate": 1.79929777671467e-05, - "loss": 2.0827, + "epoch": 1.6, + "grad_norm": 0.23046875, + "learning_rate": 2.3138584423410823e-05, + "loss": 2.126, "step": 9460 }, { - "epoch": 1.76, - "grad_norm": 0.216796875, - "learning_rate": 1.785898754446469e-05, - "loss": 2.1114, + "epoch": 1.6, + "grad_norm": 0.2275390625, + "learning_rate": 2.3044111323922623e-05, + "loss": 2.1131, "step": 9465 }, { - "epoch": 1.76, - "grad_norm": 0.2158203125, - "learning_rate": 1.7725474768448636e-05, - "loss": 2.0928, + "epoch": 1.6, + "grad_norm": 0.23046875, + "learning_rate": 2.2949806360225502e-05, + "loss": 2.1226, "step": 9470 }, { - "epoch": 1.76, - "grad_norm": 0.2236328125, - "learning_rate": 1.759243978907583e-05, - "loss": 2.0907, + "epoch": 1.6, + "grad_norm": 0.2333984375, + "learning_rate": 2.2855669738360064e-05, + "loss": 2.1327, "step": 9475 }, { - "epoch": 1.76, - "grad_norm": 0.216796875, - "learning_rate": 1.7459882955071237e-05, - "loss": 2.1334, + "epoch": 1.61, + "grad_norm": 0.224609375, + "learning_rate": 2.2761701663999158e-05, + "loss": 2.1363, "step": 9480 }, { - "epoch": 1.76, - "grad_norm": 0.22265625, - "learning_rate": 1.732780461390635e-05, - "loss": 2.096, + "epoch": 1.61, + "grad_norm": 0.2275390625, + "learning_rate": 2.2667902342447356e-05, + "loss": 2.0965, "step": 9485 }, { - "epoch": 1.76, - "grad_norm": 0.21484375, - "learning_rate": 1.7196205111798446e-05, - "loss": 2.1169, + "epoch": 1.61, + "grad_norm": 0.2373046875, + "learning_rate": 2.2574271978640572e-05, + "loss": 2.1373, "step": 9490 }, { - "epoch": 1.76, - "grad_norm": 0.216796875, - "learning_rate": 1.7065084793709607e-05, - "loss": 2.1197, + "epoch": 1.61, + "grad_norm": 0.2236328125, + "learning_rate": 2.248081077714549e-05, + "loss": 2.1131, "step": 9495 }, { - "epoch": 1.76, - "grad_norm": 0.2158203125, - "learning_rate": 1.693444400334583e-05, - "loss": 2.0802, + "epoch": 1.61, + "grad_norm": 0.224609375, + "learning_rate": 2.2387518942159292e-05, + "loss": 2.1056, "step": 9500 }, { - "epoch": 1.76, - "grad_norm": 0.23046875, - "learning_rate": 1.680428308315618e-05, - "loss": 2.079, + "epoch": 1.61, + "grad_norm": 0.2294921875, + "learning_rate": 2.2294396677509078e-05, + "loss": 2.1192, "step": 9505 }, { - "epoch": 1.76, - "grad_norm": 0.216796875, - "learning_rate": 1.6674602374331693e-05, - "loss": 2.0876, + "epoch": 1.61, + "grad_norm": 0.2294921875, + "learning_rate": 2.2201444186651487e-05, + "loss": 2.1341, "step": 9510 }, { - "epoch": 1.77, - "grad_norm": 0.2265625, - "learning_rate": 1.6545402216804783e-05, - "loss": 2.1172, + "epoch": 1.61, + "grad_norm": 0.224609375, + "learning_rate": 2.210866167267225e-05, + "loss": 2.0922, "step": 9515 }, { - "epoch": 1.77, - "grad_norm": 0.2177734375, - "learning_rate": 1.6416682949248142e-05, - "loss": 2.0708, + "epoch": 1.61, + "grad_norm": 0.22265625, + "learning_rate": 2.2016049338285628e-05, + "loss": 2.1433, "step": 9520 }, { - "epoch": 1.77, - "grad_norm": 0.2158203125, - "learning_rate": 1.628844490907384e-05, - "loss": 2.1358, + "epoch": 1.61, + "grad_norm": 0.2275390625, + "learning_rate": 2.1923607385834167e-05, + "loss": 2.1042, "step": 9525 }, { - "epoch": 1.77, - "grad_norm": 0.2216796875, - "learning_rate": 1.616068843243257e-05, - "loss": 2.1241, + "epoch": 1.61, + "grad_norm": 0.2265625, + "learning_rate": 2.1831336017288174e-05, + "loss": 2.0894, "step": 9530 }, { - "epoch": 1.77, - "grad_norm": 0.2158203125, - "learning_rate": 1.6033413854212643e-05, - "loss": 2.1311, + "epoch": 1.61, + "grad_norm": 0.228515625, + "learning_rate": 2.1739235434245097e-05, + "loss": 2.1704, "step": 9535 }, { - "epoch": 1.77, - "grad_norm": 0.2119140625, - "learning_rate": 1.5906621508039342e-05, - "loss": 2.0832, + "epoch": 1.62, + "grad_norm": 0.234375, + "learning_rate": 2.1647305837929466e-05, + "loss": 2.0889, "step": 9540 }, { - "epoch": 1.77, - "grad_norm": 0.2197265625, - "learning_rate": 1.5780311726273634e-05, - "loss": 2.091, + "epoch": 1.62, + "grad_norm": 0.2333984375, + "learning_rate": 2.1555547429192112e-05, + "loss": 2.0969, "step": 9545 }, { - "epoch": 1.77, - "grad_norm": 0.21875, - "learning_rate": 1.5654484840011617e-05, - "loss": 2.0968, + "epoch": 1.62, + "grad_norm": 0.2265625, + "learning_rate": 2.1463960408509832e-05, + "loss": 2.136, "step": 9550 }, { - "epoch": 1.77, - "grad_norm": 0.220703125, - "learning_rate": 1.552914117908375e-05, - "loss": 2.1037, + "epoch": 1.62, + "grad_norm": 0.2236328125, + "learning_rate": 2.137254497598501e-05, + "loss": 2.1246, "step": 9555 }, { - "epoch": 1.77, - "grad_norm": 0.216796875, - "learning_rate": 1.5404281072053517e-05, - "loss": 2.1281, + "epoch": 1.62, + "grad_norm": 0.228515625, + "learning_rate": 2.128130133134516e-05, + "loss": 2.1073, "step": 9560 }, { - "epoch": 1.77, - "grad_norm": 0.216796875, - "learning_rate": 1.5279904846217085e-05, - "loss": 2.1145, + "epoch": 1.62, + "grad_norm": 0.224609375, + "learning_rate": 2.1190229673942363e-05, + "loss": 2.142, "step": 9565 }, { - "epoch": 1.78, - "grad_norm": 0.220703125, - "learning_rate": 1.515601282760215e-05, - "loss": 2.1418, + "epoch": 1.62, + "grad_norm": 0.236328125, + "learning_rate": 2.109933020275312e-05, + "loss": 2.1124, "step": 9570 }, { - "epoch": 1.78, - "grad_norm": 0.2177734375, - "learning_rate": 1.5032605340967132e-05, - "loss": 2.0932, + "epoch": 1.62, + "grad_norm": 0.2236328125, + "learning_rate": 2.1008603116377545e-05, + "loss": 2.1026, "step": 9575 }, { - "epoch": 1.78, - "grad_norm": 0.220703125, - "learning_rate": 1.4909682709800355e-05, - "loss": 2.1146, + "epoch": 1.62, + "grad_norm": 0.2265625, + "learning_rate": 2.091804861303922e-05, + "loss": 2.1151, "step": 9580 }, { - "epoch": 1.78, - "grad_norm": 0.2236328125, - "learning_rate": 1.4787245256319227e-05, - "loss": 2.104, + "epoch": 1.62, + "grad_norm": 0.2294921875, + "learning_rate": 2.0827666890584685e-05, + "loss": 2.0735, "step": 9585 }, { - "epoch": 1.78, - "grad_norm": 0.2177734375, - "learning_rate": 1.4665293301469374e-05, - "loss": 2.0964, + "epoch": 1.62, + "grad_norm": 0.2265625, + "learning_rate": 2.073745814648287e-05, + "loss": 2.119, "step": 9590 }, { - "epoch": 1.78, - "grad_norm": 0.2216796875, - "learning_rate": 1.4543827164923619e-05, - "loss": 2.1167, + "epoch": 1.63, + "grad_norm": 0.228515625, + "learning_rate": 2.0647422577824882e-05, + "loss": 2.1127, "step": 9595 }, { - "epoch": 1.78, - "grad_norm": 0.2197265625, - "learning_rate": 1.4422847165081555e-05, - "loss": 2.0994, + "epoch": 1.63, + "grad_norm": 0.224609375, + "learning_rate": 2.0557560381323437e-05, + "loss": 2.1275, "step": 9600 }, { - "epoch": 1.78, - "grad_norm": 0.220703125, - "learning_rate": 1.4302353619068309e-05, - "loss": 2.0851, + "epoch": 1.63, + "grad_norm": 0.236328125, + "learning_rate": 2.046787175331244e-05, + "loss": 2.1583, "step": 9605 }, { - "epoch": 1.78, - "grad_norm": 0.216796875, - "learning_rate": 1.4182346842733873e-05, - "loss": 2.1048, + "epoch": 1.63, + "grad_norm": 0.2236328125, + "learning_rate": 2.037835688974662e-05, + "loss": 2.1137, "step": 9610 }, { - "epoch": 1.78, - "grad_norm": 0.2177734375, - "learning_rate": 1.4062827150652302e-05, - "loss": 2.0988, - "step": 9615 - }, + "epoch": 1.63, + "grad_norm": 0.2265625, + "learning_rate": 2.0289015986201043e-05, + "loss": 2.086, + "step": 9615 + }, + { + "epoch": 1.63, + "grad_norm": 0.23046875, + "learning_rate": 2.019984923787065e-05, + "loss": 2.1226, + "step": 9620 + }, + { + "epoch": 1.63, + "grad_norm": 0.228515625, + "learning_rate": 2.0110856839569947e-05, + "loss": 2.1492, + "step": 9625 + }, + { + "epoch": 1.63, + "grad_norm": 0.232421875, + "learning_rate": 2.0022038985732495e-05, + "loss": 2.1303, + "step": 9630 + }, + { + "epoch": 1.63, + "grad_norm": 0.228515625, + "learning_rate": 1.99333958704105e-05, + "loss": 2.1495, + "step": 9635 + }, + { + "epoch": 1.63, + "grad_norm": 0.2333984375, + "learning_rate": 1.984492768727443e-05, + "loss": 2.1262, + "step": 9640 + }, + { + "epoch": 1.63, + "grad_norm": 0.2294921875, + "learning_rate": 1.9756634629612447e-05, + "loss": 2.1363, + "step": 9645 + }, + { + "epoch": 1.63, + "grad_norm": 0.234375, + "learning_rate": 1.9668516890330212e-05, + "loss": 2.1487, + "step": 9650 + }, + { + "epoch": 1.64, + "grad_norm": 0.224609375, + "learning_rate": 1.95805746619503e-05, + "loss": 2.1058, + "step": 9655 + }, + { + "epoch": 1.64, + "grad_norm": 0.2314453125, + "learning_rate": 1.9492808136611818e-05, + "loss": 2.1014, + "step": 9660 + }, + { + "epoch": 1.64, + "grad_norm": 0.2294921875, + "learning_rate": 1.9405217506069994e-05, + "loss": 2.1296, + "step": 9665 + }, + { + "epoch": 1.64, + "grad_norm": 0.2265625, + "learning_rate": 1.9317802961695786e-05, + "loss": 2.1045, + "step": 9670 + }, + { + "epoch": 1.64, + "grad_norm": 0.224609375, + "learning_rate": 1.923056469447535e-05, + "loss": 2.1638, + "step": 9675 + }, + { + "epoch": 1.64, + "grad_norm": 0.2216796875, + "learning_rate": 1.914350289500979e-05, + "loss": 2.1128, + "step": 9680 + }, + { + "epoch": 1.64, + "grad_norm": 0.2255859375, + "learning_rate": 1.9056617753514628e-05, + "loss": 2.1096, + "step": 9685 + }, + { + "epoch": 1.64, + "grad_norm": 0.232421875, + "learning_rate": 1.8969909459819412e-05, + "loss": 2.1324, + "step": 9690 + }, + { + "epoch": 1.64, + "grad_norm": 0.228515625, + "learning_rate": 1.888337820336735e-05, + "loss": 2.1221, + "step": 9695 + }, + { + "epoch": 1.64, + "grad_norm": 0.2236328125, + "learning_rate": 1.879702417321475e-05, + "loss": 2.112, + "step": 9700 + }, + { + "epoch": 1.64, + "grad_norm": 0.23046875, + "learning_rate": 1.871084755803082e-05, + "loss": 2.137, + "step": 9705 + }, + { + "epoch": 1.64, + "grad_norm": 0.2255859375, + "learning_rate": 1.8624848546097086e-05, + "loss": 2.1575, + "step": 9710 + }, + { + "epoch": 1.65, + "grad_norm": 0.2294921875, + "learning_rate": 1.8539027325307056e-05, + "loss": 2.1784, + "step": 9715 + }, + { + "epoch": 1.65, + "grad_norm": 0.23046875, + "learning_rate": 1.8453384083165803e-05, + "loss": 2.0949, + "step": 9720 + }, + { + "epoch": 1.65, + "grad_norm": 0.224609375, + "learning_rate": 1.8367919006789558e-05, + "loss": 2.1114, + "step": 9725 + }, + { + "epoch": 1.65, + "grad_norm": 0.2236328125, + "learning_rate": 1.828263228290522e-05, + "loss": 2.1596, + "step": 9730 + }, + { + "epoch": 1.65, + "grad_norm": 0.2265625, + "learning_rate": 1.8197524097850095e-05, + "loss": 2.1079, + "step": 9735 + }, + { + "epoch": 1.65, + "grad_norm": 0.2275390625, + "learning_rate": 1.8112594637571366e-05, + "loss": 2.0991, + "step": 9740 + }, + { + "epoch": 1.65, + "grad_norm": 0.2353515625, + "learning_rate": 1.802784408762578e-05, + "loss": 2.1254, + "step": 9745 + }, + { + "epoch": 1.65, + "grad_norm": 0.224609375, + "learning_rate": 1.7943272633179166e-05, + "loss": 2.0966, + "step": 9750 + }, + { + "epoch": 1.65, + "grad_norm": 0.234375, + "learning_rate": 1.7858880459006e-05, + "loss": 2.1437, + "step": 9755 + }, + { + "epoch": 1.65, + "grad_norm": 0.2275390625, + "learning_rate": 1.777466774948916e-05, + "loss": 2.1718, + "step": 9760 + }, + { + "epoch": 1.65, + "grad_norm": 0.22265625, + "learning_rate": 1.769063468861941e-05, + "loss": 2.1158, + "step": 9765 + }, + { + "epoch": 1.65, + "grad_norm": 0.21875, + "learning_rate": 1.7606781459994913e-05, + "loss": 2.0889, + "step": 9770 + }, + { + "epoch": 1.66, + "grad_norm": 0.2421875, + "learning_rate": 1.7523108246821017e-05, + "loss": 2.1166, + "step": 9775 + }, + { + "epoch": 1.66, + "grad_norm": 0.2265625, + "learning_rate": 1.743961523190981e-05, + "loss": 2.0749, + "step": 9780 + }, + { + "epoch": 1.66, + "grad_norm": 0.23046875, + "learning_rate": 1.7356302597679554e-05, + "loss": 2.1447, + "step": 9785 + }, + { + "epoch": 1.66, + "grad_norm": 0.2255859375, + "learning_rate": 1.727317052615447e-05, + "loss": 2.121, + "step": 9790 + }, + { + "epoch": 1.66, + "grad_norm": 0.224609375, + "learning_rate": 1.719021919896433e-05, + "loss": 2.0826, + "step": 9795 + }, + { + "epoch": 1.66, + "grad_norm": 0.228515625, + "learning_rate": 1.7107448797343893e-05, + "loss": 2.102, + "step": 9800 + }, + { + "epoch": 1.66, + "grad_norm": 0.2314453125, + "learning_rate": 1.7024859502132696e-05, + "loss": 2.1022, + "step": 9805 + }, + { + "epoch": 1.66, + "grad_norm": 0.22265625, + "learning_rate": 1.6942451493774657e-05, + "loss": 2.0963, + "step": 9810 + }, + { + "epoch": 1.66, + "grad_norm": 0.2294921875, + "learning_rate": 1.6860224952317473e-05, + "loss": 2.1186, + "step": 9815 + }, + { + "epoch": 1.66, + "grad_norm": 0.236328125, + "learning_rate": 1.6778180057412486e-05, + "loss": 2.1112, + "step": 9820 + }, + { + "epoch": 1.66, + "grad_norm": 0.220703125, + "learning_rate": 1.6696316988314043e-05, + "loss": 2.1388, + "step": 9825 + }, + { + "epoch": 1.66, + "grad_norm": 0.232421875, + "learning_rate": 1.6614635923879362e-05, + "loss": 2.1583, + "step": 9830 + }, + { + "epoch": 1.67, + "grad_norm": 0.228515625, + "learning_rate": 1.6533137042567936e-05, + "loss": 2.1003, + "step": 9835 + }, + { + "epoch": 1.67, + "grad_norm": 0.220703125, + "learning_rate": 1.645182052244124e-05, + "loss": 2.111, + "step": 9840 + }, + { + "epoch": 1.67, + "grad_norm": 0.2333984375, + "learning_rate": 1.6370686541162327e-05, + "loss": 2.122, + "step": 9845 + }, + { + "epoch": 1.67, + "grad_norm": 0.2265625, + "learning_rate": 1.6289735275995433e-05, + "loss": 2.0957, + "step": 9850 + }, + { + "epoch": 1.67, + "grad_norm": 0.2294921875, + "learning_rate": 1.6208966903805555e-05, + "loss": 2.0987, + "step": 9855 + }, + { + "epoch": 1.67, + "grad_norm": 0.228515625, + "learning_rate": 1.6128381601058128e-05, + "loss": 2.0697, + "step": 9860 + }, + { + "epoch": 1.67, + "grad_norm": 0.2314453125, + "learning_rate": 1.6047979543818624e-05, + "loss": 2.1318, + "step": 9865 + }, + { + "epoch": 1.67, + "grad_norm": 0.2236328125, + "learning_rate": 1.5967760907752115e-05, + "loss": 2.1134, + "step": 9870 + }, + { + "epoch": 1.67, + "grad_norm": 0.2333984375, + "learning_rate": 1.5887725868123006e-05, + "loss": 2.1264, + "step": 9875 + }, + { + "epoch": 1.67, + "grad_norm": 0.232421875, + "learning_rate": 1.580787459979446e-05, + "loss": 2.0945, + "step": 9880 + }, + { + "epoch": 1.67, + "grad_norm": 0.2255859375, + "learning_rate": 1.57282072772282e-05, + "loss": 2.0919, + "step": 9885 + }, + { + "epoch": 1.67, + "grad_norm": 0.2236328125, + "learning_rate": 1.5648724074484056e-05, + "loss": 2.1147, + "step": 9890 + }, + { + "epoch": 1.68, + "grad_norm": 0.2294921875, + "learning_rate": 1.5569425165219586e-05, + "loss": 2.107, + "step": 9895 + }, + { + "epoch": 1.68, + "grad_norm": 0.232421875, + "learning_rate": 1.5490310722689693e-05, + "loss": 2.0979, + "step": 9900 + }, + { + "epoch": 1.68, + "grad_norm": 0.2353515625, + "learning_rate": 1.5411380919746255e-05, + "loss": 2.0866, + "step": 9905 + }, + { + "epoch": 1.68, + "grad_norm": 0.228515625, + "learning_rate": 1.5332635928837714e-05, + "loss": 2.1099, + "step": 9910 + }, + { + "epoch": 1.68, + "grad_norm": 0.2314453125, + "learning_rate": 1.5254075922008748e-05, + "loss": 2.1573, + "step": 9915 + }, + { + "epoch": 1.68, + "grad_norm": 0.228515625, + "learning_rate": 1.5175701070899896e-05, + "loss": 2.134, + "step": 9920 + }, + { + "epoch": 1.68, + "grad_norm": 0.23046875, + "learning_rate": 1.5097511546747146e-05, + "loss": 2.1199, + "step": 9925 + }, + { + "epoch": 1.68, + "grad_norm": 0.224609375, + "learning_rate": 1.501950752038158e-05, + "loss": 2.1321, + "step": 9930 + }, + { + "epoch": 1.68, + "grad_norm": 0.2294921875, + "learning_rate": 1.4941689162228977e-05, + "loss": 2.1165, + "step": 9935 + }, + { + "epoch": 1.68, + "grad_norm": 0.228515625, + "learning_rate": 1.4864056642309499e-05, + "loss": 2.1185, + "step": 9940 + }, + { + "epoch": 1.68, + "grad_norm": 0.224609375, + "learning_rate": 1.4786610130237244e-05, + "loss": 2.1314, + "step": 9945 + }, + { + "epoch": 1.69, + "grad_norm": 0.228515625, + "learning_rate": 1.4709349795219939e-05, + "loss": 2.0686, + "step": 9950 + }, + { + "epoch": 1.69, + "grad_norm": 0.2353515625, + "learning_rate": 1.4632275806058559e-05, + "loss": 2.1141, + "step": 9955 + }, + { + "epoch": 1.69, + "grad_norm": 0.2333984375, + "learning_rate": 1.4555388331146924e-05, + "loss": 2.1641, + "step": 9960 + }, + { + "epoch": 1.69, + "grad_norm": 0.2294921875, + "learning_rate": 1.4478687538471313e-05, + "loss": 2.0876, + "step": 9965 + }, + { + "epoch": 1.69, + "grad_norm": 0.224609375, + "learning_rate": 1.4402173595610213e-05, + "loss": 2.132, + "step": 9970 + }, + { + "epoch": 1.69, + "grad_norm": 0.2275390625, + "learning_rate": 1.4325846669733844e-05, + "loss": 2.0967, + "step": 9975 + }, + { + "epoch": 1.69, + "grad_norm": 0.2333984375, + "learning_rate": 1.4249706927603756e-05, + "loss": 2.1232, + "step": 9980 + }, + { + "epoch": 1.69, + "grad_norm": 0.2265625, + "learning_rate": 1.4173754535572658e-05, + "loss": 2.0908, + "step": 9985 + }, + { + "epoch": 1.69, + "grad_norm": 0.2236328125, + "learning_rate": 1.4097989659583876e-05, + "loss": 2.1086, + "step": 9990 + }, + { + "epoch": 1.69, + "grad_norm": 0.22265625, + "learning_rate": 1.4022412465170987e-05, + "loss": 2.117, + "step": 9995 + }, + { + "epoch": 1.69, + "grad_norm": 0.23046875, + "learning_rate": 1.3947023117457613e-05, + "loss": 2.1503, + "step": 10000 + }, + { + "epoch": 1.69, + "grad_norm": 0.236328125, + "learning_rate": 1.3871821781156858e-05, + "loss": 2.1238, + "step": 10005 + }, + { + "epoch": 1.7, + "grad_norm": 0.2294921875, + "learning_rate": 1.3796808620571121e-05, + "loss": 2.124, + "step": 10010 + }, + { + "epoch": 1.7, + "grad_norm": 0.228515625, + "learning_rate": 1.3721983799591732e-05, + "loss": 2.1265, + "step": 10015 + }, + { + "epoch": 1.7, + "grad_norm": 0.240234375, + "learning_rate": 1.3647347481698358e-05, + "loss": 2.1128, + "step": 10020 + }, + { + "epoch": 1.7, + "grad_norm": 0.22265625, + "learning_rate": 1.3572899829958963e-05, + "loss": 2.109, + "step": 10025 + }, + { + "epoch": 1.7, + "grad_norm": 0.2197265625, + "learning_rate": 1.3498641007029278e-05, + "loss": 2.1203, + "step": 10030 + }, + { + "epoch": 1.7, + "grad_norm": 0.232421875, + "learning_rate": 1.342457117515239e-05, + "loss": 2.1492, + "step": 10035 + }, + { + "epoch": 1.7, + "grad_norm": 0.2216796875, + "learning_rate": 1.3350690496158558e-05, + "loss": 2.0852, + "step": 10040 + }, + { + "epoch": 1.7, + "grad_norm": 0.2255859375, + "learning_rate": 1.3276999131464818e-05, + "loss": 2.1232, + "step": 10045 + }, + { + "epoch": 1.7, + "grad_norm": 0.228515625, + "learning_rate": 1.3203497242074437e-05, + "loss": 2.1541, + "step": 10050 + }, + { + "epoch": 1.7, + "grad_norm": 0.2314453125, + "learning_rate": 1.3130184988576855e-05, + "loss": 2.1114, + "step": 10055 + }, + { + "epoch": 1.7, + "grad_norm": 0.2265625, + "learning_rate": 1.3057062531147068e-05, + "loss": 2.0998, + "step": 10060 + }, + { + "epoch": 1.7, + "grad_norm": 0.2294921875, + "learning_rate": 1.2984130029545494e-05, + "loss": 2.1038, + "step": 10065 + }, + { + "epoch": 1.71, + "grad_norm": 0.220703125, + "learning_rate": 1.291138764311749e-05, + "loss": 2.135, + "step": 10070 + }, + { + "epoch": 1.71, + "grad_norm": 0.2236328125, + "learning_rate": 1.2838835530793048e-05, + "loss": 2.1491, + "step": 10075 + }, + { + "epoch": 1.71, + "grad_norm": 0.2294921875, + "learning_rate": 1.2766473851086435e-05, + "loss": 2.1368, + "step": 10080 + }, + { + "epoch": 1.71, + "grad_norm": 0.228515625, + "learning_rate": 1.2694302762095889e-05, + "loss": 2.0915, + "step": 10085 + }, + { + "epoch": 1.71, + "grad_norm": 0.2255859375, + "learning_rate": 1.2622322421503174e-05, + "loss": 2.1016, + "step": 10090 + }, + { + "epoch": 1.71, + "grad_norm": 0.2236328125, + "learning_rate": 1.2550532986573349e-05, + "loss": 2.1309, + "step": 10095 + }, + { + "epoch": 1.71, + "grad_norm": 0.23046875, + "learning_rate": 1.2478934614154359e-05, + "loss": 2.1227, + "step": 10100 + }, + { + "epoch": 1.71, + "grad_norm": 0.232421875, + "learning_rate": 1.2407527460676727e-05, + "loss": 2.1593, + "step": 10105 + }, + { + "epoch": 1.71, + "grad_norm": 0.228515625, + "learning_rate": 1.2336311682153201e-05, + "loss": 2.1171, + "step": 10110 + }, + { + "epoch": 1.71, + "grad_norm": 0.224609375, + "learning_rate": 1.2265287434178352e-05, + "loss": 2.0602, + "step": 10115 + }, + { + "epoch": 1.71, + "grad_norm": 0.2236328125, + "learning_rate": 1.2194454871928329e-05, + "loss": 2.099, + "step": 10120 + }, + { + "epoch": 1.71, + "grad_norm": 0.2314453125, + "learning_rate": 1.2123814150160484e-05, + "loss": 2.0976, + "step": 10125 + }, + { + "epoch": 1.72, + "grad_norm": 0.2333984375, + "learning_rate": 1.2053365423213026e-05, + "loss": 2.1502, + "step": 10130 + }, + { + "epoch": 1.72, + "grad_norm": 0.228515625, + "learning_rate": 1.1983108845004675e-05, + "loss": 2.1327, + "step": 10135 + }, + { + "epoch": 1.72, + "grad_norm": 0.224609375, + "learning_rate": 1.1913044569034382e-05, + "loss": 2.1257, + "step": 10140 + }, + { + "epoch": 1.72, + "grad_norm": 0.220703125, + "learning_rate": 1.1843172748380848e-05, + "loss": 2.1449, + "step": 10145 + }, + { + "epoch": 1.72, + "grad_norm": 0.2314453125, + "learning_rate": 1.1773493535702385e-05, + "loss": 2.0872, + "step": 10150 + }, + { + "epoch": 1.72, + "grad_norm": 0.2216796875, + "learning_rate": 1.1704007083236457e-05, + "loss": 2.1356, + "step": 10155 + }, + { + "epoch": 1.72, + "grad_norm": 0.2236328125, + "learning_rate": 1.1634713542799402e-05, + "loss": 2.1342, + "step": 10160 + }, + { + "epoch": 1.72, + "grad_norm": 0.23046875, + "learning_rate": 1.1565613065786029e-05, + "loss": 2.1246, + "step": 10165 + }, + { + "epoch": 1.72, + "grad_norm": 0.2275390625, + "learning_rate": 1.1496705803169405e-05, + "loss": 2.1233, + "step": 10170 + }, + { + "epoch": 1.72, + "grad_norm": 0.23046875, + "learning_rate": 1.1427991905500369e-05, + "loss": 2.1482, + "step": 10175 + }, + { + "epoch": 1.72, + "grad_norm": 0.234375, + "learning_rate": 1.1359471522907361e-05, + "loss": 2.1573, + "step": 10180 + }, + { + "epoch": 1.72, + "grad_norm": 0.228515625, + "learning_rate": 1.1291144805095954e-05, + "loss": 2.1015, + "step": 10185 + }, + { + "epoch": 1.73, + "grad_norm": 0.2236328125, + "learning_rate": 1.12230119013487e-05, + "loss": 2.1213, + "step": 10190 + }, + { + "epoch": 1.73, + "grad_norm": 0.2275390625, + "learning_rate": 1.1155072960524626e-05, + "loss": 2.1287, + "step": 10195 + }, + { + "epoch": 1.73, + "grad_norm": 0.2275390625, + "learning_rate": 1.1087328131058961e-05, + "loss": 2.1512, + "step": 10200 + }, + { + "epoch": 1.73, + "grad_norm": 0.2294921875, + "learning_rate": 1.1019777560962885e-05, + "loss": 2.1717, + "step": 10205 + }, + { + "epoch": 1.73, + "grad_norm": 0.2236328125, + "learning_rate": 1.0952421397823165e-05, + "loss": 2.1036, + "step": 10210 + }, + { + "epoch": 1.73, + "grad_norm": 0.228515625, + "learning_rate": 1.0885259788801716e-05, + "loss": 2.1408, + "step": 10215 + }, + { + "epoch": 1.73, + "grad_norm": 0.224609375, + "learning_rate": 1.0818292880635528e-05, + "loss": 2.1403, + "step": 10220 + }, + { + "epoch": 1.73, + "grad_norm": 0.228515625, + "learning_rate": 1.0751520819636141e-05, + "loss": 2.1093, + "step": 10225 + }, + { + "epoch": 1.73, + "grad_norm": 0.232421875, + "learning_rate": 1.0684943751689336e-05, + "loss": 2.1154, + "step": 10230 + }, + { + "epoch": 1.73, + "grad_norm": 0.234375, + "learning_rate": 1.0618561822254935e-05, + "loss": 2.1379, + "step": 10235 + }, + { + "epoch": 1.73, + "grad_norm": 0.23046875, + "learning_rate": 1.0552375176366369e-05, + "loss": 2.1437, + "step": 10240 + }, + { + "epoch": 1.74, + "grad_norm": 0.2451171875, + "learning_rate": 1.048638395863043e-05, + "loss": 2.0852, + "step": 10245 + }, + { + "epoch": 1.74, + "grad_norm": 0.2236328125, + "learning_rate": 1.0420588313226975e-05, + "loss": 2.1063, + "step": 10250 + }, + { + "epoch": 1.74, + "grad_norm": 0.23046875, + "learning_rate": 1.0354988383908482e-05, + "loss": 2.1128, + "step": 10255 + }, + { + "epoch": 1.74, + "grad_norm": 0.232421875, + "learning_rate": 1.0289584313999867e-05, + "loss": 2.1065, + "step": 10260 + }, + { + "epoch": 1.74, + "grad_norm": 0.228515625, + "learning_rate": 1.0224376246398148e-05, + "loss": 2.114, + "step": 10265 + }, + { + "epoch": 1.74, + "grad_norm": 0.2265625, + "learning_rate": 1.0159364323572052e-05, + "loss": 2.1456, + "step": 10270 + }, + { + "epoch": 1.74, + "grad_norm": 0.22265625, + "learning_rate": 1.0094548687561777e-05, + "loss": 2.0623, + "step": 10275 + }, + { + "epoch": 1.74, + "grad_norm": 0.232421875, + "learning_rate": 1.0029929479978773e-05, + "loss": 2.08, + "step": 10280 + }, + { + "epoch": 1.74, + "grad_norm": 0.224609375, + "learning_rate": 9.965506842005145e-06, + "loss": 2.1093, + "step": 10285 + }, + { + "epoch": 1.74, + "grad_norm": 0.2314453125, + "learning_rate": 9.901280914393696e-06, + "loss": 2.0921, + "step": 10290 + }, + { + "epoch": 1.74, + "grad_norm": 0.2275390625, + "learning_rate": 9.83725183746731e-06, + "loss": 2.1175, + "step": 10295 + }, + { + "epoch": 1.74, + "grad_norm": 0.2265625, + "learning_rate": 9.773419751118872e-06, + "loss": 2.1462, + "step": 10300 + }, + { + "epoch": 1.75, + "grad_norm": 0.23046875, + "learning_rate": 9.70978479481085e-06, + "loss": 2.1439, + "step": 10305 + }, + { + "epoch": 1.75, + "grad_norm": 0.2236328125, + "learning_rate": 9.646347107575037e-06, + "loss": 2.1056, + "step": 10310 + }, + { + "epoch": 1.75, + "grad_norm": 0.2333984375, + "learning_rate": 9.58310682801219e-06, + "loss": 2.1516, + "step": 10315 + }, + { + "epoch": 1.75, + "grad_norm": 0.2216796875, + "learning_rate": 9.520064094291791e-06, + "loss": 2.1227, + "step": 10320 + }, + { + "epoch": 1.75, + "grad_norm": 0.224609375, + "learning_rate": 9.457219044151689e-06, + "loss": 2.125, + "step": 10325 + }, + { + "epoch": 1.75, + "grad_norm": 0.2265625, + "learning_rate": 9.394571814897856e-06, + "loss": 2.1679, + "step": 10330 + }, + { + "epoch": 1.75, + "grad_norm": 0.2314453125, + "learning_rate": 9.332122543404031e-06, + "loss": 2.1152, + "step": 10335 + }, + { + "epoch": 1.75, + "grad_norm": 0.224609375, + "learning_rate": 9.269871366111494e-06, + "loss": 2.1207, + "step": 10340 + }, + { + "epoch": 1.75, + "grad_norm": 0.2265625, + "learning_rate": 9.207818419028669e-06, + "loss": 2.1568, + "step": 10345 + }, + { + "epoch": 1.75, + "grad_norm": 0.2314453125, + "learning_rate": 9.14596383773093e-06, + "loss": 2.1264, + "step": 10350 + }, + { + "epoch": 1.75, + "grad_norm": 0.2177734375, + "learning_rate": 9.0843077573602e-06, + "loss": 2.1534, + "step": 10355 + }, + { + "epoch": 1.75, + "grad_norm": 0.228515625, + "learning_rate": 9.02285031262473e-06, + "loss": 2.1215, + "step": 10360 + }, + { + "epoch": 1.76, + "grad_norm": 0.2294921875, + "learning_rate": 8.961591637798827e-06, + "loss": 2.1418, + "step": 10365 + }, + { + "epoch": 1.76, + "grad_norm": 0.2216796875, + "learning_rate": 8.900531866722472e-06, + "loss": 2.1256, + "step": 10370 + }, + { + "epoch": 1.76, + "grad_norm": 0.228515625, + "learning_rate": 8.839671132801097e-06, + "loss": 2.143, + "step": 10375 + }, + { + "epoch": 1.76, + "grad_norm": 0.2255859375, + "learning_rate": 8.779009569005236e-06, + "loss": 2.1145, + "step": 10380 + }, + { + "epoch": 1.76, + "grad_norm": 0.2265625, + "learning_rate": 8.718547307870316e-06, + "loss": 2.1316, + "step": 10385 + }, + { + "epoch": 1.76, + "grad_norm": 0.228515625, + "learning_rate": 8.658284481496303e-06, + "loss": 2.165, + "step": 10390 + }, + { + "epoch": 1.76, + "grad_norm": 0.2412109375, + "learning_rate": 8.59822122154741e-06, + "loss": 2.1197, + "step": 10395 + }, + { + "epoch": 1.76, + "grad_norm": 0.228515625, + "learning_rate": 8.538357659251872e-06, + "loss": 2.1258, + "step": 10400 + }, + { + "epoch": 1.76, + "grad_norm": 0.2236328125, + "learning_rate": 8.478693925401604e-06, + "loss": 2.1139, + "step": 10405 + }, + { + "epoch": 1.76, + "grad_norm": 0.2275390625, + "learning_rate": 8.419230150351886e-06, + "loss": 2.1272, + "step": 10410 + }, + { + "epoch": 1.76, + "grad_norm": 0.22265625, + "learning_rate": 8.359966464021196e-06, + "loss": 2.1235, + "step": 10415 + }, + { + "epoch": 1.76, + "grad_norm": 0.234375, + "learning_rate": 8.300902995890747e-06, + "loss": 2.1193, + "step": 10420 + }, + { + "epoch": 1.77, + "grad_norm": 0.2353515625, + "learning_rate": 8.242039875004437e-06, + "loss": 2.1293, + "step": 10425 + }, + { + "epoch": 1.77, + "grad_norm": 0.2265625, + "learning_rate": 8.18337722996837e-06, + "loss": 2.1085, + "step": 10430 + }, + { + "epoch": 1.77, + "grad_norm": 0.228515625, + "learning_rate": 8.124915188950611e-06, + "loss": 2.1161, + "step": 10435 + }, + { + "epoch": 1.77, + "grad_norm": 0.228515625, + "learning_rate": 8.066653879680997e-06, + "loss": 2.0748, + "step": 10440 + }, { - "epoch": 1.78, - "grad_norm": 0.2177734375, - "learning_rate": 1.394379485612085e-05, - "loss": 2.1497, - "step": 9620 + "epoch": 1.77, + "grad_norm": 0.2333984375, + "learning_rate": 8.008593429450806e-06, + "loss": 2.1358, + "step": 10445 }, { - "epoch": 1.79, + "epoch": 1.77, + "grad_norm": 0.21875, + "learning_rate": 7.950733965112378e-06, + "loss": 2.1242, + "step": 10450 + }, + { + "epoch": 1.77, + "grad_norm": 0.2294921875, + "learning_rate": 7.893075613079048e-06, + "loss": 2.1048, + "step": 10455 + }, + { + "epoch": 1.77, + "grad_norm": 0.2236328125, + "learning_rate": 7.835618499324726e-06, + "loss": 2.0658, + "step": 10460 + }, + { + "epoch": 1.77, + "grad_norm": 0.2275390625, + "learning_rate": 7.778362749383571e-06, + "loss": 2.1162, + "step": 10465 + }, + { + "epoch": 1.77, + "grad_norm": 0.2275390625, + "learning_rate": 7.72130848834991e-06, + "loss": 2.148, + "step": 10470 + }, + { + "epoch": 1.77, + "grad_norm": 0.2275390625, + "learning_rate": 7.66445584087776e-06, + "loss": 2.1371, + "step": 10475 + }, + { + "epoch": 1.77, "grad_norm": 0.224609375, - "learning_rate": 1.3825250271159173e-05, - "loss": 2.1561, - "step": 9625 + "learning_rate": 7.607804931180662e-06, + "loss": 2.0816, + "step": 10480 + }, + { + "epoch": 1.78, + "grad_norm": 0.2294921875, + "learning_rate": 7.5513558830314745e-06, + "loss": 2.1102, + "step": 10485 + }, + { + "epoch": 1.78, + "grad_norm": 0.228515625, + "learning_rate": 7.495108819761898e-06, + "loss": 2.1227, + "step": 10490 + }, + { + "epoch": 1.78, + "grad_norm": 0.23046875, + "learning_rate": 7.43906386426243e-06, + "loss": 2.1205, + "step": 10495 + }, + { + "epoch": 1.78, + "grad_norm": 0.2255859375, + "learning_rate": 7.383221138981966e-06, + "loss": 2.1385, + "step": 10500 + }, + { + "epoch": 1.78, + "grad_norm": 0.228515625, + "learning_rate": 7.3275807659275e-06, + "loss": 2.0769, + "step": 10505 + }, + { + "epoch": 1.78, + "grad_norm": 0.2236328125, + "learning_rate": 7.272142866664023e-06, + "loss": 2.1113, + "step": 10510 + }, + { + "epoch": 1.78, + "grad_norm": 0.2294921875, + "learning_rate": 7.216907562314079e-06, + "loss": 2.1326, + "step": 10515 + }, + { + "epoch": 1.78, + "grad_norm": 0.2197265625, + "learning_rate": 7.161874973557625e-06, + "loss": 2.1203, + "step": 10520 + }, + { + "epoch": 1.78, + "grad_norm": 0.2255859375, + "learning_rate": 7.107045220631692e-06, + "loss": 2.1155, + "step": 10525 + }, + { + "epoch": 1.78, + "grad_norm": 0.2255859375, + "learning_rate": 7.05241842333012e-06, + "loss": 2.1306, + "step": 10530 + }, + { + "epoch": 1.78, + "grad_norm": 0.21875, + "learning_rate": 6.9979947010033965e-06, + "loss": 2.1211, + "step": 10535 }, { "epoch": 1.79, "grad_norm": 0.2265625, - "learning_rate": 1.3707193706508391e-05, - "loss": 2.0875, - "step": 9630 + "learning_rate": 6.943774172558259e-06, + "loss": 2.1107, + "step": 10540 }, { "epoch": 1.79, - "grad_norm": 0.2138671875, - "learning_rate": 1.3589625471630562e-05, - "loss": 2.0693, - "step": 9635 + "grad_norm": 0.228515625, + "learning_rate": 6.889756956457538e-06, + "loss": 2.1414, + "step": 10545 }, { "epoch": 1.79, - "grad_norm": 0.2216796875, - "learning_rate": 1.3472545874707565e-05, - "loss": 2.1013, - "step": 9640 + "grad_norm": 0.2333984375, + "learning_rate": 6.835943170719839e-06, + "loss": 2.106, + "step": 10550 }, { "epoch": 1.79, - "grad_norm": 0.2197265625, - "learning_rate": 1.3355955222640326e-05, - "loss": 2.1421, - "step": 9645 + "grad_norm": 0.2265625, + "learning_rate": 6.782332932919344e-06, + "loss": 2.085, + "step": 10555 }, { "epoch": 1.79, - "grad_norm": 0.216796875, - "learning_rate": 1.3239853821048287e-05, - "loss": 2.0988, - "step": 9650 + "grad_norm": 0.220703125, + "learning_rate": 6.72892636018545e-06, + "loss": 2.113, + "step": 10560 }, { "epoch": 1.79, - "grad_norm": 0.2216796875, - "learning_rate": 1.3124241974268291e-05, - "loss": 2.1146, - "step": 9655 + "grad_norm": 0.2294921875, + "learning_rate": 6.6757235692026295e-06, + "loss": 2.1459, + "step": 10565 }, { "epoch": 1.79, - "grad_norm": 0.2216796875, - "learning_rate": 1.3009119985353969e-05, - "loss": 2.1093, - "step": 9660 + "grad_norm": 0.2314453125, + "learning_rate": 6.622724676210113e-06, + "loss": 2.1171, + "step": 10570 }, { "epoch": 1.79, - "grad_norm": 0.2216796875, - "learning_rate": 1.2894488156074813e-05, - "loss": 2.1261, - "step": 9665 + "grad_norm": 0.228515625, + "learning_rate": 6.569929797001651e-06, + "loss": 2.0854, + "step": 10575 }, { "epoch": 1.79, - "grad_norm": 0.21484375, - "learning_rate": 1.2780346786915598e-05, - "loss": 2.1356, - "step": 9670 + "grad_norm": 0.23046875, + "learning_rate": 6.517339046925264e-06, + "loss": 2.0713, + "step": 10580 }, { "epoch": 1.79, - "grad_norm": 0.2236328125, - "learning_rate": 1.2666696177075276e-05, - "loss": 2.1216, - "step": 9675 + "grad_norm": 0.2265625, + "learning_rate": 6.4649525408829846e-06, + "loss": 2.1328, + "step": 10585 + }, + { + "epoch": 1.79, + "grad_norm": 0.228515625, + "learning_rate": 6.412770393330558e-06, + "loss": 2.0968, + "step": 10590 + }, + { + "epoch": 1.79, + "grad_norm": 0.228515625, + "learning_rate": 6.36079271827732e-06, + "loss": 2.1114, + "step": 10595 }, { "epoch": 1.8, - "grad_norm": 0.224609375, - "learning_rate": 1.2553536624466456e-05, - "loss": 2.137, - "step": 9680 + "grad_norm": 0.2333984375, + "learning_rate": 6.309019629285795e-06, + "loss": 2.1412, + "step": 10600 }, { "epoch": 1.8, - "grad_norm": 0.224609375, - "learning_rate": 1.2440868425714613e-05, - "loss": 2.1244, - "step": 9685 + "grad_norm": 0.240234375, + "learning_rate": 6.257451239471579e-06, + "loss": 2.1464, + "step": 10605 }, { "epoch": 1.8, - "grad_norm": 0.216796875, - "learning_rate": 1.2328691876157128e-05, - "loss": 2.1037, - "step": 9690 + "grad_norm": 0.22265625, + "learning_rate": 6.206087661503013e-06, + "loss": 2.0909, + "step": 10610 }, { "epoch": 1.8, - "grad_norm": 0.2216796875, - "learning_rate": 1.2217007269842651e-05, - "loss": 2.0922, - "step": 9695 + "grad_norm": 0.23046875, + "learning_rate": 6.154929007600929e-06, + "loss": 2.1204, + "step": 10615 }, { "epoch": 1.8, - "grad_norm": 0.2197265625, - "learning_rate": 1.2105814899530288e-05, - "loss": 2.1027, - "step": 9700 + "grad_norm": 0.2265625, + "learning_rate": 6.103975389538474e-06, + "loss": 2.1196, + "step": 10620 }, { "epoch": 1.8, - "grad_norm": 0.2294921875, - "learning_rate": 1.1995115056688889e-05, - "loss": 2.1215, - "step": 9705 + "grad_norm": 0.2265625, + "learning_rate": 6.053226918640809e-06, + "loss": 2.1293, + "step": 10625 }, { "epoch": 1.8, - "grad_norm": 0.220703125, - "learning_rate": 1.18849080314962e-05, - "loss": 2.1065, - "step": 9710 + "grad_norm": 0.2236328125, + "learning_rate": 6.002683705784884e-06, + "loss": 2.1184, + "step": 10630 }, { "epoch": 1.8, - "grad_norm": 0.224609375, - "learning_rate": 1.177519411283814e-05, - "loss": 2.112, - "step": 9715 + "grad_norm": 0.23046875, + "learning_rate": 5.9523458613992e-06, + "loss": 2.1225, + "step": 10635 }, { "epoch": 1.8, - "grad_norm": 0.2197265625, - "learning_rate": 1.1665973588308122e-05, - "loss": 2.1123, - "step": 9720 + "grad_norm": 0.2294921875, + "learning_rate": 5.902213495463571e-06, + "loss": 2.1736, + "step": 10640 }, { "epoch": 1.8, - "grad_norm": 0.22265625, - "learning_rate": 1.1557246744206084e-05, - "loss": 2.1194, - "step": 9725 + "grad_norm": 0.2236328125, + "learning_rate": 5.852286717508826e-06, + "loss": 2.1048, + "step": 10645 + }, + { + "epoch": 1.8, + "grad_norm": 0.234375, + "learning_rate": 5.802565636616686e-06, + "loss": 2.108, + "step": 10650 + }, + { + "epoch": 1.8, + "grad_norm": 0.2236328125, + "learning_rate": 5.753050361419388e-06, + "loss": 2.1427, + "step": 10655 }, { "epoch": 1.81, "grad_norm": 0.2333984375, - "learning_rate": 1.1449013865538027e-05, - "loss": 2.0927, - "step": 9730 + "learning_rate": 5.703741000099594e-06, + "loss": 2.1246, + "step": 10660 + }, + { + "epoch": 1.81, + "grad_norm": 0.2216796875, + "learning_rate": 5.65463766039005e-06, + "loss": 2.1215, + "step": 10665 + }, + { + "epoch": 1.81, + "grad_norm": 0.23046875, + "learning_rate": 5.605740449573327e-06, + "loss": 2.0739, + "step": 10670 }, { "epoch": 1.81, - "grad_norm": 0.2138671875, - "learning_rate": 1.1341275236015003e-05, - "loss": 2.1198, - "step": 9735 + "grad_norm": 0.23046875, + "learning_rate": 5.557049474481702e-06, + "loss": 2.1136, + "step": 10675 }, { "epoch": 1.81, - "grad_norm": 0.2138671875, - "learning_rate": 1.1234031138052592e-05, - "loss": 2.0599, - "step": 9740 + "grad_norm": 0.2314453125, + "learning_rate": 5.508564841496855e-06, + "loss": 2.0865, + "step": 10680 }, { "epoch": 1.81, - "grad_norm": 0.224609375, - "learning_rate": 1.1127281852769944e-05, - "loss": 2.1762, - "step": 9745 + "grad_norm": 0.228515625, + "learning_rate": 5.4602866565495845e-06, + "loss": 2.1447, + "step": 10685 }, { "epoch": 1.81, - "grad_norm": 0.2255859375, - "learning_rate": 1.1021027659989225e-05, - "loss": 2.106, - "step": 9750 + "grad_norm": 0.228515625, + "learning_rate": 5.412215025119716e-06, + "loss": 2.1571, + "step": 10690 }, { "epoch": 1.81, - "grad_norm": 0.2236328125, - "learning_rate": 1.0915268838234838e-05, - "loss": 2.1149, - "step": 9755 + "grad_norm": 0.21484375, + "learning_rate": 5.364350052235767e-06, + "loss": 2.1296, + "step": 10695 }, { "epoch": 1.81, - "grad_norm": 0.22265625, - "learning_rate": 1.0810005664732558e-05, - "loss": 2.1219, - "step": 9760 + "grad_norm": 0.220703125, + "learning_rate": 5.316691842474686e-06, + "loss": 2.1316, + "step": 10700 }, { "epoch": 1.81, - "grad_norm": 0.2177734375, - "learning_rate": 1.0705238415409068e-05, - "loss": 2.1004, - "step": 9765 + "grad_norm": 0.23046875, + "learning_rate": 5.269240499961747e-06, + "loss": 2.1234, + "step": 10705 }, { "epoch": 1.81, - "grad_norm": 0.224609375, - "learning_rate": 1.0600967364891001e-05, - "loss": 2.0973, - "step": 9770 + "grad_norm": 0.2265625, + "learning_rate": 5.22199612837021e-06, + "loss": 2.1094, + "step": 10710 }, { "epoch": 1.81, - "grad_norm": 0.21875, - "learning_rate": 1.0497192786504228e-05, - "loss": 2.1278, - "step": 9775 + "grad_norm": 0.2236328125, + "learning_rate": 5.17495883092115e-06, + "loss": 2.1003, + "step": 10715 }, { - "epoch": 1.81, - "grad_norm": 0.2216796875, - "learning_rate": 1.0393914952273398e-05, - "loss": 2.1386, - "step": 9780 + "epoch": 1.82, + "grad_norm": 0.224609375, + "learning_rate": 5.1281287103832285e-06, + "loss": 2.1195, + "step": 10720 }, { "epoch": 1.82, - "grad_norm": 0.2158203125, - "learning_rate": 1.0291134132920866e-05, - "loss": 2.1055, - "step": 9785 + "grad_norm": 0.2353515625, + "learning_rate": 5.081505869072445e-06, + "loss": 2.1281, + "step": 10725 }, { "epoch": 1.82, - "grad_norm": 0.2177734375, - "learning_rate": 1.0188850597866273e-05, - "loss": 2.1044, - "step": 9790 + "grad_norm": 0.228515625, + "learning_rate": 5.035090408851961e-06, + "loss": 2.1098, + "step": 10730 }, { "epoch": 1.82, - "grad_norm": 0.2255859375, - "learning_rate": 1.0087064615225683e-05, - "loss": 2.1127, - "step": 9795 + "grad_norm": 0.228515625, + "learning_rate": 4.988882431131814e-06, + "loss": 2.1547, + "step": 10735 }, { "epoch": 1.82, - "grad_norm": 0.2177734375, - "learning_rate": 9.985776451810936e-06, - "loss": 2.1143, - "step": 9800 + "grad_norm": 0.2294921875, + "learning_rate": 4.942882036868712e-06, + "loss": 2.1152, + "step": 10740 }, { "epoch": 1.82, - "grad_norm": 0.2138671875, - "learning_rate": 9.884986373128934e-06, - "loss": 2.1134, - "step": 9805 + "grad_norm": 0.2333984375, + "learning_rate": 4.897089326565874e-06, + "loss": 2.1086, + "step": 10745 }, { "epoch": 1.82, - "grad_norm": 0.2197265625, - "learning_rate": 9.78469464338092e-06, - "loss": 2.1277, - "step": 9810 + "grad_norm": 0.234375, + "learning_rate": 4.851504400272722e-06, + "loss": 2.1177, + "step": 10750 }, { "epoch": 1.82, - "grad_norm": 0.2197265625, - "learning_rate": 9.684901525461865e-06, - "loss": 2.1005, - "step": 9815 + "grad_norm": 0.2333984375, + "learning_rate": 4.806127357584745e-06, + "loss": 2.1149, + "step": 10755 }, { "epoch": 1.82, - "grad_norm": 0.2216796875, - "learning_rate": 9.58560728095974e-06, - "loss": 2.1352, - "step": 9820 + "grad_norm": 0.2255859375, + "learning_rate": 4.760958297643192e-06, + "loss": 2.1224, + "step": 10760 }, { "epoch": 1.82, - "grad_norm": 0.224609375, - "learning_rate": 9.486812170154724e-06, - "loss": 2.1596, - "step": 9825 + "grad_norm": 0.2314453125, + "learning_rate": 4.715997319134968e-06, + "loss": 2.0825, + "step": 10765 }, { "epoch": 1.82, - "grad_norm": 0.21875, - "learning_rate": 9.388516452018702e-06, - "loss": 2.1258, - "step": 9830 + "grad_norm": 0.2314453125, + "learning_rate": 4.671244520292273e-06, + "loss": 2.1383, + "step": 10770 }, { "epoch": 1.82, - "grad_norm": 0.2158203125, - "learning_rate": 9.290720384214479e-06, - "loss": 2.159, - "step": 9835 + "grad_norm": 0.232421875, + "learning_rate": 4.626699998892548e-06, + "loss": 2.1529, + "step": 10775 }, { "epoch": 1.83, - "grad_norm": 0.22265625, - "learning_rate": 9.193424223095103e-06, - "loss": 2.093, - "step": 9840 + "grad_norm": 0.234375, + "learning_rate": 4.58236385225812e-06, + "loss": 2.1247, + "step": 10780 }, { "epoch": 1.83, - "grad_norm": 0.2177734375, - "learning_rate": 9.096628223703207e-06, - "loss": 2.0915, - "step": 9845 + "grad_norm": 0.224609375, + "learning_rate": 4.538236177256106e-06, + "loss": 2.1216, + "step": 10785 }, { "epoch": 1.83, - "grad_norm": 0.212890625, - "learning_rate": 9.00033263977047e-06, - "loss": 2.1096, - "step": 9850 + "grad_norm": 0.236328125, + "learning_rate": 4.4943170702981266e-06, + "loss": 2.1224, + "step": 10790 }, { "epoch": 1.83, - "grad_norm": 0.2177734375, - "learning_rate": 8.904537723716621e-06, - "loss": 2.0945, - "step": 9855 + "grad_norm": 0.236328125, + "learning_rate": 4.45060662734007e-06, + "loss": 2.1268, + "step": 10795 }, { "epoch": 1.83, - "grad_norm": 0.2216796875, - "learning_rate": 8.809243726649107e-06, - "loss": 2.111, - "step": 9860 + "grad_norm": 0.220703125, + "learning_rate": 4.407104943882001e-06, + "loss": 2.1131, + "step": 10800 }, { "epoch": 1.83, - "grad_norm": 0.216796875, - "learning_rate": 8.71445089836238e-06, - "loss": 2.1174, - "step": 9865 + "grad_norm": 0.23046875, + "learning_rate": 4.363812114967847e-06, + "loss": 2.1314, + "step": 10805 }, { "epoch": 1.83, - "grad_norm": 0.21875, - "learning_rate": 8.620159487337076e-06, - "loss": 2.1279, - "step": 9870 + "grad_norm": 0.228515625, + "learning_rate": 4.320728235185212e-06, + "loss": 2.1682, + "step": 10810 }, { "epoch": 1.83, - "grad_norm": 0.2158203125, - "learning_rate": 8.526369740739481e-06, - "loss": 2.1237, - "step": 9875 + "grad_norm": 0.2275390625, + "learning_rate": 4.277853398665199e-06, + "loss": 2.1185, + "step": 10815 }, { "epoch": 1.83, - "grad_norm": 0.2314453125, - "learning_rate": 8.43308190442087e-06, - "loss": 2.1158, - "step": 9880 + "grad_norm": 0.23046875, + "learning_rate": 4.2351876990821995e-06, + "loss": 2.1275, + "step": 10820 }, { "epoch": 1.83, - "grad_norm": 0.228515625, - "learning_rate": 8.340296222916921e-06, - "loss": 2.1349, - "step": 9885 + "grad_norm": 0.2294921875, + "learning_rate": 4.192731229653623e-06, + "loss": 2.1367, + "step": 10825 }, { "epoch": 1.83, - "grad_norm": 0.2216796875, - "learning_rate": 8.24801293944688e-06, - "loss": 2.0741, - "step": 9890 + "grad_norm": 0.2236328125, + "learning_rate": 4.150484083139783e-06, + "loss": 2.1177, + "step": 10830 }, { "epoch": 1.84, - "grad_norm": 0.2138671875, - "learning_rate": 8.15623229591318e-06, - "loss": 2.1205, - "step": 9895 + "grad_norm": 0.2236328125, + "learning_rate": 4.108446351843676e-06, + "loss": 2.1122, + "step": 10835 }, { "epoch": 1.84, - "grad_norm": 0.21875, - "learning_rate": 8.064954532900659e-06, - "loss": 2.1016, - "step": 9900 + "grad_norm": 0.228515625, + "learning_rate": 4.066618127610722e-06, + "loss": 2.1636, + "step": 10840 }, { "epoch": 1.84, - "grad_norm": 0.2158203125, - "learning_rate": 7.97417988967588e-06, - "loss": 2.1264, - "step": 9905 + "grad_norm": 0.2255859375, + "learning_rate": 4.0249995018286415e-06, + "loss": 2.1378, + "step": 10845 }, { "epoch": 1.84, - "grad_norm": 0.2177734375, - "learning_rate": 7.883908604186685e-06, - "loss": 2.0931, - "step": 9910 + "grad_norm": 0.2216796875, + "learning_rate": 3.9835905654271535e-06, + "loss": 2.1095, + "step": 10850 }, { "epoch": 1.84, - "grad_norm": 0.2177734375, - "learning_rate": 7.794140913061366e-06, - "loss": 2.125, - "step": 9915 + "grad_norm": 0.23046875, + "learning_rate": 3.942391408877922e-06, + "loss": 2.1403, + "step": 10855 }, { "epoch": 1.84, - "grad_norm": 0.21875, - "learning_rate": 7.704877051608206e-06, - "loss": 2.0809, - "step": 9920 + "grad_norm": 0.2236328125, + "learning_rate": 3.90140212219422e-06, + "loss": 2.0605, + "step": 10860 }, { "epoch": 1.84, "grad_norm": 0.2265625, - "learning_rate": 7.61611725381477e-06, - "loss": 2.1238, - "step": 9925 + "learning_rate": 3.860622794930801e-06, + "loss": 2.0844, + "step": 10865 }, { "epoch": 1.84, - "grad_norm": 0.21484375, - "learning_rate": 7.5278617523472985e-06, - "loss": 2.0693, - "step": 9930 + "grad_norm": 0.23046875, + "learning_rate": 3.820053516183719e-06, + "loss": 2.1389, + "step": 10870 }, { "epoch": 1.84, "grad_norm": 0.2255859375, - "learning_rate": 7.440110778550224e-06, - "loss": 2.1027, - "step": 9935 + "learning_rate": 3.7796943745900924e-06, + "loss": 2.132, + "step": 10875 }, { "epoch": 1.84, "grad_norm": 0.2255859375, - "learning_rate": 7.352864562445283e-06, - "loss": 2.1164, - "step": 9940 + "learning_rate": 3.7395454583278868e-06, + "loss": 2.1547, + "step": 10880 + }, + { + "epoch": 1.84, + "grad_norm": 0.2353515625, + "learning_rate": 3.6996068551158115e-06, + "loss": 2.1167, + "step": 10885 + }, + { + "epoch": 1.84, + "grad_norm": 0.232421875, + "learning_rate": 3.659878652213056e-06, + "loss": 2.0709, + "step": 10890 }, { "epoch": 1.85, - "grad_norm": 0.216796875, - "learning_rate": 7.266123332731267e-06, - "loss": 2.1191, - "step": 9945 + "grad_norm": 0.220703125, + "learning_rate": 3.620360936419109e-06, + "loss": 2.1322, + "step": 10895 }, { "epoch": 1.85, - "grad_norm": 0.2158203125, - "learning_rate": 7.17988731678314e-06, - "loss": 2.1598, - "step": 9950 + "grad_norm": 0.2275390625, + "learning_rate": 3.581053794073619e-06, + "loss": 2.1527, + "step": 10900 }, { "epoch": 1.85, - "grad_norm": 0.2158203125, - "learning_rate": 7.094156740651525e-06, - "loss": 2.0958, - "step": 9955 + "grad_norm": 0.2275390625, + "learning_rate": 3.541957311056132e-06, + "loss": 2.0757, + "step": 10905 }, { "epoch": 1.85, - "grad_norm": 0.220703125, - "learning_rate": 7.0089318290622375e-06, - "loss": 2.1188, - "step": 9960 + "grad_norm": 0.224609375, + "learning_rate": 3.503071572785932e-06, + "loss": 2.1048, + "step": 10910 }, { "epoch": 1.85, - "grad_norm": 0.2216796875, - "learning_rate": 6.924212805415553e-06, - "loss": 2.0864, - "step": 9965 + "grad_norm": 0.2314453125, + "learning_rate": 3.4643966642219137e-06, + "loss": 2.126, + "step": 10915 }, { "epoch": 1.85, - "grad_norm": 0.2255859375, - "learning_rate": 6.839999891785609e-06, - "loss": 2.1835, - "step": 9970 + "grad_norm": 0.228515625, + "learning_rate": 3.425932669862264e-06, + "loss": 2.1336, + "step": 10920 }, { "epoch": 1.85, - "grad_norm": 0.22265625, - "learning_rate": 6.756293308919892e-06, - "loss": 2.091, - "step": 9975 + "grad_norm": 0.23828125, + "learning_rate": 3.387679673744404e-06, + "loss": 2.1349, + "step": 10925 }, { "epoch": 1.85, - "grad_norm": 0.2177734375, - "learning_rate": 6.673093276238751e-06, - "loss": 2.1132, - "step": 9980 + "grad_norm": 0.2197265625, + "learning_rate": 3.3496377594447905e-06, + "loss": 2.1169, + "step": 10930 + }, + { + "epoch": 1.85, + "grad_norm": 0.2265625, + "learning_rate": 3.311807010078627e-06, + "loss": 2.1101, + "step": 10935 }, { "epoch": 1.85, - "grad_norm": 0.224609375, - "learning_rate": 6.5904000118345745e-06, - "loss": 2.1281, - "step": 9985 + "grad_norm": 0.2314453125, + "learning_rate": 3.2741875082998195e-06, + "loss": 2.1645, + "step": 10940 }, { "epoch": 1.85, - "grad_norm": 0.21875, - "learning_rate": 6.508213732471391e-06, - "loss": 2.1066, - "step": 9990 + "grad_norm": 0.2294921875, + "learning_rate": 3.2367793363007213e-06, + "loss": 2.1003, + "step": 10945 }, { "epoch": 1.85, - "grad_norm": 0.2236328125, - "learning_rate": 6.426534653584337e-06, - "loss": 2.1207, - "step": 9995 + "grad_norm": 0.2314453125, + "learning_rate": 3.19958257581191e-06, + "loss": 2.1141, + "step": 10950 }, { "epoch": 1.86, - "grad_norm": 0.2236328125, - "learning_rate": 6.345362989278947e-06, - "loss": 2.0985, - "step": 10000 + "grad_norm": 0.224609375, + "learning_rate": 3.162597308102144e-06, + "loss": 2.1581, + "step": 10955 }, { "epoch": 1.86, - "grad_norm": 0.2236328125, - "learning_rate": 6.264698952330705e-06, - "loss": 2.1387, - "step": 10005 + "grad_norm": 0.2314453125, + "learning_rate": 3.125823613978052e-06, + "loss": 2.0951, + "step": 10960 }, { "epoch": 1.86, - "grad_norm": 0.21875, - "learning_rate": 6.18454275418443e-06, - "loss": 2.1239, - "step": 10010 + "grad_norm": 0.232421875, + "learning_rate": 3.0892615737840413e-06, + "loss": 2.07, + "step": 10965 }, { "epoch": 1.86, "grad_norm": 0.22265625, - "learning_rate": 6.104894604953759e-06, - "loss": 2.0651, - "step": 10015 + "learning_rate": 3.05291126740207e-06, + "loss": 2.1168, + "step": 10970 }, { "epoch": 1.86, - "grad_norm": 0.2255859375, - "learning_rate": 6.0257547134205725e-06, - "loss": 2.1373, - "step": 10020 + "grad_norm": 0.22265625, + "learning_rate": 3.0167727742514974e-06, + "loss": 2.1106, + "step": 10975 }, { "epoch": 1.86, - "grad_norm": 0.216796875, - "learning_rate": 5.947123287034439e-06, - "loss": 2.098, - "step": 10025 + "grad_norm": 0.232421875, + "learning_rate": 2.980846173288898e-06, + "loss": 2.1058, + "step": 10980 }, { "epoch": 1.86, - "grad_norm": 0.2177734375, - "learning_rate": 5.86900053191215e-06, - "loss": 2.0951, - "step": 10030 + "grad_norm": 0.2314453125, + "learning_rate": 2.9451315430079174e-06, + "loss": 2.0987, + "step": 10985 }, { "epoch": 1.86, - "grad_norm": 0.2216796875, - "learning_rate": 5.791386652837027e-06, - "loss": 2.1572, - "step": 10035 + "grad_norm": 0.2314453125, + "learning_rate": 2.9096289614390815e-06, + "loss": 2.0906, + "step": 10990 }, { "epoch": 1.86, - "grad_norm": 0.2294921875, - "learning_rate": 5.7142818532585515e-06, - "loss": 2.1361, - "step": 10040 + "grad_norm": 0.2392578125, + "learning_rate": 2.8743385061495876e-06, + "loss": 2.1334, + "step": 10995 }, { "epoch": 1.86, - "grad_norm": 0.220703125, - "learning_rate": 5.63768633529167e-06, - "loss": 2.1041, - "step": 10045 + "grad_norm": 0.2275390625, + "learning_rate": 2.8392602542432366e-06, + "loss": 2.1099, + "step": 11000 }, { "epoch": 1.86, - "grad_norm": 0.2119140625, - "learning_rate": 5.5616002997164185e-06, - "loss": 2.1299, - "step": 10050 + "grad_norm": 0.234375, + "learning_rate": 2.8043942823601233e-06, + "loss": 2.0759, + "step": 11005 }, { - "epoch": 1.87, - "grad_norm": 0.2294921875, - "learning_rate": 5.486023945977304e-06, - "loss": 2.1305, - "step": 10055 + "epoch": 1.86, + "grad_norm": 0.2236328125, + "learning_rate": 2.7697406666766123e-06, + "loss": 2.1445, + "step": 11010 }, { "epoch": 1.87, - "grad_norm": 0.2138671875, - "learning_rate": 5.4109574721827646e-06, - "loss": 2.0973, - "step": 10060 + "grad_norm": 0.23046875, + "learning_rate": 2.7352994829050627e-06, + "loss": 2.1399, + "step": 11015 }, { "epoch": 1.87, - "grad_norm": 0.2138671875, - "learning_rate": 5.336401075104825e-06, - "loss": 2.0838, - "step": 10065 + "grad_norm": 0.232421875, + "learning_rate": 2.701070806293726e-06, + "loss": 2.1307, + "step": 11020 }, { "epoch": 1.87, - "grad_norm": 0.21875, - "learning_rate": 5.262354950178217e-06, - "loss": 2.1095, - "step": 10070 + "grad_norm": 0.2236328125, + "learning_rate": 2.66705471162656e-06, + "loss": 2.1047, + "step": 11025 }, { "epoch": 1.87, "grad_norm": 0.2255859375, - "learning_rate": 5.188819291500302e-06, - "loss": 2.1235, - "step": 10075 + "learning_rate": 2.6332512732230585e-06, + "loss": 2.102, + "step": 11030 }, { "epoch": 1.87, - "grad_norm": 0.220703125, - "learning_rate": 5.115794291830245e-06, - "loss": 2.1142, - "step": 10080 + "grad_norm": 0.2353515625, + "learning_rate": 2.5996605649381e-06, + "loss": 2.1327, + "step": 11035 }, { "epoch": 1.87, - "grad_norm": 0.21484375, - "learning_rate": 5.04328014258868e-06, - "loss": 2.09, - "step": 10085 + "grad_norm": 0.2294921875, + "learning_rate": 2.5662826601617783e-06, + "loss": 2.1174, + "step": 11040 }, { "epoch": 1.87, - "grad_norm": 0.22265625, - "learning_rate": 4.971277033857092e-06, - "loss": 2.1228, - "step": 10090 + "grad_norm": 0.23046875, + "learning_rate": 2.5331176318192706e-06, + "loss": 2.1236, + "step": 11045 }, { "epoch": 1.87, - "grad_norm": 0.2138671875, - "learning_rate": 4.89978515437739e-06, - "loss": 2.0957, - "step": 10095 + "grad_norm": 0.232421875, + "learning_rate": 2.500165552370615e-06, + "loss": 2.0935, + "step": 11050 }, { "epoch": 1.87, - "grad_norm": 0.2216796875, - "learning_rate": 4.828804691551448e-06, - "loss": 2.1217, - "step": 10100 + "grad_norm": 0.224609375, + "learning_rate": 2.467426493810643e-06, + "loss": 2.1414, + "step": 11055 }, { "epoch": 1.87, - "grad_norm": 0.2197265625, - "learning_rate": 4.758335831440497e-06, - "loss": 2.133, - "step": 10105 + "grad_norm": 0.220703125, + "learning_rate": 2.4349005276687042e-06, + "loss": 2.1383, + "step": 11060 }, { - "epoch": 1.88, - "grad_norm": 0.216796875, - "learning_rate": 4.688378758764689e-06, - "loss": 2.1422, - "step": 10110 + "epoch": 1.87, + "grad_norm": 0.224609375, + "learning_rate": 2.4025877250086316e-06, + "loss": 2.1079, + "step": 11065 + }, + { + "epoch": 1.87, + "grad_norm": 0.2255859375, + "learning_rate": 2.3704881564285184e-06, + "loss": 2.1241, + "step": 11070 }, { "epoch": 1.88, - "grad_norm": 0.2177734375, - "learning_rate": 4.618933656902758e-06, - "loss": 2.1195, - "step": 10115 + "grad_norm": 0.23046875, + "learning_rate": 2.338601892060566e-06, + "loss": 2.0867, + "step": 11075 }, { "epoch": 1.88, - "grad_norm": 0.22265625, - "learning_rate": 4.5500007078911996e-06, - "loss": 2.0933, - "step": 10120 + "grad_norm": 0.2265625, + "learning_rate": 2.3069290015709565e-06, + "loss": 2.1409, + "step": 11080 }, { "epoch": 1.88, - "grad_norm": 0.2138671875, - "learning_rate": 4.481580092424187e-06, - "loss": 2.1086, - "step": 10125 + "grad_norm": 0.2265625, + "learning_rate": 2.2754695541596593e-06, + "loss": 2.1097, + "step": 11085 }, { "epoch": 1.88, - "grad_norm": 0.21484375, - "learning_rate": 4.41367198985283e-06, - "loss": 2.09, - "step": 10130 + "grad_norm": 0.2333984375, + "learning_rate": 2.2442236185603262e-06, + "loss": 2.0971, + "step": 11090 }, { "epoch": 1.88, - "grad_norm": 0.2216796875, - "learning_rate": 4.3462765781848045e-06, - "loss": 2.107, - "step": 10135 + "grad_norm": 0.220703125, + "learning_rate": 2.2131912630401485e-06, + "loss": 2.1069, + "step": 11095 }, { "epoch": 1.88, - "grad_norm": 0.2197265625, - "learning_rate": 4.279394034083839e-06, - "loss": 2.0996, - "step": 10140 + "grad_norm": 0.2275390625, + "learning_rate": 2.182372555399603e-06, + "loss": 2.1526, + "step": 11100 }, { "epoch": 1.88, - "grad_norm": 0.2197265625, - "learning_rate": 4.213024532869314e-06, - "loss": 2.1024, - "step": 10145 + "grad_norm": 0.228515625, + "learning_rate": 2.151767562972462e-06, + "loss": 2.1291, + "step": 11105 }, { "epoch": 1.88, - "grad_norm": 0.21875, - "learning_rate": 4.147168248515798e-06, - "loss": 2.1083, - "step": 10150 + "grad_norm": 0.234375, + "learning_rate": 2.121376352625537e-06, + "loss": 2.0771, + "step": 11110 }, { "epoch": 1.88, - "grad_norm": 0.22265625, - "learning_rate": 4.081825353652424e-06, - "loss": 2.1142, - "step": 10155 + "grad_norm": 0.2255859375, + "learning_rate": 2.091198990758547e-06, + "loss": 2.1103, + "step": 11115 }, { "epoch": 1.88, - "grad_norm": 0.22265625, - "learning_rate": 4.01699601956278e-06, - "loss": 2.1095, - "step": 10160 + "grad_norm": 0.232421875, + "learning_rate": 2.0612355433039965e-06, + "loss": 2.1198, + "step": 11120 }, { - "epoch": 1.89, - "grad_norm": 0.2373046875, - "learning_rate": 3.952680416184151e-06, - "loss": 2.097, - "step": 10165 + "epoch": 1.88, + "grad_norm": 0.2255859375, + "learning_rate": 2.0314860757270295e-06, + "loss": 2.1741, + "step": 11125 }, { "epoch": 1.89, "grad_norm": 0.2275390625, - "learning_rate": 3.88887871210708e-06, - "loss": 2.0742, - "step": 10170 + "learning_rate": 2.001950653025253e-06, + "loss": 2.1404, + "step": 11130 }, { "epoch": 1.89, - "grad_norm": 0.220703125, - "learning_rate": 3.825591074575208e-06, - "loss": 2.098, - "step": 10175 + "grad_norm": 0.2236328125, + "learning_rate": 1.9726293397286823e-06, + "loss": 2.1171, + "step": 11135 }, { "epoch": 1.89, - "grad_norm": 0.2138671875, - "learning_rate": 3.762817669484564e-06, - "loss": 2.1232, - "step": 10180 + "grad_norm": 0.2265625, + "learning_rate": 1.943522199899472e-06, + "loss": 2.12, + "step": 11140 }, { "epoch": 1.89, - "grad_norm": 0.2294921875, - "learning_rate": 3.700558661383191e-06, - "loss": 2.1437, - "step": 10185 + "grad_norm": 0.2255859375, + "learning_rate": 1.914629297131876e-06, + "loss": 2.1035, + "step": 11145 }, { "epoch": 1.89, - "grad_norm": 0.2158203125, - "learning_rate": 3.638814213470787e-06, - "loss": 2.0822, - "step": 10190 + "grad_norm": 0.23046875, + "learning_rate": 1.8859506945520856e-06, + "loss": 2.1324, + "step": 11150 }, { "epoch": 1.89, "grad_norm": 0.2333984375, - "learning_rate": 3.577584487598218e-06, - "loss": 2.1089, - "step": 10195 + "learning_rate": 1.857486454818047e-06, + "loss": 2.0816, + "step": 11155 }, { "epoch": 1.89, - "grad_norm": 0.2197265625, - "learning_rate": 3.51686964426714e-06, - "loss": 2.1156, - "step": 10200 + "grad_norm": 0.2255859375, + "learning_rate": 1.8292366401193805e-06, + "loss": 2.1412, + "step": 11160 + }, + { + "epoch": 1.89, + "grad_norm": 0.224609375, + "learning_rate": 1.8012013121772475e-06, + "loss": 2.1293, + "step": 11165 + }, + { + "epoch": 1.89, + "grad_norm": 0.2216796875, + "learning_rate": 1.7733805322441398e-06, + "loss": 2.0747, + "step": 11170 + }, + { + "epoch": 1.89, + "grad_norm": 0.2265625, + "learning_rate": 1.7457743611038468e-06, + "loss": 2.126, + "step": 11175 }, { "epoch": 1.89, "grad_norm": 0.22265625, - "learning_rate": 3.4566698426294674e-06, - "loss": 2.1033, - "step": 10205 + "learning_rate": 1.7183828590712436e-06, + "loss": 2.102, + "step": 11180 }, { "epoch": 1.89, - "grad_norm": 0.2158203125, - "learning_rate": 3.396985240487105e-06, - "loss": 2.0581, - "step": 10210 + "grad_norm": 0.2255859375, + "learning_rate": 1.691206085992192e-06, + "loss": 2.1216, + "step": 11185 }, { "epoch": 1.9, - "grad_norm": 0.22265625, - "learning_rate": 3.3378159942914376e-06, - "loss": 2.1041, - "step": 10215 + "grad_norm": 0.2255859375, + "learning_rate": 1.6642441012434172e-06, + "loss": 2.1466, + "step": 11190 }, { "epoch": 1.9, - "grad_norm": 0.21484375, - "learning_rate": 3.2791622591429536e-06, - "loss": 2.089, - "step": 10220 + "grad_norm": 0.22265625, + "learning_rate": 1.6374969637323545e-06, + "loss": 2.1029, + "step": 11195 }, { "epoch": 1.9, - "grad_norm": 0.220703125, - "learning_rate": 3.2210241887908444e-06, - "loss": 2.1456, - "step": 10225 + "grad_norm": 0.2255859375, + "learning_rate": 1.6109647318970466e-06, + "loss": 2.1073, + "step": 11200 }, { "epoch": 1.9, - "grad_norm": 0.22265625, - "learning_rate": 3.163401935632537e-06, - "loss": 2.1347, - "step": 10230 + "grad_norm": 0.2197265625, + "learning_rate": 1.5846474637060015e-06, + "loss": 2.0883, + "step": 11205 }, { "epoch": 1.9, - "grad_norm": 0.2158203125, - "learning_rate": 3.1062956507133867e-06, - "loss": 2.0823, - "step": 10235 + "grad_norm": 0.228515625, + "learning_rate": 1.5585452166580583e-06, + "loss": 2.1062, + "step": 11210 }, { "epoch": 1.9, "grad_norm": 0.2236328125, - "learning_rate": 3.0497054837262506e-06, - "loss": 2.0742, - "step": 10240 + "learning_rate": 1.5326580477822761e-06, + "loss": 2.167, + "step": 11215 }, { "epoch": 1.9, "grad_norm": 0.2255859375, - "learning_rate": 2.9936315830110473e-06, - "loss": 2.1019, - "step": 10245 + "learning_rate": 1.5069860136378121e-06, + "loss": 2.1129, + "step": 11220 }, { "epoch": 1.9, - "grad_norm": 0.21875, - "learning_rate": 2.938074095554444e-06, - "loss": 2.0905, - "step": 10250 + "grad_norm": 0.2353515625, + "learning_rate": 1.481529170313778e-06, + "loss": 2.075, + "step": 11225 }, { "epoch": 1.9, - "grad_norm": 0.21875, - "learning_rate": 2.883033166989413e-06, - "loss": 2.0628, - "step": 10255 + "grad_norm": 0.2314453125, + "learning_rate": 1.456287573429138e-06, + "loss": 2.1242, + "step": 11230 }, { "epoch": 1.9, - "grad_norm": 0.2197265625, - "learning_rate": 2.828508941594854e-06, - "loss": 2.1302, - "step": 10260 + "grad_norm": 0.224609375, + "learning_rate": 1.4312612781325785e-06, + "loss": 2.1539, + "step": 11235 + }, + { + "epoch": 1.9, + "grad_norm": 0.2255859375, + "learning_rate": 1.406450339102361e-06, + "loss": 2.0581, + "step": 11240 }, { "epoch": 1.9, - "grad_norm": 0.2177734375, - "learning_rate": 2.774501562295262e-06, - "loss": 2.0996, - "step": 10265 + "grad_norm": 0.2236328125, + "learning_rate": 1.381854810546268e-06, + "loss": 2.1453, + "step": 11245 }, { "epoch": 1.91, - "grad_norm": 0.2197265625, - "learning_rate": 2.7210111706603036e-06, - "loss": 2.113, - "step": 10270 + "grad_norm": 0.228515625, + "learning_rate": 1.357474746201426e-06, + "loss": 2.1207, + "step": 11250 }, { "epoch": 1.91, - "grad_norm": 0.220703125, - "learning_rate": 2.6680379069044416e-06, - "loss": 2.1421, - "step": 10275 + "grad_norm": 0.2490234375, + "learning_rate": 1.3333101993342145e-06, + "loss": 2.1136, + "step": 11255 }, { "epoch": 1.91, "grad_norm": 0.22265625, - "learning_rate": 2.6155819098866664e-06, - "loss": 2.1139, - "step": 10280 + "learning_rate": 1.3093612227401576e-06, + "loss": 2.0805, + "step": 11260 }, { "epoch": 1.91, - "grad_norm": 0.22265625, - "learning_rate": 2.563643317109965e-06, - "loss": 2.1383, - "step": 10285 + "grad_norm": 0.2216796875, + "learning_rate": 1.285627868743744e-06, + "loss": 2.1168, + "step": 11265 }, { "epoch": 1.91, - "grad_norm": 0.22265625, - "learning_rate": 2.5122222647211424e-06, - "loss": 2.1012, - "step": 10290 + "grad_norm": 0.228515625, + "learning_rate": 1.2621101891984289e-06, + "loss": 2.0865, + "step": 11270 }, { "epoch": 1.91, - "grad_norm": 0.224609375, - "learning_rate": 2.4613188875102667e-06, - "loss": 2.1486, - "step": 10295 + "grad_norm": 0.232421875, + "learning_rate": 1.2388082354863994e-06, + "loss": 2.1729, + "step": 11275 }, { "epoch": 1.91, - "grad_norm": 0.2216796875, - "learning_rate": 2.410933318910513e-06, - "loss": 2.1413, - "step": 10300 + "grad_norm": 0.232421875, + "learning_rate": 1.2157220585185536e-06, + "loss": 2.0999, + "step": 11280 }, { "epoch": 1.91, "grad_norm": 0.22265625, - "learning_rate": 2.3610656909976993e-06, - "loss": 2.1081, - "step": 10305 + "learning_rate": 1.1928517087343327e-06, + "loss": 2.1423, + "step": 11285 }, { "epoch": 1.91, - "grad_norm": 0.224609375, - "learning_rate": 2.3117161344899274e-06, - "loss": 2.1173, - "step": 10310 + "grad_norm": 0.2294921875, + "learning_rate": 1.1701972361016443e-06, + "loss": 2.1503, + "step": 11290 }, { "epoch": 1.91, - "grad_norm": 0.220703125, - "learning_rate": 2.2628847787473427e-06, - "loss": 2.1125, - "step": 10315 + "grad_norm": 0.2265625, + "learning_rate": 1.1477586901167403e-06, + "loss": 2.1066, + "step": 11295 }, { "epoch": 1.91, - "grad_norm": 0.216796875, - "learning_rate": 2.2145717517716437e-06, - "loss": 2.1233, - "step": 10320 + "grad_norm": 0.22265625, + "learning_rate": 1.1255361198040938e-06, + "loss": 2.1753, + "step": 11300 + }, + { + "epoch": 1.91, + "grad_norm": 0.228515625, + "learning_rate": 1.1035295737163221e-06, + "loss": 2.1592, + "step": 11305 }, { "epoch": 1.92, - "grad_norm": 0.2177734375, - "learning_rate": 2.1667771802059255e-06, - "loss": 2.1439, - "step": 10325 + "grad_norm": 0.2265625, + "learning_rate": 1.0817390999340537e-06, + "loss": 2.1417, + "step": 11310 }, { "epoch": 1.92, - "grad_norm": 0.2197265625, - "learning_rate": 2.1195011893341945e-06, - "loss": 2.1062, - "step": 10330 + "grad_norm": 0.2294921875, + "learning_rate": 1.0601647460658615e-06, + "loss": 2.1685, + "step": 11315 }, { "epoch": 1.92, - "grad_norm": 0.2158203125, - "learning_rate": 2.07274390308112e-06, - "loss": 2.1253, - "step": 10335 + "grad_norm": 0.220703125, + "learning_rate": 1.0388065592480956e-06, + "loss": 2.0922, + "step": 11320 }, { "epoch": 1.92, - "grad_norm": 0.22265625, - "learning_rate": 2.026505444011684e-06, - "loss": 2.1364, - "step": 10340 + "grad_norm": 0.2275390625, + "learning_rate": 1.0176645861448285e-06, + "loss": 2.1161, + "step": 11325 }, { "epoch": 1.92, - "grad_norm": 0.21875, - "learning_rate": 1.9807859333308865e-06, - "loss": 2.1224, - "step": 10345 + "grad_norm": 0.23046875, + "learning_rate": 9.967388729477779e-07, + "loss": 2.1453, + "step": 11330 }, { "epoch": 1.92, - "grad_norm": 0.2255859375, - "learning_rate": 1.9355854908833514e-06, - "loss": 2.1277, - "step": 10350 + "grad_norm": 0.2216796875, + "learning_rate": 9.760294653761048e-07, + "loss": 2.1358, + "step": 11335 }, { "epoch": 1.92, - "grad_norm": 0.220703125, - "learning_rate": 1.8909042351531459e-06, - "loss": 2.1251, - "step": 10355 + "grad_norm": 0.2294921875, + "learning_rate": 9.555364086764273e-07, + "loss": 2.0958, + "step": 11340 }, { "epoch": 1.92, "grad_norm": 0.22265625, - "learning_rate": 1.8467422832633142e-06, - "loss": 2.1136, - "step": 10360 + "learning_rate": 9.352597476226743e-07, + "loss": 2.1375, + "step": 11345 }, { "epoch": 1.92, - "grad_norm": 0.2265625, - "learning_rate": 1.8030997509757007e-06, - "loss": 2.1412, - "step": 10365 + "grad_norm": 0.22265625, + "learning_rate": 9.15199526515953e-07, + "loss": 2.1524, + "step": 11350 }, { "epoch": 1.92, - "grad_norm": 0.21484375, - "learning_rate": 1.7599767526905953e-06, - "loss": 2.04, - "step": 10370 + "grad_norm": 0.2275390625, + "learning_rate": 8.953557891844933e-07, + "loss": 2.0964, + "step": 11355 }, { "epoch": 1.92, - "grad_norm": 0.2177734375, - "learning_rate": 1.717373401446376e-06, - "loss": 2.0922, - "step": 10375 + "grad_norm": 0.2392578125, + "learning_rate": 8.757285789835923e-07, + "loss": 2.1318, + "step": 11360 + }, + { + "epoch": 1.92, + "grad_norm": 0.220703125, + "learning_rate": 8.563179387953812e-07, + "loss": 2.1265, + "step": 11365 }, { "epoch": 1.93, - "grad_norm": 0.212890625, - "learning_rate": 1.675289808919378e-06, - "loss": 2.0762, - "step": 10380 + "grad_norm": 0.236328125, + "learning_rate": 8.371239110289252e-07, + "loss": 2.1221, + "step": 11370 }, { "epoch": 1.93, - "grad_norm": 0.2158203125, - "learning_rate": 1.633726085423337e-06, - "loss": 2.138, - "step": 10385 + "grad_norm": 0.224609375, + "learning_rate": 8.181465376199348e-07, + "loss": 2.1379, + "step": 11375 }, { "epoch": 1.93, - "grad_norm": 0.2236328125, - "learning_rate": 1.5926823399093905e-06, - "loss": 2.1341, - "step": 10390 + "grad_norm": 0.251953125, + "learning_rate": 7.993858600308324e-07, + "loss": 2.1531, + "step": 11380 }, { "epoch": 1.93, - "grad_norm": 0.220703125, - "learning_rate": 1.5521586799655874e-06, - "loss": 2.0983, - "step": 10395 + "grad_norm": 0.228515625, + "learning_rate": 7.808419192505745e-07, + "loss": 2.1383, + "step": 11385 }, { "epoch": 1.93, - "grad_norm": 0.224609375, - "learning_rate": 1.5121552118167125e-06, - "loss": 2.1025, - "step": 10400 + "grad_norm": 0.2265625, + "learning_rate": 7.625147557945633e-07, + "loss": 2.174, + "step": 11390 }, { "epoch": 1.93, - "grad_norm": 0.2138671875, - "learning_rate": 1.472672040323908e-06, - "loss": 2.099, - "step": 10405 + "grad_norm": 0.2197265625, + "learning_rate": 7.44404409704591e-07, + "loss": 2.1006, + "step": 11395 }, { "epoch": 1.93, - "grad_norm": 0.2236328125, - "learning_rate": 1.4337092689845844e-06, - "loss": 2.1013, - "step": 10410 + "grad_norm": 0.22265625, + "learning_rate": 7.26510920548773e-07, + "loss": 2.1225, + "step": 11400 }, { "epoch": 1.93, - "grad_norm": 0.2177734375, - "learning_rate": 1.3952669999318657e-06, - "loss": 2.1162, - "step": 10415 + "grad_norm": 0.22265625, + "learning_rate": 7.088343274213926e-07, + "loss": 2.0748, + "step": 11405 }, { "epoch": 1.93, - "grad_norm": 0.2177734375, - "learning_rate": 1.3573453339345898e-06, - "loss": 2.1213, - "step": 10420 + "grad_norm": 0.228515625, + "learning_rate": 6.913746689428458e-07, + "loss": 2.1151, + "step": 11410 }, { "epoch": 1.93, - "grad_norm": 0.2216796875, - "learning_rate": 1.3199443703969083e-06, - "loss": 2.1008, - "step": 10425 + "grad_norm": 0.2314453125, + "learning_rate": 6.741319832595849e-07, + "loss": 2.1177, + "step": 11415 + }, + { + "epoch": 1.93, + "grad_norm": 0.2275390625, + "learning_rate": 6.571063080440087e-07, + "loss": 2.1477, + "step": 11420 + }, + { + "epoch": 1.93, + "grad_norm": 0.244140625, + "learning_rate": 6.402976804943728e-07, + "loss": 2.1342, + "step": 11425 }, { "epoch": 1.94, - "grad_norm": 0.2216796875, - "learning_rate": 1.2830642073580645e-06, - "loss": 2.1289, - "step": 10430 + "grad_norm": 0.2236328125, + "learning_rate": 6.23706137334723e-07, + "loss": 2.1416, + "step": 11430 }, { "epoch": 1.94, - "grad_norm": 0.2294921875, - "learning_rate": 1.2467049414921273e-06, - "loss": 2.1208, - "step": 10435 + "grad_norm": 0.2197265625, + "learning_rate": 6.073317148148294e-07, + "loss": 2.0855, + "step": 11435 }, { "epoch": 1.94, - "grad_norm": 0.2236328125, - "learning_rate": 1.2108666681076796e-06, - "loss": 2.1018, - "step": 10440 + "grad_norm": 0.224609375, + "learning_rate": 5.911744487100745e-07, + "loss": 2.1301, + "step": 11440 }, { "epoch": 1.94, - "grad_norm": 0.228515625, - "learning_rate": 1.175549481147753e-06, - "loss": 2.1416, - "step": 10445 + "grad_norm": 0.22265625, + "learning_rate": 5.752343743213873e-07, + "loss": 2.1179, + "step": 11445 }, { "epoch": 1.94, - "grad_norm": 0.220703125, - "learning_rate": 1.1407534731892933e-06, - "loss": 2.1281, - "step": 10450 + "grad_norm": 0.23046875, + "learning_rate": 5.595115264751649e-07, + "loss": 2.0996, + "step": 11450 }, { "epoch": 1.94, - "grad_norm": 0.2138671875, - "learning_rate": 1.106478735443184e-06, - "loss": 2.0847, - "step": 10455 + "grad_norm": 0.2314453125, + "learning_rate": 5.440059395232178e-07, + "loss": 2.128, + "step": 11455 }, { "epoch": 1.94, - "grad_norm": 0.2138671875, - "learning_rate": 1.07272535775389e-06, - "loss": 2.078, - "step": 10460 + "grad_norm": 0.224609375, + "learning_rate": 5.287176473426692e-07, + "loss": 2.1684, + "step": 11460 }, { "epoch": 1.94, - "grad_norm": 0.216796875, - "learning_rate": 1.039493428599192e-06, - "loss": 2.1367, - "step": 10465 + "grad_norm": 0.2314453125, + "learning_rate": 5.136466833358999e-07, + "loss": 2.1402, + "step": 11465 }, { "epoch": 1.94, - "grad_norm": 0.21875, - "learning_rate": 1.0067830350900532e-06, - "loss": 2.12, - "step": 10470 + "grad_norm": 0.2236328125, + "learning_rate": 4.987930804304375e-07, + "loss": 2.0991, + "step": 11470 }, { "epoch": 1.94, - "grad_norm": 0.220703125, - "learning_rate": 9.745942629703075e-07, - "loss": 2.0917, - "step": 10475 + "grad_norm": 0.2236328125, + "learning_rate": 4.841568710789335e-07, + "loss": 2.0907, + "step": 11475 }, { "epoch": 1.94, - "grad_norm": 0.2197265625, - "learning_rate": 9.429271966164388e-07, - "loss": 2.1365, - "step": 10480 + "grad_norm": 0.23046875, + "learning_rate": 4.697380872590751e-07, + "loss": 2.1306, + "step": 11480 }, { "epoch": 1.95, - "grad_norm": 0.2177734375, - "learning_rate": 9.117819190374022e-07, - "loss": 2.1, - "step": 10485 + "grad_norm": 0.228515625, + "learning_rate": 4.55536760473485e-07, + "loss": 2.1029, + "step": 11485 }, { "epoch": 1.95, - "grad_norm": 0.2236328125, - "learning_rate": 8.811585118744026e-07, - "loss": 2.0986, - "step": 10490 + "grad_norm": 0.232421875, + "learning_rate": 4.4155292174971054e-07, + "loss": 2.1302, + "step": 11490 }, { "epoch": 1.95, - "grad_norm": 0.2265625, - "learning_rate": 8.510570554006502e-07, - "loss": 2.1679, - "step": 10495 + "grad_norm": 0.2275390625, + "learning_rate": 4.2778660164011217e-07, + "loss": 2.1341, + "step": 11495 }, { "epoch": 1.95, - "grad_norm": 0.212890625, - "learning_rate": 8.21477628521139e-07, - "loss": 2.1362, - "step": 10500 + "grad_norm": 0.2373046875, + "learning_rate": 4.142378302217864e-07, + "loss": 2.1308, + "step": 11500 }, { "epoch": 1.95, - "grad_norm": 0.220703125, - "learning_rate": 7.924203087725124e-07, - "loss": 2.0937, - "step": 10505 + "grad_norm": 0.23046875, + "learning_rate": 4.0090663709655417e-07, + "loss": 2.1504, + "step": 11505 }, { "epoch": 1.95, - "grad_norm": 0.21875, - "learning_rate": 7.63885172322798e-07, - "loss": 2.1098, - "step": 10510 + "grad_norm": 0.2275390625, + "learning_rate": 3.877930513908501e-07, + "loss": 2.1156, + "step": 11510 }, { "epoch": 1.95, - "grad_norm": 0.216796875, - "learning_rate": 7.35872293971207e-07, - "loss": 2.1114, - "step": 10515 + "grad_norm": 0.228515625, + "learning_rate": 3.7489710175566686e-07, + "loss": 2.1101, + "step": 11515 }, { "epoch": 1.95, - "grad_norm": 0.2158203125, - "learning_rate": 7.083817471479349e-07, - "loss": 2.1369, - "step": 10520 + "grad_norm": 0.2314453125, + "learning_rate": 3.622188163664997e-07, + "loss": 2.1366, + "step": 11520 + }, + { + "epoch": 1.95, + "grad_norm": 0.2373046875, + "learning_rate": 3.4975822292331317e-07, + "loss": 2.1219, + "step": 11525 + }, + { + "epoch": 1.95, + "grad_norm": 0.2265625, + "learning_rate": 3.375153486504079e-07, + "loss": 2.0802, + "step": 11530 }, { "epoch": 1.95, - "grad_norm": 0.212890625, - "learning_rate": 6.814136039140717e-07, - "loss": 2.0751, - "step": 10525 + "grad_norm": 0.2265625, + "learning_rate": 3.254902202964205e-07, + "loss": 2.117, + "step": 11535 }, { "epoch": 1.95, "grad_norm": 0.2197265625, - "learning_rate": 6.54967934961248e-07, - "loss": 2.1256, - "step": 10530 + "learning_rate": 3.1368286413426817e-07, + "loss": 2.0799, + "step": 11540 }, { - "epoch": 1.95, - "grad_norm": 0.2294921875, - "learning_rate": 6.290448096115453e-07, - "loss": 2.0886, - "step": 10535 + "epoch": 1.96, + "grad_norm": 0.2314453125, + "learning_rate": 3.0209330596104866e-07, + "loss": 2.1347, + "step": 11545 }, { "epoch": 1.96, - "grad_norm": 0.216796875, - "learning_rate": 6.036442958173183e-07, - "loss": 2.1488, - "step": 10540 + "grad_norm": 0.236328125, + "learning_rate": 2.9072157109800714e-07, + "loss": 2.1671, + "step": 11550 }, { "epoch": 1.96, - "grad_norm": 0.22265625, - "learning_rate": 5.787664601609954e-07, - "loss": 2.1258, - "step": 10545 + "grad_norm": 0.2353515625, + "learning_rate": 2.7956768439050265e-07, + "loss": 2.1185, + "step": 11555 }, { "epoch": 1.96, - "grad_norm": 0.216796875, - "learning_rate": 5.544113678549235e-07, - "loss": 2.0979, - "step": 10550 + "grad_norm": 0.224609375, + "learning_rate": 2.686316702079084e-07, + "loss": 2.1238, + "step": 11560 }, { "epoch": 1.96, - "grad_norm": 0.2177734375, - "learning_rate": 5.30579082741145e-07, - "loss": 2.0995, - "step": 10555 + "grad_norm": 0.2294921875, + "learning_rate": 2.579135524436005e-07, + "loss": 2.1151, + "step": 11565 }, { "epoch": 1.96, - "grad_norm": 0.220703125, - "learning_rate": 5.072696672913102e-07, - "loss": 2.1074, - "step": 10560 + "grad_norm": 0.2255859375, + "learning_rate": 2.4741335451488047e-07, + "loss": 2.0894, + "step": 11570 }, { "epoch": 1.96, - "grad_norm": 0.2158203125, - "learning_rate": 4.8448318260641e-07, - "loss": 2.0968, - "step": 10565 + "grad_norm": 0.23046875, + "learning_rate": 2.3713109936291944e-07, + "loss": 2.1385, + "step": 11575 }, { "epoch": 1.96, - "grad_norm": 0.22265625, - "learning_rate": 4.622196884167318e-07, - "loss": 2.1238, - "step": 10570 + "grad_norm": 0.2236328125, + "learning_rate": 2.2706680945273617e-07, + "loss": 2.1139, + "step": 11580 }, { "epoch": 1.96, - "grad_norm": 0.21875, - "learning_rate": 4.4047924308161516e-07, - "loss": 2.0813, - "step": 10575 + "grad_norm": 0.224609375, + "learning_rate": 2.1722050677313032e-07, + "loss": 2.1246, + "step": 11585 }, { "epoch": 1.96, - "grad_norm": 0.2236328125, - "learning_rate": 4.1926190358934083e-07, - "loss": 2.1372, - "step": 10580 + "grad_norm": 0.2265625, + "learning_rate": 2.075922128366381e-07, + "loss": 2.1213, + "step": 11590 }, { "epoch": 1.96, - "grad_norm": 0.216796875, - "learning_rate": 3.985677255569753e-07, - "loss": 2.0857, - "step": 10585 + "grad_norm": 0.2265625, + "learning_rate": 1.981819486794656e-07, + "loss": 2.1515, + "step": 11595 }, { "epoch": 1.96, - "grad_norm": 0.2177734375, - "learning_rate": 3.7839676323023765e-07, - "loss": 2.1184, - "step": 10590 + "grad_norm": 0.2294921875, + "learning_rate": 1.8898973486146664e-07, + "loss": 2.1291, + "step": 11600 }, { "epoch": 1.97, - "grad_norm": 0.2197265625, - "learning_rate": 3.5874906948327737e-07, - "loss": 2.1057, - "step": 10595 + "grad_norm": 0.224609375, + "learning_rate": 1.8001559146612058e-07, + "loss": 2.112, + "step": 11605 }, { "epoch": 1.97, - "grad_norm": 0.2138671875, - "learning_rate": 3.396246958186744e-07, - "loss": 2.1105, - "step": 10600 + "grad_norm": 0.2373046875, + "learning_rate": 1.7125953810041007e-07, + "loss": 2.1438, + "step": 11610 }, { "epoch": 1.97, - "grad_norm": 0.21875, - "learning_rate": 3.210236923671728e-07, - "loss": 2.1313, - "step": 10605 + "grad_norm": 0.232421875, + "learning_rate": 1.6272159389486564e-07, + "loss": 2.1497, + "step": 11615 }, { "epoch": 1.97, - "grad_norm": 0.2255859375, - "learning_rate": 3.029461078876361e-07, - "loss": 2.158, - "step": 10610 + "grad_norm": 0.228515625, + "learning_rate": 1.5440177750346563e-07, + "loss": 2.1304, + "step": 11620 }, { "epoch": 1.97, - "grad_norm": 0.2294921875, - "learning_rate": 2.8539198976686997e-07, - "loss": 2.1015, - "step": 10615 + "grad_norm": 0.2265625, + "learning_rate": 1.4630010710363628e-07, + "loss": 2.1374, + "step": 11625 }, { "epoch": 1.97, - "grad_norm": 0.21484375, - "learning_rate": 2.6836138401955534e-07, - "loss": 2.1259, - "step": 10620 + "grad_norm": 0.22265625, + "learning_rate": 1.384166003961518e-07, + "loss": 2.137, + "step": 11630 }, { "epoch": 1.97, - "grad_norm": 0.22265625, - "learning_rate": 2.518543352880265e-07, - "loss": 2.1453, - "step": 10625 + "grad_norm": 0.2255859375, + "learning_rate": 1.3075127460518976e-07, + "loss": 2.1289, + "step": 11635 }, { "epoch": 1.97, - "grad_norm": 0.2197265625, - "learning_rate": 2.35870886842271e-07, - "loss": 2.1135, - "step": 10630 + "grad_norm": 0.2275390625, + "learning_rate": 1.23304146478187e-07, + "loss": 2.147, + "step": 11640 }, { "epoch": 1.97, - "grad_norm": 0.2177734375, - "learning_rate": 2.204110805797077e-07, - "loss": 2.0939, - "step": 10635 + "grad_norm": 0.2265625, + "learning_rate": 1.1607523228588379e-07, + "loss": 2.1013, + "step": 11645 }, { "epoch": 1.97, - "grad_norm": 0.2177734375, - "learning_rate": 2.0547495702518682e-07, - "loss": 2.1107, - "step": 10640 + "grad_norm": 0.2216796875, + "learning_rate": 1.090645478222574e-07, + "loss": 2.1047, + "step": 11650 }, { "epoch": 1.97, - "grad_norm": 0.2265625, - "learning_rate": 1.9106255533083428e-07, - "loss": 2.0952, - "step": 10645 + "grad_norm": 0.224609375, + "learning_rate": 1.0227210840448864e-07, + "loss": 2.116, + "step": 11655 + }, + { + "epoch": 1.97, + "grad_norm": 0.2158203125, + "learning_rate": 9.569792887290651e-08, + "loss": 2.0968, + "step": 11660 }, { "epoch": 1.98, - "grad_norm": 0.21875, - "learning_rate": 1.7717391327585208e-07, - "loss": 2.1414, - "step": 10650 + "grad_norm": 0.23046875, + "learning_rate": 8.934202359102139e-08, + "loss": 2.1216, + "step": 11665 }, { "epoch": 1.98, - "grad_norm": 0.220703125, - "learning_rate": 1.6380906726660705e-07, - "loss": 2.1247, - "step": 10655 + "grad_norm": 0.2275390625, + "learning_rate": 8.320440644541405e-08, + "loss": 2.1317, + "step": 11670 }, { "epoch": 1.98, - "grad_norm": 0.2197265625, - "learning_rate": 1.5096805233638654e-07, - "loss": 2.1159, - "step": 10660 + "grad_norm": 0.2265625, + "learning_rate": 7.728509084574676e-08, + "loss": 2.1112, + "step": 11675 }, { "epoch": 1.98, - "grad_norm": 0.2158203125, - "learning_rate": 1.3865090214539856e-07, - "loss": 2.0838, - "step": 10665 + "grad_norm": 0.2294921875, + "learning_rate": 7.158408972476327e-08, + "loss": 2.1439, + "step": 11680 }, { "epoch": 1.98, - "grad_norm": 0.220703125, - "learning_rate": 1.2685764898059393e-07, - "loss": 2.0818, - "step": 10670 + "grad_norm": 0.2255859375, + "learning_rate": 6.610141553816674e-08, + "loss": 2.084, + "step": 11685 }, { "epoch": 1.98, - "grad_norm": 0.21875, - "learning_rate": 1.1558832375566653e-07, - "loss": 2.0904, - "step": 10675 + "grad_norm": 0.2333984375, + "learning_rate": 6.083708026471957e-08, + "loss": 2.1251, + "step": 11690 }, { "epoch": 1.98, - "grad_norm": 0.2197265625, - "learning_rate": 1.0484295601089767e-07, - "loss": 2.137, - "step": 10680 + "grad_norm": 0.22265625, + "learning_rate": 5.579109540609917e-08, + "loss": 2.1635, + "step": 11695 }, { "epoch": 1.98, - "grad_norm": 0.2294921875, - "learning_rate": 9.462157391317838e-08, - "loss": 2.1282, - "step": 10685 + "grad_norm": 0.2275390625, + "learning_rate": 5.096347198694229e-08, + "loss": 2.0743, + "step": 11700 }, { "epoch": 1.98, - "grad_norm": 0.2255859375, - "learning_rate": 8.492420425583181e-08, - "loss": 2.1308, - "step": 10690 + "grad_norm": 0.2373046875, + "learning_rate": 4.6354220554800655e-08, + "loss": 2.1271, + "step": 11705 }, { "epoch": 1.98, - "grad_norm": 0.21484375, - "learning_rate": 7.575087245861313e-08, - "loss": 2.0737, - "step": 10695 + "grad_norm": 0.22265625, + "learning_rate": 4.196335118012984e-08, + "loss": 2.0982, + "step": 11710 + }, + { + "epoch": 1.98, + "grad_norm": 0.228515625, + "learning_rate": 3.779087345624488e-08, + "loss": 2.1128, + "step": 11715 + }, + { + "epoch": 1.98, + "grad_norm": 0.23046875, + "learning_rate": 3.383679649929805e-08, + "loss": 2.1136, + "step": 11720 }, { "epoch": 1.99, - "grad_norm": 0.21484375, - "learning_rate": 6.710160256755416e-08, - "loss": 2.102, - "step": 10700 + "grad_norm": 0.228515625, + "learning_rate": 3.010112894831219e-08, + "loss": 2.1439, + "step": 11725 }, { "epoch": 1.99, - "grad_norm": 0.2236328125, - "learning_rate": 5.897641725505221e-08, - "loss": 2.0975, - "step": 10705 + "grad_norm": 0.2255859375, + "learning_rate": 2.6583878965080745e-08, + "loss": 2.1431, + "step": 11730 }, { "epoch": 1.99, - "grad_norm": 0.2177734375, - "learning_rate": 5.137533781964798e-08, - "loss": 2.0821, - "step": 10710 + "grad_norm": 0.224609375, + "learning_rate": 2.3285054234223334e-08, + "loss": 2.1114, + "step": 11735 }, { "epoch": 1.99, - "grad_norm": 0.220703125, - "learning_rate": 4.429838418607002e-08, - "loss": 2.1072, - "step": 10715 + "grad_norm": 0.2275390625, + "learning_rate": 2.0204661963107996e-08, + "loss": 2.1307, + "step": 11740 }, { "epoch": 1.99, - "grad_norm": 0.2158203125, - "learning_rate": 3.774557490516806e-08, - "loss": 2.1058, - "step": 10720 + "grad_norm": 0.2275390625, + "learning_rate": 1.7342708881884496e-08, + "loss": 2.1688, + "step": 11745 }, { "epoch": 1.99, - "grad_norm": 0.224609375, - "learning_rate": 3.171692715382424e-08, - "loss": 2.1674, - "step": 10725 + "grad_norm": 0.2255859375, + "learning_rate": 1.469920124343993e-08, + "loss": 2.1105, + "step": 11750 }, { "epoch": 1.99, - "grad_norm": 0.2197265625, - "learning_rate": 2.6212456734953094e-08, - "loss": 2.1061, - "step": 10730 + "grad_norm": 0.2314453125, + "learning_rate": 1.2274144823409828e-08, + "loss": 2.1762, + "step": 11755 }, { "epoch": 1.99, - "grad_norm": 0.2216796875, - "learning_rate": 2.123217807743494e-08, - "loss": 2.1099, - "step": 10735 + "grad_norm": 0.23828125, + "learning_rate": 1.006754492012263e-08, + "loss": 2.174, + "step": 11760 }, { "epoch": 1.99, - "grad_norm": 0.2216796875, - "learning_rate": 1.6776104236071455e-08, - "loss": 2.1166, - "step": 10740 + "grad_norm": 0.2275390625, + "learning_rate": 8.079406354644103e-09, + "loss": 2.1249, + "step": 11765 }, { "epoch": 1.99, - "grad_norm": 0.220703125, - "learning_rate": 1.2844246891607904e-08, - "loss": 2.1206, - "step": 10745 + "grad_norm": 0.2294921875, + "learning_rate": 6.309733470721835e-09, + "loss": 2.096, + "step": 11770 }, { "epoch": 1.99, - "grad_norm": 0.2177734375, - "learning_rate": 9.436616350622096e-09, - "loss": 2.1129, - "step": 10750 + "grad_norm": 0.22265625, + "learning_rate": 4.758530134785222e-09, + "loss": 2.089, + "step": 11775 }, { "epoch": 2.0, - "grad_norm": 0.220703125, - "learning_rate": 6.5532215455244015e-09, - "loss": 2.0892, - "step": 10755 + "grad_norm": 0.2265625, + "learning_rate": 3.425799735978785e-09, + "loss": 2.1153, + "step": 11780 }, { "epoch": 2.0, - "grad_norm": 0.228515625, - "learning_rate": 4.194070034579944e-09, - "loss": 2.139, - "step": 10760 + "grad_norm": 0.2265625, + "learning_rate": 2.3115451860733495e-09, + "loss": 2.1192, + "step": 11785 }, { "epoch": 2.0, - "grad_norm": 0.2255859375, - "learning_rate": 2.3591680018419935e-09, - "loss": 2.1614, - "step": 10765 + "grad_norm": 0.236328125, + "learning_rate": 1.4157689195326563e-09, + "loss": 2.1282, + "step": 11790 }, { "epoch": 2.0, - "grad_norm": 0.2177734375, - "learning_rate": 1.0485202571297593e-09, - "loss": 2.118, - "step": 10770 + "grad_norm": 0.2353515625, + "learning_rate": 7.3847289349116e-10, + "loss": 2.0832, + "step": 11795 }, { "epoch": 2.0, - "grad_norm": 0.2177734375, - "learning_rate": 2.621302360727995e-10, - "loss": 2.111, - "step": 10775 + "grad_norm": 0.2265625, + "learning_rate": 2.796585877207214e-10, + "loss": 2.1107, + "step": 11800 }, { "epoch": 2.0, - "grad_norm": 0.427734375, - "learning_rate": 0.0, - "loss": 2.0952, - "step": 10780 + "grad_norm": 0.2353515625, + "learning_rate": 3.932700465281158e-11, + "loss": 2.1311, + "step": 11805 }, { "epoch": 2.0, - "eval_loss": 2.1485531330108643, - "eval_runtime": 171.3478, - "eval_samples_per_second": 28.299, - "eval_steps_per_second": 3.543, - "step": 10780 + "eval_loss": 2.1430556774139404, + "eval_runtime": 161.578, + "eval_samples_per_second": 16.444, + "eval_steps_per_second": 2.061, + "step": 11808 }, { "epoch": 2.0, - "step": 10780, - "total_flos": 6.122136059783414e+17, - "train_loss": 2.151964877841645, - "train_runtime": 21635.5911, - "train_samples_per_second": 7.971, - "train_steps_per_second": 0.498 + "step": 11808, + "total_flos": 6.077393230092042e+17, + "train_loss": 2.1527459967507903, + "train_runtime": 22011.903, + "train_samples_per_second": 4.292, + "train_steps_per_second": 0.536 } ], "logging_steps": 5, - "max_steps": 10780, + "max_steps": 11808, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, - "total_flos": 6.122136059783414e+17, - "train_batch_size": 8, + "total_flos": 6.077393230092042e+17, + "train_batch_size": 4, "trial_name": null, "trial_params": null }