diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,16579 +1,8312 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 1.9998306376492505, + "epoch": 1.9996613039796782, "eval_steps": 500, - "global_step": 11808, + "global_step": 5904, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "grad_norm": 0.484375, - "learning_rate": 1.6934801016088062e-07, - "loss": 2.5162, + "grad_norm": 0.671875, + "learning_rate": 3.38409475465313e-07, + "loss": 2.4832, "step": 1 }, { "epoch": 0.0, - "grad_norm": 0.40234375, - "learning_rate": 8.46740050804403e-07, - "loss": 2.6069, + "grad_norm": 0.546875, + "learning_rate": 1.6920473773265652e-06, + "loss": 2.5283, "step": 5 }, { "epoch": 0.0, - "grad_norm": 0.515625, - "learning_rate": 1.693480101608806e-06, - "loss": 2.5858, + "grad_norm": 0.71484375, + "learning_rate": 3.3840947546531303e-06, + "loss": 2.5149, "step": 10 }, { - "epoch": 0.0, - "grad_norm": 0.5859375, - "learning_rate": 2.5402201524132094e-06, - "loss": 2.5739, + "epoch": 0.01, + "grad_norm": 0.859375, + "learning_rate": 5.076142131979695e-06, + "loss": 2.5184, "step": 15 }, { - "epoch": 0.0, - "grad_norm": 0.404296875, - "learning_rate": 3.386960203217612e-06, - "loss": 2.588, + "epoch": 0.01, + "grad_norm": 0.89453125, + "learning_rate": 6.768189509306261e-06, + "loss": 2.5027, "step": 20 }, { - "epoch": 0.0, - "grad_norm": 0.46484375, - "learning_rate": 4.233700254022015e-06, - "loss": 2.5688, + "epoch": 0.01, + "grad_norm": 0.76953125, + "learning_rate": 8.460236886632826e-06, + "loss": 2.5038, "step": 25 }, { "epoch": 0.01, - "grad_norm": 0.5859375, - "learning_rate": 5.080440304826419e-06, - "loss": 2.5694, + "grad_norm": 0.59375, + "learning_rate": 1.015228426395939e-05, + "loss": 2.4692, "step": 30 }, { "epoch": 0.01, - "grad_norm": 0.77734375, - "learning_rate": 5.927180355630822e-06, - "loss": 2.5357, + "grad_norm": 0.5546875, + "learning_rate": 1.1844331641285957e-05, + "loss": 2.4494, "step": 35 }, { "epoch": 0.01, - "grad_norm": 0.44921875, - "learning_rate": 6.773920406435224e-06, - "loss": 2.5469, + "grad_norm": 0.474609375, + "learning_rate": 1.3536379018612521e-05, + "loss": 2.4364, "step": 40 }, { - "epoch": 0.01, - "grad_norm": 0.5234375, - "learning_rate": 7.620660457239629e-06, - "loss": 2.5776, + "epoch": 0.02, + "grad_norm": 0.34765625, + "learning_rate": 1.5228426395939088e-05, + "loss": 2.4704, "step": 45 }, { - "epoch": 0.01, - "grad_norm": 0.55859375, - "learning_rate": 8.46740050804403e-06, - "loss": 2.5495, + "epoch": 0.02, + "grad_norm": 0.259765625, + "learning_rate": 1.6920473773265652e-05, + "loss": 2.4237, "step": 50 }, { - "epoch": 0.01, - "grad_norm": 0.57421875, - "learning_rate": 9.314140558848434e-06, - "loss": 2.489, + "epoch": 0.02, + "grad_norm": 0.2265625, + "learning_rate": 1.8612521150592218e-05, + "loss": 2.4019, "step": 55 }, { - "epoch": 0.01, - "grad_norm": 0.427734375, - "learning_rate": 1.0160880609652838e-05, - "loss": 2.5028, + "epoch": 0.02, + "grad_norm": 0.19140625, + "learning_rate": 2.030456852791878e-05, + "loss": 2.3979, "step": 60 }, { - "epoch": 0.01, - "grad_norm": 0.37890625, - "learning_rate": 1.1007620660457241e-05, - "loss": 2.5345, + "epoch": 0.02, + "grad_norm": 0.1708984375, + "learning_rate": 2.199661590524535e-05, + "loss": 2.3894, "step": 65 }, { - "epoch": 0.01, - "grad_norm": 0.353515625, - "learning_rate": 1.1854360711261643e-05, - "loss": 2.4845, + "epoch": 0.02, + "grad_norm": 0.1748046875, + "learning_rate": 2.3688663282571914e-05, + "loss": 2.4104, "step": 70 }, { - "epoch": 0.01, - "grad_norm": 0.28515625, - "learning_rate": 1.2701100762066045e-05, - "loss": 2.4452, + "epoch": 0.03, + "grad_norm": 0.1552734375, + "learning_rate": 2.5380710659898476e-05, + "loss": 2.3876, "step": 75 }, { - "epoch": 0.01, - "grad_norm": 0.267578125, - "learning_rate": 1.3547840812870449e-05, - "loss": 2.455, + "epoch": 0.03, + "grad_norm": 0.162109375, + "learning_rate": 2.7072758037225043e-05, + "loss": 2.4002, "step": 80 }, { - "epoch": 0.01, - "grad_norm": 0.2421875, - "learning_rate": 1.4394580863674852e-05, - "loss": 2.5327, + "epoch": 0.03, + "grad_norm": 0.1640625, + "learning_rate": 2.876480541455161e-05, + "loss": 2.3435, "step": 85 }, { - "epoch": 0.02, - "grad_norm": 0.2265625, - "learning_rate": 1.5241320914479258e-05, - "loss": 2.4376, + "epoch": 0.03, + "grad_norm": 0.1806640625, + "learning_rate": 3.0456852791878175e-05, + "loss": 2.3394, "step": 90 }, { - "epoch": 0.02, - "grad_norm": 0.2275390625, - "learning_rate": 1.608806096528366e-05, - "loss": 2.4626, + "epoch": 0.03, + "grad_norm": 0.1611328125, + "learning_rate": 3.214890016920474e-05, + "loss": 2.3178, "step": 95 }, { - "epoch": 0.02, - "grad_norm": 0.2265625, - "learning_rate": 1.693480101608806e-05, - "loss": 2.4267, + "epoch": 0.03, + "grad_norm": 0.1513671875, + "learning_rate": 3.3840947546531304e-05, + "loss": 2.3124, "step": 100 }, { - "epoch": 0.02, - "grad_norm": 0.2177734375, - "learning_rate": 1.7781541066892467e-05, - "loss": 2.4574, + "epoch": 0.04, + "grad_norm": 0.1552734375, + "learning_rate": 3.553299492385787e-05, + "loss": 2.3051, "step": 105 }, { - "epoch": 0.02, - "grad_norm": 0.2333984375, - "learning_rate": 1.862828111769687e-05, - "loss": 2.3983, + "epoch": 0.04, + "grad_norm": 0.138671875, + "learning_rate": 3.7225042301184437e-05, + "loss": 2.3203, "step": 110 }, { - "epoch": 0.02, - "grad_norm": 0.2421875, - "learning_rate": 1.947502116850127e-05, - "loss": 2.4354, + "epoch": 0.04, + "grad_norm": 0.1416015625, + "learning_rate": 3.8917089678510996e-05, + "loss": 2.293, "step": 115 }, { - "epoch": 0.02, - "grad_norm": 0.232421875, - "learning_rate": 2.0321761219305676e-05, - "loss": 2.4131, + "epoch": 0.04, + "grad_norm": 0.1435546875, + "learning_rate": 4.060913705583756e-05, + "loss": 2.3071, "step": 120 }, { - "epoch": 0.02, - "grad_norm": 0.2080078125, - "learning_rate": 2.1168501270110077e-05, - "loss": 2.4226, + "epoch": 0.04, + "grad_norm": 0.138671875, + "learning_rate": 4.230118443316413e-05, + "loss": 2.2731, "step": 125 }, { - "epoch": 0.02, - "grad_norm": 0.208984375, - "learning_rate": 2.2015241320914483e-05, - "loss": 2.4228, + "epoch": 0.04, + "grad_norm": 0.134765625, + "learning_rate": 4.39932318104907e-05, + "loss": 2.2847, "step": 130 }, { - "epoch": 0.02, - "grad_norm": 0.2060546875, - "learning_rate": 2.286198137171888e-05, - "loss": 2.3872, + "epoch": 0.05, + "grad_norm": 0.134765625, + "learning_rate": 4.568527918781726e-05, + "loss": 2.2561, "step": 135 }, { - "epoch": 0.02, - "grad_norm": 0.1904296875, - "learning_rate": 2.3708721422523286e-05, - "loss": 2.3907, + "epoch": 0.05, + "grad_norm": 0.1328125, + "learning_rate": 4.737732656514383e-05, + "loss": 2.2524, "step": 140 }, { - "epoch": 0.02, - "grad_norm": 0.2001953125, - "learning_rate": 2.455546147332769e-05, - "loss": 2.3826, + "epoch": 0.05, + "grad_norm": 0.1298828125, + "learning_rate": 4.906937394247039e-05, + "loss": 2.2716, "step": 145 }, { - "epoch": 0.03, - "grad_norm": 0.2119140625, - "learning_rate": 2.540220152413209e-05, - "loss": 2.3294, + "epoch": 0.05, + "grad_norm": 0.1318359375, + "learning_rate": 5.076142131979695e-05, + "loss": 2.2681, "step": 150 }, { - "epoch": 0.03, - "grad_norm": 0.197265625, - "learning_rate": 2.62489415749365e-05, - "loss": 2.3485, + "epoch": 0.05, + "grad_norm": 0.1279296875, + "learning_rate": 5.245346869712352e-05, + "loss": 2.2574, "step": 155 }, { - "epoch": 0.03, - "grad_norm": 0.2236328125, - "learning_rate": 2.7095681625740897e-05, - "loss": 2.3282, + "epoch": 0.05, + "grad_norm": 0.1298828125, + "learning_rate": 5.4145516074450085e-05, + "loss": 2.2446, "step": 160 }, { - "epoch": 0.03, - "grad_norm": 0.216796875, - "learning_rate": 2.79424216765453e-05, - "loss": 2.3451, + "epoch": 0.06, + "grad_norm": 0.1318359375, + "learning_rate": 5.583756345177665e-05, + "loss": 2.2111, "step": 165 }, { - "epoch": 0.03, - "grad_norm": 0.19921875, - "learning_rate": 2.8789161727349705e-05, - "loss": 2.2971, + "epoch": 0.06, + "grad_norm": 0.1396484375, + "learning_rate": 5.752961082910322e-05, + "loss": 2.2193, "step": 170 }, { - "epoch": 0.03, - "grad_norm": 0.21484375, - "learning_rate": 2.9635901778154106e-05, - "loss": 2.3093, + "epoch": 0.06, + "grad_norm": 0.13671875, + "learning_rate": 5.9221658206429784e-05, + "loss": 2.2511, "step": 175 }, { - "epoch": 0.03, - "grad_norm": 0.232421875, - "learning_rate": 3.0482641828958515e-05, - "loss": 2.2861, + "epoch": 0.06, + "grad_norm": 0.1357421875, + "learning_rate": 6.091370558375635e-05, + "loss": 2.2315, "step": 180 }, { - "epoch": 0.03, - "grad_norm": 0.205078125, - "learning_rate": 3.132938187976292e-05, - "loss": 2.3381, + "epoch": 0.06, + "grad_norm": 0.142578125, + "learning_rate": 6.26057529610829e-05, + "loss": 2.2352, "step": 185 }, { - "epoch": 0.03, - "grad_norm": 0.1845703125, - "learning_rate": 3.217612193056732e-05, - "loss": 2.269, + "epoch": 0.06, + "grad_norm": 0.140625, + "learning_rate": 6.429780033840948e-05, + "loss": 2.2085, "step": 190 }, { - "epoch": 0.03, - "grad_norm": 0.185546875, - "learning_rate": 3.302286198137172e-05, - "loss": 2.3178, + "epoch": 0.07, + "grad_norm": 0.140625, + "learning_rate": 6.598984771573604e-05, + "loss": 2.2269, "step": 195 }, { - "epoch": 0.03, - "grad_norm": 0.181640625, - "learning_rate": 3.386960203217612e-05, - "loss": 2.2231, + "epoch": 0.07, + "grad_norm": 0.1416015625, + "learning_rate": 6.768189509306261e-05, + "loss": 2.2032, "step": 200 }, { - "epoch": 0.03, - "grad_norm": 0.173828125, - "learning_rate": 3.4716342082980524e-05, - "loss": 2.2515, + "epoch": 0.07, + "grad_norm": 0.14453125, + "learning_rate": 6.937394247038918e-05, + "loss": 2.212, "step": 205 }, { - "epoch": 0.04, - "grad_norm": 0.1865234375, - "learning_rate": 3.556308213378493e-05, - "loss": 2.3124, + "epoch": 0.07, + "grad_norm": 0.1376953125, + "learning_rate": 7.106598984771574e-05, + "loss": 2.2063, "step": 210 }, { - "epoch": 0.04, - "grad_norm": 0.1826171875, - "learning_rate": 3.6409822184589335e-05, - "loss": 2.2382, + "epoch": 0.07, + "grad_norm": 0.138671875, + "learning_rate": 7.275803722504231e-05, + "loss": 2.2317, "step": 215 }, { - "epoch": 0.04, - "grad_norm": 0.1884765625, - "learning_rate": 3.725656223539374e-05, - "loss": 2.2292, + "epoch": 0.07, + "grad_norm": 0.142578125, + "learning_rate": 7.445008460236887e-05, + "loss": 2.209, "step": 220 }, { - "epoch": 0.04, - "grad_norm": 0.1650390625, - "learning_rate": 3.810330228619814e-05, - "loss": 2.2462, + "epoch": 0.08, + "grad_norm": 0.1435546875, + "learning_rate": 7.614213197969543e-05, + "loss": 2.2178, "step": 225 }, { - "epoch": 0.04, - "grad_norm": 0.171875, - "learning_rate": 3.895004233700254e-05, - "loss": 2.2408, + "epoch": 0.08, + "grad_norm": 0.1455078125, + "learning_rate": 7.783417935702199e-05, + "loss": 2.2295, "step": 230 }, { - "epoch": 0.04, - "grad_norm": 0.1884765625, - "learning_rate": 3.979678238780695e-05, - "loss": 2.2485, + "epoch": 0.08, + "grad_norm": 0.150390625, + "learning_rate": 7.952622673434857e-05, + "loss": 2.2275, "step": 235 }, { - "epoch": 0.04, - "grad_norm": 0.1884765625, - "learning_rate": 4.064352243861135e-05, - "loss": 2.319, + "epoch": 0.08, + "grad_norm": 0.150390625, + "learning_rate": 8.121827411167512e-05, + "loss": 2.185, "step": 240 }, { - "epoch": 0.04, - "grad_norm": 0.1904296875, - "learning_rate": 4.1490262489415746e-05, - "loss": 2.2125, + "epoch": 0.08, + "grad_norm": 0.15234375, + "learning_rate": 8.29103214890017e-05, + "loss": 2.179, "step": 245 }, { - "epoch": 0.04, - "grad_norm": 0.1826171875, - "learning_rate": 4.2337002540220155e-05, - "loss": 2.2627, + "epoch": 0.08, + "grad_norm": 0.1494140625, + "learning_rate": 8.460236886632826e-05, + "loss": 2.2004, "step": 250 }, { - "epoch": 0.04, - "grad_norm": 0.1826171875, - "learning_rate": 4.318374259102456e-05, - "loss": 2.2665, + "epoch": 0.09, + "grad_norm": 0.1494140625, + "learning_rate": 8.629441624365483e-05, + "loss": 2.2298, "step": 255 }, { - "epoch": 0.04, - "grad_norm": 0.1904296875, - "learning_rate": 4.4030482641828965e-05, - "loss": 2.2375, + "epoch": 0.09, + "grad_norm": 0.1435546875, + "learning_rate": 8.79864636209814e-05, + "loss": 2.2295, "step": 260 }, { - "epoch": 0.04, - "grad_norm": 0.1884765625, - "learning_rate": 4.487722269263336e-05, - "loss": 2.2342, + "epoch": 0.09, + "grad_norm": 0.1513671875, + "learning_rate": 8.967851099830795e-05, + "loss": 2.2361, "step": 265 }, { - "epoch": 0.05, - "grad_norm": 0.189453125, - "learning_rate": 4.572396274343776e-05, - "loss": 2.2752, + "epoch": 0.09, + "grad_norm": 0.1494140625, + "learning_rate": 9.137055837563452e-05, + "loss": 2.2203, "step": 270 }, { - "epoch": 0.05, - "grad_norm": 0.18359375, - "learning_rate": 4.657070279424217e-05, - "loss": 2.2579, + "epoch": 0.09, + "grad_norm": 0.15234375, + "learning_rate": 9.306260575296108e-05, + "loss": 2.1898, "step": 275 }, { - "epoch": 0.05, - "grad_norm": 0.181640625, - "learning_rate": 4.741744284504657e-05, - "loss": 2.2639, + "epoch": 0.09, + "grad_norm": 0.1474609375, + "learning_rate": 9.475465313028765e-05, + "loss": 2.1787, "step": 280 }, { - "epoch": 0.05, - "grad_norm": 0.1953125, - "learning_rate": 4.8264182895850975e-05, - "loss": 2.2554, + "epoch": 0.1, + "grad_norm": 0.1552734375, + "learning_rate": 9.644670050761421e-05, + "loss": 2.1783, "step": 285 }, { - "epoch": 0.05, - "grad_norm": 0.185546875, - "learning_rate": 4.911092294665538e-05, - "loss": 2.2714, + "epoch": 0.1, + "grad_norm": 0.150390625, + "learning_rate": 9.813874788494079e-05, + "loss": 2.1904, "step": 290 }, { - "epoch": 0.05, - "grad_norm": 0.18359375, - "learning_rate": 4.995766299745978e-05, - "loss": 2.2491, + "epoch": 0.1, + "grad_norm": 0.1474609375, + "learning_rate": 9.983079526226735e-05, + "loss": 2.1735, "step": 295 }, { - "epoch": 0.05, - "grad_norm": 0.1806640625, - "learning_rate": 5.080440304826418e-05, - "loss": 2.2583, + "epoch": 0.1, + "grad_norm": 0.1513671875, + "learning_rate": 0.0001015228426395939, + "loss": 2.1938, "step": 300 }, { - "epoch": 0.05, - "grad_norm": 0.19921875, - "learning_rate": 5.165114309906859e-05, - "loss": 2.2448, + "epoch": 0.1, + "grad_norm": 0.1513671875, + "learning_rate": 0.00010321489001692048, + "loss": 2.1987, "step": 305 }, { - "epoch": 0.05, - "grad_norm": 0.19140625, - "learning_rate": 5.2497883149873e-05, - "loss": 2.2126, + "epoch": 0.1, + "grad_norm": 0.1591796875, + "learning_rate": 0.00010490693739424704, + "loss": 2.1945, "step": 310 }, { - "epoch": 0.05, - "grad_norm": 0.1845703125, - "learning_rate": 5.334462320067739e-05, - "loss": 2.2718, + "epoch": 0.11, + "grad_norm": 0.1591796875, + "learning_rate": 0.00010659898477157362, + "loss": 2.2216, "step": 315 }, { - "epoch": 0.05, - "grad_norm": 0.1943359375, - "learning_rate": 5.4191363251481795e-05, - "loss": 2.2236, + "epoch": 0.11, + "grad_norm": 0.158203125, + "learning_rate": 0.00010829103214890017, + "loss": 2.208, "step": 320 }, { - "epoch": 0.06, - "grad_norm": 0.1943359375, - "learning_rate": 5.5038103302286203e-05, - "loss": 2.2657, + "epoch": 0.11, + "grad_norm": 0.1572265625, + "learning_rate": 0.00010998307952622673, + "loss": 2.1782, "step": 325 }, { - "epoch": 0.06, - "grad_norm": 0.1943359375, - "learning_rate": 5.58848433530906e-05, - "loss": 2.2426, + "epoch": 0.11, + "grad_norm": 0.1513671875, + "learning_rate": 0.0001116751269035533, + "loss": 2.1854, "step": 330 }, { - "epoch": 0.06, - "grad_norm": 0.193359375, - "learning_rate": 5.673158340389501e-05, - "loss": 2.2843, + "epoch": 0.11, + "grad_norm": 0.158203125, + "learning_rate": 0.00011336717428087986, + "loss": 2.1777, "step": 335 }, { - "epoch": 0.06, - "grad_norm": 0.193359375, - "learning_rate": 5.757832345469941e-05, - "loss": 2.2423, + "epoch": 0.12, + "grad_norm": 0.158203125, + "learning_rate": 0.00011505922165820644, + "loss": 2.1846, "step": 340 }, { - "epoch": 0.06, - "grad_norm": 0.197265625, - "learning_rate": 5.842506350550381e-05, - "loss": 2.2573, + "epoch": 0.12, + "grad_norm": 0.162109375, + "learning_rate": 0.000116751269035533, + "loss": 2.2163, "step": 345 }, { - "epoch": 0.06, - "grad_norm": 0.2099609375, - "learning_rate": 5.927180355630821e-05, - "loss": 2.2623, + "epoch": 0.12, + "grad_norm": 0.15625, + "learning_rate": 0.00011844331641285957, + "loss": 2.1784, "step": 350 }, { - "epoch": 0.06, - "grad_norm": 0.1953125, - "learning_rate": 6.011854360711262e-05, - "loss": 2.2224, + "epoch": 0.12, + "grad_norm": 0.1640625, + "learning_rate": 0.00012013536379018613, + "loss": 2.1957, "step": 355 }, { - "epoch": 0.06, - "grad_norm": 0.2001953125, - "learning_rate": 6.096528365791703e-05, - "loss": 2.227, + "epoch": 0.12, + "grad_norm": 0.1552734375, + "learning_rate": 0.0001218274111675127, + "loss": 2.1977, "step": 360 }, { - "epoch": 0.06, - "grad_norm": 0.1982421875, - "learning_rate": 6.181202370872143e-05, - "loss": 2.2246, + "epoch": 0.12, + "grad_norm": 0.1611328125, + "learning_rate": 0.00012351945854483927, + "loss": 2.2133, "step": 365 }, { - "epoch": 0.06, - "grad_norm": 0.1962890625, - "learning_rate": 6.265876375952583e-05, - "loss": 2.2499, + "epoch": 0.13, + "grad_norm": 0.1640625, + "learning_rate": 0.0001252115059221658, + "loss": 2.1746, "step": 370 }, { - "epoch": 0.06, - "grad_norm": 0.2001953125, - "learning_rate": 6.350550381033024e-05, - "loss": 2.2423, + "epoch": 0.13, + "grad_norm": 0.1494140625, + "learning_rate": 0.0001269035532994924, + "loss": 2.1744, "step": 375 }, { - "epoch": 0.06, - "grad_norm": 0.1962890625, - "learning_rate": 6.435224386113464e-05, - "loss": 2.2377, + "epoch": 0.13, + "grad_norm": 0.158203125, + "learning_rate": 0.00012859560067681895, + "loss": 2.1899, "step": 380 }, { - "epoch": 0.07, - "grad_norm": 0.2060546875, - "learning_rate": 6.519898391193903e-05, - "loss": 2.2515, + "epoch": 0.13, + "grad_norm": 0.1591796875, + "learning_rate": 0.00013028764805414554, + "loss": 2.2186, "step": 385 }, { - "epoch": 0.07, - "grad_norm": 0.2021484375, - "learning_rate": 6.604572396274344e-05, - "loss": 2.2243, + "epoch": 0.13, + "grad_norm": 0.162109375, + "learning_rate": 0.00013197969543147207, + "loss": 2.1991, "step": 390 }, { - "epoch": 0.07, - "grad_norm": 0.197265625, - "learning_rate": 6.689246401354784e-05, - "loss": 2.2758, + "epoch": 0.13, + "grad_norm": 0.16015625, + "learning_rate": 0.00013367174280879866, + "loss": 2.1906, "step": 395 }, { - "epoch": 0.07, - "grad_norm": 0.2099609375, - "learning_rate": 6.773920406435225e-05, - "loss": 2.2412, + "epoch": 0.14, + "grad_norm": 0.1572265625, + "learning_rate": 0.00013536379018612522, + "loss": 2.1837, "step": 400 }, { - "epoch": 0.07, - "grad_norm": 0.1982421875, - "learning_rate": 6.858594411515665e-05, - "loss": 2.2574, + "epoch": 0.14, + "grad_norm": 0.15625, + "learning_rate": 0.00013705583756345178, + "loss": 2.2043, "step": 405 }, { - "epoch": 0.07, - "grad_norm": 0.1943359375, - "learning_rate": 6.943268416596105e-05, - "loss": 2.2604, + "epoch": 0.14, + "grad_norm": 0.158203125, + "learning_rate": 0.00013874788494077836, + "loss": 2.2061, "step": 410 }, { - "epoch": 0.07, - "grad_norm": 0.2265625, - "learning_rate": 7.027942421676546e-05, - "loss": 2.2502, + "epoch": 0.14, + "grad_norm": 0.150390625, + "learning_rate": 0.0001404399323181049, + "loss": 2.1866, "step": 415 }, { - "epoch": 0.07, - "grad_norm": 0.1962890625, - "learning_rate": 7.112616426756987e-05, - "loss": 2.2565, + "epoch": 0.14, + "grad_norm": 0.1552734375, + "learning_rate": 0.00014213197969543148, + "loss": 2.1919, "step": 420 }, { - "epoch": 0.07, - "grad_norm": 0.197265625, - "learning_rate": 7.197290431837426e-05, - "loss": 2.2028, + "epoch": 0.14, + "grad_norm": 0.1513671875, + "learning_rate": 0.00014382402707275804, + "loss": 2.188, "step": 425 }, { - "epoch": 0.07, - "grad_norm": 0.2001953125, - "learning_rate": 7.281964436917867e-05, - "loss": 2.2372, + "epoch": 0.15, + "grad_norm": 0.15234375, + "learning_rate": 0.00014551607445008463, + "loss": 2.182, "step": 430 }, { - "epoch": 0.07, - "grad_norm": 0.2080078125, - "learning_rate": 7.366638441998307e-05, - "loss": 2.2608, + "epoch": 0.15, + "grad_norm": 0.1611328125, + "learning_rate": 0.00014720812182741116, + "loss": 2.1641, "step": 435 }, { - "epoch": 0.07, - "grad_norm": 0.205078125, - "learning_rate": 7.451312447078747e-05, - "loss": 2.2387, + "epoch": 0.15, + "grad_norm": 0.1513671875, + "learning_rate": 0.00014890016920473775, + "loss": 2.1927, "step": 440 }, { - "epoch": 0.08, - "grad_norm": 0.212890625, - "learning_rate": 7.535986452159187e-05, - "loss": 2.2552, + "epoch": 0.15, + "grad_norm": 0.15625, + "learning_rate": 0.0001505922165820643, + "loss": 2.1952, "step": 445 }, { - "epoch": 0.08, - "grad_norm": 0.205078125, - "learning_rate": 7.620660457239628e-05, - "loss": 2.183, + "epoch": 0.15, + "grad_norm": 0.166015625, + "learning_rate": 0.00015228426395939087, + "loss": 2.1962, "step": 450 }, { - "epoch": 0.08, - "grad_norm": 0.220703125, - "learning_rate": 7.705334462320069e-05, - "loss": 2.2255, + "epoch": 0.15, + "grad_norm": 0.1484375, + "learning_rate": 0.00015397631133671742, + "loss": 2.1983, "step": 455 }, { - "epoch": 0.08, - "grad_norm": 0.2001953125, - "learning_rate": 7.790008467400508e-05, - "loss": 2.2245, + "epoch": 0.16, + "grad_norm": 0.1474609375, + "learning_rate": 0.00015566835871404398, + "loss": 2.1702, "step": 460 }, { - "epoch": 0.08, - "grad_norm": 0.2275390625, - "learning_rate": 7.874682472480949e-05, - "loss": 2.2154, + "epoch": 0.16, + "grad_norm": 0.15234375, + "learning_rate": 0.00015736040609137057, + "loss": 2.2, "step": 465 }, { - "epoch": 0.08, - "grad_norm": 0.203125, - "learning_rate": 7.95935647756139e-05, - "loss": 2.2057, + "epoch": 0.16, + "grad_norm": 0.1533203125, + "learning_rate": 0.00015905245346869713, + "loss": 2.1524, "step": 470 }, { - "epoch": 0.08, - "grad_norm": 0.2001953125, - "learning_rate": 8.04403048264183e-05, - "loss": 2.2309, + "epoch": 0.16, + "grad_norm": 0.146484375, + "learning_rate": 0.00016074450084602372, + "loss": 2.1905, "step": 475 }, { - "epoch": 0.08, - "grad_norm": 0.2041015625, - "learning_rate": 8.12870448772227e-05, - "loss": 2.2176, + "epoch": 0.16, + "grad_norm": 0.1494140625, + "learning_rate": 0.00016243654822335025, + "loss": 2.1742, "step": 480 }, { - "epoch": 0.08, - "grad_norm": 0.2060546875, - "learning_rate": 8.21337849280271e-05, - "loss": 2.2527, + "epoch": 0.16, + "grad_norm": 0.1552734375, + "learning_rate": 0.00016412859560067684, + "loss": 2.2074, "step": 485 }, { - "epoch": 0.08, - "grad_norm": 0.205078125, - "learning_rate": 8.298052497883149e-05, - "loss": 2.2488, + "epoch": 0.17, + "grad_norm": 0.150390625, + "learning_rate": 0.0001658206429780034, + "loss": 2.1577, "step": 490 }, { - "epoch": 0.08, - "grad_norm": 0.21484375, - "learning_rate": 8.38272650296359e-05, - "loss": 2.2384, + "epoch": 0.17, + "grad_norm": 0.1552734375, + "learning_rate": 0.00016751269035532995, + "loss": 2.1828, "step": 495 }, { - "epoch": 0.08, - "grad_norm": 0.203125, - "learning_rate": 8.467400508044031e-05, - "loss": 2.246, + "epoch": 0.17, + "grad_norm": 0.15234375, + "learning_rate": 0.00016920473773265651, + "loss": 2.1915, "step": 500 }, { - "epoch": 0.09, - "grad_norm": 0.2060546875, - "learning_rate": 8.552074513124472e-05, - "loss": 2.2167, + "epoch": 0.17, + "grad_norm": 0.16015625, + "learning_rate": 0.00017089678510998307, + "loss": 2.1874, "step": 505 }, { - "epoch": 0.09, - "grad_norm": 0.2109375, - "learning_rate": 8.636748518204911e-05, - "loss": 2.2183, + "epoch": 0.17, + "grad_norm": 0.154296875, + "learning_rate": 0.00017258883248730966, + "loss": 2.1935, "step": 510 }, { - "epoch": 0.09, - "grad_norm": 0.208984375, - "learning_rate": 8.721422523285352e-05, - "loss": 2.2027, + "epoch": 0.17, + "grad_norm": 0.15625, + "learning_rate": 0.00017428087986463622, + "loss": 2.1887, "step": 515 }, { - "epoch": 0.09, - "grad_norm": 0.251953125, - "learning_rate": 8.806096528365793e-05, - "loss": 2.1894, - "step": 520 + "epoch": 0.18, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001759729272419628, + "loss": 2.2071, + "step": 520 }, { - "epoch": 0.09, - "grad_norm": 0.2080078125, - "learning_rate": 8.890770533446233e-05, - "loss": 2.2201, + "epoch": 0.18, + "grad_norm": 0.1533203125, + "learning_rate": 0.00017766497461928934, + "loss": 2.2, "step": 525 }, { - "epoch": 0.09, - "grad_norm": 0.2138671875, - "learning_rate": 8.975444538526672e-05, - "loss": 2.2116, + "epoch": 0.18, + "grad_norm": 0.146484375, + "learning_rate": 0.0001793570219966159, + "loss": 2.193, "step": 530 }, { - "epoch": 0.09, - "grad_norm": 0.2041015625, - "learning_rate": 9.060118543607113e-05, - "loss": 2.2211, + "epoch": 0.18, + "grad_norm": 0.154296875, + "learning_rate": 0.00018104906937394248, + "loss": 2.1653, "step": 535 }, { - "epoch": 0.09, - "grad_norm": 0.203125, - "learning_rate": 9.144792548687552e-05, - "loss": 2.2107, + "epoch": 0.18, + "grad_norm": 0.1513671875, + "learning_rate": 0.00018274111675126904, + "loss": 2.1909, "step": 540 }, { - "epoch": 0.09, - "grad_norm": 0.2041015625, - "learning_rate": 9.229466553767993e-05, - "loss": 2.2446, + "epoch": 0.18, + "grad_norm": 0.154296875, + "learning_rate": 0.0001844331641285956, + "loss": 2.2016, "step": 545 }, { - "epoch": 0.09, - "grad_norm": 0.2080078125, - "learning_rate": 9.314140558848434e-05, - "loss": 2.2244, + "epoch": 0.19, + "grad_norm": 0.1533203125, + "learning_rate": 0.00018612521150592216, + "loss": 2.1922, "step": 550 }, { - "epoch": 0.09, - "grad_norm": 0.208984375, - "learning_rate": 9.398814563928874e-05, - "loss": 2.2381, + "epoch": 0.19, + "grad_norm": 0.150390625, + "learning_rate": 0.00018781725888324875, + "loss": 2.1688, "step": 555 }, { - "epoch": 0.09, - "grad_norm": 0.2099609375, - "learning_rate": 9.483488569009315e-05, - "loss": 2.2091, + "epoch": 0.19, + "grad_norm": 0.1435546875, + "learning_rate": 0.0001895093062605753, + "loss": 2.1694, "step": 560 }, { - "epoch": 0.1, - "grad_norm": 0.2236328125, - "learning_rate": 9.568162574089755e-05, - "loss": 2.2132, + "epoch": 0.19, + "grad_norm": 0.1494140625, + "learning_rate": 0.00019120135363790187, + "loss": 2.1756, "step": 565 }, { - "epoch": 0.1, - "grad_norm": 0.205078125, - "learning_rate": 9.652836579170195e-05, - "loss": 2.2323, + "epoch": 0.19, + "grad_norm": 0.1552734375, + "learning_rate": 0.00019289340101522843, + "loss": 2.2084, "step": 570 }, { - "epoch": 0.1, - "grad_norm": 0.2138671875, - "learning_rate": 9.737510584250636e-05, - "loss": 2.2082, + "epoch": 0.19, + "grad_norm": 0.1533203125, + "learning_rate": 0.000194585448392555, + "loss": 2.1611, "step": 575 }, { - "epoch": 0.1, - "grad_norm": 0.2099609375, - "learning_rate": 9.822184589331075e-05, - "loss": 2.2041, + "epoch": 0.2, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019627749576988157, + "loss": 2.1808, "step": 580 }, { - "epoch": 0.1, - "grad_norm": 0.2138671875, - "learning_rate": 9.906858594411516e-05, - "loss": 2.2193, + "epoch": 0.2, + "grad_norm": 0.1513671875, + "learning_rate": 0.00019796954314720813, + "loss": 2.1557, "step": 585 }, { - "epoch": 0.1, - "grad_norm": 0.203125, - "learning_rate": 9.991532599491956e-05, - "loss": 2.2349, + "epoch": 0.2, + "grad_norm": 0.14453125, + "learning_rate": 0.0001996615905245347, + "loss": 2.1607, "step": 590 }, { - "epoch": 0.1, - "grad_norm": 0.201171875, - "learning_rate": 0.00010076206604572395, - "loss": 2.2278, + "epoch": 0.2, + "grad_norm": 0.15234375, + "learning_rate": 0.00019999972028877317, + "loss": 2.1788, "step": 595 }, { - "epoch": 0.1, - "grad_norm": 0.2138671875, - "learning_rate": 0.00010160880609652836, - "loss": 2.2127, + "epoch": 0.2, + "grad_norm": 0.1494140625, + "learning_rate": 0.00019999858396459598, + "loss": 2.1947, "step": 600 }, { - "epoch": 0.1, - "grad_norm": 0.21875, - "learning_rate": 0.00010245554614733277, - "loss": 2.2367, + "epoch": 0.2, + "grad_norm": 0.14453125, + "learning_rate": 0.00019999657355544167, + "loss": 2.1525, "step": 605 }, { - "epoch": 0.1, - "grad_norm": 0.2119140625, - "learning_rate": 0.00010330228619813718, - "loss": 2.2172, + "epoch": 0.21, + "grad_norm": 0.142578125, + "learning_rate": 0.00019999368907888313, + "loss": 2.1582, "step": 610 }, { - "epoch": 0.1, - "grad_norm": 0.2197265625, - "learning_rate": 0.00010414902624894159, - "loss": 2.222, + "epoch": 0.21, + "grad_norm": 0.1455078125, + "learning_rate": 0.0001999899305601336, + "loss": 2.2174, "step": 615 }, { - "epoch": 0.11, - "grad_norm": 0.1982421875, - "learning_rate": 0.000104995766299746, - "loss": 2.1955, + "epoch": 0.21, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001999852980320461, + "loss": 2.1962, "step": 620 }, { - "epoch": 0.11, - "grad_norm": 0.203125, - "learning_rate": 0.00010584250635055039, - "loss": 2.2568, + "epoch": 0.21, + "grad_norm": 0.142578125, + "learning_rate": 0.0001999797915351135, + "loss": 2.1669, "step": 625 }, { - "epoch": 0.11, - "grad_norm": 0.21484375, - "learning_rate": 0.00010668924640135479, - "loss": 2.2051, + "epoch": 0.21, + "grad_norm": 0.142578125, + "learning_rate": 0.00019997341111746791, + "loss": 2.1438, "step": 630 }, { - "epoch": 0.11, - "grad_norm": 0.2158203125, - "learning_rate": 0.00010753598645215918, - "loss": 2.2461, + "epoch": 0.22, + "grad_norm": 0.146484375, + "learning_rate": 0.00019996615683488039, + "loss": 2.1759, "step": 635 }, { - "epoch": 0.11, - "grad_norm": 0.208984375, - "learning_rate": 0.00010838272650296359, - "loss": 2.2322, + "epoch": 0.22, + "grad_norm": 0.1484375, + "learning_rate": 0.00019995802875076042, + "loss": 2.1692, "step": 640 }, { - "epoch": 0.11, - "grad_norm": 0.2138671875, - "learning_rate": 0.000109229466553768, - "loss": 2.2358, + "epoch": 0.22, + "grad_norm": 0.14453125, + "learning_rate": 0.0001999490269361554, + "loss": 2.171, "step": 645 }, { - "epoch": 0.11, - "grad_norm": 0.2001953125, - "learning_rate": 0.00011007620660457241, - "loss": 2.1646, + "epoch": 0.22, + "grad_norm": 0.1455078125, + "learning_rate": 0.00019993915146974996, + "loss": 2.1641, "step": 650 }, { - "epoch": 0.11, - "grad_norm": 0.2216796875, - "learning_rate": 0.00011092294665537682, - "loss": 2.2039, + "epoch": 0.22, + "grad_norm": 0.142578125, + "learning_rate": 0.00019992840243786525, + "loss": 2.1699, "step": 655 }, { - "epoch": 0.11, - "grad_norm": 0.2060546875, - "learning_rate": 0.0001117696867061812, - "loss": 2.1748, + "epoch": 0.22, + "grad_norm": 0.1494140625, + "learning_rate": 0.0001999167799344583, + "loss": 2.1677, "step": 660 }, { - "epoch": 0.11, - "grad_norm": 0.205078125, - "learning_rate": 0.0001126164267569856, - "loss": 2.249, + "epoch": 0.23, + "grad_norm": 0.14453125, + "learning_rate": 0.0001999042840611211, + "loss": 2.195, "step": 665 }, { - "epoch": 0.11, - "grad_norm": 0.201171875, - "learning_rate": 0.00011346316680779001, - "loss": 2.2732, + "epoch": 0.23, + "grad_norm": 0.142578125, + "learning_rate": 0.00019989091492707975, + "loss": 2.1987, "step": 670 }, { - "epoch": 0.11, - "grad_norm": 0.2109375, - "learning_rate": 0.00011430990685859442, - "loss": 2.2496, + "epoch": 0.23, + "grad_norm": 0.150390625, + "learning_rate": 0.0001998766726491935, + "loss": 2.1513, "step": 675 }, { - "epoch": 0.12, - "grad_norm": 0.201171875, - "learning_rate": 0.00011515664690939882, - "loss": 2.2412, + "epoch": 0.23, + "grad_norm": 0.142578125, + "learning_rate": 0.00019986155735195372, + "loss": 2.1756, "step": 680 }, { - "epoch": 0.12, - "grad_norm": 0.1982421875, - "learning_rate": 0.00011600338696020323, - "loss": 2.1998, + "epoch": 0.23, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001998455691674828, + "loss": 2.1832, "step": 685 }, { - "epoch": 0.12, - "grad_norm": 0.2099609375, - "learning_rate": 0.00011685012701100762, - "loss": 2.202, + "epoch": 0.23, + "grad_norm": 0.1396484375, + "learning_rate": 0.00019982870823553308, + "loss": 2.1832, "step": 690 }, { - "epoch": 0.12, - "grad_norm": 0.2001953125, - "learning_rate": 0.00011769686706181202, - "loss": 2.208, + "epoch": 0.24, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019981097470348548, + "loss": 2.1757, "step": 695 }, { - "epoch": 0.12, - "grad_norm": 0.2021484375, - "learning_rate": 0.00011854360711261643, - "loss": 2.212, + "epoch": 0.24, + "grad_norm": 0.140625, + "learning_rate": 0.00019979236872634838, + "loss": 2.1821, "step": 700 }, { - "epoch": 0.12, - "grad_norm": 0.2041015625, - "learning_rate": 0.00011939034716342083, - "loss": 2.2409, + "epoch": 0.24, + "grad_norm": 0.14453125, + "learning_rate": 0.0001997728904667561, + "loss": 2.1929, "step": 705 }, { - "epoch": 0.12, - "grad_norm": 0.19921875, - "learning_rate": 0.00012023708721422524, - "loss": 2.2069, + "epoch": 0.24, + "grad_norm": 0.140625, + "learning_rate": 0.00019975254009496762, + "loss": 2.156, "step": 710 }, { - "epoch": 0.12, - "grad_norm": 0.2197265625, - "learning_rate": 0.00012108382726502965, - "loss": 2.2205, + "epoch": 0.24, + "grad_norm": 0.140625, + "learning_rate": 0.00019973131778886497, + "loss": 2.1597, "step": 715 }, { - "epoch": 0.12, - "grad_norm": 0.1982421875, - "learning_rate": 0.00012193056731583406, - "loss": 2.2525, + "epoch": 0.24, + "grad_norm": 0.1416015625, + "learning_rate": 0.00019970922373395176, + "loss": 2.1797, "step": 720 }, { - "epoch": 0.12, - "grad_norm": 0.20703125, - "learning_rate": 0.00012277730736663843, - "loss": 2.2261, + "epoch": 0.25, + "grad_norm": 0.1435546875, + "learning_rate": 0.00019968625812335158, + "loss": 2.1946, "step": 725 }, { - "epoch": 0.12, - "grad_norm": 0.1962890625, - "learning_rate": 0.00012362404741744285, - "loss": 2.1868, + "epoch": 0.25, + "grad_norm": 0.1435546875, + "learning_rate": 0.00019966242115780617, + "loss": 2.1836, "step": 730 }, { - "epoch": 0.12, - "grad_norm": 0.2041015625, - "learning_rate": 0.00012447078746824725, - "loss": 2.2363, + "epoch": 0.25, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019963771304567387, + "loss": 2.1733, "step": 735 }, { - "epoch": 0.13, - "grad_norm": 0.220703125, - "learning_rate": 0.00012531752751905167, - "loss": 2.1955, + "epoch": 0.25, + "grad_norm": 0.146484375, + "learning_rate": 0.00019961213400292762, + "loss": 2.1611, "step": 740 }, { - "epoch": 0.13, - "grad_norm": 0.2021484375, - "learning_rate": 0.00012616426756985606, - "loss": 2.1964, + "epoch": 0.25, + "grad_norm": 0.1513671875, + "learning_rate": 0.00019958568425315314, + "loss": 2.188, "step": 745 }, { - "epoch": 0.13, - "grad_norm": 0.1982421875, - "learning_rate": 0.00012701100762066049, - "loss": 2.2456, + "epoch": 0.25, + "grad_norm": 0.146484375, + "learning_rate": 0.000199558364027547, + "loss": 2.1381, "step": 750 }, { - "epoch": 0.13, - "grad_norm": 0.2041015625, - "learning_rate": 0.00012785774767146485, - "loss": 2.1887, + "epoch": 0.26, + "grad_norm": 0.1533203125, + "learning_rate": 0.00019953017356491457, + "loss": 2.1972, "step": 755 }, { - "epoch": 0.13, - "grad_norm": 0.21484375, - "learning_rate": 0.00012870448772226928, - "loss": 2.2023, + "epoch": 0.26, + "grad_norm": 0.150390625, + "learning_rate": 0.0001995011131116679, + "loss": 2.1809, "step": 760 }, { - "epoch": 0.13, - "grad_norm": 0.197265625, - "learning_rate": 0.00012955122777307367, - "loss": 2.21, + "epoch": 0.26, + "grad_norm": 0.1513671875, + "learning_rate": 0.00019947118292182377, + "loss": 2.1648, "step": 765 }, { - "epoch": 0.13, - "grad_norm": 0.20703125, - "learning_rate": 0.00013039796782387807, - "loss": 2.1852, + "epoch": 0.26, + "grad_norm": 0.146484375, + "learning_rate": 0.00019944038325700103, + "loss": 2.1709, "step": 770 }, { - "epoch": 0.13, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001312447078746825, - "loss": 2.1978, + "epoch": 0.26, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019940871438641882, + "loss": 2.1709, "step": 775 }, { - "epoch": 0.13, - "grad_norm": 0.197265625, - "learning_rate": 0.00013209144792548688, - "loss": 2.2205, + "epoch": 0.26, + "grad_norm": 0.1416015625, + "learning_rate": 0.00019937617658689384, + "loss": 2.1944, "step": 780 }, { - "epoch": 0.13, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001329381879762913, - "loss": 2.179, + "epoch": 0.27, + "grad_norm": 0.138671875, + "learning_rate": 0.0001993427701428382, + "loss": 2.1225, "step": 785 }, { - "epoch": 0.13, - "grad_norm": 0.203125, - "learning_rate": 0.00013378492802709567, - "loss": 2.2183, + "epoch": 0.27, + "grad_norm": 0.1513671875, + "learning_rate": 0.0001993084953462567, + "loss": 2.1785, "step": 790 }, { - "epoch": 0.13, - "grad_norm": 0.203125, - "learning_rate": 0.0001346316680779001, - "loss": 2.1874, + "epoch": 0.27, + "grad_norm": 0.142578125, + "learning_rate": 0.00019927335249674447, + "loss": 2.1775, "step": 795 }, { - "epoch": 0.14, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001354784081287045, - "loss": 2.2123, + "epoch": 0.27, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019923734190148418, + "loss": 2.1732, "step": 800 }, { - "epoch": 0.14, - "grad_norm": 0.205078125, - "learning_rate": 0.0001363251481795089, - "loss": 2.2362, + "epoch": 0.27, + "grad_norm": 0.14453125, + "learning_rate": 0.0001992004638752435, + "loss": 2.1627, "step": 805 }, { - "epoch": 0.14, - "grad_norm": 0.201171875, - "learning_rate": 0.0001371718882303133, - "loss": 2.2316, + "epoch": 0.27, + "grad_norm": 0.1455078125, + "learning_rate": 0.0001991627187403723, + "loss": 2.1665, "step": 810 }, { - "epoch": 0.14, - "grad_norm": 0.1982421875, - "learning_rate": 0.0001380186282811177, + "epoch": 0.28, + "grad_norm": 0.1435546875, + "learning_rate": 0.0001991241068267998, "loss": 2.1873, "step": 815 }, { - "epoch": 0.14, - "grad_norm": 0.1982421875, - "learning_rate": 0.0001388653683319221, - "loss": 2.2417, + "epoch": 0.28, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019908462847203172, + "loss": 2.1639, "step": 820 }, { - "epoch": 0.14, - "grad_norm": 0.1953125, - "learning_rate": 0.0001397121083827265, - "loss": 2.1944, + "epoch": 0.28, + "grad_norm": 0.1416015625, + "learning_rate": 0.0001990442840211473, + "loss": 2.1677, "step": 825 }, { - "epoch": 0.14, - "grad_norm": 0.1982421875, - "learning_rate": 0.00014055884843353092, - "loss": 2.1983, + "epoch": 0.28, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019900307382679638, + "loss": 2.138, "step": 830 }, { - "epoch": 0.14, - "grad_norm": 0.1953125, - "learning_rate": 0.0001414055884843353, - "loss": 2.2246, + "epoch": 0.28, + "grad_norm": 0.1376953125, + "learning_rate": 0.00019896099824919604, + "loss": 2.1726, "step": 835 }, { - "epoch": 0.14, - "grad_norm": 0.193359375, - "learning_rate": 0.00014225232853513973, - "loss": 2.2173, + "epoch": 0.28, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019891805765612794, + "loss": 2.1588, "step": 840 }, { - "epoch": 0.14, - "grad_norm": 0.201171875, - "learning_rate": 0.00014309906858594413, - "loss": 2.1354, + "epoch": 0.29, + "grad_norm": 0.1435546875, + "learning_rate": 0.0001988742524229346, + "loss": 2.1581, "step": 845 }, { - "epoch": 0.14, - "grad_norm": 0.2021484375, - "learning_rate": 0.00014394580863674852, - "loss": 2.2328, + "epoch": 0.29, + "grad_norm": 0.142578125, + "learning_rate": 0.00019882958293251636, + "loss": 2.1803, "step": 850 }, { - "epoch": 0.14, - "grad_norm": 0.1953125, - "learning_rate": 0.00014479254868755292, - "loss": 2.215, + "epoch": 0.29, + "grad_norm": 0.138671875, + "learning_rate": 0.00019878404957532814, + "loss": 2.1448, "step": 855 }, { - "epoch": 0.15, - "grad_norm": 0.2060546875, - "learning_rate": 0.00014563928873835734, - "loss": 2.214, + "epoch": 0.29, + "grad_norm": 0.1416015625, + "learning_rate": 0.00019873765274937578, + "loss": 2.1585, "step": 860 }, { - "epoch": 0.15, - "grad_norm": 0.212890625, - "learning_rate": 0.00014648602878916173, - "loss": 2.1958, + "epoch": 0.29, + "grad_norm": 0.1435546875, + "learning_rate": 0.00019869039286021271, + "loss": 2.1749, "step": 865 }, { - "epoch": 0.15, - "grad_norm": 0.2041015625, - "learning_rate": 0.00014733276883996613, - "loss": 2.1809, + "epoch": 0.29, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019864227032093633, + "loss": 2.1606, "step": 870 }, { - "epoch": 0.15, - "grad_norm": 0.2001953125, - "learning_rate": 0.00014817950889077055, - "loss": 2.264, + "epoch": 0.3, + "grad_norm": 0.1435546875, + "learning_rate": 0.00019859328555218455, + "loss": 2.1621, "step": 875 }, { - "epoch": 0.15, - "grad_norm": 0.19921875, - "learning_rate": 0.00014902624894157495, - "loss": 2.2147, + "epoch": 0.3, + "grad_norm": 0.1435546875, + "learning_rate": 0.0001985434389821319, + "loss": 2.1608, "step": 880 }, { - "epoch": 0.15, - "grad_norm": 0.193359375, - "learning_rate": 0.00014987298899237934, - "loss": 2.1813, + "epoch": 0.3, + "grad_norm": 0.142578125, + "learning_rate": 0.00019849273104648592, + "loss": 2.1602, "step": 885 }, { - "epoch": 0.15, - "grad_norm": 0.2001953125, - "learning_rate": 0.00015071972904318374, - "loss": 2.1541, + "epoch": 0.3, + "grad_norm": 0.1455078125, + "learning_rate": 0.00019844116218848334, + "loss": 2.15, "step": 890 }, { - "epoch": 0.15, - "grad_norm": 0.205078125, - "learning_rate": 0.00015156646909398816, - "loss": 2.1661, + "epoch": 0.3, + "grad_norm": 0.142578125, + "learning_rate": 0.0001983887328588862, + "loss": 2.1896, "step": 895 }, { - "epoch": 0.15, - "grad_norm": 0.2060546875, - "learning_rate": 0.00015241320914479255, - "loss": 2.2126, + "epoch": 0.3, + "grad_norm": 0.1416015625, + "learning_rate": 0.00019833544351597788, + "loss": 2.1391, "step": 900 }, { - "epoch": 0.15, - "grad_norm": 0.19921875, - "learning_rate": 0.00015325994919559695, - "loss": 2.192, + "epoch": 0.31, + "grad_norm": 0.1484375, + "learning_rate": 0.0001982812946255591, + "loss": 2.149, "step": 905 }, { - "epoch": 0.15, - "grad_norm": 0.1982421875, - "learning_rate": 0.00015410668924640137, - "loss": 2.2546, + "epoch": 0.31, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001982262866609439, + "loss": 2.1337, "step": 910 }, { - "epoch": 0.15, - "grad_norm": 0.197265625, - "learning_rate": 0.00015495342929720577, - "loss": 2.1839, + "epoch": 0.31, + "grad_norm": 0.14453125, + "learning_rate": 0.00019817042010295544, + "loss": 2.1388, "step": 915 }, { - "epoch": 0.16, - "grad_norm": 0.2041015625, - "learning_rate": 0.00015580016934801016, - "loss": 2.2013, + "epoch": 0.31, + "grad_norm": 0.1435546875, + "learning_rate": 0.00019811369543992183, + "loss": 2.1663, "step": 920 }, { - "epoch": 0.16, - "grad_norm": 0.19140625, - "learning_rate": 0.00015664690939881456, - "loss": 2.2374, + "epoch": 0.31, + "grad_norm": 0.14453125, + "learning_rate": 0.0001980561131676718, + "loss": 2.1468, "step": 925 }, { - "epoch": 0.16, - "grad_norm": 0.19921875, - "learning_rate": 0.00015749364944961898, - "loss": 2.1984, + "epoch": 0.31, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001979976737895305, + "loss": 2.1823, "step": 930 }, { - "epoch": 0.16, - "grad_norm": 0.19921875, - "learning_rate": 0.00015834038950042337, - "loss": 2.171, + "epoch": 0.32, + "grad_norm": 0.14453125, + "learning_rate": 0.00019793837781631506, + "loss": 2.1764, "step": 935 }, { - "epoch": 0.16, - "grad_norm": 0.19140625, - "learning_rate": 0.0001591871295512278, - "loss": 2.199, + "epoch": 0.32, + "grad_norm": 0.142578125, + "learning_rate": 0.0001978782257663299, + "loss": 2.152, "step": 940 }, { - "epoch": 0.16, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001600338696020322, - "loss": 2.2011, + "epoch": 0.32, + "grad_norm": 0.146484375, + "learning_rate": 0.00019781721816536257, + "loss": 2.1694, "step": 945 }, { - "epoch": 0.16, - "grad_norm": 0.2109375, - "learning_rate": 0.0001608806096528366, - "loss": 2.2099, - "step": 950 + "epoch": 0.32, + "grad_norm": 0.14453125, + "learning_rate": 0.00019775535554667886, + "loss": 2.178, + "step": 950 }, { - "epoch": 0.16, - "grad_norm": 0.1982421875, - "learning_rate": 0.00016172734970364098, - "loss": 2.203, + "epoch": 0.32, + "grad_norm": 0.1455078125, + "learning_rate": 0.00019769263845101828, + "loss": 2.1894, "step": 955 }, { - "epoch": 0.16, - "grad_norm": 0.197265625, - "learning_rate": 0.0001625740897544454, - "loss": 2.2233, + "epoch": 0.33, + "grad_norm": 0.1396484375, + "learning_rate": 0.00019762906742658935, + "loss": 2.1464, "step": 960 }, { - "epoch": 0.16, - "grad_norm": 0.19921875, - "learning_rate": 0.0001634208298052498, - "loss": 2.2154, + "epoch": 0.33, + "grad_norm": 0.1484375, + "learning_rate": 0.00019756464302906465, + "loss": 2.1603, "step": 965 }, { - "epoch": 0.16, - "grad_norm": 0.197265625, - "learning_rate": 0.0001642675698560542, - "loss": 2.1916, + "epoch": 0.33, + "grad_norm": 0.1494140625, + "learning_rate": 0.0001974993658215762, + "loss": 2.161, "step": 970 }, { - "epoch": 0.17, - "grad_norm": 0.193359375, - "learning_rate": 0.00016511430990685862, - "loss": 2.2565, + "epoch": 0.33, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019743323637471028, + "loss": 2.1644, "step": 975 }, { - "epoch": 0.17, - "grad_norm": 0.1875, - "learning_rate": 0.00016596104995766298, - "loss": 2.2449, + "epoch": 0.33, + "grad_norm": 0.140625, + "learning_rate": 0.00019736625526650269, + "loss": 2.1671, "step": 980 }, { - "epoch": 0.17, - "grad_norm": 0.19140625, - "learning_rate": 0.0001668077900084674, - "loss": 2.2073, + "epoch": 0.33, + "grad_norm": 0.1455078125, + "learning_rate": 0.0001972984230824335, + "loss": 2.2011, "step": 985 }, { - "epoch": 0.17, - "grad_norm": 0.1884765625, - "learning_rate": 0.0001676545300592718, - "loss": 2.2011, + "epoch": 0.34, + "grad_norm": 0.1435546875, + "learning_rate": 0.00019722974041542203, + "loss": 2.1563, "step": 990 }, { - "epoch": 0.17, - "grad_norm": 0.193359375, - "learning_rate": 0.00016850127011007622, - "loss": 2.2025, + "epoch": 0.34, + "grad_norm": 0.1396484375, + "learning_rate": 0.00019716020786582169, + "loss": 2.1351, "step": 995 }, { - "epoch": 0.17, - "grad_norm": 0.1865234375, - "learning_rate": 0.00016934801016088062, - "loss": 2.2179, + "epoch": 0.34, + "grad_norm": 0.1435546875, + "learning_rate": 0.0001970898260414146, + "loss": 2.1661, "step": 1000 }, { - "epoch": 0.17, - "grad_norm": 0.197265625, - "learning_rate": 0.00017019475021168501, - "loss": 2.2083, + "epoch": 0.34, + "grad_norm": 0.1572265625, + "learning_rate": 0.00019701859555740648, + "loss": 2.16, "step": 1005 }, { - "epoch": 0.17, - "grad_norm": 0.1865234375, - "learning_rate": 0.00017104149026248944, - "loss": 2.1914, + "epoch": 0.34, + "grad_norm": 0.1513671875, + "learning_rate": 0.00019694651703642104, + "loss": 2.1325, "step": 1010 }, { - "epoch": 0.17, - "grad_norm": 0.1962890625, - "learning_rate": 0.00017188823031329383, - "loss": 2.2375, + "epoch": 0.34, + "grad_norm": 0.1484375, + "learning_rate": 0.0001968735911084948, + "loss": 2.1696, "step": 1015 }, { - "epoch": 0.17, - "grad_norm": 0.193359375, - "learning_rate": 0.00017273497036409823, - "loss": 2.2289, + "epoch": 0.35, + "grad_norm": 0.142578125, + "learning_rate": 0.0001967998184110713, + "loss": 2.138, "step": 1020 }, { - "epoch": 0.17, - "grad_norm": 0.1904296875, - "learning_rate": 0.00017358171041490262, - "loss": 2.2088, + "epoch": 0.35, + "grad_norm": 0.1484375, + "learning_rate": 0.00019672519958899583, + "loss": 2.1445, "step": 1025 }, { - "epoch": 0.17, - "grad_norm": 0.193359375, - "learning_rate": 0.00017442845046570704, - "loss": 2.1675, + "epoch": 0.35, + "grad_norm": 0.1455078125, + "learning_rate": 0.00019664973529450946, + "loss": 2.1684, "step": 1030 }, { - "epoch": 0.18, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017527519051651144, - "loss": 2.2139, + "epoch": 0.35, + "grad_norm": 0.146484375, + "learning_rate": 0.00019657342618724358, + "loss": 2.1496, "step": 1035 }, { - "epoch": 0.18, - "grad_norm": 0.1845703125, - "learning_rate": 0.00017612193056731586, - "loss": 2.1872, + "epoch": 0.35, + "grad_norm": 0.146484375, + "learning_rate": 0.00019649627293421413, + "loss": 2.1179, "step": 1040 }, { - "epoch": 0.18, - "grad_norm": 0.189453125, - "learning_rate": 0.00017696867061812023, - "loss": 2.1982, + "epoch": 0.35, + "grad_norm": 0.142578125, + "learning_rate": 0.00019641827620981564, + "loss": 2.1262, "step": 1045 }, { - "epoch": 0.18, - "grad_norm": 0.1953125, - "learning_rate": 0.00017781541066892465, - "loss": 2.2075, + "epoch": 0.36, + "grad_norm": 0.15234375, + "learning_rate": 0.0001963394366958154, + "loss": 2.1477, "step": 1050 }, { - "epoch": 0.18, - "grad_norm": 0.19921875, - "learning_rate": 0.00017866215071972905, - "loss": 2.2447, + "epoch": 0.36, + "grad_norm": 0.1416015625, + "learning_rate": 0.00019625975508134755, + "loss": 2.1596, "step": 1055 }, { - "epoch": 0.18, - "grad_norm": 0.193359375, - "learning_rate": 0.00017950889077053344, - "loss": 2.1948, + "epoch": 0.36, + "grad_norm": 0.142578125, + "learning_rate": 0.00019617923206290692, + "loss": 2.1651, "step": 1060 }, { - "epoch": 0.18, - "grad_norm": 0.205078125, - "learning_rate": 0.00018035563082133786, - "loss": 2.2087, + "epoch": 0.36, + "grad_norm": 0.150390625, + "learning_rate": 0.00019609786834434313, + "loss": 2.1671, "step": 1065 }, { - "epoch": 0.18, - "grad_norm": 0.20703125, - "learning_rate": 0.00018120237087214226, - "loss": 2.2168, + "epoch": 0.36, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019601566463685425, + "loss": 2.1639, "step": 1070 }, { - "epoch": 0.18, - "grad_norm": 0.2080078125, - "learning_rate": 0.00018204911092294668, - "loss": 2.2122, + "epoch": 0.36, + "grad_norm": 0.14453125, + "learning_rate": 0.00019593262165898076, + "loss": 2.1581, "step": 1075 }, { - "epoch": 0.18, - "grad_norm": 0.189453125, - "learning_rate": 0.00018289585097375105, - "loss": 2.2347, + "epoch": 0.37, + "grad_norm": 0.1484375, + "learning_rate": 0.0001958487401365991, + "loss": 2.1628, "step": 1080 }, { - "epoch": 0.18, - "grad_norm": 0.1962890625, - "learning_rate": 0.00018374259102455547, - "loss": 2.2307, + "epoch": 0.37, + "grad_norm": 0.1416015625, + "learning_rate": 0.00019576402080291545, + "loss": 2.1963, "step": 1085 }, { - "epoch": 0.18, - "grad_norm": 0.18359375, - "learning_rate": 0.00018458933107535987, - "loss": 2.1836, + "epoch": 0.37, + "grad_norm": 0.1435546875, + "learning_rate": 0.00019567846439845927, + "loss": 2.1792, "step": 1090 }, { - "epoch": 0.19, - "grad_norm": 0.1953125, - "learning_rate": 0.0001854360711261643, - "loss": 2.1929, + "epoch": 0.37, + "grad_norm": 0.146484375, + "learning_rate": 0.00019559207167107684, + "loss": 2.1703, "step": 1095 }, { - "epoch": 0.19, - "grad_norm": 0.189453125, - "learning_rate": 0.00018628281117696868, - "loss": 2.1908, + "epoch": 0.37, + "grad_norm": 0.1455078125, + "learning_rate": 0.00019550484337592464, + "loss": 2.1512, "step": 1100 }, { - "epoch": 0.19, - "grad_norm": 0.18359375, - "learning_rate": 0.00018712955122777308, - "loss": 2.2123, + "epoch": 0.37, + "grad_norm": 0.1435546875, + "learning_rate": 0.00019541678027546296, + "loss": 2.1654, "step": 1105 }, { - "epoch": 0.19, - "grad_norm": 0.1875, - "learning_rate": 0.00018797629127857747, - "loss": 2.1282, + "epoch": 0.38, + "grad_norm": 0.140625, + "learning_rate": 0.00019532788313944904, + "loss": 2.1729, "step": 1110 }, { - "epoch": 0.19, - "grad_norm": 0.1953125, - "learning_rate": 0.0001888230313293819, - "loss": 2.1387, + "epoch": 0.38, + "grad_norm": 0.1494140625, + "learning_rate": 0.00019523815274493031, + "loss": 2.1702, "step": 1115 }, { - "epoch": 0.19, - "grad_norm": 0.193359375, - "learning_rate": 0.0001896697713801863, - "loss": 2.2153, + "epoch": 0.38, + "grad_norm": 0.15234375, + "learning_rate": 0.00019514758987623784, + "loss": 2.1723, "step": 1120 }, { - "epoch": 0.19, - "grad_norm": 0.1875, - "learning_rate": 0.0001905165114309907, - "loss": 2.1932, + "epoch": 0.38, + "grad_norm": 0.1416015625, + "learning_rate": 0.00019505619532497926, + "loss": 2.174, "step": 1125 }, { - "epoch": 0.19, - "grad_norm": 0.1923828125, - "learning_rate": 0.0001913632514817951, - "loss": 2.1695, + "epoch": 0.38, + "grad_norm": 0.1435546875, + "learning_rate": 0.00019496396989003193, + "loss": 2.1431, "step": 1130 }, { - "epoch": 0.19, - "grad_norm": 0.1865234375, - "learning_rate": 0.0001922099915325995, - "loss": 2.1855, + "epoch": 0.38, + "grad_norm": 0.1455078125, + "learning_rate": 0.00019487091437753594, + "loss": 2.1579, "step": 1135 }, { - "epoch": 0.19, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001930567315834039, - "loss": 2.2053, + "epoch": 0.39, + "grad_norm": 0.14453125, + "learning_rate": 0.00019477702960088702, + "loss": 2.1764, "step": 1140 }, { - "epoch": 0.19, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001939034716342083, - "loss": 2.2082, + "epoch": 0.39, + "grad_norm": 0.146484375, + "learning_rate": 0.0001946823163807296, + "loss": 2.1656, "step": 1145 }, { - "epoch": 0.19, - "grad_norm": 0.1962890625, - "learning_rate": 0.00019475021168501272, - "loss": 2.1713, + "epoch": 0.39, + "grad_norm": 0.14453125, + "learning_rate": 0.00019458677554494932, + "loss": 2.1645, "step": 1150 }, { - "epoch": 0.2, - "grad_norm": 0.1865234375, - "learning_rate": 0.0001955969517358171, - "loss": 2.1904, + "epoch": 0.39, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001944904079286662, + "loss": 2.1502, "step": 1155 }, { - "epoch": 0.2, - "grad_norm": 0.1953125, - "learning_rate": 0.0001964436917866215, - "loss": 2.235, + "epoch": 0.39, + "grad_norm": 0.14453125, + "learning_rate": 0.00019439321437422695, + "loss": 2.1561, "step": 1160 }, { - "epoch": 0.2, - "grad_norm": 0.1982421875, - "learning_rate": 0.00019729043183742593, - "loss": 2.1879, + "epoch": 0.39, + "grad_norm": 0.146484375, + "learning_rate": 0.00019429519573119794, + "loss": 2.1558, "step": 1165 }, { - "epoch": 0.2, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019813717188823032, - "loss": 2.2022, + "epoch": 0.4, + "grad_norm": 0.150390625, + "learning_rate": 0.00019419635285635746, + "loss": 2.1707, "step": 1170 }, { - "epoch": 0.2, - "grad_norm": 0.1953125, - "learning_rate": 0.00019898391193903472, - "loss": 2.1715, + "epoch": 0.4, + "grad_norm": 0.1455078125, + "learning_rate": 0.0001940966866136885, + "loss": 2.1393, "step": 1175 }, { - "epoch": 0.2, - "grad_norm": 0.189453125, - "learning_rate": 0.00019983065198983911, - "loss": 2.2363, + "epoch": 0.4, + "grad_norm": 0.1494140625, + "learning_rate": 0.00019399619787437104, + "loss": 2.1422, "step": 1180 }, { - "epoch": 0.2, - "grad_norm": 0.197265625, - "learning_rate": 0.00019999993008532863, - "loss": 2.1869, + "epoch": 0.4, + "grad_norm": 0.146484375, + "learning_rate": 0.0001938948875167745, + "loss": 2.1468, "step": 1185 }, { - "epoch": 0.2, - "grad_norm": 0.2001953125, - "learning_rate": 0.00019999964605714373, - "loss": 2.1716, + "epoch": 0.4, + "grad_norm": 0.1455078125, + "learning_rate": 0.00019379275642645002, + "loss": 2.1618, "step": 1190 }, { - "epoch": 0.2, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019999914354639845, - "loss": 2.1728, + "epoch": 0.4, + "grad_norm": 0.1435546875, + "learning_rate": 0.0001936898054961228, + "loss": 2.1826, "step": 1195 }, { - "epoch": 0.2, - "grad_norm": 0.1845703125, - "learning_rate": 0.00019999842255419064, - "loss": 2.1702, + "epoch": 0.41, + "grad_norm": 0.1484375, + "learning_rate": 0.00019358603562568416, + "loss": 2.1238, "step": 1200 }, { - "epoch": 0.2, - "grad_norm": 0.1845703125, - "learning_rate": 0.0001999974830820956, - "loss": 2.1999, + "epoch": 0.41, + "grad_norm": 0.1435546875, + "learning_rate": 0.0001934814477221838, + "loss": 2.1468, "step": 1205 }, { - "epoch": 0.2, - "grad_norm": 0.193359375, - "learning_rate": 0.00019999632513216587, - "loss": 2.2017, + "epoch": 0.41, + "grad_norm": 0.140625, + "learning_rate": 0.0001933760426998218, + "loss": 2.1826, "step": 1210 }, { - "epoch": 0.21, - "grad_norm": 0.1865234375, - "learning_rate": 0.00019999494870693142, - "loss": 2.1771, + "epoch": 0.41, + "grad_norm": 0.14453125, + "learning_rate": 0.0001932698214799407, + "loss": 2.1484, "step": 1215 }, { - "epoch": 0.21, - "grad_norm": 0.1875, - "learning_rate": 0.00019999335380939948, - "loss": 2.2625, + "epoch": 0.41, + "grad_norm": 0.14453125, + "learning_rate": 0.0001931627849910174, + "loss": 2.1427, "step": 1220 }, { - "epoch": 0.21, - "grad_norm": 0.1875, - "learning_rate": 0.00019999154044305465, - "loss": 2.1442, + "epoch": 0.41, + "grad_norm": 0.1435546875, + "learning_rate": 0.00019305493416865493, + "loss": 2.1281, "step": 1225 }, { - "epoch": 0.21, - "grad_norm": 0.1875, - "learning_rate": 0.00019998950861185885, - "loss": 2.1751, + "epoch": 0.42, + "grad_norm": 0.14453125, + "learning_rate": 0.00019294626995557457, + "loss": 2.1611, "step": 1230 }, { - "epoch": 0.21, - "grad_norm": 0.185546875, - "learning_rate": 0.00019998725832025125, - "loss": 2.1923, + "epoch": 0.42, + "grad_norm": 0.1494140625, + "learning_rate": 0.00019283679330160726, + "loss": 2.1455, "step": 1235 }, { - "epoch": 0.21, - "grad_norm": 0.1904296875, - "learning_rate": 0.0001999847895731484, - "loss": 2.1393, + "epoch": 0.42, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001927265051636856, + "loss": 2.1553, "step": 1240 }, { - "epoch": 0.21, - "grad_norm": 0.1875, - "learning_rate": 0.0001999821023759441, - "loss": 2.1904, + "epoch": 0.42, + "grad_norm": 0.14453125, + "learning_rate": 0.00019261540650583522, + "loss": 2.1855, "step": 1245 }, { - "epoch": 0.21, - "grad_norm": 0.1865234375, - "learning_rate": 0.00019997919673450938, - "loss": 2.1866, + "epoch": 0.42, + "grad_norm": 0.142578125, + "learning_rate": 0.00019250349829916661, + "loss": 2.1404, "step": 1250 }, { - "epoch": 0.21, - "grad_norm": 0.19140625, - "learning_rate": 0.00019997607265519264, - "loss": 2.1876, + "epoch": 0.43, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001923907815218664, + "loss": 2.1797, "step": 1255 }, { - "epoch": 0.21, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019997273014481942, - "loss": 2.1773, + "epoch": 0.43, + "grad_norm": 0.1416015625, + "learning_rate": 0.00019227725715918897, + "loss": 2.1369, "step": 1260 }, { - "epoch": 0.21, - "grad_norm": 0.185546875, - "learning_rate": 0.0001999691692106926, - "loss": 2.2087, + "epoch": 0.43, + "grad_norm": 0.14453125, + "learning_rate": 0.00019216292620344777, + "loss": 2.1737, "step": 1265 }, { - "epoch": 0.22, - "grad_norm": 0.1826171875, - "learning_rate": 0.00019996538986059221, - "loss": 2.1703, + "epoch": 0.43, + "grad_norm": 0.1533203125, + "learning_rate": 0.00019204778965400667, + "loss": 2.1469, "step": 1270 }, { - "epoch": 0.22, - "grad_norm": 0.1904296875, - "learning_rate": 0.0001999613921027755, - "loss": 2.1993, + "epoch": 0.43, + "grad_norm": 0.1455078125, + "learning_rate": 0.0001919318485172712, + "loss": 2.1758, "step": 1275 }, { - "epoch": 0.22, - "grad_norm": 0.189453125, - "learning_rate": 0.0001999571759459769, - "loss": 2.1809, + "epoch": 0.43, + "grad_norm": 0.15234375, + "learning_rate": 0.00019181510380667977, + "loss": 2.1649, "step": 1280 }, { - "epoch": 0.22, - "grad_norm": 0.1875, - "learning_rate": 0.000199952741399408, - "loss": 2.1823, + "epoch": 0.44, + "grad_norm": 0.14453125, + "learning_rate": 0.0001916975565426948, + "loss": 2.17, "step": 1285 }, { - "epoch": 0.22, - "grad_norm": 0.18359375, - "learning_rate": 0.00019994808847275755, - "loss": 2.1903, + "epoch": 0.44, + "grad_norm": 0.1455078125, + "learning_rate": 0.00019157920775279383, + "loss": 2.1468, "step": 1290 }, { - "epoch": 0.22, - "grad_norm": 0.185546875, - "learning_rate": 0.00019994321717619143, - "loss": 2.1749, + "epoch": 0.44, + "grad_norm": 0.1513671875, + "learning_rate": 0.0001914600584714605, + "loss": 2.1454, "step": 1295 }, { - "epoch": 0.22, - "grad_norm": 0.1845703125, - "learning_rate": 0.0001999381275203526, - "loss": 2.1858, + "epoch": 0.44, + "grad_norm": 0.1552734375, + "learning_rate": 0.0001913401097401755, + "loss": 2.1362, "step": 1300 }, { - "epoch": 0.22, - "grad_norm": 0.185546875, - "learning_rate": 0.00019993281951636113, - "loss": 2.1789, + "epoch": 0.44, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019121936260740752, + "loss": 2.1761, "step": 1305 }, { - "epoch": 0.22, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019992729317581408, - "loss": 2.1869, + "epoch": 0.44, + "grad_norm": 0.1435546875, + "learning_rate": 0.0001910978181286041, + "loss": 2.1609, "step": 1310 }, { - "epoch": 0.22, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019992154851078563, - "loss": 2.2076, + "epoch": 0.45, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019097547736618228, + "loss": 2.1565, "step": 1315 }, { - "epoch": 0.22, - "grad_norm": 0.1865234375, - "learning_rate": 0.0001999155855338269, - "loss": 2.2026, + "epoch": 0.45, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001908523413895194, + "loss": 2.1391, "step": 1320 }, { - "epoch": 0.22, - "grad_norm": 0.193359375, - "learning_rate": 0.00019990940425796604, - "loss": 2.1573, + "epoch": 0.45, + "grad_norm": 0.14453125, + "learning_rate": 0.0001907284112749438, + "loss": 2.1393, "step": 1325 }, { - "epoch": 0.23, - "grad_norm": 0.1865234375, - "learning_rate": 0.000199903004696708, - "loss": 2.1708, + "epoch": 0.45, + "grad_norm": 0.1474609375, + "learning_rate": 0.00019060368810572539, + "loss": 2.1562, "step": 1330 }, { - "epoch": 0.23, - "grad_norm": 0.1865234375, - "learning_rate": 0.00019989638686403484, - "loss": 2.2057, + "epoch": 0.45, + "grad_norm": 0.1494140625, + "learning_rate": 0.00019047817297206598, + "loss": 2.1202, "step": 1335 }, { - "epoch": 0.23, - "grad_norm": 0.1884765625, - "learning_rate": 0.0001998895507744054, - "loss": 2.201, + "epoch": 0.45, + "grad_norm": 0.1513671875, + "learning_rate": 0.00019035186697109011, + "loss": 2.1534, "step": 1340 }, { - "epoch": 0.23, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019988249644275542, - "loss": 2.2209, + "epoch": 0.46, + "grad_norm": 0.14453125, + "learning_rate": 0.0001902247712068352, + "loss": 2.1503, "step": 1345 }, { - "epoch": 0.23, - "grad_norm": 0.1796875, - "learning_rate": 0.0001998752238844974, - "loss": 2.2228, + "epoch": 0.46, + "grad_norm": 0.142578125, + "learning_rate": 0.0001900968867902419, + "loss": 2.1799, "step": 1350 }, { - "epoch": 0.23, - "grad_norm": 0.189453125, - "learning_rate": 0.00019986773311552069, - "loss": 2.1913, + "epoch": 0.46, + "grad_norm": 0.150390625, + "learning_rate": 0.0001899682148391446, + "loss": 2.154, "step": 1355 }, { - "epoch": 0.23, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019986002415219137, - "loss": 2.1614, + "epoch": 0.46, + "grad_norm": 0.1513671875, + "learning_rate": 0.00018983875647826136, + "loss": 2.1285, "step": 1360 }, { - "epoch": 0.23, - "grad_norm": 0.1875, - "learning_rate": 0.00019985209701135222, - "loss": 2.1918, + "epoch": 0.46, + "grad_norm": 0.1474609375, + "learning_rate": 0.00018970851283918428, + "loss": 2.1471, "step": 1365 }, { - "epoch": 0.23, - "grad_norm": 0.19140625, - "learning_rate": 0.00019984395171032278, - "loss": 2.1789, + "epoch": 0.46, + "grad_norm": 0.1435546875, + "learning_rate": 0.00018957748506036957, + "loss": 2.1369, "step": 1370 }, { - "epoch": 0.23, - "grad_norm": 0.193359375, - "learning_rate": 0.0001998355882668991, - "loss": 2.1664, + "epoch": 0.47, + "grad_norm": 0.1474609375, + "learning_rate": 0.00018944567428712765, + "loss": 2.1514, "step": 1375 }, { - "epoch": 0.23, - "grad_norm": 0.19921875, - "learning_rate": 0.00019982700669935396, - "loss": 2.2059, + "epoch": 0.47, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001893130816716129, + "loss": 2.1668, "step": 1380 }, { - "epoch": 0.23, - "grad_norm": 0.185546875, - "learning_rate": 0.00019981820702643662, - "loss": 2.1638, + "epoch": 0.47, + "grad_norm": 0.1455078125, + "learning_rate": 0.00018917970837281392, + "loss": 2.1723, "step": 1385 }, { - "epoch": 0.24, - "grad_norm": 0.189453125, - "learning_rate": 0.00019980918926737294, - "loss": 2.2125, + "epoch": 0.47, + "grad_norm": 0.1484375, + "learning_rate": 0.00018904555555654317, + "loss": 2.1846, "step": 1390 }, { - "epoch": 0.24, - "grad_norm": 0.1904296875, - "learning_rate": 0.0001997999534418652, - "loss": 2.1583, + "epoch": 0.47, + "grad_norm": 0.150390625, + "learning_rate": 0.0001889106243954269, + "loss": 2.1571, "step": 1395 }, { - "epoch": 0.24, - "grad_norm": 0.1806640625, - "learning_rate": 0.00019979049957009212, - "loss": 2.1899, + "epoch": 0.47, + "grad_norm": 0.1484375, + "learning_rate": 0.00018877491606889476, + "loss": 2.1312, "step": 1400 }, { - "epoch": 0.24, - "grad_norm": 0.1845703125, - "learning_rate": 0.00019978082767270884, - "loss": 2.2027, + "epoch": 0.48, + "grad_norm": 0.146484375, + "learning_rate": 0.0001886384317631697, + "loss": 2.1694, "step": 1405 }, { - "epoch": 0.24, - "grad_norm": 0.18359375, - "learning_rate": 0.0001997709377708469, - "loss": 2.2096, + "epoch": 0.48, + "grad_norm": 0.146484375, + "learning_rate": 0.00018850117267125738, + "loss": 2.1772, "step": 1410 }, { - "epoch": 0.24, - "grad_norm": 0.1845703125, - "learning_rate": 0.000199760829886114, - "loss": 2.1597, + "epoch": 0.48, + "grad_norm": 0.14453125, + "learning_rate": 0.00018836313999293593, + "loss": 2.1599, "step": 1415 }, { - "epoch": 0.24, - "grad_norm": 0.1953125, - "learning_rate": 0.00019975050404059426, - "loss": 2.1986, + "epoch": 0.48, + "grad_norm": 0.1435546875, + "learning_rate": 0.00018822433493474532, + "loss": 2.1282, "step": 1420 }, { - "epoch": 0.24, - "grad_norm": 0.193359375, - "learning_rate": 0.00019973996025684788, - "loss": 2.2003, + "epoch": 0.48, + "grad_norm": 0.1533203125, + "learning_rate": 0.0001880847587099769, + "loss": 2.1315, "step": 1425 }, { - "epoch": 0.24, - "grad_norm": 0.189453125, - "learning_rate": 0.00019972919855791132, - "loss": 2.1415, + "epoch": 0.48, + "grad_norm": 0.146484375, + "learning_rate": 0.00018794441253866274, + "loss": 2.1701, "step": 1430 }, { - "epoch": 0.24, - "grad_norm": 0.1865234375, - "learning_rate": 0.00019971821896729703, - "loss": 2.1862, + "epoch": 0.49, + "grad_norm": 0.150390625, + "learning_rate": 0.00018780329764756505, + "loss": 2.15, "step": 1435 }, { - "epoch": 0.24, - "grad_norm": 0.1875, - "learning_rate": 0.00019970702150899365, - "loss": 2.1944, + "epoch": 0.49, + "grad_norm": 0.1474609375, + "learning_rate": 0.00018766141527016533, + "loss": 2.1336, "step": 1440 }, { - "epoch": 0.24, - "grad_norm": 0.193359375, - "learning_rate": 0.00019969560620746571, - "loss": 2.2099, + "epoch": 0.49, + "grad_norm": 0.14453125, + "learning_rate": 0.00018751876664665367, + "loss": 2.1352, "step": 1445 }, { - "epoch": 0.25, - "grad_norm": 0.1865234375, - "learning_rate": 0.00019968397308765375, - "loss": 2.2194, + "epoch": 0.49, + "grad_norm": 0.150390625, + "learning_rate": 0.00018737535302391795, + "loss": 2.1777, "step": 1450 }, { - "epoch": 0.25, - "grad_norm": 0.1875, - "learning_rate": 0.00019967212217497426, - "loss": 2.2112, + "epoch": 0.49, + "grad_norm": 0.1484375, + "learning_rate": 0.00018723117565553284, + "loss": 2.1516, "step": 1455 }, { - "epoch": 0.25, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019966005349531942, - "loss": 2.1745, + "epoch": 0.49, + "grad_norm": 0.150390625, + "learning_rate": 0.00018708623580174889, + "loss": 2.1431, "step": 1460 }, { - "epoch": 0.25, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019964776707505734, - "loss": 2.1624, + "epoch": 0.5, + "grad_norm": 0.1484375, + "learning_rate": 0.00018694053472948156, + "loss": 2.1385, "step": 1465 }, { - "epoch": 0.25, - "grad_norm": 0.1904296875, - "learning_rate": 0.0001996352629410318, - "loss": 2.1977, + "epoch": 0.5, + "grad_norm": 0.1484375, + "learning_rate": 0.00018679407371230002, + "loss": 2.1572, "step": 1470 }, { - "epoch": 0.25, - "grad_norm": 0.181640625, - "learning_rate": 0.00019962254112056223, - "loss": 2.2192, + "epoch": 0.5, + "grad_norm": 0.1484375, + "learning_rate": 0.00018664685403041619, + "loss": 2.1623, "step": 1475 }, { - "epoch": 0.25, - "grad_norm": 0.189453125, - "learning_rate": 0.00019960960164144368, - "loss": 2.1652, + "epoch": 0.5, + "grad_norm": 0.142578125, + "learning_rate": 0.0001864988769706734, + "loss": 2.1619, "step": 1480 }, { - "epoch": 0.25, - "grad_norm": 0.1982421875, - "learning_rate": 0.00019959644453194678, - "loss": 2.1841, + "epoch": 0.5, + "grad_norm": 0.1455078125, + "learning_rate": 0.0001863501438265352, + "loss": 2.1514, "step": 1485 }, { - "epoch": 0.25, - "grad_norm": 0.19140625, - "learning_rate": 0.00019958306982081761, - "loss": 2.2137, + "epoch": 0.5, + "grad_norm": 0.1494140625, + "learning_rate": 0.00018620065589807413, + "loss": 2.1649, "step": 1490 }, { - "epoch": 0.25, - "grad_norm": 0.18359375, - "learning_rate": 0.00019956947753727765, - "loss": 2.1878, + "epoch": 0.51, + "grad_norm": 0.1484375, + "learning_rate": 0.00018605041449196012, + "loss": 2.1802, "step": 1495 }, { - "epoch": 0.25, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019955566771102384, - "loss": 2.148, + "epoch": 0.51, + "grad_norm": 0.14453125, + "learning_rate": 0.00018589942092144942, + "loss": 2.1805, "step": 1500 }, { - "epoch": 0.25, - "grad_norm": 0.1923828125, - "learning_rate": 0.0001995416403722283, - "loss": 2.2543, + "epoch": 0.51, + "grad_norm": 0.1552734375, + "learning_rate": 0.00018574767650637278, + "loss": 2.1141, "step": 1505 }, { - "epoch": 0.26, - "grad_norm": 0.1865234375, - "learning_rate": 0.00019952739555153848, - "loss": 2.1969, + "epoch": 0.51, + "grad_norm": 0.1484375, + "learning_rate": 0.0001855951825731241, + "loss": 2.1333, "step": 1510 }, { - "epoch": 0.26, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001995129332800769, - "loss": 2.2019, + "epoch": 0.51, + "grad_norm": 0.1435546875, + "learning_rate": 0.00018544194045464886, + "loss": 2.1366, "step": 1515 }, { - "epoch": 0.26, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019949825358944113, - "loss": 2.1805, + "epoch": 0.51, + "grad_norm": 0.1474609375, + "learning_rate": 0.00018528795149043236, + "loss": 2.158, "step": 1520 }, { - "epoch": 0.26, - "grad_norm": 0.19140625, - "learning_rate": 0.00019948335651170403, - "loss": 2.1349, + "epoch": 0.52, + "grad_norm": 0.1484375, + "learning_rate": 0.00018513321702648807, + "loss": 2.1432, "step": 1525 }, { - "epoch": 0.26, - "grad_norm": 0.193359375, - "learning_rate": 0.00019946824207941308, - "loss": 2.1884, + "epoch": 0.52, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001849777384153458, + "loss": 2.1542, "step": 1530 }, { - "epoch": 0.26, - "grad_norm": 0.19140625, - "learning_rate": 0.00019945291032559087, - "loss": 2.1758, + "epoch": 0.52, + "grad_norm": 0.1455078125, + "learning_rate": 0.00018482151701604003, + "loss": 2.1629, "step": 1535 }, { - "epoch": 0.26, - "grad_norm": 0.1904296875, - "learning_rate": 0.0001994373612837347, - "loss": 2.2044, + "epoch": 0.52, + "grad_norm": 0.1455078125, + "learning_rate": 0.00018466455419409786, + "loss": 2.1542, "step": 1540 }, { - "epoch": 0.26, - "grad_norm": 0.18359375, - "learning_rate": 0.00019942159498781667, - "loss": 2.1701, + "epoch": 0.52, + "grad_norm": 0.1455078125, + "learning_rate": 0.0001845068513215271, + "loss": 2.1429, "step": 1545 }, { - "epoch": 0.26, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019940561147228347, - "loss": 2.1771, + "epoch": 0.52, + "grad_norm": 0.146484375, + "learning_rate": 0.00018434840977680453, + "loss": 2.1376, "step": 1550 }, { - "epoch": 0.26, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001993894107720564, - "loss": 2.1836, + "epoch": 0.53, + "grad_norm": 0.146484375, + "learning_rate": 0.00018418923094486338, + "loss": 2.1539, "step": 1555 }, { - "epoch": 0.26, - "grad_norm": 0.1865234375, - "learning_rate": 0.00019937299292253137, - "loss": 2.1649, + "epoch": 0.53, + "grad_norm": 0.146484375, + "learning_rate": 0.00018402931621708165, + "loss": 2.1611, "step": 1560 }, { - "epoch": 0.27, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019935635795957857, - "loss": 2.1816, + "epoch": 0.53, + "grad_norm": 0.1494140625, + "learning_rate": 0.00018386866699126973, + "loss": 2.1788, "step": 1565 }, { - "epoch": 0.27, - "grad_norm": 0.193359375, - "learning_rate": 0.00019933950591954265, - "loss": 2.189, + "epoch": 0.53, + "grad_norm": 0.1474609375, + "learning_rate": 0.00018370728467165828, + "loss": 2.142, "step": 1570 }, { - "epoch": 0.27, - "grad_norm": 0.1943359375, - "learning_rate": 0.0001993224368392425, - "loss": 2.2155, + "epoch": 0.53, + "grad_norm": 0.146484375, + "learning_rate": 0.0001835451706688859, + "loss": 2.175, "step": 1575 }, { - "epoch": 0.27, - "grad_norm": 0.1845703125, - "learning_rate": 0.00019930515075597123, - "loss": 2.1719, + "epoch": 0.54, + "grad_norm": 0.1455078125, + "learning_rate": 0.0001833823263999867, + "loss": 2.1268, "step": 1580 }, { - "epoch": 0.27, - "grad_norm": 0.185546875, - "learning_rate": 0.00019928764770749604, - "loss": 2.1808, + "epoch": 0.54, + "grad_norm": 0.150390625, + "learning_rate": 0.00018321875328837828, + "loss": 2.1319, "step": 1585 }, { - "epoch": 0.27, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019926992773205816, - "loss": 2.1824, + "epoch": 0.54, + "grad_norm": 0.1494140625, + "learning_rate": 0.00018305445276384875, + "loss": 2.1436, "step": 1590 }, { - "epoch": 0.27, - "grad_norm": 0.1845703125, - "learning_rate": 0.00019925199086837282, - "loss": 2.1842, + "epoch": 0.54, + "grad_norm": 0.1474609375, + "learning_rate": 0.00018288942626254473, + "loss": 2.1573, "step": 1595 }, { - "epoch": 0.27, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019923383715562902, - "loss": 2.1908, + "epoch": 0.54, + "grad_norm": 0.150390625, + "learning_rate": 0.00018272367522695844, + "loss": 2.1463, "step": 1600 }, { - "epoch": 0.27, - "grad_norm": 0.18359375, - "learning_rate": 0.00019921546663348964, - "loss": 2.2098, + "epoch": 0.54, + "grad_norm": 0.1455078125, + "learning_rate": 0.00018255720110591533, + "loss": 2.157, "step": 1605 }, { - "epoch": 0.27, - "grad_norm": 0.19140625, - "learning_rate": 0.00019919687934209123, - "loss": 2.1821, + "epoch": 0.55, + "grad_norm": 0.150390625, + "learning_rate": 0.0001823900053545613, + "loss": 2.1392, "step": 1610 }, { - "epoch": 0.27, - "grad_norm": 0.1904296875, - "learning_rate": 0.0001991780753220439, - "loss": 2.1931, + "epoch": 0.55, + "grad_norm": 0.154296875, + "learning_rate": 0.00018222208943434999, + "loss": 2.1496, "step": 1615 }, { - "epoch": 0.27, - "grad_norm": 0.2001953125, - "learning_rate": 0.00019915905461443125, - "loss": 2.2284, + "epoch": 0.55, + "grad_norm": 0.150390625, + "learning_rate": 0.00018205345481302998, + "loss": 2.1474, "step": 1620 }, { - "epoch": 0.28, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019913981726081046, - "loss": 2.1604, + "epoch": 0.55, + "grad_norm": 0.1533203125, + "learning_rate": 0.0001818841029646321, + "loss": 2.1532, "step": 1625 }, { - "epoch": 0.28, - "grad_norm": 0.1826171875, - "learning_rate": 0.00019912036330321185, - "loss": 2.2391, + "epoch": 0.55, + "grad_norm": 0.1513671875, + "learning_rate": 0.00018171403536945628, + "loss": 2.1697, "step": 1630 }, { - "epoch": 0.28, - "grad_norm": 0.185546875, - "learning_rate": 0.0001991006927841391, - "loss": 2.1986, + "epoch": 0.55, + "grad_norm": 0.1474609375, + "learning_rate": 0.00018154325351405897, + "loss": 2.1849, "step": 1635 }, { - "epoch": 0.28, - "grad_norm": 0.19140625, - "learning_rate": 0.00019908080574656905, - "loss": 2.2385, + "epoch": 0.56, + "grad_norm": 0.1513671875, + "learning_rate": 0.00018137175889123978, + "loss": 2.1444, "step": 1640 }, { - "epoch": 0.28, - "grad_norm": 0.1845703125, - "learning_rate": 0.00019906070223395153, - "loss": 2.1974, + "epoch": 0.56, + "grad_norm": 0.1513671875, + "learning_rate": 0.0001811995530000287, + "loss": 2.1402, "step": 1645 }, { - "epoch": 0.28, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019904038229020935, - "loss": 2.1889, + "epoch": 0.56, + "grad_norm": 0.1494140625, + "learning_rate": 0.00018102663734567283, + "loss": 2.1413, "step": 1650 }, { - "epoch": 0.28, - "grad_norm": 0.189453125, - "learning_rate": 0.00019901984595973823, - "loss": 2.1733, + "epoch": 0.56, + "grad_norm": 0.1494140625, + "learning_rate": 0.0001808530134396234, + "loss": 2.1554, "step": 1655 }, { - "epoch": 0.28, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019899909328740666, - "loss": 2.1783, + "epoch": 0.56, + "grad_norm": 0.1474609375, + "learning_rate": 0.00018067868279952236, + "loss": 2.1375, "step": 1660 }, { - "epoch": 0.28, - "grad_norm": 0.189453125, - "learning_rate": 0.00019897812431855569, - "loss": 2.1863, + "epoch": 0.56, + "grad_norm": 0.1494140625, + "learning_rate": 0.0001805036469491892, + "loss": 2.1671, "step": 1665 }, { - "epoch": 0.28, - "grad_norm": 0.197265625, - "learning_rate": 0.00019895693909899908, - "loss": 2.1418, + "epoch": 0.57, + "grad_norm": 0.1494140625, + "learning_rate": 0.00018032790741860763, + "loss": 2.1369, "step": 1670 }, { - "epoch": 0.28, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019893553767502299, - "loss": 2.1798, + "epoch": 0.57, + "grad_norm": 0.1513671875, + "learning_rate": 0.00018015146574391233, + "loss": 2.1284, "step": 1675 }, { - "epoch": 0.28, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019891392009338597, - "loss": 2.2185, + "epoch": 0.57, + "grad_norm": 0.1484375, + "learning_rate": 0.00017997432346737524, + "loss": 2.1505, "step": 1680 }, { - "epoch": 0.29, - "grad_norm": 0.185546875, - "learning_rate": 0.0001988920864013188, - "loss": 2.2217, + "epoch": 0.57, + "grad_norm": 0.1474609375, + "learning_rate": 0.00017979648213739232, + "loss": 2.1339, "step": 1685 }, { - "epoch": 0.29, - "grad_norm": 0.189453125, - "learning_rate": 0.00019887003664652452, - "loss": 2.19, + "epoch": 0.57, + "grad_norm": 0.1474609375, + "learning_rate": 0.00017961794330846994, + "loss": 2.1554, "step": 1690 }, { - "epoch": 0.29, - "grad_norm": 0.19140625, - "learning_rate": 0.0001988477708771781, - "loss": 2.2109, + "epoch": 0.57, + "grad_norm": 0.150390625, + "learning_rate": 0.00017943870854121124, + "loss": 2.1543, "step": 1695 }, { - "epoch": 0.29, - "grad_norm": 0.19921875, - "learning_rate": 0.00019882528914192657, - "loss": 2.1982, + "epoch": 0.58, + "grad_norm": 0.1533203125, + "learning_rate": 0.0001792587794023027, + "loss": 2.1429, "step": 1700 }, { - "epoch": 0.29, - "grad_norm": 0.1875, - "learning_rate": 0.0001988025914898888, - "loss": 2.1367, + "epoch": 0.58, + "grad_norm": 0.1494140625, + "learning_rate": 0.00017907815746450004, + "loss": 2.1174, "step": 1705 }, { - "epoch": 0.29, - "grad_norm": 0.1904296875, - "learning_rate": 0.0001987796779706553, - "loss": 2.1894, + "epoch": 0.58, + "grad_norm": 0.1513671875, + "learning_rate": 0.0001788968443066149, + "loss": 2.1251, "step": 1710 }, { - "epoch": 0.29, - "grad_norm": 0.1962890625, - "learning_rate": 0.00019875654863428838, - "loss": 2.1371, + "epoch": 0.58, + "grad_norm": 0.1474609375, + "learning_rate": 0.00017871484151350077, + "loss": 2.1073, "step": 1715 }, { - "epoch": 0.29, - "grad_norm": 0.197265625, - "learning_rate": 0.00019873320353132174, - "loss": 2.1592, + "epoch": 0.58, + "grad_norm": 0.150390625, + "learning_rate": 0.00017853215067603926, + "loss": 2.1919, "step": 1720 }, { - "epoch": 0.29, - "grad_norm": 0.19921875, - "learning_rate": 0.00019870964271276055, - "loss": 2.1695, + "epoch": 0.58, + "grad_norm": 0.1474609375, + "learning_rate": 0.00017834877339112612, + "loss": 2.126, "step": 1725 }, { - "epoch": 0.29, - "grad_norm": 0.18359375, - "learning_rate": 0.00019868586623008125, - "loss": 2.1658, + "epoch": 0.59, + "grad_norm": 0.150390625, + "learning_rate": 0.0001781647112616574, + "loss": 2.1565, "step": 1730 }, { - "epoch": 0.29, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019866187413523153, - "loss": 2.1584, + "epoch": 0.59, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001779799658965153, + "loss": 2.137, "step": 1735 }, { - "epoch": 0.29, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019863766648063006, - "loss": 2.2071, - "step": 1740 + "epoch": 0.59, + "grad_norm": 0.146484375, + "learning_rate": 0.00017779453891055412, + "loss": 2.1714, + "step": 1740 }, { - "epoch": 0.3, - "grad_norm": 0.19140625, - "learning_rate": 0.00019861324331916662, - "loss": 2.2012, + "epoch": 0.59, + "grad_norm": 0.1533203125, + "learning_rate": 0.00017760843192458626, + "loss": 2.1456, "step": 1745 }, { - "epoch": 0.3, - "grad_norm": 0.193359375, - "learning_rate": 0.00019858860470420167, - "loss": 2.2062, + "epoch": 0.59, + "grad_norm": 0.150390625, + "learning_rate": 0.00017742164656536798, + "loss": 2.1575, "step": 1750 }, { - "epoch": 0.3, - "grad_norm": 0.1845703125, - "learning_rate": 0.00019856375068956651, - "loss": 2.1877, + "epoch": 0.59, + "grad_norm": 0.1552734375, + "learning_rate": 0.00017723418446558516, + "loss": 2.1461, "step": 1755 }, { - "epoch": 0.3, - "grad_norm": 0.1923828125, - "learning_rate": 0.000198538681329563, - "loss": 2.1791, + "epoch": 0.6, + "grad_norm": 0.1484375, + "learning_rate": 0.00017704604726383904, + "loss": 2.1383, "step": 1760 }, { - "epoch": 0.3, - "grad_norm": 0.2021484375, - "learning_rate": 0.00019851339667896354, - "loss": 2.155, + "epoch": 0.6, + "grad_norm": 0.1474609375, + "learning_rate": 0.00017685723660463193, + "loss": 2.13, "step": 1765 }, { - "epoch": 0.3, - "grad_norm": 0.193359375, - "learning_rate": 0.00019848789679301085, - "loss": 2.1589, + "epoch": 0.6, + "grad_norm": 0.1484375, + "learning_rate": 0.00017666775413835282, + "loss": 2.1463, "step": 1770 }, { - "epoch": 0.3, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019846218172741794, - "loss": 2.1752, + "epoch": 0.6, + "grad_norm": 0.150390625, + "learning_rate": 0.0001764776015212629, + "loss": 2.1075, "step": 1775 }, { - "epoch": 0.3, - "grad_norm": 0.185546875, - "learning_rate": 0.00019843625153836798, - "loss": 2.2145, + "epoch": 0.6, + "grad_norm": 0.1484375, + "learning_rate": 0.0001762867804154812, + "loss": 2.144, "step": 1780 }, { - "epoch": 0.3, - "grad_norm": 0.1826171875, - "learning_rate": 0.00019841010628251406, - "loss": 2.166, + "epoch": 0.6, + "grad_norm": 0.1484375, + "learning_rate": 0.00017609529248896997, + "loss": 2.1802, "step": 1785 }, { - "epoch": 0.3, - "grad_norm": 0.1875, - "learning_rate": 0.00019838374601697923, - "loss": 2.2264, + "epoch": 0.61, + "grad_norm": 0.15234375, + "learning_rate": 0.00017590313941552002, + "loss": 2.1746, "step": 1790 }, { - "epoch": 0.3, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019835717079935624, - "loss": 2.1749, + "epoch": 0.61, + "grad_norm": 0.1513671875, + "learning_rate": 0.00017571032287473642, + "loss": 2.1199, "step": 1795 }, { - "epoch": 0.3, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019833038068770757, - "loss": 2.1778, + "epoch": 0.61, + "grad_norm": 0.15234375, + "learning_rate": 0.00017551684455202336, + "loss": 2.1279, "step": 1800 }, { - "epoch": 0.31, - "grad_norm": 0.193359375, - "learning_rate": 0.00019830337574056514, - "loss": 2.1967, + "epoch": 0.61, + "grad_norm": 0.1494140625, + "learning_rate": 0.00017532270613856976, + "loss": 2.1532, "step": 1805 }, { - "epoch": 0.31, - "grad_norm": 0.19140625, - "learning_rate": 0.00019827615601693022, - "loss": 2.1804, + "epoch": 0.61, + "grad_norm": 0.1494140625, + "learning_rate": 0.00017512790933133437, + "loss": 2.1274, "step": 1810 }, { - "epoch": 0.31, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019824872157627339, - "loss": 2.2043, + "epoch": 0.61, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001749324558330309, + "loss": 2.1527, "step": 1815 }, { - "epoch": 0.31, - "grad_norm": 0.1953125, - "learning_rate": 0.00019822107247853435, - "loss": 2.1591, + "epoch": 0.62, + "grad_norm": 0.146484375, + "learning_rate": 0.0001747363473521132, + "loss": 2.1483, "step": 1820 }, { - "epoch": 0.31, - "grad_norm": 0.193359375, - "learning_rate": 0.00019819320878412174, - "loss": 2.1763, + "epoch": 0.62, + "grad_norm": 0.1533203125, + "learning_rate": 0.00017453958560276038, + "loss": 2.1474, "step": 1825 }, { - "epoch": 0.31, - "grad_norm": 0.1865234375, - "learning_rate": 0.00019816513055391307, - "loss": 2.1789, + "epoch": 0.62, + "grad_norm": 0.1494140625, + "learning_rate": 0.00017434217230486164, + "loss": 2.1605, "step": 1830 }, { - "epoch": 0.31, - "grad_norm": 0.189453125, - "learning_rate": 0.00019813683784925467, - "loss": 2.2, + "epoch": 0.62, + "grad_norm": 0.15234375, + "learning_rate": 0.0001741441091840014, + "loss": 2.1658, "step": 1835 }, { - "epoch": 0.31, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019810833073196133, - "loss": 2.1581, + "epoch": 0.62, + "grad_norm": 0.1533203125, + "learning_rate": 0.00017394539797144413, + "loss": 2.1167, "step": 1840 }, { - "epoch": 0.31, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019807960926431634, - "loss": 2.2085, + "epoch": 0.62, + "grad_norm": 0.150390625, + "learning_rate": 0.00017374604040411935, + "loss": 2.1318, "step": 1845 }, { - "epoch": 0.31, - "grad_norm": 0.1875, - "learning_rate": 0.00019805067350907134, - "loss": 2.1584, + "epoch": 0.63, + "grad_norm": 0.150390625, + "learning_rate": 0.00017354603822460621, + "loss": 2.1321, "step": 1850 }, { - "epoch": 0.31, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019802152352944616, - "loss": 2.2049, + "epoch": 0.63, + "grad_norm": 0.154296875, + "learning_rate": 0.00017334539318111856, + "loss": 2.1417, "step": 1855 }, { - "epoch": 0.32, - "grad_norm": 0.1923828125, - "learning_rate": 0.0001979921593891286, - "loss": 2.1572, + "epoch": 0.63, + "grad_norm": 0.150390625, + "learning_rate": 0.00017314410702748932, + "loss": 2.1343, "step": 1860 }, { - "epoch": 0.32, - "grad_norm": 0.197265625, - "learning_rate": 0.00019796258115227443, - "loss": 2.2329, + "epoch": 0.63, + "grad_norm": 0.15234375, + "learning_rate": 0.00017294218152315546, + "loss": 2.1471, "step": 1865 }, { - "epoch": 0.32, - "grad_norm": 0.189453125, - "learning_rate": 0.00019793278888350716, - "loss": 2.1925, + "epoch": 0.63, + "grad_norm": 0.150390625, + "learning_rate": 0.00017273961843314252, + "loss": 2.147, "step": 1870 }, { - "epoch": 0.32, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019790278264791795, - "loss": 2.1534, + "epoch": 0.64, + "grad_norm": 0.1552734375, + "learning_rate": 0.0001725364195280491, + "loss": 2.1528, "step": 1875 }, { - "epoch": 0.32, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019787256251106543, - "loss": 2.1437, + "epoch": 0.64, + "grad_norm": 0.1533203125, + "learning_rate": 0.00017233258658403138, + "loss": 2.1462, "step": 1880 }, { - "epoch": 0.32, - "grad_norm": 0.19140625, - "learning_rate": 0.00019784212853897552, - "loss": 2.193, + "epoch": 0.64, + "grad_norm": 0.1513671875, + "learning_rate": 0.0001721281213827878, + "loss": 2.1401, "step": 1885 }, { - "epoch": 0.32, - "grad_norm": 0.1923828125, - "learning_rate": 0.0001978114807981414, - "loss": 2.1838, + "epoch": 0.64, + "grad_norm": 0.1494140625, + "learning_rate": 0.00017192302571154331, + "loss": 2.1613, "step": 1890 }, { - "epoch": 0.32, - "grad_norm": 0.1884765625, - "learning_rate": 0.0001977806193555233, - "loss": 2.1925, + "epoch": 0.64, + "grad_norm": 0.15234375, + "learning_rate": 0.00017171730136303364, + "loss": 2.1487, "step": 1895 }, { - "epoch": 0.32, - "grad_norm": 0.189453125, - "learning_rate": 0.00019774954427854833, - "loss": 2.1709, + "epoch": 0.64, + "grad_norm": 0.1474609375, + "learning_rate": 0.00017151095013548994, + "loss": 2.1443, "step": 1900 }, { - "epoch": 0.32, - "grad_norm": 0.1875, - "learning_rate": 0.0001977182556351103, - "loss": 2.1448, + "epoch": 0.65, + "grad_norm": 0.1484375, + "learning_rate": 0.00017130397383262284, + "loss": 2.1251, "step": 1905 }, { - "epoch": 0.32, - "grad_norm": 0.19921875, - "learning_rate": 0.0001976867534935697, - "loss": 2.2003, + "epoch": 0.65, + "grad_norm": 0.1455078125, + "learning_rate": 0.0001710963742636067, + "loss": 2.1351, "step": 1910 }, { - "epoch": 0.32, - "grad_norm": 0.2060546875, - "learning_rate": 0.00019765503792275354, - "loss": 2.1616, + "epoch": 0.65, + "grad_norm": 0.1494140625, + "learning_rate": 0.00017088815324306392, + "loss": 2.1285, "step": 1915 }, { - "epoch": 0.33, - "grad_norm": 0.2001953125, - "learning_rate": 0.0001976231089919549, - "loss": 2.171, + "epoch": 0.65, + "grad_norm": 0.154296875, + "learning_rate": 0.00017067931259104885, + "loss": 2.1599, "step": 1920 }, { - "epoch": 0.33, - "grad_norm": 0.19140625, - "learning_rate": 0.00019759096677093334, - "loss": 2.1726, + "epoch": 0.65, + "grad_norm": 0.1484375, + "learning_rate": 0.00017046985413303215, + "loss": 2.1219, "step": 1925 }, { - "epoch": 0.33, - "grad_norm": 0.189453125, - "learning_rate": 0.00019755861132991412, - "loss": 2.1745, + "epoch": 0.65, + "grad_norm": 0.1484375, + "learning_rate": 0.00017025977969988465, + "loss": 2.1373, "step": 1930 }, { - "epoch": 0.33, - "grad_norm": 0.1953125, - "learning_rate": 0.0001975260427395886, - "loss": 2.1956, + "epoch": 0.66, + "grad_norm": 0.1513671875, + "learning_rate": 0.00017004909112786144, + "loss": 2.1299, "step": 1935 }, { - "epoch": 0.33, - "grad_norm": 0.19140625, - "learning_rate": 0.00019749326107111362, - "loss": 2.2004, + "epoch": 0.66, + "grad_norm": 0.1533203125, + "learning_rate": 0.0001698377902585857, + "loss": 2.1378, "step": 1940 }, { - "epoch": 0.33, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019746026639611174, - "loss": 2.1805, + "epoch": 0.66, + "grad_norm": 0.1494140625, + "learning_rate": 0.00016962587893903276, + "loss": 2.1502, "step": 1945 }, { - "epoch": 0.33, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019742705878667075, - "loss": 2.2056, + "epoch": 0.66, + "grad_norm": 0.150390625, + "learning_rate": 0.0001694133590215139, + "loss": 2.1611, "step": 1950 }, { - "epoch": 0.33, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001973936383153438, - "loss": 2.1754, + "epoch": 0.66, + "grad_norm": 0.1474609375, + "learning_rate": 0.00016920023236366002, + "loss": 2.1134, "step": 1955 }, { - "epoch": 0.33, - "grad_norm": 0.1875, - "learning_rate": 0.00019736000505514908, - "loss": 2.1286, + "epoch": 0.66, + "grad_norm": 0.146484375, + "learning_rate": 0.00016898650082840572, + "loss": 2.1338, "step": 1960 }, { - "epoch": 0.33, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001973261590795696, - "loss": 2.1644, + "epoch": 0.67, + "grad_norm": 0.1474609375, + "learning_rate": 0.00016877216628397257, + "loss": 2.1257, "step": 1965 }, { - "epoch": 0.33, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019729210046255316, - "loss": 2.2054, + "epoch": 0.67, + "grad_norm": 0.15234375, + "learning_rate": 0.0001685572306038532, + "loss": 2.1085, "step": 1970 }, { - "epoch": 0.33, - "grad_norm": 0.1943359375, - "learning_rate": 0.0001972578292785122, - "loss": 2.2077, + "epoch": 0.67, + "grad_norm": 0.1572265625, + "learning_rate": 0.0001683416956667947, + "loss": 2.1549, "step": 1975 }, { - "epoch": 0.34, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019722334560232354, - "loss": 2.1545, + "epoch": 0.67, + "grad_norm": 0.1513671875, + "learning_rate": 0.0001681255633567823, + "loss": 2.1571, "step": 1980 }, { - "epoch": 0.34, - "grad_norm": 0.1845703125, - "learning_rate": 0.00019718864950932826, - "loss": 2.1974, + "epoch": 0.67, + "grad_norm": 0.150390625, + "learning_rate": 0.00016790883556302272, + "loss": 2.152, "step": 1985 }, { - "epoch": 0.34, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019715374107533157, - "loss": 2.1435, + "epoch": 0.67, + "grad_norm": 0.158203125, + "learning_rate": 0.00016769151417992791, + "loss": 2.1425, "step": 1990 }, { - "epoch": 0.34, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019711862037660253, - "loss": 2.195, + "epoch": 0.68, + "grad_norm": 0.1513671875, + "learning_rate": 0.00016747360110709838, + "loss": 2.1405, "step": 1995 }, { - "epoch": 0.34, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019708328748987403, - "loss": 2.2048, + "epoch": 0.68, + "grad_norm": 0.1552734375, + "learning_rate": 0.00016725509824930645, + "loss": 2.1496, "step": 2000 }, { - "epoch": 0.34, - "grad_norm": 0.1845703125, - "learning_rate": 0.00019704774249234256, - "loss": 2.2101, + "epoch": 0.68, + "grad_norm": 0.154296875, + "learning_rate": 0.0001670360075164799, + "loss": 2.1588, "step": 2005 }, { - "epoch": 0.34, - "grad_norm": 0.1962890625, - "learning_rate": 0.00019701198546166803, - "loss": 2.2184, + "epoch": 0.68, + "grad_norm": 0.150390625, + "learning_rate": 0.00016681633082368498, + "loss": 2.1571, "step": 2010 }, { - "epoch": 0.34, - "grad_norm": 0.19140625, - "learning_rate": 0.0001969760164759735, - "loss": 2.1553, + "epoch": 0.68, + "grad_norm": 0.150390625, + "learning_rate": 0.00016659607009110984, + "loss": 2.1428, "step": 2015 }, { - "epoch": 0.34, - "grad_norm": 0.1953125, - "learning_rate": 0.0001969398356138453, - "loss": 2.1782, + "epoch": 0.68, + "grad_norm": 0.15234375, + "learning_rate": 0.00016637522724404774, + "loss": 2.1548, "step": 2020 }, { - "epoch": 0.34, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019690344295433256, - "loss": 2.1714, + "epoch": 0.69, + "grad_norm": 0.1494140625, + "learning_rate": 0.00016615380421288018, + "loss": 2.1186, "step": 2025 }, { - "epoch": 0.34, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019686683857694716, - "loss": 2.1662, + "epoch": 0.69, + "grad_norm": 0.1474609375, + "learning_rate": 0.00016593180293306001, + "loss": 2.1405, "step": 2030 }, { - "epoch": 0.34, - "grad_norm": 0.193359375, - "learning_rate": 0.0001968300225616636, - "loss": 2.1654, + "epoch": 0.69, + "grad_norm": 0.15234375, + "learning_rate": 0.0001657092253450945, + "loss": 2.1324, "step": 2035 }, { - "epoch": 0.35, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019679299498891873, - "loss": 2.2053, + "epoch": 0.69, + "grad_norm": 0.1474609375, + "learning_rate": 0.00016548607339452853, + "loss": 2.1105, "step": 2040 }, { - "epoch": 0.35, - "grad_norm": 0.189453125, - "learning_rate": 0.00019675575593961156, - "loss": 2.1423, + "epoch": 0.69, + "grad_norm": 0.150390625, + "learning_rate": 0.00016526234903192733, + "loss": 2.1533, "step": 2045 }, { - "epoch": 0.35, - "grad_norm": 0.205078125, - "learning_rate": 0.0001967183054951033, - "loss": 2.1537, + "epoch": 0.69, + "grad_norm": 0.1484375, + "learning_rate": 0.00016503805421285968, + "loss": 2.1515, "step": 2050 }, { - "epoch": 0.35, - "grad_norm": 0.19140625, - "learning_rate": 0.00019668064373721685, - "loss": 2.2083, + "epoch": 0.7, + "grad_norm": 0.15234375, + "learning_rate": 0.00016481319089788063, + "loss": 2.1282, "step": 2055 }, { - "epoch": 0.35, - "grad_norm": 0.1875, - "learning_rate": 0.00019664277074823693, - "loss": 2.164, + "epoch": 0.7, + "grad_norm": 0.1494140625, + "learning_rate": 0.00016458776105251447, + "loss": 2.1587, "step": 2060 }, { - "epoch": 0.35, - "grad_norm": 0.189453125, - "learning_rate": 0.0001966046866109097, - "loss": 2.1678, + "epoch": 0.7, + "grad_norm": 0.1494140625, + "learning_rate": 0.0001643617666472376, + "loss": 2.1292, "step": 2065 }, { - "epoch": 0.35, - "grad_norm": 0.19140625, - "learning_rate": 0.00019656639140844262, - "loss": 2.2032, + "epoch": 0.7, + "grad_norm": 0.1513671875, + "learning_rate": 0.00016413520965746097, + "loss": 2.1662, "step": 2070 }, { - "epoch": 0.35, - "grad_norm": 0.193359375, - "learning_rate": 0.00019652788522450437, - "loss": 2.2068, + "epoch": 0.7, + "grad_norm": 0.1484375, + "learning_rate": 0.0001639080920635134, + "loss": 2.1416, "step": 2075 }, { - "epoch": 0.35, - "grad_norm": 0.1875, - "learning_rate": 0.00019648916814322446, - "loss": 2.1622, + "epoch": 0.7, + "grad_norm": 0.1591796875, + "learning_rate": 0.0001636804158506237, + "loss": 2.1533, "step": 2080 }, { - "epoch": 0.35, - "grad_norm": 0.19140625, - "learning_rate": 0.00019645024024919337, - "loss": 2.2037, + "epoch": 0.71, + "grad_norm": 0.1533203125, + "learning_rate": 0.00016345218300890357, + "loss": 2.1246, "step": 2085 }, { - "epoch": 0.35, - "grad_norm": 0.1826171875, - "learning_rate": 0.00019641110162746202, - "loss": 2.1631, + "epoch": 0.71, + "grad_norm": 0.1533203125, + "learning_rate": 0.00016322339553333034, + "loss": 2.1425, "step": 2090 }, { - "epoch": 0.35, - "grad_norm": 0.19921875, - "learning_rate": 0.00019637175236354175, - "loss": 2.2035, + "epoch": 0.71, + "grad_norm": 0.1484375, + "learning_rate": 0.00016299405542372924, + "loss": 2.1527, "step": 2095 }, { - "epoch": 0.36, - "grad_norm": 0.2021484375, - "learning_rate": 0.00019633219254340417, - "loss": 2.1476, + "epoch": 0.71, + "grad_norm": 0.15625, + "learning_rate": 0.00016276416468475607, + "loss": 2.1265, "step": 2100 }, { - "epoch": 0.36, - "grad_norm": 0.1953125, - "learning_rate": 0.00019629242225348086, - "loss": 2.1799, + "epoch": 0.71, + "grad_norm": 0.1484375, + "learning_rate": 0.00016253372532587976, + "loss": 2.1505, "step": 2105 }, { - "epoch": 0.36, - "grad_norm": 0.19140625, - "learning_rate": 0.00019625244158066332, - "loss": 2.2112, + "epoch": 0.71, + "grad_norm": 0.1533203125, + "learning_rate": 0.0001623027393613646, + "loss": 2.1566, "step": 2110 }, { - "epoch": 0.36, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001962122506123026, - "loss": 2.1967, + "epoch": 0.72, + "grad_norm": 0.1552734375, + "learning_rate": 0.00016207120881025282, + "loss": 2.1498, "step": 2115 }, { - "epoch": 0.36, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019617184943620936, - "loss": 2.1841, + "epoch": 0.72, + "grad_norm": 0.1474609375, + "learning_rate": 0.0001618391356963468, + "loss": 2.1094, "step": 2120 }, { - "epoch": 0.36, - "grad_norm": 0.1875, - "learning_rate": 0.00019613123814065335, - "loss": 2.2235, + "epoch": 0.72, + "grad_norm": 0.150390625, + "learning_rate": 0.00016160652204819158, + "loss": 2.144, "step": 2125 }, { - "epoch": 0.36, - "grad_norm": 0.189453125, - "learning_rate": 0.00019609041681436354, - "loss": 2.1743, + "epoch": 0.72, + "grad_norm": 0.154296875, + "learning_rate": 0.0001613733698990568, + "loss": 2.1447, "step": 2130 }, { - "epoch": 0.36, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019604938554652765, - "loss": 2.1865, + "epoch": 0.72, + "grad_norm": 0.150390625, + "learning_rate": 0.00016113968128691933, + "loss": 2.1403, "step": 2135 }, { - "epoch": 0.36, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019600814442679226, - "loss": 2.143, + "epoch": 0.72, + "grad_norm": 0.15234375, + "learning_rate": 0.00016090545825444506, + "loss": 2.1621, "step": 2140 }, { - "epoch": 0.36, - "grad_norm": 0.205078125, - "learning_rate": 0.00019596669354526224, - "loss": 2.2324, + "epoch": 0.73, + "grad_norm": 0.1494140625, + "learning_rate": 0.00016067070284897137, + "loss": 2.1606, "step": 2145 }, { - "epoch": 0.36, - "grad_norm": 0.1982421875, - "learning_rate": 0.00019592503299250096, - "loss": 2.2198, + "epoch": 0.73, + "grad_norm": 0.1552734375, + "learning_rate": 0.000160435417122489, + "loss": 2.1262, "step": 2150 }, { - "epoch": 0.36, - "grad_norm": 0.193359375, - "learning_rate": 0.0001958831628595297, - "loss": 2.1736, + "epoch": 0.73, + "grad_norm": 0.150390625, + "learning_rate": 0.00016019960313162434, + "loss": 2.1193, "step": 2155 }, { - "epoch": 0.37, - "grad_norm": 0.189453125, - "learning_rate": 0.00019584108323782777, - "loss": 2.1709, + "epoch": 0.73, + "grad_norm": 0.15234375, + "learning_rate": 0.0001599632629376212, + "loss": 2.1529, "step": 2160 }, { - "epoch": 0.37, - "grad_norm": 0.1904296875, - "learning_rate": 0.0001957987942193321, - "loss": 2.1806, + "epoch": 0.73, + "grad_norm": 0.1484375, + "learning_rate": 0.00015972639860632292, + "loss": 2.1828, "step": 2165 }, { - "epoch": 0.37, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019575629589643718, - "loss": 2.1568, + "epoch": 0.73, + "grad_norm": 0.154296875, + "learning_rate": 0.00015948901220815445, + "loss": 2.1639, "step": 2170 }, { - "epoch": 0.37, - "grad_norm": 0.1845703125, - "learning_rate": 0.00019571358836199476, - "loss": 2.1647, + "epoch": 0.74, + "grad_norm": 0.1533203125, + "learning_rate": 0.00015925110581810394, + "loss": 2.1432, "step": 2175 }, { - "epoch": 0.37, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019567067170931366, - "loss": 2.2088, + "epoch": 0.74, + "grad_norm": 0.1533203125, + "learning_rate": 0.00015901268151570491, + "loss": 2.1863, "step": 2180 }, { - "epoch": 0.37, - "grad_norm": 0.19140625, - "learning_rate": 0.00019562754603215962, - "loss": 2.1749, + "epoch": 0.74, + "grad_norm": 0.1484375, + "learning_rate": 0.0001587737413850178, + "loss": 2.1314, "step": 2185 }, { - "epoch": 0.37, - "grad_norm": 0.197265625, - "learning_rate": 0.00019558421142475507, - "loss": 2.1569, + "epoch": 0.74, + "grad_norm": 0.15625, + "learning_rate": 0.00015853428751461202, + "loss": 2.1317, "step": 2190 }, { - "epoch": 0.37, - "grad_norm": 0.193359375, - "learning_rate": 0.0001955406679817789, - "loss": 2.1758, + "epoch": 0.74, + "grad_norm": 0.15234375, + "learning_rate": 0.00015829432199754756, + "loss": 2.1344, "step": 2195 }, { - "epoch": 0.37, - "grad_norm": 0.193359375, - "learning_rate": 0.00019549691579836626, - "loss": 2.2226, + "epoch": 0.75, + "grad_norm": 0.15234375, + "learning_rate": 0.0001580538469313566, + "loss": 2.1352, "step": 2200 }, { - "epoch": 0.37, - "grad_norm": 0.201171875, - "learning_rate": 0.00019545295497010843, - "loss": 2.1599, + "epoch": 0.75, + "grad_norm": 0.1484375, + "learning_rate": 0.00015781286441802534, + "loss": 2.135, "step": 2205 }, { - "epoch": 0.37, - "grad_norm": 0.19921875, - "learning_rate": 0.0001954087855930524, - "loss": 2.1589, + "epoch": 0.75, + "grad_norm": 0.1552734375, + "learning_rate": 0.00015757137656397557, + "loss": 2.1041, "step": 2210 }, { - "epoch": 0.38, - "grad_norm": 0.1923828125, - "learning_rate": 0.000195364407763701, - "loss": 2.1723, + "epoch": 0.75, + "grad_norm": 0.15234375, + "learning_rate": 0.0001573293854800462, + "loss": 2.1074, "step": 2215 }, { - "epoch": 0.38, - "grad_norm": 0.19140625, - "learning_rate": 0.00019531982157901232, - "loss": 2.1533, + "epoch": 0.75, + "grad_norm": 0.1533203125, + "learning_rate": 0.00015708689328147493, + "loss": 2.1311, "step": 2220 }, { - "epoch": 0.38, - "grad_norm": 0.193359375, - "learning_rate": 0.00019527502713639975, - "loss": 2.1804, + "epoch": 0.75, + "grad_norm": 0.1513671875, + "learning_rate": 0.00015684390208787962, + "loss": 2.143, "step": 2225 }, { - "epoch": 0.38, - "grad_norm": 0.2021484375, - "learning_rate": 0.00019523002453373175, - "loss": 2.163, + "epoch": 0.76, + "grad_norm": 0.15234375, + "learning_rate": 0.0001566004140232399, + "loss": 2.1528, "step": 2230 }, { - "epoch": 0.38, - "grad_norm": 0.193359375, - "learning_rate": 0.0001951848138693314, - "loss": 2.1807, + "epoch": 0.76, + "grad_norm": 0.146484375, + "learning_rate": 0.00015635643121587848, + "loss": 2.164, "step": 2235 }, { - "epoch": 0.38, - "grad_norm": 0.2001953125, - "learning_rate": 0.00019513939524197656, - "loss": 2.1523, + "epoch": 0.76, + "grad_norm": 0.1552734375, + "learning_rate": 0.00015611195579844265, + "loss": 2.1369, "step": 2240 }, { - "epoch": 0.38, - "grad_norm": 0.1875, - "learning_rate": 0.0001950937687508993, - "loss": 2.1963, + "epoch": 0.76, + "grad_norm": 0.1572265625, + "learning_rate": 0.00015586698990788554, + "loss": 2.1538, "step": 2245 }, { - "epoch": 0.38, - "grad_norm": 0.2001953125, - "learning_rate": 0.00019504793449578593, - "loss": 2.171, + "epoch": 0.76, + "grad_norm": 0.1484375, + "learning_rate": 0.00015562153568544752, + "loss": 2.1305, "step": 2250 }, { - "epoch": 0.38, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019500189257677666, - "loss": 2.1529, + "epoch": 0.76, + "grad_norm": 0.15234375, + "learning_rate": 0.00015537559527663744, + "loss": 2.1832, "step": 2255 }, { - "epoch": 0.38, - "grad_norm": 0.1943359375, - "learning_rate": 0.0001949556430944654, - "loss": 2.1683, + "epoch": 0.77, + "grad_norm": 0.15625, + "learning_rate": 0.00015512917083121397, + "loss": 2.1522, "step": 2260 }, { - "epoch": 0.38, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019490918614989956, - "loss": 2.1611, + "epoch": 0.77, + "grad_norm": 0.15234375, + "learning_rate": 0.00015488226450316664, + "loss": 2.1116, "step": 2265 }, { - "epoch": 0.38, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019486252184457977, - "loss": 2.1865, + "epoch": 0.77, + "grad_norm": 0.1533203125, + "learning_rate": 0.00015463487845069707, + "loss": 2.1527, "step": 2270 }, { - "epoch": 0.39, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019481565028045986, - "loss": 2.1827, + "epoch": 0.77, + "grad_norm": 0.1572265625, + "learning_rate": 0.00015438701483620027, + "loss": 2.1485, "step": 2275 }, { - "epoch": 0.39, - "grad_norm": 0.19140625, - "learning_rate": 0.00019476857155994635, - "loss": 2.1502, + "epoch": 0.77, + "grad_norm": 0.15234375, + "learning_rate": 0.00015413867582624553, + "loss": 2.1296, "step": 2280 }, { - "epoch": 0.39, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019472128578589833, - "loss": 2.1553, + "epoch": 0.77, + "grad_norm": 0.15234375, + "learning_rate": 0.00015388986359155758, + "loss": 2.1407, "step": 2285 }, { - "epoch": 0.39, - "grad_norm": 0.1962890625, - "learning_rate": 0.00019467379306162746, - "loss": 2.2209, + "epoch": 0.78, + "grad_norm": 0.1572265625, + "learning_rate": 0.0001536405803069975, + "loss": 2.1033, "step": 2290 }, { - "epoch": 0.39, - "grad_norm": 0.19140625, - "learning_rate": 0.0001946260934908973, - "loss": 2.202, + "epoch": 0.78, + "grad_norm": 0.1513671875, + "learning_rate": 0.00015339082815154394, + "loss": 2.1388, "step": 2295 }, { - "epoch": 0.39, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019457818717792357, - "loss": 2.1814, + "epoch": 0.78, + "grad_norm": 0.15625, + "learning_rate": 0.00015314060930827393, + "loss": 2.1754, "step": 2300 }, { - "epoch": 0.39, - "grad_norm": 0.19140625, - "learning_rate": 0.0001945300742273735, - "loss": 2.1992, + "epoch": 0.78, + "grad_norm": 0.1552734375, + "learning_rate": 0.0001528899259643437, + "loss": 2.1608, "step": 2305 }, { - "epoch": 0.39, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019448175474436592, - "loss": 2.1637, + "epoch": 0.78, + "grad_norm": 0.1513671875, + "learning_rate": 0.00015263878031096975, + "loss": 2.1443, "step": 2310 }, { - "epoch": 0.39, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019443322883447078, - "loss": 2.1961, + "epoch": 0.78, + "grad_norm": 0.15625, + "learning_rate": 0.00015238717454340957, + "loss": 2.1384, "step": 2315 }, { - "epoch": 0.39, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019438449660370922, - "loss": 2.1988, + "epoch": 0.79, + "grad_norm": 0.1533203125, + "learning_rate": 0.00015213511086094254, + "loss": 2.1206, "step": 2320 }, { - "epoch": 0.39, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019433555815855292, - "loss": 2.1567, + "epoch": 0.79, + "grad_norm": 0.15234375, + "learning_rate": 0.00015188259146685064, + "loss": 2.1266, "step": 2325 }, { - "epoch": 0.39, - "grad_norm": 0.1943359375, - "learning_rate": 0.0001942864136059243, - "loss": 2.1266, + "epoch": 0.79, + "grad_norm": 0.15234375, + "learning_rate": 0.0001516296185683992, + "loss": 2.1565, "step": 2330 }, { - "epoch": 0.4, - "grad_norm": 0.19921875, - "learning_rate": 0.000194237063053196, - "loss": 2.1608, + "epoch": 0.79, + "grad_norm": 0.1552734375, + "learning_rate": 0.00015137619437681767, + "loss": 2.1289, "step": 2335 }, { - "epoch": 0.4, - "grad_norm": 0.189453125, - "learning_rate": 0.00019418750660819074, - "loss": 2.1657, + "epoch": 0.79, + "grad_norm": 0.162109375, + "learning_rate": 0.00015112232110728015, + "loss": 2.1517, "step": 2340 }, { - "epoch": 0.4, - "grad_norm": 0.189453125, - "learning_rate": 0.0001941377443791811, - "loss": 2.1726, + "epoch": 0.79, + "grad_norm": 0.15234375, + "learning_rate": 0.00015086800097888624, + "loss": 2.1423, "step": 2345 }, { - "epoch": 0.4, - "grad_norm": 0.193359375, - "learning_rate": 0.00019408777647488928, - "loss": 2.2402, + "epoch": 0.8, + "grad_norm": 0.15625, + "learning_rate": 0.00015061323621464134, + "loss": 2.1345, "step": 2350 }, { - "epoch": 0.4, - "grad_norm": 0.197265625, - "learning_rate": 0.00019403760300448677, - "loss": 2.1513, + "epoch": 0.8, + "grad_norm": 0.154296875, + "learning_rate": 0.00015035802904143762, + "loss": 2.1628, "step": 2355 }, { - "epoch": 0.4, - "grad_norm": 0.193359375, - "learning_rate": 0.0001939872240775943, - "loss": 2.1811, + "epoch": 0.8, + "grad_norm": 0.1513671875, + "learning_rate": 0.0001501023816900342, + "loss": 2.168, "step": 2360 }, { - "epoch": 0.4, - "grad_norm": 0.2001953125, - "learning_rate": 0.0001939366398042814, - "loss": 2.1592, + "epoch": 0.8, + "grad_norm": 0.15625, + "learning_rate": 0.00014984629639503785, + "loss": 2.1476, "step": 2365 }, { - "epoch": 0.4, - "grad_norm": 0.2060546875, - "learning_rate": 0.00019388585029506627, - "loss": 2.1665, + "epoch": 0.8, + "grad_norm": 0.150390625, + "learning_rate": 0.0001495897753948833, + "loss": 2.1486, "step": 2370 }, { - "epoch": 0.4, - "grad_norm": 0.193359375, - "learning_rate": 0.00019383485566091554, - "loss": 2.1636, + "epoch": 0.8, + "grad_norm": 0.1533203125, + "learning_rate": 0.00014933282093181383, + "loss": 2.1508, "step": 2375 }, { - "epoch": 0.4, - "grad_norm": 0.1884765625, - "learning_rate": 0.000193783656013244, - "loss": 2.144, + "epoch": 0.81, + "grad_norm": 0.1552734375, + "learning_rate": 0.00014907543525186166, + "loss": 2.1244, "step": 2380 }, { - "epoch": 0.4, - "grad_norm": 0.1982421875, - "learning_rate": 0.0001937322514639143, - "loss": 2.1331, + "epoch": 0.81, + "grad_norm": 0.1533203125, + "learning_rate": 0.00014881762060482814, + "loss": 2.123, "step": 2385 }, { - "epoch": 0.4, - "grad_norm": 0.1904296875, - "learning_rate": 0.00019368064212523686, - "loss": 2.1441, + "epoch": 0.81, + "grad_norm": 0.150390625, + "learning_rate": 0.00014855937924426434, + "loss": 2.1333, "step": 2390 }, { - "epoch": 0.41, - "grad_norm": 0.1953125, - "learning_rate": 0.0001936288281099694, - "loss": 2.2009, + "epoch": 0.81, + "grad_norm": 0.1513671875, + "learning_rate": 0.00014830071342745112, + "loss": 2.1471, "step": 2395 }, { - "epoch": 0.41, - "grad_norm": 0.19140625, - "learning_rate": 0.00019357680953131703, - "loss": 2.1558, + "epoch": 0.81, + "grad_norm": 0.1533203125, + "learning_rate": 0.00014804162541537955, + "loss": 2.1544, "step": 2400 }, { - "epoch": 0.41, - "grad_norm": 0.1865234375, - "learning_rate": 0.0001935245865029316, - "loss": 2.1831, + "epoch": 0.81, + "grad_norm": 0.15234375, + "learning_rate": 0.00014778211747273114, + "loss": 2.1675, "step": 2405 }, { - "epoch": 0.41, - "grad_norm": 0.19921875, - "learning_rate": 0.00019347215913891175, - "loss": 2.1691, + "epoch": 0.82, + "grad_norm": 0.146484375, + "learning_rate": 0.00014752219186785784, + "loss": 2.1267, "step": 2410 }, { - "epoch": 0.41, - "grad_norm": 0.1923828125, - "learning_rate": 0.00019341952755380252, - "loss": 2.1821, + "epoch": 0.82, + "grad_norm": 0.154296875, + "learning_rate": 0.0001472618508727626, + "loss": 2.1661, "step": 2415 }, { - "epoch": 0.41, - "grad_norm": 0.197265625, - "learning_rate": 0.00019336669186259515, - "loss": 2.1822, + "epoch": 0.82, + "grad_norm": 0.154296875, + "learning_rate": 0.00014700109676307914, + "loss": 2.1349, "step": 2420 }, { - "epoch": 0.41, - "grad_norm": 0.19140625, - "learning_rate": 0.00019331365218072682, - "loss": 2.2013, + "epoch": 0.82, + "grad_norm": 0.15625, + "learning_rate": 0.0001467399318180522, + "loss": 2.1568, "step": 2425 }, { - "epoch": 0.41, - "grad_norm": 0.189453125, - "learning_rate": 0.0001932604086240804, - "loss": 2.2009, + "epoch": 0.82, + "grad_norm": 0.154296875, + "learning_rate": 0.0001464783583205177, + "loss": 2.1437, "step": 2430 }, { - "epoch": 0.41, - "grad_norm": 0.2021484375, - "learning_rate": 0.00019320696130898418, - "loss": 2.0917, + "epoch": 0.82, + "grad_norm": 0.1513671875, + "learning_rate": 0.0001462163785568826, + "loss": 2.1418, "step": 2435 }, { - "epoch": 0.41, - "grad_norm": 0.185546875, - "learning_rate": 0.00019315331035221162, - "loss": 2.1562, + "epoch": 0.83, + "grad_norm": 0.1552734375, + "learning_rate": 0.00014595399481710515, + "loss": 2.1296, "step": 2440 }, { - "epoch": 0.41, - "grad_norm": 0.2001953125, - "learning_rate": 0.00019309945587098117, - "loss": 2.1827, + "epoch": 0.83, + "grad_norm": 0.1572265625, + "learning_rate": 0.00014569120939467465, + "loss": 2.1445, "step": 2445 }, { - "epoch": 0.41, - "grad_norm": 0.19921875, - "learning_rate": 0.00019304539798295587, - "loss": 2.1584, + "epoch": 0.83, + "grad_norm": 0.1552734375, + "learning_rate": 0.00014542802458659152, + "loss": 2.1482, "step": 2450 }, { - "epoch": 0.42, - "grad_norm": 0.197265625, - "learning_rate": 0.0001929911368062432, - "loss": 2.158, + "epoch": 0.83, + "grad_norm": 0.1552734375, + "learning_rate": 0.0001451644426933472, + "loss": 2.128, "step": 2455 }, { - "epoch": 0.42, - "grad_norm": 0.193359375, - "learning_rate": 0.00019293667245939475, - "loss": 2.171, + "epoch": 0.83, + "grad_norm": 0.1552734375, + "learning_rate": 0.00014490046601890405, + "loss": 2.1445, "step": 2460 }, { - "epoch": 0.42, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001928820050614061, - "loss": 2.1782, + "epoch": 0.83, + "grad_norm": 0.1513671875, + "learning_rate": 0.00014463609687067526, + "loss": 2.1528, "step": 2465 }, { - "epoch": 0.42, - "grad_norm": 0.1962890625, - "learning_rate": 0.00019282713473171633, - "loss": 2.2018, + "epoch": 0.84, + "grad_norm": 0.1513671875, + "learning_rate": 0.00014437133755950448, + "loss": 2.154, "step": 2470 }, { - "epoch": 0.42, - "grad_norm": 0.193359375, - "learning_rate": 0.00019277206159020805, - "loss": 2.1583, + "epoch": 0.84, + "grad_norm": 0.150390625, + "learning_rate": 0.00014410619039964586, + "loss": 2.1333, "step": 2475 }, { - "epoch": 0.42, - "grad_norm": 0.2138671875, - "learning_rate": 0.00019271678575720683, - "loss": 2.1846, + "epoch": 0.84, + "grad_norm": 0.1552734375, + "learning_rate": 0.00014384065770874373, + "loss": 2.1379, "step": 2480 }, { - "epoch": 0.42, - "grad_norm": 0.1953125, - "learning_rate": 0.00019266130735348118, - "loss": 2.1489, + "epoch": 0.84, + "grad_norm": 0.15625, + "learning_rate": 0.00014357474180781232, + "loss": 2.1293, "step": 2485 }, { - "epoch": 0.42, - "grad_norm": 0.203125, - "learning_rate": 0.0001926056265002422, - "loss": 2.1503, + "epoch": 0.84, + "grad_norm": 0.15625, + "learning_rate": 0.00014330844502121547, + "loss": 2.1311, "step": 2490 }, { - "epoch": 0.42, - "grad_norm": 0.1884765625, - "learning_rate": 0.00019254974331914322, - "loss": 2.1489, + "epoch": 0.85, + "grad_norm": 0.1533203125, + "learning_rate": 0.00014304176967664637, + "loss": 2.1037, "step": 2495 }, { - "epoch": 0.42, - "grad_norm": 0.2001953125, - "learning_rate": 0.00019249365793227966, - "loss": 2.2092, + "epoch": 0.85, + "grad_norm": 0.154296875, + "learning_rate": 0.0001427747181051071, + "loss": 2.127, "step": 2500 }, { - "epoch": 0.42, - "grad_norm": 0.197265625, - "learning_rate": 0.0001924373704621888, - "loss": 2.1788, + "epoch": 0.85, + "grad_norm": 0.1513671875, + "learning_rate": 0.00014250729264088843, + "loss": 2.1565, "step": 2505 }, { - "epoch": 0.43, - "grad_norm": 0.1943359375, - "learning_rate": 0.0001923808810318494, - "loss": 2.1331, + "epoch": 0.85, + "grad_norm": 0.1552734375, + "learning_rate": 0.00014223949562154929, + "loss": 2.1459, "step": 2510 }, { - "epoch": 0.43, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019232418976468133, - "loss": 2.1295, + "epoch": 0.85, + "grad_norm": 0.15234375, + "learning_rate": 0.00014197132938789629, + "loss": 2.1221, "step": 2515 }, { - "epoch": 0.43, - "grad_norm": 0.1953125, - "learning_rate": 0.0001922672967845457, - "loss": 2.1849, + "epoch": 0.85, + "grad_norm": 0.1591796875, + "learning_rate": 0.0001417027962839634, + "loss": 2.15, "step": 2520 }, { - "epoch": 0.43, - "grad_norm": 0.1953125, - "learning_rate": 0.00019221020221574413, - "loss": 2.1991, + "epoch": 0.86, + "grad_norm": 0.1513671875, + "learning_rate": 0.00014143389865699132, + "loss": 2.1567, "step": 2525 }, { - "epoch": 0.43, - "grad_norm": 0.193359375, - "learning_rate": 0.00019215290618301875, - "loss": 2.1679, + "epoch": 0.86, + "grad_norm": 0.154296875, + "learning_rate": 0.00014116463885740723, + "loss": 2.1289, "step": 2530 }, { - "epoch": 0.43, - "grad_norm": 0.1953125, - "learning_rate": 0.00019209540881155176, - "loss": 2.1439, + "epoch": 0.86, + "grad_norm": 0.154296875, + "learning_rate": 0.00014089501923880384, + "loss": 2.1547, "step": 2535 }, { - "epoch": 0.43, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019203771022696547, - "loss": 2.1732, + "epoch": 0.86, + "grad_norm": 0.1533203125, + "learning_rate": 0.00014062504215791905, + "loss": 2.1306, "step": 2540 }, { - "epoch": 0.43, - "grad_norm": 0.19140625, - "learning_rate": 0.00019197981055532156, - "loss": 2.1724, + "epoch": 0.86, + "grad_norm": 0.1552734375, + "learning_rate": 0.00014035470997461548, + "loss": 2.1208, "step": 2545 }, { - "epoch": 0.43, - "grad_norm": 0.1953125, - "learning_rate": 0.00019192170992312125, - "loss": 2.1703, + "epoch": 0.86, + "grad_norm": 0.15625, + "learning_rate": 0.00014008402505185952, + "loss": 2.1112, "step": 2550 }, { - "epoch": 0.43, - "grad_norm": 0.1953125, - "learning_rate": 0.00019186340845730467, - "loss": 2.1369, + "epoch": 0.87, + "grad_norm": 0.154296875, + "learning_rate": 0.000139812989755701, + "loss": 2.1231, "step": 2555 }, { - "epoch": 0.43, - "grad_norm": 0.193359375, - "learning_rate": 0.00019180490628525082, - "loss": 2.1925, + "epoch": 0.87, + "grad_norm": 0.1552734375, + "learning_rate": 0.00013954160645525217, + "loss": 2.1254, "step": 2560 }, { - "epoch": 0.43, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019174620353477724, - "loss": 2.1806, + "epoch": 0.87, + "grad_norm": 0.1591796875, + "learning_rate": 0.00013926987752266735, + "loss": 2.1362, "step": 2565 }, { - "epoch": 0.44, - "grad_norm": 0.1923828125, - "learning_rate": 0.0001916873003341396, - "loss": 2.1636, + "epoch": 0.87, + "grad_norm": 0.1552734375, + "learning_rate": 0.0001389978053331219, + "loss": 2.1495, "step": 2570 }, { - "epoch": 0.44, - "grad_norm": 0.1923828125, - "learning_rate": 0.0001916281968120316, - "loss": 2.1723, + "epoch": 0.87, + "grad_norm": 0.15234375, + "learning_rate": 0.00013872539226479172, + "loss": 2.1389, "step": 2575 }, { - "epoch": 0.44, - "grad_norm": 0.1943359375, - "learning_rate": 0.0001915688930975846, - "loss": 2.1838, + "epoch": 0.87, + "grad_norm": 0.16015625, + "learning_rate": 0.00013845264069883216, + "loss": 2.1305, "step": 2580 }, { - "epoch": 0.44, - "grad_norm": 0.1943359375, - "learning_rate": 0.0001915093893203673, - "loss": 2.1576, + "epoch": 0.88, + "grad_norm": 0.15625, + "learning_rate": 0.00013817955301935743, + "loss": 2.1316, "step": 2585 }, { - "epoch": 0.44, - "grad_norm": 0.189453125, - "learning_rate": 0.00019144968561038558, - "loss": 2.1672, + "epoch": 0.88, + "grad_norm": 0.15234375, + "learning_rate": 0.0001379061316134198, + "loss": 2.1408, "step": 2590 }, { - "epoch": 0.44, - "grad_norm": 0.197265625, - "learning_rate": 0.00019138978209808208, - "loss": 2.1246, + "epoch": 0.88, + "grad_norm": 0.162109375, + "learning_rate": 0.00013763237887098843, + "loss": 2.1299, "step": 2595 }, { - "epoch": 0.44, - "grad_norm": 0.19921875, - "learning_rate": 0.00019132967891433595, - "loss": 2.1887, + "epoch": 0.88, + "grad_norm": 0.154296875, + "learning_rate": 0.0001373582971849289, + "loss": 2.1311, "step": 2600 }, { - "epoch": 0.44, - "grad_norm": 0.201171875, - "learning_rate": 0.00019126937619046267, - "loss": 2.2243, + "epoch": 0.88, + "grad_norm": 0.1552734375, + "learning_rate": 0.00013708388895098192, + "loss": 2.1358, "step": 2605 }, { - "epoch": 0.44, - "grad_norm": 0.2021484375, - "learning_rate": 0.00019120887405821361, - "loss": 2.1627, + "epoch": 0.88, + "grad_norm": 0.15625, + "learning_rate": 0.00013680915656774265, + "loss": 2.1452, "step": 2610 }, { - "epoch": 0.44, - "grad_norm": 0.19140625, - "learning_rate": 0.00019114817264977588, - "loss": 2.1638, + "epoch": 0.89, + "grad_norm": 0.1572265625, + "learning_rate": 0.00013653410243663952, + "loss": 2.1328, "step": 2615 }, { - "epoch": 0.44, - "grad_norm": 0.2021484375, - "learning_rate": 0.00019108727209777196, - "loss": 2.1382, + "epoch": 0.89, + "grad_norm": 0.154296875, + "learning_rate": 0.00013625872896191345, + "loss": 2.1629, "step": 2620 }, { - "epoch": 0.44, - "grad_norm": 0.197265625, - "learning_rate": 0.00019102617253525934, - "loss": 2.1539, + "epoch": 0.89, + "grad_norm": 0.16015625, + "learning_rate": 0.0001359830385505967, + "loss": 2.1212, "step": 2625 }, { - "epoch": 0.45, - "grad_norm": 0.189453125, - "learning_rate": 0.00019096487409573043, - "loss": 2.1688, + "epoch": 0.89, + "grad_norm": 0.1552734375, + "learning_rate": 0.00013570703361249188, + "loss": 2.154, "step": 2630 }, { - "epoch": 0.45, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019090337691311207, - "loss": 2.1974, + "epoch": 0.89, + "grad_norm": 0.158203125, + "learning_rate": 0.00013543071656015084, + "loss": 2.1221, "step": 2635 }, { - "epoch": 0.45, - "grad_norm": 0.1904296875, - "learning_rate": 0.0001908416811217654, - "loss": 2.178, + "epoch": 0.89, + "grad_norm": 0.158203125, + "learning_rate": 0.0001351540898088536, + "loss": 2.1542, "step": 2640 }, { - "epoch": 0.45, - "grad_norm": 0.1953125, - "learning_rate": 0.0001907797868564854, - "loss": 2.1297, + "epoch": 0.9, + "grad_norm": 0.1552734375, + "learning_rate": 0.00013487715577658726, + "loss": 2.1586, "step": 2645 }, { - "epoch": 0.45, - "grad_norm": 0.19140625, - "learning_rate": 0.00019071769425250075, - "loss": 2.161, + "epoch": 0.9, + "grad_norm": 0.150390625, + "learning_rate": 0.00013459991688402492, + "loss": 2.1158, "step": 2650 }, { - "epoch": 0.45, - "grad_norm": 0.1982421875, - "learning_rate": 0.00019065540344547342, - "loss": 2.1568, + "epoch": 0.9, + "grad_norm": 0.154296875, + "learning_rate": 0.00013432237555450444, + "loss": 2.1498, "step": 2655 }, { - "epoch": 0.45, - "grad_norm": 0.2080078125, - "learning_rate": 0.00019059291457149846, - "loss": 2.2083, + "epoch": 0.9, + "grad_norm": 0.154296875, + "learning_rate": 0.00013404453421400714, + "loss": 2.1519, "step": 2660 }, { - "epoch": 0.45, - "grad_norm": 0.2021484375, - "learning_rate": 0.00019053022776710363, - "loss": 2.1752, + "epoch": 0.9, + "grad_norm": 0.1572265625, + "learning_rate": 0.00013376639529113688, + "loss": 2.1233, "step": 2665 }, { - "epoch": 0.45, - "grad_norm": 0.189453125, - "learning_rate": 0.0001904673431692492, - "loss": 2.145, + "epoch": 0.9, + "grad_norm": 0.15234375, + "learning_rate": 0.00013348796121709862, + "loss": 2.1329, "step": 2670 }, { - "epoch": 0.45, - "grad_norm": 0.19140625, - "learning_rate": 0.00019040426091532743, - "loss": 2.1651, + "epoch": 0.91, + "grad_norm": 0.1572265625, + "learning_rate": 0.00013320923442567727, + "loss": 2.1402, "step": 2675 }, { - "epoch": 0.45, - "grad_norm": 0.1943359375, - "learning_rate": 0.00019034098114316264, - "loss": 2.2082, + "epoch": 0.91, + "grad_norm": 0.15234375, + "learning_rate": 0.00013293021735321628, + "loss": 2.1526, "step": 2680 }, { - "epoch": 0.45, - "grad_norm": 0.1982421875, - "learning_rate": 0.00019027750399101053, - "loss": 2.1772, + "epoch": 0.91, + "grad_norm": 0.1591796875, + "learning_rate": 0.00013265091243859652, + "loss": 2.1236, "step": 2685 }, { - "epoch": 0.46, - "grad_norm": 0.201171875, - "learning_rate": 0.00019021382959755808, - "loss": 2.2035, + "epoch": 0.91, + "grad_norm": 0.15625, + "learning_rate": 0.00013237132212321487, + "loss": 2.1443, "step": 2690 }, { - "epoch": 0.46, - "grad_norm": 0.19140625, - "learning_rate": 0.00019014995810192332, - "loss": 2.1952, + "epoch": 0.91, + "grad_norm": 0.1591796875, + "learning_rate": 0.0001320914488509629, + "loss": 2.1559, "step": 2695 }, { - "epoch": 0.46, - "grad_norm": 0.1953125, - "learning_rate": 0.0001900858896436547, - "loss": 2.0956, + "epoch": 0.91, + "grad_norm": 0.1630859375, + "learning_rate": 0.00013181129506820545, + "loss": 2.1195, "step": 2700 }, { - "epoch": 0.46, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001900216243627312, - "loss": 2.1508, + "epoch": 0.92, + "grad_norm": 0.158203125, + "learning_rate": 0.0001315308632237593, + "loss": 2.1201, "step": 2705 }, { - "epoch": 0.46, - "grad_norm": 0.2021484375, - "learning_rate": 0.00018995716239956175, - "loss": 2.2125, + "epoch": 0.92, + "grad_norm": 0.1533203125, + "learning_rate": 0.00013125015576887186, + "loss": 2.1523, "step": 2710 }, { - "epoch": 0.46, - "grad_norm": 0.193359375, - "learning_rate": 0.00018989250389498497, - "loss": 2.15, + "epoch": 0.92, + "grad_norm": 0.15625, + "learning_rate": 0.00013096917515719952, + "loss": 2.1442, "step": 2715 }, { - "epoch": 0.46, - "grad_norm": 0.1953125, - "learning_rate": 0.0001898276489902689, - "loss": 2.1861, + "epoch": 0.92, + "grad_norm": 0.16015625, + "learning_rate": 0.00013068792384478636, + "loss": 2.1488, "step": 2720 }, { - "epoch": 0.46, - "grad_norm": 0.1904296875, - "learning_rate": 0.00018976259782711074, - "loss": 2.1673, + "epoch": 0.92, + "grad_norm": 0.1552734375, + "learning_rate": 0.00013040640429004267, + "loss": 2.1624, "step": 2725 }, { - "epoch": 0.46, - "grad_norm": 0.197265625, - "learning_rate": 0.00018969735054763645, - "loss": 2.1716, + "epoch": 0.92, + "grad_norm": 0.1513671875, + "learning_rate": 0.00013012461895372344, + "loss": 2.1394, "step": 2730 }, { - "epoch": 0.46, - "grad_norm": 0.1923828125, - "learning_rate": 0.0001896319072944004, - "loss": 2.186, + "epoch": 0.93, + "grad_norm": 0.15625, + "learning_rate": 0.00012984257029890683, + "loss": 2.1418, "step": 2735 }, { - "epoch": 0.46, - "grad_norm": 0.197265625, - "learning_rate": 0.00018956626821038522, - "loss": 2.2132, + "epoch": 0.93, + "grad_norm": 0.15625, + "learning_rate": 0.00012956026079097272, + "loss": 2.1476, "step": 2740 }, { - "epoch": 0.46, - "grad_norm": 0.197265625, - "learning_rate": 0.00018950043343900138, - "loss": 2.1679, + "epoch": 0.93, + "grad_norm": 0.1533203125, + "learning_rate": 0.000129277692897581, + "loss": 2.1373, "step": 2745 }, { - "epoch": 0.47, - "grad_norm": 0.1953125, - "learning_rate": 0.0001894344031240869, - "loss": 2.169, + "epoch": 0.93, + "grad_norm": 0.158203125, + "learning_rate": 0.00012899486908865012, + "loss": 2.1379, "step": 2750 }, { - "epoch": 0.47, - "grad_norm": 0.1923828125, - "learning_rate": 0.00018936817740990692, - "loss": 2.1564, + "epoch": 0.93, + "grad_norm": 0.154296875, + "learning_rate": 0.0001287117918363356, + "loss": 2.1418, "step": 2755 }, { - "epoch": 0.47, - "grad_norm": 0.197265625, - "learning_rate": 0.00018930175644115373, - "loss": 2.1463, + "epoch": 0.93, + "grad_norm": 0.1572265625, + "learning_rate": 0.00012842846361500816, + "loss": 2.1108, "step": 2760 }, { - "epoch": 0.47, - "grad_norm": 0.193359375, - "learning_rate": 0.00018923514036294598, - "loss": 2.1655, + "epoch": 0.94, + "grad_norm": 0.1572265625, + "learning_rate": 0.00012814488690123226, + "loss": 2.1407, "step": 2765 }, { - "epoch": 0.47, - "grad_norm": 0.2041015625, - "learning_rate": 0.00018916832932082872, - "loss": 2.1705, + "epoch": 0.94, + "grad_norm": 0.1572265625, + "learning_rate": 0.00012786106417374455, + "loss": 2.1762, "step": 2770 }, { - "epoch": 0.47, - "grad_norm": 0.203125, - "learning_rate": 0.00018910132346077295, - "loss": 2.1628, + "epoch": 0.94, + "grad_norm": 0.158203125, + "learning_rate": 0.00012757699791343186, + "loss": 2.1445, "step": 2775 }, { - "epoch": 0.47, - "grad_norm": 0.19921875, - "learning_rate": 0.0001890341229291753, - "loss": 2.1291, + "epoch": 0.94, + "grad_norm": 0.15625, + "learning_rate": 0.00012729269060330999, + "loss": 2.1306, "step": 2780 }, { - "epoch": 0.47, - "grad_norm": 0.1943359375, - "learning_rate": 0.00018896672787285774, - "loss": 2.1664, + "epoch": 0.94, + "grad_norm": 0.15234375, + "learning_rate": 0.0001270081447285015, + "loss": 2.1432, "step": 2785 }, { - "epoch": 0.47, - "grad_norm": 0.1943359375, - "learning_rate": 0.00018889913843906725, - "loss": 2.1971, + "epoch": 0.94, + "grad_norm": 0.158203125, + "learning_rate": 0.00012672336277621442, + "loss": 2.1351, "step": 2790 }, { - "epoch": 0.47, - "grad_norm": 0.201171875, - "learning_rate": 0.00018883135477547542, - "loss": 2.1711, + "epoch": 0.95, + "grad_norm": 0.16015625, + "learning_rate": 0.0001264383472357202, + "loss": 2.1394, "step": 2795 }, { - "epoch": 0.47, - "grad_norm": 0.19921875, - "learning_rate": 0.0001887633770301783, - "loss": 2.169, + "epoch": 0.95, + "grad_norm": 0.1611328125, + "learning_rate": 0.0001261531005983322, + "loss": 2.1492, "step": 2800 }, { - "epoch": 0.48, - "grad_norm": 0.1962890625, - "learning_rate": 0.00018869520535169597, - "loss": 2.1618, + "epoch": 0.95, + "grad_norm": 0.1572265625, + "learning_rate": 0.00012586762535738374, + "loss": 2.1344, "step": 2805 }, { - "epoch": 0.48, - "grad_norm": 0.1904296875, - "learning_rate": 0.00018862683988897212, - "loss": 2.1426, + "epoch": 0.95, + "grad_norm": 0.158203125, + "learning_rate": 0.0001255819240082063, + "loss": 2.1352, "step": 2810 }, { - "epoch": 0.48, - "grad_norm": 0.2001953125, - "learning_rate": 0.0001885582807913739, - "loss": 2.1659, + "epoch": 0.95, + "grad_norm": 0.158203125, + "learning_rate": 0.00012529599904810784, + "loss": 2.1555, "step": 2815 }, { - "epoch": 0.48, - "grad_norm": 0.19921875, - "learning_rate": 0.00018848952820869154, - "loss": 2.1803, + "epoch": 0.96, + "grad_norm": 0.16015625, + "learning_rate": 0.00012500985297635088, + "loss": 2.1249, "step": 2820 }, { - "epoch": 0.48, - "grad_norm": 0.1923828125, - "learning_rate": 0.00018842058229113796, - "loss": 2.1246, + "epoch": 0.96, + "grad_norm": 0.1591796875, + "learning_rate": 0.00012472348829413064, + "loss": 2.1479, "step": 2825 }, { - "epoch": 0.48, - "grad_norm": 0.19921875, - "learning_rate": 0.00018835144318934854, - "loss": 2.167, + "epoch": 0.96, + "grad_norm": 0.16015625, + "learning_rate": 0.00012443690750455326, + "loss": 2.1159, "step": 2830 }, { - "epoch": 0.48, - "grad_norm": 0.201171875, - "learning_rate": 0.0001882821110543806, - "loss": 2.1674, + "epoch": 0.96, + "grad_norm": 0.1533203125, + "learning_rate": 0.0001241501131126138, + "loss": 2.1667, "step": 2835 }, { - "epoch": 0.48, - "grad_norm": 0.1943359375, - "learning_rate": 0.0001882125860377134, - "loss": 2.147, + "epoch": 0.96, + "grad_norm": 0.1572265625, + "learning_rate": 0.00012386310762517452, + "loss": 2.1339, "step": 2840 }, { - "epoch": 0.48, - "grad_norm": 0.2060546875, - "learning_rate": 0.00018814286829124747, - "loss": 2.1274, + "epoch": 0.96, + "grad_norm": 0.158203125, + "learning_rate": 0.00012357589355094275, + "loss": 2.109, "step": 2845 }, { - "epoch": 0.48, - "grad_norm": 0.1923828125, - "learning_rate": 0.00018807295796730445, - "loss": 2.1769, + "epoch": 0.97, + "grad_norm": 0.15625, + "learning_rate": 0.0001232884734004491, + "loss": 2.1317, "step": 2850 }, { - "epoch": 0.48, - "grad_norm": 0.19921875, - "learning_rate": 0.00018800285521862679, - "loss": 2.1788, + "epoch": 0.97, + "grad_norm": 0.154296875, + "learning_rate": 0.00012300084968602549, + "loss": 2.1117, "step": 2855 }, { - "epoch": 0.48, - "grad_norm": 0.1962890625, - "learning_rate": 0.00018793256019837727, - "loss": 2.1786, + "epoch": 0.97, + "grad_norm": 0.154296875, + "learning_rate": 0.00012271302492178327, + "loss": 2.1378, "step": 2860 }, { - "epoch": 0.49, - "grad_norm": 0.197265625, - "learning_rate": 0.00018786207306013882, - "loss": 2.1968, + "epoch": 0.97, + "grad_norm": 0.1552734375, + "learning_rate": 0.00012242500162359105, + "loss": 2.1222, "step": 2865 }, { - "epoch": 0.49, - "grad_norm": 0.203125, - "learning_rate": 0.00018779139395791407, - "loss": 2.1675, + "epoch": 0.97, + "grad_norm": 0.1591796875, + "learning_rate": 0.00012213678230905284, + "loss": 2.1652, "step": 2870 }, { - "epoch": 0.49, - "grad_norm": 0.1962890625, - "learning_rate": 0.00018772052304612507, - "loss": 2.1596, + "epoch": 0.97, + "grad_norm": 0.154296875, + "learning_rate": 0.00012184836949748608, + "loss": 2.1159, "step": 2875 }, { - "epoch": 0.49, - "grad_norm": 0.2041015625, - "learning_rate": 0.000187649460479613, - "loss": 2.1348, + "epoch": 0.98, + "grad_norm": 0.1611328125, + "learning_rate": 0.00012155976570989949, + "loss": 2.1242, "step": 2880 }, { - "epoch": 0.49, - "grad_norm": 0.193359375, - "learning_rate": 0.0001875782064136377, - "loss": 2.1215, + "epoch": 0.98, + "grad_norm": 0.1591796875, + "learning_rate": 0.0001212709734689712, + "loss": 2.1345, "step": 2885 }, { - "epoch": 0.49, - "grad_norm": 0.19921875, - "learning_rate": 0.00018750676100387742, - "loss": 2.2065, + "epoch": 0.98, + "grad_norm": 0.158203125, + "learning_rate": 0.00012098199529902648, + "loss": 2.1341, "step": 2890 }, { - "epoch": 0.49, - "grad_norm": 0.1943359375, - "learning_rate": 0.00018743512440642845, - "loss": 2.1686, + "epoch": 0.98, + "grad_norm": 0.154296875, + "learning_rate": 0.0001206928337260159, + "loss": 2.1265, "step": 2895 }, { - "epoch": 0.49, - "grad_norm": 0.203125, - "learning_rate": 0.00018736329677780487, - "loss": 2.1854, + "epoch": 0.98, + "grad_norm": 0.158203125, + "learning_rate": 0.00012040349127749313, + "loss": 2.153, "step": 2900 }, { - "epoch": 0.49, - "grad_norm": 0.1904296875, - "learning_rate": 0.00018729127827493805, - "loss": 2.1674, + "epoch": 0.98, + "grad_norm": 0.1552734375, + "learning_rate": 0.00012011397048259285, + "loss": 2.1379, "step": 2905 }, { - "epoch": 0.49, - "grad_norm": 0.1953125, - "learning_rate": 0.0001872190690551764, - "loss": 2.1876, + "epoch": 0.99, + "grad_norm": 0.154296875, + "learning_rate": 0.00011982427387200867, + "loss": 2.1186, "step": 2910 }, { - "epoch": 0.49, - "grad_norm": 0.1982421875, - "learning_rate": 0.00018714666927628504, - "loss": 2.1409, + "epoch": 0.99, + "grad_norm": 0.158203125, + "learning_rate": 0.00011953440397797097, + "loss": 2.1583, "step": 2915 }, { - "epoch": 0.49, - "grad_norm": 0.197265625, - "learning_rate": 0.00018707407909644542, - "loss": 2.1408, + "epoch": 0.99, + "grad_norm": 0.1552734375, + "learning_rate": 0.00011924436333422489, + "loss": 2.1316, "step": 2920 }, { - "epoch": 0.5, - "grad_norm": 0.205078125, - "learning_rate": 0.00018700129867425504, - "loss": 2.2294, + "epoch": 0.99, + "grad_norm": 0.1552734375, + "learning_rate": 0.000118954154476008, + "loss": 2.1223, "step": 2925 }, { - "epoch": 0.5, - "grad_norm": 0.212890625, - "learning_rate": 0.0001869283281687269, - "loss": 2.1731, + "epoch": 0.99, + "grad_norm": 0.1572265625, + "learning_rate": 0.0001186637799400282, + "loss": 2.1399, "step": 2930 }, { - "epoch": 0.5, - "grad_norm": 0.1982421875, - "learning_rate": 0.00018685516773928943, - "loss": 2.1667, + "epoch": 0.99, + "grad_norm": 0.1552734375, + "learning_rate": 0.00011837324226444169, + "loss": 2.1388, "step": 2935 }, { - "epoch": 0.5, - "grad_norm": 0.1982421875, - "learning_rate": 0.00018678181754578602, - "loss": 2.1689, + "epoch": 1.0, + "grad_norm": 0.1552734375, + "learning_rate": 0.00011808254398883056, + "loss": 2.1443, "step": 2940 }, { - "epoch": 0.5, - "grad_norm": 0.1982421875, - "learning_rate": 0.00018670827774847456, - "loss": 2.2155, + "epoch": 1.0, + "grad_norm": 0.1552734375, + "learning_rate": 0.00011779168765418079, + "loss": 2.1314, "step": 2945 }, { - "epoch": 0.5, - "grad_norm": 0.2158203125, - "learning_rate": 0.00018663454850802728, - "loss": 2.1756, + "epoch": 1.0, + "grad_norm": 0.1572265625, + "learning_rate": 0.00011750067580285988, + "loss": 2.1288, "step": 2950 }, { - "epoch": 0.5, - "grad_norm": 0.19921875, - "learning_rate": 0.0001865606299855303, - "loss": 2.1609, + "epoch": 1.0, + "eval_loss": 2.13199520111084, + "eval_runtime": 156.8723, + "eval_samples_per_second": 8.465, + "eval_steps_per_second": 1.058, + "step": 2952 + }, + { + "epoch": 1.0, + "grad_norm": 0.1572265625, + "learning_rate": 0.00011720951097859476, + "loss": 2.1113, "step": 2955 }, { - "epoch": 0.5, - "grad_norm": 0.19140625, - "learning_rate": 0.0001864865223424832, - "loss": 2.1553, + "epoch": 1.0, + "grad_norm": 0.1533203125, + "learning_rate": 0.00011691819572644939, + "loss": 2.0923, "step": 2960 }, { - "epoch": 0.5, - "grad_norm": 0.193359375, - "learning_rate": 0.0001864122257407989, - "loss": 2.1826, + "epoch": 1.0, + "grad_norm": 0.1513671875, + "learning_rate": 0.00011662673259280276, + "loss": 2.1001, "step": 2965 }, { - "epoch": 0.5, - "grad_norm": 0.1884765625, - "learning_rate": 0.00018633774034280306, - "loss": 2.1677, + "epoch": 1.01, + "grad_norm": 0.1572265625, + "learning_rate": 0.00011633512412532637, + "loss": 2.1235, "step": 2970 }, { - "epoch": 0.5, - "grad_norm": 0.203125, - "learning_rate": 0.00018626306631123386, - "loss": 2.156, + "epoch": 1.01, + "grad_norm": 0.158203125, + "learning_rate": 0.0001160433728729621, + "loss": 2.1127, "step": 2975 }, { - "epoch": 0.5, - "grad_norm": 0.19921875, - "learning_rate": 0.00018618820380924165, - "loss": 2.1514, + "epoch": 1.01, + "grad_norm": 0.158203125, + "learning_rate": 0.00011575148138589996, + "loss": 2.1217, "step": 2980 }, { - "epoch": 0.51, - "grad_norm": 0.2041015625, - "learning_rate": 0.00018611315300038847, - "loss": 2.1479, + "epoch": 1.01, + "grad_norm": 0.158203125, + "learning_rate": 0.0001154594522155557, + "loss": 2.1346, "step": 2985 }, { - "epoch": 0.51, - "grad_norm": 0.201171875, - "learning_rate": 0.00018603791404864784, - "loss": 2.1405, + "epoch": 1.01, + "grad_norm": 0.1630859375, + "learning_rate": 0.00011516728791454861, + "loss": 2.1046, "step": 2990 }, { - "epoch": 0.51, - "grad_norm": 0.212890625, - "learning_rate": 0.00018596248711840436, - "loss": 2.1531, + "epoch": 1.01, + "grad_norm": 0.1611328125, + "learning_rate": 0.00011487499103667904, + "loss": 2.0974, "step": 2995 }, { - "epoch": 0.51, - "grad_norm": 0.1982421875, - "learning_rate": 0.0001858868723744533, - "loss": 2.1746, + "epoch": 1.02, + "grad_norm": 0.16015625, + "learning_rate": 0.00011458256413690633, + "loss": 2.0935, "step": 3000 }, { - "epoch": 0.51, - "grad_norm": 0.1953125, - "learning_rate": 0.00018581106998200023, - "loss": 2.1487, + "epoch": 1.02, + "grad_norm": 0.1630859375, + "learning_rate": 0.00011429000977132629, + "loss": 2.1148, "step": 3005 }, { - "epoch": 0.51, - "grad_norm": 0.21484375, - "learning_rate": 0.00018573508010666078, - "loss": 2.2017, + "epoch": 1.02, + "grad_norm": 0.162109375, + "learning_rate": 0.00011399733049714884, + "loss": 2.1048, "step": 3010 }, { - "epoch": 0.51, - "grad_norm": 0.1962890625, - "learning_rate": 0.00018565890291446014, - "loss": 2.1301, + "epoch": 1.02, + "grad_norm": 0.1611328125, + "learning_rate": 0.00011370452887267582, + "loss": 2.0877, "step": 3015 }, { - "epoch": 0.51, - "grad_norm": 0.1982421875, - "learning_rate": 0.00018558253857183277, - "loss": 2.139, + "epoch": 1.02, + "grad_norm": 0.1640625, + "learning_rate": 0.00011341160745727844, + "loss": 2.1225, "step": 3020 }, { - "epoch": 0.51, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001855059872456221, - "loss": 2.1775, + "epoch": 1.02, + "grad_norm": 0.16015625, + "learning_rate": 0.00011311856881137506, + "loss": 2.1125, "step": 3025 }, { - "epoch": 0.51, - "grad_norm": 0.1962890625, - "learning_rate": 0.00018542924910307996, - "loss": 2.1787, + "epoch": 1.03, + "grad_norm": 0.162109375, + "learning_rate": 0.00011282541549640873, + "loss": 2.0913, "step": 3030 }, { - "epoch": 0.51, - "grad_norm": 0.1923828125, - "learning_rate": 0.0001853523243118664, - "loss": 2.14, + "epoch": 1.03, + "grad_norm": 0.1630859375, + "learning_rate": 0.0001125321500748248, + "loss": 2.0832, "step": 3035 }, { - "epoch": 0.51, - "grad_norm": 0.203125, - "learning_rate": 0.00018527521304004932, - "loss": 2.1609, + "epoch": 1.03, + "grad_norm": 0.162109375, + "learning_rate": 0.00011223877511004863, + "loss": 2.1109, "step": 3040 }, { - "epoch": 0.52, - "grad_norm": 0.1943359375, - "learning_rate": 0.00018519791545610392, - "loss": 2.1944, + "epoch": 1.03, + "grad_norm": 0.1591796875, + "learning_rate": 0.00011194529316646293, + "loss": 2.1353, "step": 3045 }, { - "epoch": 0.52, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001851204317289126, - "loss": 2.1888, + "epoch": 1.03, + "grad_norm": 0.1591796875, + "learning_rate": 0.00011165170680938572, + "loss": 2.1065, "step": 3050 }, { - "epoch": 0.52, - "grad_norm": 0.1953125, - "learning_rate": 0.00018504276202776438, - "loss": 2.1624, + "epoch": 1.03, + "grad_norm": 0.1572265625, + "learning_rate": 0.00011135801860504749, + "loss": 2.1287, "step": 3055 }, { - "epoch": 0.52, - "grad_norm": 0.19921875, - "learning_rate": 0.00018496490652235455, - "loss": 2.1327, + "epoch": 1.04, + "grad_norm": 0.162109375, + "learning_rate": 0.00011106423112056911, + "loss": 2.1505, "step": 3060 }, { - "epoch": 0.52, - "grad_norm": 0.2001953125, - "learning_rate": 0.00018488686538278452, - "loss": 2.154, + "epoch": 1.04, + "grad_norm": 0.16015625, + "learning_rate": 0.00011077034692393917, + "loss": 2.1052, "step": 3065 }, { - "epoch": 0.52, - "grad_norm": 0.1875, - "learning_rate": 0.0001848086387795611, - "loss": 2.1481, + "epoch": 1.04, + "grad_norm": 0.1591796875, + "learning_rate": 0.00011047636858399169, + "loss": 2.1197, "step": 3070 }, { - "epoch": 0.52, - "grad_norm": 0.2001953125, - "learning_rate": 0.0001847302268835964, - "loss": 2.1466, + "epoch": 1.04, + "grad_norm": 0.1611328125, + "learning_rate": 0.00011018229867038356, + "loss": 2.086, "step": 3075 }, { - "epoch": 0.52, - "grad_norm": 0.197265625, - "learning_rate": 0.00018465162986620737, - "loss": 2.1797, + "epoch": 1.04, + "grad_norm": 0.1650390625, + "learning_rate": 0.00010988813975357208, + "loss": 2.1272, "step": 3080 }, { - "epoch": 0.52, - "grad_norm": 0.19921875, - "learning_rate": 0.00018457284789911532, - "loss": 2.1701, + "epoch": 1.04, + "grad_norm": 0.1591796875, + "learning_rate": 0.00010959389440479264, + "loss": 2.1135, "step": 3085 }, { - "epoch": 0.52, - "grad_norm": 0.1962890625, - "learning_rate": 0.00018449388115444578, - "loss": 2.1868, + "epoch": 1.05, + "grad_norm": 0.1611328125, + "learning_rate": 0.00010929956519603594, + "loss": 2.1101, "step": 3090 }, { - "epoch": 0.52, - "grad_norm": 0.1982421875, - "learning_rate": 0.00018441472980472795, - "loss": 2.1842, + "epoch": 1.05, + "grad_norm": 0.1650390625, + "learning_rate": 0.00010900515470002595, + "loss": 2.122, "step": 3095 }, { - "epoch": 0.53, - "grad_norm": 0.1982421875, - "learning_rate": 0.00018433539402289427, - "loss": 2.1489, + "epoch": 1.05, + "grad_norm": 0.1630859375, + "learning_rate": 0.00010871066549019688, + "loss": 2.1027, "step": 3100 }, { - "epoch": 0.53, - "grad_norm": 0.2041015625, - "learning_rate": 0.00018425587398228021, - "loss": 2.1478, + "epoch": 1.05, + "grad_norm": 0.162109375, + "learning_rate": 0.0001084161001406712, + "loss": 2.1149, "step": 3105 }, { - "epoch": 0.53, - "grad_norm": 0.2021484375, - "learning_rate": 0.00018417616985662386, - "loss": 2.166, + "epoch": 1.05, + "grad_norm": 0.16015625, + "learning_rate": 0.00010812146122623683, + "loss": 2.0942, "step": 3110 }, { - "epoch": 0.53, - "grad_norm": 0.189453125, - "learning_rate": 0.0001840962818200654, - "loss": 2.1668, + "epoch": 1.06, + "grad_norm": 0.162109375, + "learning_rate": 0.00010782675132232474, + "loss": 2.1331, "step": 3115 }, { - "epoch": 0.53, - "grad_norm": 0.2001953125, - "learning_rate": 0.0001840162100471469, - "loss": 2.1575, + "epoch": 1.06, + "grad_norm": 0.1611328125, + "learning_rate": 0.00010753197300498638, + "loss": 2.1118, "step": 3120 }, { - "epoch": 0.53, - "grad_norm": 0.2060546875, - "learning_rate": 0.00018393595471281182, - "loss": 2.1573, + "epoch": 1.06, + "grad_norm": 0.1650390625, + "learning_rate": 0.00010723712885087123, + "loss": 2.0959, "step": 3125 }, { - "epoch": 0.53, - "grad_norm": 0.1982421875, - "learning_rate": 0.00018385551599240472, - "loss": 2.2463, + "epoch": 1.06, + "grad_norm": 0.162109375, + "learning_rate": 0.00010694222143720423, + "loss": 2.0654, "step": 3130 }, { - "epoch": 0.53, - "grad_norm": 0.1923828125, - "learning_rate": 0.00018377489406167077, - "loss": 2.1743, + "epoch": 1.06, + "grad_norm": 0.1611328125, + "learning_rate": 0.00010664725334176331, + "loss": 2.1195, "step": 3135 }, { - "epoch": 0.53, - "grad_norm": 0.201171875, - "learning_rate": 0.00018369408909675543, - "loss": 2.1865, + "epoch": 1.06, + "grad_norm": 0.166015625, + "learning_rate": 0.00010635222714285676, + "loss": 2.1053, "step": 3140 }, { - "epoch": 0.53, - "grad_norm": 0.2041015625, - "learning_rate": 0.00018361310127420417, - "loss": 2.1548, + "epoch": 1.07, + "grad_norm": 0.1669921875, + "learning_rate": 0.0001060571454193008, + "loss": 2.1174, "step": 3145 }, { - "epoch": 0.53, - "grad_norm": 0.197265625, - "learning_rate": 0.00018353193077096178, - "loss": 2.1521, + "epoch": 1.07, + "grad_norm": 0.1640625, + "learning_rate": 0.00010576201075039696, + "loss": 2.0996, "step": 3150 }, { - "epoch": 0.53, - "grad_norm": 0.2021484375, - "learning_rate": 0.00018345057776437233, - "loss": 2.1347, + "epoch": 1.07, + "grad_norm": 0.1650390625, + "learning_rate": 0.00010546682571590958, + "loss": 2.12, "step": 3155 }, { - "epoch": 0.54, - "grad_norm": 0.1982421875, - "learning_rate": 0.0001833690424321786, - "loss": 2.176, + "epoch": 1.07, + "grad_norm": 0.1640625, + "learning_rate": 0.00010517159289604324, + "loss": 2.1057, "step": 3160 }, { - "epoch": 0.54, - "grad_norm": 0.2001953125, - "learning_rate": 0.00018328732495252167, - "loss": 2.1327, + "epoch": 1.07, + "grad_norm": 0.1630859375, + "learning_rate": 0.00010487631487142017, + "loss": 2.135, "step": 3165 }, { - "epoch": 0.54, - "grad_norm": 0.203125, - "learning_rate": 0.00018320542550394065, - "loss": 2.1949, + "epoch": 1.07, + "grad_norm": 0.162109375, + "learning_rate": 0.00010458099422305785, + "loss": 2.1585, "step": 3170 }, { - "epoch": 0.54, - "grad_norm": 0.19921875, - "learning_rate": 0.00018312334426537214, - "loss": 2.1317, + "epoch": 1.08, + "grad_norm": 0.1669921875, + "learning_rate": 0.0001042856335323462, + "loss": 2.1096, "step": 3175 }, { - "epoch": 0.54, - "grad_norm": 0.2001953125, - "learning_rate": 0.00018304108141615, - "loss": 2.1799, + "epoch": 1.08, + "grad_norm": 0.16015625, + "learning_rate": 0.00010399023538102522, + "loss": 2.1008, "step": 3180 }, { - "epoch": 0.54, - "grad_norm": 0.1943359375, - "learning_rate": 0.0001829586371360048, - "loss": 2.214, + "epoch": 1.08, + "grad_norm": 0.16015625, + "learning_rate": 0.00010369480235116229, + "loss": 2.1124, "step": 3185 }, { - "epoch": 0.54, - "grad_norm": 0.20703125, - "learning_rate": 0.00018287601160506362, - "loss": 2.1478, + "epoch": 1.08, + "grad_norm": 0.1650390625, + "learning_rate": 0.00010339933702512979, + "loss": 2.1561, "step": 3190 }, { - "epoch": 0.54, - "grad_norm": 0.2001953125, - "learning_rate": 0.00018279320500384942, - "loss": 2.1804, + "epoch": 1.08, + "grad_norm": 0.1640625, + "learning_rate": 0.00010310384198558225, + "loss": 2.1253, "step": 3195 }, { - "epoch": 0.54, - "grad_norm": 0.19921875, - "learning_rate": 0.00018271021751328084, - "loss": 2.1779, + "epoch": 1.08, + "grad_norm": 0.16015625, + "learning_rate": 0.00010280831981543405, + "loss": 2.0947, "step": 3200 }, { - "epoch": 0.54, - "grad_norm": 0.19921875, - "learning_rate": 0.00018262704931467174, - "loss": 2.1433, + "epoch": 1.09, + "grad_norm": 0.1640625, + "learning_rate": 0.00010251277309783663, + "loss": 2.0967, "step": 3205 }, { - "epoch": 0.54, - "grad_norm": 0.201171875, - "learning_rate": 0.00018254370058973072, - "loss": 2.1722, + "epoch": 1.09, + "grad_norm": 0.1591796875, + "learning_rate": 0.00010221720441615599, + "loss": 2.1105, "step": 3210 }, { - "epoch": 0.54, - "grad_norm": 0.193359375, - "learning_rate": 0.0001824601715205609, - "loss": 2.133, + "epoch": 1.09, + "grad_norm": 0.1630859375, + "learning_rate": 0.00010192161635395026, + "loss": 2.0713, "step": 3215 }, { - "epoch": 0.55, - "grad_norm": 0.205078125, - "learning_rate": 0.00018237646228965937, - "loss": 2.1461, + "epoch": 1.09, + "grad_norm": 0.1591796875, + "learning_rate": 0.00010162601149494676, + "loss": 2.1249, "step": 3220 }, { - "epoch": 0.55, - "grad_norm": 0.197265625, - "learning_rate": 0.0001822925730799168, - "loss": 2.1887, + "epoch": 1.09, + "grad_norm": 0.1630859375, + "learning_rate": 0.00010133039242301985, + "loss": 2.0996, "step": 3225 }, { - "epoch": 0.55, - "grad_norm": 0.193359375, - "learning_rate": 0.00018220850407461717, - "loss": 2.1615, + "epoch": 1.09, + "grad_norm": 0.1611328125, + "learning_rate": 0.00010103476172216792, + "loss": 2.1181, "step": 3230 }, { - "epoch": 0.55, - "grad_norm": 0.1923828125, - "learning_rate": 0.0001821242554574373, - "loss": 2.1579, + "epoch": 1.1, + "grad_norm": 0.162109375, + "learning_rate": 0.00010073912197649116, + "loss": 2.0768, "step": 3235 }, { - "epoch": 0.55, - "grad_norm": 0.208984375, - "learning_rate": 0.00018203982741244628, - "loss": 2.1899, + "epoch": 1.1, + "grad_norm": 0.162109375, + "learning_rate": 0.0001004434757701688, + "loss": 2.1403, "step": 3240 }, { - "epoch": 0.55, - "grad_norm": 0.208984375, - "learning_rate": 0.00018195522012410536, - "loss": 2.1738, + "epoch": 1.1, + "grad_norm": 0.1630859375, + "learning_rate": 0.00010014782568743641, + "loss": 2.1352, "step": 3245 }, { - "epoch": 0.55, - "grad_norm": 0.21484375, - "learning_rate": 0.00018187043377726735, - "loss": 2.1169, + "epoch": 1.1, + "grad_norm": 0.1630859375, + "learning_rate": 9.98521743125636e-05, + "loss": 2.1234, "step": 3250 }, { - "epoch": 0.55, - "grad_norm": 0.2041015625, - "learning_rate": 0.0001817854685571763, - "loss": 2.1394, + "epoch": 1.1, + "grad_norm": 0.1640625, + "learning_rate": 9.955652422983122e-05, + "loss": 2.1037, "step": 3255 }, { - "epoch": 0.55, - "grad_norm": 0.19921875, - "learning_rate": 0.00018170032464946708, - "loss": 2.1765, + "epoch": 1.1, + "grad_norm": 0.16796875, + "learning_rate": 9.926087802350886e-05, + "loss": 2.116, "step": 3260 }, { - "epoch": 0.55, - "grad_norm": 0.19921875, - "learning_rate": 0.0001816150022401649, - "loss": 2.174, + "epoch": 1.11, + "grad_norm": 0.1650390625, + "learning_rate": 9.896523827783207e-05, + "loss": 2.1135, "step": 3265 }, { - "epoch": 0.55, - "grad_norm": 0.1962890625, - "learning_rate": 0.00018152950151568504, - "loss": 2.1572, + "epoch": 1.11, + "grad_norm": 0.1640625, + "learning_rate": 9.866960757698017e-05, + "loss": 2.1019, "step": 3270 }, { - "epoch": 0.55, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001814438226628323, - "loss": 2.1492, + "epoch": 1.11, + "grad_norm": 0.162109375, + "learning_rate": 9.837398850505324e-05, + "loss": 2.1151, "step": 3275 }, { - "epoch": 0.56, - "grad_norm": 0.2021484375, - "learning_rate": 0.00018135796586880068, - "loss": 2.1177, + "epoch": 1.11, + "grad_norm": 0.162109375, + "learning_rate": 9.807838364604978e-05, + "loss": 2.1192, "step": 3280 }, { - "epoch": 0.56, - "grad_norm": 0.19921875, - "learning_rate": 0.000181271931321173, - "loss": 2.1699, + "epoch": 1.11, + "grad_norm": 0.1611328125, + "learning_rate": 9.7782795583844e-05, + "loss": 2.1127, "step": 3285 }, { - "epoch": 0.56, - "grad_norm": 0.1943359375, - "learning_rate": 0.0001811857192079204, - "loss": 2.1318, + "epoch": 1.11, + "grad_norm": 0.1630859375, + "learning_rate": 9.748722690216341e-05, + "loss": 2.1007, "step": 3290 }, { - "epoch": 0.56, - "grad_norm": 0.197265625, - "learning_rate": 0.000181099329717402, - "loss": 2.1738, + "epoch": 1.12, + "grad_norm": 0.162109375, + "learning_rate": 9.719168018456598e-05, + "loss": 2.1043, "step": 3295 }, { - "epoch": 0.56, - "grad_norm": 0.1953125, - "learning_rate": 0.00018101276303836438, - "loss": 2.1476, + "epoch": 1.12, + "grad_norm": 0.1611328125, + "learning_rate": 9.689615801441774e-05, + "loss": 2.1124, "step": 3300 }, { - "epoch": 0.56, - "grad_norm": 0.1943359375, - "learning_rate": 0.00018092601935994137, - "loss": 2.1671, + "epoch": 1.12, + "grad_norm": 0.1640625, + "learning_rate": 9.660066297487022e-05, + "loss": 2.1261, "step": 3305 }, { - "epoch": 0.56, - "grad_norm": 0.1982421875, - "learning_rate": 0.0001808390988716534, - "loss": 2.1648, + "epoch": 1.12, + "grad_norm": 0.1611328125, + "learning_rate": 9.630519764883772e-05, + "loss": 2.1013, "step": 3310 }, { - "epoch": 0.56, - "grad_norm": 0.197265625, - "learning_rate": 0.0001807520017634073, - "loss": 2.1088, + "epoch": 1.12, + "grad_norm": 0.1640625, + "learning_rate": 9.600976461897483e-05, + "loss": 2.1159, "step": 3315 }, { - "epoch": 0.56, - "grad_norm": 0.1982421875, - "learning_rate": 0.00018066472822549567, - "loss": 2.154, + "epoch": 1.12, + "grad_norm": 0.1650390625, + "learning_rate": 9.571436646765382e-05, + "loss": 2.113, "step": 3320 }, { - "epoch": 0.56, - "grad_norm": 0.208984375, - "learning_rate": 0.00018057727844859672, - "loss": 2.1864, + "epoch": 1.13, + "grad_norm": 0.162109375, + "learning_rate": 9.541900577694217e-05, + "loss": 2.1211, "step": 3325 }, { - "epoch": 0.56, - "grad_norm": 0.205078125, - "learning_rate": 0.00018048965262377358, - "loss": 2.1863, + "epoch": 1.13, + "grad_norm": 0.1611328125, + "learning_rate": 9.512368512857984e-05, + "loss": 2.1061, "step": 3330 }, { - "epoch": 0.56, - "grad_norm": 0.19921875, - "learning_rate": 0.00018040185094247413, - "loss": 2.1081, + "epoch": 1.13, + "grad_norm": 0.162109375, + "learning_rate": 9.482840710395675e-05, + "loss": 2.1036, "step": 3335 }, { - "epoch": 0.57, - "grad_norm": 0.203125, - "learning_rate": 0.00018031387359653035, - "loss": 2.1357, + "epoch": 1.13, + "grad_norm": 0.1650390625, + "learning_rate": 9.453317428409044e-05, + "loss": 2.1096, "step": 3340 }, { - "epoch": 0.57, - "grad_norm": 0.19921875, - "learning_rate": 0.00018022572077815808, - "loss": 2.1908, + "epoch": 1.13, + "grad_norm": 0.166015625, + "learning_rate": 9.423798924960306e-05, + "loss": 2.1065, "step": 3345 }, { - "epoch": 0.57, - "grad_norm": 0.1943359375, - "learning_rate": 0.00018013739267995659, - "loss": 2.1439, + "epoch": 1.13, + "grad_norm": 0.16796875, + "learning_rate": 9.394285458069923e-05, + "loss": 2.1397, "step": 3350 }, { - "epoch": 0.57, - "grad_norm": 0.2021484375, - "learning_rate": 0.00018004888949490802, - "loss": 2.2017, + "epoch": 1.14, + "grad_norm": 0.166015625, + "learning_rate": 9.364777285714324e-05, + "loss": 2.1006, "step": 3355 }, { - "epoch": 0.57, - "grad_norm": 0.19921875, - "learning_rate": 0.00017996021141637709, - "loss": 2.1607, + "epoch": 1.14, + "grad_norm": 0.1650390625, + "learning_rate": 9.33527466582367e-05, + "loss": 2.1188, "step": 3360 }, { - "epoch": 0.57, - "grad_norm": 0.2080078125, - "learning_rate": 0.00017987135863811062, - "loss": 2.1538, + "epoch": 1.14, + "grad_norm": 0.166015625, + "learning_rate": 9.30577785627958e-05, + "loss": 2.1265, "step": 3365 }, { - "epoch": 0.57, - "grad_norm": 0.19921875, - "learning_rate": 0.0001797823313542371, - "loss": 2.1318, + "epoch": 1.14, + "grad_norm": 0.1630859375, + "learning_rate": 9.276287114912878e-05, + "loss": 2.1252, "step": 3370 }, { - "epoch": 0.57, - "grad_norm": 0.203125, - "learning_rate": 0.00017969312975926632, - "loss": 2.1433, + "epoch": 1.14, + "grad_norm": 0.16796875, + "learning_rate": 9.246802699501363e-05, + "loss": 2.1333, "step": 3375 }, { - "epoch": 0.57, - "grad_norm": 0.1923828125, - "learning_rate": 0.0001796037540480889, - "loss": 2.1633, + "epoch": 1.14, + "grad_norm": 0.1630859375, + "learning_rate": 9.217324867767527e-05, + "loss": 2.1119, "step": 3380 }, { - "epoch": 0.57, - "grad_norm": 0.2177734375, - "learning_rate": 0.0001795142044159759, - "loss": 2.1587, + "epoch": 1.15, + "grad_norm": 0.1650390625, + "learning_rate": 9.187853877376318e-05, + "loss": 2.1104, "step": 3385 }, { - "epoch": 0.57, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001794244810585783, - "loss": 2.1575, + "epoch": 1.15, + "grad_norm": 0.1669921875, + "learning_rate": 9.158389985932881e-05, + "loss": 2.1238, "step": 3390 }, { - "epoch": 0.57, - "grad_norm": 0.2041015625, - "learning_rate": 0.00017933458417192672, - "loss": 2.1543, + "epoch": 1.15, + "grad_norm": 0.1640625, + "learning_rate": 9.128933450980314e-05, + "loss": 2.1229, "step": 3395 }, { - "epoch": 0.58, - "grad_norm": 0.19921875, - "learning_rate": 0.00017924451395243086, - "loss": 2.1969, + "epoch": 1.15, + "grad_norm": 0.1611328125, + "learning_rate": 9.099484529997409e-05, + "loss": 2.0859, "step": 3400 }, { - "epoch": 0.58, - "grad_norm": 0.19921875, - "learning_rate": 0.00017915427059687908, - "loss": 2.1322, + "epoch": 1.15, + "grad_norm": 0.166015625, + "learning_rate": 9.070043480396404e-05, + "loss": 2.0925, "step": 3405 }, { - "epoch": 0.58, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017906385430243817, - "loss": 2.1745, + "epoch": 1.15, + "grad_norm": 0.1630859375, + "learning_rate": 9.04061055952074e-05, + "loss": 2.1346, "step": 3410 }, { - "epoch": 0.58, - "grad_norm": 0.205078125, - "learning_rate": 0.0001789732652666526, - "loss": 2.1668, + "epoch": 1.16, + "grad_norm": 0.162109375, + "learning_rate": 9.011186024642793e-05, + "loss": 2.096, "step": 3415 }, { - "epoch": 0.58, - "grad_norm": 0.193359375, - "learning_rate": 0.00017888250368744437, - "loss": 2.1606, + "epoch": 1.16, + "grad_norm": 0.162109375, + "learning_rate": 8.981770132961649e-05, + "loss": 2.1153, "step": 3420 }, { - "epoch": 0.58, - "grad_norm": 0.1962890625, - "learning_rate": 0.00017879156976311234, - "loss": 2.1449, + "epoch": 1.16, + "grad_norm": 0.162109375, + "learning_rate": 8.952363141600834e-05, + "loss": 2.1073, "step": 3425 }, { - "epoch": 0.58, - "grad_norm": 0.2041015625, - "learning_rate": 0.000178700463692332, - "loss": 2.1682, + "epoch": 1.16, + "grad_norm": 0.1728515625, + "learning_rate": 8.922965307606086e-05, + "loss": 2.1085, "step": 3430 }, { - "epoch": 0.58, - "grad_norm": 0.203125, - "learning_rate": 0.00017860918567415496, - "loss": 2.1207, + "epoch": 1.16, + "grad_norm": 0.1630859375, + "learning_rate": 8.893576887943094e-05, + "loss": 2.0935, "step": 3435 }, { - "epoch": 0.58, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017851773590800844, - "loss": 2.1677, + "epoch": 1.17, + "grad_norm": 0.1669921875, + "learning_rate": 8.86419813949525e-05, + "loss": 2.091, "step": 3440 }, { - "epoch": 0.58, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017842611459369497, - "loss": 2.1592, + "epoch": 1.17, + "grad_norm": 0.166015625, + "learning_rate": 8.834829319061431e-05, + "loss": 2.114, "step": 3445 }, { - "epoch": 0.58, - "grad_norm": 0.2060546875, - "learning_rate": 0.0001783343219313918, - "loss": 2.1685, + "epoch": 1.17, + "grad_norm": 0.16796875, + "learning_rate": 8.805470683353708e-05, + "loss": 2.1197, "step": 3450 }, { - "epoch": 0.59, - "grad_norm": 0.228515625, - "learning_rate": 0.0001782423581216507, - "loss": 2.1355, + "epoch": 1.17, + "grad_norm": 0.171875, + "learning_rate": 8.77612248899514e-05, + "loss": 2.1037, "step": 3455 }, { - "epoch": 0.59, - "grad_norm": 0.20703125, - "learning_rate": 0.00017815022336539716, - "loss": 2.1463, + "epoch": 1.17, + "grad_norm": 0.1630859375, + "learning_rate": 8.746784992517518e-05, + "loss": 2.0911, "step": 3460 }, { - "epoch": 0.59, - "grad_norm": 0.1982421875, - "learning_rate": 0.00017805791786393028, - "loss": 2.1927, + "epoch": 1.17, + "grad_norm": 0.1640625, + "learning_rate": 8.71745845035913e-05, + "loss": 2.1016, "step": 3465 }, { - "epoch": 0.59, - "grad_norm": 0.201171875, - "learning_rate": 0.00017796544181892228, - "loss": 2.1393, + "epoch": 1.18, + "grad_norm": 0.1640625, + "learning_rate": 8.688143118862499e-05, + "loss": 2.1213, "step": 3470 }, { - "epoch": 0.59, - "grad_norm": 0.2041015625, - "learning_rate": 0.00017787279543241783, - "loss": 2.1723, + "epoch": 1.18, + "grad_norm": 0.16796875, + "learning_rate": 8.658839254272157e-05, + "loss": 2.1115, "step": 3475 }, { - "epoch": 0.59, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017777997890683385, - "loss": 2.1761, + "epoch": 1.18, + "grad_norm": 0.169921875, + "learning_rate": 8.62954711273242e-05, + "loss": 2.1117, "step": 3480 }, { - "epoch": 0.59, - "grad_norm": 0.2021484375, - "learning_rate": 0.00017768699244495904, - "loss": 2.1744, + "epoch": 1.18, + "grad_norm": 0.162109375, + "learning_rate": 8.600266950285117e-05, + "loss": 2.1494, "step": 3485 }, { - "epoch": 0.59, - "grad_norm": 0.205078125, - "learning_rate": 0.00017759383624995321, - "loss": 2.1923, + "epoch": 1.18, + "grad_norm": 0.1689453125, + "learning_rate": 8.570999022867373e-05, + "loss": 2.1145, "step": 3490 }, { - "epoch": 0.59, - "grad_norm": 0.1962890625, - "learning_rate": 0.00017750051052534724, - "loss": 2.1148, + "epoch": 1.18, + "grad_norm": 0.16796875, + "learning_rate": 8.541743586309365e-05, + "loss": 2.0887, "step": 3495 }, { - "epoch": 0.59, - "grad_norm": 0.201171875, - "learning_rate": 0.0001774070154750422, - "loss": 2.1625, + "epoch": 1.19, + "grad_norm": 0.1650390625, + "learning_rate": 8.512500896332097e-05, + "loss": 2.1073, "step": 3500 }, { - "epoch": 0.59, - "grad_norm": 0.205078125, - "learning_rate": 0.00017731335130330927, - "loss": 2.1456, + "epoch": 1.19, + "grad_norm": 0.16796875, + "learning_rate": 8.483271208545144e-05, + "loss": 2.096, "step": 3505 }, { - "epoch": 0.59, - "grad_norm": 0.1953125, - "learning_rate": 0.00017721951821478898, - "loss": 2.1667, + "epoch": 1.19, + "grad_norm": 0.1689453125, + "learning_rate": 8.454054778444431e-05, + "loss": 2.1306, "step": 3510 }, { - "epoch": 0.6, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017712551641449099, - "loss": 2.2208, + "epoch": 1.19, + "grad_norm": 0.166015625, + "learning_rate": 8.424851861410007e-05, + "loss": 2.0804, "step": 3515 }, { - "epoch": 0.6, - "grad_norm": 0.1953125, - "learning_rate": 0.00017703134610779362, - "loss": 2.1765, + "epoch": 1.19, + "grad_norm": 0.1728515625, + "learning_rate": 8.395662712703793e-05, + "loss": 2.0963, "step": 3520 }, { - "epoch": 0.6, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017693700750044328, - "loss": 2.176, + "epoch": 1.19, + "grad_norm": 0.16796875, + "learning_rate": 8.366487587467368e-05, + "loss": 2.1138, "step": 3525 }, { - "epoch": 0.6, - "grad_norm": 0.1953125, - "learning_rate": 0.0001768425007985541, - "loss": 2.1524, + "epoch": 1.2, + "grad_norm": 0.16796875, + "learning_rate": 8.337326740719726e-05, + "loss": 2.0983, "step": 3530 }, { - "epoch": 0.6, - "grad_norm": 0.197265625, - "learning_rate": 0.00017674782620860744, - "loss": 2.1427, + "epoch": 1.2, + "grad_norm": 0.1650390625, + "learning_rate": 8.308180427355062e-05, + "loss": 2.1253, "step": 3535 }, { - "epoch": 0.6, - "grad_norm": 0.1943359375, - "learning_rate": 0.00017665298393745152, - "loss": 2.1892, + "epoch": 1.2, + "grad_norm": 0.1640625, + "learning_rate": 8.279048902140528e-05, + "loss": 2.1103, "step": 3540 }, { - "epoch": 0.6, - "grad_norm": 0.197265625, - "learning_rate": 0.00017655797419230095, - "loss": 2.1542, + "epoch": 1.2, + "grad_norm": 0.16796875, + "learning_rate": 8.24993241971401e-05, + "loss": 2.1124, "step": 3545 }, { - "epoch": 0.6, - "grad_norm": 0.203125, - "learning_rate": 0.00017646279718073611, - "loss": 2.1891, + "epoch": 1.2, + "grad_norm": 0.1669921875, + "learning_rate": 8.220831234581922e-05, + "loss": 2.1135, "step": 3550 }, { - "epoch": 0.6, - "grad_norm": 0.2109375, - "learning_rate": 0.00017636745311070296, - "loss": 2.1905, + "epoch": 1.2, + "grad_norm": 0.166015625, + "learning_rate": 8.191745601116947e-05, + "loss": 2.1134, "step": 3555 }, { - "epoch": 0.6, - "grad_norm": 0.197265625, - "learning_rate": 0.00017627194219051238, - "loss": 2.164, + "epoch": 1.21, + "grad_norm": 0.16015625, + "learning_rate": 8.162675773555836e-05, + "loss": 2.0978, "step": 3560 }, { - "epoch": 0.6, - "grad_norm": 0.19921875, - "learning_rate": 0.0001761762646288398, - "loss": 2.1426, + "epoch": 1.21, + "grad_norm": 0.162109375, + "learning_rate": 8.133622005997181e-05, + "loss": 2.095, "step": 3565 }, { - "epoch": 0.6, - "grad_norm": 0.2060546875, - "learning_rate": 0.0001760804206347248, - "loss": 2.1479, + "epoch": 1.21, + "grad_norm": 0.1689453125, + "learning_rate": 8.104584552399204e-05, + "loss": 2.1298, "step": 3570 }, { - "epoch": 0.61, - "grad_norm": 0.205078125, - "learning_rate": 0.00017598441041757047, - "loss": 2.1541, + "epoch": 1.21, + "grad_norm": 0.166015625, + "learning_rate": 8.075563666577515e-05, + "loss": 2.1018, "step": 3575 }, { - "epoch": 0.61, - "grad_norm": 0.19921875, - "learning_rate": 0.00017588823418714314, - "loss": 2.1498, + "epoch": 1.21, + "grad_norm": 0.1708984375, + "learning_rate": 8.046559602202901e-05, + "loss": 2.1075, "step": 3580 }, { - "epoch": 0.61, - "grad_norm": 0.205078125, - "learning_rate": 0.00017579189215357187, - "loss": 2.1466, + "epoch": 1.21, + "grad_norm": 0.16796875, + "learning_rate": 8.017572612799135e-05, + "loss": 2.1308, "step": 3585 }, { - "epoch": 0.61, - "grad_norm": 0.203125, - "learning_rate": 0.00017569538452734797, - "loss": 2.1702, + "epoch": 1.22, + "grad_norm": 0.162109375, + "learning_rate": 7.988602951740717e-05, + "loss": 2.1214, "step": 3590 }, { - "epoch": 0.61, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017559871151932448, - "loss": 2.1822, + "epoch": 1.22, + "grad_norm": 0.166015625, + "learning_rate": 7.959650872250688e-05, + "loss": 2.1063, "step": 3595 }, { - "epoch": 0.61, - "grad_norm": 0.201171875, - "learning_rate": 0.0001755018733407158, - "loss": 2.1712, - "step": 3600 + "epoch": 1.22, + "grad_norm": 0.1650390625, + "learning_rate": 7.930716627398412e-05, + "loss": 2.1155, + "step": 3600 }, { - "epoch": 0.61, - "grad_norm": 0.19921875, - "learning_rate": 0.00017540487020309726, - "loss": 2.1469, + "epoch": 1.22, + "grad_norm": 0.1669921875, + "learning_rate": 7.901800470097355e-05, + "loss": 2.1169, "step": 3605 }, { - "epoch": 0.61, - "grad_norm": 0.20703125, - "learning_rate": 0.0001753077023184045, - "loss": 2.1792, + "epoch": 1.22, + "grad_norm": 0.166015625, + "learning_rate": 7.872902653102884e-05, + "loss": 2.0943, "step": 3610 }, { - "epoch": 0.61, - "grad_norm": 0.19921875, - "learning_rate": 0.00017521036989893318, - "loss": 2.1561, + "epoch": 1.22, + "grad_norm": 0.169921875, + "learning_rate": 7.84402342901005e-05, + "loss": 2.1381, "step": 3615 }, { - "epoch": 0.61, - "grad_norm": 0.203125, - "learning_rate": 0.00017511287315733837, - "loss": 2.164, + "epoch": 1.23, + "grad_norm": 0.169921875, + "learning_rate": 7.815163050251395e-05, + "loss": 2.1235, "step": 3620 }, { - "epoch": 0.61, - "grad_norm": 0.2021484375, - "learning_rate": 0.00017501521230663429, - "loss": 2.1957, + "epoch": 1.23, + "grad_norm": 0.1630859375, + "learning_rate": 7.786321769094717e-05, + "loss": 2.1158, "step": 3625 }, { - "epoch": 0.61, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017491738756019357, - "loss": 2.147, + "epoch": 1.23, + "grad_norm": 0.16796875, + "learning_rate": 7.7574998376409e-05, + "loss": 2.107, "step": 3630 }, { - "epoch": 0.62, - "grad_norm": 0.197265625, - "learning_rate": 0.00017481939913174696, - "loss": 2.1521, + "epoch": 1.23, + "grad_norm": 0.1689453125, + "learning_rate": 7.728697507821674e-05, + "loss": 2.1166, "step": 3635 }, { - "epoch": 0.62, - "grad_norm": 0.21875, - "learning_rate": 0.00017472124723538288, - "loss": 2.1614, + "epoch": 1.23, + "grad_norm": 0.1689453125, + "learning_rate": 7.699915031397452e-05, + "loss": 2.1276, "step": 3640 }, { - "epoch": 0.62, - "grad_norm": 0.203125, - "learning_rate": 0.00017462293208554683, - "loss": 2.1617, + "epoch": 1.23, + "grad_norm": 0.1689453125, + "learning_rate": 7.671152659955096e-05, + "loss": 2.1199, "step": 3645 }, { - "epoch": 0.62, - "grad_norm": 0.2080078125, - "learning_rate": 0.00017452445389704106, - "loss": 2.1826, + "epoch": 1.24, + "grad_norm": 0.1630859375, + "learning_rate": 7.642410644905726e-05, + "loss": 2.1144, "step": 3650 }, { - "epoch": 0.62, - "grad_norm": 0.2041015625, - "learning_rate": 0.00017442581288502397, - "loss": 2.1398, + "epoch": 1.24, + "grad_norm": 0.162109375, + "learning_rate": 7.613689237482551e-05, + "loss": 2.1057, "step": 3655 }, { - "epoch": 0.62, - "grad_norm": 0.205078125, - "learning_rate": 0.00017432700926500977, - "loss": 2.1396, + "epoch": 1.24, + "grad_norm": 0.1650390625, + "learning_rate": 7.584988688738622e-05, + "loss": 2.0954, "step": 3660 }, { - "epoch": 0.62, - "grad_norm": 0.20703125, - "learning_rate": 0.00017422804325286788, - "loss": 2.133, + "epoch": 1.24, + "grad_norm": 0.1650390625, + "learning_rate": 7.556309249544678e-05, + "loss": 2.1457, "step": 3665 }, { - "epoch": 0.62, - "grad_norm": 0.2041015625, - "learning_rate": 0.0001741289150648225, - "loss": 2.2083, + "epoch": 1.24, + "grad_norm": 0.16796875, + "learning_rate": 7.527651170586936e-05, + "loss": 2.106, "step": 3670 }, { - "epoch": 0.62, - "grad_norm": 0.2080078125, - "learning_rate": 0.00017402962491745228, - "loss": 2.1073, + "epoch": 1.24, + "grad_norm": 0.1640625, + "learning_rate": 7.499014702364913e-05, + "loss": 2.1119, "step": 3675 }, { - "epoch": 0.62, - "grad_norm": 0.205078125, - "learning_rate": 0.00017393017302768963, - "loss": 2.2294, + "epoch": 1.25, + "grad_norm": 0.166015625, + "learning_rate": 7.470400095189219e-05, + "loss": 2.1395, "step": 3680 }, { - "epoch": 0.62, - "grad_norm": 0.1982421875, - "learning_rate": 0.00017383055961282028, - "loss": 2.1668, + "epoch": 1.25, + "grad_norm": 0.1640625, + "learning_rate": 7.44180759917937e-05, + "loss": 2.0965, "step": 3685 }, { - "epoch": 0.62, - "grad_norm": 0.203125, - "learning_rate": 0.00017373078489048302, - "loss": 2.1639, + "epoch": 1.25, + "grad_norm": 0.169921875, + "learning_rate": 7.413237464261627e-05, + "loss": 2.1036, "step": 3690 }, { - "epoch": 0.63, - "grad_norm": 0.19140625, - "learning_rate": 0.00017363084907866895, - "loss": 2.1752, + "epoch": 1.25, + "grad_norm": 0.1689453125, + "learning_rate": 7.38468994016678e-05, + "loss": 2.1184, "step": 3695 }, { - "epoch": 0.63, - "grad_norm": 0.201171875, - "learning_rate": 0.00017353075239572117, - "loss": 2.1609, + "epoch": 1.25, + "grad_norm": 0.1669921875, + "learning_rate": 7.356165276427983e-05, + "loss": 2.1214, "step": 3700 }, { - "epoch": 0.63, - "grad_norm": 0.2021484375, - "learning_rate": 0.00017343049506033425, - "loss": 2.1996, + "epoch": 1.25, + "grad_norm": 0.1611328125, + "learning_rate": 7.327663722378561e-05, + "loss": 2.0925, "step": 3705 }, { - "epoch": 0.63, - "grad_norm": 0.1982421875, - "learning_rate": 0.00017333007729155377, - "loss": 2.1319, + "epoch": 1.26, + "grad_norm": 0.1640625, + "learning_rate": 7.299185527149853e-05, + "loss": 2.1019, "step": 3710 }, { - "epoch": 0.63, - "grad_norm": 0.208984375, - "learning_rate": 0.00017322949930877583, - "loss": 2.1314, + "epoch": 1.26, + "grad_norm": 0.166015625, + "learning_rate": 7.270730939669006e-05, + "loss": 2.0904, "step": 3715 }, { - "epoch": 0.63, - "grad_norm": 0.2021484375, - "learning_rate": 0.00017312876133174655, - "loss": 2.1361, + "epoch": 1.26, + "grad_norm": 0.166015625, + "learning_rate": 7.242300208656814e-05, + "loss": 2.1251, "step": 3720 }, { - "epoch": 0.63, - "grad_norm": 0.2041015625, - "learning_rate": 0.00017302786358056155, - "loss": 2.1442, + "epoch": 1.26, + "grad_norm": 0.162109375, + "learning_rate": 7.213893582625548e-05, + "loss": 2.1357, "step": 3725 }, { - "epoch": 0.63, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017292680627566568, - "loss": 2.1606, + "epoch": 1.26, + "grad_norm": 0.169921875, + "learning_rate": 7.185511309876775e-05, + "loss": 2.0908, "step": 3730 }, { - "epoch": 0.63, - "grad_norm": 0.2021484375, - "learning_rate": 0.00017282558963785234, - "loss": 2.1756, + "epoch": 1.27, + "grad_norm": 0.166015625, + "learning_rate": 7.157153638499188e-05, + "loss": 2.0689, "step": 3735 }, { - "epoch": 0.63, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001727242138882629, - "loss": 2.1586, + "epoch": 1.27, + "grad_norm": 0.1640625, + "learning_rate": 7.128820816366442e-05, + "loss": 2.1062, "step": 3740 }, { - "epoch": 0.63, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017262267924838658, - "loss": 2.148, + "epoch": 1.27, + "grad_norm": 0.1689453125, + "learning_rate": 7.100513091134989e-05, + "loss": 2.1295, "step": 3745 }, { - "epoch": 0.64, - "grad_norm": 0.1982421875, - "learning_rate": 0.0001725209859400596, - "loss": 2.1351, + "epoch": 1.27, + "grad_norm": 0.173828125, + "learning_rate": 7.072230710241905e-05, + "loss": 2.1384, "step": 3750 }, { - "epoch": 0.64, - "grad_norm": 0.2041015625, - "learning_rate": 0.0001724191341854649, - "loss": 2.1536, + "epoch": 1.27, + "grad_norm": 0.1689453125, + "learning_rate": 7.043973920902729e-05, + "loss": 2.1216, "step": 3755 }, { - "epoch": 0.64, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017231712420713157, - "loss": 2.15, + "epoch": 1.27, + "grad_norm": 0.1640625, + "learning_rate": 7.015742970109317e-05, + "loss": 2.1304, "step": 3760 }, { - "epoch": 0.64, - "grad_norm": 0.197265625, - "learning_rate": 0.00017221495622793444, - "loss": 2.1419, + "epoch": 1.28, + "grad_norm": 0.1630859375, + "learning_rate": 6.98753810462766e-05, + "loss": 2.1098, "step": 3765 }, { - "epoch": 0.64, - "grad_norm": 0.19921875, - "learning_rate": 0.00017211263047109353, - "loss": 2.1394, + "epoch": 1.28, + "grad_norm": 0.16796875, + "learning_rate": 6.959359570995738e-05, + "loss": 2.127, "step": 3770 }, { - "epoch": 0.64, - "grad_norm": 0.208984375, - "learning_rate": 0.00017201014716017348, - "loss": 2.2241, + "epoch": 1.28, + "grad_norm": 0.169921875, + "learning_rate": 6.931207615521366e-05, + "loss": 2.1041, "step": 3775 }, { - "epoch": 0.64, - "grad_norm": 0.1982421875, - "learning_rate": 0.00017190750651908336, - "loss": 2.1367, + "epoch": 1.28, + "grad_norm": 0.16796875, + "learning_rate": 6.903082484280053e-05, + "loss": 2.1141, "step": 3780 }, { - "epoch": 0.64, - "grad_norm": 0.1982421875, - "learning_rate": 0.00017180470877207576, - "loss": 2.1249, + "epoch": 1.28, + "grad_norm": 0.169921875, + "learning_rate": 6.874984423112819e-05, + "loss": 2.1364, "step": 3785 }, { - "epoch": 0.64, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001717017541437467, - "loss": 2.1624, + "epoch": 1.28, + "grad_norm": 0.1708984375, + "learning_rate": 6.84691367762407e-05, + "loss": 2.1172, "step": 3790 }, { - "epoch": 0.64, - "grad_norm": 0.1943359375, - "learning_rate": 0.00017159864285903488, - "loss": 2.1353, + "epoch": 1.29, + "grad_norm": 0.16796875, + "learning_rate": 6.818870493179458e-05, + "loss": 2.0933, "step": 3795 }, { - "epoch": 0.64, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017149537514322123, - "loss": 2.1452, + "epoch": 1.29, + "grad_norm": 0.1650390625, + "learning_rate": 6.790855114903714e-05, + "loss": 2.1207, "step": 3800 }, { - "epoch": 0.64, - "grad_norm": 0.201171875, - "learning_rate": 0.0001713919512219285, - "loss": 2.1418, + "epoch": 1.29, + "grad_norm": 0.1640625, + "learning_rate": 6.762867787678512e-05, + "loss": 2.121, "step": 3805 }, { - "epoch": 0.65, - "grad_norm": 0.2060546875, - "learning_rate": 0.00017128837132112076, - "loss": 2.1446, + "epoch": 1.29, + "grad_norm": 0.1650390625, + "learning_rate": 6.73490875614035e-05, + "loss": 2.1112, "step": 3810 }, { - "epoch": 0.65, - "grad_norm": 0.2041015625, - "learning_rate": 0.00017118463566710284, - "loss": 2.1525, + "epoch": 1.29, + "grad_norm": 0.171875, + "learning_rate": 6.706978264678376e-05, + "loss": 2.1089, "step": 3815 }, { - "epoch": 0.65, - "grad_norm": 0.2041015625, - "learning_rate": 0.00017108074448651976, - "loss": 2.1617, + "epoch": 1.29, + "grad_norm": 0.1708984375, + "learning_rate": 6.679076557432278e-05, + "loss": 2.122, "step": 3820 }, { - "epoch": 0.65, - "grad_norm": 0.201171875, - "learning_rate": 0.00017097669800635653, - "loss": 2.1856, + "epoch": 1.3, + "grad_norm": 0.16796875, + "learning_rate": 6.651203878290139e-05, + "loss": 2.124, "step": 3825 }, { - "epoch": 0.65, - "grad_norm": 0.205078125, - "learning_rate": 0.00017087249645393734, - "loss": 2.1432, + "epoch": 1.3, + "grad_norm": 0.1650390625, + "learning_rate": 6.623360470886314e-05, + "loss": 2.123, "step": 3830 }, { - "epoch": 0.65, - "grad_norm": 0.2041015625, - "learning_rate": 0.00017076814005692522, - "loss": 2.1366, + "epoch": 1.3, + "grad_norm": 0.1669921875, + "learning_rate": 6.59554657859929e-05, + "loss": 2.0984, "step": 3835 }, { - "epoch": 0.65, - "grad_norm": 0.203125, - "learning_rate": 0.0001706636290433215, - "loss": 2.1779, + "epoch": 1.3, + "grad_norm": 0.16796875, + "learning_rate": 6.567762444549558e-05, + "loss": 2.1041, "step": 3840 }, { - "epoch": 0.65, - "grad_norm": 0.2001953125, - "learning_rate": 0.00017055896364146528, - "loss": 2.1282, + "epoch": 1.3, + "grad_norm": 0.1630859375, + "learning_rate": 6.540008311597507e-05, + "loss": 2.1201, "step": 3845 }, { - "epoch": 0.65, - "grad_norm": 0.197265625, - "learning_rate": 0.00017045414408003312, - "loss": 2.1785, + "epoch": 1.3, + "grad_norm": 0.1669921875, + "learning_rate": 6.512284422341275e-05, + "loss": 2.0983, "step": 3850 }, { - "epoch": 0.65, - "grad_norm": 0.2080078125, - "learning_rate": 0.00017034917058803822, - "loss": 2.1825, + "epoch": 1.31, + "grad_norm": 0.166015625, + "learning_rate": 6.484591019114646e-05, + "loss": 2.1121, "step": 3855 }, { - "epoch": 0.65, - "grad_norm": 0.19921875, - "learning_rate": 0.00017024404339483016, - "loss": 2.1743, + "epoch": 1.31, + "grad_norm": 0.1650390625, + "learning_rate": 6.456928343984919e-05, + "loss": 2.1393, "step": 3860 }, { - "epoch": 0.65, - "grad_norm": 0.201171875, - "learning_rate": 0.00017013876273009438, - "loss": 2.1668, + "epoch": 1.31, + "grad_norm": 0.16796875, + "learning_rate": 6.429296638750814e-05, + "loss": 2.1089, "step": 3865 }, { - "epoch": 0.66, - "grad_norm": 0.197265625, - "learning_rate": 0.00017003332882385155, - "loss": 2.1435, + "epoch": 1.31, + "grad_norm": 0.16796875, + "learning_rate": 6.401696144940332e-05, + "loss": 2.0942, "step": 3870 }, { - "epoch": 0.66, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001699277419064572, - "loss": 2.1453, + "epoch": 1.31, + "grad_norm": 0.1669921875, + "learning_rate": 6.374127103808654e-05, + "loss": 2.1108, "step": 3875 }, { - "epoch": 0.66, - "grad_norm": 0.201171875, - "learning_rate": 0.00016982200220860114, - "loss": 2.1617, + "epoch": 1.31, + "grad_norm": 0.1669921875, + "learning_rate": 6.34658975633605e-05, + "loss": 2.1064, "step": 3880 }, { - "epoch": 0.66, - "grad_norm": 0.197265625, - "learning_rate": 0.00016971610996130703, - "loss": 2.1807, + "epoch": 1.32, + "grad_norm": 0.1689453125, + "learning_rate": 6.319084343225738e-05, + "loss": 2.131, "step": 3885 }, { - "epoch": 0.66, - "grad_norm": 0.201171875, - "learning_rate": 0.0001696100653959317, - "loss": 2.1619, + "epoch": 1.32, + "grad_norm": 0.1689453125, + "learning_rate": 6.291611104901812e-05, + "loss": 2.1288, "step": 3890 }, { - "epoch": 0.66, - "grad_norm": 0.203125, - "learning_rate": 0.0001695038687441649, - "loss": 2.1842, + "epoch": 1.32, + "grad_norm": 0.166015625, + "learning_rate": 6.264170281507111e-05, + "loss": 2.1196, "step": 3895 }, { - "epoch": 0.66, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001693975202380286, - "loss": 2.1851, + "epoch": 1.32, + "grad_norm": 0.166015625, + "learning_rate": 6.236762112901158e-05, + "loss": 2.1203, "step": 3900 }, { - "epoch": 0.66, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001692910201098766, - "loss": 2.1786, + "epoch": 1.32, + "grad_norm": 0.1689453125, + "learning_rate": 6.209386838658024e-05, + "loss": 2.1263, "step": 3905 }, { - "epoch": 0.66, - "grad_norm": 0.1982421875, - "learning_rate": 0.00016918436859239387, - "loss": 2.1408, + "epoch": 1.32, + "grad_norm": 0.169921875, + "learning_rate": 6.182044698064256e-05, + "loss": 2.1083, "step": 3910 }, { - "epoch": 0.66, - "grad_norm": 0.205078125, - "learning_rate": 0.00016907756591859628, - "loss": 2.1447, + "epoch": 1.33, + "grad_norm": 0.166015625, + "learning_rate": 6.154735930116786e-05, + "loss": 2.1206, "step": 3915 }, { - "epoch": 0.66, - "grad_norm": 0.2041015625, - "learning_rate": 0.00016897061232182977, - "loss": 2.1707, + "epoch": 1.33, + "grad_norm": 0.169921875, + "learning_rate": 6.12746077352083e-05, + "loss": 2.0995, "step": 3920 }, { - "epoch": 0.66, - "grad_norm": 0.205078125, - "learning_rate": 0.0001688635080357702, - "loss": 2.1928, + "epoch": 1.33, + "grad_norm": 0.1650390625, + "learning_rate": 6.1002194666878106e-05, + "loss": 2.1249, "step": 3925 }, { - "epoch": 0.67, - "grad_norm": 0.2177734375, - "learning_rate": 0.00016875625329442257, - "loss": 2.169, + "epoch": 1.33, + "grad_norm": 0.1669921875, + "learning_rate": 6.0730122477332675e-05, + "loss": 2.1023, "step": 3930 }, { - "epoch": 0.67, - "grad_norm": 0.2001953125, - "learning_rate": 0.0001686488483321206, - "loss": 2.1886, + "epoch": 1.33, + "grad_norm": 0.169921875, + "learning_rate": 6.045839354474786e-05, + "loss": 2.0902, "step": 3935 }, { - "epoch": 0.67, - "grad_norm": 0.205078125, - "learning_rate": 0.00016854129338352624, - "loss": 2.2059, + "epoch": 1.33, + "grad_norm": 0.1630859375, + "learning_rate": 6.0187010244299046e-05, + "loss": 2.0995, "step": 3940 }, { - "epoch": 0.67, - "grad_norm": 0.2060546875, - "learning_rate": 0.00016843358868362914, - "loss": 2.1918, + "epoch": 1.34, + "grad_norm": 0.169921875, + "learning_rate": 5.9915974948140474e-05, + "loss": 2.101, "step": 3945 }, { - "epoch": 0.67, - "grad_norm": 0.1962890625, - "learning_rate": 0.0001683257344677461, - "loss": 2.1218, + "epoch": 1.34, + "grad_norm": 0.1669921875, + "learning_rate": 5.964529002538455e-05, + "loss": 2.0826, "step": 3950 }, { - "epoch": 0.67, - "grad_norm": 0.205078125, - "learning_rate": 0.0001682177309715206, - "loss": 2.1635, + "epoch": 1.34, + "grad_norm": 0.1669921875, + "learning_rate": 5.937495784208096e-05, + "loss": 2.1324, "step": 3955 }, { - "epoch": 0.67, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001681095784309223, - "loss": 2.1986, + "epoch": 1.34, + "grad_norm": 0.171875, + "learning_rate": 5.910498076119622e-05, + "loss": 2.1116, "step": 3960 }, { - "epoch": 0.67, - "grad_norm": 0.19921875, - "learning_rate": 0.00016800127708224648, - "loss": 2.101, + "epoch": 1.34, + "grad_norm": 0.1669921875, + "learning_rate": 5.883536114259277e-05, + "loss": 2.0813, "step": 3965 }, { - "epoch": 0.67, - "grad_norm": 0.197265625, - "learning_rate": 0.0001678928271621135, - "loss": 2.1343, + "epoch": 1.34, + "grad_norm": 0.166015625, + "learning_rate": 5.8566101343008687e-05, + "loss": 2.103, "step": 3970 }, { - "epoch": 0.67, - "grad_norm": 0.205078125, - "learning_rate": 0.0001677842289074684, - "loss": 2.1897, + "epoch": 1.35, + "grad_norm": 0.171875, + "learning_rate": 5.829720371603664e-05, + "loss": 2.1116, "step": 3975 }, { - "epoch": 0.67, - "grad_norm": 0.19921875, - "learning_rate": 0.00016767548255558023, - "loss": 2.1349, + "epoch": 1.35, + "grad_norm": 0.16796875, + "learning_rate": 5.802867061210375e-05, + "loss": 2.1095, "step": 3980 }, { - "epoch": 0.67, - "grad_norm": 0.2109375, - "learning_rate": 0.0001675665883440417, - "loss": 2.1386, + "epoch": 1.35, + "grad_norm": 0.169921875, + "learning_rate": 5.776050437845075e-05, + "loss": 2.1008, "step": 3985 }, { - "epoch": 0.68, - "grad_norm": 0.2021484375, - "learning_rate": 0.00016745754651076848, - "loss": 2.1427, + "epoch": 1.35, + "grad_norm": 0.1708984375, + "learning_rate": 5.749270735911158e-05, + "loss": 2.1075, "step": 3990 }, { - "epoch": 0.68, - "grad_norm": 0.2099609375, - "learning_rate": 0.00016734835729399877, - "loss": 2.1637, + "epoch": 1.35, + "grad_norm": 0.16796875, + "learning_rate": 5.7225281894892935e-05, + "loss": 2.1046, "step": 3995 }, { - "epoch": 0.68, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001672390209322929, - "loss": 2.1205, + "epoch": 1.35, + "grad_norm": 0.166015625, + "learning_rate": 5.695823032335366e-05, + "loss": 2.1118, "step": 4000 }, { - "epoch": 0.68, - "grad_norm": 0.203125, - "learning_rate": 0.0001671295376645325, - "loss": 2.1365, + "epoch": 1.36, + "grad_norm": 0.1689453125, + "learning_rate": 5.669155497878454e-05, + "loss": 2.1185, "step": 4005 }, { - "epoch": 0.68, - "grad_norm": 0.205078125, - "learning_rate": 0.0001670199077299203, - "loss": 2.1636, + "epoch": 1.36, + "grad_norm": 0.1650390625, + "learning_rate": 5.642525819218769e-05, + "loss": 2.0828, "step": 4010 }, { - "epoch": 0.68, - "grad_norm": 0.2021484375, - "learning_rate": 0.00016691013136797947, - "loss": 2.1305, + "epoch": 1.36, + "grad_norm": 0.166015625, + "learning_rate": 5.6159342291256254e-05, + "loss": 2.0953, "step": 4015 }, { - "epoch": 0.68, - "grad_norm": 0.2041015625, - "learning_rate": 0.00016680020881855301, - "loss": 2.1441, + "epoch": 1.36, + "grad_norm": 0.16796875, + "learning_rate": 5.589380960035417e-05, + "loss": 2.1268, "step": 4020 }, { - "epoch": 0.68, - "grad_norm": 0.205078125, - "learning_rate": 0.0001666901403218034, - "loss": 2.1418, + "epoch": 1.36, + "grad_norm": 0.1640625, + "learning_rate": 5.562866244049557e-05, + "loss": 2.1283, "step": 4025 }, { - "epoch": 0.68, - "grad_norm": 0.197265625, - "learning_rate": 0.000166579926118212, - "loss": 2.15, + "epoch": 1.36, + "grad_norm": 0.16796875, + "learning_rate": 5.53639031293248e-05, + "loss": 2.1114, "step": 4030 }, { - "epoch": 0.68, - "grad_norm": 0.205078125, - "learning_rate": 0.00016646956644857837, - "loss": 2.1799, + "epoch": 1.37, + "grad_norm": 0.1728515625, + "learning_rate": 5.509953398109594e-05, + "loss": 2.1172, "step": 4035 }, { - "epoch": 0.68, - "grad_norm": 0.2119140625, - "learning_rate": 0.0001663590615540201, - "loss": 2.1624, + "epoch": 1.37, + "grad_norm": 0.166015625, + "learning_rate": 5.483555730665282e-05, + "loss": 2.0859, "step": 4040 }, { - "epoch": 0.69, - "grad_norm": 0.205078125, - "learning_rate": 0.00016624841167597193, - "loss": 2.1176, + "epoch": 1.37, + "grad_norm": 0.16796875, + "learning_rate": 5.457197541340853e-05, + "loss": 2.1137, "step": 4045 }, { - "epoch": 0.69, - "grad_norm": 0.203125, - "learning_rate": 0.00016613761705618538, - "loss": 2.1614, + "epoch": 1.37, + "grad_norm": 0.1669921875, + "learning_rate": 5.4308790605325364e-05, + "loss": 2.1407, "step": 4050 }, { - "epoch": 0.69, - "grad_norm": 0.203125, - "learning_rate": 0.0001660266779367283, - "loss": 2.1349, + "epoch": 1.37, + "grad_norm": 0.166015625, + "learning_rate": 5.404600518289487e-05, + "loss": 2.1155, "step": 4055 }, { - "epoch": 0.69, - "grad_norm": 0.208984375, - "learning_rate": 0.00016591559455998408, - "loss": 2.1229, + "epoch": 1.38, + "grad_norm": 0.166015625, + "learning_rate": 5.3783621443117414e-05, + "loss": 2.1037, "step": 4060 }, { - "epoch": 0.69, - "grad_norm": 0.203125, - "learning_rate": 0.0001658043671686514, - "loss": 2.1506, + "epoch": 1.38, + "grad_norm": 0.1650390625, + "learning_rate": 5.352164167948233e-05, + "loss": 2.1022, "step": 4065 }, { - "epoch": 0.69, - "grad_norm": 0.208984375, - "learning_rate": 0.00016569299600574365, - "loss": 2.1233, + "epoch": 1.38, + "grad_norm": 0.1689453125, + "learning_rate": 5.326006818194782e-05, + "loss": 2.1037, "step": 4070 }, { - "epoch": 0.69, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001655814813145882, - "loss": 2.151, + "epoch": 1.38, + "grad_norm": 0.1669921875, + "learning_rate": 5.2998903236920895e-05, + "loss": 2.1188, "step": 4075 }, { - "epoch": 0.69, - "grad_norm": 0.2119140625, - "learning_rate": 0.00016546982333882608, - "loss": 2.1246, + "epoch": 1.38, + "grad_norm": 0.171875, + "learning_rate": 5.273814912723742e-05, + "loss": 2.1134, "step": 4080 }, { - "epoch": 0.69, - "grad_norm": 0.2041015625, - "learning_rate": 0.00016535802232241133, - "loss": 2.1752, + "epoch": 1.38, + "grad_norm": 0.169921875, + "learning_rate": 5.247780813214214e-05, + "loss": 2.126, "step": 4085 }, { - "epoch": 0.69, - "grad_norm": 0.203125, - "learning_rate": 0.0001652460785096106, - "loss": 2.1706, + "epoch": 1.39, + "grad_norm": 0.171875, + "learning_rate": 5.221788252726889e-05, + "loss": 2.1114, "step": 4090 }, { - "epoch": 0.69, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001651339921450024, - "loss": 2.1159, + "epoch": 1.39, + "grad_norm": 0.16796875, + "learning_rate": 5.195837458462045e-05, + "loss": 2.1119, "step": 4095 }, { - "epoch": 0.69, - "grad_norm": 0.21484375, - "learning_rate": 0.0001650217634734768, - "loss": 2.1378, + "epoch": 1.39, + "grad_norm": 0.16796875, + "learning_rate": 5.16992865725489e-05, + "loss": 2.1249, "step": 4100 }, { - "epoch": 0.7, - "grad_norm": 0.2060546875, - "learning_rate": 0.0001649093927402347, - "loss": 2.1695, + "epoch": 1.39, + "grad_norm": 0.1669921875, + "learning_rate": 5.14406207557357e-05, + "loss": 2.0831, "step": 4105 }, { - "epoch": 0.7, - "grad_norm": 0.208984375, - "learning_rate": 0.00016479688019078748, - "loss": 2.1548, + "epoch": 1.39, + "grad_norm": 0.1708984375, + "learning_rate": 5.11823793951719e-05, + "loss": 2.1304, "step": 4110 }, { - "epoch": 0.7, - "grad_norm": 0.201171875, - "learning_rate": 0.00016468422607095626, - "loss": 2.1457, + "epoch": 1.39, + "grad_norm": 0.1748046875, + "learning_rate": 5.092456474813841e-05, + "loss": 2.1067, "step": 4115 }, { - "epoch": 0.7, - "grad_norm": 0.2001953125, - "learning_rate": 0.00016457143062687153, - "loss": 2.1345, + "epoch": 1.4, + "grad_norm": 0.1640625, + "learning_rate": 5.066717906818618e-05, + "loss": 2.1206, "step": 4120 }, { - "epoch": 0.7, - "grad_norm": 0.2021484375, - "learning_rate": 0.00016445849410497257, - "loss": 2.1505, + "epoch": 1.4, + "grad_norm": 0.1611328125, + "learning_rate": 5.041022460511673e-05, + "loss": 2.1134, "step": 4125 }, { - "epoch": 0.7, - "grad_norm": 0.2060546875, - "learning_rate": 0.00016434541675200678, - "loss": 2.1584, + "epoch": 1.4, + "grad_norm": 0.169921875, + "learning_rate": 5.015370360496219e-05, + "loss": 2.1385, "step": 4130 }, { - "epoch": 0.7, - "grad_norm": 0.20703125, - "learning_rate": 0.00016423219881502946, - "loss": 2.1538, + "epoch": 1.4, + "grad_norm": 0.166015625, + "learning_rate": 4.989761830996581e-05, + "loss": 2.1043, "step": 4135 }, { - "epoch": 0.7, - "grad_norm": 0.19921875, - "learning_rate": 0.00016411884054140277, - "loss": 2.1481, + "epoch": 1.4, + "grad_norm": 0.1640625, + "learning_rate": 4.9641970958562366e-05, + "loss": 2.1241, "step": 4140 }, { - "epoch": 0.7, - "grad_norm": 0.2080078125, - "learning_rate": 0.00016400534217879574, - "loss": 2.1452, + "epoch": 1.4, + "grad_norm": 0.171875, + "learning_rate": 4.938676378535866e-05, + "loss": 2.1237, "step": 4145 }, { - "epoch": 0.7, - "grad_norm": 0.208984375, - "learning_rate": 0.0001638917039751834, - "loss": 2.1736, + "epoch": 1.41, + "grad_norm": 0.177734375, + "learning_rate": 4.913199902111385e-05, + "loss": 2.1188, "step": 4150 }, { - "epoch": 0.7, - "grad_norm": 0.2021484375, - "learning_rate": 0.00016377792617884625, - "loss": 2.1551, + "epoch": 1.41, + "grad_norm": 0.166015625, + "learning_rate": 4.8877678892719866e-05, + "loss": 2.0727, "step": 4155 }, { - "epoch": 0.7, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001636640090383698, - "loss": 2.1443, + "epoch": 1.41, + "grad_norm": 0.171875, + "learning_rate": 4.862380562318236e-05, + "loss": 2.1085, "step": 4160 }, { - "epoch": 0.71, - "grad_norm": 0.2080078125, - "learning_rate": 0.00016354995280264402, - "loss": 2.1875, + "epoch": 1.41, + "grad_norm": 0.16796875, + "learning_rate": 4.837038143160082e-05, + "loss": 2.1181, "step": 4165 }, { - "epoch": 0.71, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001634357577208628, - "loss": 2.1689, + "epoch": 1.41, + "grad_norm": 0.166015625, + "learning_rate": 4.811740853314939e-05, + "loss": 2.118, "step": 4170 }, { - "epoch": 0.71, - "grad_norm": 0.2001953125, - "learning_rate": 0.00016332142404252333, - "loss": 2.1903, + "epoch": 1.41, + "grad_norm": 0.169921875, + "learning_rate": 4.786488913905745e-05, + "loss": 2.1102, "step": 4175 }, { - "epoch": 0.71, - "grad_norm": 0.2041015625, - "learning_rate": 0.00016320695201742566, - "loss": 2.1529, + "epoch": 1.42, + "grad_norm": 0.169921875, + "learning_rate": 4.7612825456590435e-05, + "loss": 2.0915, "step": 4180 }, { - "epoch": 0.71, - "grad_norm": 0.201171875, - "learning_rate": 0.0001630923418956721, - "loss": 2.1493, + "epoch": 1.42, + "grad_norm": 0.16796875, + "learning_rate": 4.736121968903027e-05, + "loss": 2.1318, "step": 4185 }, { - "epoch": 0.71, - "grad_norm": 0.20703125, - "learning_rate": 0.00016297759392766667, - "loss": 2.1718, + "epoch": 1.42, + "grad_norm": 0.16796875, + "learning_rate": 4.7110074035656316e-05, + "loss": 2.0898, "step": 4190 }, { - "epoch": 0.71, - "grad_norm": 0.20703125, - "learning_rate": 0.0001628627083641145, - "loss": 2.155, + "epoch": 1.42, + "grad_norm": 0.16796875, + "learning_rate": 4.685939069172609e-05, + "loss": 2.1212, "step": 4195 }, { - "epoch": 0.71, - "grad_norm": 0.1953125, - "learning_rate": 0.00016274768545602143, - "loss": 2.1576, + "epoch": 1.42, + "grad_norm": 0.1669921875, + "learning_rate": 4.6609171848456066e-05, + "loss": 2.0966, "step": 4200 }, { - "epoch": 0.71, - "grad_norm": 0.205078125, - "learning_rate": 0.00016263252545469338, - "loss": 2.1408, + "epoch": 1.42, + "grad_norm": 0.1669921875, + "learning_rate": 4.6359419693002534e-05, + "loss": 2.095, "step": 4205 }, { - "epoch": 0.71, - "grad_norm": 0.20703125, - "learning_rate": 0.0001625172286117357, - "loss": 2.1832, + "epoch": 1.43, + "grad_norm": 0.1728515625, + "learning_rate": 4.611013640844245e-05, + "loss": 2.118, "step": 4210 }, { - "epoch": 0.71, - "grad_norm": 0.203125, - "learning_rate": 0.00016240179517905282, - "loss": 2.1728, + "epoch": 1.43, + "grad_norm": 0.169921875, + "learning_rate": 4.5861324173754484e-05, + "loss": 2.1136, "step": 4215 }, { - "epoch": 0.71, - "grad_norm": 0.2001953125, - "learning_rate": 0.00016228622540884755, - "loss": 2.1633, + "epoch": 1.43, + "grad_norm": 0.16796875, + "learning_rate": 4.561298516379974e-05, + "loss": 2.1026, "step": 4220 }, { - "epoch": 0.72, - "grad_norm": 0.2060546875, - "learning_rate": 0.00016217051955362056, - "loss": 2.1659, + "epoch": 1.43, + "grad_norm": 0.166015625, + "learning_rate": 4.5365121549302914e-05, + "loss": 2.0938, "step": 4225 }, { - "epoch": 0.72, - "grad_norm": 0.203125, - "learning_rate": 0.00016205467786616984, - "loss": 2.174, + "epoch": 1.43, + "grad_norm": 0.1689453125, + "learning_rate": 4.5117735496833415e-05, + "loss": 2.1279, "step": 4230 }, { - "epoch": 0.72, - "grad_norm": 0.2138671875, - "learning_rate": 0.0001619387005995902, - "loss": 2.2027, + "epoch": 1.43, + "grad_norm": 0.166015625, + "learning_rate": 4.487082916878606e-05, + "loss": 2.1078, "step": 4235 }, { - "epoch": 0.72, - "grad_norm": 0.2080078125, - "learning_rate": 0.00016182258800727267, - "loss": 2.1338, + "epoch": 1.44, + "grad_norm": 0.1650390625, + "learning_rate": 4.4624404723362576e-05, + "loss": 2.1142, "step": 4240 }, { - "epoch": 0.72, - "grad_norm": 0.2080078125, - "learning_rate": 0.00016170634034290383, - "loss": 2.1211, + "epoch": 1.44, + "grad_norm": 0.1640625, + "learning_rate": 4.437846431455249e-05, + "loss": 2.0946, "step": 4245 }, { - "epoch": 0.72, - "grad_norm": 0.2021484375, - "learning_rate": 0.00016158995786046552, - "loss": 2.1571, + "epoch": 1.44, + "grad_norm": 0.171875, + "learning_rate": 4.4133010092114494e-05, + "loss": 2.1203, "step": 4250 }, { - "epoch": 0.72, - "grad_norm": 0.2041015625, - "learning_rate": 0.00016147344081423402, - "loss": 2.1354, + "epoch": 1.44, + "grad_norm": 0.166015625, + "learning_rate": 4.3888044201557376e-05, + "loss": 2.1113, "step": 4255 }, { - "epoch": 0.72, - "grad_norm": 0.2080078125, - "learning_rate": 0.0001613567894587797, - "loss": 2.2071, + "epoch": 1.44, + "grad_norm": 0.1708984375, + "learning_rate": 4.36435687841215e-05, + "loss": 2.0874, "step": 4260 }, { - "epoch": 0.72, - "grad_norm": 0.20703125, - "learning_rate": 0.00016124000404896632, - "loss": 2.1344, + "epoch": 1.44, + "grad_norm": 0.173828125, + "learning_rate": 4.3399585976760105e-05, + "loss": 2.1055, "step": 4265 }, { - "epoch": 0.72, - "grad_norm": 0.2060546875, - "learning_rate": 0.00016112308483995052, - "loss": 2.1864, + "epoch": 1.45, + "grad_norm": 0.169921875, + "learning_rate": 4.3156097912120385e-05, + "loss": 2.1009, "step": 4270 }, { - "epoch": 0.72, - "grad_norm": 0.203125, - "learning_rate": 0.0001610060320871813, - "loss": 2.1774, + "epoch": 1.45, + "grad_norm": 0.1728515625, + "learning_rate": 4.29131067185251e-05, + "loss": 2.1301, "step": 4275 }, { - "epoch": 0.72, - "grad_norm": 0.2001953125, - "learning_rate": 0.0001608888460463994, - "loss": 2.1568, + "epoch": 1.45, + "grad_norm": 0.1689453125, + "learning_rate": 4.2670614519953834e-05, + "loss": 2.0863, "step": 4280 }, { - "epoch": 0.73, - "grad_norm": 0.2060546875, - "learning_rate": 0.0001607715269736368, - "loss": 2.1271, + "epoch": 1.45, + "grad_norm": 0.169921875, + "learning_rate": 4.242862343602447e-05, + "loss": 2.0793, "step": 4285 }, { - "epoch": 0.73, - "grad_norm": 0.203125, - "learning_rate": 0.00016065407512521612, - "loss": 2.1558, + "epoch": 1.45, + "grad_norm": 0.1689453125, + "learning_rate": 4.21871355819747e-05, + "loss": 2.1121, "step": 4290 }, { - "epoch": 0.73, - "grad_norm": 0.2080078125, - "learning_rate": 0.0001605364907577501, - "loss": 2.1547, + "epoch": 1.45, + "grad_norm": 0.1708984375, + "learning_rate": 4.19461530686434e-05, + "loss": 2.1081, "step": 4295 }, { - "epoch": 0.73, - "grad_norm": 0.2080078125, - "learning_rate": 0.00016041877412814094, - "loss": 2.1729, + "epoch": 1.46, + "grad_norm": 0.177734375, + "learning_rate": 4.170567800245244e-05, + "loss": 2.1194, "step": 4300 }, { - "epoch": 0.73, - "grad_norm": 0.2021484375, - "learning_rate": 0.00016030092549357988, - "loss": 2.191, + "epoch": 1.46, + "grad_norm": 0.169921875, + "learning_rate": 4.1465712485387966e-05, + "loss": 2.1102, "step": 4305 }, { - "epoch": 0.73, - "grad_norm": 0.2001953125, - "learning_rate": 0.00016018294511154654, - "loss": 2.1488, + "epoch": 1.46, + "grad_norm": 0.169921875, + "learning_rate": 4.1226258614982214e-05, + "loss": 2.1125, "step": 4310 }, { - "epoch": 0.73, - "grad_norm": 0.203125, - "learning_rate": 0.00016006483323980844, - "loss": 2.1452, + "epoch": 1.46, + "grad_norm": 0.1708984375, + "learning_rate": 4.0987318484295135e-05, + "loss": 2.103, "step": 4315 }, { - "epoch": 0.73, - "grad_norm": 0.205078125, - "learning_rate": 0.0001599465901364202, - "loss": 2.1807, + "epoch": 1.46, + "grad_norm": 0.1748046875, + "learning_rate": 4.074889418189608e-05, + "loss": 2.1171, "step": 4320 }, { - "epoch": 0.73, - "grad_norm": 0.2041015625, - "learning_rate": 0.00015982821605972346, - "loss": 2.1537, + "epoch": 1.46, + "grad_norm": 0.1640625, + "learning_rate": 4.051098779184559e-05, + "loss": 2.0826, "step": 4325 }, { - "epoch": 0.73, - "grad_norm": 0.1982421875, - "learning_rate": 0.00015970971126834575, - "loss": 2.1796, + "epoch": 1.47, + "grad_norm": 0.1630859375, + "learning_rate": 4.0273601393677064e-05, + "loss": 2.1348, "step": 4330 }, { - "epoch": 0.73, - "grad_norm": 0.205078125, - "learning_rate": 0.00015959107602120032, - "loss": 2.1339, + "epoch": 1.47, + "grad_norm": 0.1708984375, + "learning_rate": 4.0036737062378823e-05, + "loss": 2.1057, "step": 4335 }, { - "epoch": 0.74, - "grad_norm": 0.2021484375, - "learning_rate": 0.00015947231057748535, - "loss": 2.1562, - "step": 4340 + "epoch": 1.47, + "grad_norm": 0.16796875, + "learning_rate": 3.980039686837568e-05, + "loss": 2.1364, + "step": 4340 }, { - "epoch": 0.74, - "grad_norm": 0.2119140625, - "learning_rate": 0.00015935341519668367, - "loss": 2.1585, + "epoch": 1.47, + "grad_norm": 0.171875, + "learning_rate": 3.956458287751097e-05, + "loss": 2.1176, "step": 4345 }, { - "epoch": 0.74, - "grad_norm": 0.203125, - "learning_rate": 0.00015923439013856174, - "loss": 2.1133, + "epoch": 1.47, + "grad_norm": 0.16796875, + "learning_rate": 3.932929715102863e-05, + "loss": 2.0995, "step": 4350 }, { - "epoch": 0.74, - "grad_norm": 0.2001953125, - "learning_rate": 0.00015911523566316954, - "loss": 2.176, + "epoch": 1.48, + "grad_norm": 0.1728515625, + "learning_rate": 3.9094541745554946e-05, + "loss": 2.0963, "step": 4355 }, { - "epoch": 0.74, - "grad_norm": 0.2041015625, - "learning_rate": 0.00015899595203083976, - "loss": 2.1195, + "epoch": 1.48, + "grad_norm": 0.1708984375, + "learning_rate": 3.8860318713080725e-05, + "loss": 2.1128, "step": 4360 }, { - "epoch": 0.74, - "grad_norm": 0.2021484375, - "learning_rate": 0.00015887653950218722, - "loss": 2.1538, + "epoch": 1.48, + "grad_norm": 0.16796875, + "learning_rate": 3.8626630100943196e-05, + "loss": 2.1146, "step": 4365 }, { - "epoch": 0.74, - "grad_norm": 0.2060546875, - "learning_rate": 0.00015875699833810839, - "loss": 2.1617, + "epoch": 1.48, + "grad_norm": 0.169921875, + "learning_rate": 3.8393477951808444e-05, + "loss": 2.1023, "step": 4370 }, { - "epoch": 0.74, - "grad_norm": 0.2021484375, - "learning_rate": 0.00015863732879978082, - "loss": 2.1945, + "epoch": 1.48, + "grad_norm": 0.16796875, + "learning_rate": 3.816086430365321e-05, + "loss": 2.1185, "step": 4375 }, { - "epoch": 0.74, - "grad_norm": 0.2158203125, - "learning_rate": 0.00015851753114866251, - "loss": 2.1321, + "epoch": 1.48, + "grad_norm": 0.1669921875, + "learning_rate": 3.7928791189747195e-05, + "loss": 2.1078, "step": 4380 }, { - "epoch": 0.74, - "grad_norm": 0.208984375, - "learning_rate": 0.0001583976056464913, - "loss": 2.1336, + "epoch": 1.49, + "grad_norm": 0.16796875, + "learning_rate": 3.769726063863541e-05, + "loss": 2.1135, "step": 4385 }, { - "epoch": 0.74, - "grad_norm": 0.2041015625, - "learning_rate": 0.00015827755255528448, - "loss": 2.1547, + "epoch": 1.49, + "grad_norm": 0.1650390625, + "learning_rate": 3.746627467412026e-05, + "loss": 2.1013, "step": 4390 }, { - "epoch": 0.74, - "grad_norm": 0.205078125, - "learning_rate": 0.000158157372137338, - "loss": 2.1544, + "epoch": 1.49, + "grad_norm": 0.1669921875, + "learning_rate": 3.723583531524394e-05, + "loss": 2.115, "step": 4395 }, { - "epoch": 0.75, - "grad_norm": 0.203125, - "learning_rate": 0.00015803706465522614, - "loss": 2.1145, + "epoch": 1.49, + "grad_norm": 0.1689453125, + "learning_rate": 3.700594457627079e-05, + "loss": 2.1374, "step": 4400 }, { - "epoch": 0.75, - "grad_norm": 0.205078125, - "learning_rate": 0.00015791663037180057, - "loss": 2.1527, + "epoch": 1.49, + "grad_norm": 0.1669921875, + "learning_rate": 3.6776604466669686e-05, + "loss": 2.1082, "step": 4405 }, { - "epoch": 0.75, - "grad_norm": 0.208984375, - "learning_rate": 0.0001577960695501902, - "loss": 2.1787, + "epoch": 1.49, + "grad_norm": 0.169921875, + "learning_rate": 3.654781699109645e-05, + "loss": 2.1172, "step": 4410 }, { - "epoch": 0.75, - "grad_norm": 0.205078125, - "learning_rate": 0.00015767538245380037, - "loss": 2.1749, + "epoch": 1.5, + "grad_norm": 0.16796875, + "learning_rate": 3.631958414937633e-05, + "loss": 2.1055, "step": 4415 }, { - "epoch": 0.75, - "grad_norm": 0.2041015625, - "learning_rate": 0.00015755456934631222, - "loss": 2.1307, + "epoch": 1.5, + "grad_norm": 0.166015625, + "learning_rate": 3.609190793648661e-05, + "loss": 2.1365, "step": 4420 }, { - "epoch": 0.75, - "grad_norm": 0.2021484375, - "learning_rate": 0.00015743363049168223, - "loss": 2.1711, + "epoch": 1.5, + "grad_norm": 0.171875, + "learning_rate": 3.586479034253902e-05, + "loss": 2.116, "step": 4425 }, { - "epoch": 0.75, - "grad_norm": 0.201171875, - "learning_rate": 0.00015731256615414166, - "loss": 2.1446, + "epoch": 1.5, + "grad_norm": 0.1689453125, + "learning_rate": 3.563823335276244e-05, + "loss": 2.1005, "step": 4430 }, { - "epoch": 0.75, - "grad_norm": 0.2041015625, - "learning_rate": 0.00015719137659819593, - "loss": 2.1615, + "epoch": 1.5, + "grad_norm": 0.1689453125, + "learning_rate": 3.541223894748553e-05, + "loss": 2.096, "step": 4435 }, { - "epoch": 0.75, - "grad_norm": 0.203125, - "learning_rate": 0.00015707006208862402, - "loss": 2.1711, + "epoch": 1.5, + "grad_norm": 0.1669921875, + "learning_rate": 3.51868091021194e-05, + "loss": 2.1085, "step": 4440 }, { - "epoch": 0.75, - "grad_norm": 0.20703125, - "learning_rate": 0.0001569486228904779, - "loss": 2.1336, + "epoch": 1.51, + "grad_norm": 0.1689453125, + "learning_rate": 3.496194578714036e-05, + "loss": 2.1207, "step": 4445 }, { - "epoch": 0.75, - "grad_norm": 0.2099609375, - "learning_rate": 0.000156827059269082, - "loss": 2.1397, + "epoch": 1.51, + "grad_norm": 0.1689453125, + "learning_rate": 3.473765096807269e-05, + "loss": 2.1179, "step": 4450 }, { - "epoch": 0.75, - "grad_norm": 0.2041015625, - "learning_rate": 0.00015670537149003257, - "loss": 2.1769, + "epoch": 1.51, + "grad_norm": 0.166015625, + "learning_rate": 3.45139266054715e-05, + "loss": 2.1069, "step": 4455 }, { - "epoch": 0.76, - "grad_norm": 0.212890625, - "learning_rate": 0.0001565835598191971, - "loss": 2.14, + "epoch": 1.51, + "grad_norm": 0.16796875, + "learning_rate": 3.429077465490551e-05, + "loss": 2.1214, "step": 4460 }, { - "epoch": 0.76, - "grad_norm": 0.208984375, - "learning_rate": 0.00015646162452271378, - "loss": 2.1609, + "epoch": 1.51, + "grad_norm": 0.1728515625, + "learning_rate": 3.406819706694003e-05, + "loss": 2.0987, "step": 4465 }, { - "epoch": 0.76, - "grad_norm": 0.208984375, - "learning_rate": 0.00015633956586699096, - "loss": 2.1562, + "epoch": 1.51, + "grad_norm": 0.1708984375, + "learning_rate": 3.3846195787119814e-05, + "loss": 2.0997, "step": 4470 }, { - "epoch": 0.76, - "grad_norm": 0.2021484375, - "learning_rate": 0.00015621738411870643, - "loss": 2.1282, + "epoch": 1.52, + "grad_norm": 0.1689453125, + "learning_rate": 3.362477275595225e-05, + "loss": 2.0933, "step": 4475 }, { - "epoch": 0.76, - "grad_norm": 0.20703125, - "learning_rate": 0.00015609507954480697, - "loss": 2.1813, + "epoch": 1.52, + "grad_norm": 0.171875, + "learning_rate": 3.340392990889018e-05, + "loss": 2.0976, "step": 4480 }, { - "epoch": 0.76, - "grad_norm": 0.216796875, - "learning_rate": 0.00015597265241250763, - "loss": 2.1393, + "epoch": 1.52, + "grad_norm": 0.173828125, + "learning_rate": 3.3183669176315045e-05, + "loss": 2.1335, "step": 4485 }, { - "epoch": 0.76, - "grad_norm": 0.208984375, - "learning_rate": 0.00015585010298929138, - "loss": 2.1257, + "epoch": 1.52, + "grad_norm": 0.166015625, + "learning_rate": 3.296399248352012e-05, + "loss": 2.0964, "step": 4490 }, { - "epoch": 0.76, - "grad_norm": 0.203125, - "learning_rate": 0.00015572743154290824, - "loss": 2.1303, + "epoch": 1.52, + "grad_norm": 0.1689453125, + "learning_rate": 3.2744901750693556e-05, + "loss": 2.0953, "step": 4495 }, { - "epoch": 0.76, - "grad_norm": 0.2041015625, - "learning_rate": 0.00015560463834137482, - "loss": 2.1328, + "epoch": 1.52, + "grad_norm": 0.169921875, + "learning_rate": 3.2526398892901654e-05, + "loss": 2.0882, "step": 4500 }, { - "epoch": 0.76, - "grad_norm": 0.2041015625, - "learning_rate": 0.0001554817236529739, - "loss": 2.1419, + "epoch": 1.53, + "grad_norm": 0.169921875, + "learning_rate": 3.2308485820072075e-05, + "loss": 2.1045, "step": 4505 }, { - "epoch": 0.76, - "grad_norm": 0.2021484375, - "learning_rate": 0.00015535868774625353, - "loss": 2.1534, + "epoch": 1.53, + "grad_norm": 0.1669921875, + "learning_rate": 3.2091164436977294e-05, + "loss": 2.109, "step": 4510 }, { - "epoch": 0.76, - "grad_norm": 0.2041015625, - "learning_rate": 0.00015523553089002667, - "loss": 2.1393, + "epoch": 1.53, + "grad_norm": 0.1650390625, + "learning_rate": 3.187443664321773e-05, + "loss": 2.0628, "step": 4515 }, { - "epoch": 0.77, - "grad_norm": 0.2080078125, - "learning_rate": 0.0001551122533533705, - "loss": 2.145, + "epoch": 1.53, + "grad_norm": 0.16796875, + "learning_rate": 3.165830433320531e-05, + "loss": 2.1031, "step": 4520 }, { - "epoch": 0.77, - "grad_norm": 0.2109375, - "learning_rate": 0.00015498885540562597, - "loss": 2.1604, + "epoch": 1.53, + "grad_norm": 0.162109375, + "learning_rate": 3.144276939614683e-05, + "loss": 2.1295, "step": 4525 }, { - "epoch": 0.77, - "grad_norm": 0.1982421875, - "learning_rate": 0.000154865337316397, - "loss": 2.137, + "epoch": 1.53, + "grad_norm": 0.16796875, + "learning_rate": 3.122783371602747e-05, + "loss": 2.1322, "step": 4530 }, { - "epoch": 0.77, - "grad_norm": 0.2099609375, - "learning_rate": 0.00015474169935554994, - "loss": 2.1242, + "epoch": 1.54, + "grad_norm": 0.166015625, + "learning_rate": 3.101349917159433e-05, + "loss": 2.1366, "step": 4535 }, { - "epoch": 0.77, - "grad_norm": 0.208984375, - "learning_rate": 0.00015461794179321323, - "loss": 2.2208, + "epoch": 1.54, + "grad_norm": 0.1669921875, + "learning_rate": 3.079976763633996e-05, + "loss": 2.1136, "step": 4540 }, { - "epoch": 0.77, - "grad_norm": 0.203125, - "learning_rate": 0.0001544940648997765, - "loss": 2.1613, + "epoch": 1.54, + "grad_norm": 0.16796875, + "learning_rate": 3.058664097848612e-05, + "loss": 2.1107, "step": 4545 }, { - "epoch": 0.77, - "grad_norm": 0.20703125, - "learning_rate": 0.00015437006894589007, - "loss": 2.1307, + "epoch": 1.54, + "grad_norm": 0.171875, + "learning_rate": 3.0374121060967255e-05, + "loss": 2.1025, "step": 4550 }, { - "epoch": 0.77, - "grad_norm": 0.205078125, - "learning_rate": 0.00015424595420246448, - "loss": 2.1235, + "epoch": 1.54, + "grad_norm": 0.1669921875, + "learning_rate": 3.0162209741414304e-05, + "loss": 2.1045, "step": 4555 }, { - "epoch": 0.77, - "grad_norm": 0.203125, - "learning_rate": 0.00015412172094066975, - "loss": 2.1515, + "epoch": 1.54, + "grad_norm": 0.1708984375, + "learning_rate": 2.9950908872138584e-05, + "loss": 2.0756, "step": 4560 }, { - "epoch": 0.77, - "grad_norm": 0.2041015625, - "learning_rate": 0.00015399736943193487, - "loss": 2.1534, + "epoch": 1.55, + "grad_norm": 0.1669921875, + "learning_rate": 2.9740220300115386e-05, + "loss": 2.119, "step": 4565 }, { - "epoch": 0.77, - "grad_norm": 0.203125, - "learning_rate": 0.0001538728999479471, - "loss": 2.1222, + "epoch": 1.55, + "grad_norm": 0.171875, + "learning_rate": 2.9530145866967895e-05, + "loss": 2.1415, "step": 4570 }, { - "epoch": 0.77, - "grad_norm": 0.201171875, - "learning_rate": 0.00015374831276065157, - "loss": 2.2067, + "epoch": 1.55, + "grad_norm": 0.1650390625, + "learning_rate": 2.9320687408951162e-05, + "loss": 2.0845, "step": 4575 }, { - "epoch": 0.78, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001536236081422505, - "loss": 2.1823, + "epoch": 1.55, + "grad_norm": 0.1669921875, + "learning_rate": 2.9111846756936113e-05, + "loss": 2.1181, "step": 4580 }, { - "epoch": 0.78, - "grad_norm": 0.203125, - "learning_rate": 0.00015349878636520273, - "loss": 2.1067, + "epoch": 1.55, + "grad_norm": 0.1669921875, + "learning_rate": 2.8903625736393304e-05, + "loss": 2.088, "step": 4585 }, { - "epoch": 0.78, - "grad_norm": 0.20703125, - "learning_rate": 0.00015337384770222295, - "loss": 2.1536, + "epoch": 1.55, + "grad_norm": 0.1689453125, + "learning_rate": 2.8696026167377155e-05, + "loss": 2.1498, "step": 4590 }, { - "epoch": 0.78, - "grad_norm": 0.2080078125, - "learning_rate": 0.00015324879242628145, - "loss": 2.149, + "epoch": 1.56, + "grad_norm": 0.1748046875, + "learning_rate": 2.8489049864510054e-05, + "loss": 2.1044, "step": 4595 }, { - "epoch": 0.78, - "grad_norm": 0.208984375, - "learning_rate": 0.00015312362081060308, - "loss": 2.1436, + "epoch": 1.56, + "grad_norm": 0.1708984375, + "learning_rate": 2.8282698636966375e-05, + "loss": 2.139, "step": 4600 }, { - "epoch": 0.78, - "grad_norm": 0.2080078125, - "learning_rate": 0.00015299833312866696, - "loss": 2.1826, + "epoch": 1.56, + "grad_norm": 0.169921875, + "learning_rate": 2.8076974288456726e-05, + "loss": 2.0717, "step": 4605 }, { - "epoch": 0.78, - "grad_norm": 0.19921875, - "learning_rate": 0.0001528729296542058, - "loss": 2.1255, + "epoch": 1.56, + "grad_norm": 0.171875, + "learning_rate": 2.78718786172122e-05, + "loss": 2.1046, "step": 4610 }, { - "epoch": 0.78, - "grad_norm": 0.2021484375, - "learning_rate": 0.00015274741066120535, - "loss": 2.1766, + "epoch": 1.56, + "grad_norm": 0.169921875, + "learning_rate": 2.7667413415968635e-05, + "loss": 2.0918, "step": 4615 }, { - "epoch": 0.78, - "grad_norm": 0.2197265625, - "learning_rate": 0.0001526217764239036, - "loss": 2.1777, + "epoch": 1.56, + "grad_norm": 0.1689453125, + "learning_rate": 2.7463580471950943e-05, + "loss": 2.1356, "step": 4620 }, { - "epoch": 0.78, - "grad_norm": 0.201171875, - "learning_rate": 0.00015249602721679047, - "loss": 2.1478, + "epoch": 1.57, + "grad_norm": 0.1650390625, + "learning_rate": 2.7260381566857473e-05, + "loss": 2.0996, "step": 4625 }, { - "epoch": 0.78, - "grad_norm": 0.2109375, - "learning_rate": 0.00015237016331460702, - "loss": 2.1484, + "epoch": 1.57, + "grad_norm": 0.166015625, + "learning_rate": 2.7057818476844533e-05, + "loss": 2.0962, "step": 4630 }, { - "epoch": 0.78, - "grad_norm": 0.2001953125, - "learning_rate": 0.00015224418499234488, - "loss": 2.1994, + "epoch": 1.57, + "grad_norm": 0.16796875, + "learning_rate": 2.68558929725107e-05, + "loss": 2.0948, "step": 4635 }, { - "epoch": 0.79, - "grad_norm": 0.2041015625, - "learning_rate": 0.00015211809252524568, - "loss": 2.1161, + "epoch": 1.57, + "grad_norm": 0.166015625, + "learning_rate": 2.6654606818881465e-05, + "loss": 2.1209, "step": 4640 }, { - "epoch": 0.79, - "grad_norm": 0.232421875, - "learning_rate": 0.00015199188618880049, - "loss": 2.1493, + "epoch": 1.57, + "grad_norm": 0.1640625, + "learning_rate": 2.645396177539379e-05, + "loss": 2.1148, "step": 4645 }, { - "epoch": 0.79, - "grad_norm": 0.208984375, - "learning_rate": 0.0001518655662587491, - "loss": 2.1431, + "epoch": 1.57, + "grad_norm": 0.1650390625, + "learning_rate": 2.6253959595880673e-05, + "loss": 2.1074, "step": 4650 }, { - "epoch": 0.79, - "grad_norm": 0.203125, - "learning_rate": 0.0001517391330110795, - "loss": 2.1434, + "epoch": 1.58, + "grad_norm": 0.16796875, + "learning_rate": 2.6054602028555885e-05, + "loss": 2.113, "step": 4655 }, { - "epoch": 0.79, - "grad_norm": 0.2080078125, - "learning_rate": 0.00015161258672202724, - "loss": 2.1252, + "epoch": 1.58, + "grad_norm": 0.16796875, + "learning_rate": 2.585589081599862e-05, + "loss": 2.1115, "step": 4660 }, { - "epoch": 0.79, - "grad_norm": 0.203125, - "learning_rate": 0.0001514859276680749, - "loss": 2.1591, + "epoch": 1.58, + "grad_norm": 0.1689453125, + "learning_rate": 2.5657827695138372e-05, + "loss": 2.1018, "step": 4665 }, { - "epoch": 0.79, - "grad_norm": 0.1982421875, - "learning_rate": 0.0001513591561259514, - "loss": 2.1137, + "epoch": 1.58, + "grad_norm": 0.177734375, + "learning_rate": 2.546041439723963e-05, + "loss": 2.1269, "step": 4670 }, { - "epoch": 0.79, - "grad_norm": 0.2060546875, - "learning_rate": 0.00015123227237263148, - "loss": 2.1636, + "epoch": 1.58, + "grad_norm": 0.1669921875, + "learning_rate": 2.5263652647886803e-05, + "loss": 2.1149, "step": 4675 }, { - "epoch": 0.79, - "grad_norm": 0.2080078125, - "learning_rate": 0.00015110527668533486, - "loss": 2.1489, + "epoch": 1.59, + "grad_norm": 0.16796875, + "learning_rate": 2.5067544166969114e-05, + "loss": 2.1177, "step": 4680 }, { - "epoch": 0.79, - "grad_norm": 0.2060546875, - "learning_rate": 0.0001509781693415261, - "loss": 2.1301, + "epoch": 1.59, + "grad_norm": 0.169921875, + "learning_rate": 2.487209066866565e-05, + "loss": 2.0814, "step": 4685 }, { - "epoch": 0.79, - "grad_norm": 0.203125, - "learning_rate": 0.00015085095061891348, - "loss": 2.1761, + "epoch": 1.59, + "grad_norm": 0.16796875, + "learning_rate": 2.467729386143025e-05, + "loss": 2.106, "step": 4690 }, { - "epoch": 0.8, - "grad_norm": 0.203125, - "learning_rate": 0.0001507236207954487, - "loss": 2.2051, + "epoch": 1.59, + "grad_norm": 0.1689453125, + "learning_rate": 2.4483155447976657e-05, + "loss": 2.1241, "step": 4695 }, { - "epoch": 0.8, - "grad_norm": 0.203125, - "learning_rate": 0.00015059618014932625, - "loss": 2.1436, - "step": 4700 + "epoch": 1.59, + "grad_norm": 0.1708984375, + "learning_rate": 2.42896771252636e-05, + "loss": 2.0944, + "step": 4700 }, { - "epoch": 0.8, - "grad_norm": 0.2119140625, - "learning_rate": 0.00015046862895898267, - "loss": 2.1323, + "epoch": 1.59, + "grad_norm": 0.1669921875, + "learning_rate": 2.4096860584479974e-05, + "loss": 2.1351, "step": 4705 }, { - "epoch": 0.8, - "grad_norm": 0.2060546875, - "learning_rate": 0.00015034096750309609, - "loss": 2.1837, + "epoch": 1.6, + "grad_norm": 0.16796875, + "learning_rate": 2.390470751103008e-05, + "loss": 2.09, "step": 4710 }, { - "epoch": 0.8, - "grad_norm": 0.205078125, - "learning_rate": 0.00015021319606058544, - "loss": 2.1374, + "epoch": 1.6, + "grad_norm": 0.16796875, + "learning_rate": 2.37132195845188e-05, + "loss": 2.1335, "step": 4715 }, { - "epoch": 0.8, - "grad_norm": 0.208984375, - "learning_rate": 0.00015008531491061012, - "loss": 2.1646, + "epoch": 1.6, + "grad_norm": 0.16796875, + "learning_rate": 2.3522398478737108e-05, + "loss": 2.097, "step": 4720 }, { - "epoch": 0.8, - "grad_norm": 0.21484375, - "learning_rate": 0.00014995732433256906, - "loss": 2.1726, + "epoch": 1.6, + "grad_norm": 0.1689453125, + "learning_rate": 2.3332245861647206e-05, + "loss": 2.0869, "step": 4725 }, { - "epoch": 0.8, - "grad_norm": 0.208984375, - "learning_rate": 0.00014982922460610038, - "loss": 2.1688, + "epoch": 1.6, + "grad_norm": 0.1728515625, + "learning_rate": 2.3142763395368095e-05, + "loss": 2.0909, "step": 4730 }, { - "epoch": 0.8, - "grad_norm": 0.205078125, - "learning_rate": 0.00014970101601108059, - "loss": 2.1733, + "epoch": 1.6, + "grad_norm": 0.1669921875, + "learning_rate": 2.295395273616099e-05, + "loss": 2.1025, "step": 4735 }, { - "epoch": 0.8, - "grad_norm": 0.2109375, - "learning_rate": 0.00014957269882762416, - "loss": 2.1179, + "epoch": 1.61, + "grad_norm": 0.1708984375, + "learning_rate": 2.2765815534414868e-05, + "loss": 2.0932, "step": 4740 }, { - "epoch": 0.8, - "grad_norm": 0.21484375, - "learning_rate": 0.0001494442733360827, - "loss": 2.1872, + "epoch": 1.61, + "grad_norm": 0.1630859375, + "learning_rate": 2.257835343463205e-05, + "loss": 2.0842, "step": 4745 }, { - "epoch": 0.8, - "grad_norm": 0.2109375, - "learning_rate": 0.00014931573981704453, - "loss": 2.1705, + "epoch": 1.61, + "grad_norm": 0.1728515625, + "learning_rate": 2.239156807541375e-05, + "loss": 2.1045, "step": 4750 }, { - "epoch": 0.81, - "grad_norm": 0.2119140625, - "learning_rate": 0.00014918709855133396, - "loss": 2.1283, + "epoch": 1.61, + "grad_norm": 0.173828125, + "learning_rate": 2.22054610894459e-05, + "loss": 2.1367, "step": 4755 }, { - "epoch": 0.81, - "grad_norm": 0.208984375, - "learning_rate": 0.00014905834982001075, - "loss": 2.1372, + "epoch": 1.61, + "grad_norm": 0.1806640625, + "learning_rate": 2.202003410348473e-05, + "loss": 2.0936, "step": 4760 }, { - "epoch": 0.81, - "grad_norm": 0.212890625, - "learning_rate": 0.00014892949390436934, - "loss": 2.161, + "epoch": 1.61, + "grad_norm": 0.1669921875, + "learning_rate": 2.1835288738342596e-05, + "loss": 2.1331, "step": 4765 }, { - "epoch": 0.81, - "grad_norm": 0.2060546875, - "learning_rate": 0.0001488005310859385, - "loss": 2.1579, + "epoch": 1.62, + "grad_norm": 0.169921875, + "learning_rate": 2.1651226608873877e-05, + "loss": 2.0986, "step": 4770 }, { - "epoch": 0.81, - "grad_norm": 0.212890625, - "learning_rate": 0.0001486714616464805, - "loss": 2.1786, + "epoch": 1.62, + "grad_norm": 0.16796875, + "learning_rate": 2.1467849323960797e-05, + "loss": 2.1361, "step": 4775 }, { - "epoch": 0.81, - "grad_norm": 0.20703125, - "learning_rate": 0.0001485422858679905, - "loss": 2.1798, + "epoch": 1.62, + "grad_norm": 0.1748046875, + "learning_rate": 2.128515848649929e-05, + "loss": 2.1241, "step": 4780 }, { - "epoch": 0.81, - "grad_norm": 0.205078125, - "learning_rate": 0.0001484130040326961, - "loss": 2.1244, + "epoch": 1.62, + "grad_norm": 0.16796875, + "learning_rate": 2.1103155693385136e-05, + "loss": 2.1371, "step": 4785 }, { - "epoch": 0.81, - "grad_norm": 0.2080078125, - "learning_rate": 0.0001482836164230565, - "loss": 2.1467, + "epoch": 1.62, + "grad_norm": 0.1748046875, + "learning_rate": 2.092184253549998e-05, + "loss": 2.1204, "step": 4790 }, { - "epoch": 0.81, - "grad_norm": 0.205078125, - "learning_rate": 0.00014815412332176212, - "loss": 2.1469, + "epoch": 1.62, + "grad_norm": 0.169921875, + "learning_rate": 2.074122059769733e-05, + "loss": 2.08, "step": 4795 }, { - "epoch": 0.81, - "grad_norm": 0.212890625, - "learning_rate": 0.00014802452501173384, - "loss": 2.1511, + "epoch": 1.63, + "grad_norm": 0.16796875, + "learning_rate": 2.0561291458788733e-05, + "loss": 2.1013, "step": 4800 }, { - "epoch": 0.81, - "grad_norm": 0.2060546875, - "learning_rate": 0.00014789482177612225, - "loss": 2.1176, + "epoch": 1.63, + "grad_norm": 0.171875, + "learning_rate": 2.0382056691530084e-05, + "loss": 2.1382, "step": 4805 }, { - "epoch": 0.81, - "grad_norm": 0.21484375, - "learning_rate": 0.00014776501389830737, - "loss": 2.1606, + "epoch": 1.63, + "grad_norm": 0.16796875, + "learning_rate": 2.02035178626077e-05, + "loss": 2.1155, "step": 4810 }, { - "epoch": 0.82, - "grad_norm": 0.212890625, - "learning_rate": 0.00014763510166189783, - "loss": 2.1423, + "epoch": 1.63, + "grad_norm": 0.166015625, + "learning_rate": 2.002567653262479e-05, + "loss": 2.0965, "step": 4815 }, { - "epoch": 0.82, - "grad_norm": 0.216796875, - "learning_rate": 0.00014750508535073012, - "loss": 2.166, + "epoch": 1.63, + "grad_norm": 0.1640625, + "learning_rate": 1.984853425608769e-05, + "loss": 2.1354, "step": 4820 }, { - "epoch": 0.82, - "grad_norm": 0.2080078125, - "learning_rate": 0.00014737496524886828, - "loss": 2.1404, + "epoch": 1.63, + "grad_norm": 0.169921875, + "learning_rate": 1.9672092581392375e-05, + "loss": 2.1271, "step": 4825 }, { - "epoch": 0.82, - "grad_norm": 0.2021484375, - "learning_rate": 0.00014724474164060298, - "loss": 2.1461, + "epoch": 1.64, + "grad_norm": 0.171875, + "learning_rate": 1.9496353050810843e-05, + "loss": 2.1183, "step": 4830 }, { - "epoch": 0.82, - "grad_norm": 0.2080078125, - "learning_rate": 0.00014711441481045115, - "loss": 2.1584, + "epoch": 1.64, + "grad_norm": 0.1630859375, + "learning_rate": 1.9321317200477653e-05, + "loss": 2.1449, "step": 4835 }, { - "epoch": 0.82, - "grad_norm": 0.2099609375, - "learning_rate": 0.00014698398504315522, - "loss": 2.1381, + "epoch": 1.64, + "grad_norm": 0.16796875, + "learning_rate": 1.91469865603766e-05, + "loss": 2.0944, "step": 4840 }, { - "epoch": 0.82, - "grad_norm": 0.2109375, - "learning_rate": 0.00014685345262368242, - "loss": 2.1385, + "epoch": 1.64, + "grad_norm": 0.16796875, + "learning_rate": 1.8973362654327175e-05, + "loss": 2.1171, "step": 4845 }, { - "epoch": 0.82, - "grad_norm": 0.203125, - "learning_rate": 0.00014672281783722438, - "loss": 2.187, + "epoch": 1.64, + "grad_norm": 0.166015625, + "learning_rate": 1.8800446999971346e-05, + "loss": 2.1076, "step": 4850 }, { - "epoch": 0.82, - "grad_norm": 0.20703125, - "learning_rate": 0.00014659208096919635, - "loss": 2.2096, + "epoch": 1.64, + "grad_norm": 0.1767578125, + "learning_rate": 1.8628241108760268e-05, + "loss": 2.0964, "step": 4855 }, { - "epoch": 0.82, - "grad_norm": 0.2060546875, - "learning_rate": 0.00014646124230523652, - "loss": 2.1409, + "epoch": 1.65, + "grad_norm": 0.169921875, + "learning_rate": 1.845674648594108e-05, + "loss": 2.1312, "step": 4860 }, { - "epoch": 0.82, - "grad_norm": 0.2080078125, - "learning_rate": 0.00014633030213120568, - "loss": 2.1633, + "epoch": 1.65, + "grad_norm": 0.1669921875, + "learning_rate": 1.828596463054375e-05, + "loss": 2.0881, "step": 4865 }, { - "epoch": 0.82, - "grad_norm": 0.2060546875, - "learning_rate": 0.00014619926073318617, - "loss": 2.1757, + "epoch": 1.65, + "grad_norm": 0.1689453125, + "learning_rate": 1.8115897035367934e-05, + "loss": 2.0857, "step": 4870 }, { - "epoch": 0.83, - "grad_norm": 0.2021484375, - "learning_rate": 0.00014606811839748172, - "loss": 2.1703, + "epoch": 1.65, + "grad_norm": 0.1689453125, + "learning_rate": 1.7946545186970022e-05, + "loss": 2.1279, "step": 4875 }, { - "epoch": 0.83, - "grad_norm": 0.2158203125, - "learning_rate": 0.00014593687541061636, - "loss": 2.1715, + "epoch": 1.65, + "grad_norm": 0.1708984375, + "learning_rate": 1.7777910565650024e-05, + "loss": 2.0998, "step": 4880 }, { - "epoch": 0.83, - "grad_norm": 0.2109375, - "learning_rate": 0.00014580553205933422, - "loss": 2.1174, + "epoch": 1.65, + "grad_norm": 0.169921875, + "learning_rate": 1.760999464543869e-05, + "loss": 2.0972, "step": 4885 }, { - "epoch": 0.83, - "grad_norm": 0.2119140625, - "learning_rate": 0.00014567408863059864, - "loss": 2.1588, + "epoch": 1.66, + "grad_norm": 0.1728515625, + "learning_rate": 1.7442798894084655e-05, + "loss": 2.1296, "step": 4890 }, { - "epoch": 0.83, - "grad_norm": 0.20703125, - "learning_rate": 0.00014554254541159154, - "loss": 2.1533, + "epoch": 1.66, + "grad_norm": 0.1689453125, + "learning_rate": 1.7276324773041565e-05, + "loss": 2.1326, "step": 4895 }, { - "epoch": 0.83, - "grad_norm": 0.2109375, - "learning_rate": 0.00014541090268971297, - "loss": 2.1168, + "epoch": 1.66, + "grad_norm": 0.171875, + "learning_rate": 1.7110573737455295e-05, + "loss": 2.1088, "step": 4900 }, { - "epoch": 0.83, - "grad_norm": 0.203125, - "learning_rate": 0.00014527916075258036, - "loss": 2.1413, + "epoch": 1.66, + "grad_norm": 0.166015625, + "learning_rate": 1.694554723615126e-05, + "loss": 2.1011, "step": 4905 }, { - "epoch": 0.83, - "grad_norm": 0.205078125, - "learning_rate": 0.00014514731988802786, - "loss": 2.1658, + "epoch": 1.66, + "grad_norm": 0.16796875, + "learning_rate": 1.6781246711621744e-05, + "loss": 2.1268, "step": 4910 }, { - "epoch": 0.83, - "grad_norm": 0.2060546875, - "learning_rate": 0.00014501538038410574, - "loss": 2.1561, + "epoch": 1.66, + "grad_norm": 0.1650390625, + "learning_rate": 1.6617673600013296e-05, + "loss": 2.1262, "step": 4915 }, { - "epoch": 0.83, - "grad_norm": 0.2041015625, - "learning_rate": 0.00014488334252907992, - "loss": 2.1379, + "epoch": 1.67, + "grad_norm": 0.1689453125, + "learning_rate": 1.645482933111412e-05, + "loss": 2.0923, "step": 4920 }, { - "epoch": 0.83, - "grad_norm": 0.2060546875, - "learning_rate": 0.00014475120661143107, - "loss": 2.131, + "epoch": 1.67, + "grad_norm": 0.16796875, + "learning_rate": 1.6292715328341712e-05, + "loss": 2.0922, "step": 4925 }, { - "epoch": 0.83, - "grad_norm": 0.19921875, - "learning_rate": 0.00014461897291985411, - "loss": 2.1684, + "epoch": 1.67, + "grad_norm": 0.1728515625, + "learning_rate": 1.6131333008730277e-05, + "loss": 2.1354, "step": 4930 }, { - "epoch": 0.84, - "grad_norm": 0.2109375, - "learning_rate": 0.00014448664174325764, - "loss": 2.1809, + "epoch": 1.67, + "grad_norm": 0.1728515625, + "learning_rate": 1.5970683782918374e-05, + "loss": 2.1079, "step": 4935 }, { - "epoch": 0.84, - "grad_norm": 0.21875, - "learning_rate": 0.00014435421337076327, - "loss": 2.1414, + "epoch": 1.67, + "grad_norm": 0.16796875, + "learning_rate": 1.5810769055136644e-05, + "loss": 2.1116, "step": 4940 }, { - "epoch": 0.84, - "grad_norm": 0.205078125, - "learning_rate": 0.00014422168809170486, - "loss": 2.1749, + "epoch": 1.67, + "grad_norm": 0.171875, + "learning_rate": 1.56515902231955e-05, + "loss": 2.1022, "step": 4945 }, { - "epoch": 0.84, - "grad_norm": 0.2158203125, - "learning_rate": 0.00014408906619562808, - "loss": 2.1698, + "epoch": 1.68, + "grad_norm": 0.169921875, + "learning_rate": 1.5493148678472903e-05, + "loss": 2.1269, "step": 4950 }, { - "epoch": 0.84, - "grad_norm": 0.2060546875, - "learning_rate": 0.0001439563479722897, - "loss": 2.1361, + "epoch": 1.68, + "grad_norm": 0.171875, + "learning_rate": 1.533544580590217e-05, + "loss": 2.1085, "step": 4955 }, { - "epoch": 0.84, - "grad_norm": 0.2099609375, - "learning_rate": 0.00014382353371165685, - "loss": 2.1304, + "epoch": 1.68, + "grad_norm": 0.1728515625, + "learning_rate": 1.5178482983959985e-05, + "loss": 2.0965, "step": 4960 }, { - "epoch": 0.84, - "grad_norm": 0.2109375, - "learning_rate": 0.00014369062370390667, - "loss": 2.1559, + "epoch": 1.68, + "grad_norm": 0.169921875, + "learning_rate": 1.5022261584654207e-05, + "loss": 2.129, "step": 4965 }, { - "epoch": 0.84, - "grad_norm": 0.208984375, - "learning_rate": 0.00014355761823942525, - "loss": 2.1343, + "epoch": 1.68, + "grad_norm": 0.1669921875, + "learning_rate": 1.4866782973511962e-05, + "loss": 2.0724, "step": 4970 }, { - "epoch": 0.84, - "grad_norm": 0.208984375, - "learning_rate": 0.0001434245176088074, - "loss": 2.1623, + "epoch": 1.69, + "grad_norm": 0.16796875, + "learning_rate": 1.4712048509567634e-05, + "loss": 2.1429, "step": 4975 }, { - "epoch": 0.84, - "grad_norm": 0.20703125, - "learning_rate": 0.00014329132210285587, - "loss": 2.1498, + "epoch": 1.69, + "grad_norm": 0.16796875, + "learning_rate": 1.4558059545351143e-05, + "loss": 2.1329, "step": 4980 }, { - "epoch": 0.84, - "grad_norm": 0.2138671875, - "learning_rate": 0.00014315803201258058, - "loss": 2.1251, + "epoch": 1.69, + "grad_norm": 0.16796875, + "learning_rate": 1.4404817426875938e-05, + "loss": 2.0845, "step": 4985 }, { - "epoch": 0.85, - "grad_norm": 0.2001953125, - "learning_rate": 0.00014302464762919819, - "loss": 2.1, + "epoch": 1.69, + "grad_norm": 0.1689453125, + "learning_rate": 1.4252323493627251e-05, + "loss": 2.1036, "step": 4990 }, { - "epoch": 0.85, - "grad_norm": 0.2138671875, - "learning_rate": 0.00014289116924413132, - "loss": 2.1501, + "epoch": 1.69, + "grad_norm": 0.169921875, + "learning_rate": 1.4100579078550613e-05, + "loss": 2.0901, "step": 4995 }, { - "epoch": 0.85, - "grad_norm": 0.2001953125, - "learning_rate": 0.00014275759714900806, - "loss": 2.135, + "epoch": 1.69, + "grad_norm": 0.1748046875, + "learning_rate": 1.3949585508039886e-05, + "loss": 2.123, "step": 5000 }, { - "epoch": 0.85, - "grad_norm": 0.2138671875, - "learning_rate": 0.0001426239316356611, - "loss": 2.1379, + "epoch": 1.7, + "grad_norm": 0.1669921875, + "learning_rate": 1.3799344101925904e-05, + "loss": 2.1344, "step": 5005 }, { - "epoch": 0.85, - "grad_norm": 0.208984375, - "learning_rate": 0.00014249017299612735, - "loss": 2.1039, + "epoch": 1.7, + "grad_norm": 0.177734375, + "learning_rate": 1.3649856173464781e-05, + "loss": 2.1068, "step": 5010 }, { - "epoch": 0.85, - "grad_norm": 0.2060546875, - "learning_rate": 0.00014235632152264716, - "loss": 2.1887, + "epoch": 1.7, + "grad_norm": 0.1669921875, + "learning_rate": 1.3501123029326601e-05, + "loss": 2.1274, "step": 5015 }, { - "epoch": 0.85, - "grad_norm": 0.208984375, - "learning_rate": 0.00014222237750766365, - "loss": 2.1571, + "epoch": 1.7, + "grad_norm": 0.169921875, + "learning_rate": 1.3353145969583813e-05, + "loss": 2.1117, "step": 5020 }, { - "epoch": 0.85, - "grad_norm": 0.216796875, - "learning_rate": 0.0001420883412438222, - "loss": 2.1553, + "epoch": 1.7, + "grad_norm": 0.1669921875, + "learning_rate": 1.3205926287699988e-05, + "loss": 2.114, "step": 5025 }, { - "epoch": 0.85, - "grad_norm": 0.20703125, - "learning_rate": 0.00014195421302396968, - "loss": 2.1225, + "epoch": 1.7, + "grad_norm": 0.1728515625, + "learning_rate": 1.3059465270518468e-05, + "loss": 2.113, "step": 5030 }, { - "epoch": 0.85, - "grad_norm": 0.2109375, - "learning_rate": 0.00014181999314115393, - "loss": 2.151, + "epoch": 1.71, + "grad_norm": 0.16796875, + "learning_rate": 1.2913764198251132e-05, + "loss": 2.1568, "step": 5035 }, { - "epoch": 0.85, - "grad_norm": 0.205078125, - "learning_rate": 0.000141685681888623, - "loss": 2.1276, + "epoch": 1.71, + "grad_norm": 0.1708984375, + "learning_rate": 1.276882434446719e-05, + "loss": 2.1104, "step": 5040 }, { - "epoch": 0.85, - "grad_norm": 0.20703125, - "learning_rate": 0.0001415512795598246, - "loss": 2.162, + "epoch": 1.71, + "grad_norm": 0.1689453125, + "learning_rate": 1.2624646976082066e-05, + "loss": 2.1108, "step": 5045 }, { - "epoch": 0.86, - "grad_norm": 0.205078125, - "learning_rate": 0.00014141678644840542, - "loss": 2.1619, + "epoch": 1.71, + "grad_norm": 0.16796875, + "learning_rate": 1.2481233353346344e-05, + "loss": 2.1027, "step": 5050 }, { - "epoch": 0.86, - "grad_norm": 0.212890625, - "learning_rate": 0.0001412822028482105, - "loss": 2.1493, + "epoch": 1.71, + "grad_norm": 0.1689453125, + "learning_rate": 1.2338584729834701e-05, + "loss": 2.0853, "step": 5055 }, { - "epoch": 0.86, - "grad_norm": 0.1982421875, - "learning_rate": 0.00014114752905328257, - "loss": 2.1496, + "epoch": 1.71, + "grad_norm": 0.16796875, + "learning_rate": 1.2196702352434953e-05, + "loss": 2.1188, "step": 5060 }, { - "epoch": 0.86, - "grad_norm": 0.2099609375, - "learning_rate": 0.00014101276535786138, - "loss": 2.1648, + "epoch": 1.72, + "grad_norm": 0.169921875, + "learning_rate": 1.205558746133727e-05, + "loss": 2.1078, "step": 5065 }, { - "epoch": 0.86, - "grad_norm": 0.208984375, - "learning_rate": 0.00014087791205638324, - "loss": 2.1168, + "epoch": 1.72, + "grad_norm": 0.16796875, + "learning_rate": 1.1915241290023115e-05, + "loss": 2.0969, "step": 5070 }, { - "epoch": 0.86, - "grad_norm": 0.2109375, - "learning_rate": 0.00014074296944348007, - "loss": 2.1447, + "epoch": 1.72, + "grad_norm": 0.1728515625, + "learning_rate": 1.1775665065254704e-05, + "loss": 2.1288, "step": 5075 }, { - "epoch": 0.86, - "grad_norm": 0.216796875, - "learning_rate": 0.000140607937813979, - "loss": 2.1338, + "epoch": 1.72, + "grad_norm": 0.169921875, + "learning_rate": 1.1636860007064076e-05, + "loss": 2.082, "step": 5080 }, { - "epoch": 0.86, - "grad_norm": 0.212890625, - "learning_rate": 0.00014047281746290167, - "loss": 2.1485, + "epoch": 1.72, + "grad_norm": 0.171875, + "learning_rate": 1.1498827328742623e-05, + "loss": 2.1079, "step": 5085 }, { - "epoch": 0.86, - "grad_norm": 0.2060546875, - "learning_rate": 0.00014033760868546345, - "loss": 2.1682, + "epoch": 1.72, + "grad_norm": 0.1689453125, + "learning_rate": 1.1361568236830323e-05, + "loss": 2.129, "step": 5090 }, { - "epoch": 0.86, - "grad_norm": 0.2109375, - "learning_rate": 0.00014020231177707307, - "loss": 2.1584, + "epoch": 1.73, + "grad_norm": 0.166015625, + "learning_rate": 1.122508393110524e-05, + "loss": 2.1246, "step": 5095 }, { - "epoch": 0.86, - "grad_norm": 0.2109375, - "learning_rate": 0.00014006692703333171, - "loss": 2.1144, + "epoch": 1.73, + "grad_norm": 0.16796875, + "learning_rate": 1.1089375604573116e-05, + "loss": 2.0824, "step": 5100 }, { - "epoch": 0.86, - "grad_norm": 0.201171875, - "learning_rate": 0.00013993145475003243, - "loss": 2.1796, + "epoch": 1.73, + "grad_norm": 0.1708984375, + "learning_rate": 1.0954444443456824e-05, + "loss": 2.0788, "step": 5105 }, { - "epoch": 0.87, - "grad_norm": 0.205078125, - "learning_rate": 0.00013979589522315959, - "loss": 2.1514, + "epoch": 1.73, + "grad_norm": 0.169921875, + "learning_rate": 1.0820291627186107e-05, + "loss": 2.1059, "step": 5110 }, { - "epoch": 0.87, - "grad_norm": 0.203125, - "learning_rate": 0.00013966024874888821, - "loss": 2.101, + "epoch": 1.73, + "grad_norm": 0.1640625, + "learning_rate": 1.0686918328387118e-05, + "loss": 2.1059, "step": 5115 }, { - "epoch": 0.87, - "grad_norm": 0.2109375, - "learning_rate": 0.0001395245156235832, - "loss": 2.1363, + "epoch": 1.73, + "grad_norm": 0.16796875, + "learning_rate": 1.0554325712872381e-05, + "loss": 2.1064, "step": 5120 }, { - "epoch": 0.87, - "grad_norm": 0.20703125, - "learning_rate": 0.00013938869614379883, - "loss": 2.145, + "epoch": 1.74, + "grad_norm": 0.173828125, + "learning_rate": 1.0422514939630424e-05, + "loss": 2.1088, "step": 5125 }, { - "epoch": 0.87, - "grad_norm": 0.2041015625, - "learning_rate": 0.000139252790606278, - "loss": 2.1134, + "epoch": 1.74, + "grad_norm": 0.1708984375, + "learning_rate": 1.0291487160815726e-05, + "loss": 2.1372, "step": 5130 }, { - "epoch": 0.87, - "grad_norm": 0.2060546875, - "learning_rate": 0.0001391167993079517, - "loss": 2.1702, + "epoch": 1.74, + "grad_norm": 0.1669921875, + "learning_rate": 1.0161243521738661e-05, + "loss": 2.1255, "step": 5135 }, { - "epoch": 0.87, - "grad_norm": 0.205078125, - "learning_rate": 0.00013898072254593823, - "loss": 2.1752, + "epoch": 1.74, + "grad_norm": 0.1650390625, + "learning_rate": 1.003178516085541e-05, + "loss": 2.1214, "step": 5140 }, { - "epoch": 0.87, - "grad_norm": 0.2060546875, - "learning_rate": 0.00013884456061754265, - "loss": 2.1614, + "epoch": 1.74, + "grad_norm": 0.169921875, + "learning_rate": 9.903113209758096e-06, + "loss": 2.1239, "step": 5145 }, { - "epoch": 0.87, - "grad_norm": 0.201171875, - "learning_rate": 0.00013870831382025602, - "loss": 2.1189, + "epoch": 1.74, + "grad_norm": 0.16796875, + "learning_rate": 9.775228793164826e-06, + "loss": 2.0882, "step": 5150 }, { - "epoch": 0.87, - "grad_norm": 0.205078125, - "learning_rate": 0.00013857198245175497, - "loss": 2.1356, + "epoch": 1.75, + "grad_norm": 0.1689453125, + "learning_rate": 9.6481330289099e-06, + "loss": 2.0986, "step": 5155 }, { - "epoch": 0.87, - "grad_norm": 0.20703125, - "learning_rate": 0.00013843556680990078, - "loss": 2.1685, + "epoch": 1.75, + "grad_norm": 0.16796875, + "learning_rate": 9.521827027934038e-06, + "loss": 2.1189, "step": 5160 }, { - "epoch": 0.87, - "grad_norm": 0.205078125, - "learning_rate": 0.00013829906719273885, - "loss": 2.1494, + "epoch": 1.75, + "grad_norm": 0.1689453125, + "learning_rate": 9.396311894274645e-06, + "loss": 2.1266, "step": 5165 }, { - "epoch": 0.88, - "grad_norm": 0.2109375, - "learning_rate": 0.0001381624838984982, - "loss": 2.1311, + "epoch": 1.75, + "grad_norm": 0.1669921875, + "learning_rate": 9.271588725056201e-06, + "loss": 2.1027, "step": 5170 }, { - "epoch": 0.88, - "grad_norm": 0.2158203125, - "learning_rate": 0.00013802581722559048, - "loss": 2.1802, + "epoch": 1.75, + "grad_norm": 0.169921875, + "learning_rate": 9.147658610480625e-06, + "loss": 2.097, "step": 5175 }, { - "epoch": 0.88, - "grad_norm": 0.2109375, - "learning_rate": 0.0001378890674726096, - "loss": 2.1341, + "epoch": 1.75, + "grad_norm": 0.1689453125, + "learning_rate": 9.024522633817756e-06, + "loss": 2.1242, "step": 5180 }, { - "epoch": 0.88, - "grad_norm": 0.2109375, - "learning_rate": 0.000137752234938331, - "loss": 2.1643, + "epoch": 1.76, + "grad_norm": 0.169921875, + "learning_rate": 8.90218187139591e-06, + "loss": 2.1412, "step": 5185 }, { - "epoch": 0.88, - "grad_norm": 0.20703125, - "learning_rate": 0.00013761531992171095, - "loss": 2.1469, + "epoch": 1.76, + "grad_norm": 0.1689453125, + "learning_rate": 8.780637392592495e-06, + "loss": 2.1045, "step": 5190 }, { - "epoch": 0.88, - "grad_norm": 0.2099609375, - "learning_rate": 0.00013747832272188596, - "loss": 2.1857, + "epoch": 1.76, + "grad_norm": 0.16796875, + "learning_rate": 8.659890259824532e-06, + "loss": 2.1174, "step": 5195 }, { - "epoch": 0.88, - "grad_norm": 0.2138671875, - "learning_rate": 0.00013734124363817208, - "loss": 2.1803, + "epoch": 1.76, + "grad_norm": 0.1640625, + "learning_rate": 8.53994152853952e-06, + "loss": 2.1193, "step": 5200 }, { - "epoch": 0.88, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001372040829700642, - "loss": 2.1367, + "epoch": 1.76, + "grad_norm": 0.1689453125, + "learning_rate": 8.420792247206177e-06, + "loss": 2.1263, "step": 5205 }, { - "epoch": 0.88, - "grad_norm": 0.2021484375, - "learning_rate": 0.00013706684101723562, - "loss": 2.1175, + "epoch": 1.76, + "grad_norm": 0.16796875, + "learning_rate": 8.302443457305209e-06, + "loss": 2.0897, "step": 5210 }, { - "epoch": 0.88, - "grad_norm": 0.2060546875, - "learning_rate": 0.00013692951807953708, - "loss": 2.1463, + "epoch": 1.77, + "grad_norm": 0.1708984375, + "learning_rate": 8.184896193320246e-06, + "loss": 2.1078, "step": 5215 }, { - "epoch": 0.88, - "grad_norm": 0.2138671875, - "learning_rate": 0.00013679211445699632, - "loss": 2.1948, + "epoch": 1.77, + "grad_norm": 0.1689453125, + "learning_rate": 8.068151482728802e-06, + "loss": 2.0994, "step": 5220 }, { - "epoch": 0.88, - "grad_norm": 0.20703125, - "learning_rate": 0.0001366546304498173, - "loss": 2.1677, + "epoch": 1.77, + "grad_norm": 0.16796875, + "learning_rate": 7.952210345993339e-06, + "loss": 2.1262, "step": 5225 }, { - "epoch": 0.89, - "grad_norm": 0.208984375, - "learning_rate": 0.00013651706635837976, - "loss": 2.1749, + "epoch": 1.77, + "grad_norm": 0.16796875, + "learning_rate": 7.837073796552241e-06, + "loss": 2.096, "step": 5230 }, { - "epoch": 0.89, - "grad_norm": 0.2041015625, - "learning_rate": 0.00013637942248323828, - "loss": 2.1, + "epoch": 1.77, + "grad_norm": 0.169921875, + "learning_rate": 7.72274284081106e-06, + "loss": 2.107, "step": 5235 }, { - "epoch": 0.89, - "grad_norm": 0.205078125, - "learning_rate": 0.00013624169912512173, - "loss": 2.1391, + "epoch": 1.77, + "grad_norm": 0.169921875, + "learning_rate": 7.609218478133628e-06, + "loss": 2.1182, "step": 5240 }, { - "epoch": 0.89, - "grad_norm": 0.20703125, - "learning_rate": 0.00013610389658493276, - "loss": 2.1248, + "epoch": 1.78, + "grad_norm": 0.1669921875, + "learning_rate": 7.4965017008334245e-06, + "loss": 2.0934, "step": 5245 }, { - "epoch": 0.89, - "grad_norm": 0.2099609375, - "learning_rate": 0.00013596601516374697, - "loss": 2.1287, + "epoch": 1.78, + "grad_norm": 0.1708984375, + "learning_rate": 7.3845934941648046e-06, + "loss": 2.1126, "step": 5250 }, { - "epoch": 0.89, - "grad_norm": 0.2080078125, - "learning_rate": 0.00013582805516281217, - "loss": 2.1049, + "epoch": 1.78, + "grad_norm": 0.16796875, + "learning_rate": 7.2734948363144206e-06, + "loss": 2.0974, "step": 5255 }, { - "epoch": 0.89, - "grad_norm": 0.21484375, - "learning_rate": 0.0001356900168835481, - "loss": 2.1764, + "epoch": 1.78, + "grad_norm": 0.16796875, + "learning_rate": 7.163206698392744e-06, + "loss": 2.1165, "step": 5260 }, { - "epoch": 0.89, - "grad_norm": 0.2392578125, - "learning_rate": 0.00013555190062754534, - "loss": 2.163, + "epoch": 1.78, + "grad_norm": 0.1650390625, + "learning_rate": 7.0537300444254435e-06, + "loss": 2.1232, "step": 5265 }, { - "epoch": 0.89, - "grad_norm": 0.2099609375, - "learning_rate": 0.00013541370669656487, - "loss": 2.1276, + "epoch": 1.78, + "grad_norm": 0.169921875, + "learning_rate": 6.945065831345077e-06, + "loss": 2.1181, "step": 5270 }, { - "epoch": 0.89, - "grad_norm": 0.2109375, - "learning_rate": 0.00013527543539253742, - "loss": 2.1712, + "epoch": 1.79, + "grad_norm": 0.1728515625, + "learning_rate": 6.837215008982633e-06, + "loss": 2.1167, "step": 5275 }, { - "epoch": 0.89, - "grad_norm": 0.208984375, - "learning_rate": 0.00013513708701756277, - "loss": 2.1763, + "epoch": 1.79, + "grad_norm": 0.169921875, + "learning_rate": 6.7301785200593046e-06, + "loss": 2.1474, "step": 5280 }, { - "epoch": 0.9, - "grad_norm": 0.2021484375, - "learning_rate": 0.000134998661873909, - "loss": 2.1273, + "epoch": 1.79, + "grad_norm": 0.1748046875, + "learning_rate": 6.623957300178207e-06, + "loss": 2.0954, "step": 5285 }, { - "epoch": 0.9, - "grad_norm": 0.2099609375, - "learning_rate": 0.00013486016026401202, - "loss": 2.1176, + "epoch": 1.79, + "grad_norm": 0.16796875, + "learning_rate": 6.518552277816215e-06, + "loss": 2.1279, "step": 5290 }, { - "epoch": 0.9, - "grad_norm": 0.203125, - "learning_rate": 0.00013472158249047477, - "loss": 2.1221, + "epoch": 1.79, + "grad_norm": 0.169921875, + "learning_rate": 6.413964374315851e-06, + "loss": 2.129, "step": 5295 }, { - "epoch": 0.9, - "grad_norm": 0.2138671875, - "learning_rate": 0.00013458292885606656, - "loss": 2.1508, + "epoch": 1.8, + "grad_norm": 0.1669921875, + "learning_rate": 6.31019450387721e-06, + "loss": 2.1031, "step": 5300 }, { - "epoch": 0.9, - "grad_norm": 0.20703125, - "learning_rate": 0.00013444419966372252, - "loss": 2.1472, + "epoch": 1.8, + "grad_norm": 0.17578125, + "learning_rate": 6.207243573549959e-06, + "loss": 2.1047, "step": 5305 }, { - "epoch": 0.9, - "grad_norm": 0.21875, - "learning_rate": 0.00013430539521654277, - "loss": 2.2225, + "epoch": 1.8, + "grad_norm": 0.1689453125, + "learning_rate": 6.1051124832254944e-06, + "loss": 2.0973, "step": 5310 }, { - "epoch": 0.9, - "grad_norm": 0.2021484375, - "learning_rate": 0.0001341665158177919, - "loss": 2.1097, + "epoch": 1.8, + "grad_norm": 0.1689453125, + "learning_rate": 6.003802125628966e-06, + "loss": 2.133, "step": 5315 }, { - "epoch": 0.9, - "grad_norm": 0.205078125, - "learning_rate": 0.00013402756177089827, - "loss": 2.1191, + "epoch": 1.8, + "grad_norm": 0.171875, + "learning_rate": 5.903313386311527e-06, + "loss": 2.094, "step": 5320 }, { - "epoch": 0.9, - "grad_norm": 0.2119140625, - "learning_rate": 0.0001338885333794533, - "loss": 2.1713, + "epoch": 1.8, + "grad_norm": 0.1689453125, + "learning_rate": 5.803647143642554e-06, + "loss": 2.1193, "step": 5325 }, { - "epoch": 0.9, - "grad_norm": 0.205078125, - "learning_rate": 0.00013374943094721084, - "loss": 2.1795, + "epoch": 1.81, + "grad_norm": 0.1728515625, + "learning_rate": 5.704804268802077e-06, + "loss": 2.1236, "step": 5330 }, { - "epoch": 0.9, - "grad_norm": 0.2138671875, - "learning_rate": 0.00013361025477808656, - "loss": 2.1675, + "epoch": 1.81, + "grad_norm": 0.17578125, + "learning_rate": 5.606785625773048e-06, + "loss": 2.1485, "step": 5335 }, { - "epoch": 0.9, - "grad_norm": 0.2099609375, - "learning_rate": 0.00013347100517615716, - "loss": 2.1828, + "epoch": 1.81, + "grad_norm": 0.1748046875, + "learning_rate": 5.5095920713338134e-06, + "loss": 2.1401, "step": 5340 }, { - "epoch": 0.91, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001333316824456598, - "loss": 2.1384, + "epoch": 1.81, + "grad_norm": 0.1748046875, + "learning_rate": 5.413224455050692e-06, + "loss": 2.1279, "step": 5345 }, { - "epoch": 0.91, - "grad_norm": 0.2119140625, - "learning_rate": 0.00013319228689099154, - "loss": 2.1835, + "epoch": 1.81, + "grad_norm": 0.1689453125, + "learning_rate": 5.3176836192704414e-06, + "loss": 2.1355, "step": 5350 }, { - "epoch": 0.91, - "grad_norm": 0.205078125, - "learning_rate": 0.00013305281881670827, - "loss": 2.1461, + "epoch": 1.81, + "grad_norm": 0.16796875, + "learning_rate": 5.222970399112981e-06, + "loss": 2.119, "step": 5355 }, { - "epoch": 0.91, - "grad_norm": 0.208984375, - "learning_rate": 0.00013291327852752458, - "loss": 2.1473, + "epoch": 1.82, + "grad_norm": 0.16796875, + "learning_rate": 5.12908562246408e-06, + "loss": 2.1322, "step": 5360 }, { - "epoch": 0.91, - "grad_norm": 0.22265625, - "learning_rate": 0.00013277366632831271, - "loss": 2.1584, + "epoch": 1.82, + "grad_norm": 0.171875, + "learning_rate": 5.036030109968082e-06, + "loss": 2.1283, "step": 5365 }, { - "epoch": 0.91, - "grad_norm": 0.21484375, - "learning_rate": 0.0001326339825241021, - "loss": 2.1336, + "epoch": 1.82, + "grad_norm": 0.1650390625, + "learning_rate": 4.9438046750207465e-06, + "loss": 2.1171, "step": 5370 }, { - "epoch": 0.91, - "grad_norm": 0.205078125, - "learning_rate": 0.00013249422742007852, - "loss": 2.1211, + "epoch": 1.82, + "grad_norm": 0.16796875, + "learning_rate": 4.8524101237621635e-06, + "loss": 2.1062, "step": 5375 }, { - "epoch": 0.91, - "grad_norm": 0.2060546875, - "learning_rate": 0.00013235440132158366, - "loss": 2.1066, + "epoch": 1.82, + "grad_norm": 0.1689453125, + "learning_rate": 4.7618472550696954e-06, + "loss": 2.1147, "step": 5380 }, { - "epoch": 0.91, - "grad_norm": 0.212890625, - "learning_rate": 0.00013221450453411413, - "loss": 2.1636, + "epoch": 1.82, + "grad_norm": 0.1728515625, + "learning_rate": 4.67211686055099e-06, + "loss": 2.1405, "step": 5385 }, { - "epoch": 0.91, - "grad_norm": 0.20703125, - "learning_rate": 0.00013207453736332117, - "loss": 2.1426, + "epoch": 1.83, + "grad_norm": 0.1728515625, + "learning_rate": 4.583219724537046e-06, + "loss": 2.1144, "step": 5390 }, { - "epoch": 0.91, - "grad_norm": 0.2138671875, - "learning_rate": 0.0001319345001150097, - "loss": 2.178, + "epoch": 1.83, + "grad_norm": 0.16796875, + "learning_rate": 4.495156624075347e-06, + "loss": 2.1064, "step": 5395 }, { - "epoch": 0.91, - "grad_norm": 0.205078125, - "learning_rate": 0.0001317943930951378, - "loss": 2.1224, + "epoch": 1.83, + "grad_norm": 0.169921875, + "learning_rate": 4.407928328923194e-06, + "loss": 2.1348, "step": 5400 }, { - "epoch": 0.92, - "grad_norm": 0.2216796875, - "learning_rate": 0.00013165421660981592, - "loss": 2.1353, + "epoch": 1.83, + "grad_norm": 0.1728515625, + "learning_rate": 4.321535601540738e-06, + "loss": 2.1005, "step": 5405 }, { - "epoch": 0.92, - "grad_norm": 0.2119140625, - "learning_rate": 0.00013151397096530635, - "loss": 2.1219, + "epoch": 1.83, + "grad_norm": 0.16796875, + "learning_rate": 4.2359791970845496e-06, + "loss": 2.0963, "step": 5410 }, { - "epoch": 0.92, - "grad_norm": 0.2158203125, - "learning_rate": 0.0001313736564680224, - "loss": 2.1358, + "epoch": 1.83, + "grad_norm": 0.1708984375, + "learning_rate": 4.1512598634009025e-06, + "loss": 2.0732, "step": 5415 }, { - "epoch": 0.92, - "grad_norm": 0.208984375, - "learning_rate": 0.0001312332734245279, - "loss": 2.1331, + "epoch": 1.84, + "grad_norm": 0.1689453125, + "learning_rate": 4.067378341019257e-06, + "loss": 2.1269, "step": 5420 }, { - "epoch": 0.92, - "grad_norm": 0.2060546875, - "learning_rate": 0.0001310928221415364, - "loss": 2.126, + "epoch": 1.84, + "grad_norm": 0.173828125, + "learning_rate": 3.984335363145752e-06, + "loss": 2.1027, "step": 5425 }, { - "epoch": 0.92, - "grad_norm": 0.2119140625, - "learning_rate": 0.00013095230292591055, - "loss": 2.1418, + "epoch": 1.84, + "grad_norm": 0.1708984375, + "learning_rate": 3.902131655656893e-06, + "loss": 2.0816, "step": 5430 }, { - "epoch": 0.92, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001308117160846614, - "loss": 2.1893, + "epoch": 1.84, + "grad_norm": 0.16796875, + "learning_rate": 3.820767937093095e-06, + "loss": 2.1153, "step": 5435 }, { - "epoch": 0.92, - "grad_norm": 0.208984375, - "learning_rate": 0.0001306710619249478, - "loss": 2.1608, + "epoch": 1.84, + "grad_norm": 0.166015625, + "learning_rate": 3.740244918652469e-06, + "loss": 2.1148, "step": 5440 }, { - "epoch": 0.92, - "grad_norm": 0.2041015625, - "learning_rate": 0.00013053034075407555, - "loss": 2.1653, + "epoch": 1.84, + "grad_norm": 0.169921875, + "learning_rate": 3.6605633041846053e-06, + "loss": 2.0833, "step": 5445 }, { - "epoch": 0.92, - "grad_norm": 0.2109375, - "learning_rate": 0.00013038955287949708, - "loss": 2.141, + "epoch": 1.85, + "grad_norm": 0.16796875, + "learning_rate": 3.581723790184377e-06, + "loss": 2.1154, "step": 5450 }, { - "epoch": 0.92, - "grad_norm": 0.212890625, - "learning_rate": 0.00013024869860881036, - "loss": 2.145, + "epoch": 1.85, + "grad_norm": 0.1650390625, + "learning_rate": 3.503727065785878e-06, + "loss": 2.1344, "step": 5455 }, { - "epoch": 0.92, - "grad_norm": 0.2099609375, - "learning_rate": 0.00013010777824975852, - "loss": 2.1504, + "epoch": 1.85, + "grad_norm": 0.173828125, + "learning_rate": 3.4265738127564286e-06, + "loss": 2.1045, "step": 5460 }, { - "epoch": 0.93, - "grad_norm": 0.2119140625, - "learning_rate": 0.00012996679211022908, - "loss": 2.1545, + "epoch": 1.85, + "grad_norm": 0.171875, + "learning_rate": 3.350264705490569e-06, + "loss": 2.1144, "step": 5465 }, { - "epoch": 0.93, - "grad_norm": 0.2109375, - "learning_rate": 0.00012982574049825324, - "loss": 2.118, + "epoch": 1.85, + "grad_norm": 0.1708984375, + "learning_rate": 3.2748004110041863e-06, + "loss": 2.1073, "step": 5470 }, { - "epoch": 0.93, - "grad_norm": 0.203125, - "learning_rate": 0.00012968462372200517, - "loss": 2.1523, + "epoch": 1.85, + "grad_norm": 0.1669921875, + "learning_rate": 3.2001815889286856e-06, + "loss": 2.0838, "step": 5475 }, { - "epoch": 0.93, - "grad_norm": 0.2109375, - "learning_rate": 0.00012954344208980167, - "loss": 2.1289, + "epoch": 1.86, + "grad_norm": 0.1689453125, + "learning_rate": 3.126408891505217e-06, + "loss": 2.0874, "step": 5480 }, { - "epoch": 0.93, - "grad_norm": 0.2138671875, - "learning_rate": 0.000129402195910101, - "loss": 2.15, + "epoch": 1.86, + "grad_norm": 0.16796875, + "learning_rate": 3.0534829635789684e-06, + "loss": 2.1232, "step": 5485 }, { - "epoch": 0.93, - "grad_norm": 0.2158203125, - "learning_rate": 0.00012926088549150246, - "loss": 2.1693, + "epoch": 1.86, + "grad_norm": 0.1669921875, + "learning_rate": 2.9814044425935606e-06, + "loss": 2.1074, "step": 5490 }, { - "epoch": 0.93, - "grad_norm": 0.2041015625, - "learning_rate": 0.00012911951114274588, - "loss": 2.1559, + "epoch": 1.86, + "grad_norm": 0.1689453125, + "learning_rate": 2.910173958585416e-06, + "loss": 2.0819, "step": 5495 }, { - "epoch": 0.93, - "grad_norm": 0.2158203125, - "learning_rate": 0.0001289780731727106, - "loss": 2.1352, + "epoch": 1.86, + "grad_norm": 0.169921875, + "learning_rate": 2.8397921341783317e-06, + "loss": 2.0771, "step": 5500 }, { - "epoch": 0.93, - "grad_norm": 0.2109375, - "learning_rate": 0.00012883657189041495, - "loss": 2.1314, + "epoch": 1.86, + "grad_norm": 0.1669921875, + "learning_rate": 2.770259584577972e-06, + "loss": 2.1079, "step": 5505 }, { - "epoch": 0.93, - "grad_norm": 0.208984375, - "learning_rate": 0.00012869500760501572, - "loss": 2.1777, + "epoch": 1.87, + "grad_norm": 0.1650390625, + "learning_rate": 2.7015769175665063e-06, + "loss": 2.0805, "step": 5510 }, { - "epoch": 0.93, - "grad_norm": 0.2109375, - "learning_rate": 0.00012855338062580732, - "loss": 2.1191, + "epoch": 1.87, + "grad_norm": 0.169921875, + "learning_rate": 2.633744733497312e-06, + "loss": 2.1227, "step": 5515 }, { - "epoch": 0.93, - "grad_norm": 0.2099609375, - "learning_rate": 0.000128411691262221, - "loss": 2.1499, + "epoch": 1.87, + "grad_norm": 0.1767578125, + "learning_rate": 2.5667636252897143e-06, + "loss": 2.109, "step": 5520 }, { - "epoch": 0.94, - "grad_norm": 0.212890625, - "learning_rate": 0.00012826993982382448, - "loss": 2.1447, + "epoch": 1.87, + "grad_norm": 0.1630859375, + "learning_rate": 2.5006341784238107e-06, + "loss": 2.096, "step": 5525 }, { - "epoch": 0.94, - "grad_norm": 0.2099609375, - "learning_rate": 0.00012812812662032098, - "loss": 2.1268, + "epoch": 1.87, + "grad_norm": 0.171875, + "learning_rate": 2.435356970935354e-06, + "loss": 2.1027, "step": 5530 }, { - "epoch": 0.94, - "grad_norm": 0.2119140625, - "learning_rate": 0.0001279862519615487, - "loss": 2.15, + "epoch": 1.87, + "grad_norm": 0.1728515625, + "learning_rate": 2.370932573410667e-06, + "loss": 2.1081, "step": 5535 }, { - "epoch": 0.94, - "grad_norm": 0.205078125, - "learning_rate": 0.0001278443161574802, - "loss": 2.1364, + "epoch": 1.88, + "grad_norm": 0.171875, + "learning_rate": 2.3073615489817235e-06, + "loss": 2.1316, "step": 5540 }, { - "epoch": 0.94, - "grad_norm": 0.203125, - "learning_rate": 0.00012770231951822144, - "loss": 2.1466, + "epoch": 1.88, + "grad_norm": 0.16796875, + "learning_rate": 2.244644453321165e-06, + "loss": 2.0947, "step": 5545 }, { - "epoch": 0.94, - "grad_norm": 0.208984375, - "learning_rate": 0.00012756026235401154, - "loss": 2.1302, + "epoch": 1.88, + "grad_norm": 0.1728515625, + "learning_rate": 2.1827818346374482e-06, + "loss": 2.1408, "step": 5550 }, { - "epoch": 0.94, - "grad_norm": 0.2099609375, - "learning_rate": 0.00012741814497522165, - "loss": 2.1373, + "epoch": 1.88, + "grad_norm": 0.169921875, + "learning_rate": 2.121774233670104e-06, + "loss": 2.1094, "step": 5555 }, { - "epoch": 0.94, - "grad_norm": 0.212890625, - "learning_rate": 0.0001272759676923546, - "loss": 2.1432, + "epoch": 1.88, + "grad_norm": 0.16796875, + "learning_rate": 2.0616221836849638e-06, + "loss": 2.1169, "step": 5560 }, { - "epoch": 0.94, - "grad_norm": 0.2060546875, - "learning_rate": 0.00012713373081604397, - "loss": 2.128, + "epoch": 1.88, + "grad_norm": 0.1708984375, + "learning_rate": 2.0023262104694852e-06, + "loss": 2.1125, "step": 5565 }, { - "epoch": 0.94, - "grad_norm": 0.208984375, - "learning_rate": 0.00012699143465705378, - "loss": 2.1319, + "epoch": 1.89, + "grad_norm": 0.1708984375, + "learning_rate": 1.9438868323282124e-06, + "loss": 2.1181, "step": 5570 }, { - "epoch": 0.94, - "grad_norm": 0.2041015625, - "learning_rate": 0.0001268490795262773, - "loss": 2.1415, + "epoch": 1.89, + "grad_norm": 0.1689453125, + "learning_rate": 1.8863045600782003e-06, + "loss": 2.1209, "step": 5575 }, { - "epoch": 0.95, - "grad_norm": 0.2109375, - "learning_rate": 0.00012670666573473676, - "loss": 2.21, + "epoch": 1.89, + "grad_norm": 0.1748046875, + "learning_rate": 1.8295798970445754e-06, + "loss": 2.1231, "step": 5580 }, { - "epoch": 0.95, - "grad_norm": 0.20703125, - "learning_rate": 0.00012656419359358261, - "loss": 2.1752, + "epoch": 1.89, + "grad_norm": 0.171875, + "learning_rate": 1.7737133390561046e-06, + "loss": 2.112, "step": 5585 }, { - "epoch": 0.95, - "grad_norm": 0.2080078125, - "learning_rate": 0.00012642166341409277, - "loss": 2.1218, + "epoch": 1.89, + "grad_norm": 0.16796875, + "learning_rate": 1.7187053744409098e-06, + "loss": 2.098, "step": 5590 }, { - "epoch": 0.95, - "grad_norm": 0.2041015625, - "learning_rate": 0.00012627907550767187, - "loss": 2.1361, + "epoch": 1.9, + "grad_norm": 0.1708984375, + "learning_rate": 1.6645564840221396e-06, + "loss": 2.1044, "step": 5595 }, { - "epoch": 0.95, - "grad_norm": 0.20703125, - "learning_rate": 0.0001261364301858507, - "loss": 2.1305, + "epoch": 1.9, + "grad_norm": 0.1669921875, + "learning_rate": 1.6112671411138036e-06, + "loss": 2.0861, "step": 5600 }, { - "epoch": 0.95, - "grad_norm": 0.2041015625, - "learning_rate": 0.00012599372776028557, - "loss": 2.1319, + "epoch": 1.9, + "grad_norm": 0.1669921875, + "learning_rate": 1.5588378115166669e-06, + "loss": 2.0714, "step": 5605 }, { - "epoch": 0.95, - "grad_norm": 0.208984375, - "learning_rate": 0.0001258509685427575, - "loss": 2.1559, + "epoch": 1.9, + "grad_norm": 0.16796875, + "learning_rate": 1.5072689535141072e-06, + "loss": 2.1117, "step": 5610 }, { - "epoch": 0.95, - "grad_norm": 0.212890625, - "learning_rate": 0.00012570815284517153, - "loss": 2.1181, + "epoch": 1.9, + "grad_norm": 0.16796875, + "learning_rate": 1.4565610178681299e-06, + "loss": 2.1317, "step": 5615 }, { - "epoch": 0.95, - "grad_norm": 0.21875, - "learning_rate": 0.00012556528097955617, - "loss": 2.1424, + "epoch": 1.9, + "grad_norm": 0.169921875, + "learning_rate": 1.4067144478154604e-06, + "loss": 2.116, "step": 5620 }, { - "epoch": 0.95, - "grad_norm": 0.2216796875, - "learning_rate": 0.00012542235325806267, - "loss": 2.1025, + "epoch": 1.91, + "grad_norm": 0.1689453125, + "learning_rate": 1.3577296790636684e-06, + "loss": 2.1301, "step": 5625 }, { - "epoch": 0.95, - "grad_norm": 0.2099609375, - "learning_rate": 0.00012527936999296428, - "loss": 2.2013, + "epoch": 1.91, + "grad_norm": 0.1708984375, + "learning_rate": 1.3096071397873056e-06, + "loss": 2.0979, "step": 5630 }, { - "epoch": 0.95, - "grad_norm": 0.205078125, - "learning_rate": 0.00012513633149665557, - "loss": 2.1427, + "epoch": 1.91, + "grad_norm": 0.166015625, + "learning_rate": 1.2623472506242184e-06, + "loss": 2.1322, "step": 5635 }, { - "epoch": 0.96, - "grad_norm": 0.2119140625, - "learning_rate": 0.00012499323808165183, - "loss": 2.1794, + "epoch": 1.91, + "grad_norm": 0.171875, + "learning_rate": 1.2159504246718522e-06, + "loss": 2.1103, "step": 5640 }, { - "epoch": 0.96, - "grad_norm": 0.205078125, - "learning_rate": 0.00012485009006058835, - "loss": 2.1722, + "epoch": 1.91, + "grad_norm": 0.16796875, + "learning_rate": 1.1704170674836313e-06, + "loss": 2.1058, "step": 5645 }, { - "epoch": 0.96, - "grad_norm": 0.2177734375, - "learning_rate": 0.00012470688774621964, - "loss": 2.1241, + "epoch": 1.91, + "grad_norm": 0.1630859375, + "learning_rate": 1.125747577065428e-06, + "loss": 2.1137, "step": 5650 }, { - "epoch": 0.96, - "grad_norm": 0.21484375, - "learning_rate": 0.00012456363145141894, - "loss": 2.1439, + "epoch": 1.92, + "grad_norm": 0.169921875, + "learning_rate": 1.0819423438720665e-06, + "loss": 2.1135, "step": 5655 }, { - "epoch": 0.96, - "grad_norm": 0.2060546875, - "learning_rate": 0.00012442032148917738, - "loss": 2.1363, + "epoch": 1.92, + "grad_norm": 0.1748046875, + "learning_rate": 1.0390017508039473e-06, + "loss": 2.1429, "step": 5660 }, { - "epoch": 0.96, - "grad_norm": 0.212890625, - "learning_rate": 0.00012427695817260329, - "loss": 2.1426, + "epoch": 1.92, + "grad_norm": 0.17578125, + "learning_rate": 9.969261732036605e-07, + "loss": 2.1311, "step": 5665 }, { - "epoch": 0.96, - "grad_norm": 0.2119140625, - "learning_rate": 0.0001241335418149217, - "loss": 2.1132, + "epoch": 1.92, + "grad_norm": 0.1650390625, + "learning_rate": 9.557159788526892e-07, + "loss": 2.1151, "step": 5670 }, { - "epoch": 0.96, - "grad_norm": 0.208984375, - "learning_rate": 0.00012399007272947341, - "loss": 2.1441, + "epoch": 1.92, + "grad_norm": 0.1708984375, + "learning_rate": 9.153715279682784e-07, + "loss": 2.1337, "step": 5675 }, { - "epoch": 0.96, - "grad_norm": 0.2158203125, - "learning_rate": 0.00012384655122971445, - "loss": 2.1381, + "epoch": 1.92, + "grad_norm": 0.1689453125, + "learning_rate": 8.75893173200204e-07, + "loss": 2.1069, "step": 5680 }, { - "epoch": 0.96, - "grad_norm": 0.2119140625, - "learning_rate": 0.00012370297762921538, - "loss": 2.1614, + "epoch": 1.93, + "grad_norm": 0.169921875, + "learning_rate": 8.372812596277091e-07, + "loss": 2.1095, "step": 5685 }, { - "epoch": 0.96, - "grad_norm": 0.2060546875, - "learning_rate": 0.0001235593522416606, - "loss": 2.1412, + "epoch": 1.93, + "grad_norm": 0.169921875, + "learning_rate": 7.99536124756517e-07, + "loss": 2.0929, "step": 5690 }, { - "epoch": 0.96, - "grad_norm": 0.2138671875, - "learning_rate": 0.00012341567538084764, - "loss": 2.1509, + "epoch": 1.93, + "grad_norm": 0.1708984375, + "learning_rate": 7.62658098515856e-07, + "loss": 2.0956, "step": 5695 }, { - "epoch": 0.97, - "grad_norm": 0.22265625, - "learning_rate": 0.00012327194736068653, - "loss": 2.1336, + "epoch": 1.93, + "grad_norm": 0.1689453125, + "learning_rate": 7.266475032555619e-07, + "loss": 2.12, "step": 5700 }, { - "epoch": 0.97, - "grad_norm": 0.212890625, - "learning_rate": 0.00012312816849519899, - "loss": 2.1298, + "epoch": 1.93, + "grad_norm": 0.1708984375, + "learning_rate": 6.915046537433023e-07, + "loss": 2.1026, "step": 5705 }, { - "epoch": 0.97, - "grad_norm": 0.216796875, - "learning_rate": 0.00012298433909851785, - "loss": 2.189, + "epoch": 1.93, + "grad_norm": 0.1669921875, + "learning_rate": 6.572298571618118e-07, + "loss": 2.1075, "step": 5710 }, { - "epoch": 0.97, - "grad_norm": 0.2216796875, - "learning_rate": 0.00012284045948488648, - "loss": 2.1234, + "epoch": 1.94, + "grad_norm": 0.1689453125, + "learning_rate": 6.238234131061616e-07, + "loss": 2.0959, "step": 5715 }, { - "epoch": 0.97, - "grad_norm": 0.2041015625, - "learning_rate": 0.00012269652996865776, - "loss": 2.1426, + "epoch": 1.94, + "grad_norm": 0.1689453125, + "learning_rate": 5.912856135812051e-07, + "loss": 2.1018, "step": 5720 }, { - "epoch": 0.97, - "grad_norm": 0.2177734375, - "learning_rate": 0.00012255255086429372, - "loss": 2.2038, + "epoch": 1.94, + "grad_norm": 0.16796875, + "learning_rate": 5.596167429989807e-07, + "loss": 2.1054, "step": 5725 }, { - "epoch": 0.97, - "grad_norm": 0.2177734375, - "learning_rate": 0.00012240852248636473, - "loss": 2.1829, + "epoch": 1.94, + "grad_norm": 0.169921875, + "learning_rate": 5.288170781762469e-07, + "loss": 2.1114, "step": 5730 }, { - "epoch": 0.97, - "grad_norm": 0.2099609375, - "learning_rate": 0.00012226444514954878, - "loss": 2.1347, + "epoch": 1.94, + "grad_norm": 0.1689453125, + "learning_rate": 4.98886888332073e-07, + "loss": 2.1091, "step": 5735 }, { - "epoch": 0.97, - "grad_norm": 0.2041015625, - "learning_rate": 0.00012212031916863082, - "loss": 2.1792, + "epoch": 1.94, + "grad_norm": 0.16796875, + "learning_rate": 4.698264350854409e-07, + "loss": 2.1136, "step": 5740 }, { - "epoch": 0.97, - "grad_norm": 0.2080078125, - "learning_rate": 0.0001219761448585021, - "loss": 2.1241, + "epoch": 1.95, + "grad_norm": 0.1767578125, + "learning_rate": 4.416359724530139e-07, + "loss": 2.1143, "step": 5745 }, { - "epoch": 0.97, - "grad_norm": 0.21484375, - "learning_rate": 0.00012183192253415952, - "loss": 2.1887, + "epoch": 1.95, + "grad_norm": 0.1708984375, + "learning_rate": 4.143157468468717e-07, + "loss": 2.1154, "step": 5750 }, { - "epoch": 0.97, - "grad_norm": 0.208984375, - "learning_rate": 0.00012168765251070473, - "loss": 2.1419, + "epoch": 1.95, + "grad_norm": 0.1689453125, + "learning_rate": 3.878659970724008e-07, + "loss": 2.1069, "step": 5755 }, { - "epoch": 0.98, - "grad_norm": 0.2119140625, - "learning_rate": 0.00012154333510334375, - "loss": 2.1507, + "epoch": 1.95, + "grad_norm": 0.16796875, + "learning_rate": 3.622869543261298e-07, + "loss": 2.0821, "step": 5760 }, { - "epoch": 0.98, - "grad_norm": 0.2138671875, - "learning_rate": 0.00012139897062738606, - "loss": 2.1603, + "epoch": 1.95, + "grad_norm": 0.1748046875, + "learning_rate": 3.3757884219383085e-07, + "loss": 2.1078, "step": 5765 }, { - "epoch": 0.98, - "grad_norm": 0.2119140625, - "learning_rate": 0.00012125455939824393, - "loss": 2.1563, + "epoch": 1.95, + "grad_norm": 0.1728515625, + "learning_rate": 3.1374187664844346e-07, + "loss": 2.115, "step": 5770 }, { - "epoch": 0.98, - "grad_norm": 0.2119140625, - "learning_rate": 0.0001211101017314319, - "loss": 2.1201, + "epoch": 1.96, + "grad_norm": 0.16796875, + "learning_rate": 2.907762660482538e-07, + "loss": 2.1022, "step": 5775 }, { - "epoch": 0.98, - "grad_norm": 0.2158203125, - "learning_rate": 0.00012096559794256581, - "loss": 2.1329, + "epoch": 1.96, + "grad_norm": 0.1708984375, + "learning_rate": 2.6868221113505175e-07, + "loss": 2.1222, "step": 5780 }, { - "epoch": 0.98, - "grad_norm": 0.208984375, - "learning_rate": 0.00012082104834736244, - "loss": 2.1179, + "epoch": 1.96, + "grad_norm": 0.166015625, + "learning_rate": 2.474599050323989e-07, + "loss": 2.1228, "step": 5785 }, { - "epoch": 0.98, - "grad_norm": 0.208984375, - "learning_rate": 0.0001206764532616385, - "loss": 2.1557, + "epoch": 1.96, + "grad_norm": 0.169921875, + "learning_rate": 2.271095332438966e-07, + "loss": 2.0893, "step": 5790 }, - { - "epoch": 0.98, - "grad_norm": 0.2080078125, - "learning_rate": 0.00012053181300131022, - "loss": 2.1671, - "step": 5795 - }, - { - "epoch": 0.98, - "grad_norm": 0.2138671875, - "learning_rate": 0.00012038712788239236, - "loss": 2.1472, - "step": 5800 - }, - { - "epoch": 0.98, - "grad_norm": 0.2158203125, - "learning_rate": 0.00012024239822099792, - "loss": 2.1443, - "step": 5805 - }, - { - "epoch": 0.98, - "grad_norm": 0.212890625, - "learning_rate": 0.000120097624333337, - "loss": 2.1556, - "step": 5810 - }, - { - "epoch": 0.98, - "grad_norm": 0.21484375, - "learning_rate": 0.00011995280653571641, - "loss": 2.122, - "step": 5815 - }, - { - "epoch": 0.99, - "grad_norm": 0.212890625, - "learning_rate": 0.00011980794514453897, - "loss": 2.0965, - "step": 5820 - }, - { - "epoch": 0.99, - "grad_norm": 0.205078125, - "learning_rate": 0.00011966304047630263, - "loss": 2.1735, - "step": 5825 - }, - { - "epoch": 0.99, - "grad_norm": 0.21484375, - "learning_rate": 0.00011951809284759993, - "loss": 2.1382, - "step": 5830 - }, - { - "epoch": 0.99, - "grad_norm": 0.2138671875, - "learning_rate": 0.00011937310257511732, - "loss": 2.1571, - "step": 5835 - }, - { - "epoch": 0.99, - "grad_norm": 0.2041015625, - "learning_rate": 0.00011922806997563435, - "loss": 2.1056, - "step": 5840 - }, - { - "epoch": 0.99, - "grad_norm": 0.2099609375, - "learning_rate": 0.0001190829953660231, - "loss": 2.1016, - "step": 5845 - }, - { - "epoch": 0.99, - "grad_norm": 0.212890625, - "learning_rate": 0.00011893787906324738, - "loss": 2.1113, - "step": 5850 - }, - { - "epoch": 0.99, - "grad_norm": 0.2080078125, - "learning_rate": 0.0001187927213843622, - "loss": 2.1492, - "step": 5855 - }, - { - "epoch": 0.99, - "grad_norm": 0.2119140625, - "learning_rate": 0.0001186475226465128, - "loss": 2.1852, - "step": 5860 - }, - { - "epoch": 0.99, - "grad_norm": 0.21484375, - "learning_rate": 0.00011850228316693428, - "loss": 2.1586, - "step": 5865 - }, - { - "epoch": 0.99, - "grad_norm": 0.2080078125, - "learning_rate": 0.00011835700326295067, - "loss": 2.156, - "step": 5870 - }, - { - "epoch": 1.0, - "grad_norm": 0.212890625, - "learning_rate": 0.00011821168325197436, - "loss": 2.1291, - "step": 5875 - }, - { - "epoch": 1.0, - "grad_norm": 0.220703125, - "learning_rate": 0.00011806632345150538, - "loss": 2.1686, - "step": 5880 - }, - { - "epoch": 1.0, - "grad_norm": 0.2099609375, - "learning_rate": 0.00011792092417913063, - "loss": 2.1898, - "step": 5885 - }, - { - "epoch": 1.0, - "grad_norm": 0.2109375, - "learning_rate": 0.0001177754857525233, - "loss": 2.1896, - "step": 5890 - }, - { - "epoch": 1.0, - "grad_norm": 0.2021484375, - "learning_rate": 0.00011763000848944212, - "loss": 2.1315, - "step": 5895 - }, - { - "epoch": 1.0, - "grad_norm": 0.205078125, - "learning_rate": 0.00011748449270773066, - "loss": 2.1473, - "step": 5900 - }, - { - "epoch": 1.0, - "eval_loss": 2.1470842361450195, - "eval_runtime": 161.6105, - "eval_samples_per_second": 16.441, - "eval_steps_per_second": 2.061, - "step": 5904 - }, - { - "epoch": 1.0, - "grad_norm": 0.2197265625, - "learning_rate": 0.00011733893872531664, - "loss": 2.1194, - "step": 5905 - }, - { - "epoch": 1.0, - "grad_norm": 0.208984375, - "learning_rate": 0.00011719334686021129, - "loss": 2.1479, - "step": 5910 - }, - { - "epoch": 1.0, - "grad_norm": 0.2099609375, - "learning_rate": 0.00011704771743050851, - "loss": 2.1207, - "step": 5915 - }, - { - "epoch": 1.0, - "grad_norm": 0.2109375, - "learning_rate": 0.00011690205075438438, - "loss": 2.157, - "step": 5920 - }, - { - "epoch": 1.0, - "grad_norm": 0.2158203125, - "learning_rate": 0.00011675634715009631, - "loss": 2.1193, - "step": 5925 - }, - { - "epoch": 1.0, - "grad_norm": 0.2119140625, - "learning_rate": 0.00011661060693598233, - "loss": 2.1361, - "step": 5930 - }, - { - "epoch": 1.01, - "grad_norm": 0.2109375, - "learning_rate": 0.00011646483043046063, - "loss": 2.124, - "step": 5935 - }, - { - "epoch": 1.01, - "grad_norm": 0.208984375, - "learning_rate": 0.00011631901795202849, - "loss": 2.1033, - "step": 5940 - }, - { - "epoch": 1.01, - "grad_norm": 0.212890625, - "learning_rate": 0.0001161731698192619, - "loss": 2.1761, - "step": 5945 - }, - { - "epoch": 1.01, - "grad_norm": 0.2177734375, - "learning_rate": 0.0001160272863508147, - "loss": 2.1375, - "step": 5950 - }, - { - "epoch": 1.01, - "grad_norm": 0.220703125, - "learning_rate": 0.00011588136786541802, - "loss": 2.1671, - "step": 5955 - }, - { - "epoch": 1.01, - "grad_norm": 0.21484375, - "learning_rate": 0.00011573541468187936, - "loss": 2.1324, - "step": 5960 - }, - { - "epoch": 1.01, - "grad_norm": 0.216796875, - "learning_rate": 0.00011558942711908212, - "loss": 2.1454, - "step": 5965 - }, - { - "epoch": 1.01, - "grad_norm": 0.2099609375, - "learning_rate": 0.00011544340549598482, - "loss": 2.1152, - "step": 5970 - }, - { - "epoch": 1.01, - "grad_norm": 0.212890625, - "learning_rate": 0.00011529735013162036, - "loss": 2.1125, - "step": 5975 - }, - { - "epoch": 1.01, - "grad_norm": 0.2109375, - "learning_rate": 0.00011515126134509533, - "loss": 2.1649, - "step": 5980 - }, - { - "epoch": 1.01, - "grad_norm": 0.2109375, - "learning_rate": 0.00011500513945558947, - "loss": 2.1339, - "step": 5985 - }, - { - "epoch": 1.01, - "grad_norm": 0.22265625, - "learning_rate": 0.00011485898478235464, - "loss": 2.1462, - "step": 5990 - }, - { - "epoch": 1.02, - "grad_norm": 0.216796875, - "learning_rate": 0.00011471279764471452, - "loss": 2.1817, - "step": 5995 - }, - { - "epoch": 1.02, - "grad_norm": 0.21484375, - "learning_rate": 0.00011456657836206366, - "loss": 2.1261, - "step": 6000 - }, - { - "epoch": 1.02, - "grad_norm": 0.22265625, - "learning_rate": 0.00011442032725386675, - "loss": 2.1029, - "step": 6005 - }, - { - "epoch": 1.02, - "grad_norm": 0.2158203125, - "learning_rate": 0.00011427404463965814, - "loss": 2.1269, - "step": 6010 - }, - { - "epoch": 1.02, - "grad_norm": 0.20703125, - "learning_rate": 0.00011412773083904094, - "loss": 2.116, - "step": 6015 - }, - { - "epoch": 1.02, - "grad_norm": 0.216796875, - "learning_rate": 0.00011398138617168642, - "loss": 2.1198, - "step": 6020 - }, - { - "epoch": 1.02, - "grad_norm": 0.21875, - "learning_rate": 0.0001138350109573333, - "loss": 2.1262, - "step": 6025 - }, - { - "epoch": 1.02, - "grad_norm": 0.21484375, - "learning_rate": 0.00011368860551578702, - "loss": 2.1268, - "step": 6030 - }, - { - "epoch": 1.02, - "grad_norm": 0.2138671875, - "learning_rate": 0.00011354217016691905, - "loss": 2.157, - "step": 6035 - }, - { - "epoch": 1.02, - "grad_norm": 0.212890625, - "learning_rate": 0.0001133957052306663, - "loss": 2.1132, - "step": 6040 - }, - { - "epoch": 1.02, - "grad_norm": 0.2158203125, - "learning_rate": 0.00011324921102703015, - "loss": 2.1324, - "step": 6045 - }, - { - "epoch": 1.02, - "grad_norm": 0.2099609375, - "learning_rate": 0.00011310268787607603, - "loss": 2.1372, - "step": 6050 - }, - { - "epoch": 1.03, - "grad_norm": 0.224609375, - "learning_rate": 0.00011295613609793267, - "loss": 2.1227, - "step": 6055 - }, - { - "epoch": 1.03, - "grad_norm": 0.2119140625, - "learning_rate": 0.00011280955601279127, - "loss": 2.1311, - "step": 6060 - }, - { - "epoch": 1.03, - "grad_norm": 0.2109375, - "learning_rate": 0.0001126629479409048, - "loss": 2.1219, - "step": 6065 - }, - { - "epoch": 1.03, - "grad_norm": 0.216796875, - "learning_rate": 0.00011251631220258753, - "loss": 2.0692, - "step": 6070 - }, - { - "epoch": 1.03, - "grad_norm": 0.2177734375, - "learning_rate": 0.00011236964911821413, - "loss": 2.1236, - "step": 6075 - }, - { - "epoch": 1.03, - "grad_norm": 0.2119140625, - "learning_rate": 0.00011222295900821896, - "loss": 2.1425, - "step": 6080 - }, - { - "epoch": 1.03, - "grad_norm": 0.21484375, - "learning_rate": 0.00011207624219309544, - "loss": 2.1312, - "step": 6085 - }, - { - "epoch": 1.03, - "grad_norm": 0.2275390625, - "learning_rate": 0.00011192949899339544, - "loss": 2.1528, - "step": 6090 - }, - { - "epoch": 1.03, - "grad_norm": 0.2119140625, - "learning_rate": 0.00011178272972972833, - "loss": 2.1495, - "step": 6095 - }, - { - "epoch": 1.03, - "grad_norm": 0.212890625, - "learning_rate": 0.00011163593472276048, - "loss": 2.1504, - "step": 6100 - }, - { - "epoch": 1.03, - "grad_norm": 0.2099609375, - "learning_rate": 0.00011148911429321456, - "loss": 2.0733, - "step": 6105 - }, - { - "epoch": 1.03, - "grad_norm": 0.21875, - "learning_rate": 0.00011134226876186871, - "loss": 2.0977, - "step": 6110 - }, - { - "epoch": 1.04, - "grad_norm": 0.216796875, - "learning_rate": 0.00011119539844955595, - "loss": 2.1138, - "step": 6115 - }, - { - "epoch": 1.04, - "grad_norm": 0.2177734375, - "learning_rate": 0.00011104850367716344, - "loss": 2.1027, - "step": 6120 - }, - { - "epoch": 1.04, - "grad_norm": 0.216796875, - "learning_rate": 0.00011090158476563175, - "loss": 2.1559, - "step": 6125 - }, - { - "epoch": 1.04, - "grad_norm": 0.212890625, - "learning_rate": 0.00011075464203595427, - "loss": 2.1822, - "step": 6130 - }, - { - "epoch": 1.04, - "grad_norm": 0.2177734375, - "learning_rate": 0.00011060767580917634, - "loss": 2.1362, - "step": 6135 - }, - { - "epoch": 1.04, - "grad_norm": 0.2138671875, - "learning_rate": 0.00011046068640639464, - "loss": 2.1073, - "step": 6140 - }, - { - "epoch": 1.04, - "grad_norm": 0.22265625, - "learning_rate": 0.00011031367414875658, - "loss": 2.1463, - "step": 6145 - }, - { - "epoch": 1.04, - "grad_norm": 0.224609375, - "learning_rate": 0.0001101666393574594, - "loss": 2.1327, - "step": 6150 - }, - { - "epoch": 1.04, - "grad_norm": 0.2119140625, - "learning_rate": 0.00011001958235374963, - "loss": 2.1137, - "step": 6155 - }, - { - "epoch": 1.04, - "grad_norm": 0.216796875, - "learning_rate": 0.0001098725034589223, - "loss": 2.1158, - "step": 6160 - }, - { - "epoch": 1.04, - "grad_norm": 0.2197265625, - "learning_rate": 0.00010972540299432033, - "loss": 2.1705, - "step": 6165 - }, - { - "epoch": 1.04, - "grad_norm": 0.228515625, - "learning_rate": 0.00010957828128133363, - "loss": 2.1357, - "step": 6170 - }, - { - "epoch": 1.05, - "grad_norm": 0.2158203125, - "learning_rate": 0.00010943113864139868, - "loss": 2.1597, - "step": 6175 - }, - { - "epoch": 1.05, - "grad_norm": 0.2265625, - "learning_rate": 0.00010928397539599766, - "loss": 2.1408, - "step": 6180 - }, - { - "epoch": 1.05, - "grad_norm": 0.220703125, - "learning_rate": 0.00010913679186665766, - "loss": 2.1515, - "step": 6185 - }, - { - "epoch": 1.05, - "grad_norm": 0.224609375, - "learning_rate": 0.00010898958837495021, - "loss": 2.1577, - "step": 6190 - }, - { - "epoch": 1.05, - "grad_norm": 0.224609375, - "learning_rate": 0.00010884236524249039, - "loss": 2.1414, - "step": 6195 - }, - { - "epoch": 1.05, - "grad_norm": 0.21875, - "learning_rate": 0.0001086951227909362, - "loss": 2.0958, - "step": 6200 - }, - { - "epoch": 1.05, - "grad_norm": 0.216796875, - "learning_rate": 0.00010854786134198786, - "loss": 2.0813, - "step": 6205 - }, - { - "epoch": 1.05, - "grad_norm": 0.2197265625, - "learning_rate": 0.00010840058121738712, - "loss": 2.107, - "step": 6210 - }, - { - "epoch": 1.05, - "grad_norm": 0.2177734375, - "learning_rate": 0.00010825328273891646, - "loss": 2.1572, - "step": 6215 - }, - { - "epoch": 1.05, - "grad_norm": 0.220703125, - "learning_rate": 0.00010810596622839854, - "loss": 2.1621, - "step": 6220 - }, - { - "epoch": 1.05, - "grad_norm": 0.2197265625, - "learning_rate": 0.00010795863200769538, - "loss": 2.1263, - "step": 6225 - }, - { - "epoch": 1.06, - "grad_norm": 0.216796875, - "learning_rate": 0.00010781128039870769, - "loss": 2.113, - "step": 6230 - }, - { - "epoch": 1.06, - "grad_norm": 0.216796875, - "learning_rate": 0.0001076639117233742, - "loss": 2.1485, - "step": 6235 - }, - { - "epoch": 1.06, - "grad_norm": 0.2255859375, - "learning_rate": 0.00010751652630367086, - "loss": 2.0961, - "step": 6240 - }, - { - "epoch": 1.06, - "grad_norm": 0.21875, - "learning_rate": 0.0001073691244616103, - "loss": 2.1367, - "step": 6245 - }, - { - "epoch": 1.06, - "grad_norm": 0.23828125, - "learning_rate": 0.00010722170651924091, - "loss": 2.1195, - "step": 6250 - }, - { - "epoch": 1.06, - "grad_norm": 0.2294921875, - "learning_rate": 0.00010707427279864637, - "loss": 2.1521, - "step": 6255 - }, - { - "epoch": 1.06, - "grad_norm": 0.21875, - "learning_rate": 0.00010692682362194481, - "loss": 2.1207, - "step": 6260 - }, - { - "epoch": 1.06, - "grad_norm": 0.2197265625, - "learning_rate": 0.00010677935931128807, - "loss": 2.1476, - "step": 6265 - }, - { - "epoch": 1.06, - "grad_norm": 0.21875, - "learning_rate": 0.0001066318801888611, - "loss": 2.0966, - "step": 6270 - }, - { - "epoch": 1.06, - "grad_norm": 0.224609375, - "learning_rate": 0.00010648438657688123, - "loss": 2.1013, - "step": 6275 - }, - { - "epoch": 1.06, - "grad_norm": 0.2216796875, - "learning_rate": 0.00010633687879759738, - "loss": 2.1487, - "step": 6280 - }, - { - "epoch": 1.06, - "grad_norm": 0.2197265625, - "learning_rate": 0.00010618935717328944, - "loss": 2.1477, - "step": 6285 - }, - { - "epoch": 1.07, - "grad_norm": 0.2236328125, - "learning_rate": 0.00010604182202626765, - "loss": 2.1778, - "step": 6290 - }, - { - "epoch": 1.07, - "grad_norm": 0.234375, - "learning_rate": 0.0001058942736788717, - "loss": 2.1494, - "step": 6295 - }, - { - "epoch": 1.07, - "grad_norm": 0.2138671875, - "learning_rate": 0.00010574671245347005, - "loss": 2.1321, - "step": 6300 - }, - { - "epoch": 1.07, - "grad_norm": 0.216796875, - "learning_rate": 0.00010559913867245952, - "loss": 2.1529, - "step": 6305 - }, - { - "epoch": 1.07, - "grad_norm": 0.21875, - "learning_rate": 0.00010545155265826414, - "loss": 2.1089, - "step": 6310 - }, - { - "epoch": 1.07, - "grad_norm": 0.22265625, - "learning_rate": 0.00010530395473333477, - "loss": 2.1105, - "step": 6315 - }, - { - "epoch": 1.07, - "grad_norm": 0.2197265625, - "learning_rate": 0.00010515634522014828, - "loss": 2.0971, - "step": 6320 - }, - { - "epoch": 1.07, - "grad_norm": 0.2236328125, - "learning_rate": 0.00010500872444120686, - "loss": 2.1279, - "step": 6325 - }, - { - "epoch": 1.07, - "grad_norm": 0.234375, - "learning_rate": 0.0001048610927190373, - "loss": 2.1178, - "step": 6330 - }, - { - "epoch": 1.07, - "grad_norm": 0.2197265625, - "learning_rate": 0.00010471345037619032, - "loss": 2.1238, - "step": 6335 - }, - { - "epoch": 1.07, - "grad_norm": 0.2236328125, - "learning_rate": 0.0001045657977352398, - "loss": 2.1127, - "step": 6340 - }, - { - "epoch": 1.07, - "grad_norm": 0.2177734375, - "learning_rate": 0.0001044181351187822, - "loss": 2.1348, - "step": 6345 - }, - { - "epoch": 1.08, - "grad_norm": 0.224609375, - "learning_rate": 0.00010427046284943572, - "loss": 2.1502, - "step": 6350 - }, - { - "epoch": 1.08, - "grad_norm": 0.2255859375, - "learning_rate": 0.0001041227812498396, - "loss": 2.1288, - "step": 6355 - }, - { - "epoch": 1.08, - "grad_norm": 0.216796875, - "learning_rate": 0.00010397509064265359, - "loss": 2.1578, - "step": 6360 - }, - { - "epoch": 1.08, - "grad_norm": 0.2265625, - "learning_rate": 0.00010382739135055703, - "loss": 2.1656, - "step": 6365 - }, - { - "epoch": 1.08, - "grad_norm": 0.220703125, - "learning_rate": 0.00010367968369624825, - "loss": 2.1261, - "step": 6370 - }, - { - "epoch": 1.08, - "grad_norm": 0.21875, - "learning_rate": 0.00010353196800244382, - "loss": 2.1418, - "step": 6375 - }, - { - "epoch": 1.08, - "grad_norm": 0.21875, - "learning_rate": 0.00010338424459187801, - "loss": 2.1163, - "step": 6380 - }, - { - "epoch": 1.08, - "grad_norm": 0.220703125, - "learning_rate": 0.00010323651378730179, - "loss": 2.114, - "step": 6385 - }, - { - "epoch": 1.08, - "grad_norm": 0.2265625, - "learning_rate": 0.0001030887759114823, - "loss": 2.0651, - "step": 6390 - }, - { - "epoch": 1.08, - "grad_norm": 0.21484375, - "learning_rate": 0.00010294103128720227, - "loss": 2.1278, - "step": 6395 - }, - { - "epoch": 1.08, - "grad_norm": 0.224609375, - "learning_rate": 0.00010279328023725905, - "loss": 2.1356, - "step": 6400 - }, - { - "epoch": 1.08, - "grad_norm": 0.2197265625, - "learning_rate": 0.00010264552308446403, - "loss": 2.1141, - "step": 6405 - }, - { - "epoch": 1.09, - "grad_norm": 0.220703125, - "learning_rate": 0.00010249776015164197, - "loss": 2.0926, - "step": 6410 - }, - { - "epoch": 1.09, - "grad_norm": 0.224609375, - "learning_rate": 0.00010234999176163026, - "loss": 2.1441, - "step": 6415 - }, - { - "epoch": 1.09, - "grad_norm": 0.224609375, - "learning_rate": 0.00010220221823727822, - "loss": 2.15, - "step": 6420 - }, - { - "epoch": 1.09, - "grad_norm": 0.2177734375, - "learning_rate": 0.00010205443990144636, - "loss": 2.126, - "step": 6425 - }, - { - "epoch": 1.09, - "grad_norm": 0.2265625, - "learning_rate": 0.0001019066570770057, - "loss": 2.1257, - "step": 6430 - }, - { - "epoch": 1.09, - "grad_norm": 0.228515625, - "learning_rate": 0.00010175887008683712, - "loss": 2.1286, - "step": 6435 - }, - { - "epoch": 1.09, - "grad_norm": 0.2255859375, - "learning_rate": 0.00010161107925383054, - "loss": 2.1173, - "step": 6440 - }, - { - "epoch": 1.09, - "grad_norm": 0.21875, - "learning_rate": 0.00010146328490088428, - "loss": 2.1478, - "step": 6445 - }, - { - "epoch": 1.09, - "grad_norm": 0.2216796875, - "learning_rate": 0.00010131548735090437, - "loss": 2.134, - "step": 6450 - }, - { - "epoch": 1.09, - "grad_norm": 0.22265625, - "learning_rate": 0.00010116768692680387, - "loss": 2.1342, - "step": 6455 - }, - { - "epoch": 1.09, - "grad_norm": 0.2294921875, - "learning_rate": 0.00010101988395150203, - "loss": 2.1318, - "step": 6460 - }, - { - "epoch": 1.09, - "grad_norm": 0.2216796875, - "learning_rate": 0.00010087207874792374, - "loss": 2.1647, - "step": 6465 - }, - { - "epoch": 1.1, - "grad_norm": 0.224609375, - "learning_rate": 0.00010072427163899874, - "loss": 2.1257, - "step": 6470 - }, - { - "epoch": 1.1, - "grad_norm": 0.2158203125, - "learning_rate": 0.0001005764629476609, - "loss": 2.1106, - "step": 6475 - }, - { - "epoch": 1.1, - "grad_norm": 0.224609375, - "learning_rate": 0.0001004286529968476, - "loss": 2.1002, - "step": 6480 - }, - { - "epoch": 1.1, - "grad_norm": 0.22265625, - "learning_rate": 0.00010028084210949895, - "loss": 2.1074, - "step": 6485 - }, - { - "epoch": 1.1, - "grad_norm": 0.2294921875, - "learning_rate": 0.00010013303060855708, - "loss": 2.0886, - "step": 6490 - }, - { - "epoch": 1.1, - "grad_norm": 0.2255859375, - "learning_rate": 9.998521881696551e-05, - "loss": 2.0777, - "step": 6495 - }, - { - "epoch": 1.1, - "grad_norm": 0.2216796875, - "learning_rate": 9.98374070576684e-05, - "loss": 2.1192, - "step": 6500 - }, - { - "epoch": 1.1, - "grad_norm": 0.2080078125, - "learning_rate": 9.968959565360973e-05, - "loss": 2.103, - "step": 6505 - }, - { - "epoch": 1.1, - "grad_norm": 0.2138671875, - "learning_rate": 9.954178492773278e-05, - "loss": 2.1614, - "step": 6510 - }, - { - "epoch": 1.1, - "grad_norm": 0.2197265625, - "learning_rate": 9.939397520297949e-05, - "loss": 2.1397, - "step": 6515 - }, - { - "epoch": 1.1, - "grad_norm": 0.2216796875, - "learning_rate": 9.924616680228933e-05, - "loss": 2.0756, - "step": 6520 - }, - { - "epoch": 1.11, - "grad_norm": 0.2216796875, - "learning_rate": 9.909836004859908e-05, - "loss": 2.1093, - "step": 6525 - }, - { - "epoch": 1.11, - "grad_norm": 0.21875, - "learning_rate": 9.895055526484184e-05, - "loss": 2.1218, - "step": 6530 - }, - { - "epoch": 1.11, - "grad_norm": 0.224609375, - "learning_rate": 9.880275277394644e-05, - "loss": 2.1829, - "step": 6535 - }, - { - "epoch": 1.11, - "grad_norm": 0.2177734375, - "learning_rate": 9.865495289883672e-05, - "loss": 2.1078, - "step": 6540 - }, - { - "epoch": 1.11, - "grad_norm": 0.2294921875, - "learning_rate": 9.850715596243073e-05, - "loss": 2.1234, - "step": 6545 - }, - { - "epoch": 1.11, - "grad_norm": 0.220703125, - "learning_rate": 9.835936228764014e-05, - "loss": 2.0701, - "step": 6550 - }, - { - "epoch": 1.11, - "grad_norm": 0.2216796875, - "learning_rate": 9.821157219736955e-05, - "loss": 2.1111, - "step": 6555 - }, - { - "epoch": 1.11, - "grad_norm": 0.2197265625, - "learning_rate": 9.806378601451563e-05, - "loss": 2.1091, - "step": 6560 - }, - { - "epoch": 1.11, - "grad_norm": 0.22265625, - "learning_rate": 9.791600406196656e-05, - "loss": 2.1229, - "step": 6565 - }, - { - "epoch": 1.11, - "grad_norm": 0.216796875, - "learning_rate": 9.776822666260133e-05, - "loss": 2.1289, - "step": 6570 - }, - { - "epoch": 1.11, - "grad_norm": 0.2177734375, - "learning_rate": 9.762045413928884e-05, - "loss": 2.0959, - "step": 6575 - }, - { - "epoch": 1.11, - "grad_norm": 0.216796875, - "learning_rate": 9.747268681488749e-05, - "loss": 2.1405, - "step": 6580 - }, - { - "epoch": 1.12, - "grad_norm": 0.2197265625, - "learning_rate": 9.732492501224426e-05, - "loss": 2.1203, - "step": 6585 - }, - { - "epoch": 1.12, - "grad_norm": 0.2265625, - "learning_rate": 9.717716905419403e-05, - "loss": 2.1509, - "step": 6590 - }, - { - "epoch": 1.12, - "grad_norm": 0.22265625, - "learning_rate": 9.702941926355897e-05, - "loss": 2.1252, - "step": 6595 - }, - { - "epoch": 1.12, - "grad_norm": 0.2158203125, - "learning_rate": 9.688167596314772e-05, - "loss": 2.1211, - "step": 6600 - }, - { - "epoch": 1.12, - "grad_norm": 0.21875, - "learning_rate": 9.673393947575477e-05, - "loss": 2.1291, - "step": 6605 - }, - { - "epoch": 1.12, - "grad_norm": 0.2216796875, - "learning_rate": 9.658621012415974e-05, - "loss": 2.1686, - "step": 6610 - }, - { - "epoch": 1.12, - "grad_norm": 0.2197265625, - "learning_rate": 9.643848823112664e-05, - "loss": 2.1454, - "step": 6615 - }, - { - "epoch": 1.12, - "grad_norm": 0.21875, - "learning_rate": 9.629077411940318e-05, - "loss": 2.1243, - "step": 6620 - }, - { - "epoch": 1.12, - "grad_norm": 0.2177734375, - "learning_rate": 9.614306811172009e-05, - "loss": 2.1075, - "step": 6625 - }, - { - "epoch": 1.12, - "grad_norm": 0.2177734375, - "learning_rate": 9.599537053079037e-05, - "loss": 2.1105, - "step": 6630 - }, - { - "epoch": 1.12, - "grad_norm": 0.224609375, - "learning_rate": 9.58476816993086e-05, - "loss": 2.1203, - "step": 6635 - }, - { - "epoch": 1.12, - "grad_norm": 0.2255859375, - "learning_rate": 9.570000193995028e-05, - "loss": 2.1075, - "step": 6640 - }, - { - "epoch": 1.13, - "grad_norm": 0.228515625, - "learning_rate": 9.555233157537109e-05, - "loss": 2.1306, - "step": 6645 - }, - { - "epoch": 1.13, - "grad_norm": 0.21484375, - "learning_rate": 9.540467092820614e-05, - "loss": 2.1238, - "step": 6650 - }, - { - "epoch": 1.13, - "grad_norm": 0.2255859375, - "learning_rate": 9.525702032106933e-05, - "loss": 2.1468, - "step": 6655 - }, - { - "epoch": 1.13, - "grad_norm": 0.2236328125, - "learning_rate": 9.510938007655264e-05, - "loss": 2.1477, - "step": 6660 - }, - { - "epoch": 1.13, - "grad_norm": 0.22265625, - "learning_rate": 9.496175051722542e-05, - "loss": 2.1205, - "step": 6665 - }, - { - "epoch": 1.13, - "grad_norm": 0.2158203125, - "learning_rate": 9.481413196563362e-05, - "loss": 2.1107, - "step": 6670 - }, - { - "epoch": 1.13, - "grad_norm": 0.216796875, - "learning_rate": 9.466652474429915e-05, - "loss": 2.1116, - "step": 6675 - }, - { - "epoch": 1.13, - "grad_norm": 0.2236328125, - "learning_rate": 9.451892917571927e-05, - "loss": 2.1433, - "step": 6680 - }, - { - "epoch": 1.13, - "grad_norm": 0.2158203125, - "learning_rate": 9.437134558236562e-05, - "loss": 2.1305, - "step": 6685 - }, - { - "epoch": 1.13, - "grad_norm": 0.2333984375, - "learning_rate": 9.42237742866838e-05, - "loss": 2.141, - "step": 6690 - }, - { - "epoch": 1.13, - "grad_norm": 0.22265625, - "learning_rate": 9.407621561109251e-05, - "loss": 2.0987, - "step": 6695 - }, - { - "epoch": 1.13, - "grad_norm": 0.22265625, - "learning_rate": 9.392866987798277e-05, - "loss": 2.1598, - "step": 6700 - }, - { - "epoch": 1.14, - "grad_norm": 0.21875, - "learning_rate": 9.378113740971754e-05, - "loss": 2.1487, - "step": 6705 - }, - { - "epoch": 1.14, - "grad_norm": 0.2158203125, - "learning_rate": 9.363361852863058e-05, - "loss": 2.1104, - "step": 6710 - }, - { - "epoch": 1.14, - "grad_norm": 0.2216796875, - "learning_rate": 9.348611355702608e-05, - "loss": 2.1171, - "step": 6715 - }, - { - "epoch": 1.14, - "grad_norm": 0.2177734375, - "learning_rate": 9.333862281717788e-05, - "loss": 2.1482, - "step": 6720 - }, - { - "epoch": 1.14, - "grad_norm": 0.21484375, - "learning_rate": 9.31911466313286e-05, - "loss": 2.0912, - "step": 6725 - }, - { - "epoch": 1.14, - "grad_norm": 0.2158203125, - "learning_rate": 9.304368532168912e-05, - "loss": 2.0972, - "step": 6730 - }, - { - "epoch": 1.14, - "grad_norm": 0.2236328125, - "learning_rate": 9.28962392104379e-05, - "loss": 2.1025, - "step": 6735 - }, - { - "epoch": 1.14, - "grad_norm": 0.224609375, - "learning_rate": 9.274880861972005e-05, - "loss": 2.0854, - "step": 6740 - }, - { - "epoch": 1.14, - "grad_norm": 0.224609375, - "learning_rate": 9.260139387164684e-05, - "loss": 2.1208, - "step": 6745 - }, - { - "epoch": 1.14, - "grad_norm": 0.22265625, - "learning_rate": 9.245399528829501e-05, - "loss": 2.1269, - "step": 6750 - }, - { - "epoch": 1.14, - "grad_norm": 0.21875, - "learning_rate": 9.230661319170578e-05, - "loss": 2.0986, - "step": 6755 - }, - { - "epoch": 1.14, - "grad_norm": 0.2236328125, - "learning_rate": 9.215924790388451e-05, - "loss": 2.1067, - "step": 6760 - }, - { - "epoch": 1.15, - "grad_norm": 0.2275390625, - "learning_rate": 9.201189974679986e-05, - "loss": 2.1029, - "step": 6765 - }, - { - "epoch": 1.15, - "grad_norm": 0.2197265625, - "learning_rate": 9.186456904238292e-05, - "loss": 2.1548, - "step": 6770 - }, - { - "epoch": 1.15, - "grad_norm": 0.220703125, - "learning_rate": 9.171725611252676e-05, - "loss": 2.1147, - "step": 6775 - }, - { - "epoch": 1.15, - "grad_norm": 0.2236328125, - "learning_rate": 9.156996127908555e-05, - "loss": 2.1242, - "step": 6780 - }, - { - "epoch": 1.15, - "grad_norm": 0.23046875, - "learning_rate": 9.142268486387398e-05, - "loss": 2.0846, - "step": 6785 - }, - { - "epoch": 1.15, - "grad_norm": 0.23828125, - "learning_rate": 9.127542718866646e-05, - "loss": 2.1363, - "step": 6790 - }, - { - "epoch": 1.15, - "grad_norm": 0.220703125, - "learning_rate": 9.112818857519647e-05, - "loss": 2.1028, - "step": 6795 - }, - { - "epoch": 1.15, - "grad_norm": 0.21875, - "learning_rate": 9.098096934515583e-05, - "loss": 2.1668, - "step": 6800 - }, - { - "epoch": 1.15, - "grad_norm": 0.2236328125, - "learning_rate": 9.083376982019406e-05, - "loss": 2.1371, - "step": 6805 - }, - { - "epoch": 1.15, - "grad_norm": 0.2255859375, - "learning_rate": 9.068659032191753e-05, - "loss": 2.1092, - "step": 6810 - }, - { - "epoch": 1.15, - "grad_norm": 0.21875, - "learning_rate": 9.053943117188896e-05, - "loss": 2.1803, - "step": 6815 - }, - { - "epoch": 1.16, - "grad_norm": 0.2255859375, - "learning_rate": 9.039229269162656e-05, - "loss": 2.1319, - "step": 6820 - }, - { - "epoch": 1.16, - "grad_norm": 0.2236328125, - "learning_rate": 9.024517520260339e-05, - "loss": 2.1312, - "step": 6825 - }, - { - "epoch": 1.16, - "grad_norm": 0.2255859375, - "learning_rate": 9.009807902624662e-05, - "loss": 2.1224, - "step": 6830 - }, - { - "epoch": 1.16, - "grad_norm": 0.2197265625, - "learning_rate": 8.99510044839369e-05, - "loss": 2.1172, - "step": 6835 - }, - { - "epoch": 1.16, - "grad_norm": 0.21484375, - "learning_rate": 8.980395189700758e-05, - "loss": 2.1406, - "step": 6840 - }, - { - "epoch": 1.16, - "grad_norm": 0.22265625, - "learning_rate": 8.965692158674408e-05, - "loss": 2.1704, - "step": 6845 - }, - { - "epoch": 1.16, - "grad_norm": 0.2216796875, - "learning_rate": 8.950991387438308e-05, - "loss": 2.0968, - "step": 6850 - }, - { - "epoch": 1.16, - "grad_norm": 0.25, - "learning_rate": 8.936292908111197e-05, - "loss": 2.1551, - "step": 6855 - }, - { - "epoch": 1.16, - "grad_norm": 0.2236328125, - "learning_rate": 8.921596752806802e-05, - "loss": 2.134, - "step": 6860 - }, - { - "epoch": 1.16, - "grad_norm": 0.2236328125, - "learning_rate": 8.906902953633771e-05, - "loss": 2.1215, - "step": 6865 - }, - { - "epoch": 1.16, - "grad_norm": 0.216796875, - "learning_rate": 8.892211542695607e-05, - "loss": 2.1057, - "step": 6870 - }, - { - "epoch": 1.16, - "grad_norm": 0.2265625, - "learning_rate": 8.877522552090598e-05, - "loss": 2.14, - "step": 6875 - }, - { - "epoch": 1.17, - "grad_norm": 0.2177734375, - "learning_rate": 8.862836013911735e-05, - "loss": 2.0927, - "step": 6880 - }, - { - "epoch": 1.17, - "grad_norm": 0.2255859375, - "learning_rate": 8.848151960246663e-05, - "loss": 2.1415, - "step": 6885 - }, - { - "epoch": 1.17, - "grad_norm": 0.21875, - "learning_rate": 8.833470423177578e-05, - "loss": 2.1684, - "step": 6890 - }, - { - "epoch": 1.17, - "grad_norm": 0.21875, - "learning_rate": 8.818791434781208e-05, - "loss": 2.1264, - "step": 6895 - }, - { - "epoch": 1.17, - "grad_norm": 0.220703125, - "learning_rate": 8.804115027128692e-05, - "loss": 2.0953, - "step": 6900 - }, - { - "epoch": 1.17, - "grad_norm": 0.234375, - "learning_rate": 8.789441232285524e-05, - "loss": 2.1234, - "step": 6905 - }, - { - "epoch": 1.17, - "grad_norm": 0.2236328125, - "learning_rate": 8.774770082311512e-05, - "loss": 2.1046, - "step": 6910 - }, - { - "epoch": 1.17, - "grad_norm": 0.2265625, - "learning_rate": 8.760101609260673e-05, - "loss": 2.1038, - "step": 6915 - }, - { - "epoch": 1.17, - "grad_norm": 0.21875, - "learning_rate": 8.745435845181168e-05, - "loss": 2.1039, - "step": 6920 - }, - { - "epoch": 1.17, - "grad_norm": 0.2236328125, - "learning_rate": 8.730772822115252e-05, - "loss": 2.1259, - "step": 6925 - }, - { - "epoch": 1.17, - "grad_norm": 0.216796875, - "learning_rate": 8.716112572099193e-05, - "loss": 2.0748, - "step": 6930 - }, - { - "epoch": 1.17, - "grad_norm": 0.2275390625, - "learning_rate": 8.701455127163181e-05, - "loss": 2.1662, - "step": 6935 - }, - { - "epoch": 1.18, - "grad_norm": 0.224609375, - "learning_rate": 8.686800519331298e-05, - "loss": 2.0985, - "step": 6940 - }, - { - "epoch": 1.18, - "grad_norm": 0.2265625, - "learning_rate": 8.672148780621423e-05, - "loss": 2.1478, - "step": 6945 - }, - { - "epoch": 1.18, - "grad_norm": 0.22265625, - "learning_rate": 8.657499943045153e-05, - "loss": 2.1141, - "step": 6950 - }, - { - "epoch": 1.18, - "grad_norm": 0.224609375, - "learning_rate": 8.642854038607769e-05, - "loss": 2.1271, - "step": 6955 - }, - { - "epoch": 1.18, - "grad_norm": 0.2197265625, - "learning_rate": 8.628211099308119e-05, - "loss": 2.1009, - "step": 6960 - }, - { - "epoch": 1.18, - "grad_norm": 0.216796875, - "learning_rate": 8.61357115713859e-05, - "loss": 2.1326, - "step": 6965 - }, - { - "epoch": 1.18, - "grad_norm": 0.2236328125, - "learning_rate": 8.598934244085022e-05, - "loss": 2.1111, - "step": 6970 - }, - { - "epoch": 1.18, - "grad_norm": 0.22265625, - "learning_rate": 8.584300392126621e-05, - "loss": 2.1095, - "step": 6975 - }, - { - "epoch": 1.18, - "grad_norm": 0.2236328125, - "learning_rate": 8.569669633235917e-05, - "loss": 2.1472, - "step": 6980 - }, - { - "epoch": 1.18, - "grad_norm": 0.216796875, - "learning_rate": 8.555041999378687e-05, - "loss": 2.1558, - "step": 6985 - }, - { - "epoch": 1.18, - "grad_norm": 0.2216796875, - "learning_rate": 8.540417522513864e-05, - "loss": 2.0741, - "step": 6990 - }, - { - "epoch": 1.18, - "grad_norm": 0.220703125, - "learning_rate": 8.525796234593493e-05, - "loss": 2.1253, - "step": 6995 - }, - { - "epoch": 1.19, - "grad_norm": 0.22265625, - "learning_rate": 8.511178167562662e-05, - "loss": 2.1103, - "step": 7000 - }, - { - "epoch": 1.19, - "grad_norm": 0.2177734375, - "learning_rate": 8.496563353359398e-05, - "loss": 2.1059, - "step": 7005 - }, - { - "epoch": 1.19, - "grad_norm": 0.220703125, - "learning_rate": 8.481951823914642e-05, - "loss": 2.0873, - "step": 7010 - }, - { - "epoch": 1.19, - "grad_norm": 0.228515625, - "learning_rate": 8.467343611152147e-05, - "loss": 2.1031, - "step": 7015 - }, - { - "epoch": 1.19, - "grad_norm": 0.310546875, - "learning_rate": 8.452738746988425e-05, - "loss": 2.1256, - "step": 7020 - }, - { - "epoch": 1.19, - "grad_norm": 0.21484375, - "learning_rate": 8.43813726333267e-05, - "loss": 2.0955, - "step": 7025 - }, - { - "epoch": 1.19, - "grad_norm": 0.224609375, - "learning_rate": 8.42353919208669e-05, - "loss": 2.1246, - "step": 7030 - }, - { - "epoch": 1.19, - "grad_norm": 0.2275390625, - "learning_rate": 8.408944565144838e-05, - "loss": 2.1745, - "step": 7035 - }, - { - "epoch": 1.19, - "grad_norm": 0.220703125, - "learning_rate": 8.394353414393943e-05, - "loss": 2.1093, - "step": 7040 - }, - { - "epoch": 1.19, - "grad_norm": 0.234375, - "learning_rate": 8.379765771713233e-05, - "loss": 2.1581, - "step": 7045 - }, - { - "epoch": 1.19, - "grad_norm": 0.21875, - "learning_rate": 8.365181668974279e-05, - "loss": 2.0691, - "step": 7050 - }, - { - "epoch": 1.19, - "grad_norm": 0.216796875, - "learning_rate": 8.350601138040917e-05, - "loss": 2.1291, - "step": 7055 - }, - { - "epoch": 1.2, - "grad_norm": 0.21875, - "learning_rate": 8.336024210769172e-05, - "loss": 2.1567, - "step": 7060 - }, - { - "epoch": 1.2, - "grad_norm": 0.22265625, - "learning_rate": 8.321450919007207e-05, - "loss": 2.147, - "step": 7065 - }, - { - "epoch": 1.2, - "grad_norm": 0.2236328125, - "learning_rate": 8.30688129459523e-05, - "loss": 2.1118, - "step": 7070 - }, - { - "epoch": 1.2, - "grad_norm": 0.2216796875, - "learning_rate": 8.292315369365442e-05, - "loss": 2.0944, - "step": 7075 - }, - { - "epoch": 1.2, - "grad_norm": 0.2333984375, - "learning_rate": 8.27775317514197e-05, - "loss": 2.0953, - "step": 7080 - }, - { - "epoch": 1.2, - "grad_norm": 0.228515625, - "learning_rate": 8.263194743740769e-05, - "loss": 2.1687, - "step": 7085 - }, - { - "epoch": 1.2, - "grad_norm": 0.220703125, - "learning_rate": 8.248640106969595e-05, - "loss": 2.1272, - "step": 7090 - }, - { - "epoch": 1.2, - "grad_norm": 0.2216796875, - "learning_rate": 8.234089296627903e-05, - "loss": 2.1588, - "step": 7095 - }, - { - "epoch": 1.2, - "grad_norm": 0.2265625, - "learning_rate": 8.219542344506784e-05, - "loss": 2.1068, - "step": 7100 - }, - { - "epoch": 1.2, - "grad_norm": 0.2275390625, - "learning_rate": 8.204999282388903e-05, - "loss": 2.1186, - "step": 7105 - }, - { - "epoch": 1.2, - "grad_norm": 0.2236328125, - "learning_rate": 8.190460142048434e-05, - "loss": 2.0982, - "step": 7110 - }, - { - "epoch": 1.21, - "grad_norm": 0.224609375, - "learning_rate": 8.175924955250971e-05, - "loss": 2.09, - "step": 7115 - }, - { - "epoch": 1.21, - "grad_norm": 0.2216796875, - "learning_rate": 8.161393753753474e-05, - "loss": 2.1447, - "step": 7120 - }, - { - "epoch": 1.21, - "grad_norm": 0.22265625, - "learning_rate": 8.146866569304199e-05, - "loss": 2.0919, - "step": 7125 - }, - { - "epoch": 1.21, - "grad_norm": 0.2236328125, - "learning_rate": 8.13234343364262e-05, - "loss": 2.1315, - "step": 7130 - }, - { - "epoch": 1.21, - "grad_norm": 0.2255859375, - "learning_rate": 8.117824378499374e-05, - "loss": 2.1157, - "step": 7135 - }, - { - "epoch": 1.21, - "grad_norm": 0.22265625, - "learning_rate": 8.103309435596165e-05, - "loss": 2.1379, - "step": 7140 - }, - { - "epoch": 1.21, - "grad_norm": 0.2265625, - "learning_rate": 8.088798636645733e-05, - "loss": 2.1274, - "step": 7145 - }, - { - "epoch": 1.21, - "grad_norm": 0.228515625, - "learning_rate": 8.074292013351759e-05, - "loss": 2.1492, - "step": 7150 - }, - { - "epoch": 1.21, - "grad_norm": 0.22265625, - "learning_rate": 8.059789597408785e-05, - "loss": 2.1494, - "step": 7155 - }, - { - "epoch": 1.21, - "grad_norm": 0.22265625, - "learning_rate": 8.045291420502182e-05, - "loss": 2.1487, - "step": 7160 - }, - { - "epoch": 1.21, - "grad_norm": 0.220703125, - "learning_rate": 8.030797514308052e-05, - "loss": 2.1566, - "step": 7165 - }, - { - "epoch": 1.21, - "grad_norm": 0.2197265625, - "learning_rate": 8.016307910493153e-05, - "loss": 2.0946, - "step": 7170 - }, - { - "epoch": 1.22, - "grad_norm": 0.2216796875, - "learning_rate": 8.001822640714865e-05, - "loss": 2.11, - "step": 7175 - }, - { - "epoch": 1.22, - "grad_norm": 0.22265625, - "learning_rate": 7.987341736621089e-05, - "loss": 2.1462, - "step": 7180 - }, - { - "epoch": 1.22, - "grad_norm": 0.2255859375, - "learning_rate": 7.972865229850176e-05, - "loss": 2.0978, - "step": 7185 - }, - { - "epoch": 1.22, - "grad_norm": 0.2177734375, - "learning_rate": 7.958393152030894e-05, - "loss": 2.1292, - "step": 7190 - }, - { - "epoch": 1.22, - "grad_norm": 0.2294921875, - "learning_rate": 7.943925534782311e-05, - "loss": 2.1581, - "step": 7195 - }, - { - "epoch": 1.22, - "grad_norm": 0.22265625, - "learning_rate": 7.929462409713762e-05, - "loss": 2.1376, - "step": 7200 - }, - { - "epoch": 1.22, - "grad_norm": 0.2265625, - "learning_rate": 7.915003808424771e-05, - "loss": 2.1427, - "step": 7205 - }, - { - "epoch": 1.22, - "grad_norm": 0.2197265625, - "learning_rate": 7.900549762504963e-05, - "loss": 2.1218, - "step": 7210 - }, - { - "epoch": 1.22, - "grad_norm": 0.2275390625, - "learning_rate": 7.886100303534022e-05, - "loss": 2.1444, - "step": 7215 - }, - { - "epoch": 1.22, - "grad_norm": 0.2255859375, - "learning_rate": 7.871655463081615e-05, - "loss": 2.1039, - "step": 7220 - }, - { - "epoch": 1.22, - "grad_norm": 0.224609375, - "learning_rate": 7.8572152727073e-05, - "loss": 2.0897, - "step": 7225 - }, - { - "epoch": 1.22, - "grad_norm": 0.2265625, - "learning_rate": 7.842779763960493e-05, - "loss": 2.0614, - "step": 7230 - }, - { - "epoch": 1.23, - "grad_norm": 0.2177734375, - "learning_rate": 7.828348968380374e-05, - "loss": 2.1025, - "step": 7235 - }, - { - "epoch": 1.23, - "grad_norm": 0.2138671875, - "learning_rate": 7.813922917495824e-05, - "loss": 2.1359, - "step": 7240 - }, - { - "epoch": 1.23, - "grad_norm": 0.228515625, - "learning_rate": 7.799501642825364e-05, - "loss": 2.1166, - "step": 7245 - }, - { - "epoch": 1.23, - "grad_norm": 0.234375, - "learning_rate": 7.785085175877071e-05, - "loss": 2.1249, - "step": 7250 - }, - { - "epoch": 1.23, - "grad_norm": 0.22265625, - "learning_rate": 7.770673548148524e-05, - "loss": 2.1482, - "step": 7255 - }, - { - "epoch": 1.23, - "grad_norm": 0.228515625, - "learning_rate": 7.756266791126731e-05, - "loss": 2.1217, - "step": 7260 - }, - { - "epoch": 1.23, - "grad_norm": 0.23046875, - "learning_rate": 7.74186493628805e-05, - "loss": 2.0725, - "step": 7265 - }, - { - "epoch": 1.23, - "grad_norm": 0.220703125, - "learning_rate": 7.727468015098135e-05, - "loss": 2.1105, - "step": 7270 - }, - { - "epoch": 1.23, - "grad_norm": 0.2265625, - "learning_rate": 7.713076059011864e-05, - "loss": 2.0842, - "step": 7275 - }, - { - "epoch": 1.23, - "grad_norm": 0.2275390625, - "learning_rate": 7.698689099473254e-05, - "loss": 2.1156, - "step": 7280 - }, - { - "epoch": 1.23, - "grad_norm": 0.228515625, - "learning_rate": 7.68430716791542e-05, - "loss": 2.171, - "step": 7285 - }, - { - "epoch": 1.23, - "grad_norm": 0.2314453125, - "learning_rate": 7.669930295760486e-05, - "loss": 2.1235, - "step": 7290 - }, - { - "epoch": 1.24, - "grad_norm": 0.2265625, - "learning_rate": 7.655558514419518e-05, - "loss": 2.1518, - "step": 7295 - }, - { - "epoch": 1.24, - "grad_norm": 0.2265625, - "learning_rate": 7.641191855292464e-05, - "loss": 2.0936, - "step": 7300 - }, - { - "epoch": 1.24, - "grad_norm": 0.23046875, - "learning_rate": 7.626830349768084e-05, - "loss": 2.1468, - "step": 7305 - }, - { - "epoch": 1.24, - "grad_norm": 0.2236328125, - "learning_rate": 7.612474029223866e-05, - "loss": 2.1352, - "step": 7310 - }, - { - "epoch": 1.24, - "grad_norm": 0.2314453125, - "learning_rate": 7.598122925025985e-05, - "loss": 2.1398, - "step": 7315 - }, - { - "epoch": 1.24, - "grad_norm": 0.220703125, - "learning_rate": 7.583777068529209e-05, - "loss": 2.1497, - "step": 7320 - }, - { - "epoch": 1.24, - "grad_norm": 0.234375, - "learning_rate": 7.569436491076842e-05, - "loss": 2.1127, - "step": 7325 - }, - { - "epoch": 1.24, - "grad_norm": 0.23046875, - "learning_rate": 7.55510122400066e-05, - "loss": 2.1145, - "step": 7330 - }, - { - "epoch": 1.24, - "grad_norm": 0.2353515625, - "learning_rate": 7.540771298620826e-05, - "loss": 2.1487, - "step": 7335 - }, - { - "epoch": 1.24, - "grad_norm": 0.22265625, - "learning_rate": 7.526446746245843e-05, - "loss": 2.122, - "step": 7340 - }, - { - "epoch": 1.24, - "grad_norm": 0.22265625, - "learning_rate": 7.512127598172471e-05, - "loss": 2.131, - "step": 7345 - }, - { - "epoch": 1.24, - "grad_norm": 0.22265625, - "learning_rate": 7.497813885685661e-05, - "loss": 2.1383, - "step": 7350 - }, - { - "epoch": 1.25, - "grad_norm": 0.2314453125, - "learning_rate": 7.483505640058488e-05, - "loss": 2.1283, - "step": 7355 - }, - { - "epoch": 1.25, - "grad_norm": 0.2333984375, - "learning_rate": 7.469202892552088e-05, - "loss": 2.1387, - "step": 7360 - }, - { - "epoch": 1.25, - "grad_norm": 0.2275390625, - "learning_rate": 7.454905674415575e-05, - "loss": 2.1122, - "step": 7365 - }, - { - "epoch": 1.25, - "grad_norm": 0.232421875, - "learning_rate": 7.440614016885996e-05, - "loss": 2.121, - "step": 7370 - }, - { - "epoch": 1.25, - "grad_norm": 0.2236328125, - "learning_rate": 7.426327951188227e-05, - "loss": 2.1497, - "step": 7375 - }, - { - "epoch": 1.25, - "grad_norm": 0.224609375, - "learning_rate": 7.412047508534953e-05, - "loss": 2.1219, - "step": 7380 - }, - { - "epoch": 1.25, - "grad_norm": 0.232421875, - "learning_rate": 7.397772720126561e-05, - "loss": 2.1193, - "step": 7385 - }, - { - "epoch": 1.25, - "grad_norm": 0.228515625, - "learning_rate": 7.383503617151075e-05, - "loss": 2.0977, - "step": 7390 - }, - { - "epoch": 1.25, - "grad_norm": 0.2265625, - "learning_rate": 7.369240230784115e-05, - "loss": 2.112, - "step": 7395 - }, - { - "epoch": 1.25, - "grad_norm": 0.23046875, - "learning_rate": 7.354982592188803e-05, - "loss": 2.1423, - "step": 7400 - }, - { - "epoch": 1.25, - "grad_norm": 0.22265625, - "learning_rate": 7.340730732515696e-05, - "loss": 2.1275, - "step": 7405 - }, - { - "epoch": 1.25, - "grad_norm": 0.23046875, - "learning_rate": 7.326484682902739e-05, - "loss": 2.1446, - "step": 7410 - }, - { - "epoch": 1.26, - "grad_norm": 0.240234375, - "learning_rate": 7.312244474475178e-05, - "loss": 2.1214, - "step": 7415 - }, - { - "epoch": 1.26, - "grad_norm": 0.2275390625, - "learning_rate": 7.298010138345485e-05, - "loss": 2.131, - "step": 7420 - }, - { - "epoch": 1.26, - "grad_norm": 0.2158203125, - "learning_rate": 7.283781705613323e-05, - "loss": 2.07, - "step": 7425 - }, - { - "epoch": 1.26, - "grad_norm": 0.2265625, - "learning_rate": 7.26955920736544e-05, - "loss": 2.115, - "step": 7430 - }, - { - "epoch": 1.26, - "grad_norm": 0.2333984375, - "learning_rate": 7.255342674675625e-05, - "loss": 2.0861, - "step": 7435 - }, - { - "epoch": 1.26, - "grad_norm": 0.228515625, - "learning_rate": 7.241132138604634e-05, - "loss": 2.1605, - "step": 7440 - }, - { - "epoch": 1.26, - "grad_norm": 0.232421875, - "learning_rate": 7.226927630200117e-05, - "loss": 2.1492, - "step": 7445 - }, - { - "epoch": 1.26, - "grad_norm": 0.228515625, - "learning_rate": 7.212729180496563e-05, - "loss": 2.1075, - "step": 7450 - }, - { - "epoch": 1.26, - "grad_norm": 0.232421875, - "learning_rate": 7.198536820515214e-05, - "loss": 2.1189, - "step": 7455 - }, - { - "epoch": 1.26, - "grad_norm": 0.2275390625, - "learning_rate": 7.18435058126401e-05, - "loss": 2.0948, - "step": 7460 - }, - { - "epoch": 1.26, - "grad_norm": 0.2197265625, - "learning_rate": 7.170170493737522e-05, - "loss": 2.1453, - "step": 7465 - }, - { - "epoch": 1.27, - "grad_norm": 0.2255859375, - "learning_rate": 7.155996588916883e-05, - "loss": 2.1092, - "step": 7470 - }, - { - "epoch": 1.27, - "grad_norm": 0.2265625, - "learning_rate": 7.141828897769701e-05, - "loss": 2.1437, - "step": 7475 - }, - { - "epoch": 1.27, - "grad_norm": 0.2255859375, - "learning_rate": 7.127667451250031e-05, - "loss": 2.1338, - "step": 7480 - }, - { - "epoch": 1.27, - "grad_norm": 0.23046875, - "learning_rate": 7.113512280298264e-05, - "loss": 2.1306, - "step": 7485 - }, - { - "epoch": 1.27, - "grad_norm": 0.2236328125, - "learning_rate": 7.099363415841097e-05, - "loss": 2.1019, - "step": 7490 - }, - { - "epoch": 1.27, - "grad_norm": 0.2255859375, - "learning_rate": 7.085220888791439e-05, - "loss": 2.0879, - "step": 7495 - }, - { - "epoch": 1.27, - "grad_norm": 0.2314453125, - "learning_rate": 7.071084730048352e-05, - "loss": 2.1013, - "step": 7500 - }, - { - "epoch": 1.27, - "grad_norm": 0.2314453125, - "learning_rate": 7.056954970496988e-05, - "loss": 2.1492, - "step": 7505 - }, - { - "epoch": 1.27, - "grad_norm": 0.248046875, - "learning_rate": 7.042831641008518e-05, - "loss": 2.1336, - "step": 7510 - }, - { - "epoch": 1.27, - "grad_norm": 0.22265625, - "learning_rate": 7.028714772440061e-05, - "loss": 2.1679, - "step": 7515 - }, - { - "epoch": 1.27, - "grad_norm": 0.224609375, - "learning_rate": 7.014604395634623e-05, - "loss": 2.122, - "step": 7520 - }, - { - "epoch": 1.27, - "grad_norm": 0.2255859375, - "learning_rate": 7.000500541421028e-05, - "loss": 2.1175, - "step": 7525 - }, - { - "epoch": 1.28, - "grad_norm": 0.2216796875, - "learning_rate": 6.986403240613844e-05, - "loss": 2.1061, - "step": 7530 - }, - { - "epoch": 1.28, - "grad_norm": 0.22265625, - "learning_rate": 6.972312524013323e-05, - "loss": 2.1216, - "step": 7535 - }, - { - "epoch": 1.28, - "grad_norm": 0.2265625, - "learning_rate": 6.958228422405335e-05, - "loss": 2.1641, - "step": 7540 - }, - { - "epoch": 1.28, - "grad_norm": 0.2265625, - "learning_rate": 6.944150966561294e-05, - "loss": 2.1464, - "step": 7545 - }, - { - "epoch": 1.28, - "grad_norm": 0.2177734375, - "learning_rate": 6.930080187238095e-05, - "loss": 2.121, - "step": 7550 - }, - { - "epoch": 1.28, - "grad_norm": 0.23046875, - "learning_rate": 6.916016115178043e-05, - "loss": 2.1218, - "step": 7555 - }, - { - "epoch": 1.28, - "grad_norm": 0.236328125, - "learning_rate": 6.901958781108794e-05, - "loss": 2.1254, - "step": 7560 - }, - { - "epoch": 1.28, - "grad_norm": 0.224609375, - "learning_rate": 6.887908215743282e-05, - "loss": 2.1073, - "step": 7565 - }, - { - "epoch": 1.28, - "grad_norm": 0.23046875, - "learning_rate": 6.873864449779646e-05, - "loss": 2.1278, - "step": 7570 - }, - { - "epoch": 1.28, - "grad_norm": 0.220703125, - "learning_rate": 6.859827513901178e-05, - "loss": 2.0982, - "step": 7575 - }, - { - "epoch": 1.28, - "grad_norm": 0.2216796875, - "learning_rate": 6.845797438776241e-05, - "loss": 2.1182, - "step": 7580 - }, - { - "epoch": 1.28, - "grad_norm": 0.2255859375, - "learning_rate": 6.831774255058212e-05, - "loss": 2.1411, - "step": 7585 - }, - { - "epoch": 1.29, - "grad_norm": 0.2265625, - "learning_rate": 6.81775799338541e-05, - "loss": 2.149, - "step": 7590 - }, - { - "epoch": 1.29, - "grad_norm": 0.2216796875, - "learning_rate": 6.803748684381031e-05, - "loss": 2.1647, - "step": 7595 - }, - { - "epoch": 1.29, - "grad_norm": 0.2294921875, - "learning_rate": 6.78974635865308e-05, - "loss": 2.0988, - "step": 7600 - }, - { - "epoch": 1.29, - "grad_norm": 0.224609375, - "learning_rate": 6.775751046794308e-05, - "loss": 2.0968, - "step": 7605 - }, - { - "epoch": 1.29, - "grad_norm": 0.22265625, - "learning_rate": 6.761762779382131e-05, - "loss": 2.1752, - "step": 7610 - }, - { - "epoch": 1.29, - "grad_norm": 0.23046875, - "learning_rate": 6.747781586978589e-05, - "loss": 2.1605, - "step": 7615 - }, - { - "epoch": 1.29, - "grad_norm": 0.2294921875, - "learning_rate": 6.73380750013026e-05, - "loss": 2.1392, - "step": 7620 - }, - { - "epoch": 1.29, - "grad_norm": 0.2275390625, - "learning_rate": 6.719840549368183e-05, - "loss": 2.1066, - "step": 7625 - }, - { - "epoch": 1.29, - "grad_norm": 0.2373046875, - "learning_rate": 6.705880765207825e-05, - "loss": 2.0877, - "step": 7630 - }, - { - "epoch": 1.29, - "grad_norm": 0.2265625, - "learning_rate": 6.691928178148995e-05, - "loss": 2.11, - "step": 7635 - }, - { - "epoch": 1.29, - "grad_norm": 0.23828125, - "learning_rate": 6.677982818675758e-05, - "loss": 2.1526, - "step": 7640 - }, - { - "epoch": 1.29, - "grad_norm": 0.23046875, - "learning_rate": 6.664044717256402e-05, - "loss": 2.0917, - "step": 7645 - }, - { - "epoch": 1.3, - "grad_norm": 0.2255859375, - "learning_rate": 6.650113904343366e-05, - "loss": 2.1098, - "step": 7650 - }, - { - "epoch": 1.3, - "grad_norm": 0.2275390625, - "learning_rate": 6.636190410373143e-05, - "loss": 2.128, - "step": 7655 - }, - { - "epoch": 1.3, - "grad_norm": 0.2236328125, - "learning_rate": 6.622274265766253e-05, - "loss": 2.1164, - "step": 7660 - }, - { - "epoch": 1.3, - "grad_norm": 0.2265625, - "learning_rate": 6.608365500927148e-05, - "loss": 2.0702, - "step": 7665 - }, - { - "epoch": 1.3, - "grad_norm": 0.2373046875, - "learning_rate": 6.594464146244165e-05, - "loss": 2.1779, - "step": 7670 - }, - { - "epoch": 1.3, - "grad_norm": 0.2294921875, - "learning_rate": 6.580570232089449e-05, - "loss": 2.0749, - "step": 7675 - }, - { - "epoch": 1.3, - "grad_norm": 0.234375, - "learning_rate": 6.56668378881888e-05, - "loss": 2.1277, - "step": 7680 - }, - { - "epoch": 1.3, - "grad_norm": 0.2197265625, - "learning_rate": 6.552804846772026e-05, - "loss": 2.1526, - "step": 7685 - }, - { - "epoch": 1.3, - "grad_norm": 0.21875, - "learning_rate": 6.538933436272065e-05, - "loss": 2.1642, - "step": 7690 - }, - { - "epoch": 1.3, - "grad_norm": 0.224609375, - "learning_rate": 6.525069587625712e-05, - "loss": 2.1258, - "step": 7695 - }, - { - "epoch": 1.3, - "grad_norm": 0.2314453125, - "learning_rate": 6.511213331123168e-05, - "loss": 2.1485, - "step": 7700 - }, - { - "epoch": 1.3, - "grad_norm": 0.2265625, - "learning_rate": 6.497364697038047e-05, - "loss": 2.11, - "step": 7705 - }, - { - "epoch": 1.31, - "grad_norm": 0.2255859375, - "learning_rate": 6.483523715627301e-05, - "loss": 2.1677, - "step": 7710 - }, - { - "epoch": 1.31, - "grad_norm": 0.2265625, - "learning_rate": 6.469690417131171e-05, - "loss": 2.1133, - "step": 7715 - }, - { - "epoch": 1.31, - "grad_norm": 0.228515625, - "learning_rate": 6.455864831773108e-05, - "loss": 2.1201, - "step": 7720 - }, - { - "epoch": 1.31, - "grad_norm": 0.2216796875, - "learning_rate": 6.442046989759712e-05, - "loss": 2.0895, - "step": 7725 - }, - { - "epoch": 1.31, - "grad_norm": 0.22265625, - "learning_rate": 6.428236921280666e-05, - "loss": 2.0973, - "step": 7730 - }, - { - "epoch": 1.31, - "grad_norm": 0.2314453125, - "learning_rate": 6.414434656508665e-05, - "loss": 2.1285, - "step": 7735 - }, - { - "epoch": 1.31, - "grad_norm": 0.228515625, - "learning_rate": 6.400640225599358e-05, - "loss": 2.1108, - "step": 7740 - }, - { - "epoch": 1.31, - "grad_norm": 0.2236328125, - "learning_rate": 6.386853658691281e-05, - "loss": 2.1164, - "step": 7745 - }, - { - "epoch": 1.31, - "grad_norm": 0.23046875, - "learning_rate": 6.373074985905781e-05, - "loss": 2.1695, - "step": 7750 - }, - { - "epoch": 1.31, - "grad_norm": 0.224609375, - "learning_rate": 6.359304237346961e-05, - "loss": 2.0999, - "step": 7755 - }, - { - "epoch": 1.31, - "grad_norm": 0.2275390625, - "learning_rate": 6.345541443101616e-05, - "loss": 2.1377, - "step": 7760 - }, - { - "epoch": 1.32, - "grad_norm": 0.228515625, - "learning_rate": 6.331786633239154e-05, - "loss": 2.113, - "step": 7765 - }, - { - "epoch": 1.32, - "grad_norm": 0.2265625, - "learning_rate": 6.318039837811542e-05, - "loss": 2.1612, - "step": 7770 - }, - { - "epoch": 1.32, - "grad_norm": 0.2314453125, - "learning_rate": 6.304301086853243e-05, - "loss": 2.1783, - "step": 7775 - }, - { - "epoch": 1.32, - "grad_norm": 0.2333984375, - "learning_rate": 6.290570410381129e-05, - "loss": 2.1309, - "step": 7780 - }, - { - "epoch": 1.32, - "grad_norm": 0.2353515625, - "learning_rate": 6.276847838394446e-05, - "loss": 2.0939, - "step": 7785 - }, - { - "epoch": 1.32, - "grad_norm": 0.2353515625, - "learning_rate": 6.263133400874725e-05, - "loss": 2.1013, - "step": 7790 - }, - { - "epoch": 1.32, - "grad_norm": 0.2216796875, - "learning_rate": 6.249427127785724e-05, - "loss": 2.1307, - "step": 7795 - }, - { - "epoch": 1.32, - "grad_norm": 0.2294921875, - "learning_rate": 6.235729049073371e-05, - "loss": 2.0944, - "step": 7800 - }, - { - "epoch": 1.32, - "grad_norm": 0.2255859375, - "learning_rate": 6.222039194665678e-05, - "loss": 2.0731, - "step": 7805 - }, - { - "epoch": 1.32, - "grad_norm": 0.224609375, - "learning_rate": 6.2083575944727e-05, - "loss": 2.1293, - "step": 7810 - }, - { - "epoch": 1.32, - "grad_norm": 0.22265625, - "learning_rate": 6.194684278386455e-05, - "loss": 2.1658, - "step": 7815 - }, - { - "epoch": 1.32, - "grad_norm": 0.2236328125, - "learning_rate": 6.18101927628085e-05, - "loss": 2.0989, - "step": 7820 - }, - { - "epoch": 1.33, - "grad_norm": 0.2255859375, - "learning_rate": 6.167362618011648e-05, - "loss": 2.1014, - "step": 7825 - }, - { - "epoch": 1.33, - "grad_norm": 0.2275390625, - "learning_rate": 6.153714333416372e-05, - "loss": 2.1117, - "step": 7830 - }, - { - "epoch": 1.33, - "grad_norm": 0.2294921875, - "learning_rate": 6.140074452314236e-05, - "loss": 2.0891, - "step": 7835 - }, - { - "epoch": 1.33, - "grad_norm": 0.2265625, - "learning_rate": 6.126443004506122e-05, - "loss": 2.0833, - "step": 7840 - }, - { - "epoch": 1.33, - "grad_norm": 0.228515625, - "learning_rate": 6.112820019774461e-05, - "loss": 2.1394, - "step": 7845 - }, - { - "epoch": 1.33, - "grad_norm": 0.224609375, - "learning_rate": 6.099205527883207e-05, - "loss": 2.1298, - "step": 7850 - }, - { - "epoch": 1.33, - "grad_norm": 0.224609375, - "learning_rate": 6.0855995585777616e-05, - "loss": 2.1394, - "step": 7855 - }, - { - "epoch": 1.33, - "grad_norm": 0.2265625, - "learning_rate": 6.072002141584891e-05, - "loss": 2.1386, - "step": 7860 - }, - { - "epoch": 1.33, - "grad_norm": 0.2265625, - "learning_rate": 6.058413306612689e-05, - "loss": 2.1127, - "step": 7865 - }, - { - "epoch": 1.33, - "grad_norm": 0.2333984375, - "learning_rate": 6.044833083350503e-05, - "loss": 2.1322, - "step": 7870 - }, - { - "epoch": 1.33, - "grad_norm": 0.2177734375, - "learning_rate": 6.0312615014688436e-05, - "loss": 2.1384, - "step": 7875 - }, - { - "epoch": 1.33, - "grad_norm": 0.2236328125, - "learning_rate": 6.017698590619362e-05, - "loss": 2.1268, - "step": 7880 - }, - { - "epoch": 1.34, - "grad_norm": 0.224609375, - "learning_rate": 6.004144380434763e-05, - "loss": 2.129, - "step": 7885 - }, - { - "epoch": 1.34, - "grad_norm": 0.2255859375, - "learning_rate": 5.9905989005287277e-05, - "loss": 2.1109, - "step": 7890 - }, - { - "epoch": 1.34, - "grad_norm": 0.228515625, - "learning_rate": 5.977062180495876e-05, - "loss": 2.1361, - "step": 7895 - }, - { - "epoch": 1.34, - "grad_norm": 0.2216796875, - "learning_rate": 5.96353424991169e-05, - "loss": 2.1141, - "step": 7900 - }, - { - "epoch": 1.34, - "grad_norm": 0.220703125, - "learning_rate": 5.950015138332434e-05, - "loss": 2.1336, - "step": 7905 - }, - { - "epoch": 1.34, - "grad_norm": 0.2275390625, - "learning_rate": 5.9365048752951225e-05, - "loss": 2.1268, - "step": 7910 - }, - { - "epoch": 1.34, - "grad_norm": 0.23046875, - "learning_rate": 5.923003490317422e-05, - "loss": 2.1146, - "step": 7915 - }, - { - "epoch": 1.34, - "grad_norm": 0.228515625, - "learning_rate": 5.9095110128976104e-05, - "loss": 2.1023, - "step": 7920 - }, - { - "epoch": 1.34, - "grad_norm": 0.2333984375, - "learning_rate": 5.8960274725145056e-05, - "loss": 2.1159, - "step": 7925 - }, - { - "epoch": 1.34, - "grad_norm": 0.23046875, - "learning_rate": 5.882552898627391e-05, - "loss": 2.0938, - "step": 7930 - }, - { - "epoch": 1.34, - "grad_norm": 0.2294921875, - "learning_rate": 5.8690873206759675e-05, - "loss": 2.0999, - "step": 7935 - }, - { - "epoch": 1.34, - "grad_norm": 0.2177734375, - "learning_rate": 5.8556307680802826e-05, - "loss": 2.0965, - "step": 7940 - }, - { - "epoch": 1.35, - "grad_norm": 0.224609375, - "learning_rate": 5.842183270240652e-05, - "loss": 2.0736, - "step": 7945 - }, - { - "epoch": 1.35, - "grad_norm": 0.234375, - "learning_rate": 5.8287448565376215e-05, - "loss": 2.1204, - "step": 7950 - }, - { - "epoch": 1.35, - "grad_norm": 0.2255859375, - "learning_rate": 5.8153155563318904e-05, - "loss": 2.1253, - "step": 7955 - }, - { - "epoch": 1.35, - "grad_norm": 0.22265625, - "learning_rate": 5.801895398964234e-05, - "loss": 2.1087, - "step": 7960 - }, - { - "epoch": 1.35, - "grad_norm": 0.2265625, - "learning_rate": 5.788484413755469e-05, - "loss": 2.0863, - "step": 7965 - }, - { - "epoch": 1.35, - "grad_norm": 0.22265625, - "learning_rate": 5.7750826300063496e-05, - "loss": 2.1233, - "step": 7970 - }, - { - "epoch": 1.35, - "grad_norm": 0.2294921875, - "learning_rate": 5.761690076997543e-05, - "loss": 2.1237, - "step": 7975 - }, - { - "epoch": 1.35, - "grad_norm": 0.2255859375, - "learning_rate": 5.7483067839895585e-05, - "loss": 2.1592, - "step": 7980 - }, - { - "epoch": 1.35, - "grad_norm": 0.22265625, - "learning_rate": 5.7349327802226474e-05, - "loss": 2.1362, - "step": 7985 - }, - { - "epoch": 1.35, - "grad_norm": 0.2197265625, - "learning_rate": 5.721568094916783e-05, - "loss": 2.106, - "step": 7990 - }, - { - "epoch": 1.35, - "grad_norm": 0.2255859375, - "learning_rate": 5.7082127572715785e-05, - "loss": 2.1259, - "step": 7995 - }, - { - "epoch": 1.35, - "grad_norm": 0.234375, - "learning_rate": 5.6948667964662136e-05, - "loss": 2.1102, - "step": 8000 - }, - { - "epoch": 1.36, - "grad_norm": 0.2265625, - "learning_rate": 5.6815302416593894e-05, - "loss": 2.1031, - "step": 8005 - }, - { - "epoch": 1.36, - "grad_norm": 0.22265625, - "learning_rate": 5.668203121989266e-05, - "loss": 2.1164, - "step": 8010 - }, - { - "epoch": 1.36, - "grad_norm": 0.23046875, - "learning_rate": 5.6548854665733674e-05, - "loss": 2.1152, - "step": 8015 - }, - { - "epoch": 1.36, - "grad_norm": 0.224609375, - "learning_rate": 5.641577304508559e-05, - "loss": 2.1385, - "step": 8020 - }, - { - "epoch": 1.36, - "grad_norm": 0.2294921875, - "learning_rate": 5.6282786648709484e-05, - "loss": 2.111, - "step": 8025 - }, - { - "epoch": 1.36, - "grad_norm": 0.2314453125, - "learning_rate": 5.614989576715852e-05, - "loss": 2.1329, - "step": 8030 - }, - { - "epoch": 1.36, - "grad_norm": 0.2216796875, - "learning_rate": 5.601710069077712e-05, - "loss": 2.1403, - "step": 8035 - }, - { - "epoch": 1.36, - "grad_norm": 0.224609375, - "learning_rate": 5.58844017097004e-05, - "loss": 2.1114, - "step": 8040 - }, - { - "epoch": 1.36, - "grad_norm": 0.228515625, - "learning_rate": 5.575179911385349e-05, - "loss": 2.1271, - "step": 8045 - }, - { - "epoch": 1.36, - "grad_norm": 0.224609375, - "learning_rate": 5.561929319295104e-05, - "loss": 2.1481, - "step": 8050 - }, - { - "epoch": 1.36, - "grad_norm": 0.2314453125, - "learning_rate": 5.5486884236496303e-05, - "loss": 2.1358, - "step": 8055 - }, - { - "epoch": 1.37, - "grad_norm": 0.228515625, - "learning_rate": 5.535457253378082e-05, - "loss": 2.137, - "step": 8060 - }, - { - "epoch": 1.37, - "grad_norm": 0.2333984375, - "learning_rate": 5.522235837388362e-05, - "loss": 2.1403, - "step": 8065 - }, - { - "epoch": 1.37, - "grad_norm": 0.2314453125, - "learning_rate": 5.5090242045670605e-05, - "loss": 2.1279, - "step": 8070 - }, - { - "epoch": 1.37, - "grad_norm": 0.2236328125, - "learning_rate": 5.495822383779392e-05, - "loss": 2.1185, - "step": 8075 - }, - { - "epoch": 1.37, - "grad_norm": 0.232421875, - "learning_rate": 5.48263040386914e-05, - "loss": 2.1356, - "step": 8080 - }, - { - "epoch": 1.37, - "grad_norm": 0.2255859375, - "learning_rate": 5.469448293658574e-05, - "loss": 2.1376, - "step": 8085 - }, - { - "epoch": 1.37, - "grad_norm": 0.22265625, - "learning_rate": 5.4562760819484125e-05, - "loss": 2.1191, - "step": 8090 - }, - { - "epoch": 1.37, - "grad_norm": 0.224609375, - "learning_rate": 5.443113797517741e-05, - "loss": 2.0956, - "step": 8095 - }, - { - "epoch": 1.37, - "grad_norm": 0.2314453125, - "learning_rate": 5.4299614691239576e-05, - "loss": 2.1205, - "step": 8100 - }, - { - "epoch": 1.37, - "grad_norm": 0.2353515625, - "learning_rate": 5.416819125502712e-05, - "loss": 2.1297, - "step": 8105 - }, - { - "epoch": 1.37, - "grad_norm": 0.2255859375, - "learning_rate": 5.4036867953678286e-05, - "loss": 2.1068, - "step": 8110 - }, - { - "epoch": 1.37, - "grad_norm": 0.2255859375, - "learning_rate": 5.390564507411261e-05, - "loss": 2.1027, - "step": 8115 - }, - { - "epoch": 1.38, - "grad_norm": 0.232421875, - "learning_rate": 5.377452290303023e-05, - "loss": 2.1181, - "step": 8120 - }, - { - "epoch": 1.38, - "grad_norm": 0.2255859375, - "learning_rate": 5.364350172691124e-05, - "loss": 2.1774, - "step": 8125 - }, - { - "epoch": 1.38, - "grad_norm": 0.2294921875, - "learning_rate": 5.3512581832015075e-05, - "loss": 2.099, - "step": 8130 - }, - { - "epoch": 1.38, - "grad_norm": 0.22265625, - "learning_rate": 5.3381763504379914e-05, - "loss": 2.1234, - "step": 8135 - }, - { - "epoch": 1.38, - "grad_norm": 0.23046875, - "learning_rate": 5.325104702982192e-05, - "loss": 2.1567, - "step": 8140 - }, - { - "epoch": 1.38, - "grad_norm": 0.2236328125, - "learning_rate": 5.3120432693934894e-05, - "loss": 2.149, - "step": 8145 - }, - { - "epoch": 1.38, - "grad_norm": 0.2275390625, - "learning_rate": 5.2989920782089265e-05, - "loss": 2.1027, - "step": 8150 - }, - { - "epoch": 1.38, - "grad_norm": 0.2275390625, - "learning_rate": 5.2859511579431944e-05, - "loss": 2.1403, - "step": 8155 - }, - { - "epoch": 1.38, - "grad_norm": 0.232421875, - "learning_rate": 5.272920537088528e-05, - "loss": 2.1336, - "step": 8160 - }, - { - "epoch": 1.38, - "grad_norm": 0.2265625, - "learning_rate": 5.259900244114655e-05, - "loss": 2.1591, - "step": 8165 - }, - { - "epoch": 1.38, - "grad_norm": 0.2265625, - "learning_rate": 5.2468903074687506e-05, - "loss": 2.1639, - "step": 8170 - }, - { - "epoch": 1.38, - "grad_norm": 0.2294921875, - "learning_rate": 5.233890755575361e-05, - "loss": 2.1787, - "step": 8175 - }, - { - "epoch": 1.39, - "grad_norm": 0.2236328125, - "learning_rate": 5.22090161683633e-05, - "loss": 2.1079, - "step": 8180 - }, - { - "epoch": 1.39, - "grad_norm": 0.232421875, - "learning_rate": 5.207922919630771e-05, - "loss": 2.1277, - "step": 8185 - }, - { - "epoch": 1.39, - "grad_norm": 0.224609375, - "learning_rate": 5.194954692314975e-05, - "loss": 2.1226, - "step": 8190 - }, - { - "epoch": 1.39, - "grad_norm": 0.2236328125, - "learning_rate": 5.1819969632223505e-05, - "loss": 2.1081, - "step": 8195 - }, - { - "epoch": 1.39, - "grad_norm": 0.2236328125, - "learning_rate": 5.1690497606633824e-05, - "loss": 2.1174, - "step": 8200 - }, - { - "epoch": 1.39, - "grad_norm": 0.2177734375, - "learning_rate": 5.156113112925543e-05, - "loss": 2.1002, - "step": 8205 - }, - { - "epoch": 1.39, - "grad_norm": 0.2255859375, - "learning_rate": 5.1431870482732516e-05, - "loss": 2.1494, - "step": 8210 - }, - { - "epoch": 1.39, - "grad_norm": 0.22265625, - "learning_rate": 5.1302715949478174e-05, - "loss": 2.1323, - "step": 8215 - }, - { - "epoch": 1.39, - "grad_norm": 0.220703125, - "learning_rate": 5.117366781167341e-05, - "loss": 2.15, - "step": 8220 - }, - { - "epoch": 1.39, - "grad_norm": 0.2314453125, - "learning_rate": 5.104472635126695e-05, - "loss": 2.1167, - "step": 8225 - }, - { - "epoch": 1.39, - "grad_norm": 0.2216796875, - "learning_rate": 5.091589184997441e-05, - "loss": 2.1579, - "step": 8230 - }, - { - "epoch": 1.39, - "grad_norm": 0.2265625, - "learning_rate": 5.0787164589277645e-05, - "loss": 2.1174, - "step": 8235 - }, - { - "epoch": 1.4, - "grad_norm": 0.2265625, - "learning_rate": 5.0658544850424274e-05, - "loss": 2.1221, - "step": 8240 - }, - { - "epoch": 1.4, - "grad_norm": 0.236328125, - "learning_rate": 5.053003291442707e-05, - "loss": 2.1003, - "step": 8245 - }, - { - "epoch": 1.4, - "grad_norm": 0.23046875, - "learning_rate": 5.0401629062063115e-05, - "loss": 2.1398, - "step": 8250 - }, - { - "epoch": 1.4, - "grad_norm": 0.2412109375, - "learning_rate": 5.027333357387345e-05, - "loss": 2.1235, - "step": 8255 - }, - { - "epoch": 1.4, - "grad_norm": 0.224609375, - "learning_rate": 5.014514673016237e-05, - "loss": 2.1306, - "step": 8260 - }, - { - "epoch": 1.4, - "grad_norm": 0.2236328125, - "learning_rate": 5.00170688109967e-05, - "loss": 2.1229, - "step": 8265 - }, - { - "epoch": 1.4, - "grad_norm": 0.228515625, - "learning_rate": 4.988910009620537e-05, - "loss": 2.1448, - "step": 8270 - }, - { - "epoch": 1.4, - "grad_norm": 0.224609375, - "learning_rate": 4.976124086537871e-05, - "loss": 2.1072, - "step": 8275 - }, - { - "epoch": 1.4, - "grad_norm": 0.2265625, - "learning_rate": 4.9633491397867815e-05, - "loss": 2.0999, - "step": 8280 - }, - { - "epoch": 1.4, - "grad_norm": 0.2275390625, - "learning_rate": 4.950585197278404e-05, - "loss": 2.1003, - "step": 8285 - }, - { - "epoch": 1.4, - "grad_norm": 0.220703125, - "learning_rate": 4.937832286899815e-05, - "loss": 2.0978, - "step": 8290 - }, - { - "epoch": 1.4, - "grad_norm": 0.2275390625, - "learning_rate": 4.925090436514004e-05, - "loss": 2.184, - "step": 8295 - }, - { - "epoch": 1.41, - "grad_norm": 0.240234375, - "learning_rate": 4.91235967395979e-05, - "loss": 2.14, - "step": 8300 - }, - { - "epoch": 1.41, - "grad_norm": 0.22265625, - "learning_rate": 4.8996400270517675e-05, - "loss": 2.1209, - "step": 8305 - }, - { - "epoch": 1.41, - "grad_norm": 0.2255859375, - "learning_rate": 4.886931523580246e-05, - "loss": 2.1202, - "step": 8310 - }, - { - "epoch": 1.41, - "grad_norm": 0.228515625, - "learning_rate": 4.87423419131119e-05, - "loss": 2.1826, - "step": 8315 - }, - { - "epoch": 1.41, - "grad_norm": 0.2294921875, - "learning_rate": 4.861548057986147e-05, - "loss": 2.1141, - "step": 8320 - }, - { - "epoch": 1.41, - "grad_norm": 0.228515625, - "learning_rate": 4.848873151322209e-05, - "loss": 2.1506, - "step": 8325 - }, - { - "epoch": 1.41, - "grad_norm": 0.2275390625, - "learning_rate": 4.836209499011932e-05, - "loss": 2.1256, - "step": 8330 - }, - { - "epoch": 1.41, - "grad_norm": 0.228515625, - "learning_rate": 4.823557128723288e-05, - "loss": 2.1182, - "step": 8335 - }, - { - "epoch": 1.41, - "grad_norm": 0.2333984375, - "learning_rate": 4.810916068099601e-05, - "loss": 2.1319, - "step": 8340 - }, - { - "epoch": 1.41, - "grad_norm": 0.2275390625, - "learning_rate": 4.798286344759475e-05, - "loss": 2.1291, - "step": 8345 - }, - { - "epoch": 1.41, - "grad_norm": 0.2216796875, - "learning_rate": 4.7856679862967515e-05, - "loss": 2.0805, - "step": 8350 - }, - { - "epoch": 1.42, - "grad_norm": 0.2255859375, - "learning_rate": 4.773061020280443e-05, - "loss": 2.1223, - "step": 8355 - }, - { - "epoch": 1.42, - "grad_norm": 0.2255859375, - "learning_rate": 4.760465474254667e-05, - "loss": 2.1401, - "step": 8360 - }, - { - "epoch": 1.42, - "grad_norm": 0.234375, - "learning_rate": 4.7478813757385954e-05, - "loss": 2.1489, - "step": 8365 - }, - { - "epoch": 1.42, - "grad_norm": 0.2294921875, - "learning_rate": 4.735308752226387e-05, - "loss": 2.1411, - "step": 8370 - }, - { - "epoch": 1.42, - "grad_norm": 0.23046875, - "learning_rate": 4.722747631187123e-05, - "loss": 2.1452, - "step": 8375 - }, - { - "epoch": 1.42, - "grad_norm": 0.2255859375, - "learning_rate": 4.710198040064767e-05, - "loss": 2.1107, - "step": 8380 - }, - { - "epoch": 1.42, - "grad_norm": 0.2275390625, - "learning_rate": 4.697660006278073e-05, - "loss": 2.1218, - "step": 8385 - }, - { - "epoch": 1.42, - "grad_norm": 0.2265625, - "learning_rate": 4.6851335572205646e-05, - "loss": 2.1221, - "step": 8390 - }, - { - "epoch": 1.42, - "grad_norm": 0.236328125, - "learning_rate": 4.6726187202604465e-05, - "loss": 2.148, - "step": 8395 - }, - { - "epoch": 1.42, - "grad_norm": 0.2314453125, - "learning_rate": 4.6601155227405436e-05, - "loss": 2.1665, - "step": 8400 - }, - { - "epoch": 1.42, - "grad_norm": 0.23828125, - "learning_rate": 4.6476239919782636e-05, - "loss": 2.1232, - "step": 8405 - }, - { - "epoch": 1.42, - "grad_norm": 0.2265625, - "learning_rate": 4.635144155265523e-05, - "loss": 2.1338, - "step": 8410 - }, - { - "epoch": 1.43, - "grad_norm": 0.220703125, - "learning_rate": 4.622676039868672e-05, - "loss": 2.1274, - "step": 8415 - }, - { - "epoch": 1.43, - "grad_norm": 0.21875, - "learning_rate": 4.6102196730284786e-05, - "loss": 2.1651, - "step": 8420 - }, - { - "epoch": 1.43, - "grad_norm": 0.2333984375, - "learning_rate": 4.597775081960026e-05, - "loss": 2.1164, - "step": 8425 - }, - { - "epoch": 1.43, - "grad_norm": 0.220703125, - "learning_rate": 4.585342293852666e-05, - "loss": 2.1234, - "step": 8430 - }, - { - "epoch": 1.43, - "grad_norm": 0.234375, - "learning_rate": 4.572921335869974e-05, - "loss": 2.1105, - "step": 8435 - }, - { - "epoch": 1.43, - "grad_norm": 0.2265625, - "learning_rate": 4.560512235149668e-05, - "loss": 2.1434, - "step": 8440 - }, - { - "epoch": 1.43, - "grad_norm": 0.228515625, - "learning_rate": 4.5481150188035626e-05, - "loss": 2.0948, - "step": 8445 - }, - { - "epoch": 1.43, - "grad_norm": 0.23046875, - "learning_rate": 4.535729713917522e-05, - "loss": 2.1562, - "step": 8450 - }, - { - "epoch": 1.43, - "grad_norm": 0.2255859375, - "learning_rate": 4.5233563475513616e-05, - "loss": 2.1353, - "step": 8455 - }, - { - "epoch": 1.43, - "grad_norm": 0.2275390625, - "learning_rate": 4.510994946738829e-05, - "loss": 2.1399, - "step": 8460 - }, - { - "epoch": 1.43, - "grad_norm": 0.232421875, - "learning_rate": 4.498645538487528e-05, - "loss": 2.1196, - "step": 8465 - }, - { - "epoch": 1.43, - "grad_norm": 0.224609375, - "learning_rate": 4.4863081497788506e-05, - "loss": 2.0936, - "step": 8470 - }, - { - "epoch": 1.44, - "grad_norm": 0.2333984375, - "learning_rate": 4.473982807567937e-05, - "loss": 2.1093, - "step": 8475 - }, - { - "epoch": 1.44, - "grad_norm": 0.2255859375, - "learning_rate": 4.4616695387836074e-05, - "loss": 2.1156, - "step": 8480 - }, - { - "epoch": 1.44, - "grad_norm": 0.2265625, - "learning_rate": 4.449368370328302e-05, - "loss": 2.106, - "step": 8485 - }, - { - "epoch": 1.44, - "grad_norm": 0.2294921875, - "learning_rate": 4.4370793290780224e-05, - "loss": 2.1387, - "step": 8490 - }, - { - "epoch": 1.44, - "grad_norm": 0.2236328125, - "learning_rate": 4.42480244188228e-05, - "loss": 2.1202, - "step": 8495 - }, - { - "epoch": 1.44, - "grad_norm": 0.2353515625, - "learning_rate": 4.412537735564019e-05, - "loss": 2.1336, - "step": 8500 - }, - { - "epoch": 1.44, - "grad_norm": 0.2294921875, - "learning_rate": 4.4002852369195845e-05, - "loss": 2.1211, - "step": 8505 - }, - { - "epoch": 1.44, - "grad_norm": 0.2265625, - "learning_rate": 4.3880449727186427e-05, - "loss": 2.1334, - "step": 8510 - }, - { - "epoch": 1.44, - "grad_norm": 0.234375, - "learning_rate": 4.375816969704131e-05, - "loss": 2.1229, - "step": 8515 - }, - { - "epoch": 1.44, - "grad_norm": 0.22265625, - "learning_rate": 4.363601254592201e-05, - "loss": 2.1147, - "step": 8520 - }, - { - "epoch": 1.44, - "grad_norm": 0.224609375, - "learning_rate": 4.3513978540721477e-05, - "loss": 2.1554, - "step": 8525 - }, - { - "epoch": 1.44, - "grad_norm": 0.23046875, - "learning_rate": 4.339206794806371e-05, - "loss": 2.1565, - "step": 8530 - }, - { - "epoch": 1.45, - "grad_norm": 0.2236328125, - "learning_rate": 4.327028103430303e-05, - "loss": 2.1332, - "step": 8535 - }, - { - "epoch": 1.45, - "grad_norm": 0.2255859375, - "learning_rate": 4.3148618065523546e-05, - "loss": 2.1234, - "step": 8540 - }, - { - "epoch": 1.45, - "grad_norm": 0.2255859375, - "learning_rate": 4.3027079307538584e-05, - "loss": 2.1442, - "step": 8545 - }, - { - "epoch": 1.45, - "grad_norm": 0.248046875, - "learning_rate": 4.290566502589011e-05, - "loss": 2.1679, - "step": 8550 - }, - { - "epoch": 1.45, - "grad_norm": 0.234375, - "learning_rate": 4.2784375485848e-05, - "loss": 2.113, - "step": 8555 - }, - { - "epoch": 1.45, - "grad_norm": 0.2275390625, - "learning_rate": 4.266321095240973e-05, - "loss": 2.1225, - "step": 8560 - }, - { - "epoch": 1.45, - "grad_norm": 0.2275390625, - "learning_rate": 4.2542171690299605e-05, - "loss": 2.0962, - "step": 8565 - }, - { - "epoch": 1.45, - "grad_norm": 0.2265625, - "learning_rate": 4.242125796396827e-05, - "loss": 2.1323, - "step": 8570 - }, - { - "epoch": 1.45, - "grad_norm": 0.228515625, - "learning_rate": 4.230047003759206e-05, - "loss": 2.1072, - "step": 8575 - }, - { - "epoch": 1.45, - "grad_norm": 0.228515625, - "learning_rate": 4.217980817507242e-05, - "loss": 2.132, - "step": 8580 - }, - { - "epoch": 1.45, - "grad_norm": 0.2314453125, - "learning_rate": 4.205927264003544e-05, - "loss": 2.1482, - "step": 8585 - }, - { - "epoch": 1.45, - "grad_norm": 0.2275390625, - "learning_rate": 4.193886369583117e-05, - "loss": 2.1228, - "step": 8590 - }, - { - "epoch": 1.46, - "grad_norm": 0.224609375, - "learning_rate": 4.1818581605533094e-05, - "loss": 2.1229, - "step": 8595 - }, - { - "epoch": 1.46, - "grad_norm": 0.2197265625, - "learning_rate": 4.1698426631937514e-05, - "loss": 2.0852, - "step": 8600 - }, - { - "epoch": 1.46, - "grad_norm": 0.2294921875, - "learning_rate": 4.157839903756308e-05, - "loss": 2.0963, - "step": 8605 - }, - { - "epoch": 1.46, - "grad_norm": 0.2265625, - "learning_rate": 4.145849908464999e-05, - "loss": 2.1152, - "step": 8610 - }, - { - "epoch": 1.46, - "grad_norm": 0.23046875, - "learning_rate": 4.133872703515975e-05, - "loss": 2.1029, - "step": 8615 - }, - { - "epoch": 1.46, - "grad_norm": 0.22265625, - "learning_rate": 4.121908315077421e-05, - "loss": 2.1612, - "step": 8620 - }, - { - "epoch": 1.46, - "grad_norm": 0.234375, - "learning_rate": 4.1099567692895426e-05, - "loss": 2.1364, - "step": 8625 - }, - { - "epoch": 1.46, - "grad_norm": 0.2265625, - "learning_rate": 4.098018092264474e-05, - "loss": 2.0914, - "step": 8630 - }, - { - "epoch": 1.46, - "grad_norm": 0.21875, - "learning_rate": 4.08609231008623e-05, - "loss": 2.1178, - "step": 8635 - }, - { - "epoch": 1.46, - "grad_norm": 0.2353515625, - "learning_rate": 4.0741794488106585e-05, - "loss": 2.1975, - "step": 8640 - }, - { - "epoch": 1.46, - "grad_norm": 0.2333984375, - "learning_rate": 4.0622795344653816e-05, - "loss": 2.1351, - "step": 8645 - }, - { - "epoch": 1.46, - "grad_norm": 0.232421875, - "learning_rate": 4.05039259304972e-05, - "loss": 2.1472, - "step": 8650 - }, - { - "epoch": 1.47, - "grad_norm": 0.2294921875, - "learning_rate": 4.038518650534661e-05, - "loss": 2.1258, - "step": 8655 - }, - { - "epoch": 1.47, - "grad_norm": 0.2255859375, - "learning_rate": 4.0266577328627996e-05, - "loss": 2.0783, - "step": 8660 - }, - { - "epoch": 1.47, - "grad_norm": 0.232421875, - "learning_rate": 4.0148098659482537e-05, - "loss": 2.1506, - "step": 8665 - }, - { - "epoch": 1.47, - "grad_norm": 0.232421875, - "learning_rate": 4.002975075676641e-05, - "loss": 2.1108, - "step": 8670 - }, - { - "epoch": 1.47, - "grad_norm": 0.2255859375, - "learning_rate": 3.991153387905011e-05, - "loss": 2.1207, - "step": 8675 - }, - { - "epoch": 1.47, - "grad_norm": 0.2333984375, - "learning_rate": 3.979344828461773e-05, - "loss": 2.1169, - "step": 8680 - }, - { - "epoch": 1.47, - "grad_norm": 0.236328125, - "learning_rate": 3.967549423146665e-05, - "loss": 2.1205, - "step": 8685 - }, - { - "epoch": 1.47, - "grad_norm": 0.23046875, - "learning_rate": 3.955767197730681e-05, - "loss": 2.1345, - "step": 8690 - }, - { - "epoch": 1.47, - "grad_norm": 0.2265625, - "learning_rate": 3.943998177956022e-05, - "loss": 2.1559, - "step": 8695 - }, - { - "epoch": 1.47, - "grad_norm": 0.22265625, - "learning_rate": 3.932242389536036e-05, - "loss": 2.1094, - "step": 8700 - }, - { - "epoch": 1.47, - "grad_norm": 0.2255859375, - "learning_rate": 3.9204998581551554e-05, - "loss": 2.1194, - "step": 8705 - }, - { - "epoch": 1.48, - "grad_norm": 0.2265625, - "learning_rate": 3.908770609468858e-05, - "loss": 2.0894, - "step": 8710 - }, - { - "epoch": 1.48, - "grad_norm": 0.236328125, - "learning_rate": 3.897054669103597e-05, - "loss": 2.1092, - "step": 8715 - }, - { - "epoch": 1.48, - "grad_norm": 0.2294921875, - "learning_rate": 3.885352062656749e-05, - "loss": 2.1491, - "step": 8720 - }, - { - "epoch": 1.48, - "grad_norm": 0.2255859375, - "learning_rate": 3.8736628156965594e-05, - "loss": 2.1457, - "step": 8725 - }, - { - "epoch": 1.48, - "grad_norm": 0.2275390625, - "learning_rate": 3.861986953762088e-05, - "loss": 2.0965, - "step": 8730 - }, - { - "epoch": 1.48, - "grad_norm": 0.2265625, - "learning_rate": 3.850324502363141e-05, - "loss": 2.1247, - "step": 8735 - }, - { - "epoch": 1.48, - "grad_norm": 0.2236328125, - "learning_rate": 3.838675486980232e-05, - "loss": 2.1692, - "step": 8740 - }, - { - "epoch": 1.48, - "grad_norm": 0.2236328125, - "learning_rate": 3.8270399330645216e-05, - "loss": 2.0999, - "step": 8745 - }, - { - "epoch": 1.48, - "grad_norm": 0.228515625, - "learning_rate": 3.815417866037753e-05, - "loss": 2.1126, - "step": 8750 - }, - { - "epoch": 1.48, - "grad_norm": 0.2236328125, - "learning_rate": 3.80380931129221e-05, - "loss": 2.1166, - "step": 8755 - }, - { - "epoch": 1.48, - "grad_norm": 0.21875, - "learning_rate": 3.792214294190643e-05, - "loss": 2.0955, - "step": 8760 - }, - { - "epoch": 1.48, - "grad_norm": 0.228515625, - "learning_rate": 3.7806328400662374e-05, - "loss": 2.1366, - "step": 8765 - }, - { - "epoch": 1.49, - "grad_norm": 0.2392578125, - "learning_rate": 3.769064974222537e-05, - "loss": 2.1004, - "step": 8770 - }, - { - "epoch": 1.49, - "grad_norm": 0.2421875, - "learning_rate": 3.757510721933403e-05, - "loss": 2.1277, - "step": 8775 - }, - { - "epoch": 1.49, - "grad_norm": 0.228515625, - "learning_rate": 3.74597010844295e-05, - "loss": 2.1272, - "step": 8780 - }, - { - "epoch": 1.49, - "grad_norm": 0.2294921875, - "learning_rate": 3.734443158965499e-05, - "loss": 2.1392, - "step": 8785 - }, - { - "epoch": 1.49, - "grad_norm": 0.2275390625, - "learning_rate": 3.722929898685507e-05, - "loss": 2.1155, - "step": 8790 - }, - { - "epoch": 1.49, - "grad_norm": 0.23046875, - "learning_rate": 3.71143035275753e-05, - "loss": 2.1582, - "step": 8795 - }, - { - "epoch": 1.49, - "grad_norm": 0.2333984375, - "learning_rate": 3.699944546306162e-05, - "loss": 2.1508, - "step": 8800 - }, - { - "epoch": 1.49, - "grad_norm": 0.2265625, - "learning_rate": 3.6884725044259746e-05, - "loss": 2.1642, - "step": 8805 - }, - { - "epoch": 1.49, - "grad_norm": 0.2294921875, - "learning_rate": 3.677014252181472e-05, - "loss": 2.0776, - "step": 8810 - }, - { - "epoch": 1.49, - "grad_norm": 0.228515625, - "learning_rate": 3.665569814607017e-05, - "loss": 2.1675, - "step": 8815 - }, - { - "epoch": 1.49, - "grad_norm": 0.2255859375, - "learning_rate": 3.6541392167068033e-05, - "loss": 2.1034, - "step": 8820 - }, - { - "epoch": 1.49, - "grad_norm": 0.2353515625, - "learning_rate": 3.642722483454781e-05, - "loss": 2.1548, - "step": 8825 - }, - { - "epoch": 1.5, - "grad_norm": 0.2265625, - "learning_rate": 3.6313196397946106e-05, - "loss": 2.0931, - "step": 8830 - }, - { - "epoch": 1.5, - "grad_norm": 0.2197265625, - "learning_rate": 3.619930710639604e-05, - "loss": 2.1602, - "step": 8835 - }, - { - "epoch": 1.5, - "grad_norm": 0.23046875, - "learning_rate": 3.608555720872678e-05, - "loss": 2.0739, - "step": 8840 - }, - { - "epoch": 1.5, - "grad_norm": 0.224609375, - "learning_rate": 3.597194695346282e-05, - "loss": 2.1437, - "step": 8845 - }, - { - "epoch": 1.5, - "grad_norm": 0.2294921875, - "learning_rate": 3.5858476588823664e-05, - "loss": 2.1333, - "step": 8850 - }, - { - "epoch": 1.5, - "grad_norm": 0.2470703125, - "learning_rate": 3.574514636272318e-05, - "loss": 2.1147, - "step": 8855 - }, - { - "epoch": 1.5, - "grad_norm": 0.2255859375, - "learning_rate": 3.563195652276893e-05, - "loss": 2.1096, - "step": 8860 - }, - { - "epoch": 1.5, - "grad_norm": 0.2314453125, - "learning_rate": 3.551890731626197e-05, - "loss": 2.1184, - "step": 8865 - }, - { - "epoch": 1.5, - "grad_norm": 0.224609375, - "learning_rate": 3.54059989901959e-05, - "loss": 2.1549, - "step": 8870 - }, - { - "epoch": 1.5, - "grad_norm": 0.228515625, - "learning_rate": 3.529323179125661e-05, - "loss": 2.1688, - "step": 8875 - }, - { - "epoch": 1.5, - "grad_norm": 0.2294921875, - "learning_rate": 3.518060596582167e-05, - "loss": 2.1652, - "step": 8880 - }, - { - "epoch": 1.5, - "grad_norm": 0.2255859375, - "learning_rate": 3.506812175995967e-05, - "loss": 2.1266, - "step": 8885 - }, - { - "epoch": 1.51, - "grad_norm": 0.22265625, - "learning_rate": 3.4955779419429856e-05, - "loss": 2.1351, - "step": 8890 - }, - { - "epoch": 1.51, - "grad_norm": 0.22265625, - "learning_rate": 3.484357918968163e-05, - "loss": 2.1242, - "step": 8895 - }, - { - "epoch": 1.51, - "grad_norm": 0.224609375, - "learning_rate": 3.4731521315853675e-05, - "loss": 2.1029, - "step": 8900 - }, - { - "epoch": 1.51, - "grad_norm": 0.2353515625, - "learning_rate": 3.461960604277381e-05, - "loss": 2.129, - "step": 8905 - }, - { - "epoch": 1.51, - "grad_norm": 0.2236328125, - "learning_rate": 3.45078336149583e-05, - "loss": 2.1262, - "step": 8910 - }, - { - "epoch": 1.51, - "grad_norm": 0.228515625, - "learning_rate": 3.439620427661119e-05, - "loss": 2.14, - "step": 8915 - }, - { - "epoch": 1.51, - "grad_norm": 0.224609375, - "learning_rate": 3.4284718271624015e-05, - "loss": 2.1609, - "step": 8920 - }, - { - "epoch": 1.51, - "grad_norm": 0.2314453125, - "learning_rate": 3.417337584357512e-05, - "loss": 2.0996, - "step": 8925 - }, - { - "epoch": 1.51, - "grad_norm": 0.22265625, - "learning_rate": 3.4062177235729145e-05, - "loss": 2.0893, - "step": 8930 - }, - { - "epoch": 1.51, - "grad_norm": 0.2255859375, - "learning_rate": 3.3951122691036564e-05, - "loss": 2.1178, - "step": 8935 - }, - { - "epoch": 1.51, - "grad_norm": 0.2216796875, - "learning_rate": 3.384021245213297e-05, - "loss": 2.1169, - "step": 8940 - }, - { - "epoch": 1.51, - "grad_norm": 0.22265625, - "learning_rate": 3.372944676133878e-05, - "loss": 2.1666, - "step": 8945 - }, - { - "epoch": 1.52, - "grad_norm": 0.2294921875, - "learning_rate": 3.3618825860658576e-05, - "loss": 2.1317, - "step": 8950 - }, - { - "epoch": 1.52, - "grad_norm": 0.228515625, - "learning_rate": 3.35083499917806e-05, - "loss": 2.1147, - "step": 8955 - }, - { - "epoch": 1.52, - "grad_norm": 0.224609375, - "learning_rate": 3.3398019396076184e-05, - "loss": 2.1252, - "step": 8960 - }, - { - "epoch": 1.52, - "grad_norm": 0.224609375, - "learning_rate": 3.328783431459936e-05, - "loss": 2.0857, - "step": 8965 - }, - { - "epoch": 1.52, - "grad_norm": 0.228515625, - "learning_rate": 3.3177794988086074e-05, - "loss": 2.1214, - "step": 8970 - }, - { - "epoch": 1.52, - "grad_norm": 0.234375, - "learning_rate": 3.306790165695396e-05, - "loss": 2.0711, - "step": 8975 - }, - { - "epoch": 1.52, - "grad_norm": 0.232421875, - "learning_rate": 3.295815456130162e-05, - "loss": 2.1091, - "step": 8980 - }, - { - "epoch": 1.52, - "grad_norm": 0.232421875, - "learning_rate": 3.2848553940908186e-05, - "loss": 2.134, - "step": 8985 - }, - { - "epoch": 1.52, - "grad_norm": 0.224609375, - "learning_rate": 3.2739100035232776e-05, - "loss": 2.103, - "step": 8990 - }, - { - "epoch": 1.52, - "grad_norm": 0.2353515625, - "learning_rate": 3.262979308341385e-05, - "loss": 2.1696, - "step": 8995 - }, - { - "epoch": 1.52, - "grad_norm": 0.2275390625, - "learning_rate": 3.2520633324268924e-05, - "loss": 2.1352, - "step": 9000 - }, - { - "epoch": 1.53, - "grad_norm": 0.224609375, - "learning_rate": 3.2411620996293876e-05, - "loss": 2.1056, - "step": 9005 - }, - { - "epoch": 1.53, - "grad_norm": 0.2294921875, - "learning_rate": 3.230275633766248e-05, - "loss": 2.1891, - "step": 9010 - }, - { - "epoch": 1.53, - "grad_norm": 0.2275390625, - "learning_rate": 3.219403958622587e-05, - "loss": 2.1408, - "step": 9015 - }, - { - "epoch": 1.53, - "grad_norm": 0.228515625, - "learning_rate": 3.208547097951206e-05, - "loss": 2.1556, - "step": 9020 - }, - { - "epoch": 1.53, - "grad_norm": 0.2265625, - "learning_rate": 3.197705075472529e-05, - "loss": 2.1061, - "step": 9025 - }, - { - "epoch": 1.53, - "grad_norm": 0.2294921875, - "learning_rate": 3.186877914874572e-05, - "loss": 2.1357, - "step": 9030 - }, - { - "epoch": 1.53, - "grad_norm": 0.2255859375, - "learning_rate": 3.1760656398128764e-05, - "loss": 2.1443, - "step": 9035 - }, - { - "epoch": 1.53, - "grad_norm": 0.2275390625, - "learning_rate": 3.165268273910461e-05, - "loss": 2.0885, - "step": 9040 - }, - { - "epoch": 1.53, - "grad_norm": 0.2255859375, - "learning_rate": 3.154485840757775e-05, - "loss": 2.0986, - "step": 9045 - }, - { - "epoch": 1.53, - "grad_norm": 0.2294921875, - "learning_rate": 3.14371836391263e-05, - "loss": 2.1272, - "step": 9050 - }, - { - "epoch": 1.53, - "grad_norm": 0.228515625, - "learning_rate": 3.1329658669001724e-05, - "loss": 2.1175, - "step": 9055 - }, - { - "epoch": 1.53, - "grad_norm": 0.2314453125, - "learning_rate": 3.1222283732128186e-05, - "loss": 2.1358, - "step": 9060 - }, - { - "epoch": 1.54, - "grad_norm": 0.228515625, - "learning_rate": 3.111505906310194e-05, - "loss": 2.1075, - "step": 9065 - }, - { - "epoch": 1.54, - "grad_norm": 0.228515625, - "learning_rate": 3.100798489619111e-05, - "loss": 2.1126, - "step": 9070 - }, - { - "epoch": 1.54, - "grad_norm": 0.2236328125, - "learning_rate": 3.0901061465334905e-05, - "loss": 2.1089, - "step": 9075 - }, - { - "epoch": 1.54, - "grad_norm": 0.2294921875, - "learning_rate": 3.079428900414314e-05, - "loss": 2.1171, - "step": 9080 - }, - { - "epoch": 1.54, - "grad_norm": 0.224609375, - "learning_rate": 3.0687667745895876e-05, - "loss": 2.1315, - "step": 9085 - }, - { - "epoch": 1.54, - "grad_norm": 0.2294921875, - "learning_rate": 3.058119792354283e-05, - "loss": 2.1353, - "step": 9090 - }, - { - "epoch": 1.54, - "grad_norm": 0.2314453125, - "learning_rate": 3.0474879769702703e-05, - "loss": 2.1024, - "step": 9095 - }, - { - "epoch": 1.54, - "grad_norm": 0.23046875, - "learning_rate": 3.0368713516663093e-05, - "loss": 2.1436, - "step": 9100 - }, - { - "epoch": 1.54, - "grad_norm": 0.23046875, - "learning_rate": 3.0262699396379467e-05, - "loss": 2.157, - "step": 9105 - }, - { - "epoch": 1.54, - "grad_norm": 0.2275390625, - "learning_rate": 3.0156837640475046e-05, - "loss": 2.1166, - "step": 9110 - }, - { - "epoch": 1.54, - "grad_norm": 0.2275390625, - "learning_rate": 3.0051128480240143e-05, - "loss": 2.1443, - "step": 9115 - }, - { - "epoch": 1.54, - "grad_norm": 0.2236328125, - "learning_rate": 2.9945572146631605e-05, - "loss": 2.1404, - "step": 9120 - }, - { - "epoch": 1.55, - "grad_norm": 0.2255859375, - "learning_rate": 2.9840168870272413e-05, - "loss": 2.0834, - "step": 9125 - }, - { - "epoch": 1.55, - "grad_norm": 0.2265625, - "learning_rate": 2.973491888145127e-05, - "loss": 2.1451, - "step": 9130 - }, - { - "epoch": 1.55, - "grad_norm": 0.2294921875, - "learning_rate": 2.9629822410121754e-05, - "loss": 2.1062, - "step": 9135 - }, - { - "epoch": 1.55, - "grad_norm": 0.22265625, - "learning_rate": 2.9524879685902173e-05, - "loss": 2.112, - "step": 9140 - }, - { - "epoch": 1.55, - "grad_norm": 0.2255859375, - "learning_rate": 2.9420090938074917e-05, - "loss": 2.1231, - "step": 9145 - }, - { - "epoch": 1.55, - "grad_norm": 0.2333984375, - "learning_rate": 2.9315456395585884e-05, - "loss": 2.1256, - "step": 9150 - }, - { - "epoch": 1.55, - "grad_norm": 0.2275390625, - "learning_rate": 2.9210976287044144e-05, - "loss": 2.1237, - "step": 9155 - }, - { - "epoch": 1.55, - "grad_norm": 0.2353515625, - "learning_rate": 2.9106650840721305e-05, - "loss": 2.1511, - "step": 9160 - }, - { - "epoch": 1.55, - "grad_norm": 0.21875, - "learning_rate": 2.9002480284551094e-05, - "loss": 2.1458, - "step": 9165 - }, - { - "epoch": 1.55, - "grad_norm": 0.2255859375, - "learning_rate": 2.8898464846128837e-05, - "loss": 2.1324, - "step": 9170 - }, - { - "epoch": 1.55, - "grad_norm": 0.232421875, - "learning_rate": 2.8794604752710873e-05, - "loss": 2.1192, - "step": 9175 - }, - { - "epoch": 1.55, - "grad_norm": 0.23046875, - "learning_rate": 2.8690900231214224e-05, - "loss": 2.1224, - "step": 9180 - }, - { - "epoch": 1.56, - "grad_norm": 0.2275390625, - "learning_rate": 2.8587351508215997e-05, - "loss": 2.1159, - "step": 9185 - }, - { - "epoch": 1.56, - "grad_norm": 0.2314453125, - "learning_rate": 2.8483958809952883e-05, - "loss": 2.1377, - "step": 9190 - }, - { - "epoch": 1.56, - "grad_norm": 0.232421875, - "learning_rate": 2.838072236232069e-05, - "loss": 2.1342, - "step": 9195 - }, - { - "epoch": 1.56, - "grad_norm": 0.220703125, - "learning_rate": 2.8277642390873904e-05, - "loss": 2.1474, - "step": 9200 - }, - { - "epoch": 1.56, - "grad_norm": 0.2236328125, - "learning_rate": 2.8174719120825e-05, - "loss": 2.0472, - "step": 9205 - }, - { - "epoch": 1.56, - "grad_norm": 0.228515625, - "learning_rate": 2.8071952777044208e-05, - "loss": 2.1405, - "step": 9210 - }, - { - "epoch": 1.56, - "grad_norm": 0.240234375, - "learning_rate": 2.796934358405887e-05, - "loss": 2.135, - "step": 9215 - }, - { - "epoch": 1.56, - "grad_norm": 0.23046875, - "learning_rate": 2.786689176605295e-05, - "loss": 2.178, - "step": 9220 - }, - { - "epoch": 1.56, - "grad_norm": 0.2265625, - "learning_rate": 2.7764597546866656e-05, - "loss": 2.1374, - "step": 9225 - }, - { - "epoch": 1.56, - "grad_norm": 0.2275390625, - "learning_rate": 2.7662461149995723e-05, - "loss": 2.1224, - "step": 9230 - }, - { - "epoch": 1.56, - "grad_norm": 0.2265625, - "learning_rate": 2.7560482798591193e-05, - "loss": 2.0993, - "step": 9235 - }, - { - "epoch": 1.56, - "grad_norm": 0.23046875, - "learning_rate": 2.745866271545876e-05, - "loss": 2.1677, - "step": 9240 - }, - { - "epoch": 1.57, - "grad_norm": 0.2314453125, - "learning_rate": 2.7357001123058358e-05, - "loss": 2.1336, - "step": 9245 - }, - { - "epoch": 1.57, - "grad_norm": 0.2314453125, - "learning_rate": 2.7255498243503607e-05, - "loss": 2.1442, - "step": 9250 - }, - { - "epoch": 1.57, - "grad_norm": 0.2275390625, - "learning_rate": 2.7154154298561407e-05, - "loss": 2.0766, - "step": 9255 - }, - { - "epoch": 1.57, - "grad_norm": 0.2275390625, - "learning_rate": 2.705296950965135e-05, - "loss": 2.1467, - "step": 9260 - }, - { - "epoch": 1.57, - "grad_norm": 0.2333984375, - "learning_rate": 2.695194409784534e-05, - "loss": 2.1041, - "step": 9265 - }, - { - "epoch": 1.57, - "grad_norm": 0.21875, - "learning_rate": 2.685107828386708e-05, - "loss": 2.0962, - "step": 9270 - }, - { - "epoch": 1.57, - "grad_norm": 0.2236328125, - "learning_rate": 2.6750372288091563e-05, - "loss": 2.0952, - "step": 9275 - }, - { - "epoch": 1.57, - "grad_norm": 0.234375, - "learning_rate": 2.6649826330544624e-05, - "loss": 2.1158, - "step": 9280 - }, - { - "epoch": 1.57, - "grad_norm": 0.244140625, - "learning_rate": 2.6549440630902377e-05, - "loss": 2.0895, - "step": 9285 - }, - { - "epoch": 1.57, - "grad_norm": 0.2265625, - "learning_rate": 2.644921540849087e-05, - "loss": 2.1119, - "step": 9290 - }, - { - "epoch": 1.57, - "grad_norm": 0.2236328125, - "learning_rate": 2.6349150882285535e-05, - "loss": 2.1148, - "step": 9295 - }, - { - "epoch": 1.58, - "grad_norm": 0.2294921875, - "learning_rate": 2.6249247270910594e-05, - "loss": 2.0864, - "step": 9300 - }, - { - "epoch": 1.58, - "grad_norm": 0.2236328125, - "learning_rate": 2.614950479263889e-05, - "loss": 2.1098, - "step": 9305 - }, - { - "epoch": 1.58, - "grad_norm": 0.228515625, - "learning_rate": 2.6049923665391108e-05, - "loss": 2.1359, - "step": 9310 - }, - { - "epoch": 1.58, - "grad_norm": 0.2294921875, - "learning_rate": 2.5950504106735353e-05, - "loss": 2.1003, - "step": 9315 - }, - { - "epoch": 1.58, - "grad_norm": 0.228515625, - "learning_rate": 2.5851246333886815e-05, - "loss": 2.1277, - "step": 9320 - }, - { - "epoch": 1.58, - "grad_norm": 0.220703125, - "learning_rate": 2.5752150563707234e-05, - "loss": 2.0998, - "step": 9325 - }, - { - "epoch": 1.58, - "grad_norm": 0.23046875, - "learning_rate": 2.5653217012704244e-05, - "loss": 2.1263, - "step": 9330 - }, - { - "epoch": 1.58, - "grad_norm": 0.234375, - "learning_rate": 2.5554445897031286e-05, - "loss": 2.0996, - "step": 9335 - }, - { - "epoch": 1.58, - "grad_norm": 0.23046875, - "learning_rate": 2.5455837432486707e-05, - "loss": 2.0911, - "step": 9340 - }, - { - "epoch": 1.58, - "grad_norm": 0.228515625, - "learning_rate": 2.5357391834513588e-05, - "loss": 2.1413, - "step": 9345 - }, - { - "epoch": 1.58, - "grad_norm": 0.2275390625, - "learning_rate": 2.5259109318199194e-05, - "loss": 2.1594, - "step": 9350 - }, - { - "epoch": 1.58, - "grad_norm": 0.2265625, - "learning_rate": 2.5160990098274373e-05, - "loss": 2.0828, - "step": 9355 - }, - { - "epoch": 1.59, - "grad_norm": 0.2275390625, - "learning_rate": 2.5063034389113282e-05, - "loss": 2.1489, - "step": 9360 - }, - { - "epoch": 1.59, - "grad_norm": 0.236328125, - "learning_rate": 2.4965242404732892e-05, - "loss": 2.1443, - "step": 9365 - }, - { - "epoch": 1.59, - "grad_norm": 0.228515625, - "learning_rate": 2.48676143587923e-05, - "loss": 2.1547, - "step": 9370 - }, - { - "epoch": 1.59, - "grad_norm": 0.2255859375, - "learning_rate": 2.4770150464592566e-05, - "loss": 2.0968, - "step": 9375 - }, - { - "epoch": 1.59, - "grad_norm": 0.2255859375, - "learning_rate": 2.4672850935076065e-05, - "loss": 2.0872, - "step": 9380 - }, - { - "epoch": 1.59, - "grad_norm": 0.228515625, - "learning_rate": 2.4575715982825997e-05, - "loss": 2.1518, - "step": 9385 - }, - { - "epoch": 1.59, - "grad_norm": 0.2216796875, - "learning_rate": 2.4478745820066084e-05, - "loss": 2.1032, - "step": 9390 - }, - { - "epoch": 1.59, - "grad_norm": 0.23828125, - "learning_rate": 2.4381940658659963e-05, - "loss": 2.102, - "step": 9395 - }, - { - "epoch": 1.59, - "grad_norm": 0.2333984375, - "learning_rate": 2.4285300710110782e-05, - "loss": 2.1821, - "step": 9400 - }, - { - "epoch": 1.59, - "grad_norm": 0.224609375, - "learning_rate": 2.4188826185560743e-05, - "loss": 2.0965, - "step": 9405 - }, - { - "epoch": 1.59, - "grad_norm": 0.2255859375, - "learning_rate": 2.409251729579055e-05, - "loss": 2.1287, - "step": 9410 - }, - { - "epoch": 1.59, - "grad_norm": 0.2314453125, - "learning_rate": 2.399637425121911e-05, - "loss": 2.1487, - "step": 9415 - }, - { - "epoch": 1.6, - "grad_norm": 0.2177734375, - "learning_rate": 2.390039726190295e-05, - "loss": 2.1267, - "step": 9420 - }, - { - "epoch": 1.6, - "grad_norm": 0.2294921875, - "learning_rate": 2.380458653753579e-05, - "loss": 2.1301, - "step": 9425 - }, - { - "epoch": 1.6, - "grad_norm": 0.2314453125, - "learning_rate": 2.370894228744809e-05, - "loss": 2.1212, - "step": 9430 - }, - { - "epoch": 1.6, - "grad_norm": 0.216796875, - "learning_rate": 2.3613464720606637e-05, - "loss": 2.0878, - "step": 9435 - }, - { - "epoch": 1.6, - "grad_norm": 0.2265625, - "learning_rate": 2.351815404561394e-05, - "loss": 2.1501, - "step": 9440 - }, - { - "epoch": 1.6, - "grad_norm": 0.224609375, - "learning_rate": 2.3423010470707972e-05, - "loss": 2.1325, - "step": 9445 - }, - { - "epoch": 1.6, - "grad_norm": 0.2294921875, - "learning_rate": 2.3328034203761582e-05, - "loss": 2.1175, - "step": 9450 - }, - { - "epoch": 1.6, - "grad_norm": 0.228515625, - "learning_rate": 2.323322545228208e-05, - "loss": 2.1565, - "step": 9455 - }, - { - "epoch": 1.6, - "grad_norm": 0.23046875, - "learning_rate": 2.3138584423410823e-05, - "loss": 2.126, - "step": 9460 - }, - { - "epoch": 1.6, - "grad_norm": 0.2275390625, - "learning_rate": 2.3044111323922623e-05, - "loss": 2.1131, - "step": 9465 - }, - { - "epoch": 1.6, - "grad_norm": 0.23046875, - "learning_rate": 2.2949806360225502e-05, - "loss": 2.1226, - "step": 9470 - }, - { - "epoch": 1.6, - "grad_norm": 0.2333984375, - "learning_rate": 2.2855669738360064e-05, - "loss": 2.1327, - "step": 9475 - }, - { - "epoch": 1.61, - "grad_norm": 0.224609375, - "learning_rate": 2.2761701663999158e-05, - "loss": 2.1363, - "step": 9480 - }, - { - "epoch": 1.61, - "grad_norm": 0.2275390625, - "learning_rate": 2.2667902342447356e-05, - "loss": 2.0965, - "step": 9485 - }, - { - "epoch": 1.61, - "grad_norm": 0.2373046875, - "learning_rate": 2.2574271978640572e-05, - "loss": 2.1373, - "step": 9490 - }, - { - "epoch": 1.61, - "grad_norm": 0.2236328125, - "learning_rate": 2.248081077714549e-05, - "loss": 2.1131, - "step": 9495 - }, - { - "epoch": 1.61, - "grad_norm": 0.224609375, - "learning_rate": 2.2387518942159292e-05, - "loss": 2.1056, - "step": 9500 - }, - { - "epoch": 1.61, - "grad_norm": 0.2294921875, - "learning_rate": 2.2294396677509078e-05, - "loss": 2.1192, - "step": 9505 - }, - { - "epoch": 1.61, - "grad_norm": 0.2294921875, - "learning_rate": 2.2201444186651487e-05, - "loss": 2.1341, - "step": 9510 - }, - { - "epoch": 1.61, - "grad_norm": 0.224609375, - "learning_rate": 2.210866167267225e-05, - "loss": 2.0922, - "step": 9515 - }, - { - "epoch": 1.61, - "grad_norm": 0.22265625, - "learning_rate": 2.2016049338285628e-05, - "loss": 2.1433, - "step": 9520 - }, - { - "epoch": 1.61, - "grad_norm": 0.2275390625, - "learning_rate": 2.1923607385834167e-05, - "loss": 2.1042, - "step": 9525 - }, - { - "epoch": 1.61, - "grad_norm": 0.2265625, - "learning_rate": 2.1831336017288174e-05, - "loss": 2.0894, - "step": 9530 - }, - { - "epoch": 1.61, - "grad_norm": 0.228515625, - "learning_rate": 2.1739235434245097e-05, - "loss": 2.1704, - "step": 9535 - }, - { - "epoch": 1.62, - "grad_norm": 0.234375, - "learning_rate": 2.1647305837929466e-05, - "loss": 2.0889, - "step": 9540 - }, - { - "epoch": 1.62, - "grad_norm": 0.2333984375, - "learning_rate": 2.1555547429192112e-05, - "loss": 2.0969, - "step": 9545 - }, - { - "epoch": 1.62, - "grad_norm": 0.2265625, - "learning_rate": 2.1463960408509832e-05, - "loss": 2.136, - "step": 9550 - }, - { - "epoch": 1.62, - "grad_norm": 0.2236328125, - "learning_rate": 2.137254497598501e-05, - "loss": 2.1246, - "step": 9555 - }, - { - "epoch": 1.62, - "grad_norm": 0.228515625, - "learning_rate": 2.128130133134516e-05, - "loss": 2.1073, - "step": 9560 - }, - { - "epoch": 1.62, - "grad_norm": 0.224609375, - "learning_rate": 2.1190229673942363e-05, - "loss": 2.142, - "step": 9565 - }, - { - "epoch": 1.62, - "grad_norm": 0.236328125, - "learning_rate": 2.109933020275312e-05, - "loss": 2.1124, - "step": 9570 - }, - { - "epoch": 1.62, - "grad_norm": 0.2236328125, - "learning_rate": 2.1008603116377545e-05, - "loss": 2.1026, - "step": 9575 - }, - { - "epoch": 1.62, - "grad_norm": 0.2265625, - "learning_rate": 2.091804861303922e-05, - "loss": 2.1151, - "step": 9580 - }, - { - "epoch": 1.62, - "grad_norm": 0.2294921875, - "learning_rate": 2.0827666890584685e-05, - "loss": 2.0735, - "step": 9585 - }, - { - "epoch": 1.62, - "grad_norm": 0.2265625, - "learning_rate": 2.073745814648287e-05, - "loss": 2.119, - "step": 9590 - }, - { - "epoch": 1.63, - "grad_norm": 0.228515625, - "learning_rate": 2.0647422577824882e-05, - "loss": 2.1127, - "step": 9595 - }, - { - "epoch": 1.63, - "grad_norm": 0.224609375, - "learning_rate": 2.0557560381323437e-05, - "loss": 2.1275, - "step": 9600 - }, - { - "epoch": 1.63, - "grad_norm": 0.236328125, - "learning_rate": 2.046787175331244e-05, - "loss": 2.1583, - "step": 9605 - }, - { - "epoch": 1.63, - "grad_norm": 0.2236328125, - "learning_rate": 2.037835688974662e-05, - "loss": 2.1137, - "step": 9610 - }, - { - "epoch": 1.63, - "grad_norm": 0.2265625, - "learning_rate": 2.0289015986201043e-05, - "loss": 2.086, - "step": 9615 - }, - { - "epoch": 1.63, - "grad_norm": 0.23046875, - "learning_rate": 2.019984923787065e-05, - "loss": 2.1226, - "step": 9620 - }, - { - "epoch": 1.63, - "grad_norm": 0.228515625, - "learning_rate": 2.0110856839569947e-05, - "loss": 2.1492, - "step": 9625 - }, - { - "epoch": 1.63, - "grad_norm": 0.232421875, - "learning_rate": 2.0022038985732495e-05, - "loss": 2.1303, - "step": 9630 - }, - { - "epoch": 1.63, - "grad_norm": 0.228515625, - "learning_rate": 1.99333958704105e-05, - "loss": 2.1495, - "step": 9635 - }, - { - "epoch": 1.63, - "grad_norm": 0.2333984375, - "learning_rate": 1.984492768727443e-05, - "loss": 2.1262, - "step": 9640 - }, - { - "epoch": 1.63, - "grad_norm": 0.2294921875, - "learning_rate": 1.9756634629612447e-05, - "loss": 2.1363, - "step": 9645 - }, - { - "epoch": 1.63, - "grad_norm": 0.234375, - "learning_rate": 1.9668516890330212e-05, - "loss": 2.1487, - "step": 9650 - }, - { - "epoch": 1.64, - "grad_norm": 0.224609375, - "learning_rate": 1.95805746619503e-05, - "loss": 2.1058, - "step": 9655 - }, - { - "epoch": 1.64, - "grad_norm": 0.2314453125, - "learning_rate": 1.9492808136611818e-05, - "loss": 2.1014, - "step": 9660 - }, - { - "epoch": 1.64, - "grad_norm": 0.2294921875, - "learning_rate": 1.9405217506069994e-05, - "loss": 2.1296, - "step": 9665 - }, - { - "epoch": 1.64, - "grad_norm": 0.2265625, - "learning_rate": 1.9317802961695786e-05, - "loss": 2.1045, - "step": 9670 - }, - { - "epoch": 1.64, - "grad_norm": 0.224609375, - "learning_rate": 1.923056469447535e-05, - "loss": 2.1638, - "step": 9675 - }, - { - "epoch": 1.64, - "grad_norm": 0.2216796875, - "learning_rate": 1.914350289500979e-05, - "loss": 2.1128, - "step": 9680 - }, - { - "epoch": 1.64, - "grad_norm": 0.2255859375, - "learning_rate": 1.9056617753514628e-05, - "loss": 2.1096, - "step": 9685 - }, - { - "epoch": 1.64, - "grad_norm": 0.232421875, - "learning_rate": 1.8969909459819412e-05, - "loss": 2.1324, - "step": 9690 - }, - { - "epoch": 1.64, - "grad_norm": 0.228515625, - "learning_rate": 1.888337820336735e-05, - "loss": 2.1221, - "step": 9695 - }, - { - "epoch": 1.64, - "grad_norm": 0.2236328125, - "learning_rate": 1.879702417321475e-05, - "loss": 2.112, - "step": 9700 - }, - { - "epoch": 1.64, - "grad_norm": 0.23046875, - "learning_rate": 1.871084755803082e-05, - "loss": 2.137, - "step": 9705 - }, - { - "epoch": 1.64, - "grad_norm": 0.2255859375, - "learning_rate": 1.8624848546097086e-05, - "loss": 2.1575, - "step": 9710 - }, - { - "epoch": 1.65, - "grad_norm": 0.2294921875, - "learning_rate": 1.8539027325307056e-05, - "loss": 2.1784, - "step": 9715 - }, - { - "epoch": 1.65, - "grad_norm": 0.23046875, - "learning_rate": 1.8453384083165803e-05, - "loss": 2.0949, - "step": 9720 - }, - { - "epoch": 1.65, - "grad_norm": 0.224609375, - "learning_rate": 1.8367919006789558e-05, - "loss": 2.1114, - "step": 9725 - }, - { - "epoch": 1.65, - "grad_norm": 0.2236328125, - "learning_rate": 1.828263228290522e-05, - "loss": 2.1596, - "step": 9730 - }, - { - "epoch": 1.65, - "grad_norm": 0.2265625, - "learning_rate": 1.8197524097850095e-05, - "loss": 2.1079, - "step": 9735 - }, - { - "epoch": 1.65, - "grad_norm": 0.2275390625, - "learning_rate": 1.8112594637571366e-05, - "loss": 2.0991, - "step": 9740 - }, - { - "epoch": 1.65, - "grad_norm": 0.2353515625, - "learning_rate": 1.802784408762578e-05, - "loss": 2.1254, - "step": 9745 - }, - { - "epoch": 1.65, - "grad_norm": 0.224609375, - "learning_rate": 1.7943272633179166e-05, - "loss": 2.0966, - "step": 9750 - }, - { - "epoch": 1.65, - "grad_norm": 0.234375, - "learning_rate": 1.7858880459006e-05, - "loss": 2.1437, - "step": 9755 - }, - { - "epoch": 1.65, - "grad_norm": 0.2275390625, - "learning_rate": 1.777466774948916e-05, - "loss": 2.1718, - "step": 9760 - }, - { - "epoch": 1.65, - "grad_norm": 0.22265625, - "learning_rate": 1.769063468861941e-05, - "loss": 2.1158, - "step": 9765 - }, - { - "epoch": 1.65, - "grad_norm": 0.21875, - "learning_rate": 1.7606781459994913e-05, - "loss": 2.0889, - "step": 9770 - }, - { - "epoch": 1.66, - "grad_norm": 0.2421875, - "learning_rate": 1.7523108246821017e-05, - "loss": 2.1166, - "step": 9775 - }, - { - "epoch": 1.66, - "grad_norm": 0.2265625, - "learning_rate": 1.743961523190981e-05, - "loss": 2.0749, - "step": 9780 - }, - { - "epoch": 1.66, - "grad_norm": 0.23046875, - "learning_rate": 1.7356302597679554e-05, - "loss": 2.1447, - "step": 9785 - }, - { - "epoch": 1.66, - "grad_norm": 0.2255859375, - "learning_rate": 1.727317052615447e-05, - "loss": 2.121, - "step": 9790 - }, - { - "epoch": 1.66, - "grad_norm": 0.224609375, - "learning_rate": 1.719021919896433e-05, - "loss": 2.0826, - "step": 9795 - }, - { - "epoch": 1.66, - "grad_norm": 0.228515625, - "learning_rate": 1.7107448797343893e-05, - "loss": 2.102, - "step": 9800 - }, - { - "epoch": 1.66, - "grad_norm": 0.2314453125, - "learning_rate": 1.7024859502132696e-05, - "loss": 2.1022, - "step": 9805 - }, - { - "epoch": 1.66, - "grad_norm": 0.22265625, - "learning_rate": 1.6942451493774657e-05, - "loss": 2.0963, - "step": 9810 - }, - { - "epoch": 1.66, - "grad_norm": 0.2294921875, - "learning_rate": 1.6860224952317473e-05, - "loss": 2.1186, - "step": 9815 - }, - { - "epoch": 1.66, - "grad_norm": 0.236328125, - "learning_rate": 1.6778180057412486e-05, - "loss": 2.1112, - "step": 9820 - }, - { - "epoch": 1.66, - "grad_norm": 0.220703125, - "learning_rate": 1.6696316988314043e-05, - "loss": 2.1388, - "step": 9825 - }, - { - "epoch": 1.66, - "grad_norm": 0.232421875, - "learning_rate": 1.6614635923879362e-05, - "loss": 2.1583, - "step": 9830 - }, - { - "epoch": 1.67, - "grad_norm": 0.228515625, - "learning_rate": 1.6533137042567936e-05, - "loss": 2.1003, - "step": 9835 - }, - { - "epoch": 1.67, - "grad_norm": 0.220703125, - "learning_rate": 1.645182052244124e-05, - "loss": 2.111, - "step": 9840 - }, - { - "epoch": 1.67, - "grad_norm": 0.2333984375, - "learning_rate": 1.6370686541162327e-05, - "loss": 2.122, - "step": 9845 - }, - { - "epoch": 1.67, - "grad_norm": 0.2265625, - "learning_rate": 1.6289735275995433e-05, - "loss": 2.0957, - "step": 9850 - }, - { - "epoch": 1.67, - "grad_norm": 0.2294921875, - "learning_rate": 1.6208966903805555e-05, - "loss": 2.0987, - "step": 9855 - }, - { - "epoch": 1.67, - "grad_norm": 0.228515625, - "learning_rate": 1.6128381601058128e-05, - "loss": 2.0697, - "step": 9860 - }, - { - "epoch": 1.67, - "grad_norm": 0.2314453125, - "learning_rate": 1.6047979543818624e-05, - "loss": 2.1318, - "step": 9865 - }, - { - "epoch": 1.67, - "grad_norm": 0.2236328125, - "learning_rate": 1.5967760907752115e-05, - "loss": 2.1134, - "step": 9870 - }, - { - "epoch": 1.67, - "grad_norm": 0.2333984375, - "learning_rate": 1.5887725868123006e-05, - "loss": 2.1264, - "step": 9875 - }, - { - "epoch": 1.67, - "grad_norm": 0.232421875, - "learning_rate": 1.580787459979446e-05, - "loss": 2.0945, - "step": 9880 - }, - { - "epoch": 1.67, - "grad_norm": 0.2255859375, - "learning_rate": 1.57282072772282e-05, - "loss": 2.0919, - "step": 9885 - }, - { - "epoch": 1.67, - "grad_norm": 0.2236328125, - "learning_rate": 1.5648724074484056e-05, - "loss": 2.1147, - "step": 9890 - }, - { - "epoch": 1.68, - "grad_norm": 0.2294921875, - "learning_rate": 1.5569425165219586e-05, - "loss": 2.107, - "step": 9895 - }, - { - "epoch": 1.68, - "grad_norm": 0.232421875, - "learning_rate": 1.5490310722689693e-05, - "loss": 2.0979, - "step": 9900 - }, - { - "epoch": 1.68, - "grad_norm": 0.2353515625, - "learning_rate": 1.5411380919746255e-05, - "loss": 2.0866, - "step": 9905 - }, - { - "epoch": 1.68, - "grad_norm": 0.228515625, - "learning_rate": 1.5332635928837714e-05, - "loss": 2.1099, - "step": 9910 - }, - { - "epoch": 1.68, - "grad_norm": 0.2314453125, - "learning_rate": 1.5254075922008748e-05, - "loss": 2.1573, - "step": 9915 - }, - { - "epoch": 1.68, - "grad_norm": 0.228515625, - "learning_rate": 1.5175701070899896e-05, - "loss": 2.134, - "step": 9920 - }, - { - "epoch": 1.68, - "grad_norm": 0.23046875, - "learning_rate": 1.5097511546747146e-05, - "loss": 2.1199, - "step": 9925 - }, - { - "epoch": 1.68, - "grad_norm": 0.224609375, - "learning_rate": 1.501950752038158e-05, - "loss": 2.1321, - "step": 9930 - }, - { - "epoch": 1.68, - "grad_norm": 0.2294921875, - "learning_rate": 1.4941689162228977e-05, - "loss": 2.1165, - "step": 9935 - }, - { - "epoch": 1.68, - "grad_norm": 0.228515625, - "learning_rate": 1.4864056642309499e-05, - "loss": 2.1185, - "step": 9940 - }, - { - "epoch": 1.68, - "grad_norm": 0.224609375, - "learning_rate": 1.4786610130237244e-05, - "loss": 2.1314, - "step": 9945 - }, - { - "epoch": 1.69, - "grad_norm": 0.228515625, - "learning_rate": 1.4709349795219939e-05, - "loss": 2.0686, - "step": 9950 - }, - { - "epoch": 1.69, - "grad_norm": 0.2353515625, - "learning_rate": 1.4632275806058559e-05, - "loss": 2.1141, - "step": 9955 - }, - { - "epoch": 1.69, - "grad_norm": 0.2333984375, - "learning_rate": 1.4555388331146924e-05, - "loss": 2.1641, - "step": 9960 - }, - { - "epoch": 1.69, - "grad_norm": 0.2294921875, - "learning_rate": 1.4478687538471313e-05, - "loss": 2.0876, - "step": 9965 - }, - { - "epoch": 1.69, - "grad_norm": 0.224609375, - "learning_rate": 1.4402173595610213e-05, - "loss": 2.132, - "step": 9970 - }, - { - "epoch": 1.69, - "grad_norm": 0.2275390625, - "learning_rate": 1.4325846669733844e-05, - "loss": 2.0967, - "step": 9975 - }, - { - "epoch": 1.69, - "grad_norm": 0.2333984375, - "learning_rate": 1.4249706927603756e-05, - "loss": 2.1232, - "step": 9980 - }, - { - "epoch": 1.69, - "grad_norm": 0.2265625, - "learning_rate": 1.4173754535572658e-05, - "loss": 2.0908, - "step": 9985 - }, - { - "epoch": 1.69, - "grad_norm": 0.2236328125, - "learning_rate": 1.4097989659583876e-05, - "loss": 2.1086, - "step": 9990 - }, - { - "epoch": 1.69, - "grad_norm": 0.22265625, - "learning_rate": 1.4022412465170987e-05, - "loss": 2.117, - "step": 9995 - }, - { - "epoch": 1.69, - "grad_norm": 0.23046875, - "learning_rate": 1.3947023117457613e-05, - "loss": 2.1503, - "step": 10000 - }, - { - "epoch": 1.69, - "grad_norm": 0.236328125, - "learning_rate": 1.3871821781156858e-05, - "loss": 2.1238, - "step": 10005 - }, - { - "epoch": 1.7, - "grad_norm": 0.2294921875, - "learning_rate": 1.3796808620571121e-05, - "loss": 2.124, - "step": 10010 - }, - { - "epoch": 1.7, - "grad_norm": 0.228515625, - "learning_rate": 1.3721983799591732e-05, - "loss": 2.1265, - "step": 10015 - }, - { - "epoch": 1.7, - "grad_norm": 0.240234375, - "learning_rate": 1.3647347481698358e-05, - "loss": 2.1128, - "step": 10020 - }, - { - "epoch": 1.7, - "grad_norm": 0.22265625, - "learning_rate": 1.3572899829958963e-05, - "loss": 2.109, - "step": 10025 - }, - { - "epoch": 1.7, - "grad_norm": 0.2197265625, - "learning_rate": 1.3498641007029278e-05, - "loss": 2.1203, - "step": 10030 - }, - { - "epoch": 1.7, - "grad_norm": 0.232421875, - "learning_rate": 1.342457117515239e-05, - "loss": 2.1492, - "step": 10035 - }, - { - "epoch": 1.7, - "grad_norm": 0.2216796875, - "learning_rate": 1.3350690496158558e-05, - "loss": 2.0852, - "step": 10040 - }, - { - "epoch": 1.7, - "grad_norm": 0.2255859375, - "learning_rate": 1.3276999131464818e-05, - "loss": 2.1232, - "step": 10045 - }, - { - "epoch": 1.7, - "grad_norm": 0.228515625, - "learning_rate": 1.3203497242074437e-05, - "loss": 2.1541, - "step": 10050 - }, - { - "epoch": 1.7, - "grad_norm": 0.2314453125, - "learning_rate": 1.3130184988576855e-05, - "loss": 2.1114, - "step": 10055 - }, - { - "epoch": 1.7, - "grad_norm": 0.2265625, - "learning_rate": 1.3057062531147068e-05, - "loss": 2.0998, - "step": 10060 - }, - { - "epoch": 1.7, - "grad_norm": 0.2294921875, - "learning_rate": 1.2984130029545494e-05, - "loss": 2.1038, - "step": 10065 - }, - { - "epoch": 1.71, - "grad_norm": 0.220703125, - "learning_rate": 1.291138764311749e-05, - "loss": 2.135, - "step": 10070 - }, - { - "epoch": 1.71, - "grad_norm": 0.2236328125, - "learning_rate": 1.2838835530793048e-05, - "loss": 2.1491, - "step": 10075 - }, - { - "epoch": 1.71, - "grad_norm": 0.2294921875, - "learning_rate": 1.2766473851086435e-05, - "loss": 2.1368, - "step": 10080 - }, - { - "epoch": 1.71, - "grad_norm": 0.228515625, - "learning_rate": 1.2694302762095889e-05, - "loss": 2.0915, - "step": 10085 - }, - { - "epoch": 1.71, - "grad_norm": 0.2255859375, - "learning_rate": 1.2622322421503174e-05, - "loss": 2.1016, - "step": 10090 - }, - { - "epoch": 1.71, - "grad_norm": 0.2236328125, - "learning_rate": 1.2550532986573349e-05, - "loss": 2.1309, - "step": 10095 - }, - { - "epoch": 1.71, - "grad_norm": 0.23046875, - "learning_rate": 1.2478934614154359e-05, - "loss": 2.1227, - "step": 10100 - }, - { - "epoch": 1.71, - "grad_norm": 0.232421875, - "learning_rate": 1.2407527460676727e-05, - "loss": 2.1593, - "step": 10105 - }, - { - "epoch": 1.71, - "grad_norm": 0.228515625, - "learning_rate": 1.2336311682153201e-05, - "loss": 2.1171, - "step": 10110 - }, - { - "epoch": 1.71, - "grad_norm": 0.224609375, - "learning_rate": 1.2265287434178352e-05, - "loss": 2.0602, - "step": 10115 - }, - { - "epoch": 1.71, - "grad_norm": 0.2236328125, - "learning_rate": 1.2194454871928329e-05, - "loss": 2.099, - "step": 10120 - }, - { - "epoch": 1.71, - "grad_norm": 0.2314453125, - "learning_rate": 1.2123814150160484e-05, - "loss": 2.0976, - "step": 10125 - }, - { - "epoch": 1.72, - "grad_norm": 0.2333984375, - "learning_rate": 1.2053365423213026e-05, - "loss": 2.1502, - "step": 10130 - }, - { - "epoch": 1.72, - "grad_norm": 0.228515625, - "learning_rate": 1.1983108845004675e-05, - "loss": 2.1327, - "step": 10135 - }, - { - "epoch": 1.72, - "grad_norm": 0.224609375, - "learning_rate": 1.1913044569034382e-05, - "loss": 2.1257, - "step": 10140 - }, - { - "epoch": 1.72, - "grad_norm": 0.220703125, - "learning_rate": 1.1843172748380848e-05, - "loss": 2.1449, - "step": 10145 - }, - { - "epoch": 1.72, - "grad_norm": 0.2314453125, - "learning_rate": 1.1773493535702385e-05, - "loss": 2.0872, - "step": 10150 - }, - { - "epoch": 1.72, - "grad_norm": 0.2216796875, - "learning_rate": 1.1704007083236457e-05, - "loss": 2.1356, - "step": 10155 - }, - { - "epoch": 1.72, - "grad_norm": 0.2236328125, - "learning_rate": 1.1634713542799402e-05, - "loss": 2.1342, - "step": 10160 - }, - { - "epoch": 1.72, - "grad_norm": 0.23046875, - "learning_rate": 1.1565613065786029e-05, - "loss": 2.1246, - "step": 10165 - }, - { - "epoch": 1.72, - "grad_norm": 0.2275390625, - "learning_rate": 1.1496705803169405e-05, - "loss": 2.1233, - "step": 10170 - }, - { - "epoch": 1.72, - "grad_norm": 0.23046875, - "learning_rate": 1.1427991905500369e-05, - "loss": 2.1482, - "step": 10175 - }, - { - "epoch": 1.72, - "grad_norm": 0.234375, - "learning_rate": 1.1359471522907361e-05, - "loss": 2.1573, - "step": 10180 - }, - { - "epoch": 1.72, - "grad_norm": 0.228515625, - "learning_rate": 1.1291144805095954e-05, - "loss": 2.1015, - "step": 10185 - }, - { - "epoch": 1.73, - "grad_norm": 0.2236328125, - "learning_rate": 1.12230119013487e-05, - "loss": 2.1213, - "step": 10190 - }, - { - "epoch": 1.73, - "grad_norm": 0.2275390625, - "learning_rate": 1.1155072960524626e-05, - "loss": 2.1287, - "step": 10195 - }, - { - "epoch": 1.73, - "grad_norm": 0.2275390625, - "learning_rate": 1.1087328131058961e-05, - "loss": 2.1512, - "step": 10200 - }, - { - "epoch": 1.73, - "grad_norm": 0.2294921875, - "learning_rate": 1.1019777560962885e-05, - "loss": 2.1717, - "step": 10205 - }, - { - "epoch": 1.73, - "grad_norm": 0.2236328125, - "learning_rate": 1.0952421397823165e-05, - "loss": 2.1036, - "step": 10210 - }, - { - "epoch": 1.73, - "grad_norm": 0.228515625, - "learning_rate": 1.0885259788801716e-05, - "loss": 2.1408, - "step": 10215 - }, - { - "epoch": 1.73, - "grad_norm": 0.224609375, - "learning_rate": 1.0818292880635528e-05, - "loss": 2.1403, - "step": 10220 - }, - { - "epoch": 1.73, - "grad_norm": 0.228515625, - "learning_rate": 1.0751520819636141e-05, - "loss": 2.1093, - "step": 10225 - }, - { - "epoch": 1.73, - "grad_norm": 0.232421875, - "learning_rate": 1.0684943751689336e-05, - "loss": 2.1154, - "step": 10230 - }, - { - "epoch": 1.73, - "grad_norm": 0.234375, - "learning_rate": 1.0618561822254935e-05, - "loss": 2.1379, - "step": 10235 - }, - { - "epoch": 1.73, - "grad_norm": 0.23046875, - "learning_rate": 1.0552375176366369e-05, - "loss": 2.1437, - "step": 10240 - }, - { - "epoch": 1.74, - "grad_norm": 0.2451171875, - "learning_rate": 1.048638395863043e-05, - "loss": 2.0852, - "step": 10245 - }, - { - "epoch": 1.74, - "grad_norm": 0.2236328125, - "learning_rate": 1.0420588313226975e-05, - "loss": 2.1063, - "step": 10250 - }, - { - "epoch": 1.74, - "grad_norm": 0.23046875, - "learning_rate": 1.0354988383908482e-05, - "loss": 2.1128, - "step": 10255 - }, - { - "epoch": 1.74, - "grad_norm": 0.232421875, - "learning_rate": 1.0289584313999867e-05, - "loss": 2.1065, - "step": 10260 - }, - { - "epoch": 1.74, - "grad_norm": 0.228515625, - "learning_rate": 1.0224376246398148e-05, - "loss": 2.114, - "step": 10265 - }, - { - "epoch": 1.74, - "grad_norm": 0.2265625, - "learning_rate": 1.0159364323572052e-05, - "loss": 2.1456, - "step": 10270 - }, - { - "epoch": 1.74, - "grad_norm": 0.22265625, - "learning_rate": 1.0094548687561777e-05, - "loss": 2.0623, - "step": 10275 - }, - { - "epoch": 1.74, - "grad_norm": 0.232421875, - "learning_rate": 1.0029929479978773e-05, - "loss": 2.08, - "step": 10280 - }, - { - "epoch": 1.74, - "grad_norm": 0.224609375, - "learning_rate": 9.965506842005145e-06, - "loss": 2.1093, - "step": 10285 - }, - { - "epoch": 1.74, - "grad_norm": 0.2314453125, - "learning_rate": 9.901280914393696e-06, - "loss": 2.0921, - "step": 10290 - }, - { - "epoch": 1.74, - "grad_norm": 0.2275390625, - "learning_rate": 9.83725183746731e-06, - "loss": 2.1175, - "step": 10295 - }, - { - "epoch": 1.74, - "grad_norm": 0.2265625, - "learning_rate": 9.773419751118872e-06, - "loss": 2.1462, - "step": 10300 - }, - { - "epoch": 1.75, - "grad_norm": 0.23046875, - "learning_rate": 9.70978479481085e-06, - "loss": 2.1439, - "step": 10305 - }, - { - "epoch": 1.75, - "grad_norm": 0.2236328125, - "learning_rate": 9.646347107575037e-06, - "loss": 2.1056, - "step": 10310 - }, - { - "epoch": 1.75, - "grad_norm": 0.2333984375, - "learning_rate": 9.58310682801219e-06, - "loss": 2.1516, - "step": 10315 - }, - { - "epoch": 1.75, - "grad_norm": 0.2216796875, - "learning_rate": 9.520064094291791e-06, - "loss": 2.1227, - "step": 10320 - }, - { - "epoch": 1.75, - "grad_norm": 0.224609375, - "learning_rate": 9.457219044151689e-06, - "loss": 2.125, - "step": 10325 - }, - { - "epoch": 1.75, - "grad_norm": 0.2265625, - "learning_rate": 9.394571814897856e-06, - "loss": 2.1679, - "step": 10330 - }, - { - "epoch": 1.75, - "grad_norm": 0.2314453125, - "learning_rate": 9.332122543404031e-06, - "loss": 2.1152, - "step": 10335 - }, - { - "epoch": 1.75, - "grad_norm": 0.224609375, - "learning_rate": 9.269871366111494e-06, - "loss": 2.1207, - "step": 10340 - }, - { - "epoch": 1.75, - "grad_norm": 0.2265625, - "learning_rate": 9.207818419028669e-06, - "loss": 2.1568, - "step": 10345 - }, - { - "epoch": 1.75, - "grad_norm": 0.2314453125, - "learning_rate": 9.14596383773093e-06, - "loss": 2.1264, - "step": 10350 - }, - { - "epoch": 1.75, - "grad_norm": 0.2177734375, - "learning_rate": 9.0843077573602e-06, - "loss": 2.1534, - "step": 10355 - }, - { - "epoch": 1.75, - "grad_norm": 0.228515625, - "learning_rate": 9.02285031262473e-06, - "loss": 2.1215, - "step": 10360 - }, - { - "epoch": 1.76, - "grad_norm": 0.2294921875, - "learning_rate": 8.961591637798827e-06, - "loss": 2.1418, - "step": 10365 - }, - { - "epoch": 1.76, - "grad_norm": 0.2216796875, - "learning_rate": 8.900531866722472e-06, - "loss": 2.1256, - "step": 10370 - }, - { - "epoch": 1.76, - "grad_norm": 0.228515625, - "learning_rate": 8.839671132801097e-06, - "loss": 2.143, - "step": 10375 - }, - { - "epoch": 1.76, - "grad_norm": 0.2255859375, - "learning_rate": 8.779009569005236e-06, - "loss": 2.1145, - "step": 10380 - }, - { - "epoch": 1.76, - "grad_norm": 0.2265625, - "learning_rate": 8.718547307870316e-06, - "loss": 2.1316, - "step": 10385 - }, - { - "epoch": 1.76, - "grad_norm": 0.228515625, - "learning_rate": 8.658284481496303e-06, - "loss": 2.165, - "step": 10390 - }, - { - "epoch": 1.76, - "grad_norm": 0.2412109375, - "learning_rate": 8.59822122154741e-06, - "loss": 2.1197, - "step": 10395 - }, - { - "epoch": 1.76, - "grad_norm": 0.228515625, - "learning_rate": 8.538357659251872e-06, - "loss": 2.1258, - "step": 10400 - }, - { - "epoch": 1.76, - "grad_norm": 0.2236328125, - "learning_rate": 8.478693925401604e-06, - "loss": 2.1139, - "step": 10405 - }, - { - "epoch": 1.76, - "grad_norm": 0.2275390625, - "learning_rate": 8.419230150351886e-06, - "loss": 2.1272, - "step": 10410 - }, - { - "epoch": 1.76, - "grad_norm": 0.22265625, - "learning_rate": 8.359966464021196e-06, - "loss": 2.1235, - "step": 10415 - }, - { - "epoch": 1.76, - "grad_norm": 0.234375, - "learning_rate": 8.300902995890747e-06, - "loss": 2.1193, - "step": 10420 - }, - { - "epoch": 1.77, - "grad_norm": 0.2353515625, - "learning_rate": 8.242039875004437e-06, - "loss": 2.1293, - "step": 10425 - }, - { - "epoch": 1.77, - "grad_norm": 0.2265625, - "learning_rate": 8.18337722996837e-06, - "loss": 2.1085, - "step": 10430 - }, - { - "epoch": 1.77, - "grad_norm": 0.228515625, - "learning_rate": 8.124915188950611e-06, - "loss": 2.1161, - "step": 10435 - }, - { - "epoch": 1.77, - "grad_norm": 0.228515625, - "learning_rate": 8.066653879680997e-06, - "loss": 2.0748, - "step": 10440 - }, - { - "epoch": 1.77, - "grad_norm": 0.2333984375, - "learning_rate": 8.008593429450806e-06, - "loss": 2.1358, - "step": 10445 - }, - { - "epoch": 1.77, - "grad_norm": 0.21875, - "learning_rate": 7.950733965112378e-06, - "loss": 2.1242, - "step": 10450 - }, - { - "epoch": 1.77, - "grad_norm": 0.2294921875, - "learning_rate": 7.893075613079048e-06, - "loss": 2.1048, - "step": 10455 - }, - { - "epoch": 1.77, - "grad_norm": 0.2236328125, - "learning_rate": 7.835618499324726e-06, - "loss": 2.0658, - "step": 10460 - }, - { - "epoch": 1.77, - "grad_norm": 0.2275390625, - "learning_rate": 7.778362749383571e-06, - "loss": 2.1162, - "step": 10465 - }, - { - "epoch": 1.77, - "grad_norm": 0.2275390625, - "learning_rate": 7.72130848834991e-06, - "loss": 2.148, - "step": 10470 - }, - { - "epoch": 1.77, - "grad_norm": 0.2275390625, - "learning_rate": 7.66445584087776e-06, - "loss": 2.1371, - "step": 10475 - }, - { - "epoch": 1.77, - "grad_norm": 0.224609375, - "learning_rate": 7.607804931180662e-06, - "loss": 2.0816, - "step": 10480 - }, - { - "epoch": 1.78, - "grad_norm": 0.2294921875, - "learning_rate": 7.5513558830314745e-06, - "loss": 2.1102, - "step": 10485 - }, - { - "epoch": 1.78, - "grad_norm": 0.228515625, - "learning_rate": 7.495108819761898e-06, - "loss": 2.1227, - "step": 10490 - }, - { - "epoch": 1.78, - "grad_norm": 0.23046875, - "learning_rate": 7.43906386426243e-06, - "loss": 2.1205, - "step": 10495 - }, - { - "epoch": 1.78, - "grad_norm": 0.2255859375, - "learning_rate": 7.383221138981966e-06, - "loss": 2.1385, - "step": 10500 - }, - { - "epoch": 1.78, - "grad_norm": 0.228515625, - "learning_rate": 7.3275807659275e-06, - "loss": 2.0769, - "step": 10505 - }, - { - "epoch": 1.78, - "grad_norm": 0.2236328125, - "learning_rate": 7.272142866664023e-06, - "loss": 2.1113, - "step": 10510 - }, - { - "epoch": 1.78, - "grad_norm": 0.2294921875, - "learning_rate": 7.216907562314079e-06, - "loss": 2.1326, - "step": 10515 - }, - { - "epoch": 1.78, - "grad_norm": 0.2197265625, - "learning_rate": 7.161874973557625e-06, - "loss": 2.1203, - "step": 10520 - }, - { - "epoch": 1.78, - "grad_norm": 0.2255859375, - "learning_rate": 7.107045220631692e-06, - "loss": 2.1155, - "step": 10525 - }, - { - "epoch": 1.78, - "grad_norm": 0.2255859375, - "learning_rate": 7.05241842333012e-06, - "loss": 2.1306, - "step": 10530 - }, - { - "epoch": 1.78, - "grad_norm": 0.21875, - "learning_rate": 6.9979947010033965e-06, - "loss": 2.1211, - "step": 10535 - }, - { - "epoch": 1.79, - "grad_norm": 0.2265625, - "learning_rate": 6.943774172558259e-06, - "loss": 2.1107, - "step": 10540 - }, - { - "epoch": 1.79, - "grad_norm": 0.228515625, - "learning_rate": 6.889756956457538e-06, - "loss": 2.1414, - "step": 10545 - }, - { - "epoch": 1.79, - "grad_norm": 0.2333984375, - "learning_rate": 6.835943170719839e-06, - "loss": 2.106, - "step": 10550 - }, - { - "epoch": 1.79, - "grad_norm": 0.2265625, - "learning_rate": 6.782332932919344e-06, - "loss": 2.085, - "step": 10555 - }, - { - "epoch": 1.79, - "grad_norm": 0.220703125, - "learning_rate": 6.72892636018545e-06, - "loss": 2.113, - "step": 10560 - }, - { - "epoch": 1.79, - "grad_norm": 0.2294921875, - "learning_rate": 6.6757235692026295e-06, - "loss": 2.1459, - "step": 10565 - }, - { - "epoch": 1.79, - "grad_norm": 0.2314453125, - "learning_rate": 6.622724676210113e-06, - "loss": 2.1171, - "step": 10570 - }, - { - "epoch": 1.79, - "grad_norm": 0.228515625, - "learning_rate": 6.569929797001651e-06, - "loss": 2.0854, - "step": 10575 - }, - { - "epoch": 1.79, - "grad_norm": 0.23046875, - "learning_rate": 6.517339046925264e-06, - "loss": 2.0713, - "step": 10580 - }, - { - "epoch": 1.79, - "grad_norm": 0.2265625, - "learning_rate": 6.4649525408829846e-06, - "loss": 2.1328, - "step": 10585 - }, - { - "epoch": 1.79, - "grad_norm": 0.228515625, - "learning_rate": 6.412770393330558e-06, - "loss": 2.0968, - "step": 10590 - }, - { - "epoch": 1.79, - "grad_norm": 0.228515625, - "learning_rate": 6.36079271827732e-06, - "loss": 2.1114, - "step": 10595 - }, - { - "epoch": 1.8, - "grad_norm": 0.2333984375, - "learning_rate": 6.309019629285795e-06, - "loss": 2.1412, - "step": 10600 - }, - { - "epoch": 1.8, - "grad_norm": 0.240234375, - "learning_rate": 6.257451239471579e-06, - "loss": 2.1464, - "step": 10605 - }, - { - "epoch": 1.8, - "grad_norm": 0.22265625, - "learning_rate": 6.206087661503013e-06, - "loss": 2.0909, - "step": 10610 - }, - { - "epoch": 1.8, - "grad_norm": 0.23046875, - "learning_rate": 6.154929007600929e-06, - "loss": 2.1204, - "step": 10615 - }, - { - "epoch": 1.8, - "grad_norm": 0.2265625, - "learning_rate": 6.103975389538474e-06, - "loss": 2.1196, - "step": 10620 - }, - { - "epoch": 1.8, - "grad_norm": 0.2265625, - "learning_rate": 6.053226918640809e-06, - "loss": 2.1293, - "step": 10625 - }, - { - "epoch": 1.8, - "grad_norm": 0.2236328125, - "learning_rate": 6.002683705784884e-06, - "loss": 2.1184, - "step": 10630 - }, - { - "epoch": 1.8, - "grad_norm": 0.23046875, - "learning_rate": 5.9523458613992e-06, - "loss": 2.1225, - "step": 10635 - }, - { - "epoch": 1.8, - "grad_norm": 0.2294921875, - "learning_rate": 5.902213495463571e-06, - "loss": 2.1736, - "step": 10640 - }, - { - "epoch": 1.8, - "grad_norm": 0.2236328125, - "learning_rate": 5.852286717508826e-06, - "loss": 2.1048, - "step": 10645 - }, - { - "epoch": 1.8, - "grad_norm": 0.234375, - "learning_rate": 5.802565636616686e-06, - "loss": 2.108, - "step": 10650 - }, - { - "epoch": 1.8, - "grad_norm": 0.2236328125, - "learning_rate": 5.753050361419388e-06, - "loss": 2.1427, - "step": 10655 - }, - { - "epoch": 1.81, - "grad_norm": 0.2333984375, - "learning_rate": 5.703741000099594e-06, - "loss": 2.1246, - "step": 10660 - }, - { - "epoch": 1.81, - "grad_norm": 0.2216796875, - "learning_rate": 5.65463766039005e-06, - "loss": 2.1215, - "step": 10665 - }, - { - "epoch": 1.81, - "grad_norm": 0.23046875, - "learning_rate": 5.605740449573327e-06, - "loss": 2.0739, - "step": 10670 - }, - { - "epoch": 1.81, - "grad_norm": 0.23046875, - "learning_rate": 5.557049474481702e-06, - "loss": 2.1136, - "step": 10675 - }, - { - "epoch": 1.81, - "grad_norm": 0.2314453125, - "learning_rate": 5.508564841496855e-06, - "loss": 2.0865, - "step": 10680 - }, - { - "epoch": 1.81, - "grad_norm": 0.228515625, - "learning_rate": 5.4602866565495845e-06, - "loss": 2.1447, - "step": 10685 - }, - { - "epoch": 1.81, - "grad_norm": 0.228515625, - "learning_rate": 5.412215025119716e-06, - "loss": 2.1571, - "step": 10690 - }, - { - "epoch": 1.81, - "grad_norm": 0.21484375, - "learning_rate": 5.364350052235767e-06, - "loss": 2.1296, - "step": 10695 - }, - { - "epoch": 1.81, - "grad_norm": 0.220703125, - "learning_rate": 5.316691842474686e-06, - "loss": 2.1316, - "step": 10700 - }, - { - "epoch": 1.81, - "grad_norm": 0.23046875, - "learning_rate": 5.269240499961747e-06, - "loss": 2.1234, - "step": 10705 - }, - { - "epoch": 1.81, - "grad_norm": 0.2265625, - "learning_rate": 5.22199612837021e-06, - "loss": 2.1094, - "step": 10710 - }, - { - "epoch": 1.81, - "grad_norm": 0.2236328125, - "learning_rate": 5.17495883092115e-06, - "loss": 2.1003, - "step": 10715 - }, - { - "epoch": 1.82, - "grad_norm": 0.224609375, - "learning_rate": 5.1281287103832285e-06, - "loss": 2.1195, - "step": 10720 - }, - { - "epoch": 1.82, - "grad_norm": 0.2353515625, - "learning_rate": 5.081505869072445e-06, - "loss": 2.1281, - "step": 10725 - }, - { - "epoch": 1.82, - "grad_norm": 0.228515625, - "learning_rate": 5.035090408851961e-06, - "loss": 2.1098, - "step": 10730 - }, - { - "epoch": 1.82, - "grad_norm": 0.228515625, - "learning_rate": 4.988882431131814e-06, - "loss": 2.1547, - "step": 10735 - }, - { - "epoch": 1.82, - "grad_norm": 0.2294921875, - "learning_rate": 4.942882036868712e-06, - "loss": 2.1152, - "step": 10740 - }, - { - "epoch": 1.82, - "grad_norm": 0.2333984375, - "learning_rate": 4.897089326565874e-06, - "loss": 2.1086, - "step": 10745 - }, - { - "epoch": 1.82, - "grad_norm": 0.234375, - "learning_rate": 4.851504400272722e-06, - "loss": 2.1177, - "step": 10750 - }, - { - "epoch": 1.82, - "grad_norm": 0.2333984375, - "learning_rate": 4.806127357584745e-06, - "loss": 2.1149, - "step": 10755 - }, - { - "epoch": 1.82, - "grad_norm": 0.2255859375, - "learning_rate": 4.760958297643192e-06, - "loss": 2.1224, - "step": 10760 - }, - { - "epoch": 1.82, - "grad_norm": 0.2314453125, - "learning_rate": 4.715997319134968e-06, - "loss": 2.0825, - "step": 10765 - }, - { - "epoch": 1.82, - "grad_norm": 0.2314453125, - "learning_rate": 4.671244520292273e-06, - "loss": 2.1383, - "step": 10770 - }, - { - "epoch": 1.82, - "grad_norm": 0.232421875, - "learning_rate": 4.626699998892548e-06, - "loss": 2.1529, - "step": 10775 - }, - { - "epoch": 1.83, - "grad_norm": 0.234375, - "learning_rate": 4.58236385225812e-06, - "loss": 2.1247, - "step": 10780 - }, - { - "epoch": 1.83, - "grad_norm": 0.224609375, - "learning_rate": 4.538236177256106e-06, - "loss": 2.1216, - "step": 10785 - }, - { - "epoch": 1.83, - "grad_norm": 0.236328125, - "learning_rate": 4.4943170702981266e-06, - "loss": 2.1224, - "step": 10790 - }, - { - "epoch": 1.83, - "grad_norm": 0.236328125, - "learning_rate": 4.45060662734007e-06, - "loss": 2.1268, - "step": 10795 - }, - { - "epoch": 1.83, - "grad_norm": 0.220703125, - "learning_rate": 4.407104943882001e-06, - "loss": 2.1131, - "step": 10800 - }, - { - "epoch": 1.83, - "grad_norm": 0.23046875, - "learning_rate": 4.363812114967847e-06, - "loss": 2.1314, - "step": 10805 - }, - { - "epoch": 1.83, - "grad_norm": 0.228515625, - "learning_rate": 4.320728235185212e-06, - "loss": 2.1682, - "step": 10810 - }, - { - "epoch": 1.83, - "grad_norm": 0.2275390625, - "learning_rate": 4.277853398665199e-06, - "loss": 2.1185, - "step": 10815 - }, - { - "epoch": 1.83, - "grad_norm": 0.23046875, - "learning_rate": 4.2351876990821995e-06, - "loss": 2.1275, - "step": 10820 - }, - { - "epoch": 1.83, - "grad_norm": 0.2294921875, - "learning_rate": 4.192731229653623e-06, - "loss": 2.1367, - "step": 10825 - }, - { - "epoch": 1.83, - "grad_norm": 0.2236328125, - "learning_rate": 4.150484083139783e-06, - "loss": 2.1177, - "step": 10830 - }, - { - "epoch": 1.84, - "grad_norm": 0.2236328125, - "learning_rate": 4.108446351843676e-06, - "loss": 2.1122, - "step": 10835 - }, - { - "epoch": 1.84, - "grad_norm": 0.228515625, - "learning_rate": 4.066618127610722e-06, - "loss": 2.1636, - "step": 10840 - }, - { - "epoch": 1.84, - "grad_norm": 0.2255859375, - "learning_rate": 4.0249995018286415e-06, - "loss": 2.1378, - "step": 10845 - }, - { - "epoch": 1.84, - "grad_norm": 0.2216796875, - "learning_rate": 3.9835905654271535e-06, - "loss": 2.1095, - "step": 10850 - }, - { - "epoch": 1.84, - "grad_norm": 0.23046875, - "learning_rate": 3.942391408877922e-06, - "loss": 2.1403, - "step": 10855 - }, - { - "epoch": 1.84, - "grad_norm": 0.2236328125, - "learning_rate": 3.90140212219422e-06, - "loss": 2.0605, - "step": 10860 - }, - { - "epoch": 1.84, - "grad_norm": 0.2265625, - "learning_rate": 3.860622794930801e-06, - "loss": 2.0844, - "step": 10865 - }, - { - "epoch": 1.84, - "grad_norm": 0.23046875, - "learning_rate": 3.820053516183719e-06, - "loss": 2.1389, - "step": 10870 - }, - { - "epoch": 1.84, - "grad_norm": 0.2255859375, - "learning_rate": 3.7796943745900924e-06, - "loss": 2.132, - "step": 10875 - }, - { - "epoch": 1.84, - "grad_norm": 0.2255859375, - "learning_rate": 3.7395454583278868e-06, - "loss": 2.1547, - "step": 10880 - }, - { - "epoch": 1.84, - "grad_norm": 0.2353515625, - "learning_rate": 3.6996068551158115e-06, - "loss": 2.1167, - "step": 10885 - }, - { - "epoch": 1.84, - "grad_norm": 0.232421875, - "learning_rate": 3.659878652213056e-06, - "loss": 2.0709, - "step": 10890 - }, - { - "epoch": 1.85, - "grad_norm": 0.220703125, - "learning_rate": 3.620360936419109e-06, - "loss": 2.1322, - "step": 10895 - }, - { - "epoch": 1.85, - "grad_norm": 0.2275390625, - "learning_rate": 3.581053794073619e-06, - "loss": 2.1527, - "step": 10900 - }, - { - "epoch": 1.85, - "grad_norm": 0.2275390625, - "learning_rate": 3.541957311056132e-06, - "loss": 2.0757, - "step": 10905 - }, - { - "epoch": 1.85, - "grad_norm": 0.224609375, - "learning_rate": 3.503071572785932e-06, - "loss": 2.1048, - "step": 10910 - }, - { - "epoch": 1.85, - "grad_norm": 0.2314453125, - "learning_rate": 3.4643966642219137e-06, - "loss": 2.126, - "step": 10915 - }, - { - "epoch": 1.85, - "grad_norm": 0.228515625, - "learning_rate": 3.425932669862264e-06, - "loss": 2.1336, - "step": 10920 - }, - { - "epoch": 1.85, - "grad_norm": 0.23828125, - "learning_rate": 3.387679673744404e-06, - "loss": 2.1349, - "step": 10925 - }, - { - "epoch": 1.85, - "grad_norm": 0.2197265625, - "learning_rate": 3.3496377594447905e-06, - "loss": 2.1169, - "step": 10930 - }, - { - "epoch": 1.85, - "grad_norm": 0.2265625, - "learning_rate": 3.311807010078627e-06, - "loss": 2.1101, - "step": 10935 - }, - { - "epoch": 1.85, - "grad_norm": 0.2314453125, - "learning_rate": 3.2741875082998195e-06, - "loss": 2.1645, - "step": 10940 - }, - { - "epoch": 1.85, - "grad_norm": 0.2294921875, - "learning_rate": 3.2367793363007213e-06, - "loss": 2.1003, - "step": 10945 - }, - { - "epoch": 1.85, - "grad_norm": 0.2314453125, - "learning_rate": 3.19958257581191e-06, - "loss": 2.1141, - "step": 10950 - }, - { - "epoch": 1.86, - "grad_norm": 0.224609375, - "learning_rate": 3.162597308102144e-06, - "loss": 2.1581, - "step": 10955 - }, - { - "epoch": 1.86, - "grad_norm": 0.2314453125, - "learning_rate": 3.125823613978052e-06, - "loss": 2.0951, - "step": 10960 - }, - { - "epoch": 1.86, - "grad_norm": 0.232421875, - "learning_rate": 3.0892615737840413e-06, - "loss": 2.07, - "step": 10965 - }, - { - "epoch": 1.86, - "grad_norm": 0.22265625, - "learning_rate": 3.05291126740207e-06, - "loss": 2.1168, - "step": 10970 - }, - { - "epoch": 1.86, - "grad_norm": 0.22265625, - "learning_rate": 3.0167727742514974e-06, - "loss": 2.1106, - "step": 10975 - }, - { - "epoch": 1.86, - "grad_norm": 0.232421875, - "learning_rate": 2.980846173288898e-06, - "loss": 2.1058, - "step": 10980 - }, - { - "epoch": 1.86, - "grad_norm": 0.2314453125, - "learning_rate": 2.9451315430079174e-06, - "loss": 2.0987, - "step": 10985 - }, - { - "epoch": 1.86, - "grad_norm": 0.2314453125, - "learning_rate": 2.9096289614390815e-06, - "loss": 2.0906, - "step": 10990 - }, - { - "epoch": 1.86, - "grad_norm": 0.2392578125, - "learning_rate": 2.8743385061495876e-06, - "loss": 2.1334, - "step": 10995 - }, - { - "epoch": 1.86, - "grad_norm": 0.2275390625, - "learning_rate": 2.8392602542432366e-06, - "loss": 2.1099, - "step": 11000 - }, - { - "epoch": 1.86, - "grad_norm": 0.234375, - "learning_rate": 2.8043942823601233e-06, - "loss": 2.0759, - "step": 11005 - }, - { - "epoch": 1.86, - "grad_norm": 0.2236328125, - "learning_rate": 2.7697406666766123e-06, - "loss": 2.1445, - "step": 11010 - }, - { - "epoch": 1.87, - "grad_norm": 0.23046875, - "learning_rate": 2.7352994829050627e-06, - "loss": 2.1399, - "step": 11015 - }, - { - "epoch": 1.87, - "grad_norm": 0.232421875, - "learning_rate": 2.701070806293726e-06, - "loss": 2.1307, - "step": 11020 - }, - { - "epoch": 1.87, - "grad_norm": 0.2236328125, - "learning_rate": 2.66705471162656e-06, - "loss": 2.1047, - "step": 11025 - }, - { - "epoch": 1.87, - "grad_norm": 0.2255859375, - "learning_rate": 2.6332512732230585e-06, - "loss": 2.102, - "step": 11030 - }, - { - "epoch": 1.87, - "grad_norm": 0.2353515625, - "learning_rate": 2.5996605649381e-06, - "loss": 2.1327, - "step": 11035 - }, - { - "epoch": 1.87, - "grad_norm": 0.2294921875, - "learning_rate": 2.5662826601617783e-06, - "loss": 2.1174, - "step": 11040 - }, - { - "epoch": 1.87, - "grad_norm": 0.23046875, - "learning_rate": 2.5331176318192706e-06, - "loss": 2.1236, - "step": 11045 - }, - { - "epoch": 1.87, - "grad_norm": 0.232421875, - "learning_rate": 2.500165552370615e-06, - "loss": 2.0935, - "step": 11050 - }, - { - "epoch": 1.87, - "grad_norm": 0.224609375, - "learning_rate": 2.467426493810643e-06, - "loss": 2.1414, - "step": 11055 - }, - { - "epoch": 1.87, - "grad_norm": 0.220703125, - "learning_rate": 2.4349005276687042e-06, - "loss": 2.1383, - "step": 11060 - }, - { - "epoch": 1.87, - "grad_norm": 0.224609375, - "learning_rate": 2.4025877250086316e-06, - "loss": 2.1079, - "step": 11065 - }, - { - "epoch": 1.87, - "grad_norm": 0.2255859375, - "learning_rate": 2.3704881564285184e-06, - "loss": 2.1241, - "step": 11070 - }, - { - "epoch": 1.88, - "grad_norm": 0.23046875, - "learning_rate": 2.338601892060566e-06, - "loss": 2.0867, - "step": 11075 - }, - { - "epoch": 1.88, - "grad_norm": 0.2265625, - "learning_rate": 2.3069290015709565e-06, - "loss": 2.1409, - "step": 11080 - }, - { - "epoch": 1.88, - "grad_norm": 0.2265625, - "learning_rate": 2.2754695541596593e-06, - "loss": 2.1097, - "step": 11085 - }, - { - "epoch": 1.88, - "grad_norm": 0.2333984375, - "learning_rate": 2.2442236185603262e-06, - "loss": 2.0971, - "step": 11090 - }, - { - "epoch": 1.88, - "grad_norm": 0.220703125, - "learning_rate": 2.2131912630401485e-06, - "loss": 2.1069, - "step": 11095 - }, - { - "epoch": 1.88, - "grad_norm": 0.2275390625, - "learning_rate": 2.182372555399603e-06, - "loss": 2.1526, - "step": 11100 - }, - { - "epoch": 1.88, - "grad_norm": 0.228515625, - "learning_rate": 2.151767562972462e-06, - "loss": 2.1291, - "step": 11105 - }, - { - "epoch": 1.88, - "grad_norm": 0.234375, - "learning_rate": 2.121376352625537e-06, - "loss": 2.0771, - "step": 11110 - }, - { - "epoch": 1.88, - "grad_norm": 0.2255859375, - "learning_rate": 2.091198990758547e-06, - "loss": 2.1103, - "step": 11115 - }, - { - "epoch": 1.88, - "grad_norm": 0.232421875, - "learning_rate": 2.0612355433039965e-06, - "loss": 2.1198, - "step": 11120 - }, - { - "epoch": 1.88, - "grad_norm": 0.2255859375, - "learning_rate": 2.0314860757270295e-06, - "loss": 2.1741, - "step": 11125 - }, - { - "epoch": 1.89, - "grad_norm": 0.2275390625, - "learning_rate": 2.001950653025253e-06, - "loss": 2.1404, - "step": 11130 - }, - { - "epoch": 1.89, - "grad_norm": 0.2236328125, - "learning_rate": 1.9726293397286823e-06, - "loss": 2.1171, - "step": 11135 - }, - { - "epoch": 1.89, - "grad_norm": 0.2265625, - "learning_rate": 1.943522199899472e-06, - "loss": 2.12, - "step": 11140 - }, - { - "epoch": 1.89, - "grad_norm": 0.2255859375, - "learning_rate": 1.914629297131876e-06, - "loss": 2.1035, - "step": 11145 - }, - { - "epoch": 1.89, - "grad_norm": 0.23046875, - "learning_rate": 1.8859506945520856e-06, - "loss": 2.1324, - "step": 11150 - }, - { - "epoch": 1.89, - "grad_norm": 0.2333984375, - "learning_rate": 1.857486454818047e-06, - "loss": 2.0816, - "step": 11155 - }, - { - "epoch": 1.89, - "grad_norm": 0.2255859375, - "learning_rate": 1.8292366401193805e-06, - "loss": 2.1412, - "step": 11160 - }, - { - "epoch": 1.89, - "grad_norm": 0.224609375, - "learning_rate": 1.8012013121772475e-06, - "loss": 2.1293, - "step": 11165 - }, - { - "epoch": 1.89, - "grad_norm": 0.2216796875, - "learning_rate": 1.7733805322441398e-06, - "loss": 2.0747, - "step": 11170 - }, - { - "epoch": 1.89, - "grad_norm": 0.2265625, - "learning_rate": 1.7457743611038468e-06, - "loss": 2.126, - "step": 11175 - }, - { - "epoch": 1.89, - "grad_norm": 0.22265625, - "learning_rate": 1.7183828590712436e-06, - "loss": 2.102, - "step": 11180 - }, - { - "epoch": 1.89, - "grad_norm": 0.2255859375, - "learning_rate": 1.691206085992192e-06, - "loss": 2.1216, - "step": 11185 - }, - { - "epoch": 1.9, - "grad_norm": 0.2255859375, - "learning_rate": 1.6642441012434172e-06, - "loss": 2.1466, - "step": 11190 - }, - { - "epoch": 1.9, - "grad_norm": 0.22265625, - "learning_rate": 1.6374969637323545e-06, - "loss": 2.1029, - "step": 11195 - }, - { - "epoch": 1.9, - "grad_norm": 0.2255859375, - "learning_rate": 1.6109647318970466e-06, - "loss": 2.1073, - "step": 11200 - }, - { - "epoch": 1.9, - "grad_norm": 0.2197265625, - "learning_rate": 1.5846474637060015e-06, - "loss": 2.0883, - "step": 11205 - }, - { - "epoch": 1.9, - "grad_norm": 0.228515625, - "learning_rate": 1.5585452166580583e-06, - "loss": 2.1062, - "step": 11210 - }, - { - "epoch": 1.9, - "grad_norm": 0.2236328125, - "learning_rate": 1.5326580477822761e-06, - "loss": 2.167, - "step": 11215 - }, - { - "epoch": 1.9, - "grad_norm": 0.2255859375, - "learning_rate": 1.5069860136378121e-06, - "loss": 2.1129, - "step": 11220 - }, - { - "epoch": 1.9, - "grad_norm": 0.2353515625, - "learning_rate": 1.481529170313778e-06, - "loss": 2.075, - "step": 11225 - }, - { - "epoch": 1.9, - "grad_norm": 0.2314453125, - "learning_rate": 1.456287573429138e-06, - "loss": 2.1242, - "step": 11230 - }, - { - "epoch": 1.9, - "grad_norm": 0.224609375, - "learning_rate": 1.4312612781325785e-06, - "loss": 2.1539, - "step": 11235 - }, - { - "epoch": 1.9, - "grad_norm": 0.2255859375, - "learning_rate": 1.406450339102361e-06, - "loss": 2.0581, - "step": 11240 - }, - { - "epoch": 1.9, - "grad_norm": 0.2236328125, - "learning_rate": 1.381854810546268e-06, - "loss": 2.1453, - "step": 11245 - }, - { - "epoch": 1.91, - "grad_norm": 0.228515625, - "learning_rate": 1.357474746201426e-06, - "loss": 2.1207, - "step": 11250 - }, - { - "epoch": 1.91, - "grad_norm": 0.2490234375, - "learning_rate": 1.3333101993342145e-06, - "loss": 2.1136, - "step": 11255 - }, - { - "epoch": 1.91, - "grad_norm": 0.22265625, - "learning_rate": 1.3093612227401576e-06, - "loss": 2.0805, - "step": 11260 - }, - { - "epoch": 1.91, - "grad_norm": 0.2216796875, - "learning_rate": 1.285627868743744e-06, - "loss": 2.1168, - "step": 11265 - }, - { - "epoch": 1.91, - "grad_norm": 0.228515625, - "learning_rate": 1.2621101891984289e-06, - "loss": 2.0865, - "step": 11270 - }, - { - "epoch": 1.91, - "grad_norm": 0.232421875, - "learning_rate": 1.2388082354863994e-06, - "loss": 2.1729, - "step": 11275 - }, - { - "epoch": 1.91, - "grad_norm": 0.232421875, - "learning_rate": 1.2157220585185536e-06, - "loss": 2.0999, - "step": 11280 - }, - { - "epoch": 1.91, - "grad_norm": 0.22265625, - "learning_rate": 1.1928517087343327e-06, - "loss": 2.1423, - "step": 11285 - }, - { - "epoch": 1.91, - "grad_norm": 0.2294921875, - "learning_rate": 1.1701972361016443e-06, - "loss": 2.1503, - "step": 11290 - }, - { - "epoch": 1.91, - "grad_norm": 0.2265625, - "learning_rate": 1.1477586901167403e-06, - "loss": 2.1066, - "step": 11295 - }, - { - "epoch": 1.91, - "grad_norm": 0.22265625, - "learning_rate": 1.1255361198040938e-06, - "loss": 2.1753, - "step": 11300 - }, - { - "epoch": 1.91, - "grad_norm": 0.228515625, - "learning_rate": 1.1035295737163221e-06, - "loss": 2.1592, - "step": 11305 - }, - { - "epoch": 1.92, - "grad_norm": 0.2265625, - "learning_rate": 1.0817390999340537e-06, - "loss": 2.1417, - "step": 11310 - }, - { - "epoch": 1.92, - "grad_norm": 0.2294921875, - "learning_rate": 1.0601647460658615e-06, - "loss": 2.1685, - "step": 11315 - }, - { - "epoch": 1.92, - "grad_norm": 0.220703125, - "learning_rate": 1.0388065592480956e-06, - "loss": 2.0922, - "step": 11320 - }, - { - "epoch": 1.92, - "grad_norm": 0.2275390625, - "learning_rate": 1.0176645861448285e-06, - "loss": 2.1161, - "step": 11325 - }, - { - "epoch": 1.92, - "grad_norm": 0.23046875, - "learning_rate": 9.967388729477779e-07, - "loss": 2.1453, - "step": 11330 - }, - { - "epoch": 1.92, - "grad_norm": 0.2216796875, - "learning_rate": 9.760294653761048e-07, - "loss": 2.1358, - "step": 11335 - }, - { - "epoch": 1.92, - "grad_norm": 0.2294921875, - "learning_rate": 9.555364086764273e-07, - "loss": 2.0958, - "step": 11340 - }, - { - "epoch": 1.92, - "grad_norm": 0.22265625, - "learning_rate": 9.352597476226743e-07, - "loss": 2.1375, - "step": 11345 - }, - { - "epoch": 1.92, - "grad_norm": 0.22265625, - "learning_rate": 9.15199526515953e-07, - "loss": 2.1524, - "step": 11350 - }, - { - "epoch": 1.92, - "grad_norm": 0.2275390625, - "learning_rate": 8.953557891844933e-07, - "loss": 2.0964, - "step": 11355 - }, - { - "epoch": 1.92, - "grad_norm": 0.2392578125, - "learning_rate": 8.757285789835923e-07, - "loss": 2.1318, - "step": 11360 - }, - { - "epoch": 1.92, - "grad_norm": 0.220703125, - "learning_rate": 8.563179387953812e-07, - "loss": 2.1265, - "step": 11365 - }, - { - "epoch": 1.93, - "grad_norm": 0.236328125, - "learning_rate": 8.371239110289252e-07, - "loss": 2.1221, - "step": 11370 - }, - { - "epoch": 1.93, - "grad_norm": 0.224609375, - "learning_rate": 8.181465376199348e-07, - "loss": 2.1379, - "step": 11375 - }, - { - "epoch": 1.93, - "grad_norm": 0.251953125, - "learning_rate": 7.993858600308324e-07, - "loss": 2.1531, - "step": 11380 - }, - { - "epoch": 1.93, - "grad_norm": 0.228515625, - "learning_rate": 7.808419192505745e-07, - "loss": 2.1383, - "step": 11385 - }, - { - "epoch": 1.93, - "grad_norm": 0.2265625, - "learning_rate": 7.625147557945633e-07, - "loss": 2.174, - "step": 11390 - }, - { - "epoch": 1.93, - "grad_norm": 0.2197265625, - "learning_rate": 7.44404409704591e-07, - "loss": 2.1006, - "step": 11395 - }, - { - "epoch": 1.93, - "grad_norm": 0.22265625, - "learning_rate": 7.26510920548773e-07, - "loss": 2.1225, - "step": 11400 - }, - { - "epoch": 1.93, - "grad_norm": 0.22265625, - "learning_rate": 7.088343274213926e-07, - "loss": 2.0748, - "step": 11405 - }, - { - "epoch": 1.93, - "grad_norm": 0.228515625, - "learning_rate": 6.913746689428458e-07, - "loss": 2.1151, - "step": 11410 - }, - { - "epoch": 1.93, - "grad_norm": 0.2314453125, - "learning_rate": 6.741319832595849e-07, - "loss": 2.1177, - "step": 11415 - }, - { - "epoch": 1.93, - "grad_norm": 0.2275390625, - "learning_rate": 6.571063080440087e-07, - "loss": 2.1477, - "step": 11420 - }, - { - "epoch": 1.93, - "grad_norm": 0.244140625, - "learning_rate": 6.402976804943728e-07, - "loss": 2.1342, - "step": 11425 - }, - { - "epoch": 1.94, - "grad_norm": 0.2236328125, - "learning_rate": 6.23706137334723e-07, - "loss": 2.1416, - "step": 11430 - }, - { - "epoch": 1.94, - "grad_norm": 0.2197265625, - "learning_rate": 6.073317148148294e-07, - "loss": 2.0855, - "step": 11435 - }, - { - "epoch": 1.94, - "grad_norm": 0.224609375, - "learning_rate": 5.911744487100745e-07, - "loss": 2.1301, - "step": 11440 - }, - { - "epoch": 1.94, - "grad_norm": 0.22265625, - "learning_rate": 5.752343743213873e-07, - "loss": 2.1179, - "step": 11445 - }, - { - "epoch": 1.94, - "grad_norm": 0.23046875, - "learning_rate": 5.595115264751649e-07, - "loss": 2.0996, - "step": 11450 - }, - { - "epoch": 1.94, - "grad_norm": 0.2314453125, - "learning_rate": 5.440059395232178e-07, - "loss": 2.128, - "step": 11455 - }, - { - "epoch": 1.94, - "grad_norm": 0.224609375, - "learning_rate": 5.287176473426692e-07, - "loss": 2.1684, - "step": 11460 - }, - { - "epoch": 1.94, - "grad_norm": 0.2314453125, - "learning_rate": 5.136466833358999e-07, - "loss": 2.1402, - "step": 11465 - }, - { - "epoch": 1.94, - "grad_norm": 0.2236328125, - "learning_rate": 4.987930804304375e-07, - "loss": 2.0991, - "step": 11470 - }, - { - "epoch": 1.94, - "grad_norm": 0.2236328125, - "learning_rate": 4.841568710789335e-07, - "loss": 2.0907, - "step": 11475 - }, - { - "epoch": 1.94, - "grad_norm": 0.23046875, - "learning_rate": 4.697380872590751e-07, - "loss": 2.1306, - "step": 11480 - }, - { - "epoch": 1.95, - "grad_norm": 0.228515625, - "learning_rate": 4.55536760473485e-07, - "loss": 2.1029, - "step": 11485 - }, - { - "epoch": 1.95, - "grad_norm": 0.232421875, - "learning_rate": 4.4155292174971054e-07, - "loss": 2.1302, - "step": 11490 - }, - { - "epoch": 1.95, - "grad_norm": 0.2275390625, - "learning_rate": 4.2778660164011217e-07, - "loss": 2.1341, - "step": 11495 - }, - { - "epoch": 1.95, - "grad_norm": 0.2373046875, - "learning_rate": 4.142378302217864e-07, - "loss": 2.1308, - "step": 11500 - }, - { - "epoch": 1.95, - "grad_norm": 0.23046875, - "learning_rate": 4.0090663709655417e-07, - "loss": 2.1504, - "step": 11505 - }, - { - "epoch": 1.95, - "grad_norm": 0.2275390625, - "learning_rate": 3.877930513908501e-07, - "loss": 2.1156, - "step": 11510 - }, - { - "epoch": 1.95, - "grad_norm": 0.228515625, - "learning_rate": 3.7489710175566686e-07, - "loss": 2.1101, - "step": 11515 - }, - { - "epoch": 1.95, - "grad_norm": 0.2314453125, - "learning_rate": 3.622188163664997e-07, - "loss": 2.1366, - "step": 11520 - }, - { - "epoch": 1.95, - "grad_norm": 0.2373046875, - "learning_rate": 3.4975822292331317e-07, - "loss": 2.1219, - "step": 11525 - }, - { - "epoch": 1.95, - "grad_norm": 0.2265625, - "learning_rate": 3.375153486504079e-07, - "loss": 2.0802, - "step": 11530 - }, - { - "epoch": 1.95, - "grad_norm": 0.2265625, - "learning_rate": 3.254902202964205e-07, - "loss": 2.117, - "step": 11535 - }, - { - "epoch": 1.95, - "grad_norm": 0.2197265625, - "learning_rate": 3.1368286413426817e-07, - "loss": 2.0799, - "step": 11540 - }, - { - "epoch": 1.96, - "grad_norm": 0.2314453125, - "learning_rate": 3.0209330596104866e-07, - "loss": 2.1347, - "step": 11545 - }, - { - "epoch": 1.96, - "grad_norm": 0.236328125, - "learning_rate": 2.9072157109800714e-07, - "loss": 2.1671, - "step": 11550 - }, - { - "epoch": 1.96, - "grad_norm": 0.2353515625, - "learning_rate": 2.7956768439050265e-07, - "loss": 2.1185, - "step": 11555 - }, - { - "epoch": 1.96, - "grad_norm": 0.224609375, - "learning_rate": 2.686316702079084e-07, - "loss": 2.1238, - "step": 11560 - }, - { - "epoch": 1.96, - "grad_norm": 0.2294921875, - "learning_rate": 2.579135524436005e-07, - "loss": 2.1151, - "step": 11565 - }, - { - "epoch": 1.96, - "grad_norm": 0.2255859375, - "learning_rate": 2.4741335451488047e-07, - "loss": 2.0894, - "step": 11570 - }, - { - "epoch": 1.96, - "grad_norm": 0.23046875, - "learning_rate": 2.3713109936291944e-07, - "loss": 2.1385, - "step": 11575 - }, - { - "epoch": 1.96, - "grad_norm": 0.2236328125, - "learning_rate": 2.2706680945273617e-07, - "loss": 2.1139, - "step": 11580 - }, - { - "epoch": 1.96, - "grad_norm": 0.224609375, - "learning_rate": 2.1722050677313032e-07, - "loss": 2.1246, - "step": 11585 - }, - { - "epoch": 1.96, - "grad_norm": 0.2265625, - "learning_rate": 2.075922128366381e-07, - "loss": 2.1213, - "step": 11590 - }, { "epoch": 1.96, - "grad_norm": 0.2265625, - "learning_rate": 1.981819486794656e-07, - "loss": 2.1515, - "step": 11595 + "grad_norm": 0.166015625, + "learning_rate": 2.076312736516206e-07, + "loss": 2.1282, + "step": 5795 }, { "epoch": 1.96, - "grad_norm": 0.2294921875, - "learning_rate": 1.8898973486146664e-07, - "loss": 2.1291, - "step": 11600 - }, - { - "epoch": 1.97, - "grad_norm": 0.224609375, - "learning_rate": 1.8001559146612058e-07, - "loss": 2.112, - "step": 11605 - }, - { - "epoch": 1.97, - "grad_norm": 0.2373046875, - "learning_rate": 1.7125953810041007e-07, - "loss": 2.1438, - "step": 11610 - }, - { - "epoch": 1.97, - "grad_norm": 0.232421875, - "learning_rate": 1.6272159389486564e-07, - "loss": 2.1497, - "step": 11615 - }, - { - "epoch": 1.97, - "grad_norm": 0.228515625, - "learning_rate": 1.5440177750346563e-07, - "loss": 2.1304, - "step": 11620 - }, - { - "epoch": 1.97, - "grad_norm": 0.2265625, - "learning_rate": 1.4630010710363628e-07, - "loss": 2.1374, - "step": 11625 - }, - { - "epoch": 1.97, - "grad_norm": 0.22265625, - "learning_rate": 1.384166003961518e-07, - "loss": 2.137, - "step": 11630 + "grad_norm": 0.16796875, + "learning_rate": 1.890252965145112e-07, + "loss": 2.0929, + "step": 5800 }, { "epoch": 1.97, - "grad_norm": 0.2255859375, - "learning_rate": 1.3075127460518976e-07, - "loss": 2.1289, - "step": 11635 + "grad_norm": 0.166015625, + "learning_rate": 1.7129176446692984e-07, + "loss": 2.0959, + "step": 5805 }, { "epoch": 1.97, - "grad_norm": 0.2275390625, - "learning_rate": 1.23304146478187e-07, - "loss": 2.147, - "step": 11640 + "grad_norm": 0.1689453125, + "learning_rate": 1.5443083251720503e-07, + "loss": 2.1329, + "step": 5810 }, { "epoch": 1.97, - "grad_norm": 0.2265625, - "learning_rate": 1.1607523228588379e-07, - "loss": 2.1013, - "step": 11645 + "grad_norm": 0.1689453125, + "learning_rate": 1.384426480462997e-07, + "loss": 2.0969, + "step": 5815 }, { "epoch": 1.97, - "grad_norm": 0.2216796875, - "learning_rate": 1.090645478222574e-07, - "loss": 2.1047, - "step": 11650 + "grad_norm": 0.16796875, + "learning_rate": 1.2332735080651248e-07, + "loss": 2.0809, + "step": 5820 }, { "epoch": 1.97, - "grad_norm": 0.224609375, - "learning_rate": 1.0227210840448864e-07, - "loss": 2.116, - "step": 11655 + "grad_norm": 0.1708984375, + "learning_rate": 1.0908507292026748e-07, + "loss": 2.133, + "step": 5825 }, { "epoch": 1.97, - "grad_norm": 0.2158203125, - "learning_rate": 9.569792887290651e-08, - "loss": 2.0968, - "step": 11660 - }, - { - "epoch": 1.98, - "grad_norm": 0.23046875, - "learning_rate": 8.934202359102139e-08, - "loss": 2.1216, - "step": 11665 - }, - { - "epoch": 1.98, - "grad_norm": 0.2275390625, - "learning_rate": 8.320440644541405e-08, - "loss": 2.1317, - "step": 11670 - }, - { - "epoch": 1.98, - "grad_norm": 0.2265625, - "learning_rate": 7.728509084574676e-08, - "loss": 2.1112, - "step": 11675 - }, - { - "epoch": 1.98, - "grad_norm": 0.2294921875, - "learning_rate": 7.158408972476327e-08, - "loss": 2.1439, - "step": 11680 - }, - { - "epoch": 1.98, - "grad_norm": 0.2255859375, - "learning_rate": 6.610141553816674e-08, - "loss": 2.084, - "step": 11685 - }, - { - "epoch": 1.98, - "grad_norm": 0.2333984375, - "learning_rate": 6.083708026471957e-08, - "loss": 2.1251, - "step": 11690 + "grad_norm": 0.1650390625, + "learning_rate": 9.571593887891528e-08, + "loss": 2.1076, + "step": 5830 }, { "epoch": 1.98, - "grad_norm": 0.22265625, - "learning_rate": 5.579109540609917e-08, - "loss": 2.1635, - "step": 11695 + "grad_norm": 0.1708984375, + "learning_rate": 8.322006554171146e-08, + "loss": 2.142, + "step": 5835 }, { "epoch": 1.98, - "grad_norm": 0.2275390625, - "learning_rate": 5.096347198694229e-08, - "loss": 2.0743, - "step": 11700 + "grad_norm": 0.1669921875, + "learning_rate": 7.159756213476199e-08, + "loss": 2.0921, + "step": 5840 }, { "epoch": 1.98, - "grad_norm": 0.2373046875, - "learning_rate": 4.6354220554800655e-08, - "loss": 2.1271, - "step": 11705 + "grad_norm": 0.1640625, + "learning_rate": 6.084853025005721e-08, + "loss": 2.1118, + "step": 5845 }, { "epoch": 1.98, - "grad_norm": 0.22265625, - "learning_rate": 4.196335118012984e-08, + "grad_norm": 0.169921875, + "learning_rate": 5.0973063844605986e-08, "loss": 2.0982, - "step": 11710 + "step": 5850 }, { "epoch": 1.98, - "grad_norm": 0.228515625, - "learning_rate": 3.779087345624488e-08, - "loss": 2.1128, - "step": 11715 + "grad_norm": 0.169921875, + "learning_rate": 4.1971249239591834e-08, + "loss": 2.0941, + "step": 5855 }, { "epoch": 1.98, - "grad_norm": 0.23046875, - "learning_rate": 3.383679649929805e-08, - "loss": 2.1136, - "step": 11720 - }, - { - "epoch": 1.99, - "grad_norm": 0.228515625, - "learning_rate": 3.010112894831219e-08, - "loss": 2.1439, - "step": 11725 - }, - { - "epoch": 1.99, - "grad_norm": 0.2255859375, - "learning_rate": 2.6583878965080745e-08, - "loss": 2.1431, - "step": 11730 - }, - { - "epoch": 1.99, - "grad_norm": 0.224609375, - "learning_rate": 2.3285054234223334e-08, - "loss": 2.1114, - "step": 11735 - }, - { - "epoch": 1.99, - "grad_norm": 0.2275390625, - "learning_rate": 2.0204661963107996e-08, - "loss": 2.1307, - "step": 11740 - }, - { - "epoch": 1.99, - "grad_norm": 0.2275390625, - "learning_rate": 1.7342708881884496e-08, - "loss": 2.1688, - "step": 11745 + "grad_norm": 0.1640625, + "learning_rate": 3.384316511964025e-08, + "loss": 2.1074, + "step": 5860 }, { "epoch": 1.99, - "grad_norm": 0.2255859375, - "learning_rate": 1.469920124343993e-08, - "loss": 2.1105, - "step": 11750 + "grad_norm": 0.1689453125, + "learning_rate": 2.658888253211922e-08, + "loss": 2.107, + "step": 5865 }, { "epoch": 1.99, - "grad_norm": 0.2314453125, - "learning_rate": 1.2274144823409828e-08, - "loss": 2.1762, - "step": 11755 + "grad_norm": 0.171875, + "learning_rate": 2.0208464886517508e-08, + "loss": 2.1051, + "step": 5870 }, { "epoch": 1.99, - "grad_norm": 0.23828125, - "learning_rate": 1.006754492012263e-08, - "loss": 2.174, - "step": 11760 + "grad_norm": 0.16796875, + "learning_rate": 1.4701967953911766e-08, + "loss": 2.1026, + "step": 5875 }, { "epoch": 1.99, - "grad_norm": 0.2275390625, - "learning_rate": 8.079406354644103e-09, - "loss": 2.1249, - "step": 11765 + "grad_norm": 0.1689453125, + "learning_rate": 1.0069439866422503e-08, + "loss": 2.1107, + "step": 5880 }, { "epoch": 1.99, - "grad_norm": 0.2294921875, - "learning_rate": 6.309733470721835e-09, - "loss": 2.096, - "step": 11770 + "grad_norm": 0.171875, + "learning_rate": 6.3109211168699275e-09, + "loss": 2.1133, + "step": 5885 }, { "epoch": 1.99, - "grad_norm": 0.22265625, - "learning_rate": 4.758530134785222e-09, - "loss": 2.089, - "step": 11775 - }, - { - "epoch": 2.0, - "grad_norm": 0.2265625, - "learning_rate": 3.425799735978785e-09, - "loss": 2.1153, - "step": 11780 - }, - { - "epoch": 2.0, - "grad_norm": 0.2265625, - "learning_rate": 2.3115451860733495e-09, - "loss": 2.1192, - "step": 11785 - }, - { - "epoch": 2.0, - "grad_norm": 0.236328125, - "learning_rate": 1.4157689195326563e-09, - "loss": 2.1282, - "step": 11790 - }, - { - "epoch": 2.0, - "grad_norm": 0.2353515625, - "learning_rate": 7.3847289349116e-10, - "loss": 2.0832, - "step": 11795 + "grad_norm": 0.1630859375, + "learning_rate": 3.4264445583631622e-09, + "loss": 2.1141, + "step": 5890 }, { "epoch": 2.0, - "grad_norm": 0.2265625, - "learning_rate": 2.796585877207214e-10, - "loss": 2.1107, - "step": 11800 + "grad_norm": 0.169921875, + "learning_rate": 1.4160354040448908e-09, + "loss": 2.0934, + "step": 5895 }, { "epoch": 2.0, - "grad_norm": 0.2353515625, - "learning_rate": 3.932700465281158e-11, - "loss": 2.1311, - "step": 11805 + "grad_norm": 0.169921875, + "learning_rate": 2.7971122683601023e-10, + "loss": 2.1251, + "step": 5900 }, { "epoch": 2.0, - "eval_loss": 2.1430556774139404, - "eval_runtime": 161.578, - "eval_samples_per_second": 16.444, - "eval_steps_per_second": 2.061, - "step": 11808 + "eval_loss": 2.1270718574523926, + "eval_runtime": 156.9365, + "eval_samples_per_second": 8.462, + "eval_steps_per_second": 1.058, + "step": 5904 }, { "epoch": 2.0, - "step": 11808, - "total_flos": 6.077393230092042e+17, - "train_loss": 2.1527459967507903, - "train_runtime": 22011.903, - "train_samples_per_second": 4.292, - "train_steps_per_second": 0.536 + "step": 5904, + "total_flos": 6.077264558911652e+17, + "train_loss": 2.1395560028107186, + "train_runtime": 20794.4522, + "train_samples_per_second": 2.272, + "train_steps_per_second": 0.284 } ], "logging_steps": 5, - "max_steps": 11808, + "max_steps": 5904, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 100, - "total_flos": 6.077393230092042e+17, + "total_flos": 6.077264558911652e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null