{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.2554355997955073, "global_step": 525000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.000000000000001e-07, "loss": 14.3541, "step": 100 }, { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 14.3676, "step": 200 }, { "epoch": 0.0, "learning_rate": 1.5e-06, "loss": 14.2824, "step": 300 }, { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 14.2217, "step": 400 }, { "epoch": 0.0, "learning_rate": 2.5e-06, "loss": 13.7982, "step": 500 }, { "epoch": 0.0, "learning_rate": 3e-06, "loss": 13.6274, "step": 600 }, { "epoch": 0.0, "learning_rate": 3.5000000000000004e-06, "loss": 13.2412, "step": 700 }, { "epoch": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 12.586, "step": 800 }, { "epoch": 0.0, "learning_rate": 4.5e-06, "loss": 12.0465, "step": 900 }, { "epoch": 0.0, "learning_rate": 5e-06, "loss": 11.3204, "step": 1000 }, { "epoch": 0.0, "learning_rate": 5.500000000000001e-06, "loss": 10.3171, "step": 1100 }, { "epoch": 0.01, "learning_rate": 6e-06, "loss": 9.2676, "step": 1200 }, { "epoch": 0.01, "learning_rate": 6.5000000000000004e-06, "loss": 8.2344, "step": 1300 }, { "epoch": 0.01, "learning_rate": 7.000000000000001e-06, "loss": 6.8489, "step": 1400 }, { "epoch": 0.01, "learning_rate": 7.5e-06, "loss": 5.5019, "step": 1500 }, { "epoch": 0.01, "learning_rate": 8.000000000000001e-06, "loss": 4.6103, "step": 1600 }, { "epoch": 0.01, "learning_rate": 8.500000000000002e-06, "loss": 4.0935, "step": 1700 }, { "epoch": 0.01, "learning_rate": 9e-06, "loss": 3.9416, "step": 1800 }, { "epoch": 0.01, "learning_rate": 9.5e-06, "loss": 3.7521, "step": 1900 }, { "epoch": 0.01, "learning_rate": 1e-05, "loss": 3.6218, "step": 2000 }, { "epoch": 0.01, "learning_rate": 1.05e-05, "loss": 3.5332, "step": 2100 }, { "epoch": 0.01, "learning_rate": 1.1000000000000001e-05, "loss": 3.3914, "step": 2200 }, { "epoch": 0.01, "learning_rate": 1.1500000000000002e-05, "loss": 3.3178, "step": 2300 }, { "epoch": 0.01, "learning_rate": 1.2e-05, "loss": 3.2905, "step": 2400 }, { "epoch": 0.01, "learning_rate": 1.25e-05, "loss": 3.1829, "step": 2500 }, { "epoch": 0.01, "learning_rate": 1.3000000000000001e-05, "loss": 3.1098, "step": 2600 }, { "epoch": 0.01, "learning_rate": 1.3500000000000001e-05, "loss": 3.0184, "step": 2700 }, { "epoch": 0.01, "learning_rate": 1.4000000000000001e-05, "loss": 2.9732, "step": 2800 }, { "epoch": 0.01, "learning_rate": 1.45e-05, "loss": 2.9343, "step": 2900 }, { "epoch": 0.01, "learning_rate": 1.5e-05, "loss": 2.9035, "step": 3000 }, { "epoch": 0.01, "learning_rate": 1.55e-05, "loss": 2.8675, "step": 3100 }, { "epoch": 0.01, "learning_rate": 1.6000000000000003e-05, "loss": 2.8479, "step": 3200 }, { "epoch": 0.01, "learning_rate": 1.65e-05, "loss": 2.8325, "step": 3300 }, { "epoch": 0.01, "learning_rate": 1.7000000000000003e-05, "loss": 2.7818, "step": 3400 }, { "epoch": 0.02, "learning_rate": 1.75e-05, "loss": 2.7728, "step": 3500 }, { "epoch": 0.02, "learning_rate": 1.8e-05, "loss": 2.7172, "step": 3600 }, { "epoch": 0.02, "learning_rate": 1.85e-05, "loss": 2.7153, "step": 3700 }, { "epoch": 0.02, "learning_rate": 1.9e-05, "loss": 2.6476, "step": 3800 }, { "epoch": 0.02, "learning_rate": 1.9500000000000003e-05, "loss": 2.6379, "step": 3900 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 2.6251, "step": 4000 }, { "epoch": 0.02, "learning_rate": 2.05e-05, "loss": 2.622, "step": 4100 }, { "epoch": 0.02, "learning_rate": 2.1e-05, "loss": 2.5939, "step": 4200 }, { "epoch": 0.02, "learning_rate": 2.15e-05, "loss": 2.5749, "step": 4300 }, { "epoch": 0.02, "learning_rate": 2.2000000000000003e-05, "loss": 2.5746, "step": 4400 }, { "epoch": 0.02, "learning_rate": 2.25e-05, "loss": 2.5466, "step": 4500 }, { "epoch": 0.02, "learning_rate": 2.3000000000000003e-05, "loss": 2.5754, "step": 4600 }, { "epoch": 0.02, "learning_rate": 2.35e-05, "loss": 2.5217, "step": 4700 }, { "epoch": 0.02, "learning_rate": 2.4e-05, "loss": 2.5456, "step": 4800 }, { "epoch": 0.02, "learning_rate": 2.45e-05, "loss": 2.4834, "step": 4900 }, { "epoch": 0.02, "learning_rate": 2.5e-05, "loss": 2.49, "step": 5000 }, { "epoch": 0.02, "eval_loss": 1.9592857360839844, "eval_runtime": 18.7264, "eval_samples_per_second": 534.004, "eval_steps_per_second": 16.714, "step": 5000 }, { "epoch": 0.02, "learning_rate": 2.5500000000000003e-05, "loss": 2.4657, "step": 5100 }, { "epoch": 0.02, "learning_rate": 2.6000000000000002e-05, "loss": 2.457, "step": 5200 }, { "epoch": 0.02, "learning_rate": 2.6500000000000004e-05, "loss": 2.4793, "step": 5300 }, { "epoch": 0.02, "learning_rate": 2.7000000000000002e-05, "loss": 2.4802, "step": 5400 }, { "epoch": 0.02, "learning_rate": 2.7500000000000004e-05, "loss": 2.4615, "step": 5500 }, { "epoch": 0.02, "learning_rate": 2.8000000000000003e-05, "loss": 2.4107, "step": 5600 }, { "epoch": 0.02, "learning_rate": 2.8499999999999998e-05, "loss": 2.4498, "step": 5700 }, { "epoch": 0.02, "learning_rate": 2.9e-05, "loss": 2.3964, "step": 5800 }, { "epoch": 0.03, "learning_rate": 2.95e-05, "loss": 2.4961, "step": 5900 }, { "epoch": 0.03, "learning_rate": 3e-05, "loss": 2.4611, "step": 6000 }, { "epoch": 0.03, "learning_rate": 3.05e-05, "loss": 2.4297, "step": 6100 }, { "epoch": 0.03, "learning_rate": 3.1e-05, "loss": 2.4205, "step": 6200 }, { "epoch": 0.03, "learning_rate": 3.15e-05, "loss": 2.4465, "step": 6300 }, { "epoch": 0.03, "learning_rate": 3.2000000000000005e-05, "loss": 2.4045, "step": 6400 }, { "epoch": 0.03, "learning_rate": 3.2500000000000004e-05, "loss": 2.4217, "step": 6500 }, { "epoch": 0.03, "learning_rate": 3.3e-05, "loss": 2.3831, "step": 6600 }, { "epoch": 0.03, "learning_rate": 3.35e-05, "loss": 2.3653, "step": 6700 }, { "epoch": 0.03, "learning_rate": 3.4000000000000007e-05, "loss": 2.3325, "step": 6800 }, { "epoch": 0.03, "learning_rate": 3.45e-05, "loss": 2.38, "step": 6900 }, { "epoch": 0.03, "learning_rate": 3.5e-05, "loss": 2.3534, "step": 7000 }, { "epoch": 0.03, "learning_rate": 3.55e-05, "loss": 2.3172, "step": 7100 }, { "epoch": 0.03, "learning_rate": 3.6e-05, "loss": 2.3546, "step": 7200 }, { "epoch": 0.03, "learning_rate": 3.65e-05, "loss": 2.311, "step": 7300 }, { "epoch": 0.03, "learning_rate": 3.7e-05, "loss": 2.2996, "step": 7400 }, { "epoch": 0.03, "learning_rate": 3.7500000000000003e-05, "loss": 2.3427, "step": 7500 }, { "epoch": 0.03, "learning_rate": 3.8e-05, "loss": 2.2847, "step": 7600 }, { "epoch": 0.03, "learning_rate": 3.85e-05, "loss": 2.2906, "step": 7700 }, { "epoch": 0.03, "learning_rate": 3.9000000000000006e-05, "loss": 2.2957, "step": 7800 }, { "epoch": 0.03, "learning_rate": 3.9500000000000005e-05, "loss": 2.2933, "step": 7900 }, { "epoch": 0.03, "learning_rate": 4e-05, "loss": 2.2658, "step": 8000 }, { "epoch": 0.03, "learning_rate": 4.05e-05, "loss": 2.2802, "step": 8100 }, { "epoch": 0.04, "learning_rate": 4.1e-05, "loss": 2.2505, "step": 8200 }, { "epoch": 0.04, "learning_rate": 4.15e-05, "loss": 2.2688, "step": 8300 }, { "epoch": 0.04, "learning_rate": 4.2e-05, "loss": 2.2176, "step": 8400 }, { "epoch": 0.04, "learning_rate": 4.25e-05, "loss": 2.2627, "step": 8500 }, { "epoch": 0.04, "learning_rate": 4.3e-05, "loss": 2.266, "step": 8600 }, { "epoch": 0.04, "learning_rate": 4.35e-05, "loss": 2.2386, "step": 8700 }, { "epoch": 0.04, "learning_rate": 4.4000000000000006e-05, "loss": 2.2339, "step": 8800 }, { "epoch": 0.04, "learning_rate": 4.4500000000000004e-05, "loss": 2.2431, "step": 8900 }, { "epoch": 0.04, "learning_rate": 4.5e-05, "loss": 2.2403, "step": 9000 }, { "epoch": 0.04, "learning_rate": 4.55e-05, "loss": 2.2199, "step": 9100 }, { "epoch": 0.04, "learning_rate": 4.600000000000001e-05, "loss": 2.228, "step": 9200 }, { "epoch": 0.04, "learning_rate": 4.6500000000000005e-05, "loss": 2.2257, "step": 9300 }, { "epoch": 0.04, "learning_rate": 4.7e-05, "loss": 2.2182, "step": 9400 }, { "epoch": 0.04, "learning_rate": 4.75e-05, "loss": 2.2455, "step": 9500 }, { "epoch": 0.04, "learning_rate": 4.8e-05, "loss": 2.2127, "step": 9600 }, { "epoch": 0.04, "learning_rate": 4.85e-05, "loss": 2.1893, "step": 9700 }, { "epoch": 0.04, "learning_rate": 4.9e-05, "loss": 2.1823, "step": 9800 }, { "epoch": 0.04, "learning_rate": 4.9500000000000004e-05, "loss": 2.1782, "step": 9900 }, { "epoch": 0.04, "learning_rate": 5e-05, "loss": 2.1625, "step": 10000 }, { "epoch": 0.04, "eval_loss": 1.8114508390426636, "eval_runtime": 18.7835, "eval_samples_per_second": 532.381, "eval_steps_per_second": 16.664, "step": 10000 }, { "epoch": 0.04, "learning_rate": 4.9994571613446764e-05, "loss": 2.2058, "step": 10100 }, { "epoch": 0.04, "learning_rate": 4.998914322689353e-05, "loss": 2.2157, "step": 10200 }, { "epoch": 0.04, "learning_rate": 4.9983714840340295e-05, "loss": 2.1651, "step": 10300 }, { "epoch": 0.04, "learning_rate": 4.997828645378706e-05, "loss": 2.2339, "step": 10400 }, { "epoch": 0.05, "learning_rate": 4.9972858067233825e-05, "loss": 2.1859, "step": 10500 }, { "epoch": 0.05, "learning_rate": 4.9967429680680594e-05, "loss": 2.1955, "step": 10600 }, { "epoch": 0.05, "learning_rate": 4.9962001294127356e-05, "loss": 2.1904, "step": 10700 }, { "epoch": 0.05, "learning_rate": 4.9956572907574125e-05, "loss": 2.1527, "step": 10800 }, { "epoch": 0.05, "learning_rate": 4.9951144521020886e-05, "loss": 2.1523, "step": 10900 }, { "epoch": 0.05, "learning_rate": 4.994571613446765e-05, "loss": 2.1323, "step": 11000 }, { "epoch": 0.05, "learning_rate": 4.994028774791442e-05, "loss": 2.142, "step": 11100 }, { "epoch": 0.05, "learning_rate": 4.993485936136118e-05, "loss": 2.1683, "step": 11200 }, { "epoch": 0.05, "learning_rate": 4.992943097480795e-05, "loss": 2.1413, "step": 11300 }, { "epoch": 0.05, "learning_rate": 4.992400258825471e-05, "loss": 2.1629, "step": 11400 }, { "epoch": 0.05, "learning_rate": 4.991857420170147e-05, "loss": 2.1141, "step": 11500 }, { "epoch": 0.05, "learning_rate": 4.9913145815148247e-05, "loss": 2.1713, "step": 11600 }, { "epoch": 0.05, "learning_rate": 4.990771742859501e-05, "loss": 2.1313, "step": 11700 }, { "epoch": 0.05, "learning_rate": 4.990228904204177e-05, "loss": 2.1351, "step": 11800 }, { "epoch": 0.05, "learning_rate": 4.989686065548854e-05, "loss": 2.1445, "step": 11900 }, { "epoch": 0.05, "learning_rate": 4.98914322689353e-05, "loss": 2.1467, "step": 12000 }, { "epoch": 0.05, "learning_rate": 4.988600388238206e-05, "loss": 2.1156, "step": 12100 }, { "epoch": 0.05, "learning_rate": 4.988057549582883e-05, "loss": 2.1247, "step": 12200 }, { "epoch": 0.05, "learning_rate": 4.987514710927559e-05, "loss": 2.1161, "step": 12300 }, { "epoch": 0.05, "learning_rate": 4.9869718722722355e-05, "loss": 2.112, "step": 12400 }, { "epoch": 0.05, "learning_rate": 4.9864290336169124e-05, "loss": 2.1165, "step": 12500 }, { "epoch": 0.05, "learning_rate": 4.9858861949615886e-05, "loss": 2.1256, "step": 12600 }, { "epoch": 0.05, "learning_rate": 4.9853433563062654e-05, "loss": 2.0823, "step": 12700 }, { "epoch": 0.05, "learning_rate": 4.984800517650942e-05, "loss": 2.1153, "step": 12800 }, { "epoch": 0.06, "learning_rate": 4.9842576789956185e-05, "loss": 2.109, "step": 12900 }, { "epoch": 0.06, "learning_rate": 4.983714840340295e-05, "loss": 2.1053, "step": 13000 }, { "epoch": 0.06, "learning_rate": 4.9831720016849715e-05, "loss": 2.1166, "step": 13100 }, { "epoch": 0.06, "learning_rate": 4.982629163029648e-05, "loss": 2.0674, "step": 13200 }, { "epoch": 0.06, "learning_rate": 4.9820863243743246e-05, "loss": 2.1002, "step": 13300 }, { "epoch": 0.06, "learning_rate": 4.981543485719001e-05, "loss": 2.095, "step": 13400 }, { "epoch": 0.06, "learning_rate": 4.981000647063677e-05, "loss": 2.0961, "step": 13500 }, { "epoch": 0.06, "learning_rate": 4.980457808408354e-05, "loss": 2.0961, "step": 13600 }, { "epoch": 0.06, "learning_rate": 4.979914969753031e-05, "loss": 2.079, "step": 13700 }, { "epoch": 0.06, "learning_rate": 4.979372131097707e-05, "loss": 2.0919, "step": 13800 }, { "epoch": 0.06, "learning_rate": 4.978829292442384e-05, "loss": 2.1167, "step": 13900 }, { "epoch": 0.06, "learning_rate": 4.97828645378706e-05, "loss": 2.0993, "step": 14000 }, { "epoch": 0.06, "learning_rate": 4.977743615131736e-05, "loss": 2.0618, "step": 14100 }, { "epoch": 0.06, "learning_rate": 4.977200776476413e-05, "loss": 2.0935, "step": 14200 }, { "epoch": 0.06, "learning_rate": 4.976657937821089e-05, "loss": 2.0928, "step": 14300 }, { "epoch": 0.06, "learning_rate": 4.9761150991657654e-05, "loss": 2.1033, "step": 14400 }, { "epoch": 0.06, "learning_rate": 4.975572260510442e-05, "loss": 2.1083, "step": 14500 }, { "epoch": 0.06, "learning_rate": 4.9750294218551184e-05, "loss": 2.0568, "step": 14600 }, { "epoch": 0.06, "learning_rate": 4.974486583199795e-05, "loss": 2.039, "step": 14700 }, { "epoch": 0.06, "learning_rate": 4.973943744544472e-05, "loss": 2.0834, "step": 14800 }, { "epoch": 0.06, "learning_rate": 4.973400905889148e-05, "loss": 2.0732, "step": 14900 }, { "epoch": 0.06, "learning_rate": 4.9728580672338245e-05, "loss": 2.0624, "step": 15000 }, { "epoch": 0.06, "eval_loss": 1.7244441509246826, "eval_runtime": 18.7641, "eval_samples_per_second": 532.933, "eval_steps_per_second": 16.681, "step": 15000 }, { "epoch": 0.06, "learning_rate": 4.9723152285785014e-05, "loss": 2.0532, "step": 15100 }, { "epoch": 0.07, "learning_rate": 4.9717723899231776e-05, "loss": 2.0569, "step": 15200 }, { "epoch": 0.07, "learning_rate": 4.9712295512678544e-05, "loss": 2.0622, "step": 15300 }, { "epoch": 0.07, "learning_rate": 4.9706867126125306e-05, "loss": 2.0448, "step": 15400 }, { "epoch": 0.07, "learning_rate": 4.970143873957207e-05, "loss": 2.0594, "step": 15500 }, { "epoch": 0.07, "learning_rate": 4.969601035301884e-05, "loss": 2.0486, "step": 15600 }, { "epoch": 0.07, "learning_rate": 4.96905819664656e-05, "loss": 2.0488, "step": 15700 }, { "epoch": 0.07, "learning_rate": 4.968515357991237e-05, "loss": 2.03, "step": 15800 }, { "epoch": 0.07, "learning_rate": 4.9679725193359136e-05, "loss": 2.0228, "step": 15900 }, { "epoch": 0.07, "learning_rate": 4.96742968068059e-05, "loss": 2.0109, "step": 16000 }, { "epoch": 0.07, "learning_rate": 4.966886842025266e-05, "loss": 2.0503, "step": 16100 }, { "epoch": 0.07, "learning_rate": 4.966344003369943e-05, "loss": 2.0112, "step": 16200 }, { "epoch": 0.07, "learning_rate": 4.965801164714619e-05, "loss": 2.0324, "step": 16300 }, { "epoch": 0.07, "learning_rate": 4.965258326059295e-05, "loss": 2.0737, "step": 16400 }, { "epoch": 0.07, "learning_rate": 4.964715487403972e-05, "loss": 2.0184, "step": 16500 }, { "epoch": 0.07, "learning_rate": 4.964172648748648e-05, "loss": 2.0513, "step": 16600 }, { "epoch": 0.07, "learning_rate": 4.963629810093325e-05, "loss": 2.0329, "step": 16700 }, { "epoch": 0.07, "learning_rate": 4.963086971438001e-05, "loss": 2.0303, "step": 16800 }, { "epoch": 0.07, "learning_rate": 4.962544132782678e-05, "loss": 2.0042, "step": 16900 }, { "epoch": 0.07, "learning_rate": 4.9620012941273544e-05, "loss": 2.0358, "step": 17000 }, { "epoch": 0.07, "learning_rate": 4.961458455472031e-05, "loss": 2.041, "step": 17100 }, { "epoch": 0.07, "learning_rate": 4.9609156168167074e-05, "loss": 2.0349, "step": 17200 }, { "epoch": 0.07, "learning_rate": 4.960372778161384e-05, "loss": 2.0428, "step": 17300 }, { "epoch": 0.07, "learning_rate": 4.9598299395060605e-05, "loss": 2.0209, "step": 17400 }, { "epoch": 0.08, "learning_rate": 4.959287100850737e-05, "loss": 2.0384, "step": 17500 }, { "epoch": 0.08, "learning_rate": 4.9587442621954135e-05, "loss": 2.0594, "step": 17600 }, { "epoch": 0.08, "learning_rate": 4.95820142354009e-05, "loss": 2.0087, "step": 17700 }, { "epoch": 0.08, "learning_rate": 4.957658584884766e-05, "loss": 2.0458, "step": 17800 }, { "epoch": 0.08, "learning_rate": 4.9571157462294434e-05, "loss": 2.0377, "step": 17900 }, { "epoch": 0.08, "learning_rate": 4.9565729075741196e-05, "loss": 2.0111, "step": 18000 }, { "epoch": 0.08, "learning_rate": 4.956030068918796e-05, "loss": 2.0155, "step": 18100 }, { "epoch": 0.08, "learning_rate": 4.955487230263473e-05, "loss": 2.0355, "step": 18200 }, { "epoch": 0.08, "learning_rate": 4.954944391608149e-05, "loss": 2.0127, "step": 18300 }, { "epoch": 0.08, "learning_rate": 4.954401552952825e-05, "loss": 1.9876, "step": 18400 }, { "epoch": 0.08, "learning_rate": 4.953858714297502e-05, "loss": 2.0298, "step": 18500 }, { "epoch": 0.08, "learning_rate": 4.953315875642178e-05, "loss": 2.0306, "step": 18600 }, { "epoch": 0.08, "learning_rate": 4.952773036986855e-05, "loss": 2.0032, "step": 18700 }, { "epoch": 0.08, "learning_rate": 4.952230198331531e-05, "loss": 1.9919, "step": 18800 }, { "epoch": 0.08, "learning_rate": 4.9516873596762074e-05, "loss": 1.9956, "step": 18900 }, { "epoch": 0.08, "learning_rate": 4.951144521020884e-05, "loss": 1.9778, "step": 19000 }, { "epoch": 0.08, "learning_rate": 4.950601682365561e-05, "loss": 1.9788, "step": 19100 }, { "epoch": 0.08, "learning_rate": 4.950058843710237e-05, "loss": 2.003, "step": 19200 }, { "epoch": 0.08, "learning_rate": 4.949516005054914e-05, "loss": 2.0157, "step": 19300 }, { "epoch": 0.08, "learning_rate": 4.94897316639959e-05, "loss": 2.0052, "step": 19400 }, { "epoch": 0.08, "learning_rate": 4.9484303277442665e-05, "loss": 2.0187, "step": 19500 }, { "epoch": 0.08, "learning_rate": 4.9478874890889434e-05, "loss": 1.9949, "step": 19600 }, { "epoch": 0.08, "learning_rate": 4.9473446504336196e-05, "loss": 1.9735, "step": 19700 }, { "epoch": 0.09, "learning_rate": 4.946801811778296e-05, "loss": 1.9628, "step": 19800 }, { "epoch": 0.09, "learning_rate": 4.9462589731229726e-05, "loss": 1.9955, "step": 19900 }, { "epoch": 0.09, "learning_rate": 4.9457161344676495e-05, "loss": 1.9873, "step": 20000 }, { "epoch": 0.09, "eval_loss": 1.6870460510253906, "eval_runtime": 18.8219, "eval_samples_per_second": 531.295, "eval_steps_per_second": 16.63, "step": 20000 }, { "epoch": 0.09, "learning_rate": 4.945173295812326e-05, "loss": 1.9887, "step": 20100 }, { "epoch": 0.09, "learning_rate": 4.9446304571570025e-05, "loss": 2.0004, "step": 20200 }, { "epoch": 0.09, "learning_rate": 4.944087618501679e-05, "loss": 1.9967, "step": 20300 }, { "epoch": 0.09, "learning_rate": 4.943544779846355e-05, "loss": 2.0021, "step": 20400 }, { "epoch": 0.09, "learning_rate": 4.943001941191032e-05, "loss": 1.9617, "step": 20500 }, { "epoch": 0.09, "learning_rate": 4.942459102535708e-05, "loss": 1.9755, "step": 20600 }, { "epoch": 0.09, "learning_rate": 4.941916263880385e-05, "loss": 2.0077, "step": 20700 }, { "epoch": 0.09, "learning_rate": 4.941373425225061e-05, "loss": 2.0046, "step": 20800 }, { "epoch": 0.09, "learning_rate": 4.940830586569737e-05, "loss": 1.9379, "step": 20900 }, { "epoch": 0.09, "learning_rate": 4.940287747914414e-05, "loss": 2.0161, "step": 21000 }, { "epoch": 0.09, "learning_rate": 4.939744909259091e-05, "loss": 2.0048, "step": 21100 }, { "epoch": 0.09, "learning_rate": 4.939202070603767e-05, "loss": 1.9474, "step": 21200 }, { "epoch": 0.09, "learning_rate": 4.938659231948444e-05, "loss": 1.9633, "step": 21300 }, { "epoch": 0.09, "learning_rate": 4.93811639329312e-05, "loss": 1.9736, "step": 21400 }, { "epoch": 0.09, "learning_rate": 4.9375735546377964e-05, "loss": 1.9645, "step": 21500 }, { "epoch": 0.09, "learning_rate": 4.937030715982473e-05, "loss": 1.9823, "step": 21600 }, { "epoch": 0.09, "learning_rate": 4.9364878773271494e-05, "loss": 1.9318, "step": 21700 }, { "epoch": 0.09, "learning_rate": 4.9359450386718256e-05, "loss": 1.9283, "step": 21800 }, { "epoch": 0.09, "learning_rate": 4.9354022000165025e-05, "loss": 1.9433, "step": 21900 }, { "epoch": 0.09, "learning_rate": 4.9348593613611787e-05, "loss": 1.9767, "step": 22000 }, { "epoch": 0.09, "learning_rate": 4.9343165227058555e-05, "loss": 1.9709, "step": 22100 }, { "epoch": 0.1, "learning_rate": 4.9337736840505324e-05, "loss": 1.9541, "step": 22200 }, { "epoch": 0.1, "learning_rate": 4.9332308453952086e-05, "loss": 1.9767, "step": 22300 }, { "epoch": 0.1, "learning_rate": 4.932688006739885e-05, "loss": 1.959, "step": 22400 }, { "epoch": 0.1, "learning_rate": 4.9321451680845616e-05, "loss": 1.9543, "step": 22500 }, { "epoch": 0.1, "learning_rate": 4.931602329429238e-05, "loss": 1.9563, "step": 22600 }, { "epoch": 0.1, "learning_rate": 4.931059490773915e-05, "loss": 1.94, "step": 22700 }, { "epoch": 0.1, "learning_rate": 4.930516652118591e-05, "loss": 1.9529, "step": 22800 }, { "epoch": 0.1, "learning_rate": 4.929973813463267e-05, "loss": 1.9763, "step": 22900 }, { "epoch": 0.1, "learning_rate": 4.929430974807944e-05, "loss": 2.0103, "step": 23000 }, { "epoch": 0.1, "learning_rate": 4.928888136152621e-05, "loss": 1.9782, "step": 23100 }, { "epoch": 0.1, "learning_rate": 4.928345297497297e-05, "loss": 1.9247, "step": 23200 }, { "epoch": 0.1, "learning_rate": 4.927802458841974e-05, "loss": 1.9302, "step": 23300 }, { "epoch": 0.1, "learning_rate": 4.92725962018665e-05, "loss": 1.9572, "step": 23400 }, { "epoch": 0.1, "learning_rate": 4.926716781531326e-05, "loss": 1.9389, "step": 23500 }, { "epoch": 0.1, "learning_rate": 4.926173942876003e-05, "loss": 1.9863, "step": 23600 }, { "epoch": 0.1, "learning_rate": 4.925631104220679e-05, "loss": 1.9723, "step": 23700 }, { "epoch": 0.1, "learning_rate": 4.9250882655653555e-05, "loss": 1.9609, "step": 23800 }, { "epoch": 0.1, "learning_rate": 4.924545426910032e-05, "loss": 1.9826, "step": 23900 }, { "epoch": 0.1, "learning_rate": 4.9240025882547085e-05, "loss": 1.9683, "step": 24000 }, { "epoch": 0.1, "learning_rate": 4.9234597495993854e-05, "loss": 1.9441, "step": 24100 }, { "epoch": 0.1, "learning_rate": 4.922916910944062e-05, "loss": 1.888, "step": 24200 }, { "epoch": 0.1, "learning_rate": 4.9223740722887384e-05, "loss": 1.9307, "step": 24300 }, { "epoch": 0.1, "learning_rate": 4.9218312336334146e-05, "loss": 1.9494, "step": 24400 }, { "epoch": 0.11, "learning_rate": 4.9212883949780915e-05, "loss": 1.9249, "step": 24500 }, { "epoch": 0.11, "learning_rate": 4.920745556322768e-05, "loss": 1.936, "step": 24600 }, { "epoch": 0.11, "learning_rate": 4.9202027176674445e-05, "loss": 1.9151, "step": 24700 }, { "epoch": 0.11, "learning_rate": 4.919659879012121e-05, "loss": 1.9327, "step": 24800 }, { "epoch": 0.11, "learning_rate": 4.919117040356797e-05, "loss": 1.9377, "step": 24900 }, { "epoch": 0.11, "learning_rate": 4.918574201701474e-05, "loss": 1.9319, "step": 25000 }, { "epoch": 0.11, "eval_loss": 1.659123420715332, "eval_runtime": 18.8272, "eval_samples_per_second": 531.147, "eval_steps_per_second": 16.625, "step": 25000 }, { "epoch": 0.11, "learning_rate": 4.91803136304615e-05, "loss": 1.9475, "step": 25100 }, { "epoch": 0.11, "learning_rate": 4.917488524390827e-05, "loss": 1.9135, "step": 25200 }, { "epoch": 0.11, "learning_rate": 4.916945685735504e-05, "loss": 1.9213, "step": 25300 }, { "epoch": 0.11, "learning_rate": 4.91640284708018e-05, "loss": 1.9438, "step": 25400 }, { "epoch": 0.11, "learning_rate": 4.915860008424856e-05, "loss": 1.9821, "step": 25500 }, { "epoch": 0.11, "learning_rate": 4.915317169769533e-05, "loss": 1.9149, "step": 25600 }, { "epoch": 0.11, "learning_rate": 4.914774331114209e-05, "loss": 1.9356, "step": 25700 }, { "epoch": 0.11, "learning_rate": 4.914231492458885e-05, "loss": 1.9192, "step": 25800 }, { "epoch": 0.11, "learning_rate": 4.913688653803562e-05, "loss": 1.9404, "step": 25900 }, { "epoch": 0.11, "learning_rate": 4.9131458151482384e-05, "loss": 1.9447, "step": 26000 }, { "epoch": 0.11, "learning_rate": 4.912602976492915e-05, "loss": 1.9286, "step": 26100 }, { "epoch": 0.11, "learning_rate": 4.9120601378375914e-05, "loss": 1.9304, "step": 26200 }, { "epoch": 0.11, "learning_rate": 4.911517299182268e-05, "loss": 1.9031, "step": 26300 }, { "epoch": 0.11, "learning_rate": 4.9109744605269445e-05, "loss": 1.9244, "step": 26400 }, { "epoch": 0.11, "learning_rate": 4.910431621871621e-05, "loss": 1.9265, "step": 26500 }, { "epoch": 0.11, "learning_rate": 4.9098887832162975e-05, "loss": 1.9259, "step": 26600 }, { "epoch": 0.11, "learning_rate": 4.9093459445609744e-05, "loss": 1.9633, "step": 26700 }, { "epoch": 0.12, "learning_rate": 4.9088031059056506e-05, "loss": 1.949, "step": 26800 }, { "epoch": 0.12, "learning_rate": 4.908260267250327e-05, "loss": 1.9403, "step": 26900 }, { "epoch": 0.12, "learning_rate": 4.9077174285950036e-05, "loss": 1.9391, "step": 27000 }, { "epoch": 0.12, "learning_rate": 4.90717458993968e-05, "loss": 1.9692, "step": 27100 }, { "epoch": 0.12, "learning_rate": 4.906631751284356e-05, "loss": 1.9442, "step": 27200 }, { "epoch": 0.12, "learning_rate": 4.9060889126290335e-05, "loss": 1.9369, "step": 27300 }, { "epoch": 0.12, "learning_rate": 4.90554607397371e-05, "loss": 1.9239, "step": 27400 }, { "epoch": 0.12, "learning_rate": 4.905003235318386e-05, "loss": 1.9146, "step": 27500 }, { "epoch": 0.12, "learning_rate": 4.904460396663063e-05, "loss": 1.9086, "step": 27600 }, { "epoch": 0.12, "learning_rate": 4.903917558007739e-05, "loss": 1.9168, "step": 27700 }, { "epoch": 0.12, "learning_rate": 4.903374719352415e-05, "loss": 1.9262, "step": 27800 }, { "epoch": 0.12, "learning_rate": 4.902831880697092e-05, "loss": 1.9099, "step": 27900 }, { "epoch": 0.12, "learning_rate": 4.902289042041768e-05, "loss": 1.9157, "step": 28000 }, { "epoch": 0.12, "learning_rate": 4.901746203386445e-05, "loss": 1.9182, "step": 28100 }, { "epoch": 0.12, "learning_rate": 4.901203364731121e-05, "loss": 1.8918, "step": 28200 }, { "epoch": 0.12, "learning_rate": 4.9006605260757974e-05, "loss": 1.9396, "step": 28300 }, { "epoch": 0.12, "learning_rate": 4.900117687420474e-05, "loss": 1.898, "step": 28400 }, { "epoch": 0.12, "learning_rate": 4.899574848765151e-05, "loss": 1.9377, "step": 28500 }, { "epoch": 0.12, "learning_rate": 4.8990320101098274e-05, "loss": 1.9022, "step": 28600 }, { "epoch": 0.12, "learning_rate": 4.898489171454504e-05, "loss": 1.9118, "step": 28700 }, { "epoch": 0.12, "learning_rate": 4.8979463327991804e-05, "loss": 1.9007, "step": 28800 }, { "epoch": 0.12, "learning_rate": 4.8974034941438566e-05, "loss": 1.9042, "step": 28900 }, { "epoch": 0.12, "learning_rate": 4.8968606554885335e-05, "loss": 1.8985, "step": 29000 }, { "epoch": 0.13, "learning_rate": 4.8963178168332097e-05, "loss": 1.935, "step": 29100 }, { "epoch": 0.13, "learning_rate": 4.895774978177886e-05, "loss": 1.8826, "step": 29200 }, { "epoch": 0.13, "learning_rate": 4.895232139522563e-05, "loss": 1.9218, "step": 29300 }, { "epoch": 0.13, "learning_rate": 4.8946893008672396e-05, "loss": 1.8877, "step": 29400 }, { "epoch": 0.13, "learning_rate": 4.894146462211916e-05, "loss": 1.8943, "step": 29500 }, { "epoch": 0.13, "learning_rate": 4.8936036235565926e-05, "loss": 1.9168, "step": 29600 }, { "epoch": 0.13, "learning_rate": 4.893060784901269e-05, "loss": 1.8982, "step": 29700 }, { "epoch": 0.13, "learning_rate": 4.892517946245945e-05, "loss": 1.9107, "step": 29800 }, { "epoch": 0.13, "learning_rate": 4.891975107590622e-05, "loss": 1.9192, "step": 29900 }, { "epoch": 0.13, "learning_rate": 4.891432268935298e-05, "loss": 1.8701, "step": 30000 }, { "epoch": 0.13, "eval_loss": 1.6432205438613892, "eval_runtime": 18.7697, "eval_samples_per_second": 532.772, "eval_steps_per_second": 16.676, "step": 30000 }, { "epoch": 0.13, "learning_rate": 4.890889430279975e-05, "loss": 1.8962, "step": 30100 }, { "epoch": 0.13, "learning_rate": 4.890346591624651e-05, "loss": 1.9178, "step": 30200 }, { "epoch": 0.13, "learning_rate": 4.889803752969327e-05, "loss": 1.9007, "step": 30300 }, { "epoch": 0.13, "learning_rate": 4.889260914314004e-05, "loss": 1.9306, "step": 30400 }, { "epoch": 0.13, "learning_rate": 4.888718075658681e-05, "loss": 1.8875, "step": 30500 }, { "epoch": 0.13, "learning_rate": 4.888175237003357e-05, "loss": 1.9071, "step": 30600 }, { "epoch": 0.13, "learning_rate": 4.887632398348034e-05, "loss": 1.9057, "step": 30700 }, { "epoch": 0.13, "learning_rate": 4.88708955969271e-05, "loss": 1.9082, "step": 30800 }, { "epoch": 0.13, "learning_rate": 4.8865467210373864e-05, "loss": 1.9019, "step": 30900 }, { "epoch": 0.13, "learning_rate": 4.886003882382063e-05, "loss": 1.9186, "step": 31000 }, { "epoch": 0.13, "learning_rate": 4.8854610437267395e-05, "loss": 1.9171, "step": 31100 }, { "epoch": 0.13, "learning_rate": 4.884918205071416e-05, "loss": 1.9104, "step": 31200 }, { "epoch": 0.13, "learning_rate": 4.8843753664160926e-05, "loss": 1.8954, "step": 31300 }, { "epoch": 0.13, "learning_rate": 4.883832527760769e-05, "loss": 1.9037, "step": 31400 }, { "epoch": 0.14, "learning_rate": 4.8832896891054456e-05, "loss": 1.8636, "step": 31500 }, { "epoch": 0.14, "learning_rate": 4.8827468504501225e-05, "loss": 1.8734, "step": 31600 }, { "epoch": 0.14, "learning_rate": 4.8822040117947987e-05, "loss": 1.8882, "step": 31700 }, { "epoch": 0.14, "learning_rate": 4.881661173139475e-05, "loss": 1.8936, "step": 31800 }, { "epoch": 0.14, "learning_rate": 4.881118334484152e-05, "loss": 1.8927, "step": 31900 }, { "epoch": 0.14, "learning_rate": 4.880575495828828e-05, "loss": 1.8856, "step": 32000 }, { "epoch": 0.14, "learning_rate": 4.880032657173505e-05, "loss": 1.8895, "step": 32100 }, { "epoch": 0.14, "learning_rate": 4.879489818518181e-05, "loss": 1.855, "step": 32200 }, { "epoch": 0.14, "learning_rate": 4.878946979862857e-05, "loss": 1.9009, "step": 32300 }, { "epoch": 0.14, "learning_rate": 4.878404141207534e-05, "loss": 1.8999, "step": 32400 }, { "epoch": 0.14, "learning_rate": 4.87786130255221e-05, "loss": 1.9017, "step": 32500 }, { "epoch": 0.14, "learning_rate": 4.877318463896887e-05, "loss": 1.9007, "step": 32600 }, { "epoch": 0.14, "learning_rate": 4.876775625241564e-05, "loss": 1.8886, "step": 32700 }, { "epoch": 0.14, "learning_rate": 4.87623278658624e-05, "loss": 1.8951, "step": 32800 }, { "epoch": 0.14, "learning_rate": 4.875689947930916e-05, "loss": 1.9086, "step": 32900 }, { "epoch": 0.14, "learning_rate": 4.875147109275593e-05, "loss": 1.8772, "step": 33000 }, { "epoch": 0.14, "learning_rate": 4.8746042706202693e-05, "loss": 1.849, "step": 33100 }, { "epoch": 0.14, "learning_rate": 4.8740614319649455e-05, "loss": 1.9026, "step": 33200 }, { "epoch": 0.14, "learning_rate": 4.8735185933096224e-05, "loss": 1.8813, "step": 33300 }, { "epoch": 0.14, "learning_rate": 4.8729757546542986e-05, "loss": 1.8758, "step": 33400 }, { "epoch": 0.14, "learning_rate": 4.872432915998975e-05, "loss": 1.8896, "step": 33500 }, { "epoch": 0.14, "learning_rate": 4.871890077343652e-05, "loss": 1.8888, "step": 33600 }, { "epoch": 0.14, "learning_rate": 4.8713472386883285e-05, "loss": 1.8835, "step": 33700 }, { "epoch": 0.15, "learning_rate": 4.870804400033005e-05, "loss": 1.8602, "step": 33800 }, { "epoch": 0.15, "learning_rate": 4.8702615613776816e-05, "loss": 1.9211, "step": 33900 }, { "epoch": 0.15, "learning_rate": 4.869718722722358e-05, "loss": 1.8607, "step": 34000 }, { "epoch": 0.15, "learning_rate": 4.8691758840670346e-05, "loss": 1.9135, "step": 34100 }, { "epoch": 0.15, "learning_rate": 4.868633045411711e-05, "loss": 1.8718, "step": 34200 }, { "epoch": 0.15, "learning_rate": 4.868090206756387e-05, "loss": 1.8732, "step": 34300 }, { "epoch": 0.15, "learning_rate": 4.867547368101064e-05, "loss": 1.8962, "step": 34400 }, { "epoch": 0.15, "learning_rate": 4.86700452944574e-05, "loss": 1.8708, "step": 34500 }, { "epoch": 0.15, "learning_rate": 4.866461690790416e-05, "loss": 1.8837, "step": 34600 }, { "epoch": 0.15, "learning_rate": 4.865918852135094e-05, "loss": 1.8801, "step": 34700 }, { "epoch": 0.15, "learning_rate": 4.86537601347977e-05, "loss": 1.8832, "step": 34800 }, { "epoch": 0.15, "learning_rate": 4.864833174824446e-05, "loss": 1.8599, "step": 34900 }, { "epoch": 0.15, "learning_rate": 4.864290336169123e-05, "loss": 1.878, "step": 35000 }, { "epoch": 0.15, "eval_loss": 1.6231168508529663, "eval_runtime": 18.8376, "eval_samples_per_second": 530.853, "eval_steps_per_second": 16.616, "step": 35000 }, { "epoch": 0.15, "learning_rate": 4.863747497513799e-05, "loss": 1.8869, "step": 35100 }, { "epoch": 0.15, "learning_rate": 4.8632046588584754e-05, "loss": 1.8941, "step": 35200 }, { "epoch": 0.15, "learning_rate": 4.862661820203152e-05, "loss": 1.8673, "step": 35300 }, { "epoch": 0.15, "learning_rate": 4.8621189815478284e-05, "loss": 1.8841, "step": 35400 }, { "epoch": 0.15, "learning_rate": 4.8615761428925046e-05, "loss": 1.8753, "step": 35500 }, { "epoch": 0.15, "learning_rate": 4.8610333042371815e-05, "loss": 1.8634, "step": 35600 }, { "epoch": 0.15, "learning_rate": 4.8604904655818584e-05, "loss": 1.8727, "step": 35700 }, { "epoch": 0.15, "learning_rate": 4.8599476269265345e-05, "loss": 1.858, "step": 35800 }, { "epoch": 0.15, "learning_rate": 4.8594047882712114e-05, "loss": 1.9326, "step": 35900 }, { "epoch": 0.15, "learning_rate": 4.8588619496158876e-05, "loss": 1.8632, "step": 36000 }, { "epoch": 0.16, "learning_rate": 4.8583191109605645e-05, "loss": 1.8698, "step": 36100 }, { "epoch": 0.16, "learning_rate": 4.8577762723052406e-05, "loss": 1.8401, "step": 36200 }, { "epoch": 0.16, "learning_rate": 4.857233433649917e-05, "loss": 1.8499, "step": 36300 }, { "epoch": 0.16, "learning_rate": 4.856690594994594e-05, "loss": 1.8542, "step": 36400 }, { "epoch": 0.16, "learning_rate": 4.85614775633927e-05, "loss": 1.8607, "step": 36500 }, { "epoch": 0.16, "learning_rate": 4.855604917683946e-05, "loss": 1.8488, "step": 36600 }, { "epoch": 0.16, "learning_rate": 4.855062079028623e-05, "loss": 1.8528, "step": 36700 }, { "epoch": 0.16, "learning_rate": 4.8545192403733e-05, "loss": 1.8522, "step": 36800 }, { "epoch": 0.16, "learning_rate": 4.853976401717976e-05, "loss": 1.8263, "step": 36900 }, { "epoch": 0.16, "learning_rate": 4.853433563062653e-05, "loss": 1.8421, "step": 37000 }, { "epoch": 0.16, "learning_rate": 4.852890724407329e-05, "loss": 1.8677, "step": 37100 }, { "epoch": 0.16, "learning_rate": 4.852347885752005e-05, "loss": 1.8743, "step": 37200 }, { "epoch": 0.16, "learning_rate": 4.851805047096682e-05, "loss": 1.8847, "step": 37300 }, { "epoch": 0.16, "learning_rate": 4.851262208441358e-05, "loss": 1.8881, "step": 37400 }, { "epoch": 0.16, "learning_rate": 4.8507193697860345e-05, "loss": 1.8622, "step": 37500 }, { "epoch": 0.16, "learning_rate": 4.8501765311307113e-05, "loss": 1.8695, "step": 37600 }, { "epoch": 0.16, "learning_rate": 4.8496336924753875e-05, "loss": 1.8583, "step": 37700 }, { "epoch": 0.16, "learning_rate": 4.8490908538200644e-05, "loss": 1.8674, "step": 37800 }, { "epoch": 0.16, "learning_rate": 4.848548015164741e-05, "loss": 1.8767, "step": 37900 }, { "epoch": 0.16, "learning_rate": 4.8480051765094174e-05, "loss": 1.8579, "step": 38000 }, { "epoch": 0.16, "learning_rate": 4.847462337854094e-05, "loss": 1.8644, "step": 38100 }, { "epoch": 0.16, "learning_rate": 4.8469194991987705e-05, "loss": 1.8259, "step": 38200 }, { "epoch": 0.16, "learning_rate": 4.846376660543447e-05, "loss": 1.8685, "step": 38300 }, { "epoch": 0.16, "learning_rate": 4.8458338218881236e-05, "loss": 1.8588, "step": 38400 }, { "epoch": 0.17, "learning_rate": 4.8452909832328e-05, "loss": 1.8768, "step": 38500 }, { "epoch": 0.17, "learning_rate": 4.844748144577476e-05, "loss": 1.8565, "step": 38600 }, { "epoch": 0.17, "learning_rate": 4.844205305922153e-05, "loss": 1.8883, "step": 38700 }, { "epoch": 0.17, "learning_rate": 4.843662467266829e-05, "loss": 1.8884, "step": 38800 }, { "epoch": 0.17, "learning_rate": 4.843119628611506e-05, "loss": 1.8314, "step": 38900 }, { "epoch": 0.17, "learning_rate": 4.842576789956183e-05, "loss": 1.8166, "step": 39000 }, { "epoch": 0.17, "learning_rate": 4.842033951300859e-05, "loss": 1.8752, "step": 39100 }, { "epoch": 0.17, "learning_rate": 4.841491112645535e-05, "loss": 1.8429, "step": 39200 }, { "epoch": 0.17, "learning_rate": 4.840948273990212e-05, "loss": 1.841, "step": 39300 }, { "epoch": 0.17, "learning_rate": 4.840405435334888e-05, "loss": 1.8476, "step": 39400 }, { "epoch": 0.17, "learning_rate": 4.839862596679564e-05, "loss": 1.8538, "step": 39500 }, { "epoch": 0.17, "learning_rate": 4.839319758024241e-05, "loss": 1.8662, "step": 39600 }, { "epoch": 0.17, "learning_rate": 4.8387769193689174e-05, "loss": 1.8432, "step": 39700 }, { "epoch": 0.17, "learning_rate": 4.838234080713594e-05, "loss": 1.8317, "step": 39800 }, { "epoch": 0.17, "learning_rate": 4.837691242058271e-05, "loss": 1.8308, "step": 39900 }, { "epoch": 0.17, "learning_rate": 4.837148403402947e-05, "loss": 1.8797, "step": 40000 }, { "epoch": 0.17, "eval_loss": 1.6136231422424316, "eval_runtime": 18.8524, "eval_samples_per_second": 530.437, "eval_steps_per_second": 16.603, "step": 40000 }, { "epoch": 0.17, "learning_rate": 4.836605564747624e-05, "loss": 1.857, "step": 40100 }, { "epoch": 0.17, "learning_rate": 4.8360627260923003e-05, "loss": 1.8894, "step": 40200 }, { "epoch": 0.17, "learning_rate": 4.8355198874369765e-05, "loss": 1.881, "step": 40300 }, { "epoch": 0.17, "learning_rate": 4.8349770487816534e-05, "loss": 1.8252, "step": 40400 }, { "epoch": 0.17, "learning_rate": 4.8344342101263296e-05, "loss": 1.8565, "step": 40500 }, { "epoch": 0.17, "learning_rate": 4.833891371471006e-05, "loss": 1.8664, "step": 40600 }, { "epoch": 0.17, "learning_rate": 4.8333485328156826e-05, "loss": 1.8833, "step": 40700 }, { "epoch": 0.18, "learning_rate": 4.832805694160359e-05, "loss": 1.8351, "step": 40800 }, { "epoch": 0.18, "learning_rate": 4.832262855505036e-05, "loss": 1.8889, "step": 40900 }, { "epoch": 0.18, "learning_rate": 4.8317200168497126e-05, "loss": 1.8882, "step": 41000 }, { "epoch": 0.18, "learning_rate": 4.831177178194389e-05, "loss": 1.8759, "step": 41100 }, { "epoch": 0.18, "learning_rate": 4.830634339539065e-05, "loss": 1.8267, "step": 41200 }, { "epoch": 0.18, "learning_rate": 4.830091500883742e-05, "loss": 1.8654, "step": 41300 }, { "epoch": 0.18, "learning_rate": 4.829548662228418e-05, "loss": 1.8388, "step": 41400 }, { "epoch": 0.18, "learning_rate": 4.829005823573094e-05, "loss": 1.8393, "step": 41500 }, { "epoch": 0.18, "learning_rate": 4.828462984917771e-05, "loss": 1.8669, "step": 41600 }, { "epoch": 0.18, "learning_rate": 4.827920146262447e-05, "loss": 1.8393, "step": 41700 }, { "epoch": 0.18, "learning_rate": 4.827377307607124e-05, "loss": 1.8635, "step": 41800 }, { "epoch": 0.18, "learning_rate": 4.8268344689518e-05, "loss": 1.8317, "step": 41900 }, { "epoch": 0.18, "learning_rate": 4.826291630296477e-05, "loss": 1.8476, "step": 42000 }, { "epoch": 0.18, "learning_rate": 4.825748791641154e-05, "loss": 1.8106, "step": 42100 }, { "epoch": 0.18, "learning_rate": 4.82520595298583e-05, "loss": 1.8654, "step": 42200 }, { "epoch": 0.18, "learning_rate": 4.8246631143305064e-05, "loss": 1.8358, "step": 42300 }, { "epoch": 0.18, "learning_rate": 4.824120275675183e-05, "loss": 1.8609, "step": 42400 }, { "epoch": 0.18, "learning_rate": 4.8235774370198594e-05, "loss": 1.8331, "step": 42500 }, { "epoch": 0.18, "learning_rate": 4.8230345983645356e-05, "loss": 1.8536, "step": 42600 }, { "epoch": 0.18, "learning_rate": 4.8224917597092125e-05, "loss": 1.8018, "step": 42700 }, { "epoch": 0.18, "learning_rate": 4.821948921053889e-05, "loss": 1.8408, "step": 42800 }, { "epoch": 0.18, "learning_rate": 4.821406082398565e-05, "loss": 1.803, "step": 42900 }, { "epoch": 0.18, "learning_rate": 4.8208632437432424e-05, "loss": 1.8361, "step": 43000 }, { "epoch": 0.19, "learning_rate": 4.8203204050879186e-05, "loss": 1.8156, "step": 43100 }, { "epoch": 0.19, "learning_rate": 4.819777566432595e-05, "loss": 1.8568, "step": 43200 }, { "epoch": 0.19, "learning_rate": 4.8192347277772716e-05, "loss": 1.8422, "step": 43300 }, { "epoch": 0.19, "learning_rate": 4.818691889121948e-05, "loss": 1.8498, "step": 43400 }, { "epoch": 0.19, "learning_rate": 4.818149050466624e-05, "loss": 1.8605, "step": 43500 }, { "epoch": 0.19, "learning_rate": 4.817606211811301e-05, "loss": 1.8621, "step": 43600 }, { "epoch": 0.19, "learning_rate": 4.817063373155977e-05, "loss": 1.8613, "step": 43700 }, { "epoch": 0.19, "learning_rate": 4.816520534500654e-05, "loss": 1.839, "step": 43800 }, { "epoch": 0.19, "learning_rate": 4.81597769584533e-05, "loss": 1.8462, "step": 43900 }, { "epoch": 0.19, "learning_rate": 4.815434857190006e-05, "loss": 1.7864, "step": 44000 }, { "epoch": 0.19, "learning_rate": 4.814892018534684e-05, "loss": 1.8452, "step": 44100 }, { "epoch": 0.19, "learning_rate": 4.81434917987936e-05, "loss": 1.8469, "step": 44200 }, { "epoch": 0.19, "learning_rate": 4.813806341224036e-05, "loss": 1.8697, "step": 44300 }, { "epoch": 0.19, "learning_rate": 4.813263502568713e-05, "loss": 1.8287, "step": 44400 }, { "epoch": 0.19, "learning_rate": 4.812720663913389e-05, "loss": 1.8486, "step": 44500 }, { "epoch": 0.19, "learning_rate": 4.8121778252580655e-05, "loss": 1.8107, "step": 44600 }, { "epoch": 0.19, "learning_rate": 4.811634986602742e-05, "loss": 1.8509, "step": 44700 }, { "epoch": 0.19, "learning_rate": 4.8110921479474185e-05, "loss": 1.8531, "step": 44800 }, { "epoch": 0.19, "learning_rate": 4.810549309292095e-05, "loss": 1.8266, "step": 44900 }, { "epoch": 0.19, "learning_rate": 4.8100064706367716e-05, "loss": 1.8125, "step": 45000 }, { "epoch": 0.19, "eval_loss": 1.6042814254760742, "eval_runtime": 18.8207, "eval_samples_per_second": 531.33, "eval_steps_per_second": 16.631, "step": 45000 }, { "epoch": 0.19, "learning_rate": 4.8094636319814484e-05, "loss": 1.8248, "step": 45100 }, { "epoch": 0.19, "learning_rate": 4.8089207933261246e-05, "loss": 1.83, "step": 45200 }, { "epoch": 0.19, "learning_rate": 4.8083779546708015e-05, "loss": 1.8339, "step": 45300 }, { "epoch": 0.2, "learning_rate": 4.807835116015478e-05, "loss": 1.8315, "step": 45400 }, { "epoch": 0.2, "learning_rate": 4.807292277360154e-05, "loss": 1.8314, "step": 45500 }, { "epoch": 0.2, "learning_rate": 4.806749438704831e-05, "loss": 1.8101, "step": 45600 }, { "epoch": 0.2, "learning_rate": 4.806206600049507e-05, "loss": 1.822, "step": 45700 }, { "epoch": 0.2, "learning_rate": 4.805663761394184e-05, "loss": 1.8244, "step": 45800 }, { "epoch": 0.2, "learning_rate": 4.80512092273886e-05, "loss": 1.8312, "step": 45900 }, { "epoch": 0.2, "learning_rate": 4.804578084083536e-05, "loss": 1.8351, "step": 46000 }, { "epoch": 0.2, "learning_rate": 4.804035245428213e-05, "loss": 1.8355, "step": 46100 }, { "epoch": 0.2, "learning_rate": 4.80349240677289e-05, "loss": 1.8204, "step": 46200 }, { "epoch": 0.2, "learning_rate": 4.802949568117566e-05, "loss": 1.8721, "step": 46300 }, { "epoch": 0.2, "learning_rate": 4.802406729462243e-05, "loss": 1.8235, "step": 46400 }, { "epoch": 0.2, "learning_rate": 4.801863890806919e-05, "loss": 1.8558, "step": 46500 }, { "epoch": 0.2, "learning_rate": 4.801321052151595e-05, "loss": 1.8399, "step": 46600 }, { "epoch": 0.2, "learning_rate": 4.800778213496272e-05, "loss": 1.8175, "step": 46700 }, { "epoch": 0.2, "learning_rate": 4.8002353748409484e-05, "loss": 1.825, "step": 46800 }, { "epoch": 0.2, "learning_rate": 4.7996925361856246e-05, "loss": 1.8156, "step": 46900 }, { "epoch": 0.2, "learning_rate": 4.7991496975303014e-05, "loss": 1.8063, "step": 47000 }, { "epoch": 0.2, "learning_rate": 4.7986068588749776e-05, "loss": 1.8077, "step": 47100 }, { "epoch": 0.2, "learning_rate": 4.7980640202196545e-05, "loss": 1.8135, "step": 47200 }, { "epoch": 0.2, "learning_rate": 4.7975211815643313e-05, "loss": 1.8466, "step": 47300 }, { "epoch": 0.2, "learning_rate": 4.7969783429090075e-05, "loss": 1.8319, "step": 47400 }, { "epoch": 0.2, "learning_rate": 4.796435504253684e-05, "loss": 1.8166, "step": 47500 }, { "epoch": 0.2, "learning_rate": 4.7958926655983606e-05, "loss": 1.8097, "step": 47600 }, { "epoch": 0.2, "learning_rate": 4.795349826943037e-05, "loss": 1.8231, "step": 47700 }, { "epoch": 0.21, "learning_rate": 4.7948069882877136e-05, "loss": 1.8371, "step": 47800 }, { "epoch": 0.21, "learning_rate": 4.79426414963239e-05, "loss": 1.8519, "step": 47900 }, { "epoch": 0.21, "learning_rate": 4.793721310977066e-05, "loss": 1.8226, "step": 48000 }, { "epoch": 0.21, "learning_rate": 4.793178472321743e-05, "loss": 1.8081, "step": 48100 }, { "epoch": 0.21, "learning_rate": 4.792635633666419e-05, "loss": 1.7755, "step": 48200 }, { "epoch": 0.21, "learning_rate": 4.792092795011096e-05, "loss": 1.8338, "step": 48300 }, { "epoch": 0.21, "learning_rate": 4.791549956355773e-05, "loss": 1.8139, "step": 48400 }, { "epoch": 0.21, "learning_rate": 4.791007117700449e-05, "loss": 1.8214, "step": 48500 }, { "epoch": 0.21, "learning_rate": 4.790464279045125e-05, "loss": 1.8017, "step": 48600 }, { "epoch": 0.21, "learning_rate": 4.789921440389802e-05, "loss": 1.8379, "step": 48700 }, { "epoch": 0.21, "learning_rate": 4.789378601734478e-05, "loss": 1.8305, "step": 48800 }, { "epoch": 0.21, "learning_rate": 4.7888357630791544e-05, "loss": 1.8496, "step": 48900 }, { "epoch": 0.21, "learning_rate": 4.788292924423831e-05, "loss": 1.8071, "step": 49000 }, { "epoch": 0.21, "learning_rate": 4.7877500857685075e-05, "loss": 1.8217, "step": 49100 }, { "epoch": 0.21, "learning_rate": 4.787207247113184e-05, "loss": 1.7922, "step": 49200 }, { "epoch": 0.21, "learning_rate": 4.786664408457861e-05, "loss": 1.8252, "step": 49300 }, { "epoch": 0.21, "learning_rate": 4.7861215698025374e-05, "loss": 1.8218, "step": 49400 }, { "epoch": 0.21, "learning_rate": 4.7855787311472136e-05, "loss": 1.8188, "step": 49500 }, { "epoch": 0.21, "learning_rate": 4.7850358924918904e-05, "loss": 1.8211, "step": 49600 }, { "epoch": 0.21, "learning_rate": 4.7844930538365666e-05, "loss": 1.8032, "step": 49700 }, { "epoch": 0.21, "learning_rate": 4.7839502151812435e-05, "loss": 1.7968, "step": 49800 }, { "epoch": 0.21, "learning_rate": 4.78340737652592e-05, "loss": 1.8099, "step": 49900 }, { "epoch": 0.21, "learning_rate": 4.782864537870596e-05, "loss": 1.8188, "step": 50000 }, { "epoch": 0.21, "eval_loss": 1.5851926803588867, "eval_runtime": 18.9094, "eval_samples_per_second": 528.838, "eval_steps_per_second": 16.553, "step": 50000 }, { "epoch": 0.22, "learning_rate": 4.782321699215273e-05, "loss": 1.8253, "step": 50100 }, { "epoch": 0.22, "learning_rate": 4.781778860559949e-05, "loss": 1.7936, "step": 50200 }, { "epoch": 0.22, "learning_rate": 4.781236021904625e-05, "loss": 1.8353, "step": 50300 }, { "epoch": 0.22, "learning_rate": 4.7806931832493026e-05, "loss": 1.7939, "step": 50400 }, { "epoch": 0.22, "learning_rate": 4.780150344593979e-05, "loss": 1.8055, "step": 50500 }, { "epoch": 0.22, "learning_rate": 4.779607505938655e-05, "loss": 1.8032, "step": 50600 }, { "epoch": 0.22, "learning_rate": 4.779064667283332e-05, "loss": 1.8171, "step": 50700 }, { "epoch": 0.22, "learning_rate": 4.778521828628008e-05, "loss": 1.8282, "step": 50800 }, { "epoch": 0.22, "learning_rate": 4.777978989972684e-05, "loss": 1.8412, "step": 50900 }, { "epoch": 0.22, "learning_rate": 4.777436151317361e-05, "loss": 1.8437, "step": 51000 }, { "epoch": 0.22, "learning_rate": 4.776893312662037e-05, "loss": 1.8513, "step": 51100 }, { "epoch": 0.22, "learning_rate": 4.776350474006714e-05, "loss": 1.7993, "step": 51200 }, { "epoch": 0.22, "learning_rate": 4.7758076353513904e-05, "loss": 1.7976, "step": 51300 }, { "epoch": 0.22, "learning_rate": 4.775264796696067e-05, "loss": 1.8163, "step": 51400 }, { "epoch": 0.22, "learning_rate": 4.7747219580407434e-05, "loss": 1.8247, "step": 51500 }, { "epoch": 0.22, "learning_rate": 4.77417911938542e-05, "loss": 1.7752, "step": 51600 }, { "epoch": 0.22, "learning_rate": 4.7736362807300965e-05, "loss": 1.8202, "step": 51700 }, { "epoch": 0.22, "learning_rate": 4.773093442074773e-05, "loss": 1.8208, "step": 51800 }, { "epoch": 0.22, "learning_rate": 4.7725506034194495e-05, "loss": 1.8177, "step": 51900 }, { "epoch": 0.22, "learning_rate": 4.772007764764126e-05, "loss": 1.8148, "step": 52000 }, { "epoch": 0.22, "learning_rate": 4.7714649261088026e-05, "loss": 1.8204, "step": 52100 }, { "epoch": 0.22, "learning_rate": 4.770922087453479e-05, "loss": 1.8226, "step": 52200 }, { "epoch": 0.22, "learning_rate": 4.770379248798155e-05, "loss": 1.8235, "step": 52300 }, { "epoch": 0.23, "learning_rate": 4.769836410142832e-05, "loss": 1.7702, "step": 52400 }, { "epoch": 0.23, "learning_rate": 4.769293571487509e-05, "loss": 1.8013, "step": 52500 }, { "epoch": 0.23, "learning_rate": 4.768750732832185e-05, "loss": 1.801, "step": 52600 }, { "epoch": 0.23, "learning_rate": 4.768207894176862e-05, "loss": 1.8202, "step": 52700 }, { "epoch": 0.23, "learning_rate": 4.767665055521538e-05, "loss": 1.7901, "step": 52800 }, { "epoch": 0.23, "learning_rate": 4.767122216866214e-05, "loss": 1.8395, "step": 52900 }, { "epoch": 0.23, "learning_rate": 4.766579378210891e-05, "loss": 1.7996, "step": 53000 }, { "epoch": 0.23, "learning_rate": 4.766036539555567e-05, "loss": 1.7787, "step": 53100 }, { "epoch": 0.23, "learning_rate": 4.765493700900244e-05, "loss": 1.7861, "step": 53200 }, { "epoch": 0.23, "learning_rate": 4.76495086224492e-05, "loss": 1.8164, "step": 53300 }, { "epoch": 0.23, "learning_rate": 4.7644080235895964e-05, "loss": 1.8221, "step": 53400 }, { "epoch": 0.23, "learning_rate": 4.763865184934273e-05, "loss": 1.7885, "step": 53500 }, { "epoch": 0.23, "learning_rate": 4.76332234627895e-05, "loss": 1.8154, "step": 53600 }, { "epoch": 0.23, "learning_rate": 4.762779507623626e-05, "loss": 1.7901, "step": 53700 }, { "epoch": 0.23, "learning_rate": 4.762236668968303e-05, "loss": 1.8313, "step": 53800 }, { "epoch": 0.23, "learning_rate": 4.7616938303129794e-05, "loss": 1.801, "step": 53900 }, { "epoch": 0.23, "learning_rate": 4.7611509916576556e-05, "loss": 1.8021, "step": 54000 }, { "epoch": 0.23, "learning_rate": 4.7606081530023324e-05, "loss": 1.8145, "step": 54100 }, { "epoch": 0.23, "learning_rate": 4.7600653143470086e-05, "loss": 1.7976, "step": 54200 }, { "epoch": 0.23, "learning_rate": 4.759522475691685e-05, "loss": 1.7932, "step": 54300 }, { "epoch": 0.23, "learning_rate": 4.758979637036362e-05, "loss": 1.8452, "step": 54400 }, { "epoch": 0.23, "learning_rate": 4.758436798381038e-05, "loss": 1.7923, "step": 54500 }, { "epoch": 0.23, "learning_rate": 4.757893959725715e-05, "loss": 1.7934, "step": 54600 }, { "epoch": 0.23, "learning_rate": 4.7573511210703916e-05, "loss": 1.8072, "step": 54700 }, { "epoch": 0.24, "learning_rate": 4.756808282415068e-05, "loss": 1.8279, "step": 54800 }, { "epoch": 0.24, "learning_rate": 4.756265443759744e-05, "loss": 1.8289, "step": 54900 }, { "epoch": 0.24, "learning_rate": 4.755722605104421e-05, "loss": 1.812, "step": 55000 }, { "epoch": 0.24, "eval_loss": 1.5801465511322021, "eval_runtime": 18.8482, "eval_samples_per_second": 530.554, "eval_steps_per_second": 16.606, "step": 55000 }, { "epoch": 0.24, "learning_rate": 4.755179766449097e-05, "loss": 1.7898, "step": 55100 }, { "epoch": 0.24, "learning_rate": 4.754636927793774e-05, "loss": 1.7933, "step": 55200 }, { "epoch": 0.24, "learning_rate": 4.75409408913845e-05, "loss": 1.7846, "step": 55300 }, { "epoch": 0.24, "learning_rate": 4.753551250483126e-05, "loss": 1.7918, "step": 55400 }, { "epoch": 0.24, "learning_rate": 4.753008411827803e-05, "loss": 1.798, "step": 55500 }, { "epoch": 0.24, "learning_rate": 4.75246557317248e-05, "loss": 1.799, "step": 55600 }, { "epoch": 0.24, "learning_rate": 4.751922734517156e-05, "loss": 1.8078, "step": 55700 }, { "epoch": 0.24, "learning_rate": 4.751379895861833e-05, "loss": 1.808, "step": 55800 }, { "epoch": 0.24, "learning_rate": 4.750837057206509e-05, "loss": 1.7562, "step": 55900 }, { "epoch": 0.24, "learning_rate": 4.7502942185511854e-05, "loss": 1.8129, "step": 56000 }, { "epoch": 0.24, "learning_rate": 4.749751379895862e-05, "loss": 1.8013, "step": 56100 }, { "epoch": 0.24, "learning_rate": 4.7492085412405385e-05, "loss": 1.8138, "step": 56200 }, { "epoch": 0.24, "learning_rate": 4.7486657025852146e-05, "loss": 1.8014, "step": 56300 }, { "epoch": 0.24, "learning_rate": 4.7481228639298915e-05, "loss": 1.8381, "step": 56400 }, { "epoch": 0.24, "learning_rate": 4.747580025274568e-05, "loss": 1.7954, "step": 56500 }, { "epoch": 0.24, "learning_rate": 4.7470371866192446e-05, "loss": 1.7694, "step": 56600 }, { "epoch": 0.24, "learning_rate": 4.7464943479639214e-05, "loss": 1.7909, "step": 56700 }, { "epoch": 0.24, "learning_rate": 4.7459515093085976e-05, "loss": 1.8301, "step": 56800 }, { "epoch": 0.24, "learning_rate": 4.745408670653274e-05, "loss": 1.7995, "step": 56900 }, { "epoch": 0.24, "learning_rate": 4.744865831997951e-05, "loss": 1.8072, "step": 57000 }, { "epoch": 0.25, "learning_rate": 4.744322993342627e-05, "loss": 1.784, "step": 57100 }, { "epoch": 0.25, "learning_rate": 4.743780154687304e-05, "loss": 1.8026, "step": 57200 }, { "epoch": 0.25, "learning_rate": 4.74323731603198e-05, "loss": 1.7927, "step": 57300 }, { "epoch": 0.25, "learning_rate": 4.742694477376656e-05, "loss": 1.7959, "step": 57400 }, { "epoch": 0.25, "learning_rate": 4.742151638721333e-05, "loss": 1.8272, "step": 57500 }, { "epoch": 0.25, "learning_rate": 4.741608800066009e-05, "loss": 1.7942, "step": 57600 }, { "epoch": 0.25, "learning_rate": 4.741065961410686e-05, "loss": 1.8021, "step": 57700 }, { "epoch": 0.25, "learning_rate": 4.740523122755363e-05, "loss": 1.8274, "step": 57800 }, { "epoch": 0.25, "learning_rate": 4.739980284100039e-05, "loss": 1.7891, "step": 57900 }, { "epoch": 0.25, "learning_rate": 4.739437445444715e-05, "loss": 1.7906, "step": 58000 }, { "epoch": 0.25, "learning_rate": 4.738894606789392e-05, "loss": 1.7785, "step": 58100 }, { "epoch": 0.25, "learning_rate": 4.738351768134068e-05, "loss": 1.7844, "step": 58200 }, { "epoch": 0.25, "learning_rate": 4.7378089294787445e-05, "loss": 1.7928, "step": 58300 }, { "epoch": 0.25, "learning_rate": 4.7372660908234214e-05, "loss": 1.7681, "step": 58400 }, { "epoch": 0.25, "learning_rate": 4.7367232521680975e-05, "loss": 1.7946, "step": 58500 }, { "epoch": 0.25, "learning_rate": 4.7361804135127744e-05, "loss": 1.7686, "step": 58600 }, { "epoch": 0.25, "learning_rate": 4.7356375748574506e-05, "loss": 1.7791, "step": 58700 }, { "epoch": 0.25, "learning_rate": 4.7350947362021275e-05, "loss": 1.8047, "step": 58800 }, { "epoch": 0.25, "learning_rate": 4.7345518975468037e-05, "loss": 1.7986, "step": 58900 }, { "epoch": 0.25, "learning_rate": 4.7340090588914805e-05, "loss": 1.8107, "step": 59000 }, { "epoch": 0.25, "learning_rate": 4.733466220236157e-05, "loss": 1.7675, "step": 59100 }, { "epoch": 0.25, "learning_rate": 4.7329233815808336e-05, "loss": 1.7916, "step": 59200 }, { "epoch": 0.25, "learning_rate": 4.73238054292551e-05, "loss": 1.7797, "step": 59300 }, { "epoch": 0.26, "learning_rate": 4.731837704270186e-05, "loss": 1.7827, "step": 59400 }, { "epoch": 0.26, "learning_rate": 4.731294865614863e-05, "loss": 1.7766, "step": 59500 }, { "epoch": 0.26, "learning_rate": 4.730752026959539e-05, "loss": 1.7927, "step": 59600 }, { "epoch": 0.26, "learning_rate": 4.730209188304215e-05, "loss": 1.7896, "step": 59700 }, { "epoch": 0.26, "learning_rate": 4.729666349648893e-05, "loss": 1.7775, "step": 59800 }, { "epoch": 0.26, "learning_rate": 4.729123510993569e-05, "loss": 1.7838, "step": 59900 }, { "epoch": 0.26, "learning_rate": 4.728580672338245e-05, "loss": 1.7729, "step": 60000 }, { "epoch": 0.26, "eval_loss": 1.5760066509246826, "eval_runtime": 18.8895, "eval_samples_per_second": 529.396, "eval_steps_per_second": 16.57, "step": 60000 }, { "epoch": 0.26, "learning_rate": 4.728037833682922e-05, "loss": 1.822, "step": 60100 }, { "epoch": 0.26, "learning_rate": 4.727494995027598e-05, "loss": 1.7779, "step": 60200 }, { "epoch": 0.26, "learning_rate": 4.7269521563722743e-05, "loss": 1.7766, "step": 60300 }, { "epoch": 0.26, "learning_rate": 4.726409317716951e-05, "loss": 1.7906, "step": 60400 }, { "epoch": 0.26, "learning_rate": 4.7258664790616274e-05, "loss": 1.8065, "step": 60500 }, { "epoch": 0.26, "learning_rate": 4.725323640406304e-05, "loss": 1.7614, "step": 60600 }, { "epoch": 0.26, "learning_rate": 4.7247808017509805e-05, "loss": 1.7844, "step": 60700 }, { "epoch": 0.26, "learning_rate": 4.724237963095657e-05, "loss": 1.7864, "step": 60800 }, { "epoch": 0.26, "learning_rate": 4.7236951244403335e-05, "loss": 1.7761, "step": 60900 }, { "epoch": 0.26, "learning_rate": 4.7231522857850104e-05, "loss": 1.7707, "step": 61000 }, { "epoch": 0.26, "learning_rate": 4.7226094471296866e-05, "loss": 1.8049, "step": 61100 }, { "epoch": 0.26, "learning_rate": 4.7220666084743634e-05, "loss": 1.786, "step": 61200 }, { "epoch": 0.26, "learning_rate": 4.7215237698190396e-05, "loss": 1.8102, "step": 61300 }, { "epoch": 0.26, "learning_rate": 4.720980931163716e-05, "loss": 1.7937, "step": 61400 }, { "epoch": 0.26, "learning_rate": 4.7204380925083927e-05, "loss": 1.7523, "step": 61500 }, { "epoch": 0.26, "learning_rate": 4.719895253853069e-05, "loss": 1.7976, "step": 61600 }, { "epoch": 0.27, "learning_rate": 4.719352415197745e-05, "loss": 1.7951, "step": 61700 }, { "epoch": 0.27, "learning_rate": 4.718809576542422e-05, "loss": 1.767, "step": 61800 }, { "epoch": 0.27, "learning_rate": 4.718266737887099e-05, "loss": 1.7567, "step": 61900 }, { "epoch": 0.27, "learning_rate": 4.717723899231775e-05, "loss": 1.8087, "step": 62000 }, { "epoch": 0.27, "learning_rate": 4.717181060576452e-05, "loss": 1.8043, "step": 62100 }, { "epoch": 0.27, "learning_rate": 4.716638221921128e-05, "loss": 1.7823, "step": 62200 }, { "epoch": 0.27, "learning_rate": 4.716095383265804e-05, "loss": 1.7519, "step": 62300 }, { "epoch": 0.27, "learning_rate": 4.715552544610481e-05, "loss": 1.7821, "step": 62400 }, { "epoch": 0.27, "learning_rate": 4.715009705955157e-05, "loss": 1.7673, "step": 62500 }, { "epoch": 0.27, "learning_rate": 4.714466867299834e-05, "loss": 1.7676, "step": 62600 }, { "epoch": 0.27, "learning_rate": 4.71392402864451e-05, "loss": 1.7674, "step": 62700 }, { "epoch": 0.27, "learning_rate": 4.7133811899891865e-05, "loss": 1.7856, "step": 62800 }, { "epoch": 0.27, "learning_rate": 4.7128383513338634e-05, "loss": 1.791, "step": 62900 }, { "epoch": 0.27, "learning_rate": 4.71229551267854e-05, "loss": 1.7697, "step": 63000 }, { "epoch": 0.27, "learning_rate": 4.7117526740232164e-05, "loss": 1.7897, "step": 63100 }, { "epoch": 0.27, "learning_rate": 4.711209835367893e-05, "loss": 1.7579, "step": 63200 }, { "epoch": 0.27, "learning_rate": 4.7106669967125695e-05, "loss": 1.7752, "step": 63300 }, { "epoch": 0.27, "learning_rate": 4.7101241580572456e-05, "loss": 1.772, "step": 63400 }, { "epoch": 0.27, "learning_rate": 4.7095813194019225e-05, "loss": 1.7648, "step": 63500 }, { "epoch": 0.27, "learning_rate": 4.709038480746599e-05, "loss": 1.7699, "step": 63600 }, { "epoch": 0.27, "learning_rate": 4.708495642091275e-05, "loss": 1.7708, "step": 63700 }, { "epoch": 0.27, "learning_rate": 4.707952803435952e-05, "loss": 1.7878, "step": 63800 }, { "epoch": 0.27, "learning_rate": 4.707409964780628e-05, "loss": 1.805, "step": 63900 }, { "epoch": 0.27, "learning_rate": 4.706867126125305e-05, "loss": 1.774, "step": 64000 }, { "epoch": 0.28, "learning_rate": 4.706324287469982e-05, "loss": 1.7729, "step": 64100 }, { "epoch": 0.28, "learning_rate": 4.705781448814658e-05, "loss": 1.759, "step": 64200 }, { "epoch": 0.28, "learning_rate": 4.705238610159334e-05, "loss": 1.7649, "step": 64300 }, { "epoch": 0.28, "learning_rate": 4.704695771504011e-05, "loss": 1.7887, "step": 64400 }, { "epoch": 0.28, "learning_rate": 4.704152932848687e-05, "loss": 1.8055, "step": 64500 }, { "epoch": 0.28, "learning_rate": 4.703610094193364e-05, "loss": 1.7912, "step": 64600 }, { "epoch": 0.28, "learning_rate": 4.70306725553804e-05, "loss": 1.759, "step": 64700 }, { "epoch": 0.28, "learning_rate": 4.702524416882716e-05, "loss": 1.7841, "step": 64800 }, { "epoch": 0.28, "learning_rate": 4.701981578227393e-05, "loss": 1.7759, "step": 64900 }, { "epoch": 0.28, "learning_rate": 4.70143873957207e-05, "loss": 1.7306, "step": 65000 }, { "epoch": 0.28, "eval_loss": 1.5630521774291992, "eval_runtime": 18.8835, "eval_samples_per_second": 529.564, "eval_steps_per_second": 16.575, "step": 65000 }, { "epoch": 0.28, "learning_rate": 4.700895900916746e-05, "loss": 1.7537, "step": 65100 }, { "epoch": 0.28, "learning_rate": 4.700353062261423e-05, "loss": 1.7783, "step": 65200 }, { "epoch": 0.28, "learning_rate": 4.699810223606099e-05, "loss": 1.7818, "step": 65300 }, { "epoch": 0.28, "learning_rate": 4.6992673849507755e-05, "loss": 1.7749, "step": 65400 }, { "epoch": 0.28, "learning_rate": 4.6987245462954524e-05, "loss": 1.7662, "step": 65500 }, { "epoch": 0.28, "learning_rate": 4.6981817076401285e-05, "loss": 1.7677, "step": 65600 }, { "epoch": 0.28, "learning_rate": 4.697638868984805e-05, "loss": 1.7521, "step": 65700 }, { "epoch": 0.28, "learning_rate": 4.6970960303294816e-05, "loss": 1.7751, "step": 65800 }, { "epoch": 0.28, "learning_rate": 4.696553191674158e-05, "loss": 1.7846, "step": 65900 }, { "epoch": 0.28, "learning_rate": 4.696010353018834e-05, "loss": 1.7987, "step": 66000 }, { "epoch": 0.28, "learning_rate": 4.6954675143635115e-05, "loss": 1.7691, "step": 66100 }, { "epoch": 0.28, "learning_rate": 4.694924675708188e-05, "loss": 1.7955, "step": 66200 }, { "epoch": 0.28, "learning_rate": 4.694381837052864e-05, "loss": 1.8008, "step": 66300 }, { "epoch": 0.29, "learning_rate": 4.693838998397541e-05, "loss": 1.77, "step": 66400 }, { "epoch": 0.29, "learning_rate": 4.693296159742217e-05, "loss": 1.7612, "step": 66500 }, { "epoch": 0.29, "learning_rate": 4.692753321086894e-05, "loss": 1.7797, "step": 66600 }, { "epoch": 0.29, "learning_rate": 4.69221048243157e-05, "loss": 1.8038, "step": 66700 }, { "epoch": 0.29, "learning_rate": 4.691667643776246e-05, "loss": 1.7792, "step": 66800 }, { "epoch": 0.29, "learning_rate": 4.691124805120923e-05, "loss": 1.7614, "step": 66900 }, { "epoch": 0.29, "learning_rate": 4.690581966465599e-05, "loss": 1.7548, "step": 67000 }, { "epoch": 0.29, "learning_rate": 4.690039127810276e-05, "loss": 1.773, "step": 67100 }, { "epoch": 0.29, "learning_rate": 4.689496289154953e-05, "loss": 1.7859, "step": 67200 }, { "epoch": 0.29, "learning_rate": 4.688953450499629e-05, "loss": 1.7366, "step": 67300 }, { "epoch": 0.29, "learning_rate": 4.6884106118443053e-05, "loss": 1.7685, "step": 67400 }, { "epoch": 0.29, "learning_rate": 4.687867773188982e-05, "loss": 1.7628, "step": 67500 }, { "epoch": 0.29, "learning_rate": 4.6873249345336584e-05, "loss": 1.7561, "step": 67600 }, { "epoch": 0.29, "learning_rate": 4.6867820958783346e-05, "loss": 1.8026, "step": 67700 }, { "epoch": 0.29, "learning_rate": 4.6862392572230114e-05, "loss": 1.7847, "step": 67800 }, { "epoch": 0.29, "learning_rate": 4.6856964185676876e-05, "loss": 1.7646, "step": 67900 }, { "epoch": 0.29, "learning_rate": 4.685153579912364e-05, "loss": 1.7906, "step": 68000 }, { "epoch": 0.29, "learning_rate": 4.684610741257041e-05, "loss": 1.7476, "step": 68100 }, { "epoch": 0.29, "learning_rate": 4.6840679026017176e-05, "loss": 1.7534, "step": 68200 }, { "epoch": 0.29, "learning_rate": 4.683525063946394e-05, "loss": 1.7639, "step": 68300 }, { "epoch": 0.29, "learning_rate": 4.6829822252910706e-05, "loss": 1.7499, "step": 68400 }, { "epoch": 0.29, "learning_rate": 4.682439386635747e-05, "loss": 1.7743, "step": 68500 }, { "epoch": 0.29, "learning_rate": 4.6818965479804237e-05, "loss": 1.7483, "step": 68600 }, { "epoch": 0.3, "learning_rate": 4.6813537093251e-05, "loss": 1.7807, "step": 68700 }, { "epoch": 0.3, "learning_rate": 4.680810870669776e-05, "loss": 1.7912, "step": 68800 }, { "epoch": 0.3, "learning_rate": 4.680268032014453e-05, "loss": 1.7632, "step": 68900 }, { "epoch": 0.3, "learning_rate": 4.679725193359129e-05, "loss": 1.762, "step": 69000 }, { "epoch": 0.3, "learning_rate": 4.679182354703805e-05, "loss": 1.7498, "step": 69100 }, { "epoch": 0.3, "learning_rate": 4.678639516048483e-05, "loss": 1.7597, "step": 69200 }, { "epoch": 0.3, "learning_rate": 4.678096677393159e-05, "loss": 1.7851, "step": 69300 }, { "epoch": 0.3, "learning_rate": 4.677553838737835e-05, "loss": 1.7746, "step": 69400 }, { "epoch": 0.3, "learning_rate": 4.677011000082512e-05, "loss": 1.7738, "step": 69500 }, { "epoch": 0.3, "learning_rate": 4.676468161427188e-05, "loss": 1.7909, "step": 69600 }, { "epoch": 0.3, "learning_rate": 4.6759253227718644e-05, "loss": 1.768, "step": 69700 }, { "epoch": 0.3, "learning_rate": 4.675382484116541e-05, "loss": 1.7782, "step": 69800 }, { "epoch": 0.3, "learning_rate": 4.6748396454612175e-05, "loss": 1.7663, "step": 69900 }, { "epoch": 0.3, "learning_rate": 4.674296806805894e-05, "loss": 1.7202, "step": 70000 }, { "epoch": 0.3, "eval_loss": 1.5640525817871094, "eval_runtime": 18.9125, "eval_samples_per_second": 528.752, "eval_steps_per_second": 16.55, "step": 70000 }, { "epoch": 0.3, "learning_rate": 4.6737539681505705e-05, "loss": 1.7697, "step": 70100 }, { "epoch": 0.3, "learning_rate": 4.673211129495247e-05, "loss": 1.7581, "step": 70200 }, { "epoch": 0.3, "learning_rate": 4.6726682908399236e-05, "loss": 1.7291, "step": 70300 }, { "epoch": 0.3, "learning_rate": 4.6721254521846005e-05, "loss": 1.7576, "step": 70400 }, { "epoch": 0.3, "learning_rate": 4.6715826135292766e-05, "loss": 1.7384, "step": 70500 }, { "epoch": 0.3, "learning_rate": 4.671039774873953e-05, "loss": 1.7676, "step": 70600 }, { "epoch": 0.3, "learning_rate": 4.67049693621863e-05, "loss": 1.7576, "step": 70700 }, { "epoch": 0.3, "learning_rate": 4.669954097563306e-05, "loss": 1.7404, "step": 70800 }, { "epoch": 0.3, "learning_rate": 4.669411258907983e-05, "loss": 1.7635, "step": 70900 }, { "epoch": 0.31, "learning_rate": 4.668868420252659e-05, "loss": 1.7898, "step": 71000 }, { "epoch": 0.31, "learning_rate": 4.668325581597335e-05, "loss": 1.7269, "step": 71100 }, { "epoch": 0.31, "learning_rate": 4.667782742942012e-05, "loss": 1.756, "step": 71200 }, { "epoch": 0.31, "learning_rate": 4.667239904286689e-05, "loss": 1.7796, "step": 71300 }, { "epoch": 0.31, "learning_rate": 4.666697065631365e-05, "loss": 1.7805, "step": 71400 }, { "epoch": 0.31, "learning_rate": 4.666154226976042e-05, "loss": 1.7324, "step": 71500 }, { "epoch": 0.31, "learning_rate": 4.665611388320718e-05, "loss": 1.7412, "step": 71600 }, { "epoch": 0.31, "learning_rate": 4.665068549665394e-05, "loss": 1.7363, "step": 71700 }, { "epoch": 0.31, "learning_rate": 4.664525711010071e-05, "loss": 1.7615, "step": 71800 }, { "epoch": 0.31, "learning_rate": 4.663982872354747e-05, "loss": 1.7356, "step": 71900 }, { "epoch": 0.31, "learning_rate": 4.6634400336994235e-05, "loss": 1.7364, "step": 72000 }, { "epoch": 0.31, "learning_rate": 4.6628971950441004e-05, "loss": 1.7587, "step": 72100 }, { "epoch": 0.31, "learning_rate": 4.6623543563887766e-05, "loss": 1.742, "step": 72200 }, { "epoch": 0.31, "learning_rate": 4.6618115177334534e-05, "loss": 1.7618, "step": 72300 }, { "epoch": 0.31, "learning_rate": 4.66126867907813e-05, "loss": 1.7374, "step": 72400 }, { "epoch": 0.31, "learning_rate": 4.6607258404228065e-05, "loss": 1.7804, "step": 72500 }, { "epoch": 0.31, "learning_rate": 4.660183001767483e-05, "loss": 1.7173, "step": 72600 }, { "epoch": 0.31, "learning_rate": 4.6596401631121595e-05, "loss": 1.7459, "step": 72700 }, { "epoch": 0.31, "learning_rate": 4.659097324456836e-05, "loss": 1.7441, "step": 72800 }, { "epoch": 0.31, "learning_rate": 4.6585544858015126e-05, "loss": 1.7598, "step": 72900 }, { "epoch": 0.31, "learning_rate": 4.658011647146189e-05, "loss": 1.7476, "step": 73000 }, { "epoch": 0.31, "learning_rate": 4.657468808490865e-05, "loss": 1.7596, "step": 73100 }, { "epoch": 0.31, "learning_rate": 4.656925969835542e-05, "loss": 1.7438, "step": 73200 }, { "epoch": 0.31, "learning_rate": 4.656383131180218e-05, "loss": 1.7752, "step": 73300 }, { "epoch": 0.32, "learning_rate": 4.655840292524895e-05, "loss": 1.7214, "step": 73400 }, { "epoch": 0.32, "learning_rate": 4.655297453869572e-05, "loss": 1.7437, "step": 73500 }, { "epoch": 0.32, "learning_rate": 4.654754615214248e-05, "loss": 1.7531, "step": 73600 }, { "epoch": 0.32, "learning_rate": 4.654211776558924e-05, "loss": 1.746, "step": 73700 }, { "epoch": 0.32, "learning_rate": 4.653668937903601e-05, "loss": 1.748, "step": 73800 }, { "epoch": 0.32, "learning_rate": 4.653126099248277e-05, "loss": 1.7357, "step": 73900 }, { "epoch": 0.32, "learning_rate": 4.6525832605929534e-05, "loss": 1.7325, "step": 74000 }, { "epoch": 0.32, "learning_rate": 4.65204042193763e-05, "loss": 1.7155, "step": 74100 }, { "epoch": 0.32, "learning_rate": 4.6514975832823064e-05, "loss": 1.7436, "step": 74200 }, { "epoch": 0.32, "learning_rate": 4.650954744626983e-05, "loss": 1.7245, "step": 74300 }, { "epoch": 0.32, "learning_rate": 4.6504119059716595e-05, "loss": 1.7332, "step": 74400 }, { "epoch": 0.32, "learning_rate": 4.6498690673163363e-05, "loss": 1.7614, "step": 74500 }, { "epoch": 0.32, "learning_rate": 4.6493262286610125e-05, "loss": 1.7565, "step": 74600 }, { "epoch": 0.32, "learning_rate": 4.6487833900056894e-05, "loss": 1.7641, "step": 74700 }, { "epoch": 0.32, "learning_rate": 4.6482405513503656e-05, "loss": 1.7415, "step": 74800 }, { "epoch": 0.32, "learning_rate": 4.6476977126950424e-05, "loss": 1.7563, "step": 74900 }, { "epoch": 0.32, "learning_rate": 4.6471548740397186e-05, "loss": 1.7745, "step": 75000 }, { "epoch": 0.32, "eval_loss": 1.5477814674377441, "eval_runtime": 18.8766, "eval_samples_per_second": 529.758, "eval_steps_per_second": 16.581, "step": 75000 }, { "epoch": 0.32, "learning_rate": 4.646612035384395e-05, "loss": 1.7393, "step": 75100 }, { "epoch": 0.32, "learning_rate": 4.646069196729072e-05, "loss": 1.752, "step": 75200 }, { "epoch": 0.32, "learning_rate": 4.645526358073748e-05, "loss": 1.7421, "step": 75300 }, { "epoch": 0.32, "learning_rate": 4.644983519418424e-05, "loss": 1.7443, "step": 75400 }, { "epoch": 0.32, "learning_rate": 4.6444406807631016e-05, "loss": 1.7661, "step": 75500 }, { "epoch": 0.32, "learning_rate": 4.643897842107778e-05, "loss": 1.7527, "step": 75600 }, { "epoch": 0.33, "learning_rate": 4.643355003452454e-05, "loss": 1.7628, "step": 75700 }, { "epoch": 0.33, "learning_rate": 4.642812164797131e-05, "loss": 1.752, "step": 75800 }, { "epoch": 0.33, "learning_rate": 4.642269326141807e-05, "loss": 1.7392, "step": 75900 }, { "epoch": 0.33, "learning_rate": 4.641726487486483e-05, "loss": 1.7515, "step": 76000 }, { "epoch": 0.33, "learning_rate": 4.64118364883116e-05, "loss": 1.7412, "step": 76100 }, { "epoch": 0.33, "learning_rate": 4.640640810175836e-05, "loss": 1.7418, "step": 76200 }, { "epoch": 0.33, "learning_rate": 4.640097971520513e-05, "loss": 1.7248, "step": 76300 }, { "epoch": 0.33, "learning_rate": 4.639555132865189e-05, "loss": 1.7437, "step": 76400 }, { "epoch": 0.33, "learning_rate": 4.6390122942098655e-05, "loss": 1.7433, "step": 76500 }, { "epoch": 0.33, "learning_rate": 4.6384694555545424e-05, "loss": 1.7226, "step": 76600 }, { "epoch": 0.33, "learning_rate": 4.637926616899219e-05, "loss": 1.7266, "step": 76700 }, { "epoch": 0.33, "learning_rate": 4.6373837782438954e-05, "loss": 1.7332, "step": 76800 }, { "epoch": 0.33, "learning_rate": 4.636840939588572e-05, "loss": 1.7427, "step": 76900 }, { "epoch": 0.33, "learning_rate": 4.6362981009332485e-05, "loss": 1.7679, "step": 77000 }, { "epoch": 0.33, "learning_rate": 4.635755262277925e-05, "loss": 1.7527, "step": 77100 }, { "epoch": 0.33, "learning_rate": 4.6352124236226015e-05, "loss": 1.7443, "step": 77200 }, { "epoch": 0.33, "learning_rate": 4.634669584967278e-05, "loss": 1.7738, "step": 77300 }, { "epoch": 0.33, "learning_rate": 4.634126746311954e-05, "loss": 1.735, "step": 77400 }, { "epoch": 0.33, "learning_rate": 4.633583907656631e-05, "loss": 1.7555, "step": 77500 }, { "epoch": 0.33, "learning_rate": 4.6330410690013076e-05, "loss": 1.7362, "step": 77600 }, { "epoch": 0.33, "learning_rate": 4.632498230345984e-05, "loss": 1.7026, "step": 77700 }, { "epoch": 0.33, "learning_rate": 4.631955391690661e-05, "loss": 1.7306, "step": 77800 }, { "epoch": 0.33, "learning_rate": 4.631412553035337e-05, "loss": 1.7717, "step": 77900 }, { "epoch": 0.34, "learning_rate": 4.630869714380013e-05, "loss": 1.7467, "step": 78000 }, { "epoch": 0.34, "learning_rate": 4.63032687572469e-05, "loss": 1.7282, "step": 78100 }, { "epoch": 0.34, "learning_rate": 4.629784037069366e-05, "loss": 1.7238, "step": 78200 }, { "epoch": 0.34, "learning_rate": 4.629241198414043e-05, "loss": 1.7432, "step": 78300 }, { "epoch": 0.34, "learning_rate": 4.628698359758719e-05, "loss": 1.7318, "step": 78400 }, { "epoch": 0.34, "learning_rate": 4.6281555211033954e-05, "loss": 1.7544, "step": 78500 }, { "epoch": 0.34, "learning_rate": 4.627612682448072e-05, "loss": 1.7452, "step": 78600 }, { "epoch": 0.34, "learning_rate": 4.627069843792749e-05, "loss": 1.757, "step": 78700 }, { "epoch": 0.34, "learning_rate": 4.626527005137425e-05, "loss": 1.754, "step": 78800 }, { "epoch": 0.34, "learning_rate": 4.625984166482102e-05, "loss": 1.7073, "step": 78900 }, { "epoch": 0.34, "learning_rate": 4.625441327826778e-05, "loss": 1.7104, "step": 79000 }, { "epoch": 0.34, "learning_rate": 4.6248984891714545e-05, "loss": 1.7339, "step": 79100 }, { "epoch": 0.34, "learning_rate": 4.6243556505161314e-05, "loss": 1.722, "step": 79200 }, { "epoch": 0.34, "learning_rate": 4.6238128118608076e-05, "loss": 1.7456, "step": 79300 }, { "epoch": 0.34, "learning_rate": 4.623269973205484e-05, "loss": 1.7273, "step": 79400 }, { "epoch": 0.34, "learning_rate": 4.6227271345501606e-05, "loss": 1.7432, "step": 79500 }, { "epoch": 0.34, "learning_rate": 4.622184295894837e-05, "loss": 1.76, "step": 79600 }, { "epoch": 0.34, "learning_rate": 4.621641457239514e-05, "loss": 1.7446, "step": 79700 }, { "epoch": 0.34, "learning_rate": 4.6210986185841905e-05, "loss": 1.7589, "step": 79800 }, { "epoch": 0.34, "learning_rate": 4.620555779928867e-05, "loss": 1.7662, "step": 79900 }, { "epoch": 0.34, "learning_rate": 4.620012941273543e-05, "loss": 1.7185, "step": 80000 }, { "epoch": 0.34, "eval_loss": 1.5425916910171509, "eval_runtime": 18.916, "eval_samples_per_second": 528.654, "eval_steps_per_second": 16.547, "step": 80000 }, { "epoch": 0.34, "learning_rate": 4.61947010261822e-05, "loss": 1.7206, "step": 80100 }, { "epoch": 0.34, "learning_rate": 4.618927263962896e-05, "loss": 1.7431, "step": 80200 }, { "epoch": 0.34, "learning_rate": 4.618384425307573e-05, "loss": 1.7112, "step": 80300 }, { "epoch": 0.35, "learning_rate": 4.617841586652249e-05, "loss": 1.7242, "step": 80400 }, { "epoch": 0.35, "learning_rate": 4.617298747996925e-05, "loss": 1.7106, "step": 80500 }, { "epoch": 0.35, "learning_rate": 4.616755909341602e-05, "loss": 1.7363, "step": 80600 }, { "epoch": 0.35, "learning_rate": 4.616213070686279e-05, "loss": 1.7411, "step": 80700 }, { "epoch": 0.35, "learning_rate": 4.615670232030955e-05, "loss": 1.7663, "step": 80800 }, { "epoch": 0.35, "learning_rate": 4.615127393375632e-05, "loss": 1.7612, "step": 80900 }, { "epoch": 0.35, "learning_rate": 4.614584554720308e-05, "loss": 1.7457, "step": 81000 }, { "epoch": 0.35, "learning_rate": 4.6140417160649844e-05, "loss": 1.7319, "step": 81100 }, { "epoch": 0.35, "learning_rate": 4.613498877409661e-05, "loss": 1.744, "step": 81200 }, { "epoch": 0.35, "learning_rate": 4.6129560387543374e-05, "loss": 1.7076, "step": 81300 }, { "epoch": 0.35, "learning_rate": 4.6124132000990136e-05, "loss": 1.7594, "step": 81400 }, { "epoch": 0.35, "learning_rate": 4.6118703614436905e-05, "loss": 1.7443, "step": 81500 }, { "epoch": 0.35, "learning_rate": 4.6113275227883667e-05, "loss": 1.7158, "step": 81600 }, { "epoch": 0.35, "learning_rate": 4.6107846841330435e-05, "loss": 1.7315, "step": 81700 }, { "epoch": 0.35, "learning_rate": 4.6102418454777204e-05, "loss": 1.756, "step": 81800 }, { "epoch": 0.35, "learning_rate": 4.6096990068223966e-05, "loss": 1.7466, "step": 81900 }, { "epoch": 0.35, "learning_rate": 4.609156168167073e-05, "loss": 1.7233, "step": 82000 }, { "epoch": 0.35, "learning_rate": 4.6086133295117496e-05, "loss": 1.736, "step": 82100 }, { "epoch": 0.35, "learning_rate": 4.608070490856426e-05, "loss": 1.7329, "step": 82200 }, { "epoch": 0.35, "learning_rate": 4.607527652201103e-05, "loss": 1.7311, "step": 82300 }, { "epoch": 0.35, "learning_rate": 4.606984813545779e-05, "loss": 1.7318, "step": 82400 }, { "epoch": 0.35, "learning_rate": 4.606441974890455e-05, "loss": 1.7485, "step": 82500 }, { "epoch": 0.35, "learning_rate": 4.605899136235132e-05, "loss": 1.6999, "step": 82600 }, { "epoch": 0.36, "learning_rate": 4.605356297579808e-05, "loss": 1.7136, "step": 82700 }, { "epoch": 0.36, "learning_rate": 4.604813458924485e-05, "loss": 1.724, "step": 82800 }, { "epoch": 0.36, "learning_rate": 4.604270620269162e-05, "loss": 1.7513, "step": 82900 }, { "epoch": 0.36, "learning_rate": 4.603727781613838e-05, "loss": 1.7232, "step": 83000 }, { "epoch": 0.36, "learning_rate": 4.603184942958514e-05, "loss": 1.7535, "step": 83100 }, { "epoch": 0.36, "learning_rate": 4.602642104303191e-05, "loss": 1.7338, "step": 83200 }, { "epoch": 0.36, "learning_rate": 4.602099265647867e-05, "loss": 1.7305, "step": 83300 }, { "epoch": 0.36, "learning_rate": 4.6015564269925435e-05, "loss": 1.7295, "step": 83400 }, { "epoch": 0.36, "learning_rate": 4.60101358833722e-05, "loss": 1.7555, "step": 83500 }, { "epoch": 0.36, "learning_rate": 4.6004707496818965e-05, "loss": 1.7299, "step": 83600 }, { "epoch": 0.36, "learning_rate": 4.5999279110265734e-05, "loss": 1.7132, "step": 83700 }, { "epoch": 0.36, "learning_rate": 4.5993850723712496e-05, "loss": 1.7587, "step": 83800 }, { "epoch": 0.36, "learning_rate": 4.5988422337159264e-05, "loss": 1.7486, "step": 83900 }, { "epoch": 0.36, "learning_rate": 4.5982993950606026e-05, "loss": 1.6919, "step": 84000 }, { "epoch": 0.36, "learning_rate": 4.5977565564052795e-05, "loss": 1.7116, "step": 84100 }, { "epoch": 0.36, "learning_rate": 4.597213717749956e-05, "loss": 1.7302, "step": 84200 }, { "epoch": 0.36, "learning_rate": 4.5966708790946325e-05, "loss": 1.7044, "step": 84300 }, { "epoch": 0.36, "learning_rate": 4.596128040439309e-05, "loss": 1.7475, "step": 84400 }, { "epoch": 0.36, "learning_rate": 4.595585201783985e-05, "loss": 1.7357, "step": 84500 }, { "epoch": 0.36, "learning_rate": 4.595042363128662e-05, "loss": 1.7234, "step": 84600 }, { "epoch": 0.36, "learning_rate": 4.594499524473338e-05, "loss": 1.7307, "step": 84700 }, { "epoch": 0.36, "learning_rate": 4.593956685818014e-05, "loss": 1.7138, "step": 84800 }, { "epoch": 0.36, "learning_rate": 4.593413847162692e-05, "loss": 1.7437, "step": 84900 }, { "epoch": 0.37, "learning_rate": 4.592871008507368e-05, "loss": 1.761, "step": 85000 }, { "epoch": 0.37, "eval_loss": 1.5349645614624023, "eval_runtime": 18.9426, "eval_samples_per_second": 527.911, "eval_steps_per_second": 16.524, "step": 85000 }, { "epoch": 0.37, "learning_rate": 4.592328169852044e-05, "loss": 1.7331, "step": 85100 }, { "epoch": 0.37, "learning_rate": 4.591785331196721e-05, "loss": 1.7209, "step": 85200 }, { "epoch": 0.37, "learning_rate": 4.591242492541397e-05, "loss": 1.7032, "step": 85300 }, { "epoch": 0.37, "learning_rate": 4.590699653886073e-05, "loss": 1.7575, "step": 85400 }, { "epoch": 0.37, "learning_rate": 4.59015681523075e-05, "loss": 1.7372, "step": 85500 }, { "epoch": 0.37, "learning_rate": 4.5896139765754264e-05, "loss": 1.7554, "step": 85600 }, { "epoch": 0.37, "learning_rate": 4.589071137920103e-05, "loss": 1.7196, "step": 85700 }, { "epoch": 0.37, "learning_rate": 4.5885282992647794e-05, "loss": 1.6947, "step": 85800 }, { "epoch": 0.37, "learning_rate": 4.5879854606094556e-05, "loss": 1.7231, "step": 85900 }, { "epoch": 0.37, "learning_rate": 4.5874426219541325e-05, "loss": 1.7123, "step": 86000 }, { "epoch": 0.37, "learning_rate": 4.586899783298809e-05, "loss": 1.7224, "step": 86100 }, { "epoch": 0.37, "learning_rate": 4.5863569446434855e-05, "loss": 1.7402, "step": 86200 }, { "epoch": 0.37, "learning_rate": 4.5858141059881624e-05, "loss": 1.7189, "step": 86300 }, { "epoch": 0.37, "learning_rate": 4.5852712673328386e-05, "loss": 1.7363, "step": 86400 }, { "epoch": 0.37, "learning_rate": 4.584728428677515e-05, "loss": 1.7376, "step": 86500 }, { "epoch": 0.37, "learning_rate": 4.5841855900221916e-05, "loss": 1.739, "step": 86600 }, { "epoch": 0.37, "learning_rate": 4.583642751366868e-05, "loss": 1.7794, "step": 86700 }, { "epoch": 0.37, "learning_rate": 4.583099912711544e-05, "loss": 1.7424, "step": 86800 }, { "epoch": 0.37, "learning_rate": 4.582557074056221e-05, "loss": 1.7139, "step": 86900 }, { "epoch": 0.37, "learning_rate": 4.582014235400898e-05, "loss": 1.7696, "step": 87000 }, { "epoch": 0.37, "learning_rate": 4.581471396745574e-05, "loss": 1.7081, "step": 87100 }, { "epoch": 0.37, "learning_rate": 4.580928558090251e-05, "loss": 1.7361, "step": 87200 }, { "epoch": 0.38, "learning_rate": 4.580385719434927e-05, "loss": 1.7095, "step": 87300 }, { "epoch": 0.38, "learning_rate": 4.579842880779603e-05, "loss": 1.7303, "step": 87400 }, { "epoch": 0.38, "learning_rate": 4.57930004212428e-05, "loss": 1.6847, "step": 87500 }, { "epoch": 0.38, "learning_rate": 4.578757203468956e-05, "loss": 1.7079, "step": 87600 }, { "epoch": 0.38, "learning_rate": 4.578214364813633e-05, "loss": 1.7344, "step": 87700 }, { "epoch": 0.38, "learning_rate": 4.577671526158309e-05, "loss": 1.731, "step": 87800 }, { "epoch": 0.38, "learning_rate": 4.5771286875029854e-05, "loss": 1.7397, "step": 87900 }, { "epoch": 0.38, "learning_rate": 4.576585848847662e-05, "loss": 1.7308, "step": 88000 }, { "epoch": 0.38, "learning_rate": 4.576043010192339e-05, "loss": 1.7609, "step": 88100 }, { "epoch": 0.38, "learning_rate": 4.5755001715370154e-05, "loss": 1.7123, "step": 88200 }, { "epoch": 0.38, "learning_rate": 4.574957332881692e-05, "loss": 1.7093, "step": 88300 }, { "epoch": 0.38, "learning_rate": 4.5744144942263684e-05, "loss": 1.7262, "step": 88400 }, { "epoch": 0.38, "learning_rate": 4.5738716555710446e-05, "loss": 1.7057, "step": 88500 }, { "epoch": 0.38, "learning_rate": 4.5733288169157215e-05, "loss": 1.7473, "step": 88600 }, { "epoch": 0.38, "learning_rate": 4.5727859782603977e-05, "loss": 1.7331, "step": 88700 }, { "epoch": 0.38, "learning_rate": 4.572243139605074e-05, "loss": 1.7549, "step": 88800 }, { "epoch": 0.38, "learning_rate": 4.571700300949751e-05, "loss": 1.7222, "step": 88900 }, { "epoch": 0.38, "learning_rate": 4.571157462294427e-05, "loss": 1.6896, "step": 89000 }, { "epoch": 0.38, "learning_rate": 4.570614623639104e-05, "loss": 1.716, "step": 89100 }, { "epoch": 0.38, "learning_rate": 4.5700717849837806e-05, "loss": 1.7272, "step": 89200 }, { "epoch": 0.38, "learning_rate": 4.569528946328457e-05, "loss": 1.692, "step": 89300 }, { "epoch": 0.38, "learning_rate": 4.568986107673133e-05, "loss": 1.7422, "step": 89400 }, { "epoch": 0.38, "learning_rate": 4.56844326901781e-05, "loss": 1.7287, "step": 89500 }, { "epoch": 0.38, "learning_rate": 4.567900430362486e-05, "loss": 1.7368, "step": 89600 }, { "epoch": 0.39, "learning_rate": 4.567357591707163e-05, "loss": 1.6947, "step": 89700 }, { "epoch": 0.39, "learning_rate": 4.566814753051839e-05, "loss": 1.7064, "step": 89800 }, { "epoch": 0.39, "learning_rate": 4.566271914396515e-05, "loss": 1.7035, "step": 89900 }, { "epoch": 0.39, "learning_rate": 4.565729075741192e-05, "loss": 1.7442, "step": 90000 }, { "epoch": 0.39, "eval_loss": 1.5332447290420532, "eval_runtime": 18.9917, "eval_samples_per_second": 526.545, "eval_steps_per_second": 16.481, "step": 90000 }, { "epoch": 0.39, "learning_rate": 4.5651862370858683e-05, "loss": 1.7316, "step": 90100 }, { "epoch": 0.39, "learning_rate": 4.564643398430545e-05, "loss": 1.6982, "step": 90200 }, { "epoch": 0.39, "learning_rate": 4.564100559775222e-05, "loss": 1.7471, "step": 90300 }, { "epoch": 0.39, "learning_rate": 4.563557721119898e-05, "loss": 1.7449, "step": 90400 }, { "epoch": 0.39, "learning_rate": 4.5630148824645745e-05, "loss": 1.7299, "step": 90500 }, { "epoch": 0.39, "learning_rate": 4.562472043809251e-05, "loss": 1.7059, "step": 90600 }, { "epoch": 0.39, "learning_rate": 4.5619292051539275e-05, "loss": 1.7734, "step": 90700 }, { "epoch": 0.39, "learning_rate": 4.561386366498604e-05, "loss": 1.7181, "step": 90800 }, { "epoch": 0.39, "learning_rate": 4.5608435278432806e-05, "loss": 1.7107, "step": 90900 }, { "epoch": 0.39, "learning_rate": 4.560300689187957e-05, "loss": 1.7182, "step": 91000 }, { "epoch": 0.39, "learning_rate": 4.559757850532633e-05, "loss": 1.6972, "step": 91100 }, { "epoch": 0.39, "learning_rate": 4.5592150118773105e-05, "loss": 1.7136, "step": 91200 }, { "epoch": 0.39, "learning_rate": 4.558672173221987e-05, "loss": 1.71, "step": 91300 }, { "epoch": 0.39, "learning_rate": 4.558129334566663e-05, "loss": 1.7111, "step": 91400 }, { "epoch": 0.39, "learning_rate": 4.55758649591134e-05, "loss": 1.6984, "step": 91500 }, { "epoch": 0.39, "learning_rate": 4.557043657256016e-05, "loss": 1.7014, "step": 91600 }, { "epoch": 0.39, "learning_rate": 4.556500818600693e-05, "loss": 1.7238, "step": 91700 }, { "epoch": 0.39, "learning_rate": 4.555957979945369e-05, "loss": 1.7368, "step": 91800 }, { "epoch": 0.39, "learning_rate": 4.555415141290045e-05, "loss": 1.7427, "step": 91900 }, { "epoch": 0.4, "learning_rate": 4.554872302634722e-05, "loss": 1.7477, "step": 92000 }, { "epoch": 0.4, "learning_rate": 4.554329463979398e-05, "loss": 1.707, "step": 92100 }, { "epoch": 0.4, "learning_rate": 4.5537866253240744e-05, "loss": 1.6978, "step": 92200 }, { "epoch": 0.4, "learning_rate": 4.553243786668752e-05, "loss": 1.7144, "step": 92300 }, { "epoch": 0.4, "learning_rate": 4.552700948013428e-05, "loss": 1.702, "step": 92400 }, { "epoch": 0.4, "learning_rate": 4.552158109358104e-05, "loss": 1.7267, "step": 92500 }, { "epoch": 0.4, "learning_rate": 4.551615270702781e-05, "loss": 1.7618, "step": 92600 }, { "epoch": 0.4, "learning_rate": 4.5510724320474574e-05, "loss": 1.7011, "step": 92700 }, { "epoch": 0.4, "learning_rate": 4.5505295933921335e-05, "loss": 1.6948, "step": 92800 }, { "epoch": 0.4, "learning_rate": 4.5499867547368104e-05, "loss": 1.7423, "step": 92900 }, { "epoch": 0.4, "learning_rate": 4.5494439160814866e-05, "loss": 1.7146, "step": 93000 }, { "epoch": 0.4, "learning_rate": 4.548901077426163e-05, "loss": 1.6756, "step": 93100 }, { "epoch": 0.4, "learning_rate": 4.5483582387708396e-05, "loss": 1.7365, "step": 93200 }, { "epoch": 0.4, "learning_rate": 4.5478154001155165e-05, "loss": 1.7188, "step": 93300 }, { "epoch": 0.4, "learning_rate": 4.547272561460193e-05, "loss": 1.7, "step": 93400 }, { "epoch": 0.4, "learning_rate": 4.5467297228048696e-05, "loss": 1.7037, "step": 93500 }, { "epoch": 0.4, "learning_rate": 4.546186884149546e-05, "loss": 1.7251, "step": 93600 }, { "epoch": 0.4, "learning_rate": 4.5456440454942226e-05, "loss": 1.7172, "step": 93700 }, { "epoch": 0.4, "learning_rate": 4.545101206838899e-05, "loss": 1.707, "step": 93800 }, { "epoch": 0.4, "learning_rate": 4.544558368183575e-05, "loss": 1.7124, "step": 93900 }, { "epoch": 0.4, "learning_rate": 4.544015529528252e-05, "loss": 1.7222, "step": 94000 }, { "epoch": 0.4, "learning_rate": 4.543472690872928e-05, "loss": 1.7044, "step": 94100 }, { "epoch": 0.4, "learning_rate": 4.542929852217604e-05, "loss": 1.7188, "step": 94200 }, { "epoch": 0.41, "learning_rate": 4.542387013562281e-05, "loss": 1.6926, "step": 94300 }, { "epoch": 0.41, "learning_rate": 4.541844174906958e-05, "loss": 1.733, "step": 94400 }, { "epoch": 0.41, "learning_rate": 4.541301336251634e-05, "loss": 1.7195, "step": 94500 }, { "epoch": 0.41, "learning_rate": 4.540758497596311e-05, "loss": 1.7094, "step": 94600 }, { "epoch": 0.41, "learning_rate": 4.540215658940987e-05, "loss": 1.6916, "step": 94700 }, { "epoch": 0.41, "learning_rate": 4.5396728202856634e-05, "loss": 1.7094, "step": 94800 }, { "epoch": 0.41, "learning_rate": 4.53912998163034e-05, "loss": 1.7307, "step": 94900 }, { "epoch": 0.41, "learning_rate": 4.5385871429750164e-05, "loss": 1.7251, "step": 95000 }, { "epoch": 0.41, "eval_loss": 1.524511694908142, "eval_runtime": 19.0193, "eval_samples_per_second": 525.783, "eval_steps_per_second": 16.457, "step": 95000 }, { "epoch": 0.41, "learning_rate": 4.5380443043196926e-05, "loss": 1.7106, "step": 95100 }, { "epoch": 0.41, "learning_rate": 4.5375014656643695e-05, "loss": 1.713, "step": 95200 }, { "epoch": 0.41, "learning_rate": 4.536958627009046e-05, "loss": 1.7098, "step": 95300 }, { "epoch": 0.41, "learning_rate": 4.5364157883537225e-05, "loss": 1.6978, "step": 95400 }, { "epoch": 0.41, "learning_rate": 4.5358729496983994e-05, "loss": 1.7265, "step": 95500 }, { "epoch": 0.41, "learning_rate": 4.5353301110430756e-05, "loss": 1.7404, "step": 95600 }, { "epoch": 0.41, "learning_rate": 4.5347872723877525e-05, "loss": 1.7086, "step": 95700 }, { "epoch": 0.41, "learning_rate": 4.5342444337324287e-05, "loss": 1.6896, "step": 95800 }, { "epoch": 0.41, "learning_rate": 4.533701595077105e-05, "loss": 1.7227, "step": 95900 }, { "epoch": 0.41, "learning_rate": 4.533158756421782e-05, "loss": 1.7086, "step": 96000 }, { "epoch": 0.41, "learning_rate": 4.532615917766458e-05, "loss": 1.6899, "step": 96100 }, { "epoch": 0.41, "learning_rate": 4.532073079111134e-05, "loss": 1.7089, "step": 96200 }, { "epoch": 0.41, "learning_rate": 4.531530240455811e-05, "loss": 1.6741, "step": 96300 }, { "epoch": 0.41, "learning_rate": 4.530987401800487e-05, "loss": 1.6886, "step": 96400 }, { "epoch": 0.41, "learning_rate": 4.530444563145164e-05, "loss": 1.7238, "step": 96500 }, { "epoch": 0.42, "learning_rate": 4.529901724489841e-05, "loss": 1.7371, "step": 96600 }, { "epoch": 0.42, "learning_rate": 4.529358885834517e-05, "loss": 1.7138, "step": 96700 }, { "epoch": 0.42, "learning_rate": 4.528816047179193e-05, "loss": 1.7232, "step": 96800 }, { "epoch": 0.42, "learning_rate": 4.52827320852387e-05, "loss": 1.7497, "step": 96900 }, { "epoch": 0.42, "learning_rate": 4.527730369868546e-05, "loss": 1.7202, "step": 97000 }, { "epoch": 0.42, "learning_rate": 4.5271875312132225e-05, "loss": 1.7008, "step": 97100 }, { "epoch": 0.42, "learning_rate": 4.5266446925578993e-05, "loss": 1.6832, "step": 97200 }, { "epoch": 0.42, "learning_rate": 4.5261018539025755e-05, "loss": 1.6893, "step": 97300 }, { "epoch": 0.42, "learning_rate": 4.5255590152472524e-05, "loss": 1.7067, "step": 97400 }, { "epoch": 0.42, "learning_rate": 4.525016176591929e-05, "loss": 1.7119, "step": 97500 }, { "epoch": 0.42, "learning_rate": 4.5244733379366054e-05, "loss": 1.7267, "step": 97600 }, { "epoch": 0.42, "learning_rate": 4.523930499281282e-05, "loss": 1.6911, "step": 97700 }, { "epoch": 0.42, "learning_rate": 4.5233876606259585e-05, "loss": 1.7164, "step": 97800 }, { "epoch": 0.42, "learning_rate": 4.522844821970635e-05, "loss": 1.6748, "step": 97900 }, { "epoch": 0.42, "learning_rate": 4.5223019833153116e-05, "loss": 1.7339, "step": 98000 }, { "epoch": 0.42, "learning_rate": 4.521759144659988e-05, "loss": 1.692, "step": 98100 }, { "epoch": 0.42, "learning_rate": 4.521216306004664e-05, "loss": 1.6869, "step": 98200 }, { "epoch": 0.42, "learning_rate": 4.520673467349341e-05, "loss": 1.7191, "step": 98300 }, { "epoch": 0.42, "learning_rate": 4.520130628694017e-05, "loss": 1.7228, "step": 98400 }, { "epoch": 0.42, "learning_rate": 4.519587790038693e-05, "loss": 1.7141, "step": 98500 }, { "epoch": 0.42, "learning_rate": 4.519044951383371e-05, "loss": 1.7032, "step": 98600 }, { "epoch": 0.42, "learning_rate": 4.518502112728047e-05, "loss": 1.7326, "step": 98700 }, { "epoch": 0.42, "learning_rate": 4.517959274072723e-05, "loss": 1.7107, "step": 98800 }, { "epoch": 0.42, "learning_rate": 4.5174164354174e-05, "loss": 1.6987, "step": 98900 }, { "epoch": 0.43, "learning_rate": 4.516873596762076e-05, "loss": 1.697, "step": 99000 }, { "epoch": 0.43, "learning_rate": 4.516330758106752e-05, "loss": 1.7505, "step": 99100 }, { "epoch": 0.43, "learning_rate": 4.515787919451429e-05, "loss": 1.7094, "step": 99200 }, { "epoch": 0.43, "learning_rate": 4.5152450807961054e-05, "loss": 1.6878, "step": 99300 }, { "epoch": 0.43, "learning_rate": 4.514702242140782e-05, "loss": 1.6845, "step": 99400 }, { "epoch": 0.43, "learning_rate": 4.5141594034854584e-05, "loss": 1.6908, "step": 99500 }, { "epoch": 0.43, "learning_rate": 4.513616564830135e-05, "loss": 1.7112, "step": 99600 }, { "epoch": 0.43, "learning_rate": 4.513073726174812e-05, "loss": 1.7154, "step": 99700 }, { "epoch": 0.43, "learning_rate": 4.5125308875194884e-05, "loss": 1.7267, "step": 99800 }, { "epoch": 0.43, "learning_rate": 4.5119880488641645e-05, "loss": 1.7002, "step": 99900 }, { "epoch": 0.43, "learning_rate": 4.5114452102088414e-05, "loss": 1.7055, "step": 100000 }, { "epoch": 0.43, "eval_loss": 1.520733118057251, "eval_runtime": 19.0032, "eval_samples_per_second": 526.228, "eval_steps_per_second": 16.471, "step": 100000 }, { "epoch": 0.43, "learning_rate": 4.5109023715535176e-05, "loss": 1.7008, "step": 100100 }, { "epoch": 0.43, "learning_rate": 4.510359532898194e-05, "loss": 1.728, "step": 100200 }, { "epoch": 0.43, "learning_rate": 4.5098166942428706e-05, "loss": 1.7455, "step": 100300 }, { "epoch": 0.43, "learning_rate": 4.509273855587547e-05, "loss": 1.7056, "step": 100400 }, { "epoch": 0.43, "learning_rate": 4.508731016932223e-05, "loss": 1.722, "step": 100500 }, { "epoch": 0.43, "learning_rate": 4.5081881782769e-05, "loss": 1.7009, "step": 100600 }, { "epoch": 0.43, "learning_rate": 4.507645339621577e-05, "loss": 1.6849, "step": 100700 }, { "epoch": 0.43, "learning_rate": 4.507102500966253e-05, "loss": 1.7089, "step": 100800 }, { "epoch": 0.43, "learning_rate": 4.50655966231093e-05, "loss": 1.7269, "step": 100900 }, { "epoch": 0.43, "learning_rate": 4.506016823655606e-05, "loss": 1.6955, "step": 101000 }, { "epoch": 0.43, "learning_rate": 4.505473985000282e-05, "loss": 1.7307, "step": 101100 }, { "epoch": 0.43, "learning_rate": 4.504931146344959e-05, "loss": 1.6969, "step": 101200 }, { "epoch": 0.44, "learning_rate": 4.504388307689635e-05, "loss": 1.7104, "step": 101300 }, { "epoch": 0.44, "learning_rate": 4.503845469034312e-05, "loss": 1.7175, "step": 101400 }, { "epoch": 0.44, "learning_rate": 4.503302630378988e-05, "loss": 1.7054, "step": 101500 }, { "epoch": 0.44, "learning_rate": 4.5027597917236645e-05, "loss": 1.6907, "step": 101600 }, { "epoch": 0.44, "learning_rate": 4.502216953068342e-05, "loss": 1.7202, "step": 101700 }, { "epoch": 0.44, "learning_rate": 4.501674114413018e-05, "loss": 1.7127, "step": 101800 }, { "epoch": 0.44, "learning_rate": 4.5011312757576944e-05, "loss": 1.7052, "step": 101900 }, { "epoch": 0.44, "learning_rate": 4.500588437102371e-05, "loss": 1.7044, "step": 102000 }, { "epoch": 0.44, "learning_rate": 4.5000455984470474e-05, "loss": 1.7022, "step": 102100 }, { "epoch": 0.44, "learning_rate": 4.4995027597917236e-05, "loss": 1.7027, "step": 102200 }, { "epoch": 0.44, "learning_rate": 4.4989599211364005e-05, "loss": 1.6933, "step": 102300 }, { "epoch": 0.44, "learning_rate": 4.498417082481077e-05, "loss": 1.7638, "step": 102400 }, { "epoch": 0.44, "learning_rate": 4.497874243825753e-05, "loss": 1.7242, "step": 102500 }, { "epoch": 0.44, "learning_rate": 4.49733140517043e-05, "loss": 1.7152, "step": 102600 }, { "epoch": 0.44, "learning_rate": 4.4967885665151066e-05, "loss": 1.7038, "step": 102700 }, { "epoch": 0.44, "learning_rate": 4.496245727859783e-05, "loss": 1.7027, "step": 102800 }, { "epoch": 0.44, "learning_rate": 4.4957028892044597e-05, "loss": 1.7215, "step": 102900 }, { "epoch": 0.44, "learning_rate": 4.495160050549136e-05, "loss": 1.6996, "step": 103000 }, { "epoch": 0.44, "learning_rate": 4.494617211893812e-05, "loss": 1.6896, "step": 103100 }, { "epoch": 0.44, "learning_rate": 4.494074373238489e-05, "loss": 1.6978, "step": 103200 }, { "epoch": 0.44, "learning_rate": 4.493531534583165e-05, "loss": 1.6943, "step": 103300 }, { "epoch": 0.44, "learning_rate": 4.492988695927842e-05, "loss": 1.7214, "step": 103400 }, { "epoch": 0.44, "learning_rate": 4.492445857272518e-05, "loss": 1.717, "step": 103500 }, { "epoch": 0.45, "learning_rate": 4.491903018617194e-05, "loss": 1.6582, "step": 103600 }, { "epoch": 0.45, "learning_rate": 4.491360179961871e-05, "loss": 1.6872, "step": 103700 }, { "epoch": 0.45, "learning_rate": 4.490817341306548e-05, "loss": 1.7238, "step": 103800 }, { "epoch": 0.45, "learning_rate": 4.490274502651224e-05, "loss": 1.6819, "step": 103900 }, { "epoch": 0.45, "learning_rate": 4.489731663995901e-05, "loss": 1.6898, "step": 104000 }, { "epoch": 0.45, "learning_rate": 4.489188825340577e-05, "loss": 1.6868, "step": 104100 }, { "epoch": 0.45, "learning_rate": 4.4886459866852535e-05, "loss": 1.6888, "step": 104200 }, { "epoch": 0.45, "learning_rate": 4.4881031480299303e-05, "loss": 1.7046, "step": 104300 }, { "epoch": 0.45, "learning_rate": 4.4875603093746065e-05, "loss": 1.6846, "step": 104400 }, { "epoch": 0.45, "learning_rate": 4.487017470719283e-05, "loss": 1.6848, "step": 104500 }, { "epoch": 0.45, "learning_rate": 4.4864746320639596e-05, "loss": 1.6822, "step": 104600 }, { "epoch": 0.45, "learning_rate": 4.485931793408636e-05, "loss": 1.6829, "step": 104700 }, { "epoch": 0.45, "learning_rate": 4.4853889547533126e-05, "loss": 1.7006, "step": 104800 }, { "epoch": 0.45, "learning_rate": 4.4848461160979895e-05, "loss": 1.6972, "step": 104900 }, { "epoch": 0.45, "learning_rate": 4.484303277442666e-05, "loss": 1.7391, "step": 105000 }, { "epoch": 0.45, "eval_loss": 1.5192019939422607, "eval_runtime": 18.9573, "eval_samples_per_second": 527.5, "eval_steps_per_second": 16.511, "step": 105000 }, { "epoch": 0.45, "learning_rate": 4.483760438787342e-05, "loss": 1.6914, "step": 105100 }, { "epoch": 0.45, "learning_rate": 4.483217600132019e-05, "loss": 1.6868, "step": 105200 }, { "epoch": 0.45, "learning_rate": 4.482674761476695e-05, "loss": 1.679, "step": 105300 }, { "epoch": 0.45, "learning_rate": 4.482131922821372e-05, "loss": 1.6947, "step": 105400 }, { "epoch": 0.45, "learning_rate": 4.481589084166048e-05, "loss": 1.683, "step": 105500 }, { "epoch": 0.45, "learning_rate": 4.481046245510724e-05, "loss": 1.7186, "step": 105600 }, { "epoch": 0.45, "learning_rate": 4.480503406855401e-05, "loss": 1.7041, "step": 105700 }, { "epoch": 0.45, "learning_rate": 4.479960568200077e-05, "loss": 1.6946, "step": 105800 }, { "epoch": 0.45, "learning_rate": 4.479417729544754e-05, "loss": 1.6408, "step": 105900 }, { "epoch": 0.46, "learning_rate": 4.478874890889431e-05, "loss": 1.6982, "step": 106000 }, { "epoch": 0.46, "learning_rate": 4.478332052234107e-05, "loss": 1.6851, "step": 106100 }, { "epoch": 0.46, "learning_rate": 4.477789213578783e-05, "loss": 1.7197, "step": 106200 }, { "epoch": 0.46, "learning_rate": 4.47724637492346e-05, "loss": 1.6963, "step": 106300 }, { "epoch": 0.46, "learning_rate": 4.4767035362681364e-05, "loss": 1.7035, "step": 106400 }, { "epoch": 0.46, "learning_rate": 4.4761606976128126e-05, "loss": 1.684, "step": 106500 }, { "epoch": 0.46, "learning_rate": 4.4756178589574894e-05, "loss": 1.7079, "step": 106600 }, { "epoch": 0.46, "learning_rate": 4.4750750203021656e-05, "loss": 1.6791, "step": 106700 }, { "epoch": 0.46, "learning_rate": 4.4745321816468425e-05, "loss": 1.6869, "step": 106800 }, { "epoch": 0.46, "learning_rate": 4.4739893429915193e-05, "loss": 1.6882, "step": 106900 }, { "epoch": 0.46, "learning_rate": 4.4734465043361955e-05, "loss": 1.6805, "step": 107000 }, { "epoch": 0.46, "learning_rate": 4.472903665680872e-05, "loss": 1.6941, "step": 107100 }, { "epoch": 0.46, "learning_rate": 4.4723608270255486e-05, "loss": 1.7022, "step": 107200 }, { "epoch": 0.46, "learning_rate": 4.471817988370225e-05, "loss": 1.6859, "step": 107300 }, { "epoch": 0.46, "learning_rate": 4.4712751497149016e-05, "loss": 1.6924, "step": 107400 }, { "epoch": 0.46, "learning_rate": 4.470732311059578e-05, "loss": 1.6669, "step": 107500 }, { "epoch": 0.46, "learning_rate": 4.470189472404254e-05, "loss": 1.6708, "step": 107600 }, { "epoch": 0.46, "learning_rate": 4.469646633748931e-05, "loss": 1.7214, "step": 107700 }, { "epoch": 0.46, "learning_rate": 4.469103795093607e-05, "loss": 1.6922, "step": 107800 }, { "epoch": 0.46, "learning_rate": 4.468560956438283e-05, "loss": 1.7017, "step": 107900 }, { "epoch": 0.46, "learning_rate": 4.468018117782961e-05, "loss": 1.6998, "step": 108000 }, { "epoch": 0.46, "learning_rate": 4.467475279127637e-05, "loss": 1.703, "step": 108100 }, { "epoch": 0.46, "learning_rate": 4.466932440472313e-05, "loss": 1.6755, "step": 108200 }, { "epoch": 0.47, "learning_rate": 4.46638960181699e-05, "loss": 1.7187, "step": 108300 }, { "epoch": 0.47, "learning_rate": 4.465846763161666e-05, "loss": 1.6764, "step": 108400 }, { "epoch": 0.47, "learning_rate": 4.4653039245063424e-05, "loss": 1.7016, "step": 108500 }, { "epoch": 0.47, "learning_rate": 4.464761085851019e-05, "loss": 1.7061, "step": 108600 }, { "epoch": 0.47, "learning_rate": 4.4642182471956955e-05, "loss": 1.6835, "step": 108700 }, { "epoch": 0.47, "learning_rate": 4.463675408540372e-05, "loss": 1.6994, "step": 108800 }, { "epoch": 0.47, "learning_rate": 4.4631325698850485e-05, "loss": 1.6901, "step": 108900 }, { "epoch": 0.47, "learning_rate": 4.4625897312297254e-05, "loss": 1.7016, "step": 109000 }, { "epoch": 0.47, "learning_rate": 4.4620468925744016e-05, "loss": 1.6906, "step": 109100 }, { "epoch": 0.47, "learning_rate": 4.4615040539190784e-05, "loss": 1.6993, "step": 109200 }, { "epoch": 0.47, "learning_rate": 4.4609612152637546e-05, "loss": 1.7195, "step": 109300 }, { "epoch": 0.47, "learning_rate": 4.4604183766084315e-05, "loss": 1.6942, "step": 109400 }, { "epoch": 0.47, "learning_rate": 4.459875537953108e-05, "loss": 1.7097, "step": 109500 }, { "epoch": 0.47, "learning_rate": 4.459332699297784e-05, "loss": 1.669, "step": 109600 }, { "epoch": 0.47, "learning_rate": 4.458789860642461e-05, "loss": 1.7243, "step": 109700 }, { "epoch": 0.47, "learning_rate": 4.458247021987137e-05, "loss": 1.6813, "step": 109800 }, { "epoch": 0.47, "learning_rate": 4.457704183331813e-05, "loss": 1.7064, "step": 109900 }, { "epoch": 0.47, "learning_rate": 4.45716134467649e-05, "loss": 1.7051, "step": 110000 }, { "epoch": 0.47, "eval_loss": 1.51393461227417, "eval_runtime": 19.0208, "eval_samples_per_second": 525.74, "eval_steps_per_second": 16.456, "step": 110000 }, { "epoch": 0.47, "learning_rate": 4.456618506021167e-05, "loss": 1.6981, "step": 110100 }, { "epoch": 0.47, "learning_rate": 4.456075667365843e-05, "loss": 1.7, "step": 110200 }, { "epoch": 0.47, "learning_rate": 4.45553282871052e-05, "loss": 1.6962, "step": 110300 }, { "epoch": 0.47, "learning_rate": 4.454989990055196e-05, "loss": 1.6814, "step": 110400 }, { "epoch": 0.47, "learning_rate": 4.454447151399872e-05, "loss": 1.6765, "step": 110500 }, { "epoch": 0.48, "learning_rate": 4.453904312744549e-05, "loss": 1.6811, "step": 110600 }, { "epoch": 0.48, "learning_rate": 4.453361474089225e-05, "loss": 1.6918, "step": 110700 }, { "epoch": 0.48, "learning_rate": 4.452818635433902e-05, "loss": 1.7026, "step": 110800 }, { "epoch": 0.48, "learning_rate": 4.4522757967785784e-05, "loss": 1.6863, "step": 110900 }, { "epoch": 0.48, "learning_rate": 4.4517329581232546e-05, "loss": 1.6976, "step": 111000 }, { "epoch": 0.48, "learning_rate": 4.4511901194679314e-05, "loss": 1.715, "step": 111100 }, { "epoch": 0.48, "learning_rate": 4.450647280812608e-05, "loss": 1.689, "step": 111200 }, { "epoch": 0.48, "learning_rate": 4.4501044421572845e-05, "loss": 1.679, "step": 111300 }, { "epoch": 0.48, "learning_rate": 4.4495616035019613e-05, "loss": 1.6749, "step": 111400 }, { "epoch": 0.48, "learning_rate": 4.4490187648466375e-05, "loss": 1.6885, "step": 111500 }, { "epoch": 0.48, "learning_rate": 4.448475926191314e-05, "loss": 1.6888, "step": 111600 }, { "epoch": 0.48, "learning_rate": 4.4479330875359906e-05, "loss": 1.6944, "step": 111700 }, { "epoch": 0.48, "learning_rate": 4.447390248880667e-05, "loss": 1.6997, "step": 111800 }, { "epoch": 0.48, "learning_rate": 4.446847410225343e-05, "loss": 1.685, "step": 111900 }, { "epoch": 0.48, "learning_rate": 4.44630457157002e-05, "loss": 1.6695, "step": 112000 }, { "epoch": 0.48, "learning_rate": 4.445761732914696e-05, "loss": 1.6916, "step": 112100 }, { "epoch": 0.48, "learning_rate": 4.445218894259373e-05, "loss": 1.7029, "step": 112200 }, { "epoch": 0.48, "learning_rate": 4.44467605560405e-05, "loss": 1.7237, "step": 112300 }, { "epoch": 0.48, "learning_rate": 4.444133216948726e-05, "loss": 1.7238, "step": 112400 }, { "epoch": 0.48, "learning_rate": 4.443590378293402e-05, "loss": 1.6702, "step": 112500 }, { "epoch": 0.48, "learning_rate": 4.443047539638079e-05, "loss": 1.6817, "step": 112600 }, { "epoch": 0.48, "learning_rate": 4.442504700982755e-05, "loss": 1.7053, "step": 112700 }, { "epoch": 0.48, "learning_rate": 4.441961862327432e-05, "loss": 1.6739, "step": 112800 }, { "epoch": 0.49, "learning_rate": 4.441419023672108e-05, "loss": 1.682, "step": 112900 }, { "epoch": 0.49, "learning_rate": 4.4408761850167844e-05, "loss": 1.6826, "step": 113000 }, { "epoch": 0.49, "learning_rate": 4.440333346361461e-05, "loss": 1.7159, "step": 113100 }, { "epoch": 0.49, "learning_rate": 4.439790507706138e-05, "loss": 1.6906, "step": 113200 }, { "epoch": 0.49, "learning_rate": 4.439247669050814e-05, "loss": 1.6761, "step": 113300 }, { "epoch": 0.49, "learning_rate": 4.438704830395491e-05, "loss": 1.7117, "step": 113400 }, { "epoch": 0.49, "learning_rate": 4.4381619917401674e-05, "loss": 1.6905, "step": 113500 }, { "epoch": 0.49, "learning_rate": 4.4376191530848436e-05, "loss": 1.6703, "step": 113600 }, { "epoch": 0.49, "learning_rate": 4.4370763144295204e-05, "loss": 1.6829, "step": 113700 }, { "epoch": 0.49, "learning_rate": 4.4365334757741966e-05, "loss": 1.7074, "step": 113800 }, { "epoch": 0.49, "learning_rate": 4.435990637118873e-05, "loss": 1.6749, "step": 113900 }, { "epoch": 0.49, "learning_rate": 4.43544779846355e-05, "loss": 1.6903, "step": 114000 }, { "epoch": 0.49, "learning_rate": 4.434904959808226e-05, "loss": 1.6965, "step": 114100 }, { "epoch": 0.49, "learning_rate": 4.434362121152903e-05, "loss": 1.707, "step": 114200 }, { "epoch": 0.49, "learning_rate": 4.4338192824975796e-05, "loss": 1.7036, "step": 114300 }, { "epoch": 0.49, "learning_rate": 4.433276443842256e-05, "loss": 1.6914, "step": 114400 }, { "epoch": 0.49, "learning_rate": 4.432733605186932e-05, "loss": 1.7129, "step": 114500 }, { "epoch": 0.49, "learning_rate": 4.432190766531609e-05, "loss": 1.6971, "step": 114600 }, { "epoch": 0.49, "learning_rate": 4.431647927876285e-05, "loss": 1.69, "step": 114700 }, { "epoch": 0.49, "learning_rate": 4.431105089220962e-05, "loss": 1.6701, "step": 114800 }, { "epoch": 0.49, "learning_rate": 4.430562250565638e-05, "loss": 1.7008, "step": 114900 }, { "epoch": 0.49, "learning_rate": 4.430019411910314e-05, "loss": 1.7034, "step": 115000 }, { "epoch": 0.49, "eval_loss": 1.5167127847671509, "eval_runtime": 19.0213, "eval_samples_per_second": 525.726, "eval_steps_per_second": 16.455, "step": 115000 }, { "epoch": 0.49, "learning_rate": 4.429476573254991e-05, "loss": 1.6797, "step": 115100 }, { "epoch": 0.49, "learning_rate": 4.428933734599667e-05, "loss": 1.6997, "step": 115200 }, { "epoch": 0.5, "learning_rate": 4.428390895944344e-05, "loss": 1.6886, "step": 115300 }, { "epoch": 0.5, "learning_rate": 4.427848057289021e-05, "loss": 1.7028, "step": 115400 }, { "epoch": 0.5, "learning_rate": 4.427305218633697e-05, "loss": 1.675, "step": 115500 }, { "epoch": 0.5, "learning_rate": 4.4267623799783734e-05, "loss": 1.6623, "step": 115600 }, { "epoch": 0.5, "learning_rate": 4.42621954132305e-05, "loss": 1.6872, "step": 115700 }, { "epoch": 0.5, "learning_rate": 4.4256767026677265e-05, "loss": 1.6599, "step": 115800 }, { "epoch": 0.5, "learning_rate": 4.4251338640124027e-05, "loss": 1.6994, "step": 115900 }, { "epoch": 0.5, "learning_rate": 4.4245910253570795e-05, "loss": 1.6918, "step": 116000 }, { "epoch": 0.5, "learning_rate": 4.424048186701756e-05, "loss": 1.6871, "step": 116100 }, { "epoch": 0.5, "learning_rate": 4.4235053480464326e-05, "loss": 1.7014, "step": 116200 }, { "epoch": 0.5, "learning_rate": 4.422962509391109e-05, "loss": 1.6834, "step": 116300 }, { "epoch": 0.5, "learning_rate": 4.4224196707357856e-05, "loss": 1.6943, "step": 116400 }, { "epoch": 0.5, "learning_rate": 4.421876832080462e-05, "loss": 1.6789, "step": 116500 }, { "epoch": 0.5, "learning_rate": 4.421333993425139e-05, "loss": 1.718, "step": 116600 }, { "epoch": 0.5, "learning_rate": 4.420791154769815e-05, "loss": 1.7048, "step": 116700 }, { "epoch": 0.5, "learning_rate": 4.420248316114492e-05, "loss": 1.7218, "step": 116800 }, { "epoch": 0.5, "learning_rate": 4.419705477459168e-05, "loss": 1.6819, "step": 116900 }, { "epoch": 0.5, "learning_rate": 4.419162638803844e-05, "loss": 1.7022, "step": 117000 }, { "epoch": 0.5, "learning_rate": 4.418619800148521e-05, "loss": 1.6735, "step": 117100 }, { "epoch": 0.5, "learning_rate": 4.418076961493197e-05, "loss": 1.7054, "step": 117200 }, { "epoch": 0.5, "learning_rate": 4.4175341228378733e-05, "loss": 1.6838, "step": 117300 }, { "epoch": 0.5, "learning_rate": 4.416991284182551e-05, "loss": 1.7075, "step": 117400 }, { "epoch": 0.5, "learning_rate": 4.416448445527227e-05, "loss": 1.6719, "step": 117500 }, { "epoch": 0.51, "learning_rate": 4.415905606871903e-05, "loss": 1.693, "step": 117600 }, { "epoch": 0.51, "learning_rate": 4.41536276821658e-05, "loss": 1.678, "step": 117700 }, { "epoch": 0.51, "learning_rate": 4.414819929561256e-05, "loss": 1.723, "step": 117800 }, { "epoch": 0.51, "learning_rate": 4.4142770909059325e-05, "loss": 1.7097, "step": 117900 }, { "epoch": 0.51, "learning_rate": 4.4137342522506094e-05, "loss": 1.6379, "step": 118000 }, { "epoch": 0.51, "learning_rate": 4.4131914135952856e-05, "loss": 1.6527, "step": 118100 }, { "epoch": 0.51, "learning_rate": 4.4126485749399624e-05, "loss": 1.6692, "step": 118200 }, { "epoch": 0.51, "learning_rate": 4.4121057362846386e-05, "loss": 1.6807, "step": 118300 }, { "epoch": 0.51, "learning_rate": 4.411562897629315e-05, "loss": 1.6781, "step": 118400 }, { "epoch": 0.51, "learning_rate": 4.4110200589739917e-05, "loss": 1.6679, "step": 118500 }, { "epoch": 0.51, "learning_rate": 4.4104772203186685e-05, "loss": 1.6798, "step": 118600 }, { "epoch": 0.51, "learning_rate": 4.409934381663345e-05, "loss": 1.6614, "step": 118700 }, { "epoch": 0.51, "learning_rate": 4.4093915430080216e-05, "loss": 1.7208, "step": 118800 }, { "epoch": 0.51, "learning_rate": 4.408848704352698e-05, "loss": 1.6992, "step": 118900 }, { "epoch": 0.51, "learning_rate": 4.408305865697374e-05, "loss": 1.7189, "step": 119000 }, { "epoch": 0.51, "learning_rate": 4.407763027042051e-05, "loss": 1.6979, "step": 119100 }, { "epoch": 0.51, "learning_rate": 4.407220188386727e-05, "loss": 1.6903, "step": 119200 }, { "epoch": 0.51, "learning_rate": 4.406677349731403e-05, "loss": 1.679, "step": 119300 }, { "epoch": 0.51, "learning_rate": 4.40613451107608e-05, "loss": 1.6904, "step": 119400 }, { "epoch": 0.51, "learning_rate": 4.405591672420757e-05, "loss": 1.6697, "step": 119500 }, { "epoch": 0.51, "learning_rate": 4.405048833765433e-05, "loss": 1.6896, "step": 119600 }, { "epoch": 0.51, "learning_rate": 4.40450599511011e-05, "loss": 1.6877, "step": 119700 }, { "epoch": 0.51, "learning_rate": 4.403963156454786e-05, "loss": 1.6599, "step": 119800 }, { "epoch": 0.52, "learning_rate": 4.4034203177994623e-05, "loss": 1.7118, "step": 119900 }, { "epoch": 0.52, "learning_rate": 4.402877479144139e-05, "loss": 1.6659, "step": 120000 }, { "epoch": 0.52, "eval_loss": 1.5134057998657227, "eval_runtime": 19.0004, "eval_samples_per_second": 526.305, "eval_steps_per_second": 16.473, "step": 120000 }, { "epoch": 0.52, "learning_rate": 4.4023346404888154e-05, "loss": 1.6814, "step": 120100 }, { "epoch": 0.52, "learning_rate": 4.401791801833492e-05, "loss": 1.6708, "step": 120200 }, { "epoch": 0.52, "learning_rate": 4.4012489631781685e-05, "loss": 1.6479, "step": 120300 }, { "epoch": 0.52, "learning_rate": 4.4007061245228446e-05, "loss": 1.6741, "step": 120400 }, { "epoch": 0.52, "learning_rate": 4.4001632858675215e-05, "loss": 1.6662, "step": 120500 }, { "epoch": 0.52, "learning_rate": 4.3996204472121984e-05, "loss": 1.6919, "step": 120600 }, { "epoch": 0.52, "learning_rate": 4.3990776085568746e-05, "loss": 1.6835, "step": 120700 }, { "epoch": 0.52, "learning_rate": 4.3985347699015514e-05, "loss": 1.6708, "step": 120800 }, { "epoch": 0.52, "learning_rate": 4.3979919312462276e-05, "loss": 1.6714, "step": 120900 }, { "epoch": 0.52, "learning_rate": 4.397449092590904e-05, "loss": 1.674, "step": 121000 }, { "epoch": 0.52, "learning_rate": 4.396906253935581e-05, "loss": 1.6925, "step": 121100 }, { "epoch": 0.52, "learning_rate": 4.396363415280257e-05, "loss": 1.654, "step": 121200 }, { "epoch": 0.52, "learning_rate": 4.395820576624933e-05, "loss": 1.6583, "step": 121300 }, { "epoch": 0.52, "learning_rate": 4.39527773796961e-05, "loss": 1.6943, "step": 121400 }, { "epoch": 0.52, "learning_rate": 4.394734899314286e-05, "loss": 1.6652, "step": 121500 }, { "epoch": 0.52, "learning_rate": 4.394192060658963e-05, "loss": 1.6484, "step": 121600 }, { "epoch": 0.52, "learning_rate": 4.39364922200364e-05, "loss": 1.6899, "step": 121700 }, { "epoch": 0.52, "learning_rate": 4.393106383348316e-05, "loss": 1.6845, "step": 121800 }, { "epoch": 0.52, "learning_rate": 4.392563544692992e-05, "loss": 1.6607, "step": 121900 }, { "epoch": 0.52, "learning_rate": 4.392020706037669e-05, "loss": 1.6697, "step": 122000 }, { "epoch": 0.52, "learning_rate": 4.391477867382345e-05, "loss": 1.6999, "step": 122100 }, { "epoch": 0.52, "learning_rate": 4.390935028727022e-05, "loss": 1.6962, "step": 122200 }, { "epoch": 0.53, "learning_rate": 4.390392190071698e-05, "loss": 1.6608, "step": 122300 }, { "epoch": 0.53, "learning_rate": 4.3898493514163745e-05, "loss": 1.7008, "step": 122400 }, { "epoch": 0.53, "learning_rate": 4.3893065127610514e-05, "loss": 1.6752, "step": 122500 }, { "epoch": 0.53, "learning_rate": 4.388763674105728e-05, "loss": 1.6751, "step": 122600 }, { "epoch": 0.53, "learning_rate": 4.3882208354504044e-05, "loss": 1.6752, "step": 122700 }, { "epoch": 0.53, "learning_rate": 4.387677996795081e-05, "loss": 1.6605, "step": 122800 }, { "epoch": 0.53, "learning_rate": 4.3871351581397575e-05, "loss": 1.6843, "step": 122900 }, { "epoch": 0.53, "learning_rate": 4.3865923194844336e-05, "loss": 1.6849, "step": 123000 }, { "epoch": 0.53, "learning_rate": 4.3860494808291105e-05, "loss": 1.6412, "step": 123100 }, { "epoch": 0.53, "learning_rate": 4.385506642173787e-05, "loss": 1.6645, "step": 123200 }, { "epoch": 0.53, "learning_rate": 4.384963803518463e-05, "loss": 1.6803, "step": 123300 }, { "epoch": 0.53, "learning_rate": 4.38442096486314e-05, "loss": 1.6622, "step": 123400 }, { "epoch": 0.53, "learning_rate": 4.383878126207816e-05, "loss": 1.6673, "step": 123500 }, { "epoch": 0.53, "learning_rate": 4.383335287552492e-05, "loss": 1.6334, "step": 123600 }, { "epoch": 0.53, "learning_rate": 4.38279244889717e-05, "loss": 1.6778, "step": 123700 }, { "epoch": 0.53, "learning_rate": 4.382249610241846e-05, "loss": 1.6805, "step": 123800 }, { "epoch": 0.53, "learning_rate": 4.381706771586522e-05, "loss": 1.711, "step": 123900 }, { "epoch": 0.53, "learning_rate": 4.381163932931199e-05, "loss": 1.6893, "step": 124000 }, { "epoch": 0.53, "learning_rate": 4.380621094275875e-05, "loss": 1.6537, "step": 124100 }, { "epoch": 0.53, "learning_rate": 4.380078255620552e-05, "loss": 1.6938, "step": 124200 }, { "epoch": 0.53, "learning_rate": 4.379535416965228e-05, "loss": 1.6666, "step": 124300 }, { "epoch": 0.53, "learning_rate": 4.3789925783099043e-05, "loss": 1.709, "step": 124400 }, { "epoch": 0.53, "learning_rate": 4.378449739654581e-05, "loss": 1.6937, "step": 124500 }, { "epoch": 0.54, "learning_rate": 4.3779069009992574e-05, "loss": 1.7111, "step": 124600 }, { "epoch": 0.54, "learning_rate": 4.377364062343934e-05, "loss": 1.6885, "step": 124700 }, { "epoch": 0.54, "learning_rate": 4.376821223688611e-05, "loss": 1.6691, "step": 124800 }, { "epoch": 0.54, "learning_rate": 4.376278385033287e-05, "loss": 1.6659, "step": 124900 }, { "epoch": 0.54, "learning_rate": 4.3757355463779635e-05, "loss": 1.6814, "step": 125000 }, { "epoch": 0.54, "eval_loss": 1.5067857503890991, "eval_runtime": 18.9814, "eval_samples_per_second": 526.832, "eval_steps_per_second": 16.49, "step": 125000 }, { "epoch": 0.54, "learning_rate": 4.3751927077226404e-05, "loss": 1.7118, "step": 125100 }, { "epoch": 0.54, "learning_rate": 4.3746498690673166e-05, "loss": 1.6906, "step": 125200 }, { "epoch": 0.54, "learning_rate": 4.374107030411993e-05, "loss": 1.6594, "step": 125300 }, { "epoch": 0.54, "learning_rate": 4.3735641917566696e-05, "loss": 1.6768, "step": 125400 }, { "epoch": 0.54, "learning_rate": 4.373021353101346e-05, "loss": 1.6496, "step": 125500 }, { "epoch": 0.54, "learning_rate": 4.372478514446022e-05, "loss": 1.6796, "step": 125600 }, { "epoch": 0.54, "learning_rate": 4.371935675790699e-05, "loss": 1.6844, "step": 125700 }, { "epoch": 0.54, "learning_rate": 4.371392837135376e-05, "loss": 1.6712, "step": 125800 }, { "epoch": 0.54, "learning_rate": 4.370849998480052e-05, "loss": 1.6566, "step": 125900 }, { "epoch": 0.54, "learning_rate": 4.370307159824729e-05, "loss": 1.653, "step": 126000 }, { "epoch": 0.54, "learning_rate": 4.369764321169405e-05, "loss": 1.711, "step": 126100 }, { "epoch": 0.54, "learning_rate": 4.369221482514082e-05, "loss": 1.6782, "step": 126200 }, { "epoch": 0.54, "learning_rate": 4.368678643858758e-05, "loss": 1.6618, "step": 126300 }, { "epoch": 0.54, "learning_rate": 4.368135805203434e-05, "loss": 1.6439, "step": 126400 }, { "epoch": 0.54, "learning_rate": 4.367592966548111e-05, "loss": 1.6435, "step": 126500 }, { "epoch": 0.54, "learning_rate": 4.367050127892787e-05, "loss": 1.6772, "step": 126600 }, { "epoch": 0.54, "learning_rate": 4.3665072892374634e-05, "loss": 1.6647, "step": 126700 }, { "epoch": 0.54, "learning_rate": 4.365964450582141e-05, "loss": 1.6984, "step": 126800 }, { "epoch": 0.55, "learning_rate": 4.365421611926817e-05, "loss": 1.652, "step": 126900 }, { "epoch": 0.55, "learning_rate": 4.3648787732714933e-05, "loss": 1.6726, "step": 127000 }, { "epoch": 0.55, "learning_rate": 4.36433593461617e-05, "loss": 1.6786, "step": 127100 }, { "epoch": 0.55, "learning_rate": 4.3637930959608464e-05, "loss": 1.6714, "step": 127200 }, { "epoch": 0.55, "learning_rate": 4.3632502573055226e-05, "loss": 1.6693, "step": 127300 }, { "epoch": 0.55, "learning_rate": 4.3627074186501995e-05, "loss": 1.6929, "step": 127400 }, { "epoch": 0.55, "learning_rate": 4.3621645799948756e-05, "loss": 1.6755, "step": 127500 }, { "epoch": 0.55, "learning_rate": 4.361621741339552e-05, "loss": 1.6561, "step": 127600 }, { "epoch": 0.55, "learning_rate": 4.361078902684229e-05, "loss": 1.6771, "step": 127700 }, { "epoch": 0.55, "learning_rate": 4.360536064028905e-05, "loss": 1.682, "step": 127800 }, { "epoch": 0.55, "learning_rate": 4.359993225373582e-05, "loss": 1.6525, "step": 127900 }, { "epoch": 0.55, "learning_rate": 4.3594503867182586e-05, "loss": 1.6523, "step": 128000 }, { "epoch": 0.55, "learning_rate": 4.358907548062935e-05, "loss": 1.6703, "step": 128100 }, { "epoch": 0.55, "learning_rate": 4.358364709407612e-05, "loss": 1.659, "step": 128200 }, { "epoch": 0.55, "learning_rate": 4.357821870752288e-05, "loss": 1.6595, "step": 128300 }, { "epoch": 0.55, "learning_rate": 4.357279032096964e-05, "loss": 1.655, "step": 128400 }, { "epoch": 0.55, "learning_rate": 4.356736193441641e-05, "loss": 1.674, "step": 128500 }, { "epoch": 0.55, "learning_rate": 4.356193354786317e-05, "loss": 1.6901, "step": 128600 }, { "epoch": 0.55, "learning_rate": 4.355650516130993e-05, "loss": 1.681, "step": 128700 }, { "epoch": 0.55, "learning_rate": 4.35510767747567e-05, "loss": 1.6741, "step": 128800 }, { "epoch": 0.55, "learning_rate": 4.354564838820347e-05, "loss": 1.6859, "step": 128900 }, { "epoch": 0.55, "learning_rate": 4.354022000165023e-05, "loss": 1.6399, "step": 129000 }, { "epoch": 0.55, "learning_rate": 4.3534791615097e-05, "loss": 1.6625, "step": 129100 }, { "epoch": 0.56, "learning_rate": 4.352936322854376e-05, "loss": 1.6884, "step": 129200 }, { "epoch": 0.56, "learning_rate": 4.3523934841990524e-05, "loss": 1.6917, "step": 129300 }, { "epoch": 0.56, "learning_rate": 4.351850645543729e-05, "loss": 1.6867, "step": 129400 }, { "epoch": 0.56, "learning_rate": 4.3513078068884055e-05, "loss": 1.6759, "step": 129500 }, { "epoch": 0.56, "learning_rate": 4.350764968233082e-05, "loss": 1.6718, "step": 129600 }, { "epoch": 0.56, "learning_rate": 4.3502221295777585e-05, "loss": 1.6435, "step": 129700 }, { "epoch": 0.56, "learning_rate": 4.349679290922435e-05, "loss": 1.6757, "step": 129800 }, { "epoch": 0.56, "learning_rate": 4.3491364522671116e-05, "loss": 1.6973, "step": 129900 }, { "epoch": 0.56, "learning_rate": 4.3485936136117885e-05, "loss": 1.6855, "step": 130000 }, { "epoch": 0.56, "eval_loss": 1.5044703483581543, "eval_runtime": 18.9872, "eval_samples_per_second": 526.672, "eval_steps_per_second": 16.485, "step": 130000 }, { "epoch": 0.56, "learning_rate": 4.3480507749564646e-05, "loss": 1.6843, "step": 130100 }, { "epoch": 0.56, "learning_rate": 4.347507936301141e-05, "loss": 1.6709, "step": 130200 }, { "epoch": 0.56, "learning_rate": 4.346965097645818e-05, "loss": 1.6656, "step": 130300 }, { "epoch": 0.56, "learning_rate": 4.346422258990494e-05, "loss": 1.6635, "step": 130400 }, { "epoch": 0.56, "learning_rate": 4.345879420335171e-05, "loss": 1.6454, "step": 130500 }, { "epoch": 0.56, "learning_rate": 4.345336581679847e-05, "loss": 1.678, "step": 130600 }, { "epoch": 0.56, "learning_rate": 4.344793743024523e-05, "loss": 1.6965, "step": 130700 }, { "epoch": 0.56, "learning_rate": 4.3442509043692e-05, "loss": 1.6447, "step": 130800 }, { "epoch": 0.56, "learning_rate": 4.343708065713876e-05, "loss": 1.6605, "step": 130900 }, { "epoch": 0.56, "learning_rate": 4.343165227058553e-05, "loss": 1.6782, "step": 131000 }, { "epoch": 0.56, "learning_rate": 4.34262238840323e-05, "loss": 1.6535, "step": 131100 }, { "epoch": 0.56, "learning_rate": 4.342079549747906e-05, "loss": 1.6603, "step": 131200 }, { "epoch": 0.56, "learning_rate": 4.341536711092582e-05, "loss": 1.6508, "step": 131300 }, { "epoch": 0.56, "learning_rate": 4.340993872437259e-05, "loss": 1.6625, "step": 131400 }, { "epoch": 0.56, "learning_rate": 4.340451033781935e-05, "loss": 1.6584, "step": 131500 }, { "epoch": 0.57, "learning_rate": 4.3399081951266115e-05, "loss": 1.6942, "step": 131600 }, { "epoch": 0.57, "learning_rate": 4.3393653564712884e-05, "loss": 1.6679, "step": 131700 }, { "epoch": 0.57, "learning_rate": 4.3388225178159646e-05, "loss": 1.6651, "step": 131800 }, { "epoch": 0.57, "learning_rate": 4.3382796791606414e-05, "loss": 1.6969, "step": 131900 }, { "epoch": 0.57, "learning_rate": 4.3377368405053176e-05, "loss": 1.7259, "step": 132000 }, { "epoch": 0.57, "learning_rate": 4.3371940018499945e-05, "loss": 1.6257, "step": 132100 }, { "epoch": 0.57, "learning_rate": 4.336651163194671e-05, "loss": 1.6517, "step": 132200 }, { "epoch": 0.57, "learning_rate": 4.3361083245393475e-05, "loss": 1.7008, "step": 132300 }, { "epoch": 0.57, "learning_rate": 4.335565485884024e-05, "loss": 1.6519, "step": 132400 }, { "epoch": 0.57, "learning_rate": 4.3350226472287006e-05, "loss": 1.6914, "step": 132500 }, { "epoch": 0.57, "learning_rate": 4.334479808573377e-05, "loss": 1.6574, "step": 132600 }, { "epoch": 0.57, "learning_rate": 4.333936969918053e-05, "loss": 1.6667, "step": 132700 }, { "epoch": 0.57, "learning_rate": 4.33339413126273e-05, "loss": 1.6928, "step": 132800 }, { "epoch": 0.57, "learning_rate": 4.332851292607406e-05, "loss": 1.677, "step": 132900 }, { "epoch": 0.57, "learning_rate": 4.332308453952082e-05, "loss": 1.6511, "step": 133000 }, { "epoch": 0.57, "learning_rate": 4.33176561529676e-05, "loss": 1.6605, "step": 133100 }, { "epoch": 0.57, "learning_rate": 4.331222776641436e-05, "loss": 1.6317, "step": 133200 }, { "epoch": 0.57, "learning_rate": 4.330679937986112e-05, "loss": 1.6925, "step": 133300 }, { "epoch": 0.57, "learning_rate": 4.330137099330789e-05, "loss": 1.6553, "step": 133400 }, { "epoch": 0.57, "learning_rate": 4.329594260675465e-05, "loss": 1.6675, "step": 133500 }, { "epoch": 0.57, "learning_rate": 4.3290514220201414e-05, "loss": 1.6711, "step": 133600 }, { "epoch": 0.57, "learning_rate": 4.328508583364818e-05, "loss": 1.64, "step": 133700 }, { "epoch": 0.57, "learning_rate": 4.3279657447094944e-05, "loss": 1.6866, "step": 133800 }, { "epoch": 0.58, "learning_rate": 4.327422906054171e-05, "loss": 1.6831, "step": 133900 }, { "epoch": 0.58, "learning_rate": 4.3268800673988475e-05, "loss": 1.6413, "step": 134000 }, { "epoch": 0.58, "learning_rate": 4.326337228743524e-05, "loss": 1.6605, "step": 134100 }, { "epoch": 0.58, "learning_rate": 4.3257943900882005e-05, "loss": 1.7032, "step": 134200 }, { "epoch": 0.58, "learning_rate": 4.3252515514328774e-05, "loss": 1.6694, "step": 134300 }, { "epoch": 0.58, "learning_rate": 4.3247087127775536e-05, "loss": 1.6462, "step": 134400 }, { "epoch": 0.58, "learning_rate": 4.3241658741222304e-05, "loss": 1.6618, "step": 134500 }, { "epoch": 0.58, "learning_rate": 4.3236230354669066e-05, "loss": 1.6611, "step": 134600 }, { "epoch": 0.58, "learning_rate": 4.323080196811583e-05, "loss": 1.6743, "step": 134700 }, { "epoch": 0.58, "learning_rate": 4.32253735815626e-05, "loss": 1.7014, "step": 134800 }, { "epoch": 0.58, "learning_rate": 4.321994519500936e-05, "loss": 1.6307, "step": 134900 }, { "epoch": 0.58, "learning_rate": 4.321451680845612e-05, "loss": 1.7163, "step": 135000 }, { "epoch": 0.58, "eval_loss": 1.502817153930664, "eval_runtime": 19.0069, "eval_samples_per_second": 526.124, "eval_steps_per_second": 16.468, "step": 135000 }, { "epoch": 0.58, "learning_rate": 4.320908842190289e-05, "loss": 1.6829, "step": 135100 }, { "epoch": 0.58, "learning_rate": 4.320366003534966e-05, "loss": 1.6813, "step": 135200 }, { "epoch": 0.58, "learning_rate": 4.319823164879642e-05, "loss": 1.7002, "step": 135300 }, { "epoch": 0.58, "learning_rate": 4.319280326224319e-05, "loss": 1.69, "step": 135400 }, { "epoch": 0.58, "learning_rate": 4.318737487568995e-05, "loss": 1.6889, "step": 135500 }, { "epoch": 0.58, "learning_rate": 4.318194648913671e-05, "loss": 1.6668, "step": 135600 }, { "epoch": 0.58, "learning_rate": 4.317651810258348e-05, "loss": 1.6814, "step": 135700 }, { "epoch": 0.58, "learning_rate": 4.317108971603024e-05, "loss": 1.6247, "step": 135800 }, { "epoch": 0.58, "learning_rate": 4.316566132947701e-05, "loss": 1.6818, "step": 135900 }, { "epoch": 0.58, "learning_rate": 4.316023294292377e-05, "loss": 1.696, "step": 136000 }, { "epoch": 0.58, "learning_rate": 4.3154804556370535e-05, "loss": 1.6665, "step": 136100 }, { "epoch": 0.59, "learning_rate": 4.3149376169817304e-05, "loss": 1.6671, "step": 136200 }, { "epoch": 0.59, "learning_rate": 4.314394778326407e-05, "loss": 1.6745, "step": 136300 }, { "epoch": 0.59, "learning_rate": 4.3138519396710834e-05, "loss": 1.6701, "step": 136400 }, { "epoch": 0.59, "learning_rate": 4.31330910101576e-05, "loss": 1.6677, "step": 136500 }, { "epoch": 0.59, "learning_rate": 4.3127662623604365e-05, "loss": 1.6759, "step": 136600 }, { "epoch": 0.59, "learning_rate": 4.312223423705113e-05, "loss": 1.6808, "step": 136700 }, { "epoch": 0.59, "learning_rate": 4.3116805850497895e-05, "loss": 1.6809, "step": 136800 }, { "epoch": 0.59, "learning_rate": 4.311137746394466e-05, "loss": 1.6222, "step": 136900 }, { "epoch": 0.59, "learning_rate": 4.310594907739142e-05, "loss": 1.698, "step": 137000 }, { "epoch": 0.59, "learning_rate": 4.310052069083819e-05, "loss": 1.6752, "step": 137100 }, { "epoch": 0.59, "learning_rate": 4.309509230428495e-05, "loss": 1.7055, "step": 137200 }, { "epoch": 0.59, "learning_rate": 4.308966391773172e-05, "loss": 1.6652, "step": 137300 }, { "epoch": 0.59, "learning_rate": 4.308423553117849e-05, "loss": 1.6686, "step": 137400 }, { "epoch": 0.59, "learning_rate": 4.307880714462525e-05, "loss": 1.6582, "step": 137500 }, { "epoch": 0.59, "learning_rate": 4.307337875807201e-05, "loss": 1.6555, "step": 137600 }, { "epoch": 0.59, "learning_rate": 4.306795037151878e-05, "loss": 1.6501, "step": 137700 }, { "epoch": 0.59, "learning_rate": 4.306252198496554e-05, "loss": 1.6653, "step": 137800 }, { "epoch": 0.59, "learning_rate": 4.305709359841231e-05, "loss": 1.6581, "step": 137900 }, { "epoch": 0.59, "learning_rate": 4.305166521185907e-05, "loss": 1.6755, "step": 138000 }, { "epoch": 0.59, "learning_rate": 4.3046236825305834e-05, "loss": 1.6837, "step": 138100 }, { "epoch": 0.59, "learning_rate": 4.30408084387526e-05, "loss": 1.6873, "step": 138200 }, { "epoch": 0.59, "learning_rate": 4.3035380052199364e-05, "loss": 1.6475, "step": 138300 }, { "epoch": 0.59, "learning_rate": 4.302995166564613e-05, "loss": 1.6719, "step": 138400 }, { "epoch": 0.6, "learning_rate": 4.30245232790929e-05, "loss": 1.664, "step": 138500 }, { "epoch": 0.6, "learning_rate": 4.301909489253966e-05, "loss": 1.6426, "step": 138600 }, { "epoch": 0.6, "learning_rate": 4.3013666505986425e-05, "loss": 1.6444, "step": 138700 }, { "epoch": 0.6, "learning_rate": 4.3008238119433194e-05, "loss": 1.6721, "step": 138800 }, { "epoch": 0.6, "learning_rate": 4.3002809732879956e-05, "loss": 1.6697, "step": 138900 }, { "epoch": 0.6, "learning_rate": 4.299738134632672e-05, "loss": 1.6546, "step": 139000 }, { "epoch": 0.6, "learning_rate": 4.2991952959773486e-05, "loss": 1.6909, "step": 139100 }, { "epoch": 0.6, "learning_rate": 4.298652457322025e-05, "loss": 1.6854, "step": 139200 }, { "epoch": 0.6, "learning_rate": 4.298109618666702e-05, "loss": 1.6527, "step": 139300 }, { "epoch": 0.6, "learning_rate": 4.2975667800113785e-05, "loss": 1.6853, "step": 139400 }, { "epoch": 0.6, "learning_rate": 4.297023941356055e-05, "loss": 1.6556, "step": 139500 }, { "epoch": 0.6, "learning_rate": 4.296481102700731e-05, "loss": 1.6656, "step": 139600 }, { "epoch": 0.6, "learning_rate": 4.295938264045408e-05, "loss": 1.6581, "step": 139700 }, { "epoch": 0.6, "learning_rate": 4.295395425390084e-05, "loss": 1.6422, "step": 139800 }, { "epoch": 0.6, "learning_rate": 4.294852586734761e-05, "loss": 1.6744, "step": 139900 }, { "epoch": 0.6, "learning_rate": 4.294309748079437e-05, "loss": 1.6645, "step": 140000 }, { "epoch": 0.6, "eval_loss": 1.4998124837875366, "eval_runtime": 18.9623, "eval_samples_per_second": 527.362, "eval_steps_per_second": 16.506, "step": 140000 }, { "epoch": 0.6, "learning_rate": 4.293766909424113e-05, "loss": 1.7061, "step": 140100 }, { "epoch": 0.6, "learning_rate": 4.29322407076879e-05, "loss": 1.6883, "step": 140200 }, { "epoch": 0.6, "learning_rate": 4.292681232113466e-05, "loss": 1.6674, "step": 140300 }, { "epoch": 0.6, "learning_rate": 4.2921383934581425e-05, "loss": 1.6763, "step": 140400 }, { "epoch": 0.6, "learning_rate": 4.29159555480282e-05, "loss": 1.6551, "step": 140500 }, { "epoch": 0.6, "learning_rate": 4.291052716147496e-05, "loss": 1.6787, "step": 140600 }, { "epoch": 0.6, "learning_rate": 4.2905098774921724e-05, "loss": 1.6661, "step": 140700 }, { "epoch": 0.6, "learning_rate": 4.289967038836849e-05, "loss": 1.6769, "step": 140800 }, { "epoch": 0.61, "learning_rate": 4.2894242001815254e-05, "loss": 1.6581, "step": 140900 }, { "epoch": 0.61, "learning_rate": 4.2888813615262016e-05, "loss": 1.6592, "step": 141000 }, { "epoch": 0.61, "learning_rate": 4.2883385228708785e-05, "loss": 1.6744, "step": 141100 }, { "epoch": 0.61, "learning_rate": 4.287795684215555e-05, "loss": 1.6752, "step": 141200 }, { "epoch": 0.61, "learning_rate": 4.2872528455602315e-05, "loss": 1.6546, "step": 141300 }, { "epoch": 0.61, "learning_rate": 4.286710006904908e-05, "loss": 1.6599, "step": 141400 }, { "epoch": 0.61, "learning_rate": 4.2861671682495846e-05, "loss": 1.6713, "step": 141500 }, { "epoch": 0.61, "learning_rate": 4.285624329594261e-05, "loss": 1.6276, "step": 141600 }, { "epoch": 0.61, "learning_rate": 4.2850814909389376e-05, "loss": 1.6647, "step": 141700 }, { "epoch": 0.61, "learning_rate": 4.284538652283614e-05, "loss": 1.6548, "step": 141800 }, { "epoch": 0.61, "learning_rate": 4.283995813628291e-05, "loss": 1.6565, "step": 141900 }, { "epoch": 0.61, "learning_rate": 4.283452974972967e-05, "loss": 1.6679, "step": 142000 }, { "epoch": 0.61, "learning_rate": 4.282910136317643e-05, "loss": 1.6429, "step": 142100 }, { "epoch": 0.61, "learning_rate": 4.28236729766232e-05, "loss": 1.6365, "step": 142200 }, { "epoch": 0.61, "learning_rate": 4.281824459006996e-05, "loss": 1.6318, "step": 142300 }, { "epoch": 0.61, "learning_rate": 4.281281620351672e-05, "loss": 1.6821, "step": 142400 }, { "epoch": 0.61, "learning_rate": 4.28073878169635e-05, "loss": 1.658, "step": 142500 }, { "epoch": 0.61, "learning_rate": 4.280195943041026e-05, "loss": 1.6852, "step": 142600 }, { "epoch": 0.61, "learning_rate": 4.279653104385702e-05, "loss": 1.6617, "step": 142700 }, { "epoch": 0.61, "learning_rate": 4.279110265730379e-05, "loss": 1.699, "step": 142800 }, { "epoch": 0.61, "learning_rate": 4.278567427075055e-05, "loss": 1.6443, "step": 142900 }, { "epoch": 0.61, "learning_rate": 4.2780245884197315e-05, "loss": 1.6357, "step": 143000 }, { "epoch": 0.61, "learning_rate": 4.277481749764408e-05, "loss": 1.6719, "step": 143100 }, { "epoch": 0.62, "learning_rate": 4.2769389111090845e-05, "loss": 1.6529, "step": 143200 }, { "epoch": 0.62, "learning_rate": 4.2763960724537614e-05, "loss": 1.6443, "step": 143300 }, { "epoch": 0.62, "learning_rate": 4.2758532337984376e-05, "loss": 1.6534, "step": 143400 }, { "epoch": 0.62, "learning_rate": 4.275310395143114e-05, "loss": 1.6673, "step": 143500 }, { "epoch": 0.62, "learning_rate": 4.2747675564877906e-05, "loss": 1.6496, "step": 143600 }, { "epoch": 0.62, "learning_rate": 4.2742247178324675e-05, "loss": 1.6808, "step": 143700 }, { "epoch": 0.62, "learning_rate": 4.273681879177144e-05, "loss": 1.6599, "step": 143800 }, { "epoch": 0.62, "learning_rate": 4.2731390405218205e-05, "loss": 1.6626, "step": 143900 }, { "epoch": 0.62, "learning_rate": 4.272596201866497e-05, "loss": 1.6623, "step": 144000 }, { "epoch": 0.62, "learning_rate": 4.272053363211173e-05, "loss": 1.6576, "step": 144100 }, { "epoch": 0.62, "learning_rate": 4.27151052455585e-05, "loss": 1.6479, "step": 144200 }, { "epoch": 0.62, "learning_rate": 4.270967685900526e-05, "loss": 1.6492, "step": 144300 }, { "epoch": 0.62, "learning_rate": 4.270424847245202e-05, "loss": 1.6916, "step": 144400 }, { "epoch": 0.62, "learning_rate": 4.269882008589879e-05, "loss": 1.6518, "step": 144500 }, { "epoch": 0.62, "learning_rate": 4.269339169934556e-05, "loss": 1.6815, "step": 144600 }, { "epoch": 0.62, "learning_rate": 4.268796331279232e-05, "loss": 1.6319, "step": 144700 }, { "epoch": 0.62, "learning_rate": 4.268253492623909e-05, "loss": 1.668, "step": 144800 }, { "epoch": 0.62, "learning_rate": 4.267710653968585e-05, "loss": 1.6594, "step": 144900 }, { "epoch": 0.62, "learning_rate": 4.267167815313261e-05, "loss": 1.6918, "step": 145000 }, { "epoch": 0.62, "eval_loss": 1.4964494705200195, "eval_runtime": 19.0245, "eval_samples_per_second": 525.638, "eval_steps_per_second": 16.452, "step": 145000 }, { "epoch": 0.62, "learning_rate": 4.266624976657938e-05, "loss": 1.6603, "step": 145100 }, { "epoch": 0.62, "learning_rate": 4.2660821380026144e-05, "loss": 1.6859, "step": 145200 }, { "epoch": 0.62, "learning_rate": 4.265539299347291e-05, "loss": 1.6378, "step": 145300 }, { "epoch": 0.62, "learning_rate": 4.2649964606919674e-05, "loss": 1.6768, "step": 145400 }, { "epoch": 0.63, "learning_rate": 4.2644536220366436e-05, "loss": 1.6845, "step": 145500 }, { "epoch": 0.63, "learning_rate": 4.2639107833813205e-05, "loss": 1.6757, "step": 145600 }, { "epoch": 0.63, "learning_rate": 4.263367944725997e-05, "loss": 1.6575, "step": 145700 }, { "epoch": 0.63, "learning_rate": 4.2628251060706735e-05, "loss": 1.6798, "step": 145800 }, { "epoch": 0.63, "learning_rate": 4.2622822674153504e-05, "loss": 1.6504, "step": 145900 }, { "epoch": 0.63, "learning_rate": 4.2617394287600266e-05, "loss": 1.6527, "step": 146000 }, { "epoch": 0.63, "learning_rate": 4.261196590104703e-05, "loss": 1.6972, "step": 146100 }, { "epoch": 0.63, "learning_rate": 4.2606537514493796e-05, "loss": 1.6573, "step": 146200 }, { "epoch": 0.63, "learning_rate": 4.260110912794056e-05, "loss": 1.6811, "step": 146300 }, { "epoch": 0.63, "learning_rate": 4.259568074138732e-05, "loss": 1.6494, "step": 146400 }, { "epoch": 0.63, "learning_rate": 4.259025235483409e-05, "loss": 1.6271, "step": 146500 }, { "epoch": 0.63, "learning_rate": 4.258482396828085e-05, "loss": 1.6904, "step": 146600 }, { "epoch": 0.63, "learning_rate": 4.257939558172762e-05, "loss": 1.6779, "step": 146700 }, { "epoch": 0.63, "learning_rate": 4.257396719517439e-05, "loss": 1.6485, "step": 146800 }, { "epoch": 0.63, "learning_rate": 4.256853880862115e-05, "loss": 1.6511, "step": 146900 }, { "epoch": 0.63, "learning_rate": 4.256311042206791e-05, "loss": 1.6497, "step": 147000 }, { "epoch": 0.63, "learning_rate": 4.255768203551468e-05, "loss": 1.6858, "step": 147100 }, { "epoch": 0.63, "learning_rate": 4.255225364896144e-05, "loss": 1.6702, "step": 147200 }, { "epoch": 0.63, "learning_rate": 4.254682526240821e-05, "loss": 1.6352, "step": 147300 }, { "epoch": 0.63, "learning_rate": 4.254139687585497e-05, "loss": 1.6741, "step": 147400 }, { "epoch": 0.63, "learning_rate": 4.2535968489301735e-05, "loss": 1.6842, "step": 147500 }, { "epoch": 0.63, "learning_rate": 4.25305401027485e-05, "loss": 1.6406, "step": 147600 }, { "epoch": 0.63, "learning_rate": 4.2525111716195265e-05, "loss": 1.6622, "step": 147700 }, { "epoch": 0.63, "learning_rate": 4.2519683329642034e-05, "loss": 1.636, "step": 147800 }, { "epoch": 0.64, "learning_rate": 4.25142549430888e-05, "loss": 1.6487, "step": 147900 }, { "epoch": 0.64, "learning_rate": 4.2508826556535564e-05, "loss": 1.6737, "step": 148000 }, { "epoch": 0.64, "learning_rate": 4.2503398169982326e-05, "loss": 1.6372, "step": 148100 }, { "epoch": 0.64, "learning_rate": 4.2497969783429095e-05, "loss": 1.6472, "step": 148200 }, { "epoch": 0.64, "learning_rate": 4.2492541396875857e-05, "loss": 1.6508, "step": 148300 }, { "epoch": 0.64, "learning_rate": 4.248711301032262e-05, "loss": 1.6693, "step": 148400 }, { "epoch": 0.64, "learning_rate": 4.248168462376939e-05, "loss": 1.6808, "step": 148500 }, { "epoch": 0.64, "learning_rate": 4.247625623721615e-05, "loss": 1.6758, "step": 148600 }, { "epoch": 0.64, "learning_rate": 4.247082785066291e-05, "loss": 1.6726, "step": 148700 }, { "epoch": 0.64, "learning_rate": 4.2465399464109686e-05, "loss": 1.6293, "step": 148800 }, { "epoch": 0.64, "learning_rate": 4.245997107755645e-05, "loss": 1.6371, "step": 148900 }, { "epoch": 0.64, "learning_rate": 4.245454269100321e-05, "loss": 1.6726, "step": 149000 }, { "epoch": 0.64, "learning_rate": 4.244911430444998e-05, "loss": 1.6672, "step": 149100 }, { "epoch": 0.64, "learning_rate": 4.244368591789674e-05, "loss": 1.6564, "step": 149200 }, { "epoch": 0.64, "learning_rate": 4.243825753134351e-05, "loss": 1.6373, "step": 149300 }, { "epoch": 0.64, "learning_rate": 4.243282914479027e-05, "loss": 1.6795, "step": 149400 }, { "epoch": 0.64, "learning_rate": 4.242740075823703e-05, "loss": 1.6624, "step": 149500 }, { "epoch": 0.64, "learning_rate": 4.24219723716838e-05, "loss": 1.6491, "step": 149600 }, { "epoch": 0.64, "learning_rate": 4.2416543985130564e-05, "loss": 1.6289, "step": 149700 }, { "epoch": 0.64, "learning_rate": 4.2411115598577325e-05, "loss": 1.6284, "step": 149800 }, { "epoch": 0.64, "learning_rate": 4.24056872120241e-05, "loss": 1.6781, "step": 149900 }, { "epoch": 0.64, "learning_rate": 4.240025882547086e-05, "loss": 1.6385, "step": 150000 }, { "epoch": 0.64, "eval_loss": 1.498610019683838, "eval_runtime": 19.0453, "eval_samples_per_second": 525.065, "eval_steps_per_second": 16.435, "step": 150000 }, { "epoch": 0.64, "learning_rate": 4.2394830438917625e-05, "loss": 1.663, "step": 150100 }, { "epoch": 0.65, "learning_rate": 4.238940205236439e-05, "loss": 1.6227, "step": 150200 }, { "epoch": 0.65, "learning_rate": 4.2383973665811155e-05, "loss": 1.6561, "step": 150300 }, { "epoch": 0.65, "learning_rate": 4.237854527925792e-05, "loss": 1.6525, "step": 150400 }, { "epoch": 0.65, "learning_rate": 4.2373116892704686e-05, "loss": 1.6423, "step": 150500 }, { "epoch": 0.65, "learning_rate": 4.236768850615145e-05, "loss": 1.6528, "step": 150600 }, { "epoch": 0.65, "learning_rate": 4.236226011959821e-05, "loss": 1.6514, "step": 150700 }, { "epoch": 0.65, "learning_rate": 4.235683173304498e-05, "loss": 1.6747, "step": 150800 }, { "epoch": 0.65, "learning_rate": 4.235140334649175e-05, "loss": 1.6646, "step": 150900 }, { "epoch": 0.65, "learning_rate": 4.234597495993851e-05, "loss": 1.6572, "step": 151000 }, { "epoch": 0.65, "learning_rate": 4.234054657338528e-05, "loss": 1.6396, "step": 151100 }, { "epoch": 0.65, "learning_rate": 4.233511818683204e-05, "loss": 1.647, "step": 151200 }, { "epoch": 0.65, "learning_rate": 4.232968980027881e-05, "loss": 1.6633, "step": 151300 }, { "epoch": 0.65, "learning_rate": 4.232426141372557e-05, "loss": 1.6479, "step": 151400 }, { "epoch": 0.65, "learning_rate": 4.231883302717233e-05, "loss": 1.6554, "step": 151500 }, { "epoch": 0.65, "learning_rate": 4.23134046406191e-05, "loss": 1.6536, "step": 151600 }, { "epoch": 0.65, "learning_rate": 4.230797625406586e-05, "loss": 1.6546, "step": 151700 }, { "epoch": 0.65, "learning_rate": 4.2302547867512624e-05, "loss": 1.6994, "step": 151800 }, { "epoch": 0.65, "learning_rate": 4.229711948095939e-05, "loss": 1.6134, "step": 151900 }, { "epoch": 0.65, "learning_rate": 4.229169109440616e-05, "loss": 1.6861, "step": 152000 }, { "epoch": 0.65, "learning_rate": 4.228626270785292e-05, "loss": 1.6588, "step": 152100 }, { "epoch": 0.65, "learning_rate": 4.228083432129969e-05, "loss": 1.6333, "step": 152200 }, { "epoch": 0.65, "learning_rate": 4.2275405934746454e-05, "loss": 1.6712, "step": 152300 }, { "epoch": 0.65, "learning_rate": 4.2269977548193215e-05, "loss": 1.6621, "step": 152400 }, { "epoch": 0.66, "learning_rate": 4.2264549161639984e-05, "loss": 1.6447, "step": 152500 }, { "epoch": 0.66, "learning_rate": 4.2259120775086746e-05, "loss": 1.6746, "step": 152600 }, { "epoch": 0.66, "learning_rate": 4.225369238853351e-05, "loss": 1.6465, "step": 152700 }, { "epoch": 0.66, "learning_rate": 4.2248264001980277e-05, "loss": 1.6743, "step": 152800 }, { "epoch": 0.66, "learning_rate": 4.224283561542704e-05, "loss": 1.6732, "step": 152900 }, { "epoch": 0.66, "learning_rate": 4.223740722887381e-05, "loss": 1.6312, "step": 153000 }, { "epoch": 0.66, "learning_rate": 4.2231978842320576e-05, "loss": 1.6696, "step": 153100 }, { "epoch": 0.66, "learning_rate": 4.222655045576734e-05, "loss": 1.6439, "step": 153200 }, { "epoch": 0.66, "learning_rate": 4.2221122069214106e-05, "loss": 1.6474, "step": 153300 }, { "epoch": 0.66, "learning_rate": 4.221569368266087e-05, "loss": 1.6472, "step": 153400 }, { "epoch": 0.66, "learning_rate": 4.221026529610763e-05, "loss": 1.6635, "step": 153500 }, { "epoch": 0.66, "learning_rate": 4.22048369095544e-05, "loss": 1.6512, "step": 153600 }, { "epoch": 0.66, "learning_rate": 4.219940852300116e-05, "loss": 1.6748, "step": 153700 }, { "epoch": 0.66, "learning_rate": 4.219398013644792e-05, "loss": 1.666, "step": 153800 }, { "epoch": 0.66, "learning_rate": 4.218855174989469e-05, "loss": 1.6338, "step": 153900 }, { "epoch": 0.66, "learning_rate": 4.218312336334145e-05, "loss": 1.6643, "step": 154000 }, { "epoch": 0.66, "learning_rate": 4.217769497678822e-05, "loss": 1.6548, "step": 154100 }, { "epoch": 0.66, "learning_rate": 4.217226659023499e-05, "loss": 1.6619, "step": 154200 }, { "epoch": 0.66, "learning_rate": 4.216683820368175e-05, "loss": 1.6543, "step": 154300 }, { "epoch": 0.66, "learning_rate": 4.2161409817128514e-05, "loss": 1.6247, "step": 154400 }, { "epoch": 0.66, "learning_rate": 4.215598143057528e-05, "loss": 1.6504, "step": 154500 }, { "epoch": 0.66, "learning_rate": 4.2150553044022044e-05, "loss": 1.6754, "step": 154600 }, { "epoch": 0.66, "learning_rate": 4.2145124657468806e-05, "loss": 1.649, "step": 154700 }, { "epoch": 0.67, "learning_rate": 4.2139696270915575e-05, "loss": 1.6665, "step": 154800 }, { "epoch": 0.67, "learning_rate": 4.213426788436234e-05, "loss": 1.6553, "step": 154900 }, { "epoch": 0.67, "learning_rate": 4.2128839497809106e-05, "loss": 1.632, "step": 155000 }, { "epoch": 0.67, "eval_loss": 1.4961014986038208, "eval_runtime": 18.9397, "eval_samples_per_second": 527.991, "eval_steps_per_second": 16.526, "step": 155000 }, { "epoch": 0.67, "learning_rate": 4.2123411111255874e-05, "loss": 1.6508, "step": 155100 }, { "epoch": 0.67, "learning_rate": 4.2117982724702636e-05, "loss": 1.6373, "step": 155200 }, { "epoch": 0.67, "learning_rate": 4.2112554338149405e-05, "loss": 1.6201, "step": 155300 }, { "epoch": 0.67, "learning_rate": 4.2107125951596167e-05, "loss": 1.6426, "step": 155400 }, { "epoch": 0.67, "learning_rate": 4.210169756504293e-05, "loss": 1.678, "step": 155500 }, { "epoch": 0.67, "learning_rate": 4.20962691784897e-05, "loss": 1.65, "step": 155600 }, { "epoch": 0.67, "learning_rate": 4.209084079193646e-05, "loss": 1.6561, "step": 155700 }, { "epoch": 0.67, "learning_rate": 4.208541240538322e-05, "loss": 1.6604, "step": 155800 }, { "epoch": 0.67, "learning_rate": 4.207998401882999e-05, "loss": 1.6548, "step": 155900 }, { "epoch": 0.67, "learning_rate": 4.207455563227675e-05, "loss": 1.6122, "step": 156000 }, { "epoch": 0.67, "learning_rate": 4.206912724572351e-05, "loss": 1.6578, "step": 156100 }, { "epoch": 0.67, "learning_rate": 4.206369885917029e-05, "loss": 1.6357, "step": 156200 }, { "epoch": 0.67, "learning_rate": 4.205827047261705e-05, "loss": 1.6642, "step": 156300 }, { "epoch": 0.67, "learning_rate": 4.205284208606381e-05, "loss": 1.6514, "step": 156400 }, { "epoch": 0.67, "learning_rate": 4.204741369951058e-05, "loss": 1.6546, "step": 156500 }, { "epoch": 0.67, "learning_rate": 4.204198531295734e-05, "loss": 1.6795, "step": 156600 }, { "epoch": 0.67, "learning_rate": 4.2036556926404105e-05, "loss": 1.6423, "step": 156700 }, { "epoch": 0.67, "learning_rate": 4.2031128539850873e-05, "loss": 1.6395, "step": 156800 }, { "epoch": 0.67, "learning_rate": 4.2025700153297635e-05, "loss": 1.6905, "step": 156900 }, { "epoch": 0.67, "learning_rate": 4.2020271766744404e-05, "loss": 1.6481, "step": 157000 }, { "epoch": 0.67, "learning_rate": 4.2014843380191166e-05, "loss": 1.6364, "step": 157100 }, { "epoch": 0.68, "learning_rate": 4.2009414993637935e-05, "loss": 1.6389, "step": 157200 }, { "epoch": 0.68, "learning_rate": 4.20039866070847e-05, "loss": 1.6198, "step": 157300 }, { "epoch": 0.68, "learning_rate": 4.1998558220531465e-05, "loss": 1.6397, "step": 157400 }, { "epoch": 0.68, "learning_rate": 4.199312983397823e-05, "loss": 1.6326, "step": 157500 }, { "epoch": 0.68, "learning_rate": 4.1987701447424996e-05, "loss": 1.6583, "step": 157600 }, { "epoch": 0.68, "learning_rate": 4.198227306087176e-05, "loss": 1.6271, "step": 157700 }, { "epoch": 0.68, "learning_rate": 4.197684467431852e-05, "loss": 1.6297, "step": 157800 }, { "epoch": 0.68, "learning_rate": 4.197141628776529e-05, "loss": 1.653, "step": 157900 }, { "epoch": 0.68, "learning_rate": 4.196598790121205e-05, "loss": 1.6225, "step": 158000 }, { "epoch": 0.68, "learning_rate": 4.196055951465881e-05, "loss": 1.6523, "step": 158100 }, { "epoch": 0.68, "learning_rate": 4.195513112810558e-05, "loss": 1.6518, "step": 158200 }, { "epoch": 0.68, "learning_rate": 4.194970274155235e-05, "loss": 1.6458, "step": 158300 }, { "epoch": 0.68, "learning_rate": 4.194427435499911e-05, "loss": 1.6243, "step": 158400 }, { "epoch": 0.68, "learning_rate": 4.193884596844588e-05, "loss": 1.644, "step": 158500 }, { "epoch": 0.68, "learning_rate": 4.193341758189264e-05, "loss": 1.6883, "step": 158600 }, { "epoch": 0.68, "learning_rate": 4.19279891953394e-05, "loss": 1.6521, "step": 158700 }, { "epoch": 0.68, "learning_rate": 4.192256080878617e-05, "loss": 1.7046, "step": 158800 }, { "epoch": 0.68, "learning_rate": 4.1917132422232934e-05, "loss": 1.663, "step": 158900 }, { "epoch": 0.68, "learning_rate": 4.19117040356797e-05, "loss": 1.6656, "step": 159000 }, { "epoch": 0.68, "learning_rate": 4.1906275649126464e-05, "loss": 1.6674, "step": 159100 }, { "epoch": 0.68, "learning_rate": 4.1900847262573226e-05, "loss": 1.6277, "step": 159200 }, { "epoch": 0.68, "learning_rate": 4.189541887602e-05, "loss": 1.6436, "step": 159300 }, { "epoch": 0.68, "learning_rate": 4.1889990489466764e-05, "loss": 1.6566, "step": 159400 }, { "epoch": 0.69, "learning_rate": 4.1884562102913525e-05, "loss": 1.6512, "step": 159500 }, { "epoch": 0.69, "learning_rate": 4.1879133716360294e-05, "loss": 1.6525, "step": 159600 }, { "epoch": 0.69, "learning_rate": 4.1873705329807056e-05, "loss": 1.643, "step": 159700 }, { "epoch": 0.69, "learning_rate": 4.186827694325382e-05, "loss": 1.6227, "step": 159800 }, { "epoch": 0.69, "learning_rate": 4.1862848556700586e-05, "loss": 1.6451, "step": 159900 }, { "epoch": 0.69, "learning_rate": 4.185742017014735e-05, "loss": 1.6474, "step": 160000 }, { "epoch": 0.69, "eval_loss": 1.4892033338546753, "eval_runtime": 18.9542, "eval_samples_per_second": 527.588, "eval_steps_per_second": 16.513, "step": 160000 }, { "epoch": 0.69, "learning_rate": 4.185199178359411e-05, "loss": 1.653, "step": 160100 }, { "epoch": 0.69, "learning_rate": 4.184656339704088e-05, "loss": 1.6536, "step": 160200 }, { "epoch": 0.69, "learning_rate": 4.184113501048764e-05, "loss": 1.6498, "step": 160300 }, { "epoch": 0.69, "learning_rate": 4.183570662393441e-05, "loss": 1.6401, "step": 160400 }, { "epoch": 0.69, "learning_rate": 4.183027823738118e-05, "loss": 1.6488, "step": 160500 }, { "epoch": 0.69, "learning_rate": 4.182484985082794e-05, "loss": 1.6423, "step": 160600 }, { "epoch": 0.69, "learning_rate": 4.18194214642747e-05, "loss": 1.636, "step": 160700 }, { "epoch": 0.69, "learning_rate": 4.181399307772147e-05, "loss": 1.6501, "step": 160800 }, { "epoch": 0.69, "learning_rate": 4.180856469116823e-05, "loss": 1.6302, "step": 160900 }, { "epoch": 0.69, "learning_rate": 4.1803136304615e-05, "loss": 1.6565, "step": 161000 }, { "epoch": 0.69, "learning_rate": 4.179770791806176e-05, "loss": 1.636, "step": 161100 }, { "epoch": 0.69, "learning_rate": 4.1792279531508525e-05, "loss": 1.666, "step": 161200 }, { "epoch": 0.69, "learning_rate": 4.1786851144955293e-05, "loss": 1.6687, "step": 161300 }, { "epoch": 0.69, "learning_rate": 4.178142275840206e-05, "loss": 1.6333, "step": 161400 }, { "epoch": 0.69, "learning_rate": 4.1775994371848824e-05, "loss": 1.645, "step": 161500 }, { "epoch": 0.69, "learning_rate": 4.177056598529559e-05, "loss": 1.6549, "step": 161600 }, { "epoch": 0.69, "learning_rate": 4.1765137598742354e-05, "loss": 1.656, "step": 161700 }, { "epoch": 0.7, "learning_rate": 4.1759709212189116e-05, "loss": 1.6529, "step": 161800 }, { "epoch": 0.7, "learning_rate": 4.1754280825635885e-05, "loss": 1.6249, "step": 161900 }, { "epoch": 0.7, "learning_rate": 4.174885243908265e-05, "loss": 1.644, "step": 162000 }, { "epoch": 0.7, "learning_rate": 4.174342405252941e-05, "loss": 1.6583, "step": 162100 }, { "epoch": 0.7, "learning_rate": 4.173799566597618e-05, "loss": 1.6601, "step": 162200 }, { "epoch": 0.7, "learning_rate": 4.173256727942294e-05, "loss": 1.6653, "step": 162300 }, { "epoch": 0.7, "learning_rate": 4.172713889286971e-05, "loss": 1.6278, "step": 162400 }, { "epoch": 0.7, "learning_rate": 4.1721710506316477e-05, "loss": 1.7011, "step": 162500 }, { "epoch": 0.7, "learning_rate": 4.171628211976324e-05, "loss": 1.6512, "step": 162600 }, { "epoch": 0.7, "learning_rate": 4.171085373321e-05, "loss": 1.6867, "step": 162700 }, { "epoch": 0.7, "learning_rate": 4.170542534665677e-05, "loss": 1.6322, "step": 162800 }, { "epoch": 0.7, "learning_rate": 4.169999696010353e-05, "loss": 1.6367, "step": 162900 }, { "epoch": 0.7, "learning_rate": 4.16945685735503e-05, "loss": 1.6477, "step": 163000 }, { "epoch": 0.7, "learning_rate": 4.168914018699706e-05, "loss": 1.6592, "step": 163100 }, { "epoch": 0.7, "learning_rate": 4.168371180044382e-05, "loss": 1.6355, "step": 163200 }, { "epoch": 0.7, "learning_rate": 4.167828341389059e-05, "loss": 1.6156, "step": 163300 }, { "epoch": 0.7, "learning_rate": 4.1672855027337354e-05, "loss": 1.6602, "step": 163400 }, { "epoch": 0.7, "learning_rate": 4.166742664078412e-05, "loss": 1.6826, "step": 163500 }, { "epoch": 0.7, "learning_rate": 4.166199825423089e-05, "loss": 1.6549, "step": 163600 }, { "epoch": 0.7, "learning_rate": 4.165656986767765e-05, "loss": 1.6715, "step": 163700 }, { "epoch": 0.7, "learning_rate": 4.1651141481124415e-05, "loss": 1.7004, "step": 163800 }, { "epoch": 0.7, "learning_rate": 4.1645713094571183e-05, "loss": 1.6407, "step": 163900 }, { "epoch": 0.7, "learning_rate": 4.1640284708017945e-05, "loss": 1.6596, "step": 164000 }, { "epoch": 0.7, "learning_rate": 4.163485632146471e-05, "loss": 1.6159, "step": 164100 }, { "epoch": 0.71, "learning_rate": 4.1629427934911476e-05, "loss": 1.6305, "step": 164200 }, { "epoch": 0.71, "learning_rate": 4.162399954835824e-05, "loss": 1.6307, "step": 164300 }, { "epoch": 0.71, "learning_rate": 4.1618571161805006e-05, "loss": 1.6507, "step": 164400 }, { "epoch": 0.71, "learning_rate": 4.1613142775251775e-05, "loss": 1.668, "step": 164500 }, { "epoch": 0.71, "learning_rate": 4.160771438869854e-05, "loss": 1.6264, "step": 164600 }, { "epoch": 0.71, "learning_rate": 4.16022860021453e-05, "loss": 1.6534, "step": 164700 }, { "epoch": 0.71, "learning_rate": 4.159685761559207e-05, "loss": 1.655, "step": 164800 }, { "epoch": 0.71, "learning_rate": 4.159142922903883e-05, "loss": 1.6359, "step": 164900 }, { "epoch": 0.71, "learning_rate": 4.15860008424856e-05, "loss": 1.656, "step": 165000 }, { "epoch": 0.71, "eval_loss": 1.4892749786376953, "eval_runtime": 19.0388, "eval_samples_per_second": 525.242, "eval_steps_per_second": 16.44, "step": 165000 }, { "epoch": 0.71, "learning_rate": 4.158057245593236e-05, "loss": 1.6642, "step": 165100 }, { "epoch": 0.71, "learning_rate": 4.157514406937912e-05, "loss": 1.6051, "step": 165200 }, { "epoch": 0.71, "learning_rate": 4.156971568282589e-05, "loss": 1.6373, "step": 165300 }, { "epoch": 0.71, "learning_rate": 4.156428729627265e-05, "loss": 1.6431, "step": 165400 }, { "epoch": 0.71, "learning_rate": 4.1558858909719414e-05, "loss": 1.6662, "step": 165500 }, { "epoch": 0.71, "learning_rate": 4.155343052316619e-05, "loss": 1.6377, "step": 165600 }, { "epoch": 0.71, "learning_rate": 4.154800213661295e-05, "loss": 1.6392, "step": 165700 }, { "epoch": 0.71, "learning_rate": 4.154257375005971e-05, "loss": 1.6564, "step": 165800 }, { "epoch": 0.71, "learning_rate": 4.153714536350648e-05, "loss": 1.6458, "step": 165900 }, { "epoch": 0.71, "learning_rate": 4.1531716976953244e-05, "loss": 1.6254, "step": 166000 }, { "epoch": 0.71, "learning_rate": 4.1526288590400006e-05, "loss": 1.6604, "step": 166100 }, { "epoch": 0.71, "learning_rate": 4.1520860203846774e-05, "loss": 1.6246, "step": 166200 }, { "epoch": 0.71, "learning_rate": 4.1515431817293536e-05, "loss": 1.6278, "step": 166300 }, { "epoch": 0.71, "learning_rate": 4.1510003430740305e-05, "loss": 1.6363, "step": 166400 }, { "epoch": 0.72, "learning_rate": 4.150457504418707e-05, "loss": 1.6379, "step": 166500 }, { "epoch": 0.72, "learning_rate": 4.1499146657633835e-05, "loss": 1.6476, "step": 166600 }, { "epoch": 0.72, "learning_rate": 4.14937182710806e-05, "loss": 1.6306, "step": 166700 }, { "epoch": 0.72, "learning_rate": 4.1488289884527366e-05, "loss": 1.6411, "step": 166800 }, { "epoch": 0.72, "learning_rate": 4.148286149797413e-05, "loss": 1.6529, "step": 166900 }, { "epoch": 0.72, "learning_rate": 4.1477433111420896e-05, "loss": 1.6336, "step": 167000 }, { "epoch": 0.72, "learning_rate": 4.147200472486766e-05, "loss": 1.641, "step": 167100 }, { "epoch": 0.72, "learning_rate": 4.146657633831442e-05, "loss": 1.6318, "step": 167200 }, { "epoch": 0.72, "learning_rate": 4.146114795176119e-05, "loss": 1.6243, "step": 167300 }, { "epoch": 0.72, "learning_rate": 4.145571956520795e-05, "loss": 1.6485, "step": 167400 }, { "epoch": 0.72, "learning_rate": 4.145029117865471e-05, "loss": 1.6424, "step": 167500 }, { "epoch": 0.72, "learning_rate": 4.144486279210148e-05, "loss": 1.6495, "step": 167600 }, { "epoch": 0.72, "learning_rate": 4.143943440554825e-05, "loss": 1.6417, "step": 167700 }, { "epoch": 0.72, "learning_rate": 4.143400601899501e-05, "loss": 1.6229, "step": 167800 }, { "epoch": 0.72, "learning_rate": 4.142857763244178e-05, "loss": 1.6565, "step": 167900 }, { "epoch": 0.72, "learning_rate": 4.142314924588854e-05, "loss": 1.6324, "step": 168000 }, { "epoch": 0.72, "learning_rate": 4.1417720859335304e-05, "loss": 1.6411, "step": 168100 }, { "epoch": 0.72, "learning_rate": 4.141229247278207e-05, "loss": 1.6343, "step": 168200 }, { "epoch": 0.72, "learning_rate": 4.1406864086228835e-05, "loss": 1.6355, "step": 168300 }, { "epoch": 0.72, "learning_rate": 4.14014356996756e-05, "loss": 1.646, "step": 168400 }, { "epoch": 0.72, "learning_rate": 4.1396007313122365e-05, "loss": 1.6457, "step": 168500 }, { "epoch": 0.72, "learning_rate": 4.139057892656913e-05, "loss": 1.6516, "step": 168600 }, { "epoch": 0.72, "learning_rate": 4.1385150540015896e-05, "loss": 1.6479, "step": 168700 }, { "epoch": 0.73, "learning_rate": 4.1379722153462664e-05, "loss": 1.6242, "step": 168800 }, { "epoch": 0.73, "learning_rate": 4.1374293766909426e-05, "loss": 1.6569, "step": 168900 }, { "epoch": 0.73, "learning_rate": 4.1368865380356195e-05, "loss": 1.6481, "step": 169000 }, { "epoch": 0.73, "learning_rate": 4.136343699380296e-05, "loss": 1.6365, "step": 169100 }, { "epoch": 0.73, "learning_rate": 4.135800860724972e-05, "loss": 1.6197, "step": 169200 }, { "epoch": 0.73, "learning_rate": 4.135258022069649e-05, "loss": 1.6592, "step": 169300 }, { "epoch": 0.73, "learning_rate": 4.134715183414325e-05, "loss": 1.6261, "step": 169400 }, { "epoch": 0.73, "learning_rate": 4.134172344759001e-05, "loss": 1.6491, "step": 169500 }, { "epoch": 0.73, "learning_rate": 4.133629506103678e-05, "loss": 1.6227, "step": 169600 }, { "epoch": 0.73, "learning_rate": 4.133086667448354e-05, "loss": 1.6486, "step": 169700 }, { "epoch": 0.73, "learning_rate": 4.132543828793031e-05, "loss": 1.6376, "step": 169800 }, { "epoch": 0.73, "learning_rate": 4.132000990137708e-05, "loss": 1.6393, "step": 169900 }, { "epoch": 0.73, "learning_rate": 4.131458151482384e-05, "loss": 1.6392, "step": 170000 }, { "epoch": 0.73, "eval_loss": 1.4881998300552368, "eval_runtime": 19.0174, "eval_samples_per_second": 525.834, "eval_steps_per_second": 16.459, "step": 170000 }, { "epoch": 0.73, "learning_rate": 4.13091531282706e-05, "loss": 1.6026, "step": 170100 }, { "epoch": 0.73, "learning_rate": 4.130372474171737e-05, "loss": 1.6467, "step": 170200 }, { "epoch": 0.73, "learning_rate": 4.129829635516413e-05, "loss": 1.6615, "step": 170300 }, { "epoch": 0.73, "learning_rate": 4.12928679686109e-05, "loss": 1.6277, "step": 170400 }, { "epoch": 0.73, "learning_rate": 4.1287439582057664e-05, "loss": 1.6062, "step": 170500 }, { "epoch": 0.73, "learning_rate": 4.1282011195504426e-05, "loss": 1.6005, "step": 170600 }, { "epoch": 0.73, "learning_rate": 4.1276582808951194e-05, "loss": 1.6392, "step": 170700 }, { "epoch": 0.73, "learning_rate": 4.127115442239796e-05, "loss": 1.635, "step": 170800 }, { "epoch": 0.73, "learning_rate": 4.1265726035844725e-05, "loss": 1.6457, "step": 170900 }, { "epoch": 0.73, "learning_rate": 4.1260297649291493e-05, "loss": 1.6484, "step": 171000 }, { "epoch": 0.74, "learning_rate": 4.1254869262738255e-05, "loss": 1.6292, "step": 171100 }, { "epoch": 0.74, "learning_rate": 4.124944087618502e-05, "loss": 1.6596, "step": 171200 }, { "epoch": 0.74, "learning_rate": 4.1244012489631786e-05, "loss": 1.6474, "step": 171300 }, { "epoch": 0.74, "learning_rate": 4.123858410307855e-05, "loss": 1.6271, "step": 171400 }, { "epoch": 0.74, "learning_rate": 4.123315571652531e-05, "loss": 1.6191, "step": 171500 }, { "epoch": 0.74, "learning_rate": 4.122772732997208e-05, "loss": 1.6352, "step": 171600 }, { "epoch": 0.74, "learning_rate": 4.122229894341884e-05, "loss": 1.6396, "step": 171700 }, { "epoch": 0.74, "learning_rate": 4.121687055686561e-05, "loss": 1.6545, "step": 171800 }, { "epoch": 0.74, "learning_rate": 4.121144217031238e-05, "loss": 1.6158, "step": 171900 }, { "epoch": 0.74, "learning_rate": 4.120601378375914e-05, "loss": 1.6239, "step": 172000 }, { "epoch": 0.74, "learning_rate": 4.12005853972059e-05, "loss": 1.6401, "step": 172100 }, { "epoch": 0.74, "learning_rate": 4.119515701065267e-05, "loss": 1.6423, "step": 172200 }, { "epoch": 0.74, "learning_rate": 4.118972862409943e-05, "loss": 1.633, "step": 172300 }, { "epoch": 0.74, "learning_rate": 4.11843002375462e-05, "loss": 1.6451, "step": 172400 }, { "epoch": 0.74, "learning_rate": 4.117887185099296e-05, "loss": 1.6551, "step": 172500 }, { "epoch": 0.74, "learning_rate": 4.1173443464439724e-05, "loss": 1.6391, "step": 172600 }, { "epoch": 0.74, "learning_rate": 4.116801507788649e-05, "loss": 1.6313, "step": 172700 }, { "epoch": 0.74, "learning_rate": 4.1162586691333255e-05, "loss": 1.6167, "step": 172800 }, { "epoch": 0.74, "learning_rate": 4.115715830478002e-05, "loss": 1.6307, "step": 172900 }, { "epoch": 0.74, "learning_rate": 4.115172991822679e-05, "loss": 1.6271, "step": 173000 }, { "epoch": 0.74, "learning_rate": 4.1146301531673554e-05, "loss": 1.6487, "step": 173100 }, { "epoch": 0.74, "learning_rate": 4.1140873145120316e-05, "loss": 1.6376, "step": 173200 }, { "epoch": 0.74, "learning_rate": 4.1135444758567084e-05, "loss": 1.6223, "step": 173300 }, { "epoch": 0.74, "learning_rate": 4.1130016372013846e-05, "loss": 1.6545, "step": 173400 }, { "epoch": 0.75, "learning_rate": 4.112458798546061e-05, "loss": 1.6432, "step": 173500 }, { "epoch": 0.75, "learning_rate": 4.111915959890738e-05, "loss": 1.6115, "step": 173600 }, { "epoch": 0.75, "learning_rate": 4.111373121235414e-05, "loss": 1.6354, "step": 173700 }, { "epoch": 0.75, "learning_rate": 4.110830282580091e-05, "loss": 1.6091, "step": 173800 }, { "epoch": 0.75, "learning_rate": 4.110287443924767e-05, "loss": 1.6004, "step": 173900 }, { "epoch": 0.75, "learning_rate": 4.109744605269444e-05, "loss": 1.6446, "step": 174000 }, { "epoch": 0.75, "learning_rate": 4.10920176661412e-05, "loss": 1.6052, "step": 174100 }, { "epoch": 0.75, "learning_rate": 4.108658927958797e-05, "loss": 1.6133, "step": 174200 }, { "epoch": 0.75, "learning_rate": 4.108116089303473e-05, "loss": 1.6266, "step": 174300 }, { "epoch": 0.75, "learning_rate": 4.10757325064815e-05, "loss": 1.6683, "step": 174400 }, { "epoch": 0.75, "learning_rate": 4.107030411992826e-05, "loss": 1.6587, "step": 174500 }, { "epoch": 0.75, "learning_rate": 4.106487573337502e-05, "loss": 1.649, "step": 174600 }, { "epoch": 0.75, "learning_rate": 4.105944734682179e-05, "loss": 1.6482, "step": 174700 }, { "epoch": 0.75, "learning_rate": 4.105401896026855e-05, "loss": 1.6345, "step": 174800 }, { "epoch": 0.75, "learning_rate": 4.1048590573715315e-05, "loss": 1.6236, "step": 174900 }, { "epoch": 0.75, "learning_rate": 4.104316218716209e-05, "loss": 1.6541, "step": 175000 }, { "epoch": 0.75, "eval_loss": 1.4871829748153687, "eval_runtime": 19.0226, "eval_samples_per_second": 525.691, "eval_steps_per_second": 16.454, "step": 175000 }, { "epoch": 0.75, "learning_rate": 4.103773380060885e-05, "loss": 1.6537, "step": 175100 }, { "epoch": 0.75, "learning_rate": 4.1032305414055614e-05, "loss": 1.6186, "step": 175200 }, { "epoch": 0.75, "learning_rate": 4.102687702750238e-05, "loss": 1.6422, "step": 175300 }, { "epoch": 0.75, "learning_rate": 4.1021448640949145e-05, "loss": 1.6535, "step": 175400 }, { "epoch": 0.75, "learning_rate": 4.1016020254395907e-05, "loss": 1.654, "step": 175500 }, { "epoch": 0.75, "learning_rate": 4.1010591867842675e-05, "loss": 1.6388, "step": 175600 }, { "epoch": 0.75, "learning_rate": 4.100516348128944e-05, "loss": 1.6391, "step": 175700 }, { "epoch": 0.76, "learning_rate": 4.0999735094736206e-05, "loss": 1.6144, "step": 175800 }, { "epoch": 0.76, "learning_rate": 4.099430670818297e-05, "loss": 1.6391, "step": 175900 }, { "epoch": 0.76, "learning_rate": 4.098887832162973e-05, "loss": 1.6552, "step": 176000 }, { "epoch": 0.76, "learning_rate": 4.09834499350765e-05, "loss": 1.6019, "step": 176100 }, { "epoch": 0.76, "learning_rate": 4.097802154852327e-05, "loss": 1.6177, "step": 176200 }, { "epoch": 0.76, "learning_rate": 4.097259316197003e-05, "loss": 1.6369, "step": 176300 }, { "epoch": 0.76, "learning_rate": 4.09671647754168e-05, "loss": 1.6127, "step": 176400 }, { "epoch": 0.76, "learning_rate": 4.096173638886356e-05, "loss": 1.6506, "step": 176500 }, { "epoch": 0.76, "learning_rate": 4.095630800231032e-05, "loss": 1.6262, "step": 176600 }, { "epoch": 0.76, "learning_rate": 4.095087961575709e-05, "loss": 1.6192, "step": 176700 }, { "epoch": 0.76, "learning_rate": 4.094545122920385e-05, "loss": 1.6, "step": 176800 }, { "epoch": 0.76, "learning_rate": 4.0940022842650613e-05, "loss": 1.6306, "step": 176900 }, { "epoch": 0.76, "learning_rate": 4.093459445609738e-05, "loss": 1.639, "step": 177000 }, { "epoch": 0.76, "learning_rate": 4.092916606954415e-05, "loss": 1.64, "step": 177100 }, { "epoch": 0.76, "learning_rate": 4.092373768299091e-05, "loss": 1.6938, "step": 177200 }, { "epoch": 0.76, "learning_rate": 4.091830929643768e-05, "loss": 1.6229, "step": 177300 }, { "epoch": 0.76, "learning_rate": 4.091288090988444e-05, "loss": 1.6599, "step": 177400 }, { "epoch": 0.76, "learning_rate": 4.0907452523331205e-05, "loss": 1.6133, "step": 177500 }, { "epoch": 0.76, "learning_rate": 4.0902024136777974e-05, "loss": 1.6335, "step": 177600 }, { "epoch": 0.76, "learning_rate": 4.0896595750224736e-05, "loss": 1.6426, "step": 177700 }, { "epoch": 0.76, "learning_rate": 4.0891167363671504e-05, "loss": 1.6161, "step": 177800 }, { "epoch": 0.76, "learning_rate": 4.0885738977118266e-05, "loss": 1.6421, "step": 177900 }, { "epoch": 0.76, "learning_rate": 4.088031059056503e-05, "loss": 1.6161, "step": 178000 }, { "epoch": 0.77, "learning_rate": 4.08748822040118e-05, "loss": 1.6326, "step": 178100 }, { "epoch": 0.77, "learning_rate": 4.0869453817458565e-05, "loss": 1.6143, "step": 178200 }, { "epoch": 0.77, "learning_rate": 4.086402543090533e-05, "loss": 1.6332, "step": 178300 }, { "epoch": 0.77, "learning_rate": 4.0858597044352096e-05, "loss": 1.6198, "step": 178400 }, { "epoch": 0.77, "learning_rate": 4.085316865779886e-05, "loss": 1.6098, "step": 178500 }, { "epoch": 0.77, "learning_rate": 4.084774027124562e-05, "loss": 1.6328, "step": 178600 }, { "epoch": 0.77, "learning_rate": 4.084231188469239e-05, "loss": 1.6164, "step": 178700 }, { "epoch": 0.77, "learning_rate": 4.083688349813915e-05, "loss": 1.6256, "step": 178800 }, { "epoch": 0.77, "learning_rate": 4.083145511158591e-05, "loss": 1.6203, "step": 178900 }, { "epoch": 0.77, "learning_rate": 4.082602672503268e-05, "loss": 1.6494, "step": 179000 }, { "epoch": 0.77, "learning_rate": 4.082059833847944e-05, "loss": 1.645, "step": 179100 }, { "epoch": 0.77, "learning_rate": 4.081516995192621e-05, "loss": 1.6414, "step": 179200 }, { "epoch": 0.77, "learning_rate": 4.080974156537298e-05, "loss": 1.6364, "step": 179300 }, { "epoch": 0.77, "learning_rate": 4.080431317881974e-05, "loss": 1.6388, "step": 179400 }, { "epoch": 0.77, "learning_rate": 4.0798884792266504e-05, "loss": 1.6376, "step": 179500 }, { "epoch": 0.77, "learning_rate": 4.079345640571327e-05, "loss": 1.6193, "step": 179600 }, { "epoch": 0.77, "learning_rate": 4.0788028019160034e-05, "loss": 1.6593, "step": 179700 }, { "epoch": 0.77, "learning_rate": 4.07825996326068e-05, "loss": 1.6466, "step": 179800 }, { "epoch": 0.77, "learning_rate": 4.0777171246053565e-05, "loss": 1.6292, "step": 179900 }, { "epoch": 0.77, "learning_rate": 4.0771742859500326e-05, "loss": 1.6344, "step": 180000 }, { "epoch": 0.77, "eval_loss": 1.4833685159683228, "eval_runtime": 19.0127, "eval_samples_per_second": 525.965, "eval_steps_per_second": 16.463, "step": 180000 }, { "epoch": 0.77, "learning_rate": 4.0766314472947095e-05, "loss": 1.6491, "step": 180100 }, { "epoch": 0.77, "learning_rate": 4.076088608639386e-05, "loss": 1.6105, "step": 180200 }, { "epoch": 0.77, "learning_rate": 4.0755457699840626e-05, "loss": 1.6402, "step": 180300 }, { "epoch": 0.78, "learning_rate": 4.0750029313287394e-05, "loss": 1.632, "step": 180400 }, { "epoch": 0.78, "learning_rate": 4.0744600926734156e-05, "loss": 1.6187, "step": 180500 }, { "epoch": 0.78, "learning_rate": 4.073917254018092e-05, "loss": 1.6083, "step": 180600 }, { "epoch": 0.78, "learning_rate": 4.073374415362769e-05, "loss": 1.6105, "step": 180700 }, { "epoch": 0.78, "learning_rate": 4.072831576707445e-05, "loss": 1.6104, "step": 180800 }, { "epoch": 0.78, "learning_rate": 4.072288738052121e-05, "loss": 1.6171, "step": 180900 }, { "epoch": 0.78, "learning_rate": 4.071745899396798e-05, "loss": 1.6525, "step": 181000 }, { "epoch": 0.78, "learning_rate": 4.071203060741474e-05, "loss": 1.6322, "step": 181100 }, { "epoch": 0.78, "learning_rate": 4.07066022208615e-05, "loss": 1.6336, "step": 181200 }, { "epoch": 0.78, "learning_rate": 4.070117383430828e-05, "loss": 1.6341, "step": 181300 }, { "epoch": 0.78, "learning_rate": 4.069574544775504e-05, "loss": 1.6351, "step": 181400 }, { "epoch": 0.78, "learning_rate": 4.06903170612018e-05, "loss": 1.6252, "step": 181500 }, { "epoch": 0.78, "learning_rate": 4.068488867464857e-05, "loss": 1.6479, "step": 181600 }, { "epoch": 0.78, "learning_rate": 4.067946028809533e-05, "loss": 1.655, "step": 181700 }, { "epoch": 0.78, "learning_rate": 4.06740319015421e-05, "loss": 1.6359, "step": 181800 }, { "epoch": 0.78, "learning_rate": 4.066860351498886e-05, "loss": 1.6313, "step": 181900 }, { "epoch": 0.78, "learning_rate": 4.0663175128435625e-05, "loss": 1.6219, "step": 182000 }, { "epoch": 0.78, "learning_rate": 4.0657746741882394e-05, "loss": 1.6426, "step": 182100 }, { "epoch": 0.78, "learning_rate": 4.0652318355329155e-05, "loss": 1.6112, "step": 182200 }, { "epoch": 0.78, "learning_rate": 4.0646889968775924e-05, "loss": 1.6419, "step": 182300 }, { "epoch": 0.78, "learning_rate": 4.064146158222269e-05, "loss": 1.6644, "step": 182400 }, { "epoch": 0.78, "learning_rate": 4.0636033195669455e-05, "loss": 1.6382, "step": 182500 }, { "epoch": 0.78, "learning_rate": 4.0630604809116217e-05, "loss": 1.6431, "step": 182600 }, { "epoch": 0.78, "learning_rate": 4.0625176422562985e-05, "loss": 1.6237, "step": 182700 }, { "epoch": 0.79, "learning_rate": 4.061974803600975e-05, "loss": 1.66, "step": 182800 }, { "epoch": 0.79, "learning_rate": 4.061431964945651e-05, "loss": 1.6497, "step": 182900 }, { "epoch": 0.79, "learning_rate": 4.060889126290328e-05, "loss": 1.6327, "step": 183000 }, { "epoch": 0.79, "learning_rate": 4.060346287635004e-05, "loss": 1.6126, "step": 183100 }, { "epoch": 0.79, "learning_rate": 4.05980344897968e-05, "loss": 1.6265, "step": 183200 }, { "epoch": 0.79, "learning_rate": 4.059260610324357e-05, "loss": 1.6381, "step": 183300 }, { "epoch": 0.79, "learning_rate": 4.058717771669034e-05, "loss": 1.6149, "step": 183400 }, { "epoch": 0.79, "learning_rate": 4.05817493301371e-05, "loss": 1.6469, "step": 183500 }, { "epoch": 0.79, "learning_rate": 4.057632094358387e-05, "loss": 1.6346, "step": 183600 }, { "epoch": 0.79, "learning_rate": 4.057089255703063e-05, "loss": 1.6221, "step": 183700 }, { "epoch": 0.79, "learning_rate": 4.05654641704774e-05, "loss": 1.6388, "step": 183800 }, { "epoch": 0.79, "learning_rate": 4.056003578392416e-05, "loss": 1.6235, "step": 183900 }, { "epoch": 0.79, "learning_rate": 4.0554607397370923e-05, "loss": 1.6216, "step": 184000 }, { "epoch": 0.79, "learning_rate": 4.054917901081769e-05, "loss": 1.6219, "step": 184100 }, { "epoch": 0.79, "learning_rate": 4.0543750624264454e-05, "loss": 1.6149, "step": 184200 }, { "epoch": 0.79, "learning_rate": 4.0538322237711216e-05, "loss": 1.6441, "step": 184300 }, { "epoch": 0.79, "learning_rate": 4.053289385115799e-05, "loss": 1.6382, "step": 184400 }, { "epoch": 0.79, "learning_rate": 4.052746546460475e-05, "loss": 1.6103, "step": 184500 }, { "epoch": 0.79, "learning_rate": 4.0522037078051515e-05, "loss": 1.6291, "step": 184600 }, { "epoch": 0.79, "learning_rate": 4.0516608691498284e-05, "loss": 1.6306, "step": 184700 }, { "epoch": 0.79, "learning_rate": 4.0511180304945046e-05, "loss": 1.6277, "step": 184800 }, { "epoch": 0.79, "learning_rate": 4.050575191839181e-05, "loss": 1.6136, "step": 184900 }, { "epoch": 0.79, "learning_rate": 4.0500323531838576e-05, "loss": 1.6362, "step": 185000 }, { "epoch": 0.79, "eval_loss": 1.480057954788208, "eval_runtime": 19.051, "eval_samples_per_second": 524.905, "eval_steps_per_second": 16.43, "step": 185000 }, { "epoch": 0.8, "learning_rate": 4.049489514528534e-05, "loss": 1.6264, "step": 185100 }, { "epoch": 0.8, "learning_rate": 4.04894667587321e-05, "loss": 1.6282, "step": 185200 }, { "epoch": 0.8, "learning_rate": 4.048403837217887e-05, "loss": 1.6327, "step": 185300 }, { "epoch": 0.8, "learning_rate": 4.047860998562563e-05, "loss": 1.6176, "step": 185400 }, { "epoch": 0.8, "learning_rate": 4.04731815990724e-05, "loss": 1.6385, "step": 185500 }, { "epoch": 0.8, "learning_rate": 4.046775321251917e-05, "loss": 1.6421, "step": 185600 }, { "epoch": 0.8, "learning_rate": 4.046232482596593e-05, "loss": 1.6373, "step": 185700 }, { "epoch": 0.8, "learning_rate": 4.04568964394127e-05, "loss": 1.6184, "step": 185800 }, { "epoch": 0.8, "learning_rate": 4.045146805285946e-05, "loss": 1.6273, "step": 185900 }, { "epoch": 0.8, "learning_rate": 4.044603966630622e-05, "loss": 1.6186, "step": 186000 }, { "epoch": 0.8, "learning_rate": 4.044061127975299e-05, "loss": 1.6068, "step": 186100 }, { "epoch": 0.8, "learning_rate": 4.043518289319975e-05, "loss": 1.6195, "step": 186200 }, { "epoch": 0.8, "learning_rate": 4.0429754506646514e-05, "loss": 1.6136, "step": 186300 }, { "epoch": 0.8, "learning_rate": 4.042432612009328e-05, "loss": 1.6372, "step": 186400 }, { "epoch": 0.8, "learning_rate": 4.041889773354005e-05, "loss": 1.6169, "step": 186500 }, { "epoch": 0.8, "learning_rate": 4.0413469346986814e-05, "loss": 1.631, "step": 186600 }, { "epoch": 0.8, "learning_rate": 4.040804096043358e-05, "loss": 1.631, "step": 186700 }, { "epoch": 0.8, "learning_rate": 4.0402612573880344e-05, "loss": 1.6475, "step": 186800 }, { "epoch": 0.8, "learning_rate": 4.0397184187327106e-05, "loss": 1.6576, "step": 186900 }, { "epoch": 0.8, "learning_rate": 4.0391755800773875e-05, "loss": 1.6185, "step": 187000 }, { "epoch": 0.8, "learning_rate": 4.0386327414220636e-05, "loss": 1.6293, "step": 187100 }, { "epoch": 0.8, "learning_rate": 4.03808990276674e-05, "loss": 1.6254, "step": 187200 }, { "epoch": 0.8, "learning_rate": 4.037547064111417e-05, "loss": 1.6058, "step": 187300 }, { "epoch": 0.81, "learning_rate": 4.037004225456093e-05, "loss": 1.5944, "step": 187400 }, { "epoch": 0.81, "learning_rate": 4.03646138680077e-05, "loss": 1.6126, "step": 187500 }, { "epoch": 0.81, "learning_rate": 4.0359185481454466e-05, "loss": 1.6438, "step": 187600 }, { "epoch": 0.81, "learning_rate": 4.035375709490123e-05, "loss": 1.6302, "step": 187700 }, { "epoch": 0.81, "learning_rate": 4.034832870834799e-05, "loss": 1.6346, "step": 187800 }, { "epoch": 0.81, "learning_rate": 4.034290032179476e-05, "loss": 1.6221, "step": 187900 }, { "epoch": 0.81, "learning_rate": 4.033747193524152e-05, "loss": 1.6381, "step": 188000 }, { "epoch": 0.81, "learning_rate": 4.033204354868829e-05, "loss": 1.607, "step": 188100 }, { "epoch": 0.81, "learning_rate": 4.032661516213505e-05, "loss": 1.5969, "step": 188200 }, { "epoch": 0.81, "learning_rate": 4.032118677558181e-05, "loss": 1.6342, "step": 188300 }, { "epoch": 0.81, "learning_rate": 4.031575838902858e-05, "loss": 1.6447, "step": 188400 }, { "epoch": 0.81, "learning_rate": 4.031033000247534e-05, "loss": 1.6283, "step": 188500 }, { "epoch": 0.81, "learning_rate": 4.030490161592211e-05, "loss": 1.6406, "step": 188600 }, { "epoch": 0.81, "learning_rate": 4.029947322936888e-05, "loss": 1.6354, "step": 188700 }, { "epoch": 0.81, "learning_rate": 4.029404484281564e-05, "loss": 1.5996, "step": 188800 }, { "epoch": 0.81, "learning_rate": 4.0288616456262404e-05, "loss": 1.6525, "step": 188900 }, { "epoch": 0.81, "learning_rate": 4.028318806970917e-05, "loss": 1.6675, "step": 189000 }, { "epoch": 0.81, "learning_rate": 4.0277759683155935e-05, "loss": 1.6242, "step": 189100 }, { "epoch": 0.81, "learning_rate": 4.02723312966027e-05, "loss": 1.6512, "step": 189200 }, { "epoch": 0.81, "learning_rate": 4.0266902910049465e-05, "loss": 1.6472, "step": 189300 }, { "epoch": 0.81, "learning_rate": 4.026147452349623e-05, "loss": 1.6366, "step": 189400 }, { "epoch": 0.81, "learning_rate": 4.0256046136942996e-05, "loss": 1.6389, "step": 189500 }, { "epoch": 0.81, "learning_rate": 4.025061775038976e-05, "loss": 1.6271, "step": 189600 }, { "epoch": 0.81, "learning_rate": 4.0245189363836527e-05, "loss": 1.6299, "step": 189700 }, { "epoch": 0.82, "learning_rate": 4.023976097728329e-05, "loss": 1.6095, "step": 189800 }, { "epoch": 0.82, "learning_rate": 4.023433259073006e-05, "loss": 1.6473, "step": 189900 }, { "epoch": 0.82, "learning_rate": 4.022890420417682e-05, "loss": 1.6325, "step": 190000 }, { "epoch": 0.82, "eval_loss": 1.4781873226165771, "eval_runtime": 19.0438, "eval_samples_per_second": 525.106, "eval_steps_per_second": 16.436, "step": 190000 }, { "epoch": 0.82, "learning_rate": 4.022347581762359e-05, "loss": 1.6275, "step": 190100 }, { "epoch": 0.82, "learning_rate": 4.021804743107035e-05, "loss": 1.6262, "step": 190200 }, { "epoch": 0.82, "learning_rate": 4.021261904451711e-05, "loss": 1.588, "step": 190300 }, { "epoch": 0.82, "learning_rate": 4.020719065796388e-05, "loss": 1.6254, "step": 190400 }, { "epoch": 0.82, "learning_rate": 4.020176227141064e-05, "loss": 1.6347, "step": 190500 }, { "epoch": 0.82, "learning_rate": 4.0196333884857404e-05, "loss": 1.6128, "step": 190600 }, { "epoch": 0.82, "learning_rate": 4.019090549830418e-05, "loss": 1.6265, "step": 190700 }, { "epoch": 0.82, "learning_rate": 4.018547711175094e-05, "loss": 1.6595, "step": 190800 }, { "epoch": 0.82, "learning_rate": 4.01800487251977e-05, "loss": 1.6308, "step": 190900 }, { "epoch": 0.82, "learning_rate": 4.017462033864447e-05, "loss": 1.6539, "step": 191000 }, { "epoch": 0.82, "learning_rate": 4.0169191952091233e-05, "loss": 1.6196, "step": 191100 }, { "epoch": 0.82, "learning_rate": 4.0163763565537995e-05, "loss": 1.6144, "step": 191200 }, { "epoch": 0.82, "learning_rate": 4.0158335178984764e-05, "loss": 1.627, "step": 191300 }, { "epoch": 0.82, "learning_rate": 4.0152906792431526e-05, "loss": 1.6389, "step": 191400 }, { "epoch": 0.82, "learning_rate": 4.0147478405878294e-05, "loss": 1.5831, "step": 191500 }, { "epoch": 0.82, "learning_rate": 4.0142050019325056e-05, "loss": 1.6268, "step": 191600 }, { "epoch": 0.82, "learning_rate": 4.013662163277182e-05, "loss": 1.634, "step": 191700 }, { "epoch": 0.82, "learning_rate": 4.013119324621859e-05, "loss": 1.635, "step": 191800 }, { "epoch": 0.82, "learning_rate": 4.0125764859665356e-05, "loss": 1.6322, "step": 191900 }, { "epoch": 0.82, "learning_rate": 4.012033647311212e-05, "loss": 1.6223, "step": 192000 }, { "epoch": 0.83, "learning_rate": 4.0114908086558886e-05, "loss": 1.6278, "step": 192100 }, { "epoch": 0.83, "learning_rate": 4.010947970000565e-05, "loss": 1.6351, "step": 192200 }, { "epoch": 0.83, "learning_rate": 4.010405131345241e-05, "loss": 1.5964, "step": 192300 }, { "epoch": 0.83, "learning_rate": 4.009862292689918e-05, "loss": 1.6657, "step": 192400 }, { "epoch": 0.83, "learning_rate": 4.009319454034594e-05, "loss": 1.6335, "step": 192500 }, { "epoch": 0.83, "learning_rate": 4.00877661537927e-05, "loss": 1.6092, "step": 192600 }, { "epoch": 0.83, "learning_rate": 4.008233776723947e-05, "loss": 1.6382, "step": 192700 }, { "epoch": 0.83, "learning_rate": 4.007690938068624e-05, "loss": 1.6323, "step": 192800 }, { "epoch": 0.83, "learning_rate": 4.0071480994133e-05, "loss": 1.6095, "step": 192900 }, { "epoch": 0.83, "learning_rate": 4.006605260757977e-05, "loss": 1.6421, "step": 193000 }, { "epoch": 0.83, "learning_rate": 4.006062422102653e-05, "loss": 1.6308, "step": 193100 }, { "epoch": 0.83, "learning_rate": 4.0055195834473294e-05, "loss": 1.6324, "step": 193200 }, { "epoch": 0.83, "learning_rate": 4.004976744792006e-05, "loss": 1.6081, "step": 193300 }, { "epoch": 0.83, "learning_rate": 4.0044339061366824e-05, "loss": 1.6247, "step": 193400 }, { "epoch": 0.83, "learning_rate": 4.003891067481359e-05, "loss": 1.5981, "step": 193500 }, { "epoch": 0.83, "learning_rate": 4.0033482288260355e-05, "loss": 1.6262, "step": 193600 }, { "epoch": 0.83, "learning_rate": 4.002805390170712e-05, "loss": 1.6318, "step": 193700 }, { "epoch": 0.83, "learning_rate": 4.0022625515153885e-05, "loss": 1.6471, "step": 193800 }, { "epoch": 0.83, "learning_rate": 4.0017197128600654e-05, "loss": 1.5975, "step": 193900 }, { "epoch": 0.83, "learning_rate": 4.0011768742047416e-05, "loss": 1.5955, "step": 194000 }, { "epoch": 0.83, "learning_rate": 4.0006340355494185e-05, "loss": 1.6424, "step": 194100 }, { "epoch": 0.83, "learning_rate": 4.0000911968940946e-05, "loss": 1.6297, "step": 194200 }, { "epoch": 0.83, "learning_rate": 3.999548358238771e-05, "loss": 1.5919, "step": 194300 }, { "epoch": 0.84, "learning_rate": 3.999005519583448e-05, "loss": 1.6007, "step": 194400 }, { "epoch": 0.84, "learning_rate": 3.998462680928124e-05, "loss": 1.6243, "step": 194500 }, { "epoch": 0.84, "learning_rate": 3.9979198422728e-05, "loss": 1.632, "step": 194600 }, { "epoch": 0.84, "learning_rate": 3.997377003617477e-05, "loss": 1.6295, "step": 194700 }, { "epoch": 0.84, "learning_rate": 3.996834164962153e-05, "loss": 1.6243, "step": 194800 }, { "epoch": 0.84, "learning_rate": 3.99629132630683e-05, "loss": 1.6342, "step": 194900 }, { "epoch": 0.84, "learning_rate": 3.995748487651507e-05, "loss": 1.6255, "step": 195000 }, { "epoch": 0.84, "eval_loss": 1.4795633554458618, "eval_runtime": 19.0393, "eval_samples_per_second": 525.231, "eval_steps_per_second": 16.44, "step": 195000 }, { "epoch": 0.84, "learning_rate": 3.995205648996183e-05, "loss": 1.6733, "step": 195100 }, { "epoch": 0.84, "learning_rate": 3.994662810340859e-05, "loss": 1.5987, "step": 195200 }, { "epoch": 0.84, "learning_rate": 3.994119971685536e-05, "loss": 1.6215, "step": 195300 }, { "epoch": 0.84, "learning_rate": 3.993577133030212e-05, "loss": 1.6399, "step": 195400 }, { "epoch": 0.84, "learning_rate": 3.993034294374889e-05, "loss": 1.6229, "step": 195500 }, { "epoch": 0.84, "learning_rate": 3.992491455719565e-05, "loss": 1.639, "step": 195600 }, { "epoch": 0.84, "learning_rate": 3.9919486170642415e-05, "loss": 1.6256, "step": 195700 }, { "epoch": 0.84, "learning_rate": 3.9914057784089184e-05, "loss": 1.6476, "step": 195800 }, { "epoch": 0.84, "learning_rate": 3.9908629397535946e-05, "loss": 1.5958, "step": 195900 }, { "epoch": 0.84, "learning_rate": 3.9903201010982714e-05, "loss": 1.6397, "step": 196000 }, { "epoch": 0.84, "learning_rate": 3.989777262442948e-05, "loss": 1.6466, "step": 196100 }, { "epoch": 0.84, "learning_rate": 3.9892344237876245e-05, "loss": 1.6343, "step": 196200 }, { "epoch": 0.84, "learning_rate": 3.988691585132301e-05, "loss": 1.5967, "step": 196300 }, { "epoch": 0.84, "learning_rate": 3.9881487464769775e-05, "loss": 1.6249, "step": 196400 }, { "epoch": 0.84, "learning_rate": 3.987605907821654e-05, "loss": 1.6111, "step": 196500 }, { "epoch": 0.84, "learning_rate": 3.98706306916633e-05, "loss": 1.6295, "step": 196600 }, { "epoch": 0.85, "learning_rate": 3.986520230511007e-05, "loss": 1.6362, "step": 196700 }, { "epoch": 0.85, "learning_rate": 3.985977391855683e-05, "loss": 1.6522, "step": 196800 }, { "epoch": 0.85, "learning_rate": 3.98543455320036e-05, "loss": 1.6481, "step": 196900 }, { "epoch": 0.85, "learning_rate": 3.984891714545037e-05, "loss": 1.6194, "step": 197000 }, { "epoch": 0.85, "learning_rate": 3.984348875889713e-05, "loss": 1.6086, "step": 197100 }, { "epoch": 0.85, "learning_rate": 3.983806037234389e-05, "loss": 1.6251, "step": 197200 }, { "epoch": 0.85, "learning_rate": 3.983263198579066e-05, "loss": 1.6097, "step": 197300 }, { "epoch": 0.85, "learning_rate": 3.982720359923742e-05, "loss": 1.6268, "step": 197400 }, { "epoch": 0.85, "learning_rate": 3.982177521268419e-05, "loss": 1.6386, "step": 197500 }, { "epoch": 0.85, "learning_rate": 3.981634682613095e-05, "loss": 1.6207, "step": 197600 }, { "epoch": 0.85, "learning_rate": 3.9810918439577714e-05, "loss": 1.6248, "step": 197700 }, { "epoch": 0.85, "learning_rate": 3.980549005302448e-05, "loss": 1.6337, "step": 197800 }, { "epoch": 0.85, "learning_rate": 3.9800061666471244e-05, "loss": 1.6195, "step": 197900 }, { "epoch": 0.85, "learning_rate": 3.9794633279918006e-05, "loss": 1.6396, "step": 198000 }, { "epoch": 0.85, "learning_rate": 3.978920489336478e-05, "loss": 1.5919, "step": 198100 }, { "epoch": 0.85, "learning_rate": 3.978377650681154e-05, "loss": 1.6144, "step": 198200 }, { "epoch": 0.85, "learning_rate": 3.9778348120258305e-05, "loss": 1.6001, "step": 198300 }, { "epoch": 0.85, "learning_rate": 3.9772919733705074e-05, "loss": 1.6218, "step": 198400 }, { "epoch": 0.85, "learning_rate": 3.9767491347151836e-05, "loss": 1.6068, "step": 198500 }, { "epoch": 0.85, "learning_rate": 3.97620629605986e-05, "loss": 1.6075, "step": 198600 }, { "epoch": 0.85, "learning_rate": 3.9756634574045366e-05, "loss": 1.5883, "step": 198700 }, { "epoch": 0.85, "learning_rate": 3.975120618749213e-05, "loss": 1.5984, "step": 198800 }, { "epoch": 0.85, "learning_rate": 3.97457778009389e-05, "loss": 1.6311, "step": 198900 }, { "epoch": 0.85, "learning_rate": 3.974034941438566e-05, "loss": 1.6614, "step": 199000 }, { "epoch": 0.86, "learning_rate": 3.973492102783243e-05, "loss": 1.6541, "step": 199100 }, { "epoch": 0.86, "learning_rate": 3.972949264127919e-05, "loss": 1.6475, "step": 199200 }, { "epoch": 0.86, "learning_rate": 3.972406425472596e-05, "loss": 1.6138, "step": 199300 }, { "epoch": 0.86, "learning_rate": 3.971863586817272e-05, "loss": 1.6249, "step": 199400 }, { "epoch": 0.86, "learning_rate": 3.971320748161949e-05, "loss": 1.6141, "step": 199500 }, { "epoch": 0.86, "learning_rate": 3.970777909506625e-05, "loss": 1.6344, "step": 199600 }, { "epoch": 0.86, "learning_rate": 3.970235070851301e-05, "loss": 1.613, "step": 199700 }, { "epoch": 0.86, "learning_rate": 3.969692232195978e-05, "loss": 1.616, "step": 199800 }, { "epoch": 0.86, "learning_rate": 3.969149393540654e-05, "loss": 1.6162, "step": 199900 }, { "epoch": 0.86, "learning_rate": 3.9686065548853305e-05, "loss": 1.6455, "step": 200000 }, { "epoch": 0.86, "eval_loss": 1.4754244089126587, "eval_runtime": 18.9946, "eval_samples_per_second": 526.464, "eval_steps_per_second": 16.478, "step": 200000 }, { "epoch": 0.86, "learning_rate": 3.968063716230007e-05, "loss": 1.6311, "step": 200100 }, { "epoch": 0.86, "learning_rate": 3.967520877574684e-05, "loss": 1.6139, "step": 200200 }, { "epoch": 0.86, "learning_rate": 3.9669780389193604e-05, "loss": 1.601, "step": 200300 }, { "epoch": 0.86, "learning_rate": 3.966435200264037e-05, "loss": 1.6321, "step": 200400 }, { "epoch": 0.86, "learning_rate": 3.9658923616087134e-05, "loss": 1.6582, "step": 200500 }, { "epoch": 0.86, "learning_rate": 3.9653495229533896e-05, "loss": 1.6293, "step": 200600 }, { "epoch": 0.86, "learning_rate": 3.9648066842980665e-05, "loss": 1.6144, "step": 200700 }, { "epoch": 0.86, "learning_rate": 3.964263845642743e-05, "loss": 1.6154, "step": 200800 }, { "epoch": 0.86, "learning_rate": 3.9637210069874195e-05, "loss": 1.6168, "step": 200900 }, { "epoch": 0.86, "learning_rate": 3.963178168332096e-05, "loss": 1.6595, "step": 201000 }, { "epoch": 0.86, "learning_rate": 3.962635329676772e-05, "loss": 1.6051, "step": 201100 }, { "epoch": 0.86, "learning_rate": 3.962092491021449e-05, "loss": 1.6103, "step": 201200 }, { "epoch": 0.86, "learning_rate": 3.9615496523661256e-05, "loss": 1.6523, "step": 201300 }, { "epoch": 0.87, "learning_rate": 3.961006813710802e-05, "loss": 1.6216, "step": 201400 }, { "epoch": 0.87, "learning_rate": 3.960463975055479e-05, "loss": 1.6684, "step": 201500 }, { "epoch": 0.87, "learning_rate": 3.959921136400155e-05, "loss": 1.6098, "step": 201600 }, { "epoch": 0.87, "learning_rate": 3.959378297744831e-05, "loss": 1.6309, "step": 201700 }, { "epoch": 0.87, "learning_rate": 3.958835459089508e-05, "loss": 1.6329, "step": 201800 }, { "epoch": 0.87, "learning_rate": 3.958292620434184e-05, "loss": 1.6424, "step": 201900 }, { "epoch": 0.87, "learning_rate": 3.95774978177886e-05, "loss": 1.6526, "step": 202000 }, { "epoch": 0.87, "learning_rate": 3.957206943123537e-05, "loss": 1.6335, "step": 202100 }, { "epoch": 0.87, "learning_rate": 3.956664104468214e-05, "loss": 1.6238, "step": 202200 }, { "epoch": 0.87, "learning_rate": 3.95612126581289e-05, "loss": 1.6289, "step": 202300 }, { "epoch": 0.87, "learning_rate": 3.955578427157567e-05, "loss": 1.6357, "step": 202400 }, { "epoch": 0.87, "learning_rate": 3.955035588502243e-05, "loss": 1.6201, "step": 202500 }, { "epoch": 0.87, "learning_rate": 3.9544927498469195e-05, "loss": 1.6048, "step": 202600 }, { "epoch": 0.87, "learning_rate": 3.953949911191596e-05, "loss": 1.6136, "step": 202700 }, { "epoch": 0.87, "learning_rate": 3.9534070725362725e-05, "loss": 1.6227, "step": 202800 }, { "epoch": 0.87, "learning_rate": 3.9528642338809494e-05, "loss": 1.6221, "step": 202900 }, { "epoch": 0.87, "learning_rate": 3.9523213952256256e-05, "loss": 1.6349, "step": 203000 }, { "epoch": 0.87, "learning_rate": 3.951778556570302e-05, "loss": 1.6481, "step": 203100 }, { "epoch": 0.87, "learning_rate": 3.9512357179149786e-05, "loss": 1.614, "step": 203200 }, { "epoch": 0.87, "learning_rate": 3.9506928792596555e-05, "loss": 1.6304, "step": 203300 }, { "epoch": 0.87, "learning_rate": 3.950150040604332e-05, "loss": 1.6152, "step": 203400 }, { "epoch": 0.87, "learning_rate": 3.9496072019490085e-05, "loss": 1.6251, "step": 203500 }, { "epoch": 0.87, "learning_rate": 3.949064363293685e-05, "loss": 1.6263, "step": 203600 }, { "epoch": 0.88, "learning_rate": 3.948521524638361e-05, "loss": 1.6257, "step": 203700 }, { "epoch": 0.88, "learning_rate": 3.947978685983038e-05, "loss": 1.6283, "step": 203800 }, { "epoch": 0.88, "learning_rate": 3.947435847327714e-05, "loss": 1.6304, "step": 203900 }, { "epoch": 0.88, "learning_rate": 3.94689300867239e-05, "loss": 1.6152, "step": 204000 }, { "epoch": 0.88, "learning_rate": 3.946350170017067e-05, "loss": 1.6025, "step": 204100 }, { "epoch": 0.88, "learning_rate": 3.945807331361743e-05, "loss": 1.63, "step": 204200 }, { "epoch": 0.88, "learning_rate": 3.94526449270642e-05, "loss": 1.611, "step": 204300 }, { "epoch": 0.88, "learning_rate": 3.944721654051097e-05, "loss": 1.6312, "step": 204400 }, { "epoch": 0.88, "learning_rate": 3.944178815395773e-05, "loss": 1.6026, "step": 204500 }, { "epoch": 0.88, "learning_rate": 3.943635976740449e-05, "loss": 1.6058, "step": 204600 }, { "epoch": 0.88, "learning_rate": 3.943093138085126e-05, "loss": 1.644, "step": 204700 }, { "epoch": 0.88, "learning_rate": 3.9425502994298024e-05, "loss": 1.6163, "step": 204800 }, { "epoch": 0.88, "learning_rate": 3.942007460774479e-05, "loss": 1.6285, "step": 204900 }, { "epoch": 0.88, "learning_rate": 3.9414646221191554e-05, "loss": 1.653, "step": 205000 }, { "epoch": 0.88, "eval_loss": 1.4734227657318115, "eval_runtime": 19.0238, "eval_samples_per_second": 525.657, "eval_steps_per_second": 16.453, "step": 205000 }, { "epoch": 0.88, "learning_rate": 3.9409217834638316e-05, "loss": 1.6065, "step": 205100 }, { "epoch": 0.88, "learning_rate": 3.9403789448085085e-05, "loss": 1.6388, "step": 205200 }, { "epoch": 0.88, "learning_rate": 3.9398361061531847e-05, "loss": 1.6096, "step": 205300 }, { "epoch": 0.88, "learning_rate": 3.9392932674978615e-05, "loss": 1.6194, "step": 205400 }, { "epoch": 0.88, "learning_rate": 3.9387504288425384e-05, "loss": 1.6156, "step": 205500 }, { "epoch": 0.88, "learning_rate": 3.9382075901872146e-05, "loss": 1.6191, "step": 205600 }, { "epoch": 0.88, "learning_rate": 3.937664751531891e-05, "loss": 1.6216, "step": 205700 }, { "epoch": 0.88, "learning_rate": 3.9371219128765676e-05, "loss": 1.6099, "step": 205800 }, { "epoch": 0.88, "learning_rate": 3.936579074221244e-05, "loss": 1.6193, "step": 205900 }, { "epoch": 0.88, "learning_rate": 3.93603623556592e-05, "loss": 1.6379, "step": 206000 }, { "epoch": 0.89, "learning_rate": 3.935493396910597e-05, "loss": 1.5792, "step": 206100 }, { "epoch": 0.89, "learning_rate": 3.934950558255273e-05, "loss": 1.6132, "step": 206200 }, { "epoch": 0.89, "learning_rate": 3.934407719599949e-05, "loss": 1.598, "step": 206300 }, { "epoch": 0.89, "learning_rate": 3.933864880944627e-05, "loss": 1.656, "step": 206400 }, { "epoch": 0.89, "learning_rate": 3.933322042289303e-05, "loss": 1.6255, "step": 206500 }, { "epoch": 0.89, "learning_rate": 3.932779203633979e-05, "loss": 1.6058, "step": 206600 }, { "epoch": 0.89, "learning_rate": 3.932236364978656e-05, "loss": 1.6469, "step": 206700 }, { "epoch": 0.89, "learning_rate": 3.931693526323332e-05, "loss": 1.6283, "step": 206800 }, { "epoch": 0.89, "learning_rate": 3.931150687668009e-05, "loss": 1.6318, "step": 206900 }, { "epoch": 0.89, "learning_rate": 3.930607849012685e-05, "loss": 1.6243, "step": 207000 }, { "epoch": 0.89, "learning_rate": 3.9300650103573615e-05, "loss": 1.6538, "step": 207100 }, { "epoch": 0.89, "learning_rate": 3.929522171702038e-05, "loss": 1.6271, "step": 207200 }, { "epoch": 0.89, "learning_rate": 3.9289793330467145e-05, "loss": 1.6344, "step": 207300 }, { "epoch": 0.89, "learning_rate": 3.928436494391391e-05, "loss": 1.5985, "step": 207400 }, { "epoch": 0.89, "learning_rate": 3.927893655736068e-05, "loss": 1.5742, "step": 207500 }, { "epoch": 0.89, "learning_rate": 3.9273508170807444e-05, "loss": 1.5863, "step": 207600 }, { "epoch": 0.89, "learning_rate": 3.9268079784254206e-05, "loss": 1.6366, "step": 207700 }, { "epoch": 0.89, "learning_rate": 3.9262651397700975e-05, "loss": 1.5802, "step": 207800 }, { "epoch": 0.89, "learning_rate": 3.925722301114774e-05, "loss": 1.6049, "step": 207900 }, { "epoch": 0.89, "learning_rate": 3.92517946245945e-05, "loss": 1.6003, "step": 208000 }, { "epoch": 0.89, "learning_rate": 3.924636623804127e-05, "loss": 1.6243, "step": 208100 }, { "epoch": 0.89, "learning_rate": 3.924093785148803e-05, "loss": 1.617, "step": 208200 }, { "epoch": 0.89, "learning_rate": 3.923550946493479e-05, "loss": 1.6359, "step": 208300 }, { "epoch": 0.9, "learning_rate": 3.923008107838156e-05, "loss": 1.622, "step": 208400 }, { "epoch": 0.9, "learning_rate": 3.922465269182833e-05, "loss": 1.5969, "step": 208500 }, { "epoch": 0.9, "learning_rate": 3.921922430527509e-05, "loss": 1.6187, "step": 208600 }, { "epoch": 0.9, "learning_rate": 3.921379591872186e-05, "loss": 1.6107, "step": 208700 }, { "epoch": 0.9, "learning_rate": 3.920836753216862e-05, "loss": 1.6074, "step": 208800 }, { "epoch": 0.9, "learning_rate": 3.920293914561539e-05, "loss": 1.644, "step": 208900 }, { "epoch": 0.9, "learning_rate": 3.919751075906215e-05, "loss": 1.5991, "step": 209000 }, { "epoch": 0.9, "learning_rate": 3.919208237250891e-05, "loss": 1.5961, "step": 209100 }, { "epoch": 0.9, "learning_rate": 3.918665398595568e-05, "loss": 1.6598, "step": 209200 }, { "epoch": 0.9, "learning_rate": 3.9181225599402444e-05, "loss": 1.5988, "step": 209300 }, { "epoch": 0.9, "learning_rate": 3.9175797212849205e-05, "loss": 1.6194, "step": 209400 }, { "epoch": 0.9, "learning_rate": 3.9170368826295974e-05, "loss": 1.6176, "step": 209500 }, { "epoch": 0.9, "learning_rate": 3.916494043974274e-05, "loss": 1.6265, "step": 209600 }, { "epoch": 0.9, "learning_rate": 3.9159512053189505e-05, "loss": 1.5909, "step": 209700 }, { "epoch": 0.9, "learning_rate": 3.915408366663627e-05, "loss": 1.626, "step": 209800 }, { "epoch": 0.9, "learning_rate": 3.9148655280083035e-05, "loss": 1.5911, "step": 209900 }, { "epoch": 0.9, "learning_rate": 3.91432268935298e-05, "loss": 1.6366, "step": 210000 }, { "epoch": 0.9, "eval_loss": 1.4706979990005493, "eval_runtime": 19.0444, "eval_samples_per_second": 525.088, "eval_steps_per_second": 16.435, "step": 210000 }, { "epoch": 0.9, "learning_rate": 3.9137798506976566e-05, "loss": 1.6034, "step": 210100 }, { "epoch": 0.9, "learning_rate": 3.913237012042333e-05, "loss": 1.5996, "step": 210200 }, { "epoch": 0.9, "learning_rate": 3.912694173387009e-05, "loss": 1.6335, "step": 210300 }, { "epoch": 0.9, "learning_rate": 3.912151334731686e-05, "loss": 1.6271, "step": 210400 }, { "epoch": 0.9, "learning_rate": 3.911608496076362e-05, "loss": 1.6404, "step": 210500 }, { "epoch": 0.9, "learning_rate": 3.911065657421039e-05, "loss": 1.5918, "step": 210600 }, { "epoch": 0.91, "learning_rate": 3.910522818765716e-05, "loss": 1.6124, "step": 210700 }, { "epoch": 0.91, "learning_rate": 3.909979980110392e-05, "loss": 1.5966, "step": 210800 }, { "epoch": 0.91, "learning_rate": 3.909437141455069e-05, "loss": 1.6116, "step": 210900 }, { "epoch": 0.91, "learning_rate": 3.908894302799745e-05, "loss": 1.6332, "step": 211000 }, { "epoch": 0.91, "learning_rate": 3.908351464144421e-05, "loss": 1.6542, "step": 211100 }, { "epoch": 0.91, "learning_rate": 3.907808625489098e-05, "loss": 1.6089, "step": 211200 }, { "epoch": 0.91, "learning_rate": 3.907265786833774e-05, "loss": 1.6082, "step": 211300 }, { "epoch": 0.91, "learning_rate": 3.9067229481784504e-05, "loss": 1.5931, "step": 211400 }, { "epoch": 0.91, "learning_rate": 3.906180109523127e-05, "loss": 1.6417, "step": 211500 }, { "epoch": 0.91, "learning_rate": 3.9056372708678034e-05, "loss": 1.6244, "step": 211600 }, { "epoch": 0.91, "learning_rate": 3.90509443221248e-05, "loss": 1.5985, "step": 211700 }, { "epoch": 0.91, "learning_rate": 3.904551593557157e-05, "loss": 1.6141, "step": 211800 }, { "epoch": 0.91, "learning_rate": 3.9040087549018334e-05, "loss": 1.6266, "step": 211900 }, { "epoch": 0.91, "learning_rate": 3.9034659162465096e-05, "loss": 1.5968, "step": 212000 }, { "epoch": 0.91, "learning_rate": 3.9029285059777393e-05, "loss": 1.6204, "step": 212100 }, { "epoch": 0.91, "learning_rate": 3.9023856673224155e-05, "loss": 1.6088, "step": 212200 }, { "epoch": 0.91, "learning_rate": 3.9018428286670924e-05, "loss": 1.6306, "step": 212300 }, { "epoch": 0.91, "learning_rate": 3.901299990011769e-05, "loss": 1.652, "step": 212400 }, { "epoch": 0.91, "learning_rate": 3.9007625797429984e-05, "loss": 1.6179, "step": 212500 }, { "epoch": 0.91, "learning_rate": 3.900219741087675e-05, "loss": 1.6265, "step": 212600 }, { "epoch": 0.91, "learning_rate": 3.8996769024323514e-05, "loss": 1.6164, "step": 212700 }, { "epoch": 0.91, "learning_rate": 3.8991340637770276e-05, "loss": 1.6125, "step": 212800 }, { "epoch": 0.91, "learning_rate": 3.8985912251217045e-05, "loss": 1.6091, "step": 212900 }, { "epoch": 0.92, "learning_rate": 3.8980483864663813e-05, "loss": 1.5873, "step": 213000 }, { "epoch": 0.92, "learning_rate": 3.8975055478110575e-05, "loss": 1.5956, "step": 213100 }, { "epoch": 0.92, "learning_rate": 3.8969627091557344e-05, "loss": 1.6119, "step": 213200 }, { "epoch": 0.92, "learning_rate": 3.8964198705004106e-05, "loss": 1.6209, "step": 213300 }, { "epoch": 0.92, "learning_rate": 3.8958770318450874e-05, "loss": 1.6383, "step": 213400 }, { "epoch": 0.92, "learning_rate": 3.8953341931897636e-05, "loss": 1.6231, "step": 213500 }, { "epoch": 0.92, "learning_rate": 3.89479135453444e-05, "loss": 1.6086, "step": 213600 }, { "epoch": 0.92, "learning_rate": 3.894248515879117e-05, "loss": 1.6127, "step": 213700 }, { "epoch": 0.92, "learning_rate": 3.893705677223793e-05, "loss": 1.6427, "step": 213800 }, { "epoch": 0.92, "learning_rate": 3.893162838568469e-05, "loss": 1.619, "step": 213900 }, { "epoch": 0.92, "learning_rate": 3.892619999913146e-05, "loss": 1.6003, "step": 214000 }, { "epoch": 0.92, "learning_rate": 3.892077161257823e-05, "loss": 1.6161, "step": 214100 }, { "epoch": 0.92, "learning_rate": 3.891534322602499e-05, "loss": 1.6027, "step": 214200 }, { "epoch": 0.92, "learning_rate": 3.890991483947176e-05, "loss": 1.6419, "step": 214300 }, { "epoch": 0.92, "learning_rate": 3.890448645291852e-05, "loss": 1.6429, "step": 214400 }, { "epoch": 0.92, "learning_rate": 3.889905806636528e-05, "loss": 1.6234, "step": 214500 }, { "epoch": 0.92, "learning_rate": 3.889362967981205e-05, "loss": 1.617, "step": 214600 }, { "epoch": 0.92, "learning_rate": 3.888820129325881e-05, "loss": 1.6263, "step": 214700 }, { "epoch": 0.92, "learning_rate": 3.8882772906705575e-05, "loss": 1.6383, "step": 214800 }, { "epoch": 0.92, "learning_rate": 3.887734452015234e-05, "loss": 1.6223, "step": 214900 }, { "epoch": 0.92, "learning_rate": 3.8871916133599105e-05, "loss": 1.5949, "step": 215000 }, { "epoch": 0.92, "eval_loss": 1.4699814319610596, "eval_runtime": 19.0028, "eval_samples_per_second": 526.238, "eval_steps_per_second": 16.471, "step": 215000 }, { "epoch": 0.92, "learning_rate": 3.8866487747045874e-05, "loss": 1.616, "step": 215100 }, { "epoch": 0.92, "learning_rate": 3.886105936049264e-05, "loss": 1.6109, "step": 215200 }, { "epoch": 0.92, "learning_rate": 3.8855630973939404e-05, "loss": 1.6242, "step": 215300 }, { "epoch": 0.93, "learning_rate": 3.885020258738617e-05, "loss": 1.5882, "step": 215400 }, { "epoch": 0.93, "learning_rate": 3.8844774200832935e-05, "loss": 1.6162, "step": 215500 }, { "epoch": 0.93, "learning_rate": 3.88393458142797e-05, "loss": 1.6346, "step": 215600 }, { "epoch": 0.93, "learning_rate": 3.8833917427726465e-05, "loss": 1.6047, "step": 215700 }, { "epoch": 0.93, "learning_rate": 3.882848904117323e-05, "loss": 1.6118, "step": 215800 }, { "epoch": 0.93, "learning_rate": 3.882306065461999e-05, "loss": 1.5989, "step": 215900 }, { "epoch": 0.93, "learning_rate": 3.881763226806676e-05, "loss": 1.6242, "step": 216000 }, { "epoch": 0.93, "learning_rate": 3.8812258165379056e-05, "loss": 1.6333, "step": 216100 }, { "epoch": 0.93, "learning_rate": 3.880682977882582e-05, "loss": 1.5985, "step": 216200 }, { "epoch": 0.93, "learning_rate": 3.8801401392272586e-05, "loss": 1.5896, "step": 216300 }, { "epoch": 0.93, "learning_rate": 3.879597300571935e-05, "loss": 1.5957, "step": 216400 }, { "epoch": 0.93, "learning_rate": 3.879054461916612e-05, "loss": 1.5785, "step": 216500 }, { "epoch": 0.93, "learning_rate": 3.878511623261288e-05, "loss": 1.6345, "step": 216600 }, { "epoch": 0.93, "learning_rate": 3.877968784605964e-05, "loss": 1.5993, "step": 216700 }, { "epoch": 0.93, "learning_rate": 3.877425945950641e-05, "loss": 1.6124, "step": 216800 }, { "epoch": 0.93, "learning_rate": 3.876883107295318e-05, "loss": 1.6128, "step": 216900 }, { "epoch": 0.93, "learning_rate": 3.876340268639994e-05, "loss": 1.5962, "step": 217000 }, { "epoch": 0.93, "learning_rate": 3.875797429984671e-05, "loss": 1.5918, "step": 217100 }, { "epoch": 0.93, "learning_rate": 3.875254591329347e-05, "loss": 1.5932, "step": 217200 }, { "epoch": 0.93, "learning_rate": 3.874711752674023e-05, "loss": 1.5982, "step": 217300 }, { "epoch": 0.93, "learning_rate": 3.8741689140187e-05, "loss": 1.6113, "step": 217400 }, { "epoch": 0.93, "learning_rate": 3.873626075363376e-05, "loss": 1.6173, "step": 217500 }, { "epoch": 0.93, "learning_rate": 3.8730832367080524e-05, "loss": 1.6258, "step": 217600 }, { "epoch": 0.94, "learning_rate": 3.872540398052729e-05, "loss": 1.6248, "step": 217700 }, { "epoch": 0.94, "learning_rate": 3.8719975593974055e-05, "loss": 1.608, "step": 217800 }, { "epoch": 0.94, "learning_rate": 3.8714547207420824e-05, "loss": 1.6077, "step": 217900 }, { "epoch": 0.94, "learning_rate": 3.870911882086759e-05, "loss": 1.6182, "step": 218000 }, { "epoch": 0.94, "learning_rate": 3.8703690434314354e-05, "loss": 1.5938, "step": 218100 }, { "epoch": 0.94, "learning_rate": 3.8698262047761116e-05, "loss": 1.5984, "step": 218200 }, { "epoch": 0.94, "learning_rate": 3.8692833661207885e-05, "loss": 1.6177, "step": 218300 }, { "epoch": 0.94, "learning_rate": 3.8687405274654647e-05, "loss": 1.6058, "step": 218400 }, { "epoch": 0.94, "learning_rate": 3.8681976888101415e-05, "loss": 1.5896, "step": 218500 }, { "epoch": 0.94, "learning_rate": 3.867654850154818e-05, "loss": 1.5958, "step": 218600 }, { "epoch": 0.94, "learning_rate": 3.867112011499494e-05, "loss": 1.6201, "step": 218700 }, { "epoch": 0.94, "learning_rate": 3.866569172844171e-05, "loss": 1.6015, "step": 218800 }, { "epoch": 0.94, "learning_rate": 3.866026334188847e-05, "loss": 1.6328, "step": 218900 }, { "epoch": 0.94, "learning_rate": 3.865483495533524e-05, "loss": 1.6127, "step": 219000 }, { "epoch": 0.94, "learning_rate": 3.864940656878201e-05, "loss": 1.6269, "step": 219100 }, { "epoch": 0.94, "learning_rate": 3.864397818222877e-05, "loss": 1.6029, "step": 219200 }, { "epoch": 0.94, "learning_rate": 3.863854979567553e-05, "loss": 1.6364, "step": 219300 }, { "epoch": 0.94, "learning_rate": 3.86331214091223e-05, "loss": 1.6451, "step": 219400 }, { "epoch": 0.94, "learning_rate": 3.862769302256906e-05, "loss": 1.5971, "step": 219500 }, { "epoch": 0.94, "learning_rate": 3.862226463601582e-05, "loss": 1.6272, "step": 219600 }, { "epoch": 0.94, "learning_rate": 3.861683624946259e-05, "loss": 1.5922, "step": 219700 }, { "epoch": 0.94, "learning_rate": 3.8611407862909353e-05, "loss": 1.6224, "step": 219800 }, { "epoch": 0.94, "learning_rate": 3.860597947635612e-05, "loss": 1.6, "step": 219900 }, { "epoch": 0.95, "learning_rate": 3.860055108980289e-05, "loss": 1.5939, "step": 220000 }, { "epoch": 0.95, "eval_loss": 1.468553066253662, "eval_runtime": 19.0907, "eval_samples_per_second": 523.816, "eval_steps_per_second": 16.395, "step": 220000 }, { "epoch": 0.95, "learning_rate": 3.859517698711518e-05, "loss": 1.5755, "step": 220100 }, { "epoch": 0.95, "learning_rate": 3.858974860056195e-05, "loss": 1.6354, "step": 220200 }, { "epoch": 0.95, "learning_rate": 3.858432021400871e-05, "loss": 1.6106, "step": 220300 }, { "epoch": 0.95, "learning_rate": 3.8578891827455474e-05, "loss": 1.6072, "step": 220400 }, { "epoch": 0.95, "learning_rate": 3.857351772476778e-05, "loss": 1.6048, "step": 220500 }, { "epoch": 0.95, "learning_rate": 3.856808933821454e-05, "loss": 1.6102, "step": 220600 }, { "epoch": 0.95, "learning_rate": 3.85626609516613e-05, "loss": 1.5699, "step": 220700 }, { "epoch": 0.95, "learning_rate": 3.855723256510807e-05, "loss": 1.6345, "step": 220800 }, { "epoch": 0.95, "learning_rate": 3.855180417855483e-05, "loss": 1.5963, "step": 220900 }, { "epoch": 0.95, "learning_rate": 3.85463757920016e-05, "loss": 1.6111, "step": 221000 }, { "epoch": 0.95, "learning_rate": 3.8540947405448364e-05, "loss": 1.6211, "step": 221100 }, { "epoch": 0.95, "learning_rate": 3.8535519018895126e-05, "loss": 1.6116, "step": 221200 }, { "epoch": 0.95, "learning_rate": 3.8530090632341894e-05, "loss": 1.5874, "step": 221300 }, { "epoch": 0.95, "learning_rate": 3.852466224578866e-05, "loss": 1.5935, "step": 221400 }, { "epoch": 0.95, "learning_rate": 3.8519233859235425e-05, "loss": 1.6263, "step": 221500 }, { "epoch": 0.95, "learning_rate": 3.8513805472682194e-05, "loss": 1.6139, "step": 221600 }, { "epoch": 0.95, "learning_rate": 3.8508377086128955e-05, "loss": 1.5891, "step": 221700 }, { "epoch": 0.95, "learning_rate": 3.850294869957572e-05, "loss": 1.6065, "step": 221800 }, { "epoch": 0.95, "learning_rate": 3.8497520313022486e-05, "loss": 1.6147, "step": 221900 }, { "epoch": 0.95, "learning_rate": 3.849209192646925e-05, "loss": 1.586, "step": 222000 }, { "epoch": 0.95, "learning_rate": 3.848666353991601e-05, "loss": 1.6067, "step": 222100 }, { "epoch": 0.95, "learning_rate": 3.8481289437228314e-05, "loss": 1.6126, "step": 222200 }, { "epoch": 0.96, "learning_rate": 3.8475861050675076e-05, "loss": 1.6568, "step": 222300 }, { "epoch": 0.96, "learning_rate": 3.8470432664121845e-05, "loss": 1.6035, "step": 222400 }, { "epoch": 0.96, "learning_rate": 3.846500427756861e-05, "loss": 1.601, "step": 222500 }, { "epoch": 0.96, "learning_rate": 3.845957589101537e-05, "loss": 1.5908, "step": 222600 }, { "epoch": 0.96, "learning_rate": 3.845414750446214e-05, "loss": 1.6119, "step": 222700 }, { "epoch": 0.96, "learning_rate": 3.84487191179089e-05, "loss": 1.6209, "step": 222800 }, { "epoch": 0.96, "learning_rate": 3.844329073135566e-05, "loss": 1.6223, "step": 222900 }, { "epoch": 0.96, "learning_rate": 3.843786234480243e-05, "loss": 1.6028, "step": 223000 }, { "epoch": 0.96, "learning_rate": 3.84324339582492e-05, "loss": 1.5922, "step": 223100 }, { "epoch": 0.96, "learning_rate": 3.842700557169596e-05, "loss": 1.6253, "step": 223200 }, { "epoch": 0.96, "learning_rate": 3.842157718514273e-05, "loss": 1.586, "step": 223300 }, { "epoch": 0.96, "learning_rate": 3.841614879858949e-05, "loss": 1.6246, "step": 223400 }, { "epoch": 0.96, "learning_rate": 3.841072041203625e-05, "loss": 1.5854, "step": 223500 }, { "epoch": 0.96, "learning_rate": 3.840529202548302e-05, "loss": 1.6129, "step": 223600 }, { "epoch": 0.96, "learning_rate": 3.839986363892978e-05, "loss": 1.6101, "step": 223700 }, { "epoch": 0.96, "learning_rate": 3.8394435252376545e-05, "loss": 1.5877, "step": 223800 }, { "epoch": 0.96, "learning_rate": 3.8389006865823314e-05, "loss": 1.6158, "step": 223900 }, { "epoch": 0.96, "learning_rate": 3.8383578479270076e-05, "loss": 1.6209, "step": 224000 }, { "epoch": 0.96, "learning_rate": 3.8378150092716844e-05, "loss": 1.614, "step": 224100 }, { "epoch": 0.96, "learning_rate": 3.837272170616361e-05, "loss": 1.614, "step": 224200 }, { "epoch": 0.96, "learning_rate": 3.8367293319610375e-05, "loss": 1.5698, "step": 224300 }, { "epoch": 0.96, "learning_rate": 3.8361864933057143e-05, "loss": 1.6134, "step": 224400 }, { "epoch": 0.96, "learning_rate": 3.8356436546503905e-05, "loss": 1.6126, "step": 224500 }, { "epoch": 0.96, "learning_rate": 3.835100815995067e-05, "loss": 1.6149, "step": 224600 }, { "epoch": 0.97, "learning_rate": 3.8345579773397436e-05, "loss": 1.5913, "step": 224700 }, { "epoch": 0.97, "learning_rate": 3.83401513868442e-05, "loss": 1.6153, "step": 224800 }, { "epoch": 0.97, "learning_rate": 3.833472300029096e-05, "loss": 1.6078, "step": 224900 }, { "epoch": 0.97, "learning_rate": 3.832929461373773e-05, "loss": 1.6227, "step": 225000 }, { "epoch": 0.97, "eval_loss": 1.4662607908248901, "eval_runtime": 19.0457, "eval_samples_per_second": 525.054, "eval_steps_per_second": 16.434, "step": 225000 }, { "epoch": 0.97, "learning_rate": 3.832386622718449e-05, "loss": 1.6016, "step": 225100 }, { "epoch": 0.97, "learning_rate": 3.831843784063126e-05, "loss": 1.6083, "step": 225200 }, { "epoch": 0.97, "learning_rate": 3.831300945407803e-05, "loss": 1.6064, "step": 225300 }, { "epoch": 0.97, "learning_rate": 3.830758106752479e-05, "loss": 1.5936, "step": 225400 }, { "epoch": 0.97, "learning_rate": 3.830215268097155e-05, "loss": 1.6125, "step": 225500 }, { "epoch": 0.97, "learning_rate": 3.829672429441832e-05, "loss": 1.6469, "step": 225600 }, { "epoch": 0.97, "learning_rate": 3.829129590786508e-05, "loss": 1.644, "step": 225700 }, { "epoch": 0.97, "learning_rate": 3.8285867521311844e-05, "loss": 1.5836, "step": 225800 }, { "epoch": 0.97, "learning_rate": 3.828043913475861e-05, "loss": 1.6373, "step": 225900 }, { "epoch": 0.97, "learning_rate": 3.8275010748205374e-05, "loss": 1.6034, "step": 226000 }, { "epoch": 0.97, "learning_rate": 3.826958236165214e-05, "loss": 1.6096, "step": 226100 }, { "epoch": 0.97, "learning_rate": 3.826415397509891e-05, "loss": 1.5963, "step": 226200 }, { "epoch": 0.97, "learning_rate": 3.825872558854567e-05, "loss": 1.597, "step": 226300 }, { "epoch": 0.97, "learning_rate": 3.8253297201992435e-05, "loss": 1.582, "step": 226400 }, { "epoch": 0.97, "learning_rate": 3.8247868815439204e-05, "loss": 1.5919, "step": 226500 }, { "epoch": 0.97, "learning_rate": 3.8242440428885966e-05, "loss": 1.6027, "step": 226600 }, { "epoch": 0.97, "learning_rate": 3.8237012042332734e-05, "loss": 1.6298, "step": 226700 }, { "epoch": 0.97, "learning_rate": 3.8231583655779496e-05, "loss": 1.6083, "step": 226800 }, { "epoch": 0.97, "learning_rate": 3.822615526922626e-05, "loss": 1.5936, "step": 226900 }, { "epoch": 0.98, "learning_rate": 3.822072688267303e-05, "loss": 1.6154, "step": 227000 }, { "epoch": 0.98, "learning_rate": 3.821529849611979e-05, "loss": 1.6154, "step": 227100 }, { "epoch": 0.98, "learning_rate": 3.820987010956655e-05, "loss": 1.6051, "step": 227200 }, { "epoch": 0.98, "learning_rate": 3.8204441723013326e-05, "loss": 1.625, "step": 227300 }, { "epoch": 0.98, "learning_rate": 3.819901333646009e-05, "loss": 1.6175, "step": 227400 }, { "epoch": 0.98, "learning_rate": 3.819358494990685e-05, "loss": 1.6182, "step": 227500 }, { "epoch": 0.98, "learning_rate": 3.818815656335362e-05, "loss": 1.5886, "step": 227600 }, { "epoch": 0.98, "learning_rate": 3.818272817680038e-05, "loss": 1.6225, "step": 227700 }, { "epoch": 0.98, "learning_rate": 3.817729979024714e-05, "loss": 1.6206, "step": 227800 }, { "epoch": 0.98, "learning_rate": 3.817187140369391e-05, "loss": 1.5953, "step": 227900 }, { "epoch": 0.98, "learning_rate": 3.816644301714067e-05, "loss": 1.6174, "step": 228000 }, { "epoch": 0.98, "learning_rate": 3.816101463058744e-05, "loss": 1.6002, "step": 228100 }, { "epoch": 0.98, "learning_rate": 3.81555862440342e-05, "loss": 1.6052, "step": 228200 }, { "epoch": 0.98, "learning_rate": 3.815015785748097e-05, "loss": 1.6123, "step": 228300 }, { "epoch": 0.98, "learning_rate": 3.8144729470927734e-05, "loss": 1.6096, "step": 228400 }, { "epoch": 0.98, "learning_rate": 3.81393010843745e-05, "loss": 1.6018, "step": 228500 }, { "epoch": 0.98, "learning_rate": 3.8133872697821264e-05, "loss": 1.5998, "step": 228600 }, { "epoch": 0.98, "learning_rate": 3.812844431126803e-05, "loss": 1.5882, "step": 228700 }, { "epoch": 0.98, "learning_rate": 3.8123015924714795e-05, "loss": 1.6218, "step": 228800 }, { "epoch": 0.98, "learning_rate": 3.8117587538161557e-05, "loss": 1.6005, "step": 228900 }, { "epoch": 0.98, "learning_rate": 3.8112159151608325e-05, "loss": 1.587, "step": 229000 }, { "epoch": 0.98, "learning_rate": 3.810673076505509e-05, "loss": 1.5893, "step": 229100 }, { "epoch": 0.98, "learning_rate": 3.810130237850185e-05, "loss": 1.5836, "step": 229200 }, { "epoch": 0.99, "learning_rate": 3.809587399194862e-05, "loss": 1.595, "step": 229300 }, { "epoch": 0.99, "learning_rate": 3.8090445605395386e-05, "loss": 1.6032, "step": 229400 }, { "epoch": 0.99, "learning_rate": 3.808501721884215e-05, "loss": 1.5978, "step": 229500 }, { "epoch": 0.99, "learning_rate": 3.807958883228892e-05, "loss": 1.5997, "step": 229600 }, { "epoch": 0.99, "learning_rate": 3.807416044573568e-05, "loss": 1.5946, "step": 229700 }, { "epoch": 0.99, "learning_rate": 3.806873205918244e-05, "loss": 1.5957, "step": 229800 }, { "epoch": 0.99, "learning_rate": 3.806330367262921e-05, "loss": 1.5728, "step": 229900 }, { "epoch": 0.99, "learning_rate": 3.805787528607597e-05, "loss": 1.6266, "step": 230000 }, { "epoch": 0.99, "eval_loss": 1.4668673276901245, "eval_runtime": 17.8239, "eval_samples_per_second": 561.046, "eval_steps_per_second": 17.561, "step": 230000 }, { "epoch": 0.99, "learning_rate": 3.805244689952274e-05, "loss": 1.6237, "step": 230100 }, { "epoch": 0.99, "learning_rate": 3.80470185129695e-05, "loss": 1.5906, "step": 230200 }, { "epoch": 0.99, "learning_rate": 3.8041590126416263e-05, "loss": 1.5936, "step": 230300 }, { "epoch": 0.99, "learning_rate": 3.803616173986303e-05, "loss": 1.6093, "step": 230400 }, { "epoch": 0.99, "learning_rate": 3.80307333533098e-05, "loss": 1.6156, "step": 230500 }, { "epoch": 0.99, "learning_rate": 3.802530496675656e-05, "loss": 1.5972, "step": 230600 }, { "epoch": 0.99, "learning_rate": 3.801987658020333e-05, "loss": 1.6244, "step": 230700 }, { "epoch": 0.99, "learning_rate": 3.801444819365009e-05, "loss": 1.5729, "step": 230800 }, { "epoch": 0.99, "learning_rate": 3.8009019807096855e-05, "loss": 1.5968, "step": 230900 }, { "epoch": 0.99, "learning_rate": 3.8003591420543624e-05, "loss": 1.6167, "step": 231000 }, { "epoch": 0.99, "learning_rate": 3.7998163033990386e-05, "loss": 1.6071, "step": 231100 }, { "epoch": 0.99, "learning_rate": 3.799273464743715e-05, "loss": 1.5794, "step": 231200 }, { "epoch": 0.99, "learning_rate": 3.7987306260883916e-05, "loss": 1.6213, "step": 231300 }, { "epoch": 0.99, "learning_rate": 3.798187787433068e-05, "loss": 1.6047, "step": 231400 }, { "epoch": 0.99, "learning_rate": 3.7976449487777447e-05, "loss": 1.6175, "step": 231500 }, { "epoch": 0.99, "learning_rate": 3.7971021101224215e-05, "loss": 1.618, "step": 231600 }, { "epoch": 1.0, "learning_rate": 3.796559271467098e-05, "loss": 1.5891, "step": 231700 }, { "epoch": 1.0, "learning_rate": 3.796016432811774e-05, "loss": 1.5905, "step": 231800 }, { "epoch": 1.0, "learning_rate": 3.795473594156451e-05, "loss": 1.5845, "step": 231900 }, { "epoch": 1.0, "learning_rate": 3.794930755501127e-05, "loss": 1.5899, "step": 232000 }, { "epoch": 1.0, "learning_rate": 3.794387916845804e-05, "loss": 1.5894, "step": 232100 }, { "epoch": 1.0, "learning_rate": 3.79384507819048e-05, "loss": 1.6205, "step": 232200 }, { "epoch": 1.0, "learning_rate": 3.793302239535156e-05, "loss": 1.5876, "step": 232300 }, { "epoch": 1.0, "learning_rate": 3.792759400879833e-05, "loss": 1.5929, "step": 232400 }, { "epoch": 1.0, "learning_rate": 3.79221656222451e-05, "loss": 1.6052, "step": 232500 }, { "epoch": 1.0, "learning_rate": 3.791673723569186e-05, "loss": 1.5764, "step": 232600 }, { "epoch": 1.0, "learning_rate": 3.791130884913863e-05, "loss": 1.6319, "step": 232700 }, { "epoch": 1.0, "learning_rate": 3.790588046258539e-05, "loss": 1.6024, "step": 232800 }, { "epoch": 1.0, "learning_rate": 3.7900452076032154e-05, "loss": 1.5373, "step": 232900 }, { "epoch": 1.0, "learning_rate": 3.789502368947892e-05, "loss": 1.5602, "step": 233000 }, { "epoch": 1.0, "learning_rate": 3.7889595302925684e-05, "loss": 1.562, "step": 233100 }, { "epoch": 1.0, "learning_rate": 3.7884166916372446e-05, "loss": 1.5622, "step": 233200 }, { "epoch": 1.0, "learning_rate": 3.7878738529819215e-05, "loss": 1.5711, "step": 233300 }, { "epoch": 1.0, "learning_rate": 3.7873310143265976e-05, "loss": 1.5624, "step": 233400 }, { "epoch": 1.0, "learning_rate": 3.7867881756712745e-05, "loss": 1.622, "step": 233500 }, { "epoch": 1.0, "learning_rate": 3.7862453370159514e-05, "loss": 1.5584, "step": 233600 }, { "epoch": 1.0, "learning_rate": 3.7857024983606276e-05, "loss": 1.5626, "step": 233700 }, { "epoch": 1.0, "learning_rate": 3.785159659705304e-05, "loss": 1.5756, "step": 233800 }, { "epoch": 1.0, "learning_rate": 3.7846168210499806e-05, "loss": 1.6049, "step": 233900 }, { "epoch": 1.01, "learning_rate": 3.784073982394657e-05, "loss": 1.5274, "step": 234000 }, { "epoch": 1.01, "learning_rate": 3.783531143739334e-05, "loss": 1.5675, "step": 234100 }, { "epoch": 1.01, "learning_rate": 3.78298830508401e-05, "loss": 1.5812, "step": 234200 }, { "epoch": 1.01, "learning_rate": 3.782445466428686e-05, "loss": 1.5798, "step": 234300 }, { "epoch": 1.01, "learning_rate": 3.781902627773363e-05, "loss": 1.5817, "step": 234400 }, { "epoch": 1.01, "learning_rate": 3.781359789118039e-05, "loss": 1.5589, "step": 234500 }, { "epoch": 1.01, "learning_rate": 3.780816950462716e-05, "loss": 1.5635, "step": 234600 }, { "epoch": 1.01, "learning_rate": 3.780274111807393e-05, "loss": 1.5495, "step": 234700 }, { "epoch": 1.01, "learning_rate": 3.779731273152069e-05, "loss": 1.5788, "step": 234800 }, { "epoch": 1.01, "learning_rate": 3.779188434496745e-05, "loss": 1.5325, "step": 234900 }, { "epoch": 1.01, "learning_rate": 3.778645595841422e-05, "loss": 1.582, "step": 235000 }, { "epoch": 1.01, "eval_loss": 1.4644287824630737, "eval_runtime": 17.7873, "eval_samples_per_second": 562.198, "eval_steps_per_second": 17.597, "step": 235000 }, { "epoch": 1.01, "learning_rate": 3.778102757186098e-05, "loss": 1.5531, "step": 235100 }, { "epoch": 1.01, "learning_rate": 3.7775599185307744e-05, "loss": 1.5584, "step": 235200 }, { "epoch": 1.01, "learning_rate": 3.777017079875451e-05, "loss": 1.5617, "step": 235300 }, { "epoch": 1.01, "learning_rate": 3.7764742412201275e-05, "loss": 1.5662, "step": 235400 }, { "epoch": 1.01, "learning_rate": 3.7759314025648044e-05, "loss": 1.5365, "step": 235500 }, { "epoch": 1.01, "learning_rate": 3.7753885639094805e-05, "loss": 1.5919, "step": 235600 }, { "epoch": 1.01, "learning_rate": 3.7748457252541574e-05, "loss": 1.5485, "step": 235700 }, { "epoch": 1.01, "learning_rate": 3.7743028865988336e-05, "loss": 1.5795, "step": 235800 }, { "epoch": 1.01, "learning_rate": 3.7737600479435105e-05, "loss": 1.5378, "step": 235900 }, { "epoch": 1.01, "learning_rate": 3.7732172092881866e-05, "loss": 1.5702, "step": 236000 }, { "epoch": 1.01, "learning_rate": 3.7726743706328635e-05, "loss": 1.586, "step": 236100 }, { "epoch": 1.01, "learning_rate": 3.77213153197754e-05, "loss": 1.568, "step": 236200 }, { "epoch": 1.02, "learning_rate": 3.771588693322216e-05, "loss": 1.5939, "step": 236300 }, { "epoch": 1.02, "learning_rate": 3.771045854666893e-05, "loss": 1.5841, "step": 236400 }, { "epoch": 1.02, "learning_rate": 3.770503016011569e-05, "loss": 1.5841, "step": 236500 }, { "epoch": 1.02, "learning_rate": 3.769960177356245e-05, "loss": 1.543, "step": 236600 }, { "epoch": 1.02, "learning_rate": 3.769417338700923e-05, "loss": 1.5428, "step": 236700 }, { "epoch": 1.02, "learning_rate": 3.768874500045599e-05, "loss": 1.5519, "step": 236800 }, { "epoch": 1.02, "learning_rate": 3.768331661390275e-05, "loss": 1.5362, "step": 236900 }, { "epoch": 1.02, "learning_rate": 3.767788822734952e-05, "loss": 1.543, "step": 237000 }, { "epoch": 1.02, "learning_rate": 3.767245984079628e-05, "loss": 1.5799, "step": 237100 }, { "epoch": 1.02, "learning_rate": 3.766703145424304e-05, "loss": 1.575, "step": 237200 }, { "epoch": 1.02, "learning_rate": 3.766160306768981e-05, "loss": 1.6244, "step": 237300 }, { "epoch": 1.02, "learning_rate": 3.7656174681136573e-05, "loss": 1.5605, "step": 237400 }, { "epoch": 1.02, "learning_rate": 3.765074629458334e-05, "loss": 1.5672, "step": 237500 }, { "epoch": 1.02, "learning_rate": 3.7645317908030104e-05, "loss": 1.5475, "step": 237600 }, { "epoch": 1.02, "learning_rate": 3.7639889521476866e-05, "loss": 1.5619, "step": 237700 }, { "epoch": 1.02, "learning_rate": 3.7634461134923634e-05, "loss": 1.563, "step": 237800 }, { "epoch": 1.02, "learning_rate": 3.76290327483704e-05, "loss": 1.5412, "step": 237900 }, { "epoch": 1.02, "learning_rate": 3.7623604361817165e-05, "loss": 1.5627, "step": 238000 }, { "epoch": 1.02, "learning_rate": 3.7618175975263934e-05, "loss": 1.5718, "step": 238100 }, { "epoch": 1.02, "learning_rate": 3.7612747588710696e-05, "loss": 1.5722, "step": 238200 }, { "epoch": 1.02, "learning_rate": 3.760731920215746e-05, "loss": 1.5315, "step": 238300 }, { "epoch": 1.02, "learning_rate": 3.7601890815604226e-05, "loss": 1.5888, "step": 238400 }, { "epoch": 1.02, "learning_rate": 3.759646242905099e-05, "loss": 1.5922, "step": 238500 }, { "epoch": 1.03, "learning_rate": 3.759103404249775e-05, "loss": 1.5701, "step": 238600 }, { "epoch": 1.03, "learning_rate": 3.758560565594452e-05, "loss": 1.543, "step": 238700 }, { "epoch": 1.03, "learning_rate": 3.758017726939129e-05, "loss": 1.5407, "step": 238800 }, { "epoch": 1.03, "learning_rate": 3.757474888283805e-05, "loss": 1.5368, "step": 238900 }, { "epoch": 1.03, "learning_rate": 3.756932049628482e-05, "loss": 1.5643, "step": 239000 }, { "epoch": 1.03, "learning_rate": 3.756389210973158e-05, "loss": 1.5682, "step": 239100 }, { "epoch": 1.03, "learning_rate": 3.755846372317834e-05, "loss": 1.6079, "step": 239200 }, { "epoch": 1.03, "learning_rate": 3.755303533662511e-05, "loss": 1.5494, "step": 239300 }, { "epoch": 1.03, "learning_rate": 3.754760695007187e-05, "loss": 1.5696, "step": 239400 }, { "epoch": 1.03, "learning_rate": 3.754217856351864e-05, "loss": 1.589, "step": 239500 }, { "epoch": 1.03, "learning_rate": 3.75367501769654e-05, "loss": 1.5783, "step": 239600 }, { "epoch": 1.03, "learning_rate": 3.7531321790412164e-05, "loss": 1.5656, "step": 239700 }, { "epoch": 1.03, "learning_rate": 3.752589340385893e-05, "loss": 1.5555, "step": 239800 }, { "epoch": 1.03, "learning_rate": 3.75204650173057e-05, "loss": 1.5458, "step": 239900 }, { "epoch": 1.03, "learning_rate": 3.7515036630752463e-05, "loss": 1.554, "step": 240000 }, { "epoch": 1.03, "eval_loss": 1.4660414457321167, "eval_runtime": 17.7415, "eval_samples_per_second": 563.65, "eval_steps_per_second": 17.642, "step": 240000 }, { "epoch": 1.03, "learning_rate": 3.750960824419923e-05, "loss": 1.5507, "step": 240100 }, { "epoch": 1.03, "learning_rate": 3.7504179857645994e-05, "loss": 1.576, "step": 240200 }, { "epoch": 1.03, "learning_rate": 3.7498751471092756e-05, "loss": 1.5729, "step": 240300 }, { "epoch": 1.03, "learning_rate": 3.7493323084539525e-05, "loss": 1.5668, "step": 240400 }, { "epoch": 1.03, "learning_rate": 3.7487894697986286e-05, "loss": 1.5422, "step": 240500 }, { "epoch": 1.03, "learning_rate": 3.748246631143305e-05, "loss": 1.5584, "step": 240600 }, { "epoch": 1.03, "learning_rate": 3.747703792487982e-05, "loss": 1.5586, "step": 240700 }, { "epoch": 1.03, "learning_rate": 3.747160953832658e-05, "loss": 1.581, "step": 240800 }, { "epoch": 1.03, "learning_rate": 3.746618115177335e-05, "loss": 1.5551, "step": 240900 }, { "epoch": 1.04, "learning_rate": 3.7460752765220116e-05, "loss": 1.5722, "step": 241000 }, { "epoch": 1.04, "learning_rate": 3.745532437866688e-05, "loss": 1.549, "step": 241100 }, { "epoch": 1.04, "learning_rate": 3.744989599211364e-05, "loss": 1.5682, "step": 241200 }, { "epoch": 1.04, "learning_rate": 3.744446760556041e-05, "loss": 1.5629, "step": 241300 }, { "epoch": 1.04, "learning_rate": 3.743903921900717e-05, "loss": 1.558, "step": 241400 }, { "epoch": 1.04, "learning_rate": 3.743361083245394e-05, "loss": 1.5554, "step": 241500 }, { "epoch": 1.04, "learning_rate": 3.74281824459007e-05, "loss": 1.5644, "step": 241600 }, { "epoch": 1.04, "learning_rate": 3.742275405934746e-05, "loss": 1.5892, "step": 241700 }, { "epoch": 1.04, "learning_rate": 3.741732567279423e-05, "loss": 1.5873, "step": 241800 }, { "epoch": 1.04, "learning_rate": 3.741189728624099e-05, "loss": 1.5837, "step": 241900 }, { "epoch": 1.04, "learning_rate": 3.740646889968776e-05, "loss": 1.5499, "step": 242000 }, { "epoch": 1.04, "learning_rate": 3.740104051313453e-05, "loss": 1.5734, "step": 242100 }, { "epoch": 1.04, "learning_rate": 3.739561212658129e-05, "loss": 1.5728, "step": 242200 }, { "epoch": 1.04, "learning_rate": 3.7390183740028054e-05, "loss": 1.5915, "step": 242300 }, { "epoch": 1.04, "learning_rate": 3.738475535347482e-05, "loss": 1.5816, "step": 242400 }, { "epoch": 1.04, "learning_rate": 3.7379326966921585e-05, "loss": 1.5696, "step": 242500 }, { "epoch": 1.04, "learning_rate": 3.737389858036835e-05, "loss": 1.5796, "step": 242600 }, { "epoch": 1.04, "learning_rate": 3.7368470193815115e-05, "loss": 1.5548, "step": 242700 }, { "epoch": 1.04, "learning_rate": 3.736304180726188e-05, "loss": 1.5453, "step": 242800 }, { "epoch": 1.04, "learning_rate": 3.7357613420708646e-05, "loss": 1.5668, "step": 242900 }, { "epoch": 1.04, "learning_rate": 3.7352185034155415e-05, "loss": 1.5536, "step": 243000 }, { "epoch": 1.04, "learning_rate": 3.7346756647602176e-05, "loss": 1.5793, "step": 243100 }, { "epoch": 1.04, "learning_rate": 3.734132826104894e-05, "loss": 1.571, "step": 243200 }, { "epoch": 1.05, "learning_rate": 3.733589987449571e-05, "loss": 1.5384, "step": 243300 }, { "epoch": 1.05, "learning_rate": 3.733047148794247e-05, "loss": 1.5789, "step": 243400 }, { "epoch": 1.05, "learning_rate": 3.732504310138924e-05, "loss": 1.5537, "step": 243500 }, { "epoch": 1.05, "learning_rate": 3.7319614714836e-05, "loss": 1.5811, "step": 243600 }, { "epoch": 1.05, "learning_rate": 3.731418632828276e-05, "loss": 1.5629, "step": 243700 }, { "epoch": 1.05, "learning_rate": 3.730875794172953e-05, "loss": 1.5489, "step": 243800 }, { "epoch": 1.05, "learning_rate": 3.730332955517629e-05, "loss": 1.5897, "step": 243900 }, { "epoch": 1.05, "learning_rate": 3.729790116862306e-05, "loss": 1.5541, "step": 244000 }, { "epoch": 1.05, "learning_rate": 3.729247278206983e-05, "loss": 1.5482, "step": 244100 }, { "epoch": 1.05, "learning_rate": 3.728704439551659e-05, "loss": 1.5748, "step": 244200 }, { "epoch": 1.05, "learning_rate": 3.728161600896335e-05, "loss": 1.5724, "step": 244300 }, { "epoch": 1.05, "learning_rate": 3.727618762241012e-05, "loss": 1.5728, "step": 244400 }, { "epoch": 1.05, "learning_rate": 3.727075923585688e-05, "loss": 1.5803, "step": 244500 }, { "epoch": 1.05, "learning_rate": 3.7265330849303645e-05, "loss": 1.5593, "step": 244600 }, { "epoch": 1.05, "learning_rate": 3.7259902462750414e-05, "loss": 1.5943, "step": 244700 }, { "epoch": 1.05, "learning_rate": 3.7254474076197176e-05, "loss": 1.5571, "step": 244800 }, { "epoch": 1.05, "learning_rate": 3.724904568964394e-05, "loss": 1.5679, "step": 244900 }, { "epoch": 1.05, "learning_rate": 3.7243617303090706e-05, "loss": 1.5831, "step": 245000 }, { "epoch": 1.05, "eval_loss": 1.4603722095489502, "eval_runtime": 17.8071, "eval_samples_per_second": 561.575, "eval_steps_per_second": 17.577, "step": 245000 }, { "epoch": 1.05, "learning_rate": 3.7238188916537475e-05, "loss": 1.5906, "step": 245100 }, { "epoch": 1.05, "learning_rate": 3.723276052998424e-05, "loss": 1.5713, "step": 245200 }, { "epoch": 1.05, "learning_rate": 3.7227332143431005e-05, "loss": 1.57, "step": 245300 }, { "epoch": 1.05, "learning_rate": 3.722190375687777e-05, "loss": 1.5683, "step": 245400 }, { "epoch": 1.05, "learning_rate": 3.7216475370324536e-05, "loss": 1.5457, "step": 245500 }, { "epoch": 1.06, "learning_rate": 3.72110469837713e-05, "loss": 1.5651, "step": 245600 }, { "epoch": 1.06, "learning_rate": 3.720561859721806e-05, "loss": 1.6014, "step": 245700 }, { "epoch": 1.06, "learning_rate": 3.720019021066483e-05, "loss": 1.5894, "step": 245800 }, { "epoch": 1.06, "learning_rate": 3.719476182411159e-05, "loss": 1.5541, "step": 245900 }, { "epoch": 1.06, "learning_rate": 3.718933343755835e-05, "loss": 1.5763, "step": 246000 }, { "epoch": 1.06, "learning_rate": 3.718390505100513e-05, "loss": 1.5676, "step": 246100 }, { "epoch": 1.06, "learning_rate": 3.717847666445189e-05, "loss": 1.6003, "step": 246200 }, { "epoch": 1.06, "learning_rate": 3.717304827789865e-05, "loss": 1.5466, "step": 246300 }, { "epoch": 1.06, "learning_rate": 3.716761989134542e-05, "loss": 1.5364, "step": 246400 }, { "epoch": 1.06, "learning_rate": 3.716219150479218e-05, "loss": 1.5713, "step": 246500 }, { "epoch": 1.06, "learning_rate": 3.7156763118238944e-05, "loss": 1.5586, "step": 246600 }, { "epoch": 1.06, "learning_rate": 3.715133473168571e-05, "loss": 1.5812, "step": 246700 }, { "epoch": 1.06, "learning_rate": 3.7145906345132474e-05, "loss": 1.6289, "step": 246800 }, { "epoch": 1.06, "learning_rate": 3.7140477958579236e-05, "loss": 1.5595, "step": 246900 }, { "epoch": 1.06, "learning_rate": 3.7135049572026005e-05, "loss": 1.5414, "step": 247000 }, { "epoch": 1.06, "learning_rate": 3.712962118547277e-05, "loss": 1.6069, "step": 247100 }, { "epoch": 1.06, "learning_rate": 3.7124192798919535e-05, "loss": 1.5832, "step": 247200 }, { "epoch": 1.06, "learning_rate": 3.7118764412366304e-05, "loss": 1.5645, "step": 247300 }, { "epoch": 1.06, "learning_rate": 3.7113336025813066e-05, "loss": 1.5702, "step": 247400 }, { "epoch": 1.06, "learning_rate": 3.7107907639259834e-05, "loss": 1.5632, "step": 247500 }, { "epoch": 1.06, "learning_rate": 3.7102479252706596e-05, "loss": 1.5915, "step": 247600 }, { "epoch": 1.06, "learning_rate": 3.709705086615336e-05, "loss": 1.5472, "step": 247700 }, { "epoch": 1.06, "learning_rate": 3.709162247960013e-05, "loss": 1.5432, "step": 247800 }, { "epoch": 1.06, "learning_rate": 3.708619409304689e-05, "loss": 1.5848, "step": 247900 }, { "epoch": 1.07, "learning_rate": 3.708076570649365e-05, "loss": 1.5771, "step": 248000 }, { "epoch": 1.07, "learning_rate": 3.707533731994042e-05, "loss": 1.5733, "step": 248100 }, { "epoch": 1.07, "learning_rate": 3.706990893338719e-05, "loss": 1.564, "step": 248200 }, { "epoch": 1.07, "learning_rate": 3.706448054683395e-05, "loss": 1.5416, "step": 248300 }, { "epoch": 1.07, "learning_rate": 3.705905216028072e-05, "loss": 1.5679, "step": 248400 }, { "epoch": 1.07, "learning_rate": 3.705362377372748e-05, "loss": 1.5614, "step": 248500 }, { "epoch": 1.07, "learning_rate": 3.704819538717424e-05, "loss": 1.5452, "step": 248600 }, { "epoch": 1.07, "learning_rate": 3.704276700062101e-05, "loss": 1.551, "step": 248700 }, { "epoch": 1.07, "learning_rate": 3.703733861406777e-05, "loss": 1.5544, "step": 248800 }, { "epoch": 1.07, "learning_rate": 3.7031910227514535e-05, "loss": 1.5753, "step": 248900 }, { "epoch": 1.07, "learning_rate": 3.70264818409613e-05, "loss": 1.5518, "step": 249000 }, { "epoch": 1.07, "learning_rate": 3.7021053454408065e-05, "loss": 1.581, "step": 249100 }, { "epoch": 1.07, "learning_rate": 3.7015625067854834e-05, "loss": 1.556, "step": 249200 }, { "epoch": 1.07, "learning_rate": 3.70101966813016e-05, "loss": 1.5366, "step": 249300 }, { "epoch": 1.07, "learning_rate": 3.7004768294748364e-05, "loss": 1.5959, "step": 249400 }, { "epoch": 1.07, "learning_rate": 3.699933990819513e-05, "loss": 1.5555, "step": 249500 }, { "epoch": 1.07, "learning_rate": 3.6993911521641895e-05, "loss": 1.5879, "step": 249600 }, { "epoch": 1.07, "learning_rate": 3.698848313508866e-05, "loss": 1.5679, "step": 249700 }, { "epoch": 1.07, "learning_rate": 3.6983054748535425e-05, "loss": 1.5627, "step": 249800 }, { "epoch": 1.07, "learning_rate": 3.697762636198219e-05, "loss": 1.581, "step": 249900 }, { "epoch": 1.07, "learning_rate": 3.697219797542895e-05, "loss": 1.5692, "step": 250000 }, { "epoch": 1.07, "eval_loss": 1.4626483917236328, "eval_runtime": 17.7967, "eval_samples_per_second": 561.901, "eval_steps_per_second": 17.588, "step": 250000 }, { "epoch": 1.07, "learning_rate": 3.696676958887572e-05, "loss": 1.5572, "step": 250100 }, { "epoch": 1.07, "learning_rate": 3.696134120232248e-05, "loss": 1.5778, "step": 250200 }, { "epoch": 1.08, "learning_rate": 3.695591281576925e-05, "loss": 1.5707, "step": 250300 }, { "epoch": 1.08, "learning_rate": 3.695048442921602e-05, "loss": 1.5474, "step": 250400 }, { "epoch": 1.08, "learning_rate": 3.694505604266278e-05, "loss": 1.5661, "step": 250500 }, { "epoch": 1.08, "learning_rate": 3.693962765610954e-05, "loss": 1.5771, "step": 250600 }, { "epoch": 1.08, "learning_rate": 3.693419926955631e-05, "loss": 1.5655, "step": 250700 }, { "epoch": 1.08, "learning_rate": 3.692877088300307e-05, "loss": 1.565, "step": 250800 }, { "epoch": 1.08, "learning_rate": 3.692334249644983e-05, "loss": 1.553, "step": 250900 }, { "epoch": 1.08, "learning_rate": 3.69179141098966e-05, "loss": 1.5797, "step": 251000 }, { "epoch": 1.08, "learning_rate": 3.6912485723343364e-05, "loss": 1.5696, "step": 251100 }, { "epoch": 1.08, "learning_rate": 3.690705733679013e-05, "loss": 1.5366, "step": 251200 }, { "epoch": 1.08, "learning_rate": 3.6901628950236894e-05, "loss": 1.5782, "step": 251300 }, { "epoch": 1.08, "learning_rate": 3.689620056368366e-05, "loss": 1.5443, "step": 251400 }, { "epoch": 1.08, "learning_rate": 3.689077217713043e-05, "loss": 1.5377, "step": 251500 }, { "epoch": 1.08, "learning_rate": 3.688534379057719e-05, "loss": 1.5757, "step": 251600 }, { "epoch": 1.08, "learning_rate": 3.6879915404023955e-05, "loss": 1.5642, "step": 251700 }, { "epoch": 1.08, "learning_rate": 3.6874487017470724e-05, "loss": 1.5692, "step": 251800 }, { "epoch": 1.08, "learning_rate": 3.6869058630917486e-05, "loss": 1.5721, "step": 251900 }, { "epoch": 1.08, "learning_rate": 3.686363024436425e-05, "loss": 1.5283, "step": 252000 }, { "epoch": 1.08, "learning_rate": 3.6858201857811016e-05, "loss": 1.5645, "step": 252100 }, { "epoch": 1.08, "learning_rate": 3.685277347125778e-05, "loss": 1.5916, "step": 252200 }, { "epoch": 1.08, "learning_rate": 3.684734508470454e-05, "loss": 1.556, "step": 252300 }, { "epoch": 1.08, "learning_rate": 3.6841916698151315e-05, "loss": 1.5695, "step": 252400 }, { "epoch": 1.08, "learning_rate": 3.683648831159808e-05, "loss": 1.5723, "step": 252500 }, { "epoch": 1.09, "learning_rate": 3.683105992504484e-05, "loss": 1.576, "step": 252600 }, { "epoch": 1.09, "learning_rate": 3.682563153849161e-05, "loss": 1.5544, "step": 252700 }, { "epoch": 1.09, "learning_rate": 3.682020315193837e-05, "loss": 1.5653, "step": 252800 }, { "epoch": 1.09, "learning_rate": 3.681477476538513e-05, "loss": 1.5586, "step": 252900 }, { "epoch": 1.09, "learning_rate": 3.68093463788319e-05, "loss": 1.5375, "step": 253000 }, { "epoch": 1.09, "learning_rate": 3.680391799227866e-05, "loss": 1.5459, "step": 253100 }, { "epoch": 1.09, "learning_rate": 3.679848960572543e-05, "loss": 1.5852, "step": 253200 }, { "epoch": 1.09, "learning_rate": 3.679306121917219e-05, "loss": 1.5484, "step": 253300 }, { "epoch": 1.09, "learning_rate": 3.6787632832618955e-05, "loss": 1.58, "step": 253400 }, { "epoch": 1.09, "learning_rate": 3.678220444606573e-05, "loss": 1.5391, "step": 253500 }, { "epoch": 1.09, "learning_rate": 3.677677605951249e-05, "loss": 1.542, "step": 253600 }, { "epoch": 1.09, "learning_rate": 3.6771347672959254e-05, "loss": 1.5494, "step": 253700 }, { "epoch": 1.09, "learning_rate": 3.676591928640602e-05, "loss": 1.6048, "step": 253800 }, { "epoch": 1.09, "learning_rate": 3.6760490899852784e-05, "loss": 1.5955, "step": 253900 }, { "epoch": 1.09, "learning_rate": 3.6755062513299546e-05, "loss": 1.5669, "step": 254000 }, { "epoch": 1.09, "learning_rate": 3.6749634126746315e-05, "loss": 1.5435, "step": 254100 }, { "epoch": 1.09, "learning_rate": 3.674420574019308e-05, "loss": 1.5766, "step": 254200 }, { "epoch": 1.09, "learning_rate": 3.673877735363984e-05, "loss": 1.5561, "step": 254300 }, { "epoch": 1.09, "learning_rate": 3.673334896708661e-05, "loss": 1.565, "step": 254400 }, { "epoch": 1.09, "learning_rate": 3.6727920580533376e-05, "loss": 1.5592, "step": 254500 }, { "epoch": 1.09, "learning_rate": 3.672249219398014e-05, "loss": 1.5598, "step": 254600 }, { "epoch": 1.09, "learning_rate": 3.6717063807426906e-05, "loss": 1.5901, "step": 254700 }, { "epoch": 1.09, "learning_rate": 3.671163542087367e-05, "loss": 1.5766, "step": 254800 }, { "epoch": 1.1, "learning_rate": 3.670620703432043e-05, "loss": 1.5855, "step": 254900 }, { "epoch": 1.1, "learning_rate": 3.67007786477672e-05, "loss": 1.558, "step": 255000 }, { "epoch": 1.1, "eval_loss": 1.464188814163208, "eval_runtime": 17.8297, "eval_samples_per_second": 560.862, "eval_steps_per_second": 17.555, "step": 255000 }, { "epoch": 1.1, "learning_rate": 3.669535026121396e-05, "loss": 1.5314, "step": 255100 }, { "epoch": 1.1, "learning_rate": 3.668992187466073e-05, "loss": 1.5544, "step": 255200 }, { "epoch": 1.1, "learning_rate": 3.668449348810749e-05, "loss": 1.5317, "step": 255300 }, { "epoch": 1.1, "learning_rate": 3.667906510155425e-05, "loss": 1.5621, "step": 255400 }, { "epoch": 1.1, "learning_rate": 3.667363671500102e-05, "loss": 1.5577, "step": 255500 }, { "epoch": 1.1, "learning_rate": 3.666820832844779e-05, "loss": 1.6049, "step": 255600 }, { "epoch": 1.1, "learning_rate": 3.666277994189455e-05, "loss": 1.5713, "step": 255700 }, { "epoch": 1.1, "learning_rate": 3.665735155534132e-05, "loss": 1.5628, "step": 255800 }, { "epoch": 1.1, "learning_rate": 3.665192316878808e-05, "loss": 1.5665, "step": 255900 }, { "epoch": 1.1, "learning_rate": 3.6646494782234845e-05, "loss": 1.5555, "step": 256000 }, { "epoch": 1.1, "learning_rate": 3.664106639568161e-05, "loss": 1.5425, "step": 256100 }, { "epoch": 1.1, "learning_rate": 3.6635638009128375e-05, "loss": 1.5902, "step": 256200 }, { "epoch": 1.1, "learning_rate": 3.663020962257514e-05, "loss": 1.5094, "step": 256300 }, { "epoch": 1.1, "learning_rate": 3.6624781236021906e-05, "loss": 1.5773, "step": 256400 }, { "epoch": 1.1, "learning_rate": 3.661935284946867e-05, "loss": 1.5582, "step": 256500 }, { "epoch": 1.1, "learning_rate": 3.6613924462915436e-05, "loss": 1.5661, "step": 256600 }, { "epoch": 1.1, "learning_rate": 3.6608496076362205e-05, "loss": 1.5714, "step": 256700 }, { "epoch": 1.1, "learning_rate": 3.660306768980897e-05, "loss": 1.5805, "step": 256800 }, { "epoch": 1.1, "learning_rate": 3.659763930325573e-05, "loss": 1.6005, "step": 256900 }, { "epoch": 1.1, "learning_rate": 3.65922109167025e-05, "loss": 1.5594, "step": 257000 }, { "epoch": 1.1, "learning_rate": 3.658678253014926e-05, "loss": 1.564, "step": 257100 }, { "epoch": 1.1, "learning_rate": 3.658135414359603e-05, "loss": 1.5615, "step": 257200 }, { "epoch": 1.11, "learning_rate": 3.657592575704279e-05, "loss": 1.5556, "step": 257300 }, { "epoch": 1.11, "learning_rate": 3.657049737048955e-05, "loss": 1.5579, "step": 257400 }, { "epoch": 1.11, "learning_rate": 3.656506898393632e-05, "loss": 1.5476, "step": 257500 }, { "epoch": 1.11, "learning_rate": 3.655964059738308e-05, "loss": 1.5855, "step": 257600 }, { "epoch": 1.11, "learning_rate": 3.655421221082985e-05, "loss": 1.5781, "step": 257700 }, { "epoch": 1.11, "learning_rate": 3.654878382427662e-05, "loss": 1.5664, "step": 257800 }, { "epoch": 1.11, "learning_rate": 3.654335543772338e-05, "loss": 1.5789, "step": 257900 }, { "epoch": 1.11, "learning_rate": 3.653792705117014e-05, "loss": 1.5674, "step": 258000 }, { "epoch": 1.11, "learning_rate": 3.653249866461691e-05, "loss": 1.5452, "step": 258100 }, { "epoch": 1.11, "learning_rate": 3.6527070278063674e-05, "loss": 1.576, "step": 258200 }, { "epoch": 1.11, "learning_rate": 3.6521641891510435e-05, "loss": 1.5243, "step": 258300 }, { "epoch": 1.11, "learning_rate": 3.6516213504957204e-05, "loss": 1.5706, "step": 258400 }, { "epoch": 1.11, "learning_rate": 3.6510785118403966e-05, "loss": 1.5672, "step": 258500 }, { "epoch": 1.11, "learning_rate": 3.6505356731850735e-05, "loss": 1.5574, "step": 258600 }, { "epoch": 1.11, "learning_rate": 3.64999283452975e-05, "loss": 1.5636, "step": 258700 }, { "epoch": 1.11, "learning_rate": 3.6494499958744265e-05, "loss": 1.5683, "step": 258800 }, { "epoch": 1.11, "learning_rate": 3.648907157219103e-05, "loss": 1.6023, "step": 258900 }, { "epoch": 1.11, "learning_rate": 3.6483643185637796e-05, "loss": 1.6015, "step": 259000 }, { "epoch": 1.11, "learning_rate": 3.647821479908456e-05, "loss": 1.5853, "step": 259100 }, { "epoch": 1.11, "learning_rate": 3.6472786412531326e-05, "loss": 1.5587, "step": 259200 }, { "epoch": 1.11, "learning_rate": 3.646735802597809e-05, "loss": 1.5462, "step": 259300 }, { "epoch": 1.11, "learning_rate": 3.646192963942485e-05, "loss": 1.5808, "step": 259400 }, { "epoch": 1.11, "learning_rate": 3.645650125287162e-05, "loss": 1.5588, "step": 259500 }, { "epoch": 1.12, "learning_rate": 3.645107286631838e-05, "loss": 1.5724, "step": 259600 }, { "epoch": 1.12, "learning_rate": 3.644564447976514e-05, "loss": 1.5439, "step": 259700 }, { "epoch": 1.12, "learning_rate": 3.644021609321192e-05, "loss": 1.5489, "step": 259800 }, { "epoch": 1.12, "learning_rate": 3.643478770665868e-05, "loss": 1.5738, "step": 259900 }, { "epoch": 1.12, "learning_rate": 3.642935932010544e-05, "loss": 1.5925, "step": 260000 }, { "epoch": 1.12, "eval_loss": 1.4609049558639526, "eval_runtime": 17.8329, "eval_samples_per_second": 560.761, "eval_steps_per_second": 17.552, "step": 260000 }, { "epoch": 1.12, "learning_rate": 3.642393093355221e-05, "loss": 1.531, "step": 260100 }, { "epoch": 1.12, "learning_rate": 3.641850254699897e-05, "loss": 1.5514, "step": 260200 }, { "epoch": 1.12, "learning_rate": 3.6413074160445734e-05, "loss": 1.5872, "step": 260300 }, { "epoch": 1.12, "learning_rate": 3.64076457738925e-05, "loss": 1.5746, "step": 260400 }, { "epoch": 1.12, "learning_rate": 3.6402217387339265e-05, "loss": 1.5698, "step": 260500 }, { "epoch": 1.12, "learning_rate": 3.639678900078603e-05, "loss": 1.5619, "step": 260600 }, { "epoch": 1.12, "learning_rate": 3.6391360614232795e-05, "loss": 1.5511, "step": 260700 }, { "epoch": 1.12, "learning_rate": 3.6385932227679564e-05, "loss": 1.562, "step": 260800 }, { "epoch": 1.12, "learning_rate": 3.6380503841126326e-05, "loss": 1.5527, "step": 260900 }, { "epoch": 1.12, "learning_rate": 3.6375075454573094e-05, "loss": 1.5525, "step": 261000 }, { "epoch": 1.12, "learning_rate": 3.6369647068019856e-05, "loss": 1.572, "step": 261100 }, { "epoch": 1.12, "learning_rate": 3.6364218681466625e-05, "loss": 1.6127, "step": 261200 }, { "epoch": 1.12, "learning_rate": 3.635879029491339e-05, "loss": 1.5688, "step": 261300 }, { "epoch": 1.12, "learning_rate": 3.635336190836015e-05, "loss": 1.5462, "step": 261400 }, { "epoch": 1.12, "learning_rate": 3.634793352180692e-05, "loss": 1.5759, "step": 261500 }, { "epoch": 1.12, "learning_rate": 3.634250513525368e-05, "loss": 1.5499, "step": 261600 }, { "epoch": 1.12, "learning_rate": 3.633707674870044e-05, "loss": 1.5334, "step": 261700 }, { "epoch": 1.12, "learning_rate": 3.633164836214721e-05, "loss": 1.5306, "step": 261800 }, { "epoch": 1.13, "learning_rate": 3.632621997559398e-05, "loss": 1.5375, "step": 261900 }, { "epoch": 1.13, "learning_rate": 3.632079158904074e-05, "loss": 1.5449, "step": 262000 }, { "epoch": 1.13, "learning_rate": 3.631536320248751e-05, "loss": 1.5735, "step": 262100 }, { "epoch": 1.13, "learning_rate": 3.630993481593427e-05, "loss": 1.5231, "step": 262200 }, { "epoch": 1.13, "learning_rate": 3.630450642938103e-05, "loss": 1.5716, "step": 262300 }, { "epoch": 1.13, "learning_rate": 3.62990780428278e-05, "loss": 1.5735, "step": 262400 }, { "epoch": 1.13, "learning_rate": 3.629364965627456e-05, "loss": 1.5733, "step": 262500 }, { "epoch": 1.13, "learning_rate": 3.628822126972133e-05, "loss": 1.5416, "step": 262600 }, { "epoch": 1.13, "learning_rate": 3.6282792883168094e-05, "loss": 1.5918, "step": 262700 }, { "epoch": 1.13, "learning_rate": 3.6277364496614855e-05, "loss": 1.5342, "step": 262800 }, { "epoch": 1.13, "learning_rate": 3.6271936110061624e-05, "loss": 1.5777, "step": 262900 }, { "epoch": 1.13, "learning_rate": 3.626650772350839e-05, "loss": 1.5328, "step": 263000 }, { "epoch": 1.13, "learning_rate": 3.6261079336955155e-05, "loss": 1.5647, "step": 263100 }, { "epoch": 1.13, "learning_rate": 3.625565095040192e-05, "loss": 1.5391, "step": 263200 }, { "epoch": 1.13, "learning_rate": 3.6250222563848685e-05, "loss": 1.5861, "step": 263300 }, { "epoch": 1.13, "learning_rate": 3.624479417729545e-05, "loss": 1.5629, "step": 263400 }, { "epoch": 1.13, "learning_rate": 3.6239365790742216e-05, "loss": 1.5676, "step": 263500 }, { "epoch": 1.13, "learning_rate": 3.623393740418898e-05, "loss": 1.5394, "step": 263600 }, { "epoch": 1.13, "learning_rate": 3.622850901763574e-05, "loss": 1.5281, "step": 263700 }, { "epoch": 1.13, "learning_rate": 3.622308063108251e-05, "loss": 1.548, "step": 263800 }, { "epoch": 1.13, "learning_rate": 3.621765224452928e-05, "loss": 1.5544, "step": 263900 }, { "epoch": 1.13, "learning_rate": 3.621222385797604e-05, "loss": 1.5579, "step": 264000 }, { "epoch": 1.13, "learning_rate": 3.620679547142281e-05, "loss": 1.5662, "step": 264100 }, { "epoch": 1.14, "learning_rate": 3.620136708486957e-05, "loss": 1.5606, "step": 264200 }, { "epoch": 1.14, "learning_rate": 3.619593869831633e-05, "loss": 1.5445, "step": 264300 }, { "epoch": 1.14, "learning_rate": 3.61905103117631e-05, "loss": 1.5551, "step": 264400 }, { "epoch": 1.14, "learning_rate": 3.618508192520986e-05, "loss": 1.5423, "step": 264500 }, { "epoch": 1.14, "learning_rate": 3.617965353865663e-05, "loss": 1.5331, "step": 264600 }, { "epoch": 1.14, "learning_rate": 3.617422515210339e-05, "loss": 1.5552, "step": 264700 }, { "epoch": 1.14, "learning_rate": 3.6168796765550154e-05, "loss": 1.5557, "step": 264800 }, { "epoch": 1.14, "learning_rate": 3.616336837899692e-05, "loss": 1.5547, "step": 264900 }, { "epoch": 1.14, "learning_rate": 3.615793999244369e-05, "loss": 1.5396, "step": 265000 }, { "epoch": 1.14, "eval_loss": 1.4619945287704468, "eval_runtime": 17.8247, "eval_samples_per_second": 561.019, "eval_steps_per_second": 17.56, "step": 265000 }, { "epoch": 1.14, "learning_rate": 3.615251160589045e-05, "loss": 1.5693, "step": 265100 }, { "epoch": 1.14, "learning_rate": 3.614708321933722e-05, "loss": 1.6009, "step": 265200 }, { "epoch": 1.14, "learning_rate": 3.6141654832783984e-05, "loss": 1.5776, "step": 265300 }, { "epoch": 1.14, "learning_rate": 3.6136226446230745e-05, "loss": 1.5691, "step": 265400 }, { "epoch": 1.14, "learning_rate": 3.6130798059677514e-05, "loss": 1.5786, "step": 265500 }, { "epoch": 1.14, "learning_rate": 3.6125369673124276e-05, "loss": 1.568, "step": 265600 }, { "epoch": 1.14, "learning_rate": 3.611994128657104e-05, "loss": 1.5471, "step": 265700 }, { "epoch": 1.14, "learning_rate": 3.6114512900017807e-05, "loss": 1.5895, "step": 265800 }, { "epoch": 1.14, "learning_rate": 3.610908451346457e-05, "loss": 1.5691, "step": 265900 }, { "epoch": 1.14, "learning_rate": 3.610365612691134e-05, "loss": 1.5361, "step": 266000 }, { "epoch": 1.14, "learning_rate": 3.6098227740358106e-05, "loss": 1.5495, "step": 266100 }, { "epoch": 1.14, "learning_rate": 3.609279935380487e-05, "loss": 1.5553, "step": 266200 }, { "epoch": 1.14, "learning_rate": 3.608737096725163e-05, "loss": 1.5584, "step": 266300 }, { "epoch": 1.14, "learning_rate": 3.60819425806984e-05, "loss": 1.528, "step": 266400 }, { "epoch": 1.14, "learning_rate": 3.607651419414516e-05, "loss": 1.5457, "step": 266500 }, { "epoch": 1.15, "learning_rate": 3.607108580759193e-05, "loss": 1.5393, "step": 266600 }, { "epoch": 1.15, "learning_rate": 3.606565742103869e-05, "loss": 1.5813, "step": 266700 }, { "epoch": 1.15, "learning_rate": 3.606022903448545e-05, "loss": 1.5665, "step": 266800 }, { "epoch": 1.15, "learning_rate": 3.605480064793222e-05, "loss": 1.5375, "step": 266900 }, { "epoch": 1.15, "learning_rate": 3.604937226137898e-05, "loss": 1.5525, "step": 267000 }, { "epoch": 1.15, "learning_rate": 3.604394387482575e-05, "loss": 1.5531, "step": 267100 }, { "epoch": 1.15, "learning_rate": 3.603851548827252e-05, "loss": 1.5469, "step": 267200 }, { "epoch": 1.15, "learning_rate": 3.603308710171928e-05, "loss": 1.587, "step": 267300 }, { "epoch": 1.15, "learning_rate": 3.6027658715166044e-05, "loss": 1.5445, "step": 267400 }, { "epoch": 1.15, "learning_rate": 3.602223032861281e-05, "loss": 1.5907, "step": 267500 }, { "epoch": 1.15, "learning_rate": 3.6016801942059574e-05, "loss": 1.5455, "step": 267600 }, { "epoch": 1.15, "learning_rate": 3.6011373555506336e-05, "loss": 1.5386, "step": 267700 }, { "epoch": 1.15, "learning_rate": 3.6005945168953105e-05, "loss": 1.5539, "step": 267800 }, { "epoch": 1.15, "learning_rate": 3.600051678239987e-05, "loss": 1.5513, "step": 267900 }, { "epoch": 1.15, "learning_rate": 3.5995088395846636e-05, "loss": 1.5439, "step": 268000 }, { "epoch": 1.15, "learning_rate": 3.5989660009293404e-05, "loss": 1.5362, "step": 268100 }, { "epoch": 1.15, "learning_rate": 3.5984231622740166e-05, "loss": 1.554, "step": 268200 }, { "epoch": 1.15, "learning_rate": 3.597880323618693e-05, "loss": 1.5568, "step": 268300 }, { "epoch": 1.15, "learning_rate": 3.5973374849633697e-05, "loss": 1.5624, "step": 268400 }, { "epoch": 1.15, "learning_rate": 3.596794646308046e-05, "loss": 1.5489, "step": 268500 }, { "epoch": 1.15, "learning_rate": 3.596251807652723e-05, "loss": 1.5618, "step": 268600 }, { "epoch": 1.15, "learning_rate": 3.595708968997399e-05, "loss": 1.5626, "step": 268700 }, { "epoch": 1.15, "learning_rate": 3.595166130342075e-05, "loss": 1.5691, "step": 268800 }, { "epoch": 1.16, "learning_rate": 3.594623291686752e-05, "loss": 1.5809, "step": 268900 }, { "epoch": 1.16, "learning_rate": 3.594080453031428e-05, "loss": 1.5376, "step": 269000 }, { "epoch": 1.16, "learning_rate": 3.593537614376104e-05, "loss": 1.5724, "step": 269100 }, { "epoch": 1.16, "learning_rate": 3.592994775720782e-05, "loss": 1.5474, "step": 269200 }, { "epoch": 1.16, "learning_rate": 3.592451937065458e-05, "loss": 1.5703, "step": 269300 }, { "epoch": 1.16, "learning_rate": 3.591909098410134e-05, "loss": 1.5577, "step": 269400 }, { "epoch": 1.16, "learning_rate": 3.591366259754811e-05, "loss": 1.5545, "step": 269500 }, { "epoch": 1.16, "learning_rate": 3.590823421099487e-05, "loss": 1.5388, "step": 269600 }, { "epoch": 1.16, "learning_rate": 3.5902805824441635e-05, "loss": 1.564, "step": 269700 }, { "epoch": 1.16, "learning_rate": 3.5897377437888403e-05, "loss": 1.5424, "step": 269800 }, { "epoch": 1.16, "learning_rate": 3.5891949051335165e-05, "loss": 1.5829, "step": 269900 }, { "epoch": 1.16, "learning_rate": 3.5886520664781934e-05, "loss": 1.59, "step": 270000 }, { "epoch": 1.16, "eval_loss": 1.4590508937835693, "eval_runtime": 17.8197, "eval_samples_per_second": 561.177, "eval_steps_per_second": 17.565, "step": 270000 }, { "epoch": 1.16, "learning_rate": 3.5881092278228696e-05, "loss": 1.5582, "step": 270100 }, { "epoch": 1.16, "learning_rate": 3.5875663891675465e-05, "loss": 1.5847, "step": 270200 }, { "epoch": 1.16, "learning_rate": 3.5870235505122226e-05, "loss": 1.5796, "step": 270300 }, { "epoch": 1.16, "learning_rate": 3.5864807118568995e-05, "loss": 1.5329, "step": 270400 }, { "epoch": 1.16, "learning_rate": 3.585937873201576e-05, "loss": 1.5347, "step": 270500 }, { "epoch": 1.16, "learning_rate": 3.5853950345462526e-05, "loss": 1.535, "step": 270600 }, { "epoch": 1.16, "learning_rate": 3.584852195890929e-05, "loss": 1.5335, "step": 270700 }, { "epoch": 1.16, "learning_rate": 3.584309357235605e-05, "loss": 1.5717, "step": 270800 }, { "epoch": 1.16, "learning_rate": 3.583766518580282e-05, "loss": 1.5496, "step": 270900 }, { "epoch": 1.16, "learning_rate": 3.583223679924958e-05, "loss": 1.5529, "step": 271000 }, { "epoch": 1.16, "learning_rate": 3.582680841269634e-05, "loss": 1.5607, "step": 271100 }, { "epoch": 1.17, "learning_rate": 3.582138002614311e-05, "loss": 1.5481, "step": 271200 }, { "epoch": 1.17, "learning_rate": 3.581595163958988e-05, "loss": 1.5442, "step": 271300 }, { "epoch": 1.17, "learning_rate": 3.581052325303664e-05, "loss": 1.5719, "step": 271400 }, { "epoch": 1.17, "learning_rate": 3.580509486648341e-05, "loss": 1.5845, "step": 271500 }, { "epoch": 1.17, "learning_rate": 3.579966647993017e-05, "loss": 1.5695, "step": 271600 }, { "epoch": 1.17, "learning_rate": 3.579423809337693e-05, "loss": 1.5496, "step": 271700 }, { "epoch": 1.17, "learning_rate": 3.57888097068237e-05, "loss": 1.5713, "step": 271800 }, { "epoch": 1.17, "learning_rate": 3.5783381320270464e-05, "loss": 1.5643, "step": 271900 }, { "epoch": 1.17, "learning_rate": 3.577795293371723e-05, "loss": 1.5655, "step": 272000 }, { "epoch": 1.17, "learning_rate": 3.5772524547163994e-05, "loss": 1.5866, "step": 272100 }, { "epoch": 1.17, "learning_rate": 3.5767096160610756e-05, "loss": 1.5959, "step": 272200 }, { "epoch": 1.17, "learning_rate": 3.5761667774057525e-05, "loss": 1.5499, "step": 272300 }, { "epoch": 1.17, "learning_rate": 3.5756239387504294e-05, "loss": 1.5568, "step": 272400 }, { "epoch": 1.17, "learning_rate": 3.5750811000951055e-05, "loss": 1.5488, "step": 272500 }, { "epoch": 1.17, "learning_rate": 3.5745382614397824e-05, "loss": 1.557, "step": 272600 }, { "epoch": 1.17, "learning_rate": 3.5739954227844586e-05, "loss": 1.5672, "step": 272700 }, { "epoch": 1.17, "learning_rate": 3.573452584129135e-05, "loss": 1.5689, "step": 272800 }, { "epoch": 1.17, "learning_rate": 3.5729097454738116e-05, "loss": 1.549, "step": 272900 }, { "epoch": 1.17, "learning_rate": 3.572366906818488e-05, "loss": 1.5771, "step": 273000 }, { "epoch": 1.17, "learning_rate": 3.571824068163164e-05, "loss": 1.5602, "step": 273100 }, { "epoch": 1.17, "learning_rate": 3.571281229507841e-05, "loss": 1.5542, "step": 273200 }, { "epoch": 1.17, "learning_rate": 3.570738390852517e-05, "loss": 1.5522, "step": 273300 }, { "epoch": 1.17, "learning_rate": 3.570195552197194e-05, "loss": 1.5752, "step": 273400 }, { "epoch": 1.17, "learning_rate": 3.569652713541871e-05, "loss": 1.5649, "step": 273500 }, { "epoch": 1.18, "learning_rate": 3.569109874886547e-05, "loss": 1.531, "step": 273600 }, { "epoch": 1.18, "learning_rate": 3.568567036231223e-05, "loss": 1.5486, "step": 273700 }, { "epoch": 1.18, "learning_rate": 3.5680241975759e-05, "loss": 1.5541, "step": 273800 }, { "epoch": 1.18, "learning_rate": 3.567481358920576e-05, "loss": 1.5403, "step": 273900 }, { "epoch": 1.18, "learning_rate": 3.566938520265253e-05, "loss": 1.556, "step": 274000 }, { "epoch": 1.18, "learning_rate": 3.566395681609929e-05, "loss": 1.573, "step": 274100 }, { "epoch": 1.18, "learning_rate": 3.5658528429546055e-05, "loss": 1.5342, "step": 274200 }, { "epoch": 1.18, "learning_rate": 3.5653100042992823e-05, "loss": 1.5544, "step": 274300 }, { "epoch": 1.18, "learning_rate": 3.564767165643959e-05, "loss": 1.5691, "step": 274400 }, { "epoch": 1.18, "learning_rate": 3.5642243269886354e-05, "loss": 1.5439, "step": 274500 }, { "epoch": 1.18, "learning_rate": 3.563681488333312e-05, "loss": 1.5634, "step": 274600 }, { "epoch": 1.18, "learning_rate": 3.5631386496779884e-05, "loss": 1.5517, "step": 274700 }, { "epoch": 1.18, "learning_rate": 3.5625958110226646e-05, "loss": 1.5376, "step": 274800 }, { "epoch": 1.18, "learning_rate": 3.5620529723673415e-05, "loss": 1.6075, "step": 274900 }, { "epoch": 1.18, "learning_rate": 3.561510133712018e-05, "loss": 1.5671, "step": 275000 }, { "epoch": 1.18, "eval_loss": 1.4539235830307007, "eval_runtime": 17.8159, "eval_samples_per_second": 561.296, "eval_steps_per_second": 17.569, "step": 275000 }, { "epoch": 1.18, "learning_rate": 3.560967295056694e-05, "loss": 1.5554, "step": 275100 }, { "epoch": 1.18, "learning_rate": 3.560424456401371e-05, "loss": 1.5644, "step": 275200 }, { "epoch": 1.18, "learning_rate": 3.559881617746047e-05, "loss": 1.5373, "step": 275300 }, { "epoch": 1.18, "learning_rate": 3.559338779090723e-05, "loss": 1.5611, "step": 275400 }, { "epoch": 1.18, "learning_rate": 3.5587959404354007e-05, "loss": 1.5701, "step": 275500 }, { "epoch": 1.18, "learning_rate": 3.558253101780077e-05, "loss": 1.5404, "step": 275600 }, { "epoch": 1.18, "learning_rate": 3.557710263124753e-05, "loss": 1.5647, "step": 275700 }, { "epoch": 1.18, "learning_rate": 3.55716742446943e-05, "loss": 1.5538, "step": 275800 }, { "epoch": 1.19, "learning_rate": 3.556624585814106e-05, "loss": 1.5497, "step": 275900 }, { "epoch": 1.19, "learning_rate": 3.556081747158783e-05, "loss": 1.5548, "step": 276000 }, { "epoch": 1.19, "learning_rate": 3.555538908503459e-05, "loss": 1.566, "step": 276100 }, { "epoch": 1.19, "learning_rate": 3.554996069848135e-05, "loss": 1.5541, "step": 276200 }, { "epoch": 1.19, "learning_rate": 3.554453231192812e-05, "loss": 1.5611, "step": 276300 }, { "epoch": 1.19, "learning_rate": 3.5539103925374884e-05, "loss": 1.5562, "step": 276400 }, { "epoch": 1.19, "learning_rate": 3.553367553882165e-05, "loss": 1.562, "step": 276500 }, { "epoch": 1.19, "learning_rate": 3.552824715226842e-05, "loss": 1.5387, "step": 276600 }, { "epoch": 1.19, "learning_rate": 3.552281876571518e-05, "loss": 1.5448, "step": 276700 }, { "epoch": 1.19, "learning_rate": 3.5517390379161945e-05, "loss": 1.5676, "step": 276800 }, { "epoch": 1.19, "learning_rate": 3.5511961992608713e-05, "loss": 1.5381, "step": 276900 }, { "epoch": 1.19, "learning_rate": 3.5506533606055475e-05, "loss": 1.5715, "step": 277000 }, { "epoch": 1.19, "learning_rate": 3.550110521950224e-05, "loss": 1.5469, "step": 277100 }, { "epoch": 1.19, "learning_rate": 3.5495676832949006e-05, "loss": 1.5773, "step": 277200 }, { "epoch": 1.19, "learning_rate": 3.549024844639577e-05, "loss": 1.5712, "step": 277300 }, { "epoch": 1.19, "learning_rate": 3.548482005984253e-05, "loss": 1.5421, "step": 277400 }, { "epoch": 1.19, "learning_rate": 3.54793916732893e-05, "loss": 1.5442, "step": 277500 }, { "epoch": 1.19, "learning_rate": 3.547396328673607e-05, "loss": 1.5309, "step": 277600 }, { "epoch": 1.19, "learning_rate": 3.546853490018283e-05, "loss": 1.5588, "step": 277700 }, { "epoch": 1.19, "learning_rate": 3.54631065136296e-05, "loss": 1.5598, "step": 277800 }, { "epoch": 1.19, "learning_rate": 3.545767812707636e-05, "loss": 1.5618, "step": 277900 }, { "epoch": 1.19, "learning_rate": 3.545224974052313e-05, "loss": 1.5579, "step": 278000 }, { "epoch": 1.19, "learning_rate": 3.544682135396989e-05, "loss": 1.5515, "step": 278100 }, { "epoch": 1.2, "learning_rate": 3.544139296741665e-05, "loss": 1.5779, "step": 278200 }, { "epoch": 1.2, "learning_rate": 3.543596458086342e-05, "loss": 1.5852, "step": 278300 }, { "epoch": 1.2, "learning_rate": 3.543053619431018e-05, "loss": 1.5487, "step": 278400 }, { "epoch": 1.2, "learning_rate": 3.5425107807756944e-05, "loss": 1.5745, "step": 278500 }, { "epoch": 1.2, "learning_rate": 3.541967942120372e-05, "loss": 1.5421, "step": 278600 }, { "epoch": 1.2, "learning_rate": 3.541425103465048e-05, "loss": 1.5352, "step": 278700 }, { "epoch": 1.2, "learning_rate": 3.540882264809724e-05, "loss": 1.556, "step": 278800 }, { "epoch": 1.2, "learning_rate": 3.540339426154401e-05, "loss": 1.5488, "step": 278900 }, { "epoch": 1.2, "learning_rate": 3.5397965874990774e-05, "loss": 1.5424, "step": 279000 }, { "epoch": 1.2, "learning_rate": 3.5392537488437536e-05, "loss": 1.5643, "step": 279100 }, { "epoch": 1.2, "learning_rate": 3.5387109101884304e-05, "loss": 1.5561, "step": 279200 }, { "epoch": 1.2, "learning_rate": 3.5381680715331066e-05, "loss": 1.5653, "step": 279300 }, { "epoch": 1.2, "learning_rate": 3.537625232877783e-05, "loss": 1.5528, "step": 279400 }, { "epoch": 1.2, "learning_rate": 3.53708239422246e-05, "loss": 1.566, "step": 279500 }, { "epoch": 1.2, "learning_rate": 3.536539555567136e-05, "loss": 1.5282, "step": 279600 }, { "epoch": 1.2, "learning_rate": 3.535996716911813e-05, "loss": 1.5516, "step": 279700 }, { "epoch": 1.2, "learning_rate": 3.5354538782564896e-05, "loss": 1.531, "step": 279800 }, { "epoch": 1.2, "learning_rate": 3.534911039601166e-05, "loss": 1.5397, "step": 279900 }, { "epoch": 1.2, "learning_rate": 3.5343682009458426e-05, "loss": 1.5497, "step": 280000 }, { "epoch": 1.2, "eval_loss": 1.456358551979065, "eval_runtime": 17.8134, "eval_samples_per_second": 561.375, "eval_steps_per_second": 17.571, "step": 280000 }, { "epoch": 1.2, "learning_rate": 3.533825362290519e-05, "loss": 1.5704, "step": 280100 }, { "epoch": 1.2, "learning_rate": 3.533282523635195e-05, "loss": 1.5544, "step": 280200 }, { "epoch": 1.2, "learning_rate": 3.532739684979872e-05, "loss": 1.5539, "step": 280300 }, { "epoch": 1.2, "learning_rate": 3.532196846324548e-05, "loss": 1.5754, "step": 280400 }, { "epoch": 1.21, "learning_rate": 3.531654007669224e-05, "loss": 1.5399, "step": 280500 }, { "epoch": 1.21, "learning_rate": 3.531111169013901e-05, "loss": 1.5785, "step": 280600 }, { "epoch": 1.21, "learning_rate": 3.530568330358578e-05, "loss": 1.5415, "step": 280700 }, { "epoch": 1.21, "learning_rate": 3.530025491703254e-05, "loss": 1.5825, "step": 280800 }, { "epoch": 1.21, "learning_rate": 3.529482653047931e-05, "loss": 1.5565, "step": 280900 }, { "epoch": 1.21, "learning_rate": 3.528939814392607e-05, "loss": 1.567, "step": 281000 }, { "epoch": 1.21, "learning_rate": 3.5283969757372834e-05, "loss": 1.5726, "step": 281100 }, { "epoch": 1.21, "learning_rate": 3.52785413708196e-05, "loss": 1.5311, "step": 281200 }, { "epoch": 1.21, "learning_rate": 3.5273112984266365e-05, "loss": 1.5515, "step": 281300 }, { "epoch": 1.21, "learning_rate": 3.5267684597713127e-05, "loss": 1.5515, "step": 281400 }, { "epoch": 1.21, "learning_rate": 3.5262256211159895e-05, "loss": 1.5597, "step": 281500 }, { "epoch": 1.21, "learning_rate": 3.525682782460666e-05, "loss": 1.5452, "step": 281600 }, { "epoch": 1.21, "learning_rate": 3.5251399438053426e-05, "loss": 1.5714, "step": 281700 }, { "epoch": 1.21, "learning_rate": 3.5245971051500194e-05, "loss": 1.5578, "step": 281800 }, { "epoch": 1.21, "learning_rate": 3.5240542664946956e-05, "loss": 1.5484, "step": 281900 }, { "epoch": 1.21, "learning_rate": 3.5235114278393725e-05, "loss": 1.5792, "step": 282000 }, { "epoch": 1.21, "learning_rate": 3.522968589184049e-05, "loss": 1.5652, "step": 282100 }, { "epoch": 1.21, "learning_rate": 3.522425750528725e-05, "loss": 1.5616, "step": 282200 }, { "epoch": 1.21, "learning_rate": 3.521882911873402e-05, "loss": 1.561, "step": 282300 }, { "epoch": 1.21, "learning_rate": 3.521340073218078e-05, "loss": 1.5664, "step": 282400 }, { "epoch": 1.21, "learning_rate": 3.520797234562754e-05, "loss": 1.5775, "step": 282500 }, { "epoch": 1.21, "learning_rate": 3.520254395907431e-05, "loss": 1.5332, "step": 282600 }, { "epoch": 1.21, "learning_rate": 3.519711557252107e-05, "loss": 1.5193, "step": 282700 }, { "epoch": 1.21, "learning_rate": 3.519168718596784e-05, "loss": 1.5546, "step": 282800 }, { "epoch": 1.22, "learning_rate": 3.518625879941461e-05, "loss": 1.5636, "step": 282900 }, { "epoch": 1.22, "learning_rate": 3.518083041286137e-05, "loss": 1.5514, "step": 283000 }, { "epoch": 1.22, "learning_rate": 3.517540202630813e-05, "loss": 1.5575, "step": 283100 }, { "epoch": 1.22, "learning_rate": 3.51699736397549e-05, "loss": 1.5341, "step": 283200 }, { "epoch": 1.22, "learning_rate": 3.516454525320166e-05, "loss": 1.5468, "step": 283300 }, { "epoch": 1.22, "learning_rate": 3.5159116866648425e-05, "loss": 1.5506, "step": 283400 }, { "epoch": 1.22, "learning_rate": 3.5153688480095194e-05, "loss": 1.5696, "step": 283500 }, { "epoch": 1.22, "learning_rate": 3.5148260093541956e-05, "loss": 1.5704, "step": 283600 }, { "epoch": 1.22, "learning_rate": 3.5142831706988724e-05, "loss": 1.5557, "step": 283700 }, { "epoch": 1.22, "learning_rate": 3.513740332043549e-05, "loss": 1.5561, "step": 283800 }, { "epoch": 1.22, "learning_rate": 3.5131974933882255e-05, "loss": 1.5315, "step": 283900 }, { "epoch": 1.22, "learning_rate": 3.5126546547329023e-05, "loss": 1.5445, "step": 284000 }, { "epoch": 1.22, "learning_rate": 3.5121118160775785e-05, "loss": 1.5352, "step": 284100 }, { "epoch": 1.22, "learning_rate": 3.511568977422255e-05, "loss": 1.5458, "step": 284200 }, { "epoch": 1.22, "learning_rate": 3.5110261387669316e-05, "loss": 1.5889, "step": 284300 }, { "epoch": 1.22, "learning_rate": 3.510483300111608e-05, "loss": 1.5682, "step": 284400 }, { "epoch": 1.22, "learning_rate": 3.509940461456284e-05, "loss": 1.5332, "step": 284500 }, { "epoch": 1.22, "learning_rate": 3.509397622800961e-05, "loss": 1.5871, "step": 284600 }, { "epoch": 1.22, "learning_rate": 3.508854784145637e-05, "loss": 1.5586, "step": 284700 }, { "epoch": 1.22, "learning_rate": 3.508311945490313e-05, "loss": 1.5607, "step": 284800 }, { "epoch": 1.22, "learning_rate": 3.507769106834991e-05, "loss": 1.576, "step": 284900 }, { "epoch": 1.22, "learning_rate": 3.507226268179667e-05, "loss": 1.5491, "step": 285000 }, { "epoch": 1.22, "eval_loss": 1.4562804698944092, "eval_runtime": 17.8003, "eval_samples_per_second": 561.788, "eval_steps_per_second": 17.584, "step": 285000 }, { "epoch": 1.22, "learning_rate": 3.506683429524343e-05, "loss": 1.5509, "step": 285100 }, { "epoch": 1.23, "learning_rate": 3.50614059086902e-05, "loss": 1.5571, "step": 285200 }, { "epoch": 1.23, "learning_rate": 3.505597752213696e-05, "loss": 1.541, "step": 285300 }, { "epoch": 1.23, "learning_rate": 3.5050549135583724e-05, "loss": 1.5445, "step": 285400 }, { "epoch": 1.23, "learning_rate": 3.504512074903049e-05, "loss": 1.5346, "step": 285500 }, { "epoch": 1.23, "learning_rate": 3.5039692362477254e-05, "loss": 1.5462, "step": 285600 }, { "epoch": 1.23, "learning_rate": 3.503426397592402e-05, "loss": 1.5539, "step": 285700 }, { "epoch": 1.23, "learning_rate": 3.5028835589370785e-05, "loss": 1.5642, "step": 285800 }, { "epoch": 1.23, "learning_rate": 3.502340720281755e-05, "loss": 1.56, "step": 285900 }, { "epoch": 1.23, "learning_rate": 3.5017978816264315e-05, "loss": 1.5673, "step": 286000 }, { "epoch": 1.23, "learning_rate": 3.5012550429711084e-05, "loss": 1.5509, "step": 286100 }, { "epoch": 1.23, "learning_rate": 3.5007122043157846e-05, "loss": 1.5552, "step": 286200 }, { "epoch": 1.23, "learning_rate": 3.5001693656604614e-05, "loss": 1.5524, "step": 286300 }, { "epoch": 1.23, "learning_rate": 3.4996265270051376e-05, "loss": 1.5624, "step": 286400 }, { "epoch": 1.23, "learning_rate": 3.499083688349814e-05, "loss": 1.5801, "step": 286500 }, { "epoch": 1.23, "learning_rate": 3.498540849694491e-05, "loss": 1.5274, "step": 286600 }, { "epoch": 1.23, "learning_rate": 3.497998011039167e-05, "loss": 1.5369, "step": 286700 }, { "epoch": 1.23, "learning_rate": 3.497455172383843e-05, "loss": 1.5509, "step": 286800 }, { "epoch": 1.23, "learning_rate": 3.49691233372852e-05, "loss": 1.5426, "step": 286900 }, { "epoch": 1.23, "learning_rate": 3.496369495073197e-05, "loss": 1.5654, "step": 287000 }, { "epoch": 1.23, "learning_rate": 3.495826656417873e-05, "loss": 1.55, "step": 287100 }, { "epoch": 1.23, "learning_rate": 3.49528381776255e-05, "loss": 1.5626, "step": 287200 }, { "epoch": 1.23, "learning_rate": 3.494740979107226e-05, "loss": 1.5696, "step": 287300 }, { "epoch": 1.23, "learning_rate": 3.494198140451902e-05, "loss": 1.5594, "step": 287400 }, { "epoch": 1.24, "learning_rate": 3.493655301796579e-05, "loss": 1.5418, "step": 287500 }, { "epoch": 1.24, "learning_rate": 3.493112463141255e-05, "loss": 1.5708, "step": 287600 }, { "epoch": 1.24, "learning_rate": 3.492569624485932e-05, "loss": 1.57, "step": 287700 }, { "epoch": 1.24, "learning_rate": 3.492026785830608e-05, "loss": 1.5632, "step": 287800 }, { "epoch": 1.24, "learning_rate": 3.4914839471752845e-05, "loss": 1.5588, "step": 287900 }, { "epoch": 1.24, "learning_rate": 3.4909411085199614e-05, "loss": 1.5361, "step": 288000 }, { "epoch": 1.24, "learning_rate": 3.490398269864638e-05, "loss": 1.5445, "step": 288100 }, { "epoch": 1.24, "learning_rate": 3.4898554312093144e-05, "loss": 1.559, "step": 288200 }, { "epoch": 1.24, "learning_rate": 3.489312592553991e-05, "loss": 1.5863, "step": 288300 }, { "epoch": 1.24, "learning_rate": 3.4887697538986675e-05, "loss": 1.5221, "step": 288400 }, { "epoch": 1.24, "learning_rate": 3.4882269152433437e-05, "loss": 1.5596, "step": 288500 }, { "epoch": 1.24, "learning_rate": 3.4876840765880205e-05, "loss": 1.5405, "step": 288600 }, { "epoch": 1.24, "learning_rate": 3.487141237932697e-05, "loss": 1.5409, "step": 288700 }, { "epoch": 1.24, "learning_rate": 3.486598399277373e-05, "loss": 1.5524, "step": 288800 }, { "epoch": 1.24, "learning_rate": 3.48605556062205e-05, "loss": 1.5533, "step": 288900 }, { "epoch": 1.24, "learning_rate": 3.485512721966726e-05, "loss": 1.5379, "step": 289000 }, { "epoch": 1.24, "learning_rate": 3.484969883311403e-05, "loss": 1.5547, "step": 289100 }, { "epoch": 1.24, "learning_rate": 3.48442704465608e-05, "loss": 1.5598, "step": 289200 }, { "epoch": 1.24, "learning_rate": 3.483884206000756e-05, "loss": 1.5472, "step": 289300 }, { "epoch": 1.24, "learning_rate": 3.483341367345432e-05, "loss": 1.5289, "step": 289400 }, { "epoch": 1.24, "learning_rate": 3.482798528690109e-05, "loss": 1.543, "step": 289500 }, { "epoch": 1.24, "learning_rate": 3.482255690034785e-05, "loss": 1.5766, "step": 289600 }, { "epoch": 1.24, "learning_rate": 3.481712851379462e-05, "loss": 1.5635, "step": 289700 }, { "epoch": 1.25, "learning_rate": 3.481170012724138e-05, "loss": 1.5564, "step": 289800 }, { "epoch": 1.25, "learning_rate": 3.4806271740688143e-05, "loss": 1.5679, "step": 289900 }, { "epoch": 1.25, "learning_rate": 3.480084335413491e-05, "loss": 1.5554, "step": 290000 }, { "epoch": 1.25, "eval_loss": 1.4566130638122559, "eval_runtime": 17.7457, "eval_samples_per_second": 563.517, "eval_steps_per_second": 17.638, "step": 290000 }, { "epoch": 1.25, "learning_rate": 3.479541496758168e-05, "loss": 1.5559, "step": 290100 }, { "epoch": 1.25, "learning_rate": 3.478998658102844e-05, "loss": 1.5364, "step": 290200 }, { "epoch": 1.25, "learning_rate": 3.478455819447521e-05, "loss": 1.5495, "step": 290300 }, { "epoch": 1.25, "learning_rate": 3.477912980792197e-05, "loss": 1.5425, "step": 290400 }, { "epoch": 1.25, "learning_rate": 3.4773701421368735e-05, "loss": 1.5416, "step": 290500 }, { "epoch": 1.25, "learning_rate": 3.4768273034815504e-05, "loss": 1.5573, "step": 290600 }, { "epoch": 1.25, "learning_rate": 3.4762844648262266e-05, "loss": 1.5505, "step": 290700 }, { "epoch": 1.25, "learning_rate": 3.475741626170903e-05, "loss": 1.5827, "step": 290800 }, { "epoch": 1.25, "learning_rate": 3.4751987875155796e-05, "loss": 1.5488, "step": 290900 }, { "epoch": 1.25, "learning_rate": 3.474655948860256e-05, "loss": 1.5206, "step": 291000 }, { "epoch": 1.25, "learning_rate": 3.474113110204933e-05, "loss": 1.562, "step": 291100 }, { "epoch": 1.25, "learning_rate": 3.4735702715496095e-05, "loss": 1.552, "step": 291200 }, { "epoch": 1.25, "learning_rate": 3.473027432894286e-05, "loss": 1.5442, "step": 291300 }, { "epoch": 1.25, "learning_rate": 3.472484594238962e-05, "loss": 1.5581, "step": 291400 }, { "epoch": 1.25, "learning_rate": 3.471941755583639e-05, "loss": 1.5588, "step": 291500 }, { "epoch": 1.25, "learning_rate": 3.471398916928315e-05, "loss": 1.5738, "step": 291600 }, { "epoch": 1.25, "learning_rate": 3.470856078272992e-05, "loss": 1.5371, "step": 291700 }, { "epoch": 1.25, "learning_rate": 3.470313239617668e-05, "loss": 1.5712, "step": 291800 }, { "epoch": 1.25, "learning_rate": 3.469770400962344e-05, "loss": 1.5889, "step": 291900 }, { "epoch": 1.25, "learning_rate": 3.469227562307021e-05, "loss": 1.5397, "step": 292000 }, { "epoch": 1.25, "learning_rate": 3.468684723651697e-05, "loss": 1.5654, "step": 292100 }, { "epoch": 1.26, "learning_rate": 3.468141884996374e-05, "loss": 1.5338, "step": 292200 }, { "epoch": 1.26, "learning_rate": 3.467599046341051e-05, "loss": 1.5349, "step": 292300 }, { "epoch": 1.26, "learning_rate": 3.467056207685727e-05, "loss": 1.553, "step": 292400 }, { "epoch": 1.26, "learning_rate": 3.4665133690304034e-05, "loss": 1.5506, "step": 292500 }, { "epoch": 1.26, "learning_rate": 3.46597053037508e-05, "loss": 1.5682, "step": 292600 }, { "epoch": 1.26, "learning_rate": 3.4654276917197564e-05, "loss": 1.5506, "step": 292700 }, { "epoch": 1.26, "learning_rate": 3.4648848530644326e-05, "loss": 1.549, "step": 292800 }, { "epoch": 1.26, "learning_rate": 3.4643420144091095e-05, "loss": 1.5393, "step": 292900 }, { "epoch": 1.26, "learning_rate": 3.4637991757537856e-05, "loss": 1.5481, "step": 293000 }, { "epoch": 1.26, "learning_rate": 3.4632563370984625e-05, "loss": 1.5265, "step": 293100 }, { "epoch": 1.26, "learning_rate": 3.462713498443139e-05, "loss": 1.5614, "step": 293200 }, { "epoch": 1.26, "learning_rate": 3.4621706597878156e-05, "loss": 1.5649, "step": 293300 }, { "epoch": 1.26, "learning_rate": 3.461627821132492e-05, "loss": 1.5426, "step": 293400 }, { "epoch": 1.26, "learning_rate": 3.4610849824771686e-05, "loss": 1.5627, "step": 293500 }, { "epoch": 1.26, "learning_rate": 3.460542143821845e-05, "loss": 1.5791, "step": 293600 }, { "epoch": 1.26, "learning_rate": 3.459999305166522e-05, "loss": 1.5435, "step": 293700 }, { "epoch": 1.26, "learning_rate": 3.459456466511198e-05, "loss": 1.5421, "step": 293800 }, { "epoch": 1.26, "learning_rate": 3.458913627855874e-05, "loss": 1.5323, "step": 293900 }, { "epoch": 1.26, "learning_rate": 3.458370789200551e-05, "loss": 1.5384, "step": 294000 }, { "epoch": 1.26, "learning_rate": 3.457827950545227e-05, "loss": 1.5803, "step": 294100 }, { "epoch": 1.26, "learning_rate": 3.457285111889903e-05, "loss": 1.5698, "step": 294200 }, { "epoch": 1.26, "learning_rate": 3.456742273234581e-05, "loss": 1.5649, "step": 294300 }, { "epoch": 1.26, "learning_rate": 3.456199434579257e-05, "loss": 1.587, "step": 294400 }, { "epoch": 1.27, "learning_rate": 3.455656595923933e-05, "loss": 1.5739, "step": 294500 }, { "epoch": 1.27, "learning_rate": 3.45511375726861e-05, "loss": 1.5239, "step": 294600 }, { "epoch": 1.27, "learning_rate": 3.454570918613286e-05, "loss": 1.5361, "step": 294700 }, { "epoch": 1.27, "learning_rate": 3.4540280799579624e-05, "loss": 1.5392, "step": 294800 }, { "epoch": 1.27, "learning_rate": 3.453485241302639e-05, "loss": 1.541, "step": 294900 }, { "epoch": 1.27, "learning_rate": 3.4529424026473155e-05, "loss": 1.5379, "step": 295000 }, { "epoch": 1.27, "eval_loss": 1.4548817873001099, "eval_runtime": 17.8195, "eval_samples_per_second": 561.182, "eval_steps_per_second": 17.565, "step": 295000 }, { "epoch": 1.27, "learning_rate": 3.4523995639919924e-05, "loss": 1.5534, "step": 295100 }, { "epoch": 1.27, "learning_rate": 3.4518567253366685e-05, "loss": 1.5415, "step": 295200 }, { "epoch": 1.27, "learning_rate": 3.451313886681345e-05, "loss": 1.5556, "step": 295300 }, { "epoch": 1.27, "learning_rate": 3.4507710480260216e-05, "loss": 1.5313, "step": 295400 }, { "epoch": 1.27, "learning_rate": 3.4502282093706985e-05, "loss": 1.5754, "step": 295500 }, { "epoch": 1.27, "learning_rate": 3.4496853707153747e-05, "loss": 1.5621, "step": 295600 }, { "epoch": 1.27, "learning_rate": 3.4491425320600515e-05, "loss": 1.5538, "step": 295700 }, { "epoch": 1.27, "learning_rate": 3.448599693404728e-05, "loss": 1.5616, "step": 295800 }, { "epoch": 1.27, "learning_rate": 3.448056854749404e-05, "loss": 1.554, "step": 295900 }, { "epoch": 1.27, "learning_rate": 3.447514016094081e-05, "loss": 1.5214, "step": 296000 }, { "epoch": 1.27, "learning_rate": 3.446971177438757e-05, "loss": 1.5608, "step": 296100 }, { "epoch": 1.27, "learning_rate": 3.446428338783433e-05, "loss": 1.5388, "step": 296200 }, { "epoch": 1.27, "learning_rate": 3.44588550012811e-05, "loss": 1.5748, "step": 296300 }, { "epoch": 1.27, "learning_rate": 3.445342661472787e-05, "loss": 1.5302, "step": 296400 }, { "epoch": 1.27, "learning_rate": 3.444799822817463e-05, "loss": 1.57, "step": 296500 }, { "epoch": 1.27, "learning_rate": 3.44425698416214e-05, "loss": 1.5821, "step": 296600 }, { "epoch": 1.27, "learning_rate": 3.443714145506816e-05, "loss": 1.5377, "step": 296700 }, { "epoch": 1.28, "learning_rate": 3.443171306851492e-05, "loss": 1.5542, "step": 296800 }, { "epoch": 1.28, "learning_rate": 3.442628468196169e-05, "loss": 1.574, "step": 296900 }, { "epoch": 1.28, "learning_rate": 3.4420856295408453e-05, "loss": 1.5629, "step": 297000 }, { "epoch": 1.28, "learning_rate": 3.441542790885522e-05, "loss": 1.4973, "step": 297100 }, { "epoch": 1.28, "learning_rate": 3.4409999522301984e-05, "loss": 1.5659, "step": 297200 }, { "epoch": 1.28, "learning_rate": 3.4404571135748746e-05, "loss": 1.5541, "step": 297300 }, { "epoch": 1.28, "learning_rate": 3.4399142749195515e-05, "loss": 1.553, "step": 297400 }, { "epoch": 1.28, "learning_rate": 3.439371436264228e-05, "loss": 1.5844, "step": 297500 }, { "epoch": 1.28, "learning_rate": 3.4388285976089045e-05, "loss": 1.5459, "step": 297600 }, { "epoch": 1.28, "learning_rate": 3.4382857589535814e-05, "loss": 1.5314, "step": 297700 }, { "epoch": 1.28, "learning_rate": 3.4377429202982576e-05, "loss": 1.5155, "step": 297800 }, { "epoch": 1.28, "learning_rate": 3.437200081642934e-05, "loss": 1.5474, "step": 297900 }, { "epoch": 1.28, "learning_rate": 3.4366572429876106e-05, "loss": 1.539, "step": 298000 }, { "epoch": 1.28, "learning_rate": 3.436114404332287e-05, "loss": 1.5794, "step": 298100 }, { "epoch": 1.28, "learning_rate": 3.435571565676963e-05, "loss": 1.5276, "step": 298200 }, { "epoch": 1.28, "learning_rate": 3.43502872702164e-05, "loss": 1.5574, "step": 298300 }, { "epoch": 1.28, "learning_rate": 3.434485888366316e-05, "loss": 1.5615, "step": 298400 }, { "epoch": 1.28, "learning_rate": 3.433943049710993e-05, "loss": 1.5511, "step": 298500 }, { "epoch": 1.28, "learning_rate": 3.43340021105567e-05, "loss": 1.5437, "step": 298600 }, { "epoch": 1.28, "learning_rate": 3.432857372400346e-05, "loss": 1.5773, "step": 298700 }, { "epoch": 1.28, "learning_rate": 3.432314533745022e-05, "loss": 1.5682, "step": 298800 }, { "epoch": 1.28, "learning_rate": 3.431771695089699e-05, "loss": 1.5283, "step": 298900 }, { "epoch": 1.28, "learning_rate": 3.431228856434375e-05, "loss": 1.5506, "step": 299000 }, { "epoch": 1.28, "learning_rate": 3.430686017779052e-05, "loss": 1.5324, "step": 299100 }, { "epoch": 1.29, "learning_rate": 3.430143179123728e-05, "loss": 1.5774, "step": 299200 }, { "epoch": 1.29, "learning_rate": 3.4296003404684044e-05, "loss": 1.5498, "step": 299300 }, { "epoch": 1.29, "learning_rate": 3.429057501813081e-05, "loss": 1.5267, "step": 299400 }, { "epoch": 1.29, "learning_rate": 3.4285146631577575e-05, "loss": 1.5507, "step": 299500 }, { "epoch": 1.29, "learning_rate": 3.4279718245024344e-05, "loss": 1.5379, "step": 299600 }, { "epoch": 1.29, "learning_rate": 3.427428985847111e-05, "loss": 1.5369, "step": 299700 }, { "epoch": 1.29, "learning_rate": 3.4268861471917874e-05, "loss": 1.552, "step": 299800 }, { "epoch": 1.29, "learning_rate": 3.4263433085364636e-05, "loss": 1.5454, "step": 299900 }, { "epoch": 1.29, "learning_rate": 3.4258004698811405e-05, "loss": 1.5883, "step": 300000 }, { "epoch": 1.29, "eval_loss": 1.4549944400787354, "eval_runtime": 17.7682, "eval_samples_per_second": 562.803, "eval_steps_per_second": 17.616, "step": 300000 }, { "epoch": 1.29, "learning_rate": 3.4252576312258166e-05, "loss": 1.5578, "step": 300100 }, { "epoch": 1.29, "learning_rate": 3.424714792570493e-05, "loss": 1.5529, "step": 300200 }, { "epoch": 1.29, "learning_rate": 3.42417195391517e-05, "loss": 1.555, "step": 300300 }, { "epoch": 1.29, "learning_rate": 3.423629115259846e-05, "loss": 1.5347, "step": 300400 }, { "epoch": 1.29, "learning_rate": 3.423086276604523e-05, "loss": 1.5489, "step": 300500 }, { "epoch": 1.29, "learning_rate": 3.4225434379491996e-05, "loss": 1.5477, "step": 300600 }, { "epoch": 1.29, "learning_rate": 3.422000599293876e-05, "loss": 1.576, "step": 300700 }, { "epoch": 1.29, "learning_rate": 3.421457760638552e-05, "loss": 1.5734, "step": 300800 }, { "epoch": 1.29, "learning_rate": 3.420914921983229e-05, "loss": 1.5729, "step": 300900 }, { "epoch": 1.29, "learning_rate": 3.420372083327905e-05, "loss": 1.5648, "step": 301000 }, { "epoch": 1.29, "learning_rate": 3.419829244672582e-05, "loss": 1.5804, "step": 301100 }, { "epoch": 1.29, "learning_rate": 3.419286406017258e-05, "loss": 1.554, "step": 301200 }, { "epoch": 1.29, "learning_rate": 3.418743567361934e-05, "loss": 1.543, "step": 301300 }, { "epoch": 1.29, "learning_rate": 3.418200728706611e-05, "loss": 1.5486, "step": 301400 }, { "epoch": 1.3, "learning_rate": 3.417657890051287e-05, "loss": 1.5243, "step": 301500 }, { "epoch": 1.3, "learning_rate": 3.4171150513959635e-05, "loss": 1.5519, "step": 301600 }, { "epoch": 1.3, "learning_rate": 3.416572212740641e-05, "loss": 1.5429, "step": 301700 }, { "epoch": 1.3, "learning_rate": 3.416029374085317e-05, "loss": 1.5501, "step": 301800 }, { "epoch": 1.3, "learning_rate": 3.4154865354299934e-05, "loss": 1.5872, "step": 301900 }, { "epoch": 1.3, "learning_rate": 3.41494369677467e-05, "loss": 1.5625, "step": 302000 }, { "epoch": 1.3, "learning_rate": 3.4144008581193465e-05, "loss": 1.5918, "step": 302100 }, { "epoch": 1.3, "learning_rate": 3.413858019464023e-05, "loss": 1.5764, "step": 302200 }, { "epoch": 1.3, "learning_rate": 3.4133151808086995e-05, "loss": 1.5277, "step": 302300 }, { "epoch": 1.3, "learning_rate": 3.412772342153376e-05, "loss": 1.5534, "step": 302400 }, { "epoch": 1.3, "learning_rate": 3.4122295034980526e-05, "loss": 1.562, "step": 302500 }, { "epoch": 1.3, "learning_rate": 3.411686664842729e-05, "loss": 1.5329, "step": 302600 }, { "epoch": 1.3, "learning_rate": 3.4111438261874057e-05, "loss": 1.575, "step": 302700 }, { "epoch": 1.3, "learning_rate": 3.410600987532082e-05, "loss": 1.5411, "step": 302800 }, { "epoch": 1.3, "learning_rate": 3.410058148876759e-05, "loss": 1.5783, "step": 302900 }, { "epoch": 1.3, "learning_rate": 3.409515310221435e-05, "loss": 1.5569, "step": 303000 }, { "epoch": 1.3, "learning_rate": 3.408972471566112e-05, "loss": 1.5529, "step": 303100 }, { "epoch": 1.3, "learning_rate": 3.408429632910788e-05, "loss": 1.5409, "step": 303200 }, { "epoch": 1.3, "learning_rate": 3.407886794255464e-05, "loss": 1.53, "step": 303300 }, { "epoch": 1.3, "learning_rate": 3.407343955600141e-05, "loss": 1.5868, "step": 303400 }, { "epoch": 1.3, "learning_rate": 3.406801116944817e-05, "loss": 1.5303, "step": 303500 }, { "epoch": 1.3, "learning_rate": 3.4062582782894934e-05, "loss": 1.5679, "step": 303600 }, { "epoch": 1.3, "learning_rate": 3.405715439634171e-05, "loss": 1.5392, "step": 303700 }, { "epoch": 1.31, "learning_rate": 3.405172600978847e-05, "loss": 1.5518, "step": 303800 }, { "epoch": 1.31, "learning_rate": 3.404629762323523e-05, "loss": 1.5485, "step": 303900 }, { "epoch": 1.31, "learning_rate": 3.4040869236682e-05, "loss": 1.5709, "step": 304000 }, { "epoch": 1.31, "learning_rate": 3.4035440850128763e-05, "loss": 1.558, "step": 304100 }, { "epoch": 1.31, "learning_rate": 3.4030012463575525e-05, "loss": 1.5485, "step": 304200 }, { "epoch": 1.31, "learning_rate": 3.4024584077022294e-05, "loss": 1.5486, "step": 304300 }, { "epoch": 1.31, "learning_rate": 3.4019155690469056e-05, "loss": 1.5748, "step": 304400 }, { "epoch": 1.31, "learning_rate": 3.401372730391582e-05, "loss": 1.5405, "step": 304500 }, { "epoch": 1.31, "learning_rate": 3.4008298917362586e-05, "loss": 1.5368, "step": 304600 }, { "epoch": 1.31, "learning_rate": 3.400287053080935e-05, "loss": 1.5462, "step": 304700 }, { "epoch": 1.31, "learning_rate": 3.399744214425612e-05, "loss": 1.5653, "step": 304800 }, { "epoch": 1.31, "learning_rate": 3.3992013757702886e-05, "loss": 1.5397, "step": 304900 }, { "epoch": 1.31, "learning_rate": 3.398658537114965e-05, "loss": 1.5411, "step": 305000 }, { "epoch": 1.31, "eval_loss": 1.4525820016860962, "eval_runtime": 17.7928, "eval_samples_per_second": 562.025, "eval_steps_per_second": 17.591, "step": 305000 }, { "epoch": 1.31, "learning_rate": 3.3981156984596416e-05, "loss": 1.5321, "step": 305100 }, { "epoch": 1.31, "learning_rate": 3.397572859804318e-05, "loss": 1.5782, "step": 305200 }, { "epoch": 1.31, "learning_rate": 3.397030021148994e-05, "loss": 1.5761, "step": 305300 }, { "epoch": 1.31, "learning_rate": 3.396487182493671e-05, "loss": 1.5259, "step": 305400 }, { "epoch": 1.31, "learning_rate": 3.395944343838347e-05, "loss": 1.5641, "step": 305500 }, { "epoch": 1.31, "learning_rate": 3.395401505183023e-05, "loss": 1.5323, "step": 305600 }, { "epoch": 1.31, "learning_rate": 3.3948586665277e-05, "loss": 1.555, "step": 305700 }, { "epoch": 1.31, "learning_rate": 3.394315827872377e-05, "loss": 1.5812, "step": 305800 }, { "epoch": 1.31, "learning_rate": 3.393772989217053e-05, "loss": 1.5379, "step": 305900 }, { "epoch": 1.31, "learning_rate": 3.39323015056173e-05, "loss": 1.5659, "step": 306000 }, { "epoch": 1.32, "learning_rate": 3.392687311906406e-05, "loss": 1.5332, "step": 306100 }, { "epoch": 1.32, "learning_rate": 3.3921444732510824e-05, "loss": 1.5472, "step": 306200 }, { "epoch": 1.32, "learning_rate": 3.391601634595759e-05, "loss": 1.5734, "step": 306300 }, { "epoch": 1.32, "learning_rate": 3.3910587959404354e-05, "loss": 1.5283, "step": 306400 }, { "epoch": 1.32, "learning_rate": 3.3905159572851116e-05, "loss": 1.5499, "step": 306500 }, { "epoch": 1.32, "learning_rate": 3.3899731186297885e-05, "loss": 1.5867, "step": 306600 }, { "epoch": 1.32, "learning_rate": 3.389430279974465e-05, "loss": 1.5538, "step": 306700 }, { "epoch": 1.32, "learning_rate": 3.3888874413191415e-05, "loss": 1.5579, "step": 306800 }, { "epoch": 1.32, "learning_rate": 3.3883446026638184e-05, "loss": 1.5155, "step": 306900 }, { "epoch": 1.32, "learning_rate": 3.3878017640084946e-05, "loss": 1.5217, "step": 307000 }, { "epoch": 1.32, "learning_rate": 3.3872589253531715e-05, "loss": 1.555, "step": 307100 }, { "epoch": 1.32, "learning_rate": 3.3867160866978476e-05, "loss": 1.5743, "step": 307200 }, { "epoch": 1.32, "learning_rate": 3.386173248042524e-05, "loss": 1.5423, "step": 307300 }, { "epoch": 1.32, "learning_rate": 3.385630409387201e-05, "loss": 1.54, "step": 307400 }, { "epoch": 1.32, "learning_rate": 3.385087570731877e-05, "loss": 1.5613, "step": 307500 }, { "epoch": 1.32, "learning_rate": 3.384544732076553e-05, "loss": 1.572, "step": 307600 }, { "epoch": 1.32, "learning_rate": 3.38400189342123e-05, "loss": 1.5829, "step": 307700 }, { "epoch": 1.32, "learning_rate": 3.383459054765906e-05, "loss": 1.5477, "step": 307800 }, { "epoch": 1.32, "learning_rate": 3.382916216110583e-05, "loss": 1.5112, "step": 307900 }, { "epoch": 1.32, "learning_rate": 3.38237337745526e-05, "loss": 1.5316, "step": 308000 }, { "epoch": 1.32, "learning_rate": 3.381830538799936e-05, "loss": 1.5668, "step": 308100 }, { "epoch": 1.32, "learning_rate": 3.381287700144612e-05, "loss": 1.5425, "step": 308200 }, { "epoch": 1.32, "learning_rate": 3.380744861489289e-05, "loss": 1.5763, "step": 308300 }, { "epoch": 1.32, "learning_rate": 3.380202022833965e-05, "loss": 1.5412, "step": 308400 }, { "epoch": 1.33, "learning_rate": 3.3796591841786415e-05, "loss": 1.5534, "step": 308500 }, { "epoch": 1.33, "learning_rate": 3.379116345523318e-05, "loss": 1.548, "step": 308600 }, { "epoch": 1.33, "learning_rate": 3.3785735068679945e-05, "loss": 1.5331, "step": 308700 }, { "epoch": 1.33, "learning_rate": 3.3780306682126714e-05, "loss": 1.555, "step": 308800 }, { "epoch": 1.33, "learning_rate": 3.3774878295573476e-05, "loss": 1.5385, "step": 308900 }, { "epoch": 1.33, "learning_rate": 3.3769449909020244e-05, "loss": 1.5703, "step": 309000 }, { "epoch": 1.33, "learning_rate": 3.376402152246701e-05, "loss": 1.5898, "step": 309100 }, { "epoch": 1.33, "learning_rate": 3.3758593135913775e-05, "loss": 1.5576, "step": 309200 }, { "epoch": 1.33, "learning_rate": 3.375316474936054e-05, "loss": 1.5749, "step": 309300 }, { "epoch": 1.33, "learning_rate": 3.3747736362807305e-05, "loss": 1.5374, "step": 309400 }, { "epoch": 1.33, "learning_rate": 3.374230797625407e-05, "loss": 1.5718, "step": 309500 }, { "epoch": 1.33, "learning_rate": 3.373687958970083e-05, "loss": 1.5414, "step": 309600 }, { "epoch": 1.33, "learning_rate": 3.37314512031476e-05, "loss": 1.5578, "step": 309700 }, { "epoch": 1.33, "learning_rate": 3.372602281659436e-05, "loss": 1.558, "step": 309800 }, { "epoch": 1.33, "learning_rate": 3.372059443004112e-05, "loss": 1.5667, "step": 309900 }, { "epoch": 1.33, "learning_rate": 3.37151660434879e-05, "loss": 1.5619, "step": 310000 }, { "epoch": 1.33, "eval_loss": 1.4491908550262451, "eval_runtime": 17.778, "eval_samples_per_second": 562.492, "eval_steps_per_second": 17.606, "step": 310000 }, { "epoch": 1.33, "learning_rate": 3.370973765693466e-05, "loss": 1.5363, "step": 310100 }, { "epoch": 1.33, "learning_rate": 3.370430927038142e-05, "loss": 1.5495, "step": 310200 }, { "epoch": 1.33, "learning_rate": 3.369888088382819e-05, "loss": 1.5621, "step": 310300 }, { "epoch": 1.33, "learning_rate": 3.369345249727495e-05, "loss": 1.5361, "step": 310400 }, { "epoch": 1.33, "learning_rate": 3.368802411072171e-05, "loss": 1.534, "step": 310500 }, { "epoch": 1.33, "learning_rate": 3.368259572416848e-05, "loss": 1.5424, "step": 310600 }, { "epoch": 1.33, "learning_rate": 3.3677167337615244e-05, "loss": 1.5508, "step": 310700 }, { "epoch": 1.34, "learning_rate": 3.367173895106201e-05, "loss": 1.556, "step": 310800 }, { "epoch": 1.34, "learning_rate": 3.3666310564508774e-05, "loss": 1.5717, "step": 310900 }, { "epoch": 1.34, "learning_rate": 3.3660882177955536e-05, "loss": 1.538, "step": 311000 }, { "epoch": 1.34, "learning_rate": 3.365545379140231e-05, "loss": 1.5567, "step": 311100 }, { "epoch": 1.34, "learning_rate": 3.3650025404849073e-05, "loss": 1.5566, "step": 311200 }, { "epoch": 1.34, "learning_rate": 3.3644597018295835e-05, "loss": 1.5158, "step": 311300 }, { "epoch": 1.34, "learning_rate": 3.3639168631742604e-05, "loss": 1.5473, "step": 311400 }, { "epoch": 1.34, "learning_rate": 3.3633740245189366e-05, "loss": 1.5722, "step": 311500 }, { "epoch": 1.34, "learning_rate": 3.362831185863613e-05, "loss": 1.5427, "step": 311600 }, { "epoch": 1.34, "learning_rate": 3.3622883472082896e-05, "loss": 1.5306, "step": 311700 }, { "epoch": 1.34, "learning_rate": 3.361745508552966e-05, "loss": 1.5336, "step": 311800 }, { "epoch": 1.34, "learning_rate": 3.361202669897642e-05, "loss": 1.5604, "step": 311900 }, { "epoch": 1.34, "learning_rate": 3.360659831242319e-05, "loss": 1.5639, "step": 312000 }, { "epoch": 1.34, "learning_rate": 3.360116992586996e-05, "loss": 1.5306, "step": 312100 }, { "epoch": 1.34, "learning_rate": 3.359574153931672e-05, "loss": 1.5653, "step": 312200 }, { "epoch": 1.34, "learning_rate": 3.359031315276349e-05, "loss": 1.5584, "step": 312300 }, { "epoch": 1.34, "learning_rate": 3.358488476621025e-05, "loss": 1.5973, "step": 312400 }, { "epoch": 1.34, "learning_rate": 3.357945637965701e-05, "loss": 1.5377, "step": 312500 }, { "epoch": 1.34, "learning_rate": 3.357402799310378e-05, "loss": 1.5437, "step": 312600 }, { "epoch": 1.34, "learning_rate": 3.356859960655054e-05, "loss": 1.5638, "step": 312700 }, { "epoch": 1.34, "learning_rate": 3.356317121999731e-05, "loss": 1.5189, "step": 312800 }, { "epoch": 1.34, "learning_rate": 3.355774283344407e-05, "loss": 1.5334, "step": 312900 }, { "epoch": 1.34, "learning_rate": 3.3552314446890835e-05, "loss": 1.5373, "step": 313000 }, { "epoch": 1.35, "learning_rate": 3.35468860603376e-05, "loss": 1.5312, "step": 313100 }, { "epoch": 1.35, "learning_rate": 3.354145767378437e-05, "loss": 1.5472, "step": 313200 }, { "epoch": 1.35, "learning_rate": 3.3536029287231134e-05, "loss": 1.5665, "step": 313300 }, { "epoch": 1.35, "learning_rate": 3.35306009006779e-05, "loss": 1.5235, "step": 313400 }, { "epoch": 1.35, "learning_rate": 3.3525172514124664e-05, "loss": 1.5534, "step": 313500 }, { "epoch": 1.35, "learning_rate": 3.3519744127571426e-05, "loss": 1.5593, "step": 313600 }, { "epoch": 1.35, "learning_rate": 3.3514315741018195e-05, "loss": 1.5607, "step": 313700 }, { "epoch": 1.35, "learning_rate": 3.350888735446496e-05, "loss": 1.5367, "step": 313800 }, { "epoch": 1.35, "learning_rate": 3.350345896791172e-05, "loss": 1.5468, "step": 313900 }, { "epoch": 1.35, "learning_rate": 3.349803058135849e-05, "loss": 1.5385, "step": 314000 }, { "epoch": 1.35, "learning_rate": 3.349260219480525e-05, "loss": 1.59, "step": 314100 }, { "epoch": 1.35, "learning_rate": 3.348717380825202e-05, "loss": 1.5429, "step": 314200 }, { "epoch": 1.35, "learning_rate": 3.3481745421698786e-05, "loss": 1.5347, "step": 314300 }, { "epoch": 1.35, "learning_rate": 3.347631703514555e-05, "loss": 1.5435, "step": 314400 }, { "epoch": 1.35, "learning_rate": 3.347088864859231e-05, "loss": 1.5158, "step": 314500 }, { "epoch": 1.35, "learning_rate": 3.346546026203908e-05, "loss": 1.5348, "step": 314600 }, { "epoch": 1.35, "learning_rate": 3.346003187548584e-05, "loss": 1.5336, "step": 314700 }, { "epoch": 1.35, "learning_rate": 3.345460348893261e-05, "loss": 1.5486, "step": 314800 }, { "epoch": 1.35, "learning_rate": 3.344917510237937e-05, "loss": 1.5407, "step": 314900 }, { "epoch": 1.35, "learning_rate": 3.344374671582613e-05, "loss": 1.5393, "step": 315000 }, { "epoch": 1.35, "eval_loss": 1.4514410495758057, "eval_runtime": 17.8233, "eval_samples_per_second": 561.064, "eval_steps_per_second": 17.561, "step": 315000 }, { "epoch": 1.35, "learning_rate": 3.34383183292729e-05, "loss": 1.5579, "step": 315100 }, { "epoch": 1.35, "learning_rate": 3.3432889942719664e-05, "loss": 1.5455, "step": 315200 }, { "epoch": 1.35, "learning_rate": 3.342746155616643e-05, "loss": 1.514, "step": 315300 }, { "epoch": 1.35, "learning_rate": 3.34220331696132e-05, "loss": 1.5846, "step": 315400 }, { "epoch": 1.36, "learning_rate": 3.341660478305996e-05, "loss": 1.5471, "step": 315500 }, { "epoch": 1.36, "learning_rate": 3.3411176396506725e-05, "loss": 1.5392, "step": 315600 }, { "epoch": 1.36, "learning_rate": 3.340574800995349e-05, "loss": 1.5526, "step": 315700 }, { "epoch": 1.36, "learning_rate": 3.3400319623400255e-05, "loss": 1.5548, "step": 315800 }, { "epoch": 1.36, "learning_rate": 3.339489123684702e-05, "loss": 1.545, "step": 315900 }, { "epoch": 1.36, "learning_rate": 3.3389462850293786e-05, "loss": 1.5535, "step": 316000 }, { "epoch": 1.36, "learning_rate": 3.338403446374055e-05, "loss": 1.5481, "step": 316100 }, { "epoch": 1.36, "learning_rate": 3.3378606077187316e-05, "loss": 1.5492, "step": 316200 }, { "epoch": 1.36, "learning_rate": 3.3373177690634085e-05, "loss": 1.549, "step": 316300 }, { "epoch": 1.36, "learning_rate": 3.336774930408085e-05, "loss": 1.5482, "step": 316400 }, { "epoch": 1.36, "learning_rate": 3.336232091752761e-05, "loss": 1.5448, "step": 316500 }, { "epoch": 1.36, "learning_rate": 3.335689253097438e-05, "loss": 1.5604, "step": 316600 }, { "epoch": 1.36, "learning_rate": 3.335146414442114e-05, "loss": 1.5651, "step": 316700 }, { "epoch": 1.36, "learning_rate": 3.334603575786791e-05, "loss": 1.5387, "step": 316800 }, { "epoch": 1.36, "learning_rate": 3.334060737131467e-05, "loss": 1.5601, "step": 316900 }, { "epoch": 1.36, "learning_rate": 3.333517898476143e-05, "loss": 1.5667, "step": 317000 }, { "epoch": 1.36, "learning_rate": 3.33297505982082e-05, "loss": 1.5659, "step": 317100 }, { "epoch": 1.36, "learning_rate": 3.332432221165496e-05, "loss": 1.5477, "step": 317200 }, { "epoch": 1.36, "learning_rate": 3.3318893825101724e-05, "loss": 1.5303, "step": 317300 }, { "epoch": 1.36, "learning_rate": 3.33134654385485e-05, "loss": 1.5476, "step": 317400 }, { "epoch": 1.36, "learning_rate": 3.330803705199526e-05, "loss": 1.56, "step": 317500 }, { "epoch": 1.36, "learning_rate": 3.330260866544202e-05, "loss": 1.5144, "step": 317600 }, { "epoch": 1.36, "learning_rate": 3.329718027888879e-05, "loss": 1.5498, "step": 317700 }, { "epoch": 1.37, "learning_rate": 3.3291751892335554e-05, "loss": 1.5201, "step": 317800 }, { "epoch": 1.37, "learning_rate": 3.3286323505782316e-05, "loss": 1.5448, "step": 317900 }, { "epoch": 1.37, "learning_rate": 3.3280895119229084e-05, "loss": 1.5226, "step": 318000 }, { "epoch": 1.37, "learning_rate": 3.3275466732675846e-05, "loss": 1.5646, "step": 318100 }, { "epoch": 1.37, "learning_rate": 3.3270038346122615e-05, "loss": 1.5561, "step": 318200 }, { "epoch": 1.37, "learning_rate": 3.3264609959569377e-05, "loss": 1.5407, "step": 318300 }, { "epoch": 1.37, "learning_rate": 3.3259181573016145e-05, "loss": 1.5461, "step": 318400 }, { "epoch": 1.37, "learning_rate": 3.325375318646291e-05, "loss": 1.5291, "step": 318500 }, { "epoch": 1.37, "learning_rate": 3.3248324799909676e-05, "loss": 1.5518, "step": 318600 }, { "epoch": 1.37, "learning_rate": 3.324289641335644e-05, "loss": 1.5508, "step": 318700 }, { "epoch": 1.37, "learning_rate": 3.3237468026803206e-05, "loss": 1.5272, "step": 318800 }, { "epoch": 1.37, "learning_rate": 3.323203964024997e-05, "loss": 1.573, "step": 318900 }, { "epoch": 1.37, "learning_rate": 3.322661125369673e-05, "loss": 1.5179, "step": 319000 }, { "epoch": 1.37, "learning_rate": 3.32211828671435e-05, "loss": 1.5202, "step": 319100 }, { "epoch": 1.37, "learning_rate": 3.321575448059026e-05, "loss": 1.5343, "step": 319200 }, { "epoch": 1.37, "learning_rate": 3.321032609403702e-05, "loss": 1.5627, "step": 319300 }, { "epoch": 1.37, "learning_rate": 3.320489770748379e-05, "loss": 1.5392, "step": 319400 }, { "epoch": 1.37, "learning_rate": 3.319946932093056e-05, "loss": 1.5463, "step": 319500 }, { "epoch": 1.37, "learning_rate": 3.319404093437732e-05, "loss": 1.5394, "step": 319600 }, { "epoch": 1.37, "learning_rate": 3.318861254782409e-05, "loss": 1.5631, "step": 319700 }, { "epoch": 1.37, "learning_rate": 3.318318416127085e-05, "loss": 1.553, "step": 319800 }, { "epoch": 1.37, "learning_rate": 3.3177755774717614e-05, "loss": 1.5476, "step": 319900 }, { "epoch": 1.37, "learning_rate": 3.317232738816438e-05, "loss": 1.5625, "step": 320000 }, { "epoch": 1.37, "eval_loss": 1.449994683265686, "eval_runtime": 17.8575, "eval_samples_per_second": 559.99, "eval_steps_per_second": 17.528, "step": 320000 }, { "epoch": 1.38, "learning_rate": 3.3166899001611145e-05, "loss": 1.5589, "step": 320100 }, { "epoch": 1.38, "learning_rate": 3.316147061505791e-05, "loss": 1.5389, "step": 320200 }, { "epoch": 1.38, "learning_rate": 3.3156042228504675e-05, "loss": 1.5409, "step": 320300 }, { "epoch": 1.38, "learning_rate": 3.315061384195144e-05, "loss": 1.578, "step": 320400 }, { "epoch": 1.38, "learning_rate": 3.3145185455398206e-05, "loss": 1.5632, "step": 320500 }, { "epoch": 1.38, "learning_rate": 3.3139757068844974e-05, "loss": 1.5289, "step": 320600 }, { "epoch": 1.38, "learning_rate": 3.3134328682291736e-05, "loss": 1.5462, "step": 320700 }, { "epoch": 1.38, "learning_rate": 3.3128900295738505e-05, "loss": 1.516, "step": 320800 }, { "epoch": 1.38, "learning_rate": 3.312347190918527e-05, "loss": 1.5305, "step": 320900 }, { "epoch": 1.38, "learning_rate": 3.311804352263203e-05, "loss": 1.5529, "step": 321000 }, { "epoch": 1.38, "learning_rate": 3.31126151360788e-05, "loss": 1.5553, "step": 321100 }, { "epoch": 1.38, "learning_rate": 3.310718674952556e-05, "loss": 1.5441, "step": 321200 }, { "epoch": 1.38, "learning_rate": 3.310175836297232e-05, "loss": 1.5222, "step": 321300 }, { "epoch": 1.38, "learning_rate": 3.309632997641909e-05, "loss": 1.5558, "step": 321400 }, { "epoch": 1.38, "learning_rate": 3.309090158986585e-05, "loss": 1.5436, "step": 321500 }, { "epoch": 1.38, "learning_rate": 3.308547320331262e-05, "loss": 1.5389, "step": 321600 }, { "epoch": 1.38, "learning_rate": 3.308004481675939e-05, "loss": 1.5522, "step": 321700 }, { "epoch": 1.38, "learning_rate": 3.307461643020615e-05, "loss": 1.5622, "step": 321800 }, { "epoch": 1.38, "learning_rate": 3.306918804365291e-05, "loss": 1.565, "step": 321900 }, { "epoch": 1.38, "learning_rate": 3.306375965709968e-05, "loss": 1.5737, "step": 322000 }, { "epoch": 1.38, "learning_rate": 3.305833127054644e-05, "loss": 1.5529, "step": 322100 }, { "epoch": 1.38, "learning_rate": 3.305290288399321e-05, "loss": 1.5312, "step": 322200 }, { "epoch": 1.38, "learning_rate": 3.3047474497439974e-05, "loss": 1.547, "step": 322300 }, { "epoch": 1.39, "learning_rate": 3.3042046110886735e-05, "loss": 1.5254, "step": 322400 }, { "epoch": 1.39, "learning_rate": 3.3036617724333504e-05, "loss": 1.542, "step": 322500 }, { "epoch": 1.39, "learning_rate": 3.303118933778027e-05, "loss": 1.5424, "step": 322600 }, { "epoch": 1.39, "learning_rate": 3.3025760951227035e-05, "loss": 1.5567, "step": 322700 }, { "epoch": 1.39, "learning_rate": 3.30203325646738e-05, "loss": 1.5519, "step": 322800 }, { "epoch": 1.39, "learning_rate": 3.3014904178120565e-05, "loss": 1.5261, "step": 322900 }, { "epoch": 1.39, "learning_rate": 3.300947579156733e-05, "loss": 1.5377, "step": 323000 }, { "epoch": 1.39, "learning_rate": 3.3004047405014096e-05, "loss": 1.5222, "step": 323100 }, { "epoch": 1.39, "learning_rate": 3.299861901846086e-05, "loss": 1.5657, "step": 323200 }, { "epoch": 1.39, "learning_rate": 3.299319063190762e-05, "loss": 1.593, "step": 323300 }, { "epoch": 1.39, "learning_rate": 3.298776224535439e-05, "loss": 1.579, "step": 323400 }, { "epoch": 1.39, "learning_rate": 3.298233385880115e-05, "loss": 1.5289, "step": 323500 }, { "epoch": 1.39, "learning_rate": 3.297690547224792e-05, "loss": 1.5249, "step": 323600 }, { "epoch": 1.39, "learning_rate": 3.297147708569469e-05, "loss": 1.5064, "step": 323700 }, { "epoch": 1.39, "learning_rate": 3.296604869914145e-05, "loss": 1.563, "step": 323800 }, { "epoch": 1.39, "learning_rate": 3.296062031258821e-05, "loss": 1.5391, "step": 323900 }, { "epoch": 1.39, "learning_rate": 3.295519192603498e-05, "loss": 1.5499, "step": 324000 }, { "epoch": 1.39, "learning_rate": 3.294976353948174e-05, "loss": 1.533, "step": 324100 }, { "epoch": 1.39, "learning_rate": 3.294433515292851e-05, "loss": 1.5583, "step": 324200 }, { "epoch": 1.39, "learning_rate": 3.293890676637527e-05, "loss": 1.5807, "step": 324300 }, { "epoch": 1.39, "learning_rate": 3.2933478379822034e-05, "loss": 1.5936, "step": 324400 }, { "epoch": 1.39, "learning_rate": 3.29280499932688e-05, "loss": 1.5425, "step": 324500 }, { "epoch": 1.39, "learning_rate": 3.2922621606715564e-05, "loss": 1.5584, "step": 324600 }, { "epoch": 1.39, "learning_rate": 3.291719322016233e-05, "loss": 1.5303, "step": 324700 }, { "epoch": 1.4, "learning_rate": 3.29117648336091e-05, "loss": 1.513, "step": 324800 }, { "epoch": 1.4, "learning_rate": 3.2906336447055864e-05, "loss": 1.5649, "step": 324900 }, { "epoch": 1.4, "learning_rate": 3.2900908060502626e-05, "loss": 1.5681, "step": 325000 }, { "epoch": 1.4, "eval_loss": 1.451581597328186, "eval_runtime": 17.7955, "eval_samples_per_second": 561.941, "eval_steps_per_second": 17.589, "step": 325000 }, { "epoch": 1.4, "learning_rate": 3.2895479673949394e-05, "loss": 1.5595, "step": 325100 }, { "epoch": 1.4, "learning_rate": 3.2890051287396156e-05, "loss": 1.571, "step": 325200 }, { "epoch": 1.4, "learning_rate": 3.288462290084292e-05, "loss": 1.5265, "step": 325300 }, { "epoch": 1.4, "learning_rate": 3.2879194514289687e-05, "loss": 1.5329, "step": 325400 }, { "epoch": 1.4, "learning_rate": 3.287376612773645e-05, "loss": 1.5374, "step": 325500 }, { "epoch": 1.4, "learning_rate": 3.286833774118322e-05, "loss": 1.5731, "step": 325600 }, { "epoch": 1.4, "learning_rate": 3.2862909354629986e-05, "loss": 1.5147, "step": 325700 }, { "epoch": 1.4, "learning_rate": 3.285748096807675e-05, "loss": 1.5584, "step": 325800 }, { "epoch": 1.4, "learning_rate": 3.285205258152351e-05, "loss": 1.5324, "step": 325900 }, { "epoch": 1.4, "learning_rate": 3.284662419497028e-05, "loss": 1.5545, "step": 326000 }, { "epoch": 1.4, "learning_rate": 3.284119580841704e-05, "loss": 1.5175, "step": 326100 }, { "epoch": 1.4, "learning_rate": 3.283576742186381e-05, "loss": 1.5394, "step": 326200 }, { "epoch": 1.4, "learning_rate": 3.283033903531057e-05, "loss": 1.5742, "step": 326300 }, { "epoch": 1.4, "learning_rate": 3.282491064875733e-05, "loss": 1.5592, "step": 326400 }, { "epoch": 1.4, "learning_rate": 3.28194822622041e-05, "loss": 1.5361, "step": 326500 }, { "epoch": 1.4, "learning_rate": 3.281405387565086e-05, "loss": 1.5426, "step": 326600 }, { "epoch": 1.4, "learning_rate": 3.2808625489097625e-05, "loss": 1.5495, "step": 326700 }, { "epoch": 1.4, "learning_rate": 3.28031971025444e-05, "loss": 1.5557, "step": 326800 }, { "epoch": 1.4, "learning_rate": 3.279776871599116e-05, "loss": 1.5362, "step": 326900 }, { "epoch": 1.4, "learning_rate": 3.2792340329437924e-05, "loss": 1.5475, "step": 327000 }, { "epoch": 1.41, "learning_rate": 3.278691194288469e-05, "loss": 1.573, "step": 327100 }, { "epoch": 1.41, "learning_rate": 3.2781483556331455e-05, "loss": 1.5561, "step": 327200 }, { "epoch": 1.41, "learning_rate": 3.2776055169778216e-05, "loss": 1.563, "step": 327300 }, { "epoch": 1.41, "learning_rate": 3.2770626783224985e-05, "loss": 1.5532, "step": 327400 }, { "epoch": 1.41, "learning_rate": 3.276519839667175e-05, "loss": 1.5384, "step": 327500 }, { "epoch": 1.41, "learning_rate": 3.2759770010118516e-05, "loss": 1.5402, "step": 327600 }, { "epoch": 1.41, "learning_rate": 3.275434162356528e-05, "loss": 1.5416, "step": 327700 }, { "epoch": 1.41, "learning_rate": 3.2748913237012046e-05, "loss": 1.5601, "step": 327800 }, { "epoch": 1.41, "learning_rate": 3.274348485045881e-05, "loss": 1.5608, "step": 327900 }, { "epoch": 1.41, "learning_rate": 3.273805646390558e-05, "loss": 1.5469, "step": 328000 }, { "epoch": 1.41, "learning_rate": 3.273262807735234e-05, "loss": 1.5253, "step": 328100 }, { "epoch": 1.41, "learning_rate": 3.272719969079911e-05, "loss": 1.5558, "step": 328200 }, { "epoch": 1.41, "learning_rate": 3.272177130424587e-05, "loss": 1.5568, "step": 328300 }, { "epoch": 1.41, "learning_rate": 3.271634291769263e-05, "loss": 1.4968, "step": 328400 }, { "epoch": 1.41, "learning_rate": 3.27109145311394e-05, "loss": 1.5454, "step": 328500 }, { "epoch": 1.41, "learning_rate": 3.270548614458616e-05, "loss": 1.5199, "step": 328600 }, { "epoch": 1.41, "learning_rate": 3.270005775803292e-05, "loss": 1.5302, "step": 328700 }, { "epoch": 1.41, "learning_rate": 3.269462937147969e-05, "loss": 1.5254, "step": 328800 }, { "epoch": 1.41, "learning_rate": 3.268920098492646e-05, "loss": 1.5374, "step": 328900 }, { "epoch": 1.41, "learning_rate": 3.268377259837322e-05, "loss": 1.5471, "step": 329000 }, { "epoch": 1.41, "learning_rate": 3.267834421181999e-05, "loss": 1.5328, "step": 329100 }, { "epoch": 1.41, "learning_rate": 3.267291582526675e-05, "loss": 1.5556, "step": 329200 }, { "epoch": 1.41, "learning_rate": 3.2667487438713515e-05, "loss": 1.5498, "step": 329300 }, { "epoch": 1.42, "learning_rate": 3.2662059052160284e-05, "loss": 1.5521, "step": 329400 }, { "epoch": 1.42, "learning_rate": 3.2656630665607045e-05, "loss": 1.551, "step": 329500 }, { "epoch": 1.42, "learning_rate": 3.2651202279053814e-05, "loss": 1.5019, "step": 329600 }, { "epoch": 1.42, "learning_rate": 3.2645773892500576e-05, "loss": 1.5463, "step": 329700 }, { "epoch": 1.42, "learning_rate": 3.264034550594734e-05, "loss": 1.5388, "step": 329800 }, { "epoch": 1.42, "learning_rate": 3.2634917119394106e-05, "loss": 1.5319, "step": 329900 }, { "epoch": 1.42, "learning_rate": 3.2629488732840875e-05, "loss": 1.5459, "step": 330000 }, { "epoch": 1.42, "eval_loss": 1.4472662210464478, "eval_runtime": 17.7977, "eval_samples_per_second": 561.871, "eval_steps_per_second": 17.587, "step": 330000 }, { "epoch": 1.42, "learning_rate": 3.262406034628764e-05, "loss": 1.5692, "step": 330100 }, { "epoch": 1.42, "learning_rate": 3.2618631959734406e-05, "loss": 1.5344, "step": 330200 }, { "epoch": 1.42, "learning_rate": 3.261320357318117e-05, "loss": 1.5314, "step": 330300 }, { "epoch": 1.42, "learning_rate": 3.260777518662793e-05, "loss": 1.5389, "step": 330400 }, { "epoch": 1.42, "learning_rate": 3.26023468000747e-05, "loss": 1.5467, "step": 330500 }, { "epoch": 1.42, "learning_rate": 3.259691841352146e-05, "loss": 1.5338, "step": 330600 }, { "epoch": 1.42, "learning_rate": 3.259149002696822e-05, "loss": 1.5669, "step": 330700 }, { "epoch": 1.42, "learning_rate": 3.258606164041499e-05, "loss": 1.5344, "step": 330800 }, { "epoch": 1.42, "learning_rate": 3.258063325386175e-05, "loss": 1.5236, "step": 330900 }, { "epoch": 1.42, "learning_rate": 3.257520486730852e-05, "loss": 1.5438, "step": 331000 }, { "epoch": 1.42, "learning_rate": 3.256977648075529e-05, "loss": 1.5269, "step": 331100 }, { "epoch": 1.42, "learning_rate": 3.256434809420205e-05, "loss": 1.5401, "step": 331200 }, { "epoch": 1.42, "learning_rate": 3.255891970764881e-05, "loss": 1.5533, "step": 331300 }, { "epoch": 1.42, "learning_rate": 3.255349132109558e-05, "loss": 1.5514, "step": 331400 }, { "epoch": 1.42, "learning_rate": 3.2548062934542344e-05, "loss": 1.5562, "step": 331500 }, { "epoch": 1.42, "learning_rate": 3.254263454798911e-05, "loss": 1.5358, "step": 331600 }, { "epoch": 1.43, "learning_rate": 3.2537206161435874e-05, "loss": 1.5287, "step": 331700 }, { "epoch": 1.43, "learning_rate": 3.2531777774882636e-05, "loss": 1.5347, "step": 331800 }, { "epoch": 1.43, "learning_rate": 3.2526349388329405e-05, "loss": 1.5572, "step": 331900 }, { "epoch": 1.43, "learning_rate": 3.2520921001776174e-05, "loss": 1.5723, "step": 332000 }, { "epoch": 1.43, "learning_rate": 3.2515492615222935e-05, "loss": 1.5383, "step": 332100 }, { "epoch": 1.43, "learning_rate": 3.2510064228669704e-05, "loss": 1.5197, "step": 332200 }, { "epoch": 1.43, "learning_rate": 3.2504635842116466e-05, "loss": 1.5499, "step": 332300 }, { "epoch": 1.43, "learning_rate": 3.249920745556323e-05, "loss": 1.5379, "step": 332400 }, { "epoch": 1.43, "learning_rate": 3.2493779069009997e-05, "loss": 1.5418, "step": 332500 }, { "epoch": 1.43, "learning_rate": 3.248835068245676e-05, "loss": 1.5495, "step": 332600 }, { "epoch": 1.43, "learning_rate": 3.248292229590352e-05, "loss": 1.5348, "step": 332700 }, { "epoch": 1.43, "learning_rate": 3.247749390935029e-05, "loss": 1.55, "step": 332800 }, { "epoch": 1.43, "learning_rate": 3.247206552279705e-05, "loss": 1.5507, "step": 332900 }, { "epoch": 1.43, "learning_rate": 3.246663713624381e-05, "loss": 1.5419, "step": 333000 }, { "epoch": 1.43, "learning_rate": 3.246120874969059e-05, "loss": 1.5527, "step": 333100 }, { "epoch": 1.43, "learning_rate": 3.245578036313735e-05, "loss": 1.5638, "step": 333200 }, { "epoch": 1.43, "learning_rate": 3.245035197658411e-05, "loss": 1.5521, "step": 333300 }, { "epoch": 1.43, "learning_rate": 3.244492359003088e-05, "loss": 1.5566, "step": 333400 }, { "epoch": 1.43, "learning_rate": 3.243949520347764e-05, "loss": 1.5747, "step": 333500 }, { "epoch": 1.43, "learning_rate": 3.243406681692441e-05, "loss": 1.5436, "step": 333600 }, { "epoch": 1.43, "learning_rate": 3.242863843037117e-05, "loss": 1.5771, "step": 333700 }, { "epoch": 1.43, "learning_rate": 3.2423210043817935e-05, "loss": 1.5296, "step": 333800 }, { "epoch": 1.43, "learning_rate": 3.2417781657264703e-05, "loss": 1.5294, "step": 333900 }, { "epoch": 1.43, "learning_rate": 3.2412353270711465e-05, "loss": 1.5422, "step": 334000 }, { "epoch": 1.44, "learning_rate": 3.2406924884158234e-05, "loss": 1.5428, "step": 334100 }, { "epoch": 1.44, "learning_rate": 3.2401496497605e-05, "loss": 1.5283, "step": 334200 }, { "epoch": 1.44, "learning_rate": 3.2396068111051764e-05, "loss": 1.559, "step": 334300 }, { "epoch": 1.44, "learning_rate": 3.2390639724498526e-05, "loss": 1.5509, "step": 334400 }, { "epoch": 1.44, "learning_rate": 3.2385211337945295e-05, "loss": 1.5446, "step": 334500 }, { "epoch": 1.44, "learning_rate": 3.237978295139206e-05, "loss": 1.5647, "step": 334600 }, { "epoch": 1.44, "learning_rate": 3.237435456483882e-05, "loss": 1.5359, "step": 334700 }, { "epoch": 1.44, "learning_rate": 3.236892617828559e-05, "loss": 1.5501, "step": 334800 }, { "epoch": 1.44, "learning_rate": 3.236349779173235e-05, "loss": 1.5473, "step": 334900 }, { "epoch": 1.44, "learning_rate": 3.235806940517911e-05, "loss": 1.5413, "step": 335000 }, { "epoch": 1.44, "eval_loss": 1.4457744359970093, "eval_runtime": 17.8184, "eval_samples_per_second": 561.217, "eval_steps_per_second": 17.566, "step": 335000 }, { "epoch": 1.44, "learning_rate": 3.235264101862588e-05, "loss": 1.5361, "step": 335100 }, { "epoch": 1.44, "learning_rate": 3.234721263207265e-05, "loss": 1.5402, "step": 335200 }, { "epoch": 1.44, "learning_rate": 3.234178424551941e-05, "loss": 1.5459, "step": 335300 }, { "epoch": 1.44, "learning_rate": 3.233635585896618e-05, "loss": 1.5213, "step": 335400 }, { "epoch": 1.44, "learning_rate": 3.233092747241294e-05, "loss": 1.5281, "step": 335500 }, { "epoch": 1.44, "learning_rate": 3.232549908585971e-05, "loss": 1.547, "step": 335600 }, { "epoch": 1.44, "learning_rate": 3.232007069930647e-05, "loss": 1.5668, "step": 335700 }, { "epoch": 1.44, "learning_rate": 3.231464231275323e-05, "loss": 1.533, "step": 335800 }, { "epoch": 1.44, "learning_rate": 3.23092139262e-05, "loss": 1.5421, "step": 335900 }, { "epoch": 1.44, "learning_rate": 3.2303785539646764e-05, "loss": 1.5544, "step": 336000 }, { "epoch": 1.44, "learning_rate": 3.2298357153093526e-05, "loss": 1.5418, "step": 336100 }, { "epoch": 1.44, "learning_rate": 3.22929287665403e-05, "loss": 1.5508, "step": 336200 }, { "epoch": 1.44, "learning_rate": 3.228750037998706e-05, "loss": 1.5832, "step": 336300 }, { "epoch": 1.45, "learning_rate": 3.2282071993433825e-05, "loss": 1.5215, "step": 336400 }, { "epoch": 1.45, "learning_rate": 3.2276643606880594e-05, "loss": 1.5246, "step": 336500 }, { "epoch": 1.45, "learning_rate": 3.2271215220327355e-05, "loss": 1.5504, "step": 336600 }, { "epoch": 1.45, "learning_rate": 3.226578683377412e-05, "loss": 1.5488, "step": 336700 }, { "epoch": 1.45, "learning_rate": 3.2260358447220886e-05, "loss": 1.5597, "step": 336800 }, { "epoch": 1.45, "learning_rate": 3.225493006066765e-05, "loss": 1.5257, "step": 336900 }, { "epoch": 1.45, "learning_rate": 3.224950167411441e-05, "loss": 1.5254, "step": 337000 }, { "epoch": 1.45, "learning_rate": 3.224407328756118e-05, "loss": 1.5362, "step": 337100 }, { "epoch": 1.45, "learning_rate": 3.223864490100794e-05, "loss": 1.5569, "step": 337200 }, { "epoch": 1.45, "learning_rate": 3.223321651445471e-05, "loss": 1.5443, "step": 337300 }, { "epoch": 1.45, "learning_rate": 3.222778812790148e-05, "loss": 1.5064, "step": 337400 }, { "epoch": 1.45, "learning_rate": 3.222235974134824e-05, "loss": 1.5525, "step": 337500 }, { "epoch": 1.45, "learning_rate": 3.221693135479501e-05, "loss": 1.5267, "step": 337600 }, { "epoch": 1.45, "learning_rate": 3.221150296824177e-05, "loss": 1.498, "step": 337700 }, { "epoch": 1.45, "learning_rate": 3.220607458168853e-05, "loss": 1.5337, "step": 337800 }, { "epoch": 1.45, "learning_rate": 3.22006461951353e-05, "loss": 1.5393, "step": 337900 }, { "epoch": 1.45, "learning_rate": 3.219521780858206e-05, "loss": 1.5382, "step": 338000 }, { "epoch": 1.45, "learning_rate": 3.2189789422028824e-05, "loss": 1.5627, "step": 338100 }, { "epoch": 1.45, "learning_rate": 3.218436103547559e-05, "loss": 1.531, "step": 338200 }, { "epoch": 1.45, "learning_rate": 3.217893264892236e-05, "loss": 1.5392, "step": 338300 }, { "epoch": 1.45, "learning_rate": 3.217350426236912e-05, "loss": 1.5617, "step": 338400 }, { "epoch": 1.45, "learning_rate": 3.216807587581589e-05, "loss": 1.5522, "step": 338500 }, { "epoch": 1.45, "learning_rate": 3.2162647489262654e-05, "loss": 1.5298, "step": 338600 }, { "epoch": 1.46, "learning_rate": 3.2157219102709416e-05, "loss": 1.5476, "step": 338700 }, { "epoch": 1.46, "learning_rate": 3.2151790716156184e-05, "loss": 1.542, "step": 338800 }, { "epoch": 1.46, "learning_rate": 3.2146362329602946e-05, "loss": 1.5461, "step": 338900 }, { "epoch": 1.46, "learning_rate": 3.214093394304971e-05, "loss": 1.5699, "step": 339000 }, { "epoch": 1.46, "learning_rate": 3.213550555649648e-05, "loss": 1.5064, "step": 339100 }, { "epoch": 1.46, "learning_rate": 3.213007716994324e-05, "loss": 1.5375, "step": 339200 }, { "epoch": 1.46, "learning_rate": 3.212464878339001e-05, "loss": 1.5728, "step": 339300 }, { "epoch": 1.46, "learning_rate": 3.2119220396836776e-05, "loss": 1.5027, "step": 339400 }, { "epoch": 1.46, "learning_rate": 3.211379201028354e-05, "loss": 1.5562, "step": 339500 }, { "epoch": 1.46, "learning_rate": 3.2108363623730307e-05, "loss": 1.5628, "step": 339600 }, { "epoch": 1.46, "learning_rate": 3.210293523717707e-05, "loss": 1.5528, "step": 339700 }, { "epoch": 1.46, "learning_rate": 3.209750685062383e-05, "loss": 1.5226, "step": 339800 }, { "epoch": 1.46, "learning_rate": 3.20920784640706e-05, "loss": 1.5621, "step": 339900 }, { "epoch": 1.46, "learning_rate": 3.208665007751736e-05, "loss": 1.5492, "step": 340000 }, { "epoch": 1.46, "eval_loss": 1.4450249671936035, "eval_runtime": 17.7966, "eval_samples_per_second": 561.904, "eval_steps_per_second": 17.588, "step": 340000 }, { "epoch": 1.46, "learning_rate": 3.208122169096412e-05, "loss": 1.5556, "step": 340100 }, { "epoch": 1.46, "learning_rate": 3.207579330441089e-05, "loss": 1.5418, "step": 340200 }, { "epoch": 1.46, "learning_rate": 3.207036491785765e-05, "loss": 1.5288, "step": 340300 }, { "epoch": 1.46, "learning_rate": 3.206493653130442e-05, "loss": 1.5588, "step": 340400 }, { "epoch": 1.46, "learning_rate": 3.205950814475119e-05, "loss": 1.54, "step": 340500 }, { "epoch": 1.46, "learning_rate": 3.205407975819795e-05, "loss": 1.5419, "step": 340600 }, { "epoch": 1.46, "learning_rate": 3.2048651371644714e-05, "loss": 1.5594, "step": 340700 }, { "epoch": 1.46, "learning_rate": 3.204322298509148e-05, "loss": 1.5502, "step": 340800 }, { "epoch": 1.46, "learning_rate": 3.2037794598538245e-05, "loss": 1.5399, "step": 340900 }, { "epoch": 1.46, "learning_rate": 3.203236621198501e-05, "loss": 1.5661, "step": 341000 }, { "epoch": 1.47, "learning_rate": 3.2026937825431775e-05, "loss": 1.5435, "step": 341100 }, { "epoch": 1.47, "learning_rate": 3.202150943887854e-05, "loss": 1.5386, "step": 341200 }, { "epoch": 1.47, "learning_rate": 3.2016081052325306e-05, "loss": 1.5634, "step": 341300 }, { "epoch": 1.47, "learning_rate": 3.201065266577207e-05, "loss": 1.5215, "step": 341400 }, { "epoch": 1.47, "learning_rate": 3.2005224279218836e-05, "loss": 1.5667, "step": 341500 }, { "epoch": 1.47, "learning_rate": 3.1999795892665605e-05, "loss": 1.5362, "step": 341600 }, { "epoch": 1.47, "learning_rate": 3.199436750611237e-05, "loss": 1.5467, "step": 341700 }, { "epoch": 1.47, "learning_rate": 3.198893911955913e-05, "loss": 1.5496, "step": 341800 }, { "epoch": 1.47, "learning_rate": 3.19835107330059e-05, "loss": 1.5406, "step": 341900 }, { "epoch": 1.47, "learning_rate": 3.197808234645266e-05, "loss": 1.5568, "step": 342000 }, { "epoch": 1.47, "learning_rate": 3.197265395989942e-05, "loss": 1.5319, "step": 342100 }, { "epoch": 1.47, "learning_rate": 3.196722557334619e-05, "loss": 1.5338, "step": 342200 }, { "epoch": 1.47, "learning_rate": 3.196179718679295e-05, "loss": 1.5516, "step": 342300 }, { "epoch": 1.47, "learning_rate": 3.1956368800239714e-05, "loss": 1.5399, "step": 342400 }, { "epoch": 1.47, "learning_rate": 3.195094041368649e-05, "loss": 1.545, "step": 342500 }, { "epoch": 1.47, "learning_rate": 3.194551202713325e-05, "loss": 1.5531, "step": 342600 }, { "epoch": 1.47, "learning_rate": 3.194008364058001e-05, "loss": 1.5147, "step": 342700 }, { "epoch": 1.47, "learning_rate": 3.193465525402678e-05, "loss": 1.5641, "step": 342800 }, { "epoch": 1.47, "learning_rate": 3.192922686747354e-05, "loss": 1.5532, "step": 342900 }, { "epoch": 1.47, "learning_rate": 3.1923798480920305e-05, "loss": 1.5282, "step": 343000 }, { "epoch": 1.47, "learning_rate": 3.1918370094367074e-05, "loss": 1.5503, "step": 343100 }, { "epoch": 1.47, "learning_rate": 3.1912941707813836e-05, "loss": 1.5604, "step": 343200 }, { "epoch": 1.47, "learning_rate": 3.1907513321260604e-05, "loss": 1.527, "step": 343300 }, { "epoch": 1.48, "learning_rate": 3.1902084934707366e-05, "loss": 1.547, "step": 343400 }, { "epoch": 1.48, "learning_rate": 3.1896656548154135e-05, "loss": 1.5202, "step": 343500 }, { "epoch": 1.48, "learning_rate": 3.1891228161600903e-05, "loss": 1.5646, "step": 343600 }, { "epoch": 1.48, "learning_rate": 3.1885799775047665e-05, "loss": 1.5329, "step": 343700 }, { "epoch": 1.48, "learning_rate": 3.188037138849443e-05, "loss": 1.5512, "step": 343800 }, { "epoch": 1.48, "learning_rate": 3.1874943001941196e-05, "loss": 1.5299, "step": 343900 }, { "epoch": 1.48, "learning_rate": 3.186951461538796e-05, "loss": 1.5369, "step": 344000 }, { "epoch": 1.48, "learning_rate": 3.186408622883472e-05, "loss": 1.5414, "step": 344100 }, { "epoch": 1.48, "learning_rate": 3.185865784228149e-05, "loss": 1.5322, "step": 344200 }, { "epoch": 1.48, "learning_rate": 3.185322945572825e-05, "loss": 1.5215, "step": 344300 }, { "epoch": 1.48, "learning_rate": 3.184780106917501e-05, "loss": 1.5493, "step": 344400 }, { "epoch": 1.48, "learning_rate": 3.184237268262178e-05, "loss": 1.5299, "step": 344500 }, { "epoch": 1.48, "learning_rate": 3.183694429606855e-05, "loss": 1.5392, "step": 344600 }, { "epoch": 1.48, "learning_rate": 3.183151590951531e-05, "loss": 1.5435, "step": 344700 }, { "epoch": 1.48, "learning_rate": 3.182608752296208e-05, "loss": 1.5557, "step": 344800 }, { "epoch": 1.48, "learning_rate": 3.182065913640884e-05, "loss": 1.5388, "step": 344900 }, { "epoch": 1.48, "learning_rate": 3.1815230749855604e-05, "loss": 1.5414, "step": 345000 }, { "epoch": 1.48, "eval_loss": 1.4478392601013184, "eval_runtime": 17.7608, "eval_samples_per_second": 563.038, "eval_steps_per_second": 17.623, "step": 345000 }, { "epoch": 1.48, "learning_rate": 3.180980236330237e-05, "loss": 1.5371, "step": 345100 }, { "epoch": 1.48, "learning_rate": 3.1804373976749134e-05, "loss": 1.5326, "step": 345200 }, { "epoch": 1.48, "learning_rate": 3.17989455901959e-05, "loss": 1.5514, "step": 345300 }, { "epoch": 1.48, "learning_rate": 3.1793517203642665e-05, "loss": 1.5307, "step": 345400 }, { "epoch": 1.48, "learning_rate": 3.1788088817089427e-05, "loss": 1.5185, "step": 345500 }, { "epoch": 1.48, "learning_rate": 3.1782660430536195e-05, "loss": 1.575, "step": 345600 }, { "epoch": 1.49, "learning_rate": 3.1777232043982964e-05, "loss": 1.5719, "step": 345700 }, { "epoch": 1.49, "learning_rate": 3.1771803657429726e-05, "loss": 1.5208, "step": 345800 }, { "epoch": 1.49, "learning_rate": 3.1766375270876494e-05, "loss": 1.5388, "step": 345900 }, { "epoch": 1.49, "learning_rate": 3.1760946884323256e-05, "loss": 1.5615, "step": 346000 }, { "epoch": 1.49, "learning_rate": 3.175551849777002e-05, "loss": 1.5374, "step": 346100 }, { "epoch": 1.49, "learning_rate": 3.175009011121679e-05, "loss": 1.5419, "step": 346200 }, { "epoch": 1.49, "learning_rate": 3.174466172466355e-05, "loss": 1.5355, "step": 346300 }, { "epoch": 1.49, "learning_rate": 3.173923333811031e-05, "loss": 1.5732, "step": 346400 }, { "epoch": 1.49, "learning_rate": 3.173380495155708e-05, "loss": 1.514, "step": 346500 }, { "epoch": 1.49, "learning_rate": 3.172837656500384e-05, "loss": 1.5063, "step": 346600 }, { "epoch": 1.49, "learning_rate": 3.172294817845061e-05, "loss": 1.5282, "step": 346700 }, { "epoch": 1.49, "learning_rate": 3.171751979189738e-05, "loss": 1.5447, "step": 346800 }, { "epoch": 1.49, "learning_rate": 3.171209140534414e-05, "loss": 1.553, "step": 346900 }, { "epoch": 1.49, "learning_rate": 3.17066630187909e-05, "loss": 1.5592, "step": 347000 }, { "epoch": 1.49, "learning_rate": 3.170123463223767e-05, "loss": 1.562, "step": 347100 }, { "epoch": 1.49, "learning_rate": 3.169580624568443e-05, "loss": 1.5373, "step": 347200 }, { "epoch": 1.49, "learning_rate": 3.16903778591312e-05, "loss": 1.5465, "step": 347300 }, { "epoch": 1.49, "learning_rate": 3.168494947257796e-05, "loss": 1.5292, "step": 347400 }, { "epoch": 1.49, "learning_rate": 3.1679521086024725e-05, "loss": 1.5545, "step": 347500 }, { "epoch": 1.49, "learning_rate": 3.1674092699471494e-05, "loss": 1.5225, "step": 347600 }, { "epoch": 1.49, "learning_rate": 3.166866431291826e-05, "loss": 1.5182, "step": 347700 }, { "epoch": 1.49, "learning_rate": 3.1663235926365024e-05, "loss": 1.5689, "step": 347800 }, { "epoch": 1.49, "learning_rate": 3.165780753981179e-05, "loss": 1.535, "step": 347900 }, { "epoch": 1.5, "learning_rate": 3.1652379153258555e-05, "loss": 1.5145, "step": 348000 }, { "epoch": 1.5, "learning_rate": 3.1646950766705317e-05, "loss": 1.5291, "step": 348100 }, { "epoch": 1.5, "learning_rate": 3.1641522380152085e-05, "loss": 1.5232, "step": 348200 }, { "epoch": 1.5, "learning_rate": 3.163609399359885e-05, "loss": 1.5489, "step": 348300 }, { "epoch": 1.5, "learning_rate": 3.163066560704561e-05, "loss": 1.5601, "step": 348400 }, { "epoch": 1.5, "learning_rate": 3.162523722049238e-05, "loss": 1.5143, "step": 348500 }, { "epoch": 1.5, "learning_rate": 3.161980883393914e-05, "loss": 1.5376, "step": 348600 }, { "epoch": 1.5, "learning_rate": 3.161438044738591e-05, "loss": 1.5192, "step": 348700 }, { "epoch": 1.5, "learning_rate": 3.160895206083268e-05, "loss": 1.5509, "step": 348800 }, { "epoch": 1.5, "learning_rate": 3.160352367427944e-05, "loss": 1.5483, "step": 348900 }, { "epoch": 1.5, "learning_rate": 3.15980952877262e-05, "loss": 1.5048, "step": 349000 }, { "epoch": 1.5, "learning_rate": 3.159266690117297e-05, "loss": 1.522, "step": 349100 }, { "epoch": 1.5, "learning_rate": 3.158723851461973e-05, "loss": 1.5402, "step": 349200 }, { "epoch": 1.5, "learning_rate": 3.15818101280665e-05, "loss": 1.5543, "step": 349300 }, { "epoch": 1.5, "learning_rate": 3.157638174151326e-05, "loss": 1.5409, "step": 349400 }, { "epoch": 1.5, "learning_rate": 3.1570953354960024e-05, "loss": 1.5243, "step": 349500 }, { "epoch": 1.5, "learning_rate": 3.156552496840679e-05, "loss": 1.5425, "step": 349600 }, { "epoch": 1.5, "learning_rate": 3.1560096581853554e-05, "loss": 1.5237, "step": 349700 }, { "epoch": 1.5, "learning_rate": 3.155466819530032e-05, "loss": 1.541, "step": 349800 }, { "epoch": 1.5, "learning_rate": 3.154923980874709e-05, "loss": 1.5575, "step": 349900 }, { "epoch": 1.5, "learning_rate": 3.154381142219385e-05, "loss": 1.5517, "step": 350000 }, { "epoch": 1.5, "eval_loss": 1.4470714330673218, "eval_runtime": 17.6899, "eval_samples_per_second": 565.294, "eval_steps_per_second": 17.694, "step": 350000 }, { "epoch": 1.5, "learning_rate": 3.1538383035640615e-05, "loss": 1.532, "step": 350100 }, { "epoch": 1.5, "learning_rate": 3.1532954649087384e-05, "loss": 1.5497, "step": 350200 }, { "epoch": 1.5, "learning_rate": 3.1527526262534146e-05, "loss": 1.545, "step": 350300 }, { "epoch": 1.51, "learning_rate": 3.152209787598091e-05, "loss": 1.5249, "step": 350400 }, { "epoch": 1.51, "learning_rate": 3.1516669489427676e-05, "loss": 1.5464, "step": 350500 }, { "epoch": 1.51, "learning_rate": 3.151124110287444e-05, "loss": 1.557, "step": 350600 }, { "epoch": 1.51, "learning_rate": 3.150581271632121e-05, "loss": 1.5432, "step": 350700 }, { "epoch": 1.51, "learning_rate": 3.150038432976797e-05, "loss": 1.5386, "step": 350800 }, { "epoch": 1.51, "learning_rate": 3.149495594321474e-05, "loss": 1.5456, "step": 350900 }, { "epoch": 1.51, "learning_rate": 3.14895275566615e-05, "loss": 1.538, "step": 351000 }, { "epoch": 1.51, "learning_rate": 3.148409917010827e-05, "loss": 1.5359, "step": 351100 }, { "epoch": 1.51, "learning_rate": 3.147867078355503e-05, "loss": 1.5138, "step": 351200 }, { "epoch": 1.51, "learning_rate": 3.14732423970018e-05, "loss": 1.5448, "step": 351300 }, { "epoch": 1.51, "learning_rate": 3.146781401044856e-05, "loss": 1.536, "step": 351400 }, { "epoch": 1.51, "learning_rate": 3.146238562389532e-05, "loss": 1.5433, "step": 351500 }, { "epoch": 1.51, "learning_rate": 3.145695723734209e-05, "loss": 1.5384, "step": 351600 }, { "epoch": 1.51, "learning_rate": 3.145152885078885e-05, "loss": 1.5072, "step": 351700 }, { "epoch": 1.51, "learning_rate": 3.1446100464235614e-05, "loss": 1.5633, "step": 351800 }, { "epoch": 1.51, "learning_rate": 3.144067207768239e-05, "loss": 1.5238, "step": 351900 }, { "epoch": 1.51, "learning_rate": 3.143524369112915e-05, "loss": 1.5614, "step": 352000 }, { "epoch": 1.51, "learning_rate": 3.1429815304575914e-05, "loss": 1.5744, "step": 352100 }, { "epoch": 1.51, "learning_rate": 3.142438691802268e-05, "loss": 1.5098, "step": 352200 }, { "epoch": 1.51, "learning_rate": 3.1418958531469444e-05, "loss": 1.5568, "step": 352300 }, { "epoch": 1.51, "learning_rate": 3.1413530144916206e-05, "loss": 1.5406, "step": 352400 }, { "epoch": 1.51, "learning_rate": 3.1408101758362975e-05, "loss": 1.5192, "step": 352500 }, { "epoch": 1.51, "learning_rate": 3.1402673371809737e-05, "loss": 1.5363, "step": 352600 }, { "epoch": 1.52, "learning_rate": 3.1397244985256505e-05, "loss": 1.5626, "step": 352700 }, { "epoch": 1.52, "learning_rate": 3.139181659870327e-05, "loss": 1.5644, "step": 352800 }, { "epoch": 1.52, "learning_rate": 3.138638821215003e-05, "loss": 1.5126, "step": 352900 }, { "epoch": 1.52, "learning_rate": 3.13809598255968e-05, "loss": 1.5045, "step": 353000 }, { "epoch": 1.52, "learning_rate": 3.1375531439043566e-05, "loss": 1.5488, "step": 353100 }, { "epoch": 1.52, "learning_rate": 3.137010305249033e-05, "loss": 1.5111, "step": 353200 }, { "epoch": 1.52, "learning_rate": 3.13646746659371e-05, "loss": 1.5539, "step": 353300 }, { "epoch": 1.52, "learning_rate": 3.135924627938386e-05, "loss": 1.5193, "step": 353400 }, { "epoch": 1.52, "learning_rate": 3.135381789283062e-05, "loss": 1.5483, "step": 353500 }, { "epoch": 1.52, "learning_rate": 3.134838950627739e-05, "loss": 1.5216, "step": 353600 }, { "epoch": 1.52, "learning_rate": 3.134296111972415e-05, "loss": 1.5613, "step": 353700 }, { "epoch": 1.52, "learning_rate": 3.133753273317091e-05, "loss": 1.5414, "step": 353800 }, { "epoch": 1.52, "learning_rate": 3.133210434661768e-05, "loss": 1.5307, "step": 353900 }, { "epoch": 1.52, "learning_rate": 3.132667596006445e-05, "loss": 1.5392, "step": 354000 }, { "epoch": 1.52, "learning_rate": 3.132124757351121e-05, "loss": 1.5457, "step": 354100 }, { "epoch": 1.52, "learning_rate": 3.131581918695798e-05, "loss": 1.539, "step": 354200 }, { "epoch": 1.52, "learning_rate": 3.131039080040474e-05, "loss": 1.5216, "step": 354300 }, { "epoch": 1.52, "learning_rate": 3.1304962413851504e-05, "loss": 1.5752, "step": 354400 }, { "epoch": 1.52, "learning_rate": 3.129953402729827e-05, "loss": 1.5301, "step": 354500 }, { "epoch": 1.52, "learning_rate": 3.1294105640745035e-05, "loss": 1.5457, "step": 354600 }, { "epoch": 1.52, "learning_rate": 3.1288677254191804e-05, "loss": 1.5437, "step": 354700 }, { "epoch": 1.52, "learning_rate": 3.1283248867638566e-05, "loss": 1.5305, "step": 354800 }, { "epoch": 1.52, "learning_rate": 3.127782048108533e-05, "loss": 1.5045, "step": 354900 }, { "epoch": 1.53, "learning_rate": 3.1272392094532096e-05, "loss": 1.5433, "step": 355000 }, { "epoch": 1.53, "eval_loss": 1.4460771083831787, "eval_runtime": 17.7968, "eval_samples_per_second": 561.9, "eval_steps_per_second": 17.587, "step": 355000 }, { "epoch": 1.53, "learning_rate": 3.1266963707978865e-05, "loss": 1.5781, "step": 355100 }, { "epoch": 1.53, "learning_rate": 3.1261535321425627e-05, "loss": 1.5645, "step": 355200 }, { "epoch": 1.53, "learning_rate": 3.1256106934872395e-05, "loss": 1.5349, "step": 355300 }, { "epoch": 1.53, "learning_rate": 3.125067854831916e-05, "loss": 1.5459, "step": 355400 }, { "epoch": 1.53, "learning_rate": 3.124525016176592e-05, "loss": 1.5356, "step": 355500 }, { "epoch": 1.53, "learning_rate": 3.123982177521269e-05, "loss": 1.556, "step": 355600 }, { "epoch": 1.53, "learning_rate": 3.123439338865945e-05, "loss": 1.5376, "step": 355700 }, { "epoch": 1.53, "learning_rate": 3.122896500210621e-05, "loss": 1.5308, "step": 355800 }, { "epoch": 1.53, "learning_rate": 3.122353661555298e-05, "loss": 1.5548, "step": 355900 }, { "epoch": 1.53, "learning_rate": 3.121810822899974e-05, "loss": 1.5357, "step": 356000 }, { "epoch": 1.53, "learning_rate": 3.121267984244651e-05, "loss": 1.5202, "step": 356100 }, { "epoch": 1.53, "learning_rate": 3.120725145589328e-05, "loss": 1.5478, "step": 356200 }, { "epoch": 1.53, "learning_rate": 3.120182306934004e-05, "loss": 1.5502, "step": 356300 }, { "epoch": 1.53, "learning_rate": 3.11963946827868e-05, "loss": 1.5432, "step": 356400 }, { "epoch": 1.53, "learning_rate": 3.119096629623357e-05, "loss": 1.5386, "step": 356500 }, { "epoch": 1.53, "learning_rate": 3.1185537909680333e-05, "loss": 1.5578, "step": 356600 }, { "epoch": 1.53, "learning_rate": 3.11801095231271e-05, "loss": 1.5365, "step": 356700 }, { "epoch": 1.53, "learning_rate": 3.1174681136573864e-05, "loss": 1.5718, "step": 356800 }, { "epoch": 1.53, "learning_rate": 3.1169252750020626e-05, "loss": 1.5305, "step": 356900 }, { "epoch": 1.53, "learning_rate": 3.1163824363467395e-05, "loss": 1.5333, "step": 357000 }, { "epoch": 1.53, "learning_rate": 3.1158395976914156e-05, "loss": 1.5484, "step": 357100 }, { "epoch": 1.53, "learning_rate": 3.1152967590360925e-05, "loss": 1.5521, "step": 357200 }, { "epoch": 1.53, "learning_rate": 3.1147539203807694e-05, "loss": 1.5207, "step": 357300 }, { "epoch": 1.54, "learning_rate": 3.1142110817254456e-05, "loss": 1.5525, "step": 357400 }, { "epoch": 1.54, "learning_rate": 3.113668243070122e-05, "loss": 1.5497, "step": 357500 }, { "epoch": 1.54, "learning_rate": 3.1131254044147986e-05, "loss": 1.5305, "step": 357600 }, { "epoch": 1.54, "learning_rate": 3.112582565759475e-05, "loss": 1.5587, "step": 357700 }, { "epoch": 1.54, "learning_rate": 3.112039727104151e-05, "loss": 1.5261, "step": 357800 }, { "epoch": 1.54, "learning_rate": 3.111496888448828e-05, "loss": 1.5192, "step": 357900 }, { "epoch": 1.54, "learning_rate": 3.110954049793504e-05, "loss": 1.5383, "step": 358000 }, { "epoch": 1.54, "learning_rate": 3.110411211138181e-05, "loss": 1.5502, "step": 358100 }, { "epoch": 1.54, "learning_rate": 3.109868372482858e-05, "loss": 1.5433, "step": 358200 }, { "epoch": 1.54, "learning_rate": 3.109325533827534e-05, "loss": 1.5171, "step": 358300 }, { "epoch": 1.54, "learning_rate": 3.10878269517221e-05, "loss": 1.5241, "step": 358400 }, { "epoch": 1.54, "learning_rate": 3.108239856516887e-05, "loss": 1.5281, "step": 358500 }, { "epoch": 1.54, "learning_rate": 3.107697017861563e-05, "loss": 1.5371, "step": 358600 }, { "epoch": 1.54, "learning_rate": 3.10715417920624e-05, "loss": 1.5413, "step": 358700 }, { "epoch": 1.54, "learning_rate": 3.106611340550916e-05, "loss": 1.509, "step": 358800 }, { "epoch": 1.54, "learning_rate": 3.1060685018955924e-05, "loss": 1.5464, "step": 358900 }, { "epoch": 1.54, "learning_rate": 3.105525663240269e-05, "loss": 1.5556, "step": 359000 }, { "epoch": 1.54, "learning_rate": 3.1049828245849455e-05, "loss": 1.5318, "step": 359100 }, { "epoch": 1.54, "learning_rate": 3.104439985929622e-05, "loss": 1.5198, "step": 359200 }, { "epoch": 1.54, "learning_rate": 3.103897147274299e-05, "loss": 1.529, "step": 359300 }, { "epoch": 1.54, "learning_rate": 3.1033543086189754e-05, "loss": 1.5501, "step": 359400 }, { "epoch": 1.54, "learning_rate": 3.1028114699636516e-05, "loss": 1.5562, "step": 359500 }, { "epoch": 1.54, "learning_rate": 3.1022686313083285e-05, "loss": 1.5365, "step": 359600 }, { "epoch": 1.55, "learning_rate": 3.1017257926530046e-05, "loss": 1.5487, "step": 359700 }, { "epoch": 1.55, "learning_rate": 3.101182953997681e-05, "loss": 1.5359, "step": 359800 }, { "epoch": 1.55, "learning_rate": 3.100640115342358e-05, "loss": 1.5248, "step": 359900 }, { "epoch": 1.55, "learning_rate": 3.100097276687034e-05, "loss": 1.5636, "step": 360000 }, { "epoch": 1.55, "eval_loss": 1.4448529481887817, "eval_runtime": 17.7635, "eval_samples_per_second": 562.952, "eval_steps_per_second": 17.62, "step": 360000 }, { "epoch": 1.55, "learning_rate": 3.099554438031711e-05, "loss": 1.5315, "step": 360100 }, { "epoch": 1.55, "learning_rate": 3.099011599376387e-05, "loss": 1.5473, "step": 360200 }, { "epoch": 1.55, "learning_rate": 3.098468760721064e-05, "loss": 1.5161, "step": 360300 }, { "epoch": 1.55, "learning_rate": 3.09792592206574e-05, "loss": 1.5373, "step": 360400 }, { "epoch": 1.55, "learning_rate": 3.097383083410417e-05, "loss": 1.5096, "step": 360500 }, { "epoch": 1.55, "learning_rate": 3.096840244755093e-05, "loss": 1.5239, "step": 360600 }, { "epoch": 1.55, "learning_rate": 3.09629740609977e-05, "loss": 1.563, "step": 360700 }, { "epoch": 1.55, "learning_rate": 3.095754567444446e-05, "loss": 1.5418, "step": 360800 }, { "epoch": 1.55, "learning_rate": 3.095211728789122e-05, "loss": 1.5406, "step": 360900 }, { "epoch": 1.55, "learning_rate": 3.094668890133799e-05, "loss": 1.5123, "step": 361000 }, { "epoch": 1.55, "learning_rate": 3.0941260514784753e-05, "loss": 1.5302, "step": 361100 }, { "epoch": 1.55, "learning_rate": 3.0935832128231515e-05, "loss": 1.5363, "step": 361200 }, { "epoch": 1.55, "learning_rate": 3.0930403741678284e-05, "loss": 1.5542, "step": 361300 }, { "epoch": 1.55, "learning_rate": 3.092497535512505e-05, "loss": 1.5293, "step": 361400 }, { "epoch": 1.55, "learning_rate": 3.0919546968571814e-05, "loss": 1.5282, "step": 361500 }, { "epoch": 1.55, "learning_rate": 3.091411858201858e-05, "loss": 1.5087, "step": 361600 }, { "epoch": 1.55, "learning_rate": 3.0908690195465345e-05, "loss": 1.5276, "step": 361700 }, { "epoch": 1.55, "learning_rate": 3.090326180891211e-05, "loss": 1.5427, "step": 361800 }, { "epoch": 1.55, "learning_rate": 3.0897833422358876e-05, "loss": 1.5427, "step": 361900 }, { "epoch": 1.56, "learning_rate": 3.089240503580564e-05, "loss": 1.5539, "step": 362000 }, { "epoch": 1.56, "learning_rate": 3.0886976649252406e-05, "loss": 1.539, "step": 362100 }, { "epoch": 1.56, "learning_rate": 3.088154826269917e-05, "loss": 1.5205, "step": 362200 }, { "epoch": 1.56, "learning_rate": 3.087611987614593e-05, "loss": 1.549, "step": 362300 }, { "epoch": 1.56, "learning_rate": 3.08706914895927e-05, "loss": 1.5274, "step": 362400 }, { "epoch": 1.56, "learning_rate": 3.086526310303947e-05, "loss": 1.5495, "step": 362500 }, { "epoch": 1.56, "learning_rate": 3.085983471648623e-05, "loss": 1.5387, "step": 362600 }, { "epoch": 1.56, "learning_rate": 3.0854406329933e-05, "loss": 1.5476, "step": 362700 }, { "epoch": 1.56, "learning_rate": 3.084897794337976e-05, "loss": 1.5327, "step": 362800 }, { "epoch": 1.56, "learning_rate": 3.084354955682652e-05, "loss": 1.5387, "step": 362900 }, { "epoch": 1.56, "learning_rate": 3.083812117027329e-05, "loss": 1.5682, "step": 363000 }, { "epoch": 1.56, "learning_rate": 3.083269278372005e-05, "loss": 1.5104, "step": 363100 }, { "epoch": 1.56, "learning_rate": 3.0827264397166814e-05, "loss": 1.5487, "step": 363200 }, { "epoch": 1.56, "learning_rate": 3.082183601061358e-05, "loss": 1.534, "step": 363300 }, { "epoch": 1.56, "learning_rate": 3.081640762406035e-05, "loss": 1.5264, "step": 363400 }, { "epoch": 1.56, "learning_rate": 3.081097923750711e-05, "loss": 1.5203, "step": 363500 }, { "epoch": 1.56, "learning_rate": 3.080555085095388e-05, "loss": 1.5561, "step": 363600 }, { "epoch": 1.56, "learning_rate": 3.0800122464400643e-05, "loss": 1.5083, "step": 363700 }, { "epoch": 1.56, "learning_rate": 3.0794694077847405e-05, "loss": 1.5377, "step": 363800 }, { "epoch": 1.56, "learning_rate": 3.0789265691294174e-05, "loss": 1.5362, "step": 363900 }, { "epoch": 1.56, "learning_rate": 3.0783837304740936e-05, "loss": 1.548, "step": 364000 }, { "epoch": 1.56, "learning_rate": 3.07784089181877e-05, "loss": 1.5398, "step": 364100 }, { "epoch": 1.56, "learning_rate": 3.0772980531634466e-05, "loss": 1.5074, "step": 364200 }, { "epoch": 1.57, "learning_rate": 3.076755214508123e-05, "loss": 1.5492, "step": 364300 }, { "epoch": 1.57, "learning_rate": 3.0762123758528e-05, "loss": 1.5581, "step": 364400 }, { "epoch": 1.57, "learning_rate": 3.0756695371974766e-05, "loss": 1.5623, "step": 364500 }, { "epoch": 1.57, "learning_rate": 3.075126698542153e-05, "loss": 1.5235, "step": 364600 }, { "epoch": 1.57, "learning_rate": 3.0745838598868296e-05, "loss": 1.5464, "step": 364700 }, { "epoch": 1.57, "learning_rate": 3.074041021231506e-05, "loss": 1.5277, "step": 364800 }, { "epoch": 1.57, "learning_rate": 3.073498182576182e-05, "loss": 1.5038, "step": 364900 }, { "epoch": 1.57, "learning_rate": 3.072955343920859e-05, "loss": 1.5474, "step": 365000 }, { "epoch": 1.57, "eval_loss": 1.447064995765686, "eval_runtime": 17.8278, "eval_samples_per_second": 560.922, "eval_steps_per_second": 17.557, "step": 365000 }, { "epoch": 1.57, "learning_rate": 3.072412505265535e-05, "loss": 1.5234, "step": 365100 }, { "epoch": 1.57, "learning_rate": 3.071869666610211e-05, "loss": 1.5393, "step": 365200 }, { "epoch": 1.57, "learning_rate": 3.071326827954888e-05, "loss": 1.5358, "step": 365300 }, { "epoch": 1.57, "learning_rate": 3.070783989299564e-05, "loss": 1.5163, "step": 365400 }, { "epoch": 1.57, "learning_rate": 3.070241150644241e-05, "loss": 1.5246, "step": 365500 }, { "epoch": 1.57, "learning_rate": 3.069698311988918e-05, "loss": 1.5233, "step": 365600 }, { "epoch": 1.57, "learning_rate": 3.069155473333594e-05, "loss": 1.5309, "step": 365700 }, { "epoch": 1.57, "learning_rate": 3.0686126346782704e-05, "loss": 1.5457, "step": 365800 }, { "epoch": 1.57, "learning_rate": 3.068069796022947e-05, "loss": 1.5595, "step": 365900 }, { "epoch": 1.57, "learning_rate": 3.0675269573676234e-05, "loss": 1.5376, "step": 366000 }, { "epoch": 1.57, "learning_rate": 3.0669841187122996e-05, "loss": 1.548, "step": 366100 }, { "epoch": 1.57, "learning_rate": 3.0664412800569765e-05, "loss": 1.5257, "step": 366200 }, { "epoch": 1.57, "learning_rate": 3.065898441401653e-05, "loss": 1.5412, "step": 366300 }, { "epoch": 1.57, "learning_rate": 3.0653556027463295e-05, "loss": 1.5206, "step": 366400 }, { "epoch": 1.57, "learning_rate": 3.064812764091006e-05, "loss": 1.5396, "step": 366500 }, { "epoch": 1.57, "learning_rate": 3.0642699254356826e-05, "loss": 1.5389, "step": 366600 }, { "epoch": 1.58, "learning_rate": 3.0637270867803595e-05, "loss": 1.5348, "step": 366700 }, { "epoch": 1.58, "learning_rate": 3.0631842481250356e-05, "loss": 1.5322, "step": 366800 }, { "epoch": 1.58, "learning_rate": 3.062641409469712e-05, "loss": 1.5203, "step": 366900 }, { "epoch": 1.58, "learning_rate": 3.062098570814389e-05, "loss": 1.573, "step": 367000 }, { "epoch": 1.58, "learning_rate": 3.061555732159065e-05, "loss": 1.53, "step": 367100 }, { "epoch": 1.58, "learning_rate": 3.061012893503741e-05, "loss": 1.5678, "step": 367200 }, { "epoch": 1.58, "learning_rate": 3.060470054848418e-05, "loss": 1.5372, "step": 367300 }, { "epoch": 1.58, "learning_rate": 3.059927216193094e-05, "loss": 1.5358, "step": 367400 }, { "epoch": 1.58, "learning_rate": 3.05938437753777e-05, "loss": 1.5054, "step": 367500 }, { "epoch": 1.58, "learning_rate": 3.058841538882448e-05, "loss": 1.5148, "step": 367600 }, { "epoch": 1.58, "learning_rate": 3.058298700227124e-05, "loss": 1.5508, "step": 367700 }, { "epoch": 1.58, "learning_rate": 3.0577558615718e-05, "loss": 1.5353, "step": 367800 }, { "epoch": 1.58, "learning_rate": 3.057213022916477e-05, "loss": 1.5434, "step": 367900 }, { "epoch": 1.58, "learning_rate": 3.056670184261153e-05, "loss": 1.5367, "step": 368000 }, { "epoch": 1.58, "learning_rate": 3.0561273456058295e-05, "loss": 1.5396, "step": 368100 }, { "epoch": 1.58, "learning_rate": 3.055584506950506e-05, "loss": 1.51, "step": 368200 }, { "epoch": 1.58, "learning_rate": 3.0550416682951825e-05, "loss": 1.5025, "step": 368300 }, { "epoch": 1.58, "learning_rate": 3.0544988296398594e-05, "loss": 1.5592, "step": 368400 }, { "epoch": 1.58, "learning_rate": 3.0539559909845356e-05, "loss": 1.5472, "step": 368500 }, { "epoch": 1.58, "learning_rate": 3.053413152329212e-05, "loss": 1.5681, "step": 368600 }, { "epoch": 1.58, "learning_rate": 3.052870313673889e-05, "loss": 1.5353, "step": 368700 }, { "epoch": 1.58, "learning_rate": 3.0523274750185655e-05, "loss": 1.5521, "step": 368800 }, { "epoch": 1.58, "learning_rate": 3.051784636363242e-05, "loss": 1.5408, "step": 368900 }, { "epoch": 1.59, "learning_rate": 3.0512417977079182e-05, "loss": 1.5242, "step": 369000 }, { "epoch": 1.59, "learning_rate": 3.0506989590525947e-05, "loss": 1.5627, "step": 369100 }, { "epoch": 1.59, "learning_rate": 3.0501561203972713e-05, "loss": 1.5177, "step": 369200 }, { "epoch": 1.59, "learning_rate": 3.0496132817419474e-05, "loss": 1.5447, "step": 369300 }, { "epoch": 1.59, "learning_rate": 3.049070443086624e-05, "loss": 1.5235, "step": 369400 }, { "epoch": 1.59, "learning_rate": 3.0485276044313005e-05, "loss": 1.5045, "step": 369500 }, { "epoch": 1.59, "learning_rate": 3.047984765775977e-05, "loss": 1.5419, "step": 369600 }, { "epoch": 1.59, "learning_rate": 3.047441927120654e-05, "loss": 1.514, "step": 369700 }, { "epoch": 1.59, "learning_rate": 3.0468990884653304e-05, "loss": 1.5397, "step": 369800 }, { "epoch": 1.59, "learning_rate": 3.046356249810007e-05, "loss": 1.5312, "step": 369900 }, { "epoch": 1.59, "learning_rate": 3.045813411154683e-05, "loss": 1.547, "step": 370000 }, { "epoch": 1.59, "eval_loss": 1.443129301071167, "eval_runtime": 17.8114, "eval_samples_per_second": 561.438, "eval_steps_per_second": 17.573, "step": 370000 }, { "epoch": 1.59, "learning_rate": 3.0452705724993597e-05, "loss": 1.5362, "step": 370100 }, { "epoch": 1.59, "learning_rate": 3.0447277338440362e-05, "loss": 1.5216, "step": 370200 }, { "epoch": 1.59, "learning_rate": 3.0441848951887124e-05, "loss": 1.542, "step": 370300 }, { "epoch": 1.59, "learning_rate": 3.043642056533389e-05, "loss": 1.5317, "step": 370400 }, { "epoch": 1.59, "learning_rate": 3.0430992178780654e-05, "loss": 1.5284, "step": 370500 }, { "epoch": 1.59, "learning_rate": 3.042556379222742e-05, "loss": 1.527, "step": 370600 }, { "epoch": 1.59, "learning_rate": 3.042013540567418e-05, "loss": 1.5274, "step": 370700 }, { "epoch": 1.59, "learning_rate": 3.0414707019120953e-05, "loss": 1.5469, "step": 370800 }, { "epoch": 1.59, "learning_rate": 3.040927863256772e-05, "loss": 1.5412, "step": 370900 }, { "epoch": 1.59, "learning_rate": 3.040385024601448e-05, "loss": 1.5134, "step": 371000 }, { "epoch": 1.59, "learning_rate": 3.0398421859461246e-05, "loss": 1.4993, "step": 371100 }, { "epoch": 1.59, "learning_rate": 3.039299347290801e-05, "loss": 1.5266, "step": 371200 }, { "epoch": 1.6, "learning_rate": 3.0387565086354773e-05, "loss": 1.5317, "step": 371300 }, { "epoch": 1.6, "learning_rate": 3.0382136699801538e-05, "loss": 1.5536, "step": 371400 }, { "epoch": 1.6, "learning_rate": 3.0376708313248304e-05, "loss": 1.5476, "step": 371500 }, { "epoch": 1.6, "learning_rate": 3.037127992669507e-05, "loss": 1.566, "step": 371600 }, { "epoch": 1.6, "learning_rate": 3.036585154014183e-05, "loss": 1.5351, "step": 371700 }, { "epoch": 1.6, "learning_rate": 3.0360423153588603e-05, "loss": 1.5477, "step": 371800 }, { "epoch": 1.6, "learning_rate": 3.0354994767035368e-05, "loss": 1.5356, "step": 371900 }, { "epoch": 1.6, "learning_rate": 3.034956638048213e-05, "loss": 1.5352, "step": 372000 }, { "epoch": 1.6, "learning_rate": 3.0344137993928895e-05, "loss": 1.5359, "step": 372100 }, { "epoch": 1.6, "learning_rate": 3.033870960737566e-05, "loss": 1.536, "step": 372200 }, { "epoch": 1.6, "learning_rate": 3.0333281220822422e-05, "loss": 1.5461, "step": 372300 }, { "epoch": 1.6, "learning_rate": 3.0327852834269187e-05, "loss": 1.5335, "step": 372400 }, { "epoch": 1.6, "learning_rate": 3.0322424447715953e-05, "loss": 1.5377, "step": 372500 }, { "epoch": 1.6, "learning_rate": 3.0316996061162718e-05, "loss": 1.5455, "step": 372600 }, { "epoch": 1.6, "learning_rate": 3.031156767460948e-05, "loss": 1.5413, "step": 372700 }, { "epoch": 1.6, "learning_rate": 3.0306139288056245e-05, "loss": 1.5349, "step": 372800 }, { "epoch": 1.6, "learning_rate": 3.0300710901503017e-05, "loss": 1.5368, "step": 372900 }, { "epoch": 1.6, "learning_rate": 3.029528251494978e-05, "loss": 1.517, "step": 373000 }, { "epoch": 1.6, "learning_rate": 3.0289854128396544e-05, "loss": 1.5187, "step": 373100 }, { "epoch": 1.6, "learning_rate": 3.028442574184331e-05, "loss": 1.5592, "step": 373200 }, { "epoch": 1.6, "learning_rate": 3.027899735529007e-05, "loss": 1.55, "step": 373300 }, { "epoch": 1.6, "learning_rate": 3.0273568968736837e-05, "loss": 1.5557, "step": 373400 }, { "epoch": 1.6, "learning_rate": 3.0268140582183602e-05, "loss": 1.5562, "step": 373500 }, { "epoch": 1.61, "learning_rate": 3.0262712195630367e-05, "loss": 1.5256, "step": 373600 }, { "epoch": 1.61, "learning_rate": 3.025728380907713e-05, "loss": 1.5164, "step": 373700 }, { "epoch": 1.61, "learning_rate": 3.0251855422523894e-05, "loss": 1.5493, "step": 373800 }, { "epoch": 1.61, "learning_rate": 3.0246427035970663e-05, "loss": 1.4844, "step": 373900 }, { "epoch": 1.61, "learning_rate": 3.024099864941743e-05, "loss": 1.527, "step": 374000 }, { "epoch": 1.61, "learning_rate": 3.0235570262864194e-05, "loss": 1.5316, "step": 374100 }, { "epoch": 1.61, "learning_rate": 3.023014187631096e-05, "loss": 1.5353, "step": 374200 }, { "epoch": 1.61, "learning_rate": 3.022471348975772e-05, "loss": 1.5089, "step": 374300 }, { "epoch": 1.61, "learning_rate": 3.0219285103204486e-05, "loss": 1.5295, "step": 374400 }, { "epoch": 1.61, "learning_rate": 3.021385671665125e-05, "loss": 1.5455, "step": 374500 }, { "epoch": 1.61, "learning_rate": 3.0208428330098016e-05, "loss": 1.5158, "step": 374600 }, { "epoch": 1.61, "learning_rate": 3.020299994354478e-05, "loss": 1.5624, "step": 374700 }, { "epoch": 1.61, "learning_rate": 3.0197571556991544e-05, "loss": 1.5486, "step": 374800 }, { "epoch": 1.61, "learning_rate": 3.019214317043831e-05, "loss": 1.5279, "step": 374900 }, { "epoch": 1.61, "learning_rate": 3.0186714783885078e-05, "loss": 1.545, "step": 375000 }, { "epoch": 1.61, "eval_loss": 1.44380521774292, "eval_runtime": 17.8282, "eval_samples_per_second": 560.909, "eval_steps_per_second": 17.556, "step": 375000 }, { "epoch": 1.61, "learning_rate": 3.0181286397331843e-05, "loss": 1.5086, "step": 375100 }, { "epoch": 1.61, "learning_rate": 3.0175858010778608e-05, "loss": 1.5125, "step": 375200 }, { "epoch": 1.61, "learning_rate": 3.017042962422537e-05, "loss": 1.5614, "step": 375300 }, { "epoch": 1.61, "learning_rate": 3.0165001237672135e-05, "loss": 1.5253, "step": 375400 }, { "epoch": 1.61, "learning_rate": 3.01595728511189e-05, "loss": 1.5391, "step": 375500 }, { "epoch": 1.61, "learning_rate": 3.0154144464565666e-05, "loss": 1.5503, "step": 375600 }, { "epoch": 1.61, "learning_rate": 3.0148716078012428e-05, "loss": 1.5573, "step": 375700 }, { "epoch": 1.61, "learning_rate": 3.0143287691459193e-05, "loss": 1.5312, "step": 375800 }, { "epoch": 1.61, "learning_rate": 3.0137859304905958e-05, "loss": 1.5472, "step": 375900 }, { "epoch": 1.62, "learning_rate": 3.0132430918352727e-05, "loss": 1.5398, "step": 376000 }, { "epoch": 1.62, "learning_rate": 3.0127002531799492e-05, "loss": 1.5737, "step": 376100 }, { "epoch": 1.62, "learning_rate": 3.0121574145246257e-05, "loss": 1.5449, "step": 376200 }, { "epoch": 1.62, "learning_rate": 3.011614575869302e-05, "loss": 1.5374, "step": 376300 }, { "epoch": 1.62, "learning_rate": 3.0110717372139784e-05, "loss": 1.5009, "step": 376400 }, { "epoch": 1.62, "learning_rate": 3.010528898558655e-05, "loss": 1.5409, "step": 376500 }, { "epoch": 1.62, "learning_rate": 3.0099860599033315e-05, "loss": 1.5352, "step": 376600 }, { "epoch": 1.62, "learning_rate": 3.0094432212480077e-05, "loss": 1.5282, "step": 376700 }, { "epoch": 1.62, "learning_rate": 3.0089003825926842e-05, "loss": 1.5344, "step": 376800 }, { "epoch": 1.62, "learning_rate": 3.0083575439373607e-05, "loss": 1.5274, "step": 376900 }, { "epoch": 1.62, "learning_rate": 3.0078147052820373e-05, "loss": 1.5241, "step": 377000 }, { "epoch": 1.62, "learning_rate": 3.007271866626714e-05, "loss": 1.5125, "step": 377100 }, { "epoch": 1.62, "learning_rate": 3.0067290279713907e-05, "loss": 1.5378, "step": 377200 }, { "epoch": 1.62, "learning_rate": 3.006186189316067e-05, "loss": 1.5027, "step": 377300 }, { "epoch": 1.62, "learning_rate": 3.0056433506607434e-05, "loss": 1.5362, "step": 377400 }, { "epoch": 1.62, "learning_rate": 3.00510051200542e-05, "loss": 1.5468, "step": 377500 }, { "epoch": 1.62, "learning_rate": 3.0045576733500964e-05, "loss": 1.5328, "step": 377600 }, { "epoch": 1.62, "learning_rate": 3.0040148346947726e-05, "loss": 1.553, "step": 377700 }, { "epoch": 1.62, "learning_rate": 3.003471996039449e-05, "loss": 1.5313, "step": 377800 }, { "epoch": 1.62, "learning_rate": 3.0029291573841257e-05, "loss": 1.5494, "step": 377900 }, { "epoch": 1.62, "learning_rate": 3.0023863187288022e-05, "loss": 1.5392, "step": 378000 }, { "epoch": 1.62, "learning_rate": 3.001843480073479e-05, "loss": 1.5492, "step": 378100 }, { "epoch": 1.62, "learning_rate": 3.0013006414181556e-05, "loss": 1.5513, "step": 378200 }, { "epoch": 1.63, "learning_rate": 3.0007578027628318e-05, "loss": 1.5523, "step": 378300 }, { "epoch": 1.63, "learning_rate": 3.0002149641075083e-05, "loss": 1.5254, "step": 378400 }, { "epoch": 1.63, "learning_rate": 2.9996721254521848e-05, "loss": 1.5396, "step": 378500 }, { "epoch": 1.63, "learning_rate": 2.9991292867968613e-05, "loss": 1.5326, "step": 378600 }, { "epoch": 1.63, "learning_rate": 2.9985864481415375e-05, "loss": 1.5232, "step": 378700 }, { "epoch": 1.63, "learning_rate": 2.998043609486214e-05, "loss": 1.5402, "step": 378800 }, { "epoch": 1.63, "learning_rate": 2.9975007708308906e-05, "loss": 1.5314, "step": 378900 }, { "epoch": 1.63, "learning_rate": 2.996957932175567e-05, "loss": 1.5268, "step": 379000 }, { "epoch": 1.63, "learning_rate": 2.9964150935202433e-05, "loss": 1.5378, "step": 379100 }, { "epoch": 1.63, "learning_rate": 2.9958722548649205e-05, "loss": 1.5408, "step": 379200 }, { "epoch": 1.63, "learning_rate": 2.9953294162095967e-05, "loss": 1.504, "step": 379300 }, { "epoch": 1.63, "learning_rate": 2.9947865775542732e-05, "loss": 1.5343, "step": 379400 }, { "epoch": 1.63, "learning_rate": 2.9942437388989497e-05, "loss": 1.5036, "step": 379500 }, { "epoch": 1.63, "learning_rate": 2.9937009002436263e-05, "loss": 1.5604, "step": 379600 }, { "epoch": 1.63, "learning_rate": 2.9931580615883025e-05, "loss": 1.5606, "step": 379700 }, { "epoch": 1.63, "learning_rate": 2.992615222932979e-05, "loss": 1.5496, "step": 379800 }, { "epoch": 1.63, "learning_rate": 2.9920723842776555e-05, "loss": 1.541, "step": 379900 }, { "epoch": 1.63, "learning_rate": 2.991529545622332e-05, "loss": 1.519, "step": 380000 }, { "epoch": 1.63, "eval_loss": 1.441465973854065, "eval_runtime": 17.8531, "eval_samples_per_second": 560.125, "eval_steps_per_second": 17.532, "step": 380000 }, { "epoch": 1.63, "learning_rate": 2.9909867069670082e-05, "loss": 1.5429, "step": 380100 }, { "epoch": 1.63, "learning_rate": 2.9904438683116854e-05, "loss": 1.5333, "step": 380200 }, { "epoch": 1.63, "learning_rate": 2.9899010296563616e-05, "loss": 1.5395, "step": 380300 }, { "epoch": 1.63, "learning_rate": 2.989358191001038e-05, "loss": 1.5221, "step": 380400 }, { "epoch": 1.63, "learning_rate": 2.9888153523457147e-05, "loss": 1.5362, "step": 380500 }, { "epoch": 1.64, "learning_rate": 2.9882725136903912e-05, "loss": 1.5406, "step": 380600 }, { "epoch": 1.64, "learning_rate": 2.9877296750350674e-05, "loss": 1.5323, "step": 380700 }, { "epoch": 1.64, "learning_rate": 2.987186836379744e-05, "loss": 1.5644, "step": 380800 }, { "epoch": 1.64, "learning_rate": 2.9866439977244204e-05, "loss": 1.5275, "step": 380900 }, { "epoch": 1.64, "learning_rate": 2.986101159069097e-05, "loss": 1.5485, "step": 381000 }, { "epoch": 1.64, "learning_rate": 2.985558320413773e-05, "loss": 1.5074, "step": 381100 }, { "epoch": 1.64, "learning_rate": 2.9850154817584497e-05, "loss": 1.5068, "step": 381200 }, { "epoch": 1.64, "learning_rate": 2.9844726431031265e-05, "loss": 1.5295, "step": 381300 }, { "epoch": 1.64, "learning_rate": 2.983929804447803e-05, "loss": 1.5248, "step": 381400 }, { "epoch": 1.64, "learning_rate": 2.9833869657924796e-05, "loss": 1.5372, "step": 381500 }, { "epoch": 1.64, "learning_rate": 2.982844127137156e-05, "loss": 1.5467, "step": 381600 }, { "epoch": 1.64, "learning_rate": 2.9823012884818323e-05, "loss": 1.508, "step": 381700 }, { "epoch": 1.64, "learning_rate": 2.981758449826509e-05, "loss": 1.545, "step": 381800 }, { "epoch": 1.64, "learning_rate": 2.9812156111711854e-05, "loss": 1.5113, "step": 381900 }, { "epoch": 1.64, "learning_rate": 2.980672772515862e-05, "loss": 1.5503, "step": 382000 }, { "epoch": 1.64, "learning_rate": 2.980129933860538e-05, "loss": 1.5146, "step": 382100 }, { "epoch": 1.64, "learning_rate": 2.9795870952052146e-05, "loss": 1.5072, "step": 382200 }, { "epoch": 1.64, "learning_rate": 2.9790442565498915e-05, "loss": 1.5394, "step": 382300 }, { "epoch": 1.64, "learning_rate": 2.978501417894568e-05, "loss": 1.5449, "step": 382400 }, { "epoch": 1.64, "learning_rate": 2.9779585792392445e-05, "loss": 1.5293, "step": 382500 }, { "epoch": 1.64, "learning_rate": 2.977415740583921e-05, "loss": 1.5363, "step": 382600 }, { "epoch": 1.64, "learning_rate": 2.9768729019285972e-05, "loss": 1.5216, "step": 382700 }, { "epoch": 1.64, "learning_rate": 2.9763300632732738e-05, "loss": 1.5477, "step": 382800 }, { "epoch": 1.64, "learning_rate": 2.9757872246179503e-05, "loss": 1.5265, "step": 382900 }, { "epoch": 1.65, "learning_rate": 2.9752443859626268e-05, "loss": 1.4951, "step": 383000 }, { "epoch": 1.65, "learning_rate": 2.974701547307303e-05, "loss": 1.5332, "step": 383100 }, { "epoch": 1.65, "learning_rate": 2.9741587086519795e-05, "loss": 1.5358, "step": 383200 }, { "epoch": 1.65, "learning_rate": 2.9736158699966564e-05, "loss": 1.5612, "step": 383300 }, { "epoch": 1.65, "learning_rate": 2.973073031341333e-05, "loss": 1.5337, "step": 383400 }, { "epoch": 1.65, "learning_rate": 2.9725301926860094e-05, "loss": 1.5483, "step": 383500 }, { "epoch": 1.65, "learning_rate": 2.971987354030686e-05, "loss": 1.503, "step": 383600 }, { "epoch": 1.65, "learning_rate": 2.971444515375362e-05, "loss": 1.555, "step": 383700 }, { "epoch": 1.65, "learning_rate": 2.9709016767200387e-05, "loss": 1.5329, "step": 383800 }, { "epoch": 1.65, "learning_rate": 2.9703588380647152e-05, "loss": 1.5388, "step": 383900 }, { "epoch": 1.65, "learning_rate": 2.9698159994093917e-05, "loss": 1.5073, "step": 384000 }, { "epoch": 1.65, "learning_rate": 2.969273160754068e-05, "loss": 1.5039, "step": 384100 }, { "epoch": 1.65, "learning_rate": 2.9687303220987444e-05, "loss": 1.5433, "step": 384200 }, { "epoch": 1.65, "learning_rate": 2.968187483443421e-05, "loss": 1.5517, "step": 384300 }, { "epoch": 1.65, "learning_rate": 2.967644644788098e-05, "loss": 1.5244, "step": 384400 }, { "epoch": 1.65, "learning_rate": 2.9671018061327744e-05, "loss": 1.5426, "step": 384500 }, { "epoch": 1.65, "learning_rate": 2.966558967477451e-05, "loss": 1.5364, "step": 384600 }, { "epoch": 1.65, "learning_rate": 2.966016128822127e-05, "loss": 1.561, "step": 384700 }, { "epoch": 1.65, "learning_rate": 2.9654732901668036e-05, "loss": 1.5231, "step": 384800 }, { "epoch": 1.65, "learning_rate": 2.96493045151148e-05, "loss": 1.5599, "step": 384900 }, { "epoch": 1.65, "learning_rate": 2.9643876128561567e-05, "loss": 1.5445, "step": 385000 }, { "epoch": 1.65, "eval_loss": 1.4401415586471558, "eval_runtime": 17.7618, "eval_samples_per_second": 563.005, "eval_steps_per_second": 17.622, "step": 385000 }, { "epoch": 1.65, "learning_rate": 2.963844774200833e-05, "loss": 1.5467, "step": 385100 }, { "epoch": 1.65, "learning_rate": 2.9633019355455094e-05, "loss": 1.5363, "step": 385200 }, { "epoch": 1.66, "learning_rate": 2.962759096890186e-05, "loss": 1.5448, "step": 385300 }, { "epoch": 1.66, "learning_rate": 2.9622162582348628e-05, "loss": 1.5497, "step": 385400 }, { "epoch": 1.66, "learning_rate": 2.9616734195795393e-05, "loss": 1.5068, "step": 385500 }, { "epoch": 1.66, "learning_rate": 2.9611305809242158e-05, "loss": 1.5253, "step": 385600 }, { "epoch": 1.66, "learning_rate": 2.960587742268892e-05, "loss": 1.5221, "step": 385700 }, { "epoch": 1.66, "learning_rate": 2.9600449036135685e-05, "loss": 1.5336, "step": 385800 }, { "epoch": 1.66, "learning_rate": 2.959502064958245e-05, "loss": 1.5336, "step": 385900 }, { "epoch": 1.66, "learning_rate": 2.9589592263029216e-05, "loss": 1.5239, "step": 386000 }, { "epoch": 1.66, "learning_rate": 2.9584163876475978e-05, "loss": 1.5261, "step": 386100 }, { "epoch": 1.66, "learning_rate": 2.9578735489922743e-05, "loss": 1.5146, "step": 386200 }, { "epoch": 1.66, "learning_rate": 2.9573307103369508e-05, "loss": 1.5527, "step": 386300 }, { "epoch": 1.66, "learning_rate": 2.9567878716816274e-05, "loss": 1.5197, "step": 386400 }, { "epoch": 1.66, "learning_rate": 2.9562450330263042e-05, "loss": 1.5463, "step": 386500 }, { "epoch": 1.66, "learning_rate": 2.9557021943709807e-05, "loss": 1.5406, "step": 386600 }, { "epoch": 1.66, "learning_rate": 2.955159355715657e-05, "loss": 1.536, "step": 386700 }, { "epoch": 1.66, "learning_rate": 2.9546165170603335e-05, "loss": 1.5257, "step": 386800 }, { "epoch": 1.66, "learning_rate": 2.95407367840501e-05, "loss": 1.5449, "step": 386900 }, { "epoch": 1.66, "learning_rate": 2.9535308397496865e-05, "loss": 1.5352, "step": 387000 }, { "epoch": 1.66, "learning_rate": 2.9529880010943627e-05, "loss": 1.5447, "step": 387100 }, { "epoch": 1.66, "learning_rate": 2.9524451624390392e-05, "loss": 1.5273, "step": 387200 }, { "epoch": 1.66, "learning_rate": 2.9519023237837157e-05, "loss": 1.515, "step": 387300 }, { "epoch": 1.66, "learning_rate": 2.9513594851283923e-05, "loss": 1.5312, "step": 387400 }, { "epoch": 1.66, "learning_rate": 2.950816646473069e-05, "loss": 1.5419, "step": 387500 }, { "epoch": 1.67, "learning_rate": 2.9502738078177457e-05, "loss": 1.5344, "step": 387600 }, { "epoch": 1.67, "learning_rate": 2.949730969162422e-05, "loss": 1.5083, "step": 387700 }, { "epoch": 1.67, "learning_rate": 2.9491881305070984e-05, "loss": 1.5281, "step": 387800 }, { "epoch": 1.67, "learning_rate": 2.948645291851775e-05, "loss": 1.5292, "step": 387900 }, { "epoch": 1.67, "learning_rate": 2.9481024531964514e-05, "loss": 1.5412, "step": 388000 }, { "epoch": 1.67, "learning_rate": 2.9475596145411276e-05, "loss": 1.5267, "step": 388100 }, { "epoch": 1.67, "learning_rate": 2.947016775885804e-05, "loss": 1.5315, "step": 388200 }, { "epoch": 1.67, "learning_rate": 2.9464739372304807e-05, "loss": 1.5206, "step": 388300 }, { "epoch": 1.67, "learning_rate": 2.9459310985751572e-05, "loss": 1.5419, "step": 388400 }, { "epoch": 1.67, "learning_rate": 2.9453882599198334e-05, "loss": 1.5717, "step": 388500 }, { "epoch": 1.67, "learning_rate": 2.9448454212645106e-05, "loss": 1.5388, "step": 388600 }, { "epoch": 1.67, "learning_rate": 2.9443025826091868e-05, "loss": 1.5255, "step": 388700 }, { "epoch": 1.67, "learning_rate": 2.9437597439538633e-05, "loss": 1.5424, "step": 388800 }, { "epoch": 1.67, "learning_rate": 2.94321690529854e-05, "loss": 1.5145, "step": 388900 }, { "epoch": 1.67, "learning_rate": 2.9426740666432164e-05, "loss": 1.5352, "step": 389000 }, { "epoch": 1.67, "learning_rate": 2.9421312279878925e-05, "loss": 1.5473, "step": 389100 }, { "epoch": 1.67, "learning_rate": 2.941588389332569e-05, "loss": 1.5378, "step": 389200 }, { "epoch": 1.67, "learning_rate": 2.9410455506772456e-05, "loss": 1.5358, "step": 389300 }, { "epoch": 1.67, "learning_rate": 2.940502712021922e-05, "loss": 1.5273, "step": 389400 }, { "epoch": 1.67, "learning_rate": 2.9399598733665983e-05, "loss": 1.5294, "step": 389500 }, { "epoch": 1.67, "learning_rate": 2.9394170347112755e-05, "loss": 1.5519, "step": 389600 }, { "epoch": 1.67, "learning_rate": 2.9388741960559517e-05, "loss": 1.5548, "step": 389700 }, { "epoch": 1.67, "learning_rate": 2.9383313574006282e-05, "loss": 1.5096, "step": 389800 }, { "epoch": 1.68, "learning_rate": 2.9377885187453048e-05, "loss": 1.5136, "step": 389900 }, { "epoch": 1.68, "learning_rate": 2.9372456800899813e-05, "loss": 1.5348, "step": 390000 }, { "epoch": 1.68, "eval_loss": 1.4426006078720093, "eval_runtime": 36.5743, "eval_samples_per_second": 273.416, "eval_steps_per_second": 8.558, "step": 390000 }, { "epoch": 1.68, "learning_rate": 2.9367028414346575e-05, "loss": 1.5546, "step": 390100 }, { "epoch": 1.68, "learning_rate": 2.936160002779334e-05, "loss": 1.5413, "step": 390200 }, { "epoch": 1.68, "learning_rate": 2.9356171641240105e-05, "loss": 1.5366, "step": 390300 }, { "epoch": 1.68, "learning_rate": 2.935074325468687e-05, "loss": 1.5408, "step": 390400 }, { "epoch": 1.68, "learning_rate": 2.9345314868133632e-05, "loss": 1.53, "step": 390500 }, { "epoch": 1.68, "learning_rate": 2.9339886481580398e-05, "loss": 1.4864, "step": 390600 }, { "epoch": 1.68, "learning_rate": 2.9334458095027166e-05, "loss": 1.5162, "step": 390700 }, { "epoch": 1.68, "learning_rate": 2.932902970847393e-05, "loss": 1.5158, "step": 390800 }, { "epoch": 1.68, "learning_rate": 2.9323601321920697e-05, "loss": 1.5527, "step": 390900 }, { "epoch": 1.68, "learning_rate": 2.9318172935367462e-05, "loss": 1.5186, "step": 391000 }, { "epoch": 1.68, "learning_rate": 2.9312744548814224e-05, "loss": 1.5328, "step": 391100 }, { "epoch": 1.68, "learning_rate": 2.930731616226099e-05, "loss": 1.5606, "step": 391200 }, { "epoch": 1.68, "learning_rate": 2.9301887775707754e-05, "loss": 1.5256, "step": 391300 }, { "epoch": 1.68, "learning_rate": 2.929645938915452e-05, "loss": 1.5284, "step": 391400 }, { "epoch": 1.68, "learning_rate": 2.929103100260128e-05, "loss": 1.5403, "step": 391500 }, { "epoch": 1.68, "learning_rate": 2.9285602616048047e-05, "loss": 1.5277, "step": 391600 }, { "epoch": 1.68, "learning_rate": 2.9280174229494816e-05, "loss": 1.5158, "step": 391700 }, { "epoch": 1.68, "learning_rate": 2.927474584294158e-05, "loss": 1.5156, "step": 391800 }, { "epoch": 1.68, "learning_rate": 2.9269317456388346e-05, "loss": 1.519, "step": 391900 }, { "epoch": 1.68, "learning_rate": 2.926388906983511e-05, "loss": 1.5105, "step": 392000 }, { "epoch": 1.68, "learning_rate": 2.9258460683281873e-05, "loss": 1.4996, "step": 392100 }, { "epoch": 1.68, "learning_rate": 2.925303229672864e-05, "loss": 1.538, "step": 392200 }, { "epoch": 1.69, "learning_rate": 2.9247603910175404e-05, "loss": 1.5354, "step": 392300 }, { "epoch": 1.69, "learning_rate": 2.9242175523622166e-05, "loss": 1.5445, "step": 392400 }, { "epoch": 1.69, "learning_rate": 2.923674713706893e-05, "loss": 1.5375, "step": 392500 }, { "epoch": 1.69, "learning_rate": 2.9231318750515696e-05, "loss": 1.5331, "step": 392600 }, { "epoch": 1.69, "learning_rate": 2.922589036396246e-05, "loss": 1.5468, "step": 392700 }, { "epoch": 1.69, "learning_rate": 2.922046197740923e-05, "loss": 1.5413, "step": 392800 }, { "epoch": 1.69, "learning_rate": 2.9215033590855995e-05, "loss": 1.5525, "step": 392900 }, { "epoch": 1.69, "learning_rate": 2.920960520430276e-05, "loss": 1.5149, "step": 393000 }, { "epoch": 1.69, "learning_rate": 2.9204176817749522e-05, "loss": 1.5498, "step": 393100 }, { "epoch": 1.69, "learning_rate": 2.9198748431196288e-05, "loss": 1.5412, "step": 393200 }, { "epoch": 1.69, "learning_rate": 2.9193320044643053e-05, "loss": 1.5368, "step": 393300 }, { "epoch": 1.69, "learning_rate": 2.9187891658089815e-05, "loss": 1.542, "step": 393400 }, { "epoch": 1.69, "learning_rate": 2.918246327153658e-05, "loss": 1.5243, "step": 393500 }, { "epoch": 1.69, "learning_rate": 2.9177034884983345e-05, "loss": 1.5567, "step": 393600 }, { "epoch": 1.69, "learning_rate": 2.917160649843011e-05, "loss": 1.5155, "step": 393700 }, { "epoch": 1.69, "learning_rate": 2.916617811187688e-05, "loss": 1.5208, "step": 393800 }, { "epoch": 1.69, "learning_rate": 2.9160749725323645e-05, "loss": 1.5428, "step": 393900 }, { "epoch": 1.69, "learning_rate": 2.915532133877041e-05, "loss": 1.5396, "step": 394000 }, { "epoch": 1.69, "learning_rate": 2.914989295221717e-05, "loss": 1.5331, "step": 394100 }, { "epoch": 1.69, "learning_rate": 2.9144464565663937e-05, "loss": 1.5322, "step": 394200 }, { "epoch": 1.69, "learning_rate": 2.9139036179110702e-05, "loss": 1.57, "step": 394300 }, { "epoch": 1.69, "learning_rate": 2.9133607792557464e-05, "loss": 1.5523, "step": 394400 }, { "epoch": 1.69, "learning_rate": 2.912817940600423e-05, "loss": 1.515, "step": 394500 }, { "epoch": 1.7, "learning_rate": 2.9122751019450995e-05, "loss": 1.5288, "step": 394600 }, { "epoch": 1.7, "learning_rate": 2.911732263289776e-05, "loss": 1.5688, "step": 394700 }, { "epoch": 1.7, "learning_rate": 2.9111894246344522e-05, "loss": 1.5503, "step": 394800 }, { "epoch": 1.7, "learning_rate": 2.9106465859791294e-05, "loss": 1.5064, "step": 394900 }, { "epoch": 1.7, "learning_rate": 2.910103747323806e-05, "loss": 1.525, "step": 395000 }, { "epoch": 1.7, "eval_loss": 1.441019892692566, "eval_runtime": 17.8041, "eval_samples_per_second": 561.668, "eval_steps_per_second": 17.58, "step": 395000 }, { "epoch": 1.7, "learning_rate": 2.909560908668482e-05, "loss": 1.5334, "step": 395100 }, { "epoch": 1.7, "learning_rate": 2.9090180700131586e-05, "loss": 1.5313, "step": 395200 }, { "epoch": 1.7, "learning_rate": 2.908475231357835e-05, "loss": 1.5513, "step": 395300 }, { "epoch": 1.7, "learning_rate": 2.9079323927025113e-05, "loss": 1.5522, "step": 395400 }, { "epoch": 1.7, "learning_rate": 2.907389554047188e-05, "loss": 1.5336, "step": 395500 }, { "epoch": 1.7, "learning_rate": 2.9068467153918644e-05, "loss": 1.5244, "step": 395600 }, { "epoch": 1.7, "learning_rate": 2.906303876736541e-05, "loss": 1.5485, "step": 395700 }, { "epoch": 1.7, "learning_rate": 2.905761038081217e-05, "loss": 1.562, "step": 395800 }, { "epoch": 1.7, "learning_rate": 2.9052181994258943e-05, "loss": 1.5201, "step": 395900 }, { "epoch": 1.7, "learning_rate": 2.9046753607705708e-05, "loss": 1.5629, "step": 396000 }, { "epoch": 1.7, "learning_rate": 2.904132522115247e-05, "loss": 1.5387, "step": 396100 }, { "epoch": 1.7, "learning_rate": 2.9035896834599235e-05, "loss": 1.5281, "step": 396200 }, { "epoch": 1.7, "learning_rate": 2.9030468448046e-05, "loss": 1.5412, "step": 396300 }, { "epoch": 1.7, "learning_rate": 2.9025040061492763e-05, "loss": 1.5418, "step": 396400 }, { "epoch": 1.7, "learning_rate": 2.9019611674939528e-05, "loss": 1.5426, "step": 396500 }, { "epoch": 1.7, "learning_rate": 2.9014183288386293e-05, "loss": 1.5182, "step": 396600 }, { "epoch": 1.7, "learning_rate": 2.900875490183306e-05, "loss": 1.5376, "step": 396700 }, { "epoch": 1.7, "learning_rate": 2.900332651527982e-05, "loss": 1.5392, "step": 396800 }, { "epoch": 1.71, "learning_rate": 2.8997898128726585e-05, "loss": 1.5274, "step": 396900 }, { "epoch": 1.71, "learning_rate": 2.8992469742173358e-05, "loss": 1.5162, "step": 397000 }, { "epoch": 1.71, "learning_rate": 2.898704135562012e-05, "loss": 1.5394, "step": 397100 }, { "epoch": 1.71, "learning_rate": 2.8981612969066885e-05, "loss": 1.5618, "step": 397200 }, { "epoch": 1.71, "learning_rate": 2.897618458251365e-05, "loss": 1.5106, "step": 397300 }, { "epoch": 1.71, "learning_rate": 2.8970756195960412e-05, "loss": 1.5487, "step": 397400 }, { "epoch": 1.71, "learning_rate": 2.8965327809407177e-05, "loss": 1.5158, "step": 397500 }, { "epoch": 1.71, "learning_rate": 2.8959899422853942e-05, "loss": 1.5026, "step": 397600 }, { "epoch": 1.71, "learning_rate": 2.8954471036300708e-05, "loss": 1.5133, "step": 397700 }, { "epoch": 1.71, "learning_rate": 2.894904264974747e-05, "loss": 1.5608, "step": 397800 }, { "epoch": 1.71, "learning_rate": 2.8943614263194235e-05, "loss": 1.5655, "step": 397900 }, { "epoch": 1.71, "learning_rate": 2.8938185876641007e-05, "loss": 1.5126, "step": 398000 }, { "epoch": 1.71, "learning_rate": 2.893275749008777e-05, "loss": 1.556, "step": 398100 }, { "epoch": 1.71, "learning_rate": 2.8927329103534534e-05, "loss": 1.5335, "step": 398200 }, { "epoch": 1.71, "learning_rate": 2.89219007169813e-05, "loss": 1.5421, "step": 398300 }, { "epoch": 1.71, "learning_rate": 2.891647233042806e-05, "loss": 1.5166, "step": 398400 }, { "epoch": 1.71, "learning_rate": 2.8911043943874826e-05, "loss": 1.5319, "step": 398500 }, { "epoch": 1.71, "learning_rate": 2.890561555732159e-05, "loss": 1.5482, "step": 398600 }, { "epoch": 1.71, "learning_rate": 2.8900187170768357e-05, "loss": 1.5344, "step": 398700 }, { "epoch": 1.71, "learning_rate": 2.889475878421512e-05, "loss": 1.5221, "step": 398800 }, { "epoch": 1.71, "learning_rate": 2.8889330397661884e-05, "loss": 1.5241, "step": 398900 }, { "epoch": 1.71, "learning_rate": 2.888390201110865e-05, "loss": 1.5421, "step": 399000 }, { "epoch": 1.71, "learning_rate": 2.8878473624555418e-05, "loss": 1.5389, "step": 399100 }, { "epoch": 1.71, "learning_rate": 2.8873045238002183e-05, "loss": 1.5364, "step": 399200 }, { "epoch": 1.72, "learning_rate": 2.886761685144895e-05, "loss": 1.5273, "step": 399300 }, { "epoch": 1.72, "learning_rate": 2.886218846489571e-05, "loss": 1.5398, "step": 399400 }, { "epoch": 1.72, "learning_rate": 2.8856760078342476e-05, "loss": 1.5343, "step": 399500 }, { "epoch": 1.72, "learning_rate": 2.885133169178924e-05, "loss": 1.5356, "step": 399600 }, { "epoch": 1.72, "learning_rate": 2.8845903305236006e-05, "loss": 1.5525, "step": 399700 }, { "epoch": 1.72, "learning_rate": 2.8840474918682768e-05, "loss": 1.539, "step": 399800 }, { "epoch": 1.72, "learning_rate": 2.8835046532129533e-05, "loss": 1.5196, "step": 399900 }, { "epoch": 1.72, "learning_rate": 2.88296181455763e-05, "loss": 1.5199, "step": 400000 }, { "epoch": 1.72, "eval_loss": 1.4402216672897339, "eval_runtime": 17.7998, "eval_samples_per_second": 561.804, "eval_steps_per_second": 17.584, "step": 400000 }, { "epoch": 1.72, "learning_rate": 2.8824189759023067e-05, "loss": 1.5239, "step": 400100 }, { "epoch": 1.72, "learning_rate": 2.8818761372469832e-05, "loss": 1.553, "step": 400200 }, { "epoch": 1.72, "learning_rate": 2.8813332985916598e-05, "loss": 1.4987, "step": 400300 }, { "epoch": 1.72, "learning_rate": 2.880790459936336e-05, "loss": 1.5189, "step": 400400 }, { "epoch": 1.72, "learning_rate": 2.8802476212810125e-05, "loss": 1.5222, "step": 400500 }, { "epoch": 1.72, "learning_rate": 2.879704782625689e-05, "loss": 1.5357, "step": 400600 }, { "epoch": 1.72, "learning_rate": 2.8791619439703655e-05, "loss": 1.5154, "step": 400700 }, { "epoch": 1.72, "learning_rate": 2.8786191053150417e-05, "loss": 1.5355, "step": 400800 }, { "epoch": 1.72, "learning_rate": 2.8780762666597182e-05, "loss": 1.5319, "step": 400900 }, { "epoch": 1.72, "learning_rate": 2.8775334280043948e-05, "loss": 1.5456, "step": 401000 }, { "epoch": 1.72, "learning_rate": 2.8769905893490713e-05, "loss": 1.5551, "step": 401100 }, { "epoch": 1.72, "learning_rate": 2.876447750693748e-05, "loss": 1.5271, "step": 401200 }, { "epoch": 1.72, "learning_rate": 2.8759049120384247e-05, "loss": 1.5595, "step": 401300 }, { "epoch": 1.72, "learning_rate": 2.875362073383101e-05, "loss": 1.5349, "step": 401400 }, { "epoch": 1.72, "learning_rate": 2.8748192347277774e-05, "loss": 1.5408, "step": 401500 }, { "epoch": 1.73, "learning_rate": 2.874276396072454e-05, "loss": 1.5176, "step": 401600 }, { "epoch": 1.73, "learning_rate": 2.8737335574171305e-05, "loss": 1.4892, "step": 401700 }, { "epoch": 1.73, "learning_rate": 2.8731907187618066e-05, "loss": 1.5339, "step": 401800 }, { "epoch": 1.73, "learning_rate": 2.8726478801064832e-05, "loss": 1.5057, "step": 401900 }, { "epoch": 1.73, "learning_rate": 2.8721050414511597e-05, "loss": 1.5326, "step": 402000 }, { "epoch": 1.73, "learning_rate": 2.8715622027958362e-05, "loss": 1.5439, "step": 402100 }, { "epoch": 1.73, "learning_rate": 2.871019364140513e-05, "loss": 1.5396, "step": 402200 }, { "epoch": 1.73, "learning_rate": 2.8704765254851896e-05, "loss": 1.5363, "step": 402300 }, { "epoch": 1.73, "learning_rate": 2.8699336868298658e-05, "loss": 1.5016, "step": 402400 }, { "epoch": 1.73, "learning_rate": 2.8693908481745423e-05, "loss": 1.522, "step": 402500 }, { "epoch": 1.73, "learning_rate": 2.868848009519219e-05, "loss": 1.5246, "step": 402600 }, { "epoch": 1.73, "learning_rate": 2.8683051708638954e-05, "loss": 1.5332, "step": 402700 }, { "epoch": 1.73, "learning_rate": 2.8677623322085716e-05, "loss": 1.5089, "step": 402800 }, { "epoch": 1.73, "learning_rate": 2.867219493553248e-05, "loss": 1.5207, "step": 402900 }, { "epoch": 1.73, "learning_rate": 2.8666766548979246e-05, "loss": 1.5402, "step": 403000 }, { "epoch": 1.73, "learning_rate": 2.866133816242601e-05, "loss": 1.5388, "step": 403100 }, { "epoch": 1.73, "learning_rate": 2.8655909775872773e-05, "loss": 1.5333, "step": 403200 }, { "epoch": 1.73, "learning_rate": 2.8650481389319545e-05, "loss": 1.5238, "step": 403300 }, { "epoch": 1.73, "learning_rate": 2.8645053002766307e-05, "loss": 1.4922, "step": 403400 }, { "epoch": 1.73, "learning_rate": 2.8639624616213073e-05, "loss": 1.5461, "step": 403500 }, { "epoch": 1.73, "learning_rate": 2.8634196229659838e-05, "loss": 1.5317, "step": 403600 }, { "epoch": 1.73, "learning_rate": 2.8628767843106603e-05, "loss": 1.543, "step": 403700 }, { "epoch": 1.73, "learning_rate": 2.8623339456553365e-05, "loss": 1.5036, "step": 403800 }, { "epoch": 1.74, "learning_rate": 2.861791107000013e-05, "loss": 1.5589, "step": 403900 }, { "epoch": 1.74, "learning_rate": 2.8612482683446895e-05, "loss": 1.5317, "step": 404000 }, { "epoch": 1.74, "learning_rate": 2.860705429689366e-05, "loss": 1.5291, "step": 404100 }, { "epoch": 1.74, "learning_rate": 2.8601625910340423e-05, "loss": 1.5626, "step": 404200 }, { "epoch": 1.74, "learning_rate": 2.8596197523787195e-05, "loss": 1.5296, "step": 404300 }, { "epoch": 1.74, "learning_rate": 2.8590769137233957e-05, "loss": 1.5301, "step": 404400 }, { "epoch": 1.74, "learning_rate": 2.8585340750680722e-05, "loss": 1.528, "step": 404500 }, { "epoch": 1.74, "learning_rate": 2.8579912364127487e-05, "loss": 1.5344, "step": 404600 }, { "epoch": 1.74, "learning_rate": 2.8574483977574252e-05, "loss": 1.5603, "step": 404700 }, { "epoch": 1.74, "learning_rate": 2.8569055591021014e-05, "loss": 1.5, "step": 404800 }, { "epoch": 1.74, "learning_rate": 2.856362720446778e-05, "loss": 1.519, "step": 404900 }, { "epoch": 1.74, "learning_rate": 2.8558198817914545e-05, "loss": 1.5268, "step": 405000 }, { "epoch": 1.74, "eval_loss": 1.4383865594863892, "eval_runtime": 17.8138, "eval_samples_per_second": 561.361, "eval_steps_per_second": 17.571, "step": 405000 }, { "epoch": 1.74, "learning_rate": 2.855277043136131e-05, "loss": 1.5201, "step": 405100 }, { "epoch": 1.74, "learning_rate": 2.8547342044808072e-05, "loss": 1.531, "step": 405200 }, { "epoch": 1.74, "learning_rate": 2.8541913658254844e-05, "loss": 1.5284, "step": 405300 }, { "epoch": 1.74, "learning_rate": 2.8536485271701606e-05, "loss": 1.5239, "step": 405400 }, { "epoch": 1.74, "learning_rate": 2.853105688514837e-05, "loss": 1.5457, "step": 405500 }, { "epoch": 1.74, "learning_rate": 2.8525628498595136e-05, "loss": 1.517, "step": 405600 }, { "epoch": 1.74, "learning_rate": 2.85202001120419e-05, "loss": 1.5408, "step": 405700 }, { "epoch": 1.74, "learning_rate": 2.8514771725488663e-05, "loss": 1.5529, "step": 405800 }, { "epoch": 1.74, "learning_rate": 2.850934333893543e-05, "loss": 1.5393, "step": 405900 }, { "epoch": 1.74, "learning_rate": 2.8503914952382194e-05, "loss": 1.5388, "step": 406000 }, { "epoch": 1.74, "learning_rate": 2.849848656582896e-05, "loss": 1.5186, "step": 406100 }, { "epoch": 1.75, "learning_rate": 2.849305817927572e-05, "loss": 1.5518, "step": 406200 }, { "epoch": 1.75, "learning_rate": 2.8487629792722486e-05, "loss": 1.543, "step": 406300 }, { "epoch": 1.75, "learning_rate": 2.8482201406169255e-05, "loss": 1.5168, "step": 406400 }, { "epoch": 1.75, "learning_rate": 2.847677301961602e-05, "loss": 1.5481, "step": 406500 }, { "epoch": 1.75, "learning_rate": 2.8471344633062786e-05, "loss": 1.5403, "step": 406600 }, { "epoch": 1.75, "learning_rate": 2.846591624650955e-05, "loss": 1.517, "step": 406700 }, { "epoch": 1.75, "learning_rate": 2.8460487859956313e-05, "loss": 1.5456, "step": 406800 }, { "epoch": 1.75, "learning_rate": 2.8455059473403078e-05, "loss": 1.5158, "step": 406900 }, { "epoch": 1.75, "learning_rate": 2.8449631086849843e-05, "loss": 1.5657, "step": 407000 }, { "epoch": 1.75, "learning_rate": 2.844420270029661e-05, "loss": 1.537, "step": 407100 }, { "epoch": 1.75, "learning_rate": 2.843877431374337e-05, "loss": 1.5425, "step": 407200 }, { "epoch": 1.75, "learning_rate": 2.8433345927190136e-05, "loss": 1.4908, "step": 407300 }, { "epoch": 1.75, "learning_rate": 2.8427917540636904e-05, "loss": 1.5385, "step": 407400 }, { "epoch": 1.75, "learning_rate": 2.842248915408367e-05, "loss": 1.5428, "step": 407500 }, { "epoch": 1.75, "learning_rate": 2.8417060767530435e-05, "loss": 1.5489, "step": 407600 }, { "epoch": 1.75, "learning_rate": 2.84116323809772e-05, "loss": 1.5266, "step": 407700 }, { "epoch": 1.75, "learning_rate": 2.8406203994423962e-05, "loss": 1.524, "step": 407800 }, { "epoch": 1.75, "learning_rate": 2.8400775607870727e-05, "loss": 1.5025, "step": 407900 }, { "epoch": 1.75, "learning_rate": 2.8395347221317492e-05, "loss": 1.5191, "step": 408000 }, { "epoch": 1.75, "learning_rate": 2.8389918834764258e-05, "loss": 1.5252, "step": 408100 }, { "epoch": 1.75, "learning_rate": 2.838449044821102e-05, "loss": 1.5154, "step": 408200 }, { "epoch": 1.75, "learning_rate": 2.8379062061657785e-05, "loss": 1.5204, "step": 408300 }, { "epoch": 1.75, "learning_rate": 2.837363367510455e-05, "loss": 1.5414, "step": 408400 }, { "epoch": 1.75, "learning_rate": 2.836820528855132e-05, "loss": 1.5467, "step": 408500 }, { "epoch": 1.76, "learning_rate": 2.8362776901998084e-05, "loss": 1.5474, "step": 408600 }, { "epoch": 1.76, "learning_rate": 2.835734851544485e-05, "loss": 1.5368, "step": 408700 }, { "epoch": 1.76, "learning_rate": 2.835192012889161e-05, "loss": 1.5173, "step": 408800 }, { "epoch": 1.76, "learning_rate": 2.8346491742338376e-05, "loss": 1.5255, "step": 408900 }, { "epoch": 1.76, "learning_rate": 2.834106335578514e-05, "loss": 1.5475, "step": 409000 }, { "epoch": 1.76, "learning_rate": 2.8335634969231907e-05, "loss": 1.5244, "step": 409100 }, { "epoch": 1.76, "learning_rate": 2.833020658267867e-05, "loss": 1.5163, "step": 409200 }, { "epoch": 1.76, "learning_rate": 2.8324778196125434e-05, "loss": 1.532, "step": 409300 }, { "epoch": 1.76, "learning_rate": 2.83193498095722e-05, "loss": 1.5529, "step": 409400 }, { "epoch": 1.76, "learning_rate": 2.8313921423018968e-05, "loss": 1.5279, "step": 409500 }, { "epoch": 1.76, "learning_rate": 2.8308493036465733e-05, "loss": 1.5326, "step": 409600 }, { "epoch": 1.76, "learning_rate": 2.83030646499125e-05, "loss": 1.4819, "step": 409700 }, { "epoch": 1.76, "learning_rate": 2.829763626335926e-05, "loss": 1.5172, "step": 409800 }, { "epoch": 1.76, "learning_rate": 2.8292207876806026e-05, "loss": 1.5296, "step": 409900 }, { "epoch": 1.76, "learning_rate": 2.828677949025279e-05, "loss": 1.5574, "step": 410000 }, { "epoch": 1.76, "eval_loss": 1.4406307935714722, "eval_runtime": 17.8024, "eval_samples_per_second": 561.722, "eval_steps_per_second": 17.582, "step": 410000 }, { "epoch": 1.76, "learning_rate": 2.8281351103699556e-05, "loss": 1.5008, "step": 410100 }, { "epoch": 1.76, "learning_rate": 2.8275922717146318e-05, "loss": 1.5263, "step": 410200 }, { "epoch": 1.76, "learning_rate": 2.8270494330593083e-05, "loss": 1.5134, "step": 410300 }, { "epoch": 1.76, "learning_rate": 2.826506594403985e-05, "loss": 1.532, "step": 410400 }, { "epoch": 1.76, "learning_rate": 2.8259637557486614e-05, "loss": 1.5147, "step": 410500 }, { "epoch": 1.76, "learning_rate": 2.8254209170933383e-05, "loss": 1.4813, "step": 410600 }, { "epoch": 1.76, "learning_rate": 2.8248780784380148e-05, "loss": 1.5178, "step": 410700 }, { "epoch": 1.76, "learning_rate": 2.824335239782691e-05, "loss": 1.5388, "step": 410800 }, { "epoch": 1.77, "learning_rate": 2.8237924011273675e-05, "loss": 1.5144, "step": 410900 }, { "epoch": 1.77, "learning_rate": 2.823249562472044e-05, "loss": 1.4999, "step": 411000 }, { "epoch": 1.77, "learning_rate": 2.8227067238167205e-05, "loss": 1.5258, "step": 411100 }, { "epoch": 1.77, "learning_rate": 2.8221638851613967e-05, "loss": 1.5123, "step": 411200 }, { "epoch": 1.77, "learning_rate": 2.8216210465060733e-05, "loss": 1.5162, "step": 411300 }, { "epoch": 1.77, "learning_rate": 2.8210782078507498e-05, "loss": 1.5132, "step": 411400 }, { "epoch": 1.77, "learning_rate": 2.8205353691954263e-05, "loss": 1.5276, "step": 411500 }, { "epoch": 1.77, "learning_rate": 2.8199925305401032e-05, "loss": 1.4997, "step": 411600 }, { "epoch": 1.77, "learning_rate": 2.8194496918847797e-05, "loss": 1.5132, "step": 411700 }, { "epoch": 1.77, "learning_rate": 2.818906853229456e-05, "loss": 1.5267, "step": 411800 }, { "epoch": 1.77, "learning_rate": 2.8183640145741324e-05, "loss": 1.5039, "step": 411900 }, { "epoch": 1.77, "learning_rate": 2.817821175918809e-05, "loss": 1.5142, "step": 412000 }, { "epoch": 1.77, "learning_rate": 2.8172783372634855e-05, "loss": 1.526, "step": 412100 }, { "epoch": 1.77, "learning_rate": 2.8167354986081617e-05, "loss": 1.5519, "step": 412200 }, { "epoch": 1.77, "learning_rate": 2.8161926599528382e-05, "loss": 1.5155, "step": 412300 }, { "epoch": 1.77, "learning_rate": 2.8156498212975147e-05, "loss": 1.5072, "step": 412400 }, { "epoch": 1.77, "learning_rate": 2.8151069826421912e-05, "loss": 1.5312, "step": 412500 }, { "epoch": 1.77, "learning_rate": 2.8145641439868674e-05, "loss": 1.5432, "step": 412600 }, { "epoch": 1.77, "learning_rate": 2.8140213053315446e-05, "loss": 1.5491, "step": 412700 }, { "epoch": 1.77, "learning_rate": 2.8134784666762208e-05, "loss": 1.5052, "step": 412800 }, { "epoch": 1.77, "learning_rate": 2.8129356280208973e-05, "loss": 1.5212, "step": 412900 }, { "epoch": 1.77, "learning_rate": 2.812392789365574e-05, "loss": 1.5209, "step": 413000 }, { "epoch": 1.77, "learning_rate": 2.8118499507102504e-05, "loss": 1.5077, "step": 413100 }, { "epoch": 1.78, "learning_rate": 2.8113071120549266e-05, "loss": 1.4798, "step": 413200 }, { "epoch": 1.78, "learning_rate": 2.810764273399603e-05, "loss": 1.5385, "step": 413300 }, { "epoch": 1.78, "learning_rate": 2.8102214347442796e-05, "loss": 1.5457, "step": 413400 }, { "epoch": 1.78, "learning_rate": 2.809678596088956e-05, "loss": 1.523, "step": 413500 }, { "epoch": 1.78, "learning_rate": 2.8091357574336323e-05, "loss": 1.5384, "step": 413600 }, { "epoch": 1.78, "learning_rate": 2.8085929187783096e-05, "loss": 1.5193, "step": 413700 }, { "epoch": 1.78, "learning_rate": 2.8080500801229857e-05, "loss": 1.5332, "step": 413800 }, { "epoch": 1.78, "learning_rate": 2.8075072414676623e-05, "loss": 1.5462, "step": 413900 }, { "epoch": 1.78, "learning_rate": 2.8069644028123388e-05, "loss": 1.5654, "step": 414000 }, { "epoch": 1.78, "learning_rate": 2.8064215641570153e-05, "loss": 1.5277, "step": 414100 }, { "epoch": 1.78, "learning_rate": 2.8058787255016915e-05, "loss": 1.485, "step": 414200 }, { "epoch": 1.78, "learning_rate": 2.805335886846368e-05, "loss": 1.4895, "step": 414300 }, { "epoch": 1.78, "learning_rate": 2.8047930481910446e-05, "loss": 1.5262, "step": 414400 }, { "epoch": 1.78, "learning_rate": 2.804250209535721e-05, "loss": 1.5392, "step": 414500 }, { "epoch": 1.78, "learning_rate": 2.8037073708803973e-05, "loss": 1.4971, "step": 414600 }, { "epoch": 1.78, "learning_rate": 2.8031645322250738e-05, "loss": 1.5281, "step": 414700 }, { "epoch": 1.78, "learning_rate": 2.8026216935697507e-05, "loss": 1.5242, "step": 414800 }, { "epoch": 1.78, "learning_rate": 2.8020788549144272e-05, "loss": 1.5357, "step": 414900 }, { "epoch": 1.78, "learning_rate": 2.8015360162591037e-05, "loss": 1.5335, "step": 415000 }, { "epoch": 1.78, "eval_loss": 1.4382429122924805, "eval_runtime": 17.7575, "eval_samples_per_second": 563.143, "eval_steps_per_second": 17.626, "step": 415000 }, { "epoch": 1.78, "learning_rate": 2.8009931776037802e-05, "loss": 1.5204, "step": 415100 }, { "epoch": 1.78, "learning_rate": 2.8004503389484564e-05, "loss": 1.5121, "step": 415200 }, { "epoch": 1.78, "learning_rate": 2.799907500293133e-05, "loss": 1.5203, "step": 415300 }, { "epoch": 1.78, "learning_rate": 2.7993646616378095e-05, "loss": 1.5089, "step": 415400 }, { "epoch": 1.79, "learning_rate": 2.798821822982486e-05, "loss": 1.5304, "step": 415500 }, { "epoch": 1.79, "learning_rate": 2.7982789843271622e-05, "loss": 1.524, "step": 415600 }, { "epoch": 1.79, "learning_rate": 2.7977361456718387e-05, "loss": 1.5185, "step": 415700 }, { "epoch": 1.79, "learning_rate": 2.7971933070165156e-05, "loss": 1.5323, "step": 415800 }, { "epoch": 1.79, "learning_rate": 2.796650468361192e-05, "loss": 1.5207, "step": 415900 }, { "epoch": 1.79, "learning_rate": 2.7961076297058686e-05, "loss": 1.5296, "step": 416000 }, { "epoch": 1.79, "learning_rate": 2.795564791050545e-05, "loss": 1.51, "step": 416100 }, { "epoch": 1.79, "learning_rate": 2.7950219523952214e-05, "loss": 1.5265, "step": 416200 }, { "epoch": 1.79, "learning_rate": 2.794479113739898e-05, "loss": 1.5355, "step": 416300 }, { "epoch": 1.79, "learning_rate": 2.7939362750845744e-05, "loss": 1.5426, "step": 416400 }, { "epoch": 1.79, "learning_rate": 2.793393436429251e-05, "loss": 1.5439, "step": 416500 }, { "epoch": 1.79, "learning_rate": 2.792850597773927e-05, "loss": 1.5535, "step": 416600 }, { "epoch": 1.79, "learning_rate": 2.7923077591186036e-05, "loss": 1.516, "step": 416700 }, { "epoch": 1.79, "learning_rate": 2.7917649204632802e-05, "loss": 1.5376, "step": 416800 }, { "epoch": 1.79, "learning_rate": 2.791222081807957e-05, "loss": 1.528, "step": 416900 }, { "epoch": 1.79, "learning_rate": 2.7906792431526336e-05, "loss": 1.5282, "step": 417000 }, { "epoch": 1.79, "learning_rate": 2.79013640449731e-05, "loss": 1.528, "step": 417100 }, { "epoch": 1.79, "learning_rate": 2.7895935658419863e-05, "loss": 1.5245, "step": 417200 }, { "epoch": 1.79, "learning_rate": 2.7890507271866628e-05, "loss": 1.5342, "step": 417300 }, { "epoch": 1.79, "learning_rate": 2.7885078885313393e-05, "loss": 1.5092, "step": 417400 }, { "epoch": 1.79, "learning_rate": 2.787965049876016e-05, "loss": 1.5356, "step": 417500 }, { "epoch": 1.79, "learning_rate": 2.787422211220692e-05, "loss": 1.5228, "step": 417600 }, { "epoch": 1.79, "learning_rate": 2.7868793725653686e-05, "loss": 1.5355, "step": 417700 }, { "epoch": 1.79, "learning_rate": 2.786336533910045e-05, "loss": 1.5256, "step": 417800 }, { "epoch": 1.8, "learning_rate": 2.785793695254722e-05, "loss": 1.4689, "step": 417900 }, { "epoch": 1.8, "learning_rate": 2.7852508565993985e-05, "loss": 1.5218, "step": 418000 }, { "epoch": 1.8, "learning_rate": 2.784708017944075e-05, "loss": 1.5373, "step": 418100 }, { "epoch": 1.8, "learning_rate": 2.7841651792887512e-05, "loss": 1.5249, "step": 418200 }, { "epoch": 1.8, "learning_rate": 2.7836223406334277e-05, "loss": 1.526, "step": 418300 }, { "epoch": 1.8, "learning_rate": 2.7830795019781043e-05, "loss": 1.5213, "step": 418400 }, { "epoch": 1.8, "learning_rate": 2.7825366633227808e-05, "loss": 1.5095, "step": 418500 }, { "epoch": 1.8, "learning_rate": 2.781993824667457e-05, "loss": 1.5196, "step": 418600 }, { "epoch": 1.8, "learning_rate": 2.7814509860121335e-05, "loss": 1.5147, "step": 418700 }, { "epoch": 1.8, "learning_rate": 2.78090814735681e-05, "loss": 1.5175, "step": 418800 }, { "epoch": 1.8, "learning_rate": 2.7803653087014862e-05, "loss": 1.535, "step": 418900 }, { "epoch": 1.8, "learning_rate": 2.7798224700461634e-05, "loss": 1.4684, "step": 419000 }, { "epoch": 1.8, "learning_rate": 2.77927963139084e-05, "loss": 1.5448, "step": 419100 }, { "epoch": 1.8, "learning_rate": 2.778736792735516e-05, "loss": 1.5211, "step": 419200 }, { "epoch": 1.8, "learning_rate": 2.7781939540801927e-05, "loss": 1.5303, "step": 419300 }, { "epoch": 1.8, "learning_rate": 2.7776511154248692e-05, "loss": 1.5182, "step": 419400 }, { "epoch": 1.8, "learning_rate": 2.7771082767695457e-05, "loss": 1.5291, "step": 419500 }, { "epoch": 1.8, "learning_rate": 2.776565438114222e-05, "loss": 1.5256, "step": 419600 }, { "epoch": 1.8, "learning_rate": 2.7760225994588984e-05, "loss": 1.5461, "step": 419700 }, { "epoch": 1.8, "learning_rate": 2.775479760803575e-05, "loss": 1.5385, "step": 419800 }, { "epoch": 1.8, "learning_rate": 2.774936922148251e-05, "loss": 1.547, "step": 419900 }, { "epoch": 1.8, "learning_rate": 2.7743940834929283e-05, "loss": 1.5163, "step": 420000 }, { "epoch": 1.8, "eval_loss": 1.4372782707214355, "eval_runtime": 17.8039, "eval_samples_per_second": 561.676, "eval_steps_per_second": 17.58, "step": 420000 }, { "epoch": 1.8, "learning_rate": 2.773851244837605e-05, "loss": 1.4974, "step": 420100 }, { "epoch": 1.81, "learning_rate": 2.773308406182281e-05, "loss": 1.5112, "step": 420200 }, { "epoch": 1.81, "learning_rate": 2.7727655675269576e-05, "loss": 1.5071, "step": 420300 }, { "epoch": 1.81, "learning_rate": 2.772222728871634e-05, "loss": 1.5225, "step": 420400 }, { "epoch": 1.81, "learning_rate": 2.7716798902163106e-05, "loss": 1.5471, "step": 420500 }, { "epoch": 1.81, "learning_rate": 2.7711370515609868e-05, "loss": 1.5217, "step": 420600 }, { "epoch": 1.81, "learning_rate": 2.7705942129056633e-05, "loss": 1.4961, "step": 420700 }, { "epoch": 1.81, "learning_rate": 2.77005137425034e-05, "loss": 1.5081, "step": 420800 }, { "epoch": 1.81, "learning_rate": 2.769508535595016e-05, "loss": 1.5024, "step": 420900 }, { "epoch": 1.81, "learning_rate": 2.7689656969396926e-05, "loss": 1.5408, "step": 421000 }, { "epoch": 1.81, "learning_rate": 2.7684228582843698e-05, "loss": 1.537, "step": 421100 }, { "epoch": 1.81, "learning_rate": 2.767880019629046e-05, "loss": 1.5233, "step": 421200 }, { "epoch": 1.81, "learning_rate": 2.7673371809737225e-05, "loss": 1.5183, "step": 421300 }, { "epoch": 1.81, "learning_rate": 2.766794342318399e-05, "loss": 1.5317, "step": 421400 }, { "epoch": 1.81, "learning_rate": 2.7662515036630756e-05, "loss": 1.5374, "step": 421500 }, { "epoch": 1.81, "learning_rate": 2.7657086650077517e-05, "loss": 1.525, "step": 421600 }, { "epoch": 1.81, "learning_rate": 2.7651658263524283e-05, "loss": 1.5106, "step": 421700 }, { "epoch": 1.81, "learning_rate": 2.7646229876971048e-05, "loss": 1.4967, "step": 421800 }, { "epoch": 1.81, "learning_rate": 2.764080149041781e-05, "loss": 1.5214, "step": 421900 }, { "epoch": 1.81, "learning_rate": 2.7635373103864575e-05, "loss": 1.522, "step": 422000 }, { "epoch": 1.81, "learning_rate": 2.7629944717311347e-05, "loss": 1.5259, "step": 422100 }, { "epoch": 1.81, "learning_rate": 2.762451633075811e-05, "loss": 1.4915, "step": 422200 }, { "epoch": 1.81, "learning_rate": 2.7619087944204874e-05, "loss": 1.531, "step": 422300 }, { "epoch": 1.81, "learning_rate": 2.761365955765164e-05, "loss": 1.5303, "step": 422400 }, { "epoch": 1.82, "learning_rate": 2.7608231171098405e-05, "loss": 1.505, "step": 422500 }, { "epoch": 1.82, "learning_rate": 2.7602802784545167e-05, "loss": 1.5445, "step": 422600 }, { "epoch": 1.82, "learning_rate": 2.7597374397991932e-05, "loss": 1.5211, "step": 422700 }, { "epoch": 1.82, "learning_rate": 2.7591946011438697e-05, "loss": 1.5107, "step": 422800 }, { "epoch": 1.82, "learning_rate": 2.758651762488546e-05, "loss": 1.4987, "step": 422900 }, { "epoch": 1.82, "learning_rate": 2.7581089238332224e-05, "loss": 1.5144, "step": 423000 }, { "epoch": 1.82, "learning_rate": 2.757566085177899e-05, "loss": 1.5307, "step": 423100 }, { "epoch": 1.82, "learning_rate": 2.7570232465225758e-05, "loss": 1.5336, "step": 423200 }, { "epoch": 1.82, "learning_rate": 2.7564804078672524e-05, "loss": 1.5058, "step": 423300 }, { "epoch": 1.82, "learning_rate": 2.755937569211929e-05, "loss": 1.5249, "step": 423400 }, { "epoch": 1.82, "learning_rate": 2.7553947305566054e-05, "loss": 1.5157, "step": 423500 }, { "epoch": 1.82, "learning_rate": 2.7548518919012816e-05, "loss": 1.5169, "step": 423600 }, { "epoch": 1.82, "learning_rate": 2.754309053245958e-05, "loss": 1.5482, "step": 423700 }, { "epoch": 1.82, "learning_rate": 2.7537662145906346e-05, "loss": 1.5145, "step": 423800 }, { "epoch": 1.82, "learning_rate": 2.753223375935311e-05, "loss": 1.5434, "step": 423900 }, { "epoch": 1.82, "learning_rate": 2.7526805372799874e-05, "loss": 1.505, "step": 424000 }, { "epoch": 1.82, "learning_rate": 2.752137698624664e-05, "loss": 1.5246, "step": 424100 }, { "epoch": 1.82, "learning_rate": 2.7515948599693407e-05, "loss": 1.532, "step": 424200 }, { "epoch": 1.82, "learning_rate": 2.7510520213140173e-05, "loss": 1.4976, "step": 424300 }, { "epoch": 1.82, "learning_rate": 2.7505091826586938e-05, "loss": 1.5232, "step": 424400 }, { "epoch": 1.82, "learning_rate": 2.7499663440033703e-05, "loss": 1.5196, "step": 424500 }, { "epoch": 1.82, "learning_rate": 2.7494235053480465e-05, "loss": 1.5297, "step": 424600 }, { "epoch": 1.82, "learning_rate": 2.748880666692723e-05, "loss": 1.5444, "step": 424700 }, { "epoch": 1.82, "learning_rate": 2.7483378280373996e-05, "loss": 1.5246, "step": 424800 }, { "epoch": 1.83, "learning_rate": 2.7477949893820758e-05, "loss": 1.5539, "step": 424900 }, { "epoch": 1.83, "learning_rate": 2.7472521507267523e-05, "loss": 1.5018, "step": 425000 }, { "epoch": 1.83, "eval_loss": 1.439378261566162, "eval_runtime": 17.7944, "eval_samples_per_second": 561.975, "eval_steps_per_second": 17.59, "step": 425000 }, { "epoch": 1.83, "learning_rate": 2.7467093120714288e-05, "loss": 1.5323, "step": 425100 }, { "epoch": 1.83, "learning_rate": 2.7461664734161057e-05, "loss": 1.557, "step": 425200 }, { "epoch": 1.83, "learning_rate": 2.7456236347607822e-05, "loss": 1.5208, "step": 425300 }, { "epoch": 1.83, "learning_rate": 2.7450807961054587e-05, "loss": 1.5268, "step": 425400 }, { "epoch": 1.83, "learning_rate": 2.7445379574501353e-05, "loss": 1.5093, "step": 425500 }, { "epoch": 1.83, "learning_rate": 2.7439951187948114e-05, "loss": 1.5231, "step": 425600 }, { "epoch": 1.83, "learning_rate": 2.743452280139488e-05, "loss": 1.5062, "step": 425700 }, { "epoch": 1.83, "learning_rate": 2.7429094414841645e-05, "loss": 1.5069, "step": 425800 }, { "epoch": 1.83, "learning_rate": 2.7423666028288407e-05, "loss": 1.5228, "step": 425900 }, { "epoch": 1.83, "learning_rate": 2.7418237641735172e-05, "loss": 1.5146, "step": 426000 }, { "epoch": 1.83, "learning_rate": 2.7412809255181937e-05, "loss": 1.5084, "step": 426100 }, { "epoch": 1.83, "learning_rate": 2.7407380868628703e-05, "loss": 1.5113, "step": 426200 }, { "epoch": 1.83, "learning_rate": 2.740195248207547e-05, "loss": 1.523, "step": 426300 }, { "epoch": 1.83, "learning_rate": 2.7396524095522237e-05, "loss": 1.5522, "step": 426400 }, { "epoch": 1.83, "learning_rate": 2.7391095708969002e-05, "loss": 1.5223, "step": 426500 }, { "epoch": 1.83, "learning_rate": 2.7385667322415764e-05, "loss": 1.5028, "step": 426600 }, { "epoch": 1.83, "learning_rate": 2.738023893586253e-05, "loss": 1.5345, "step": 426700 }, { "epoch": 1.83, "learning_rate": 2.7374810549309294e-05, "loss": 1.5229, "step": 426800 }, { "epoch": 1.83, "learning_rate": 2.7369382162756056e-05, "loss": 1.5139, "step": 426900 }, { "epoch": 1.83, "learning_rate": 2.736395377620282e-05, "loss": 1.5395, "step": 427000 }, { "epoch": 1.83, "learning_rate": 2.7358525389649587e-05, "loss": 1.542, "step": 427100 }, { "epoch": 1.84, "learning_rate": 2.7353097003096352e-05, "loss": 1.5018, "step": 427200 }, { "epoch": 1.84, "learning_rate": 2.734766861654312e-05, "loss": 1.5242, "step": 427300 }, { "epoch": 1.84, "learning_rate": 2.7342240229989886e-05, "loss": 1.5297, "step": 427400 }, { "epoch": 1.84, "learning_rate": 2.733681184343665e-05, "loss": 1.5024, "step": 427500 }, { "epoch": 1.84, "learning_rate": 2.7331383456883413e-05, "loss": 1.5456, "step": 427600 }, { "epoch": 1.84, "learning_rate": 2.7325955070330178e-05, "loss": 1.5168, "step": 427700 }, { "epoch": 1.84, "learning_rate": 2.7320526683776943e-05, "loss": 1.5301, "step": 427800 }, { "epoch": 1.84, "learning_rate": 2.7315098297223705e-05, "loss": 1.5365, "step": 427900 }, { "epoch": 1.84, "learning_rate": 2.730966991067047e-05, "loss": 1.5342, "step": 428000 }, { "epoch": 1.84, "learning_rate": 2.7304241524117236e-05, "loss": 1.543, "step": 428100 }, { "epoch": 1.84, "learning_rate": 2.7298813137564e-05, "loss": 1.5274, "step": 428200 }, { "epoch": 1.84, "learning_rate": 2.7293384751010763e-05, "loss": 1.5155, "step": 428300 }, { "epoch": 1.84, "learning_rate": 2.7287956364457535e-05, "loss": 1.5238, "step": 428400 }, { "epoch": 1.84, "learning_rate": 2.72825279779043e-05, "loss": 1.5082, "step": 428500 }, { "epoch": 1.84, "learning_rate": 2.7277099591351062e-05, "loss": 1.5241, "step": 428600 }, { "epoch": 1.84, "learning_rate": 2.7271671204797827e-05, "loss": 1.538, "step": 428700 }, { "epoch": 1.84, "learning_rate": 2.7266242818244593e-05, "loss": 1.5274, "step": 428800 }, { "epoch": 1.84, "learning_rate": 2.7260814431691355e-05, "loss": 1.5441, "step": 428900 }, { "epoch": 1.84, "learning_rate": 2.725538604513812e-05, "loss": 1.5111, "step": 429000 }, { "epoch": 1.84, "learning_rate": 2.7249957658584885e-05, "loss": 1.5125, "step": 429100 }, { "epoch": 1.84, "learning_rate": 2.724452927203165e-05, "loss": 1.5252, "step": 429200 }, { "epoch": 1.84, "learning_rate": 2.7239100885478412e-05, "loss": 1.5299, "step": 429300 }, { "epoch": 1.84, "learning_rate": 2.7233672498925184e-05, "loss": 1.512, "step": 429400 }, { "epoch": 1.85, "learning_rate": 2.722824411237195e-05, "loss": 1.5447, "step": 429500 }, { "epoch": 1.85, "learning_rate": 2.722281572581871e-05, "loss": 1.5315, "step": 429600 }, { "epoch": 1.85, "learning_rate": 2.7217387339265477e-05, "loss": 1.5102, "step": 429700 }, { "epoch": 1.85, "learning_rate": 2.7211958952712242e-05, "loss": 1.5258, "step": 429800 }, { "epoch": 1.85, "learning_rate": 2.7206530566159004e-05, "loss": 1.5251, "step": 429900 }, { "epoch": 1.85, "learning_rate": 2.720110217960577e-05, "loss": 1.5062, "step": 430000 }, { "epoch": 1.85, "eval_loss": 1.4390578269958496, "eval_runtime": 17.787, "eval_samples_per_second": 562.209, "eval_steps_per_second": 17.597, "step": 430000 }, { "epoch": 1.85, "learning_rate": 2.7195673793052534e-05, "loss": 1.5278, "step": 430100 }, { "epoch": 1.85, "learning_rate": 2.71902454064993e-05, "loss": 1.5421, "step": 430200 }, { "epoch": 1.85, "learning_rate": 2.718481701994606e-05, "loss": 1.5117, "step": 430300 }, { "epoch": 1.85, "learning_rate": 2.7179388633392827e-05, "loss": 1.5179, "step": 430400 }, { "epoch": 1.85, "learning_rate": 2.71739602468396e-05, "loss": 1.5315, "step": 430500 }, { "epoch": 1.85, "learning_rate": 2.716853186028636e-05, "loss": 1.5208, "step": 430600 }, { "epoch": 1.85, "learning_rate": 2.7163103473733126e-05, "loss": 1.5164, "step": 430700 }, { "epoch": 1.85, "learning_rate": 2.715767508717989e-05, "loss": 1.5265, "step": 430800 }, { "epoch": 1.85, "learning_rate": 2.7152246700626653e-05, "loss": 1.5322, "step": 430900 }, { "epoch": 1.85, "learning_rate": 2.7146818314073418e-05, "loss": 1.5207, "step": 431000 }, { "epoch": 1.85, "learning_rate": 2.7141389927520184e-05, "loss": 1.5297, "step": 431100 }, { "epoch": 1.85, "learning_rate": 2.713596154096695e-05, "loss": 1.5421, "step": 431200 }, { "epoch": 1.85, "learning_rate": 2.713053315441371e-05, "loss": 1.5178, "step": 431300 }, { "epoch": 1.85, "learning_rate": 2.7125104767860476e-05, "loss": 1.5537, "step": 431400 }, { "epoch": 1.85, "learning_rate": 2.7119676381307248e-05, "loss": 1.5231, "step": 431500 }, { "epoch": 1.85, "learning_rate": 2.711424799475401e-05, "loss": 1.5325, "step": 431600 }, { "epoch": 1.85, "learning_rate": 2.7108819608200775e-05, "loss": 1.5152, "step": 431700 }, { "epoch": 1.86, "learning_rate": 2.710339122164754e-05, "loss": 1.5053, "step": 431800 }, { "epoch": 1.86, "learning_rate": 2.7097962835094302e-05, "loss": 1.5255, "step": 431900 }, { "epoch": 1.86, "learning_rate": 2.7092534448541068e-05, "loss": 1.5174, "step": 432000 }, { "epoch": 1.86, "learning_rate": 2.7087106061987833e-05, "loss": 1.5404, "step": 432100 }, { "epoch": 1.86, "learning_rate": 2.7081677675434598e-05, "loss": 1.5016, "step": 432200 }, { "epoch": 1.86, "learning_rate": 2.707624928888136e-05, "loss": 1.5244, "step": 432300 }, { "epoch": 1.86, "learning_rate": 2.7070820902328125e-05, "loss": 1.5198, "step": 432400 }, { "epoch": 1.86, "learning_rate": 2.706539251577489e-05, "loss": 1.5196, "step": 432500 }, { "epoch": 1.86, "learning_rate": 2.705996412922166e-05, "loss": 1.5185, "step": 432600 }, { "epoch": 1.86, "learning_rate": 2.7054535742668424e-05, "loss": 1.5117, "step": 432700 }, { "epoch": 1.86, "learning_rate": 2.704910735611519e-05, "loss": 1.5151, "step": 432800 }, { "epoch": 1.86, "learning_rate": 2.704367896956195e-05, "loss": 1.5069, "step": 432900 }, { "epoch": 1.86, "learning_rate": 2.7038250583008717e-05, "loss": 1.4896, "step": 433000 }, { "epoch": 1.86, "learning_rate": 2.7032822196455482e-05, "loss": 1.5276, "step": 433100 }, { "epoch": 1.86, "learning_rate": 2.7027393809902247e-05, "loss": 1.5253, "step": 433200 }, { "epoch": 1.86, "learning_rate": 2.702196542334901e-05, "loss": 1.5171, "step": 433300 }, { "epoch": 1.86, "learning_rate": 2.7016537036795774e-05, "loss": 1.5322, "step": 433400 }, { "epoch": 1.86, "learning_rate": 2.701110865024254e-05, "loss": 1.53, "step": 433500 }, { "epoch": 1.86, "learning_rate": 2.700568026368931e-05, "loss": 1.5743, "step": 433600 }, { "epoch": 1.86, "learning_rate": 2.7000251877136074e-05, "loss": 1.551, "step": 433700 }, { "epoch": 1.86, "learning_rate": 2.699482349058284e-05, "loss": 1.5115, "step": 433800 }, { "epoch": 1.86, "learning_rate": 2.69893951040296e-05, "loss": 1.5231, "step": 433900 }, { "epoch": 1.86, "learning_rate": 2.6983966717476366e-05, "loss": 1.5368, "step": 434000 }, { "epoch": 1.86, "learning_rate": 2.697853833092313e-05, "loss": 1.5166, "step": 434100 }, { "epoch": 1.87, "learning_rate": 2.6973109944369897e-05, "loss": 1.4913, "step": 434200 }, { "epoch": 1.87, "learning_rate": 2.696768155781666e-05, "loss": 1.5306, "step": 434300 }, { "epoch": 1.87, "learning_rate": 2.6962253171263424e-05, "loss": 1.5197, "step": 434400 }, { "epoch": 1.87, "learning_rate": 2.695682478471019e-05, "loss": 1.5224, "step": 434500 }, { "epoch": 1.87, "learning_rate": 2.6951396398156954e-05, "loss": 1.55, "step": 434600 }, { "epoch": 1.87, "learning_rate": 2.6945968011603723e-05, "loss": 1.5211, "step": 434700 }, { "epoch": 1.87, "learning_rate": 2.6940539625050488e-05, "loss": 1.521, "step": 434800 }, { "epoch": 1.87, "learning_rate": 2.693511123849725e-05, "loss": 1.5298, "step": 434900 }, { "epoch": 1.87, "learning_rate": 2.6929682851944015e-05, "loss": 1.5378, "step": 435000 }, { "epoch": 1.87, "eval_loss": 1.4357421398162842, "eval_runtime": 17.828, "eval_samples_per_second": 560.916, "eval_steps_per_second": 17.557, "step": 435000 }, { "epoch": 1.87, "learning_rate": 2.692425446539078e-05, "loss": 1.5364, "step": 435100 }, { "epoch": 1.87, "learning_rate": 2.6918826078837546e-05, "loss": 1.5312, "step": 435200 }, { "epoch": 1.87, "learning_rate": 2.6913397692284308e-05, "loss": 1.5099, "step": 435300 }, { "epoch": 1.87, "learning_rate": 2.6907969305731073e-05, "loss": 1.5087, "step": 435400 }, { "epoch": 1.87, "learning_rate": 2.6902540919177838e-05, "loss": 1.5656, "step": 435500 }, { "epoch": 1.87, "learning_rate": 2.6897112532624603e-05, "loss": 1.5303, "step": 435600 }, { "epoch": 1.87, "learning_rate": 2.6891684146071372e-05, "loss": 1.5082, "step": 435700 }, { "epoch": 1.87, "learning_rate": 2.6886255759518137e-05, "loss": 1.5383, "step": 435800 }, { "epoch": 1.87, "learning_rate": 2.68808273729649e-05, "loss": 1.5201, "step": 435900 }, { "epoch": 1.87, "learning_rate": 2.6875398986411665e-05, "loss": 1.5027, "step": 436000 }, { "epoch": 1.87, "learning_rate": 2.686997059985843e-05, "loss": 1.5225, "step": 436100 }, { "epoch": 1.87, "learning_rate": 2.6864542213305195e-05, "loss": 1.4969, "step": 436200 }, { "epoch": 1.87, "learning_rate": 2.6859113826751957e-05, "loss": 1.5574, "step": 436300 }, { "epoch": 1.87, "learning_rate": 2.6853685440198722e-05, "loss": 1.5404, "step": 436400 }, { "epoch": 1.88, "learning_rate": 2.6848257053645487e-05, "loss": 1.5161, "step": 436500 }, { "epoch": 1.88, "learning_rate": 2.6842828667092253e-05, "loss": 1.5114, "step": 436600 }, { "epoch": 1.88, "learning_rate": 2.6837400280539015e-05, "loss": 1.514, "step": 436700 }, { "epoch": 1.88, "learning_rate": 2.6831971893985787e-05, "loss": 1.5196, "step": 436800 }, { "epoch": 1.88, "learning_rate": 2.682654350743255e-05, "loss": 1.5169, "step": 436900 }, { "epoch": 1.88, "learning_rate": 2.6821115120879314e-05, "loss": 1.513, "step": 437000 }, { "epoch": 1.88, "learning_rate": 2.681568673432608e-05, "loss": 1.5279, "step": 437100 }, { "epoch": 1.88, "learning_rate": 2.6810258347772844e-05, "loss": 1.5097, "step": 437200 }, { "epoch": 1.88, "learning_rate": 2.6804829961219606e-05, "loss": 1.522, "step": 437300 }, { "epoch": 1.88, "learning_rate": 2.679940157466637e-05, "loss": 1.5354, "step": 437400 }, { "epoch": 1.88, "learning_rate": 2.6793973188113137e-05, "loss": 1.5158, "step": 437500 }, { "epoch": 1.88, "learning_rate": 2.6788544801559902e-05, "loss": 1.5274, "step": 437600 }, { "epoch": 1.88, "learning_rate": 2.6783116415006664e-05, "loss": 1.5224, "step": 437700 }, { "epoch": 1.88, "learning_rate": 2.6777688028453436e-05, "loss": 1.5052, "step": 437800 }, { "epoch": 1.88, "learning_rate": 2.6772259641900198e-05, "loss": 1.5173, "step": 437900 }, { "epoch": 1.88, "learning_rate": 2.6766831255346963e-05, "loss": 1.4881, "step": 438000 }, { "epoch": 1.88, "learning_rate": 2.6761402868793728e-05, "loss": 1.514, "step": 438100 }, { "epoch": 1.88, "learning_rate": 2.6755974482240494e-05, "loss": 1.5351, "step": 438200 }, { "epoch": 1.88, "learning_rate": 2.6750546095687255e-05, "loss": 1.5091, "step": 438300 }, { "epoch": 1.88, "learning_rate": 2.674511770913402e-05, "loss": 1.5445, "step": 438400 }, { "epoch": 1.88, "learning_rate": 2.6739689322580786e-05, "loss": 1.5062, "step": 438500 }, { "epoch": 1.88, "learning_rate": 2.673426093602755e-05, "loss": 1.558, "step": 438600 }, { "epoch": 1.88, "learning_rate": 2.6728832549474313e-05, "loss": 1.5366, "step": 438700 }, { "epoch": 1.89, "learning_rate": 2.672340416292108e-05, "loss": 1.5452, "step": 438800 }, { "epoch": 1.89, "learning_rate": 2.6717975776367847e-05, "loss": 1.5135, "step": 438900 }, { "epoch": 1.89, "learning_rate": 2.6712547389814612e-05, "loss": 1.5453, "step": 439000 }, { "epoch": 1.89, "learning_rate": 2.6707119003261377e-05, "loss": 1.5165, "step": 439100 }, { "epoch": 1.89, "learning_rate": 2.6701690616708143e-05, "loss": 1.5297, "step": 439200 }, { "epoch": 1.89, "learning_rate": 2.6696262230154905e-05, "loss": 1.5236, "step": 439300 }, { "epoch": 1.89, "learning_rate": 2.669083384360167e-05, "loss": 1.5378, "step": 439400 }, { "epoch": 1.89, "learning_rate": 2.6685405457048435e-05, "loss": 1.5295, "step": 439500 }, { "epoch": 1.89, "learning_rate": 2.66799770704952e-05, "loss": 1.5415, "step": 439600 }, { "epoch": 1.89, "learning_rate": 2.6674548683941962e-05, "loss": 1.5155, "step": 439700 }, { "epoch": 1.89, "learning_rate": 2.6669120297388728e-05, "loss": 1.5277, "step": 439800 }, { "epoch": 1.89, "learning_rate": 2.6663691910835496e-05, "loss": 1.535, "step": 439900 }, { "epoch": 1.89, "learning_rate": 2.665826352428226e-05, "loss": 1.5224, "step": 440000 }, { "epoch": 1.89, "eval_loss": 1.4352110624313354, "eval_runtime": 17.8498, "eval_samples_per_second": 560.232, "eval_steps_per_second": 17.535, "step": 440000 }, { "epoch": 1.89, "learning_rate": 2.6652835137729027e-05, "loss": 1.5307, "step": 440100 }, { "epoch": 1.89, "learning_rate": 2.6647406751175792e-05, "loss": 1.5424, "step": 440200 }, { "epoch": 1.89, "learning_rate": 2.6641978364622554e-05, "loss": 1.5518, "step": 440300 }, { "epoch": 1.89, "learning_rate": 2.663654997806932e-05, "loss": 1.5074, "step": 440400 }, { "epoch": 1.89, "learning_rate": 2.6631121591516084e-05, "loss": 1.5107, "step": 440500 }, { "epoch": 1.89, "learning_rate": 2.662569320496285e-05, "loss": 1.5267, "step": 440600 }, { "epoch": 1.89, "learning_rate": 2.662026481840961e-05, "loss": 1.5299, "step": 440700 }, { "epoch": 1.89, "learning_rate": 2.6614836431856377e-05, "loss": 1.5614, "step": 440800 }, { "epoch": 1.89, "learning_rate": 2.6609408045303142e-05, "loss": 1.5349, "step": 440900 }, { "epoch": 1.89, "learning_rate": 2.660397965874991e-05, "loss": 1.5416, "step": 441000 }, { "epoch": 1.89, "learning_rate": 2.6598551272196676e-05, "loss": 1.5203, "step": 441100 }, { "epoch": 1.9, "learning_rate": 2.659312288564344e-05, "loss": 1.5251, "step": 441200 }, { "epoch": 1.9, "learning_rate": 2.6587694499090203e-05, "loss": 1.5086, "step": 441300 }, { "epoch": 1.9, "learning_rate": 2.658226611253697e-05, "loss": 1.5369, "step": 441400 }, { "epoch": 1.9, "learning_rate": 2.6576837725983734e-05, "loss": 1.5259, "step": 441500 }, { "epoch": 1.9, "learning_rate": 2.65714093394305e-05, "loss": 1.5062, "step": 441600 }, { "epoch": 1.9, "learning_rate": 2.656598095287726e-05, "loss": 1.4988, "step": 441700 }, { "epoch": 1.9, "learning_rate": 2.6560552566324026e-05, "loss": 1.5246, "step": 441800 }, { "epoch": 1.9, "learning_rate": 2.655512417977079e-05, "loss": 1.5177, "step": 441900 }, { "epoch": 1.9, "learning_rate": 2.654969579321756e-05, "loss": 1.5016, "step": 442000 }, { "epoch": 1.9, "learning_rate": 2.6544267406664325e-05, "loss": 1.5054, "step": 442100 }, { "epoch": 1.9, "learning_rate": 2.653883902011109e-05, "loss": 1.5185, "step": 442200 }, { "epoch": 1.9, "learning_rate": 2.6533410633557852e-05, "loss": 1.5031, "step": 442300 }, { "epoch": 1.9, "learning_rate": 2.6527982247004618e-05, "loss": 1.5084, "step": 442400 }, { "epoch": 1.9, "learning_rate": 2.6522553860451383e-05, "loss": 1.5021, "step": 442500 }, { "epoch": 1.9, "learning_rate": 2.6517125473898148e-05, "loss": 1.5188, "step": 442600 }, { "epoch": 1.9, "learning_rate": 2.651169708734491e-05, "loss": 1.5382, "step": 442700 }, { "epoch": 1.9, "learning_rate": 2.6506268700791675e-05, "loss": 1.5199, "step": 442800 }, { "epoch": 1.9, "learning_rate": 2.650084031423844e-05, "loss": 1.5069, "step": 442900 }, { "epoch": 1.9, "learning_rate": 2.6495411927685206e-05, "loss": 1.4994, "step": 443000 }, { "epoch": 1.9, "learning_rate": 2.6489983541131974e-05, "loss": 1.5141, "step": 443100 }, { "epoch": 1.9, "learning_rate": 2.648455515457874e-05, "loss": 1.5473, "step": 443200 }, { "epoch": 1.9, "learning_rate": 2.64791267680255e-05, "loss": 1.549, "step": 443300 }, { "epoch": 1.9, "learning_rate": 2.6473698381472267e-05, "loss": 1.5292, "step": 443400 }, { "epoch": 1.91, "learning_rate": 2.6468269994919032e-05, "loss": 1.535, "step": 443500 }, { "epoch": 1.91, "learning_rate": 2.6462841608365797e-05, "loss": 1.5304, "step": 443600 }, { "epoch": 1.91, "learning_rate": 2.645741322181256e-05, "loss": 1.5453, "step": 443700 }, { "epoch": 1.91, "learning_rate": 2.6451984835259325e-05, "loss": 1.5084, "step": 443800 }, { "epoch": 1.91, "learning_rate": 2.644655644870609e-05, "loss": 1.5249, "step": 443900 }, { "epoch": 1.91, "learning_rate": 2.6441128062152855e-05, "loss": 1.5258, "step": 444000 }, { "epoch": 1.91, "learning_rate": 2.6435699675599624e-05, "loss": 1.5305, "step": 444100 }, { "epoch": 1.91, "learning_rate": 2.643027128904639e-05, "loss": 1.5316, "step": 444200 }, { "epoch": 1.91, "learning_rate": 2.642484290249315e-05, "loss": 1.5522, "step": 444300 }, { "epoch": 1.91, "learning_rate": 2.6419414515939916e-05, "loss": 1.5034, "step": 444400 }, { "epoch": 1.91, "learning_rate": 2.641398612938668e-05, "loss": 1.532, "step": 444500 }, { "epoch": 1.91, "learning_rate": 2.6408557742833447e-05, "loss": 1.5135, "step": 444600 }, { "epoch": 1.91, "learning_rate": 2.640312935628021e-05, "loss": 1.501, "step": 444700 }, { "epoch": 1.91, "learning_rate": 2.6397700969726974e-05, "loss": 1.5275, "step": 444800 }, { "epoch": 1.91, "learning_rate": 2.639227258317374e-05, "loss": 1.5153, "step": 444900 }, { "epoch": 1.91, "learning_rate": 2.6386844196620504e-05, "loss": 1.5199, "step": 445000 }, { "epoch": 1.91, "eval_loss": 1.4345873594284058, "eval_runtime": 17.7853, "eval_samples_per_second": 562.264, "eval_steps_per_second": 17.599, "step": 445000 }, { "epoch": 1.91, "learning_rate": 2.6381415810067273e-05, "loss": 1.5318, "step": 445100 }, { "epoch": 1.91, "learning_rate": 2.6375987423514038e-05, "loss": 1.5213, "step": 445200 }, { "epoch": 1.91, "learning_rate": 2.63705590369608e-05, "loss": 1.5152, "step": 445300 }, { "epoch": 1.91, "learning_rate": 2.6365130650407565e-05, "loss": 1.5457, "step": 445400 }, { "epoch": 1.91, "learning_rate": 2.635970226385433e-05, "loss": 1.5134, "step": 445500 }, { "epoch": 1.91, "learning_rate": 2.6354273877301096e-05, "loss": 1.5224, "step": 445600 }, { "epoch": 1.91, "learning_rate": 2.6348845490747858e-05, "loss": 1.4994, "step": 445700 }, { "epoch": 1.92, "learning_rate": 2.6343417104194623e-05, "loss": 1.537, "step": 445800 }, { "epoch": 1.92, "learning_rate": 2.6337988717641388e-05, "loss": 1.5304, "step": 445900 }, { "epoch": 1.92, "learning_rate": 2.6332560331088154e-05, "loss": 1.5238, "step": 446000 }, { "epoch": 1.92, "learning_rate": 2.6327131944534915e-05, "loss": 1.5099, "step": 446100 }, { "epoch": 1.92, "learning_rate": 2.6321703557981687e-05, "loss": 1.5341, "step": 446200 }, { "epoch": 1.92, "learning_rate": 2.631627517142845e-05, "loss": 1.5001, "step": 446300 }, { "epoch": 1.92, "learning_rate": 2.6310846784875215e-05, "loss": 1.5191, "step": 446400 }, { "epoch": 1.92, "learning_rate": 2.630541839832198e-05, "loss": 1.5184, "step": 446500 }, { "epoch": 1.92, "learning_rate": 2.6299990011768745e-05, "loss": 1.5, "step": 446600 }, { "epoch": 1.92, "learning_rate": 2.6294561625215507e-05, "loss": 1.522, "step": 446700 }, { "epoch": 1.92, "learning_rate": 2.6289133238662272e-05, "loss": 1.5686, "step": 446800 }, { "epoch": 1.92, "learning_rate": 2.6283704852109038e-05, "loss": 1.5029, "step": 446900 }, { "epoch": 1.92, "learning_rate": 2.6278276465555803e-05, "loss": 1.5285, "step": 447000 }, { "epoch": 1.92, "learning_rate": 2.6272848079002565e-05, "loss": 1.5039, "step": 447100 }, { "epoch": 1.92, "learning_rate": 2.6267419692449337e-05, "loss": 1.5305, "step": 447200 }, { "epoch": 1.92, "learning_rate": 2.62619913058961e-05, "loss": 1.5144, "step": 447300 }, { "epoch": 1.92, "learning_rate": 2.6256562919342864e-05, "loss": 1.5168, "step": 447400 }, { "epoch": 1.92, "learning_rate": 2.625113453278963e-05, "loss": 1.5162, "step": 447500 }, { "epoch": 1.92, "learning_rate": 2.6245706146236394e-05, "loss": 1.5354, "step": 447600 }, { "epoch": 1.92, "learning_rate": 2.6240277759683156e-05, "loss": 1.5558, "step": 447700 }, { "epoch": 1.92, "learning_rate": 2.623484937312992e-05, "loss": 1.491, "step": 447800 }, { "epoch": 1.92, "learning_rate": 2.6229420986576687e-05, "loss": 1.5375, "step": 447900 }, { "epoch": 1.92, "learning_rate": 2.6223992600023452e-05, "loss": 1.5197, "step": 448000 }, { "epoch": 1.93, "learning_rate": 2.6218564213470214e-05, "loss": 1.5252, "step": 448100 }, { "epoch": 1.93, "learning_rate": 2.621313582691698e-05, "loss": 1.4979, "step": 448200 }, { "epoch": 1.93, "learning_rate": 2.6207707440363748e-05, "loss": 1.5203, "step": 448300 }, { "epoch": 1.93, "learning_rate": 2.6202279053810513e-05, "loss": 1.4894, "step": 448400 }, { "epoch": 1.93, "learning_rate": 2.619685066725728e-05, "loss": 1.5117, "step": 448500 }, { "epoch": 1.93, "learning_rate": 2.6191422280704044e-05, "loss": 1.5073, "step": 448600 }, { "epoch": 1.93, "learning_rate": 2.6185993894150805e-05, "loss": 1.5084, "step": 448700 }, { "epoch": 1.93, "learning_rate": 2.618056550759757e-05, "loss": 1.5031, "step": 448800 }, { "epoch": 1.93, "learning_rate": 2.6175137121044336e-05, "loss": 1.5309, "step": 448900 }, { "epoch": 1.93, "learning_rate": 2.61697087344911e-05, "loss": 1.5146, "step": 449000 }, { "epoch": 1.93, "learning_rate": 2.6164280347937863e-05, "loss": 1.5523, "step": 449100 }, { "epoch": 1.93, "learning_rate": 2.615885196138463e-05, "loss": 1.54, "step": 449200 }, { "epoch": 1.93, "learning_rate": 2.6153423574831397e-05, "loss": 1.5049, "step": 449300 }, { "epoch": 1.93, "learning_rate": 2.6147995188278162e-05, "loss": 1.531, "step": 449400 }, { "epoch": 1.93, "learning_rate": 2.6142566801724928e-05, "loss": 1.4931, "step": 449500 }, { "epoch": 1.93, "learning_rate": 2.6137138415171693e-05, "loss": 1.4995, "step": 449600 }, { "epoch": 1.93, "learning_rate": 2.6131710028618455e-05, "loss": 1.5199, "step": 449700 }, { "epoch": 1.93, "learning_rate": 2.612628164206522e-05, "loss": 1.4933, "step": 449800 }, { "epoch": 1.93, "learning_rate": 2.6120853255511985e-05, "loss": 1.5186, "step": 449900 }, { "epoch": 1.93, "learning_rate": 2.611542486895875e-05, "loss": 1.5041, "step": 450000 }, { "epoch": 1.93, "eval_loss": 1.4356664419174194, "eval_runtime": 17.7962, "eval_samples_per_second": 561.918, "eval_steps_per_second": 17.588, "step": 450000 }, { "epoch": 1.93, "learning_rate": 2.6109996482405512e-05, "loss": 1.487, "step": 450100 }, { "epoch": 1.93, "learning_rate": 2.6104568095852278e-05, "loss": 1.5048, "step": 450200 }, { "epoch": 1.93, "learning_rate": 2.6099139709299043e-05, "loss": 1.5171, "step": 450300 }, { "epoch": 1.93, "learning_rate": 2.609371132274581e-05, "loss": 1.5121, "step": 450400 }, { "epoch": 1.94, "learning_rate": 2.6088282936192577e-05, "loss": 1.528, "step": 450500 }, { "epoch": 1.94, "learning_rate": 2.6082854549639342e-05, "loss": 1.5088, "step": 450600 }, { "epoch": 1.94, "learning_rate": 2.6077426163086104e-05, "loss": 1.5042, "step": 450700 }, { "epoch": 1.94, "learning_rate": 2.607199777653287e-05, "loss": 1.5142, "step": 450800 }, { "epoch": 1.94, "learning_rate": 2.6066569389979635e-05, "loss": 1.532, "step": 450900 }, { "epoch": 1.94, "learning_rate": 2.6061141003426396e-05, "loss": 1.5488, "step": 451000 }, { "epoch": 1.94, "learning_rate": 2.605571261687316e-05, "loss": 1.5321, "step": 451100 }, { "epoch": 1.94, "learning_rate": 2.6050284230319927e-05, "loss": 1.521, "step": 451200 }, { "epoch": 1.94, "learning_rate": 2.6044855843766692e-05, "loss": 1.5191, "step": 451300 }, { "epoch": 1.94, "learning_rate": 2.603942745721346e-05, "loss": 1.501, "step": 451400 }, { "epoch": 1.94, "learning_rate": 2.6033999070660226e-05, "loss": 1.5219, "step": 451500 }, { "epoch": 1.94, "learning_rate": 2.602857068410699e-05, "loss": 1.5292, "step": 451600 }, { "epoch": 1.94, "learning_rate": 2.6023142297553753e-05, "loss": 1.5018, "step": 451700 }, { "epoch": 1.94, "learning_rate": 2.601771391100052e-05, "loss": 1.55, "step": 451800 }, { "epoch": 1.94, "learning_rate": 2.6012285524447284e-05, "loss": 1.511, "step": 451900 }, { "epoch": 1.94, "learning_rate": 2.6006857137894046e-05, "loss": 1.5349, "step": 452000 }, { "epoch": 1.94, "learning_rate": 2.600142875134081e-05, "loss": 1.5489, "step": 452100 }, { "epoch": 1.94, "learning_rate": 2.5996000364787576e-05, "loss": 1.5337, "step": 452200 }, { "epoch": 1.94, "learning_rate": 2.599057197823434e-05, "loss": 1.5176, "step": 452300 }, { "epoch": 1.94, "learning_rate": 2.5985143591681103e-05, "loss": 1.4814, "step": 452400 }, { "epoch": 1.94, "learning_rate": 2.5979715205127875e-05, "loss": 1.5198, "step": 452500 }, { "epoch": 1.94, "learning_rate": 2.597428681857464e-05, "loss": 1.4852, "step": 452600 }, { "epoch": 1.94, "learning_rate": 2.5968858432021402e-05, "loss": 1.5318, "step": 452700 }, { "epoch": 1.95, "learning_rate": 2.5963430045468168e-05, "loss": 1.5186, "step": 452800 }, { "epoch": 1.95, "learning_rate": 2.5958001658914933e-05, "loss": 1.5258, "step": 452900 }, { "epoch": 1.95, "learning_rate": 2.5952573272361695e-05, "loss": 1.5404, "step": 453000 }, { "epoch": 1.95, "learning_rate": 2.594714488580846e-05, "loss": 1.4918, "step": 453100 }, { "epoch": 1.95, "learning_rate": 2.5941716499255225e-05, "loss": 1.5234, "step": 453200 }, { "epoch": 1.95, "learning_rate": 2.593628811270199e-05, "loss": 1.5085, "step": 453300 }, { "epoch": 1.95, "learning_rate": 2.5930859726148753e-05, "loss": 1.5206, "step": 453400 }, { "epoch": 1.95, "learning_rate": 2.5925431339595525e-05, "loss": 1.5355, "step": 453500 }, { "epoch": 1.95, "learning_rate": 2.592000295304229e-05, "loss": 1.5291, "step": 453600 }, { "epoch": 1.95, "learning_rate": 2.5914574566489052e-05, "loss": 1.5067, "step": 453700 }, { "epoch": 1.95, "learning_rate": 2.5909146179935817e-05, "loss": 1.5327, "step": 453800 }, { "epoch": 1.95, "learning_rate": 2.5903717793382582e-05, "loss": 1.5209, "step": 453900 }, { "epoch": 1.95, "learning_rate": 2.5898289406829344e-05, "loss": 1.4975, "step": 454000 }, { "epoch": 1.95, "learning_rate": 2.589286102027611e-05, "loss": 1.526, "step": 454100 }, { "epoch": 1.95, "learning_rate": 2.5887432633722875e-05, "loss": 1.5319, "step": 454200 }, { "epoch": 1.95, "learning_rate": 2.588200424716964e-05, "loss": 1.5306, "step": 454300 }, { "epoch": 1.95, "learning_rate": 2.5876575860616402e-05, "loss": 1.5138, "step": 454400 }, { "epoch": 1.95, "learning_rate": 2.5871147474063167e-05, "loss": 1.5177, "step": 454500 }, { "epoch": 1.95, "learning_rate": 2.586571908750994e-05, "loss": 1.487, "step": 454600 }, { "epoch": 1.95, "learning_rate": 2.58602907009567e-05, "loss": 1.5251, "step": 454700 }, { "epoch": 1.95, "learning_rate": 2.5854862314403466e-05, "loss": 1.5141, "step": 454800 }, { "epoch": 1.95, "learning_rate": 2.584943392785023e-05, "loss": 1.5246, "step": 454900 }, { "epoch": 1.95, "learning_rate": 2.5844005541296993e-05, "loss": 1.4794, "step": 455000 }, { "epoch": 1.95, "eval_loss": 1.437150239944458, "eval_runtime": 17.7517, "eval_samples_per_second": 563.327, "eval_steps_per_second": 17.632, "step": 455000 }, { "epoch": 1.96, "learning_rate": 2.583857715474376e-05, "loss": 1.5201, "step": 455100 }, { "epoch": 1.96, "learning_rate": 2.5833148768190524e-05, "loss": 1.4924, "step": 455200 }, { "epoch": 1.96, "learning_rate": 2.582772038163729e-05, "loss": 1.5248, "step": 455300 }, { "epoch": 1.96, "learning_rate": 2.582229199508405e-05, "loss": 1.5092, "step": 455400 }, { "epoch": 1.96, "learning_rate": 2.5816863608530816e-05, "loss": 1.521, "step": 455500 }, { "epoch": 1.96, "learning_rate": 2.581143522197759e-05, "loss": 1.5166, "step": 455600 }, { "epoch": 1.96, "learning_rate": 2.580600683542435e-05, "loss": 1.5099, "step": 455700 }, { "epoch": 1.96, "learning_rate": 2.5800578448871115e-05, "loss": 1.5127, "step": 455800 }, { "epoch": 1.96, "learning_rate": 2.579515006231788e-05, "loss": 1.5218, "step": 455900 }, { "epoch": 1.96, "learning_rate": 2.5789721675764643e-05, "loss": 1.527, "step": 456000 }, { "epoch": 1.96, "learning_rate": 2.5784293289211408e-05, "loss": 1.532, "step": 456100 }, { "epoch": 1.96, "learning_rate": 2.5778864902658173e-05, "loss": 1.5171, "step": 456200 }, { "epoch": 1.96, "learning_rate": 2.577343651610494e-05, "loss": 1.5231, "step": 456300 }, { "epoch": 1.96, "learning_rate": 2.57680081295517e-05, "loss": 1.5034, "step": 456400 }, { "epoch": 1.96, "learning_rate": 2.5762579742998466e-05, "loss": 1.5194, "step": 456500 }, { "epoch": 1.96, "learning_rate": 2.575715135644523e-05, "loss": 1.5078, "step": 456600 }, { "epoch": 1.96, "learning_rate": 2.5751722969892e-05, "loss": 1.5181, "step": 456700 }, { "epoch": 1.96, "learning_rate": 2.5746294583338765e-05, "loss": 1.5411, "step": 456800 }, { "epoch": 1.96, "learning_rate": 2.574086619678553e-05, "loss": 1.5068, "step": 456900 }, { "epoch": 1.96, "learning_rate": 2.5735437810232292e-05, "loss": 1.5259, "step": 457000 }, { "epoch": 1.96, "learning_rate": 2.5730009423679057e-05, "loss": 1.5365, "step": 457100 }, { "epoch": 1.96, "learning_rate": 2.5724581037125822e-05, "loss": 1.5247, "step": 457200 }, { "epoch": 1.96, "learning_rate": 2.5719152650572588e-05, "loss": 1.5386, "step": 457300 }, { "epoch": 1.97, "learning_rate": 2.571372426401935e-05, "loss": 1.4939, "step": 457400 }, { "epoch": 1.97, "learning_rate": 2.5708295877466115e-05, "loss": 1.5226, "step": 457500 }, { "epoch": 1.97, "learning_rate": 2.570286749091288e-05, "loss": 1.5273, "step": 457600 }, { "epoch": 1.97, "learning_rate": 2.569743910435965e-05, "loss": 1.5175, "step": 457700 }, { "epoch": 1.97, "learning_rate": 2.5692010717806414e-05, "loss": 1.5396, "step": 457800 }, { "epoch": 1.97, "learning_rate": 2.568658233125318e-05, "loss": 1.5206, "step": 457900 }, { "epoch": 1.97, "learning_rate": 2.568115394469994e-05, "loss": 1.4951, "step": 458000 }, { "epoch": 1.97, "learning_rate": 2.5675725558146706e-05, "loss": 1.5071, "step": 458100 }, { "epoch": 1.97, "learning_rate": 2.567029717159347e-05, "loss": 1.5149, "step": 458200 }, { "epoch": 1.97, "learning_rate": 2.5664868785040237e-05, "loss": 1.5149, "step": 458300 }, { "epoch": 1.97, "learning_rate": 2.5659440398487e-05, "loss": 1.4868, "step": 458400 }, { "epoch": 1.97, "learning_rate": 2.5654012011933764e-05, "loss": 1.5077, "step": 458500 }, { "epoch": 1.97, "learning_rate": 2.564858362538053e-05, "loss": 1.5018, "step": 458600 }, { "epoch": 1.97, "learning_rate": 2.5643155238827295e-05, "loss": 1.5123, "step": 458700 }, { "epoch": 1.97, "learning_rate": 2.5637726852274063e-05, "loss": 1.4973, "step": 458800 }, { "epoch": 1.97, "learning_rate": 2.563229846572083e-05, "loss": 1.4967, "step": 458900 }, { "epoch": 1.97, "learning_rate": 2.562687007916759e-05, "loss": 1.516, "step": 459000 }, { "epoch": 1.97, "learning_rate": 2.5621441692614356e-05, "loss": 1.5316, "step": 459100 }, { "epoch": 1.97, "learning_rate": 2.561601330606112e-05, "loss": 1.5445, "step": 459200 }, { "epoch": 1.97, "learning_rate": 2.5610584919507886e-05, "loss": 1.5118, "step": 459300 }, { "epoch": 1.97, "learning_rate": 2.5605156532954648e-05, "loss": 1.5051, "step": 459400 }, { "epoch": 1.97, "learning_rate": 2.5599728146401413e-05, "loss": 1.5285, "step": 459500 }, { "epoch": 1.97, "learning_rate": 2.559429975984818e-05, "loss": 1.5028, "step": 459600 }, { "epoch": 1.97, "learning_rate": 2.5588871373294944e-05, "loss": 1.5188, "step": 459700 }, { "epoch": 1.98, "learning_rate": 2.5583442986741712e-05, "loss": 1.5244, "step": 459800 }, { "epoch": 1.98, "learning_rate": 2.5578014600188478e-05, "loss": 1.5232, "step": 459900 }, { "epoch": 1.98, "learning_rate": 2.557258621363524e-05, "loss": 1.5254, "step": 460000 }, { "epoch": 1.98, "eval_loss": 1.4372631311416626, "eval_runtime": 17.8072, "eval_samples_per_second": 561.57, "eval_steps_per_second": 17.577, "step": 460000 }, { "epoch": 1.98, "learning_rate": 2.5567157827082005e-05, "loss": 1.5217, "step": 460100 }, { "epoch": 1.98, "learning_rate": 2.556172944052877e-05, "loss": 1.5216, "step": 460200 }, { "epoch": 1.98, "learning_rate": 2.5556301053975535e-05, "loss": 1.524, "step": 460300 }, { "epoch": 1.98, "learning_rate": 2.5550872667422297e-05, "loss": 1.5024, "step": 460400 }, { "epoch": 1.98, "learning_rate": 2.5545444280869063e-05, "loss": 1.5246, "step": 460500 }, { "epoch": 1.98, "learning_rate": 2.5540015894315828e-05, "loss": 1.5202, "step": 460600 }, { "epoch": 1.98, "learning_rate": 2.5534587507762593e-05, "loss": 1.5033, "step": 460700 }, { "epoch": 1.98, "learning_rate": 2.5529159121209355e-05, "loss": 1.5379, "step": 460800 }, { "epoch": 1.98, "learning_rate": 2.5523730734656127e-05, "loss": 1.5156, "step": 460900 }, { "epoch": 1.98, "learning_rate": 2.551830234810289e-05, "loss": 1.514, "step": 461000 }, { "epoch": 1.98, "learning_rate": 2.5512873961549654e-05, "loss": 1.4981, "step": 461100 }, { "epoch": 1.98, "learning_rate": 2.550744557499642e-05, "loss": 1.47, "step": 461200 }, { "epoch": 1.98, "learning_rate": 2.5502017188443185e-05, "loss": 1.5092, "step": 461300 }, { "epoch": 1.98, "learning_rate": 2.5496588801889946e-05, "loss": 1.5191, "step": 461400 }, { "epoch": 1.98, "learning_rate": 2.5491160415336712e-05, "loss": 1.504, "step": 461500 }, { "epoch": 1.98, "learning_rate": 2.5485732028783477e-05, "loss": 1.5084, "step": 461600 }, { "epoch": 1.98, "learning_rate": 2.5480303642230242e-05, "loss": 1.4905, "step": 461700 }, { "epoch": 1.98, "learning_rate": 2.5474875255677004e-05, "loss": 1.5079, "step": 461800 }, { "epoch": 1.98, "learning_rate": 2.5469446869123776e-05, "loss": 1.4988, "step": 461900 }, { "epoch": 1.98, "learning_rate": 2.5464018482570538e-05, "loss": 1.5224, "step": 462000 }, { "epoch": 1.99, "learning_rate": 2.5458590096017303e-05, "loss": 1.5292, "step": 462100 }, { "epoch": 1.99, "learning_rate": 2.545316170946407e-05, "loss": 1.5291, "step": 462200 }, { "epoch": 1.99, "learning_rate": 2.5447733322910834e-05, "loss": 1.5155, "step": 462300 }, { "epoch": 1.99, "learning_rate": 2.5442304936357596e-05, "loss": 1.5209, "step": 462400 }, { "epoch": 1.99, "learning_rate": 2.543687654980436e-05, "loss": 1.5217, "step": 462500 }, { "epoch": 1.99, "learning_rate": 2.5431448163251126e-05, "loss": 1.5291, "step": 462600 }, { "epoch": 1.99, "learning_rate": 2.542601977669789e-05, "loss": 1.5398, "step": 462700 }, { "epoch": 1.99, "learning_rate": 2.5420591390144653e-05, "loss": 1.5249, "step": 462800 }, { "epoch": 1.99, "learning_rate": 2.541516300359142e-05, "loss": 1.5094, "step": 462900 }, { "epoch": 1.99, "learning_rate": 2.5409734617038187e-05, "loss": 1.5231, "step": 463000 }, { "epoch": 1.99, "learning_rate": 2.5404306230484953e-05, "loss": 1.4996, "step": 463100 }, { "epoch": 1.99, "learning_rate": 2.5398877843931718e-05, "loss": 1.5071, "step": 463200 }, { "epoch": 1.99, "learning_rate": 2.5393449457378483e-05, "loss": 1.5225, "step": 463300 }, { "epoch": 1.99, "learning_rate": 2.5388021070825245e-05, "loss": 1.5091, "step": 463400 }, { "epoch": 1.99, "learning_rate": 2.538259268427201e-05, "loss": 1.5096, "step": 463500 }, { "epoch": 1.99, "learning_rate": 2.5377164297718776e-05, "loss": 1.5081, "step": 463600 }, { "epoch": 1.99, "learning_rate": 2.537173591116554e-05, "loss": 1.5027, "step": 463700 }, { "epoch": 1.99, "learning_rate": 2.5366307524612303e-05, "loss": 1.5129, "step": 463800 }, { "epoch": 1.99, "learning_rate": 2.5360879138059068e-05, "loss": 1.5128, "step": 463900 }, { "epoch": 1.99, "learning_rate": 2.5355450751505837e-05, "loss": 1.5264, "step": 464000 }, { "epoch": 1.99, "learning_rate": 2.5350022364952602e-05, "loss": 1.5181, "step": 464100 }, { "epoch": 1.99, "learning_rate": 2.5344593978399367e-05, "loss": 1.492, "step": 464200 }, { "epoch": 1.99, "learning_rate": 2.5339165591846132e-05, "loss": 1.5279, "step": 464300 }, { "epoch": 2.0, "learning_rate": 2.5333737205292894e-05, "loss": 1.5017, "step": 464400 }, { "epoch": 2.0, "learning_rate": 2.532830881873966e-05, "loss": 1.4949, "step": 464500 }, { "epoch": 2.0, "learning_rate": 2.5322880432186425e-05, "loss": 1.5386, "step": 464600 }, { "epoch": 2.0, "learning_rate": 2.531745204563319e-05, "loss": 1.501, "step": 464700 }, { "epoch": 2.0, "learning_rate": 2.5312023659079952e-05, "loss": 1.556, "step": 464800 }, { "epoch": 2.0, "learning_rate": 2.5306595272526717e-05, "loss": 1.5504, "step": 464900 }, { "epoch": 2.0, "learning_rate": 2.5301166885973486e-05, "loss": 1.5136, "step": 465000 }, { "epoch": 2.0, "eval_loss": 1.4351954460144043, "eval_runtime": 17.8126, "eval_samples_per_second": 561.4, "eval_steps_per_second": 17.572, "step": 465000 }, { "epoch": 2.0, "learning_rate": 2.529573849942025e-05, "loss": 1.537, "step": 465100 }, { "epoch": 2.0, "learning_rate": 2.5290310112867016e-05, "loss": 1.5129, "step": 465200 }, { "epoch": 2.0, "learning_rate": 2.528488172631378e-05, "loss": 1.533, "step": 465300 }, { "epoch": 2.0, "learning_rate": 2.5279453339760543e-05, "loss": 1.5411, "step": 465400 }, { "epoch": 2.0, "learning_rate": 2.527402495320731e-05, "loss": 1.5306, "step": 465500 }, { "epoch": 2.0, "learning_rate": 2.5268596566654074e-05, "loss": 1.4965, "step": 465600 }, { "epoch": 2.0, "learning_rate": 2.526316818010084e-05, "loss": 1.5094, "step": 465700 }, { "epoch": 2.0, "learning_rate": 2.52577397935476e-05, "loss": 1.481, "step": 465800 }, { "epoch": 2.0, "learning_rate": 2.5252311406994366e-05, "loss": 1.4859, "step": 465900 }, { "epoch": 2.0, "learning_rate": 2.524688302044113e-05, "loss": 1.4919, "step": 466000 }, { "epoch": 2.0, "learning_rate": 2.52414546338879e-05, "loss": 1.5023, "step": 466100 }, { "epoch": 2.0, "learning_rate": 2.5236026247334666e-05, "loss": 1.4949, "step": 466200 }, { "epoch": 2.0, "learning_rate": 2.523059786078143e-05, "loss": 1.4836, "step": 466300 }, { "epoch": 2.0, "learning_rate": 2.5225169474228193e-05, "loss": 1.4577, "step": 466400 }, { "epoch": 2.0, "learning_rate": 2.5219741087674958e-05, "loss": 1.5039, "step": 466500 }, { "epoch": 2.0, "learning_rate": 2.5214312701121723e-05, "loss": 1.4643, "step": 466600 }, { "epoch": 2.0, "learning_rate": 2.520888431456849e-05, "loss": 1.5069, "step": 466700 }, { "epoch": 2.01, "learning_rate": 2.520345592801525e-05, "loss": 1.4909, "step": 466800 }, { "epoch": 2.01, "learning_rate": 2.5198027541462016e-05, "loss": 1.4872, "step": 466900 }, { "epoch": 2.01, "learning_rate": 2.519259915490878e-05, "loss": 1.4882, "step": 467000 }, { "epoch": 2.01, "learning_rate": 2.518717076835555e-05, "loss": 1.4986, "step": 467100 }, { "epoch": 2.01, "learning_rate": 2.5181742381802315e-05, "loss": 1.4678, "step": 467200 }, { "epoch": 2.01, "learning_rate": 2.517631399524908e-05, "loss": 1.4588, "step": 467300 }, { "epoch": 2.01, "learning_rate": 2.5170885608695842e-05, "loss": 1.4589, "step": 467400 }, { "epoch": 2.01, "learning_rate": 2.5165457222142607e-05, "loss": 1.4516, "step": 467500 }, { "epoch": 2.01, "learning_rate": 2.5160028835589372e-05, "loss": 1.4877, "step": 467600 }, { "epoch": 2.01, "learning_rate": 2.5154600449036138e-05, "loss": 1.46, "step": 467700 }, { "epoch": 2.01, "learning_rate": 2.51491720624829e-05, "loss": 1.4712, "step": 467800 }, { "epoch": 2.01, "learning_rate": 2.5143743675929665e-05, "loss": 1.4915, "step": 467900 }, { "epoch": 2.01, "learning_rate": 2.513831528937643e-05, "loss": 1.4865, "step": 468000 }, { "epoch": 2.01, "learning_rate": 2.5132886902823195e-05, "loss": 1.5087, "step": 468100 }, { "epoch": 2.01, "learning_rate": 2.5127458516269964e-05, "loss": 1.4844, "step": 468200 }, { "epoch": 2.01, "learning_rate": 2.512203012971673e-05, "loss": 1.4879, "step": 468300 }, { "epoch": 2.01, "learning_rate": 2.511660174316349e-05, "loss": 1.4974, "step": 468400 }, { "epoch": 2.01, "learning_rate": 2.5111173356610256e-05, "loss": 1.4683, "step": 468500 }, { "epoch": 2.01, "learning_rate": 2.5105744970057022e-05, "loss": 1.4931, "step": 468600 }, { "epoch": 2.01, "learning_rate": 2.5100316583503787e-05, "loss": 1.4729, "step": 468700 }, { "epoch": 2.01, "learning_rate": 2.509488819695055e-05, "loss": 1.4603, "step": 468800 }, { "epoch": 2.01, "learning_rate": 2.5089459810397314e-05, "loss": 1.4908, "step": 468900 }, { "epoch": 2.01, "learning_rate": 2.508403142384408e-05, "loss": 1.4753, "step": 469000 }, { "epoch": 2.02, "learning_rate": 2.5078603037290845e-05, "loss": 1.5068, "step": 469100 }, { "epoch": 2.02, "learning_rate": 2.5073174650737613e-05, "loss": 1.48, "step": 469200 }, { "epoch": 2.02, "learning_rate": 2.506774626418438e-05, "loss": 1.465, "step": 469300 }, { "epoch": 2.02, "learning_rate": 2.506231787763114e-05, "loss": 1.4885, "step": 469400 }, { "epoch": 2.02, "learning_rate": 2.5056889491077906e-05, "loss": 1.5117, "step": 469500 }, { "epoch": 2.02, "learning_rate": 2.505146110452467e-05, "loss": 1.4701, "step": 469600 }, { "epoch": 2.02, "learning_rate": 2.5046032717971436e-05, "loss": 1.4813, "step": 469700 }, { "epoch": 2.02, "learning_rate": 2.5040604331418198e-05, "loss": 1.489, "step": 469800 }, { "epoch": 2.02, "learning_rate": 2.5035175944864963e-05, "loss": 1.4867, "step": 469900 }, { "epoch": 2.02, "learning_rate": 2.502974755831173e-05, "loss": 1.4885, "step": 470000 }, { "epoch": 2.02, "eval_loss": 1.4379206895828247, "eval_runtime": 17.8048, "eval_samples_per_second": 561.647, "eval_steps_per_second": 17.58, "step": 470000 }, { "epoch": 2.02, "learning_rate": 2.5024319171758494e-05, "loss": 1.4895, "step": 470100 }, { "epoch": 2.02, "learning_rate": 2.5018890785205256e-05, "loss": 1.4791, "step": 470200 }, { "epoch": 2.02, "learning_rate": 2.5013462398652028e-05, "loss": 1.4837, "step": 470300 }, { "epoch": 2.02, "learning_rate": 2.500803401209879e-05, "loss": 1.4775, "step": 470400 }, { "epoch": 2.02, "learning_rate": 2.5002605625545555e-05, "loss": 1.519, "step": 470500 }, { "epoch": 2.02, "learning_rate": 2.499717723899232e-05, "loss": 1.4725, "step": 470600 }, { "epoch": 2.02, "learning_rate": 2.4991748852439085e-05, "loss": 1.5076, "step": 470700 }, { "epoch": 2.02, "learning_rate": 2.4986320465885847e-05, "loss": 1.4859, "step": 470800 }, { "epoch": 2.02, "learning_rate": 2.4980892079332613e-05, "loss": 1.4572, "step": 470900 }, { "epoch": 2.02, "learning_rate": 2.4975463692779378e-05, "loss": 1.4833, "step": 471000 }, { "epoch": 2.02, "learning_rate": 2.4970035306226143e-05, "loss": 1.4604, "step": 471100 }, { "epoch": 2.02, "learning_rate": 2.496460691967291e-05, "loss": 1.4769, "step": 471200 }, { "epoch": 2.02, "learning_rate": 2.4959178533119674e-05, "loss": 1.4817, "step": 471300 }, { "epoch": 2.03, "learning_rate": 2.495375014656644e-05, "loss": 1.4724, "step": 471400 }, { "epoch": 2.03, "learning_rate": 2.49483217600132e-05, "loss": 1.4648, "step": 471500 }, { "epoch": 2.03, "learning_rate": 2.494289337345997e-05, "loss": 1.4813, "step": 471600 }, { "epoch": 2.03, "learning_rate": 2.4937464986906735e-05, "loss": 1.4614, "step": 471700 }, { "epoch": 2.03, "learning_rate": 2.4932036600353497e-05, "loss": 1.4949, "step": 471800 }, { "epoch": 2.03, "learning_rate": 2.4926608213800262e-05, "loss": 1.4819, "step": 471900 }, { "epoch": 2.03, "learning_rate": 2.4921179827247027e-05, "loss": 1.4853, "step": 472000 }, { "epoch": 2.03, "learning_rate": 2.4915751440693792e-05, "loss": 1.4949, "step": 472100 }, { "epoch": 2.03, "learning_rate": 2.4910323054140558e-05, "loss": 1.4927, "step": 472200 }, { "epoch": 2.03, "learning_rate": 2.4904894667587323e-05, "loss": 1.4945, "step": 472300 }, { "epoch": 2.03, "learning_rate": 2.4899466281034088e-05, "loss": 1.4696, "step": 472400 }, { "epoch": 2.03, "learning_rate": 2.489403789448085e-05, "loss": 1.447, "step": 472500 }, { "epoch": 2.03, "learning_rate": 2.488860950792762e-05, "loss": 1.4982, "step": 472600 }, { "epoch": 2.03, "learning_rate": 2.4883181121374384e-05, "loss": 1.4921, "step": 472700 }, { "epoch": 2.03, "learning_rate": 2.4877752734821146e-05, "loss": 1.4724, "step": 472800 }, { "epoch": 2.03, "learning_rate": 2.487232434826791e-05, "loss": 1.5049, "step": 472900 }, { "epoch": 2.03, "learning_rate": 2.4866895961714676e-05, "loss": 1.4724, "step": 473000 }, { "epoch": 2.03, "learning_rate": 2.486146757516144e-05, "loss": 1.4891, "step": 473100 }, { "epoch": 2.03, "learning_rate": 2.4856039188608207e-05, "loss": 1.4846, "step": 473200 }, { "epoch": 2.03, "learning_rate": 2.4850610802054972e-05, "loss": 1.4849, "step": 473300 }, { "epoch": 2.03, "learning_rate": 2.4845182415501737e-05, "loss": 1.4698, "step": 473400 }, { "epoch": 2.03, "learning_rate": 2.48397540289485e-05, "loss": 1.4848, "step": 473500 }, { "epoch": 2.03, "learning_rate": 2.4834325642395265e-05, "loss": 1.5008, "step": 473600 }, { "epoch": 2.04, "learning_rate": 2.4828897255842033e-05, "loss": 1.4955, "step": 473700 }, { "epoch": 2.04, "learning_rate": 2.4823468869288795e-05, "loss": 1.5106, "step": 473800 }, { "epoch": 2.04, "learning_rate": 2.481804048273556e-05, "loss": 1.4705, "step": 473900 }, { "epoch": 2.04, "learning_rate": 2.4812612096182326e-05, "loss": 1.4965, "step": 474000 }, { "epoch": 2.04, "learning_rate": 2.480718370962909e-05, "loss": 1.4966, "step": 474100 }, { "epoch": 2.04, "learning_rate": 2.4801755323075856e-05, "loss": 1.4636, "step": 474200 }, { "epoch": 2.04, "learning_rate": 2.479632693652262e-05, "loss": 1.493, "step": 474300 }, { "epoch": 2.04, "learning_rate": 2.4790898549969387e-05, "loss": 1.4891, "step": 474400 }, { "epoch": 2.04, "learning_rate": 2.478547016341615e-05, "loss": 1.47, "step": 474500 }, { "epoch": 2.04, "learning_rate": 2.4780041776862914e-05, "loss": 1.4939, "step": 474600 }, { "epoch": 2.04, "learning_rate": 2.4774613390309682e-05, "loss": 1.5032, "step": 474700 }, { "epoch": 2.04, "learning_rate": 2.4769185003756444e-05, "loss": 1.4862, "step": 474800 }, { "epoch": 2.04, "learning_rate": 2.476375661720321e-05, "loss": 1.4728, "step": 474900 }, { "epoch": 2.04, "learning_rate": 2.4758328230649975e-05, "loss": 1.4978, "step": 475000 }, { "epoch": 2.04, "eval_loss": 1.4373873472213745, "eval_runtime": 17.7804, "eval_samples_per_second": 562.418, "eval_steps_per_second": 17.604, "step": 475000 }, { "epoch": 2.04, "learning_rate": 2.475289984409674e-05, "loss": 1.4943, "step": 475100 }, { "epoch": 2.04, "learning_rate": 2.4747471457543502e-05, "loss": 1.4735, "step": 475200 }, { "epoch": 2.04, "learning_rate": 2.474204307099027e-05, "loss": 1.4863, "step": 475300 }, { "epoch": 2.04, "learning_rate": 2.4736614684437036e-05, "loss": 1.4672, "step": 475400 }, { "epoch": 2.04, "learning_rate": 2.4731186297883798e-05, "loss": 1.5015, "step": 475500 }, { "epoch": 2.04, "learning_rate": 2.4725757911330563e-05, "loss": 1.4682, "step": 475600 }, { "epoch": 2.04, "learning_rate": 2.472032952477733e-05, "loss": 1.4963, "step": 475700 }, { "epoch": 2.04, "learning_rate": 2.4714901138224094e-05, "loss": 1.4793, "step": 475800 }, { "epoch": 2.04, "learning_rate": 2.470947275167086e-05, "loss": 1.4853, "step": 475900 }, { "epoch": 2.04, "learning_rate": 2.4704044365117624e-05, "loss": 1.4781, "step": 476000 }, { "epoch": 2.05, "learning_rate": 2.469861597856439e-05, "loss": 1.4825, "step": 476100 }, { "epoch": 2.05, "learning_rate": 2.469318759201115e-05, "loss": 1.502, "step": 476200 }, { "epoch": 2.05, "learning_rate": 2.468775920545792e-05, "loss": 1.4894, "step": 476300 }, { "epoch": 2.05, "learning_rate": 2.4682330818904685e-05, "loss": 1.5123, "step": 476400 }, { "epoch": 2.05, "learning_rate": 2.4676902432351447e-05, "loss": 1.4597, "step": 476500 }, { "epoch": 2.05, "learning_rate": 2.4671474045798212e-05, "loss": 1.4996, "step": 476600 }, { "epoch": 2.05, "learning_rate": 2.4666045659244978e-05, "loss": 1.4949, "step": 476700 }, { "epoch": 2.05, "learning_rate": 2.4660617272691743e-05, "loss": 1.4974, "step": 476800 }, { "epoch": 2.05, "learning_rate": 2.4655188886138508e-05, "loss": 1.4989, "step": 476900 }, { "epoch": 2.05, "learning_rate": 2.4649760499585273e-05, "loss": 1.4714, "step": 477000 }, { "epoch": 2.05, "learning_rate": 2.464433211303204e-05, "loss": 1.4798, "step": 477100 }, { "epoch": 2.05, "learning_rate": 2.46389037264788e-05, "loss": 1.4677, "step": 477200 }, { "epoch": 2.05, "learning_rate": 2.4633475339925566e-05, "loss": 1.4871, "step": 477300 }, { "epoch": 2.05, "learning_rate": 2.4628046953372334e-05, "loss": 1.5236, "step": 477400 }, { "epoch": 2.05, "learning_rate": 2.4622618566819096e-05, "loss": 1.48, "step": 477500 }, { "epoch": 2.05, "learning_rate": 2.461719018026586e-05, "loss": 1.473, "step": 477600 }, { "epoch": 2.05, "learning_rate": 2.4611761793712627e-05, "loss": 1.4726, "step": 477700 }, { "epoch": 2.05, "learning_rate": 2.4606333407159392e-05, "loss": 1.4829, "step": 477800 }, { "epoch": 2.05, "learning_rate": 2.4600905020606157e-05, "loss": 1.4723, "step": 477900 }, { "epoch": 2.05, "learning_rate": 2.4595476634052923e-05, "loss": 1.4876, "step": 478000 }, { "epoch": 2.05, "learning_rate": 2.4590048247499688e-05, "loss": 1.4885, "step": 478100 }, { "epoch": 2.05, "learning_rate": 2.458461986094645e-05, "loss": 1.486, "step": 478200 }, { "epoch": 2.05, "learning_rate": 2.4579191474393215e-05, "loss": 1.4968, "step": 478300 }, { "epoch": 2.06, "learning_rate": 2.4573763087839984e-05, "loss": 1.4917, "step": 478400 }, { "epoch": 2.06, "learning_rate": 2.4568334701286746e-05, "loss": 1.4664, "step": 478500 }, { "epoch": 2.06, "learning_rate": 2.456290631473351e-05, "loss": 1.5124, "step": 478600 }, { "epoch": 2.06, "learning_rate": 2.4557477928180276e-05, "loss": 1.4945, "step": 478700 }, { "epoch": 2.06, "learning_rate": 2.455204954162704e-05, "loss": 1.48, "step": 478800 }, { "epoch": 2.06, "learning_rate": 2.4546621155073807e-05, "loss": 1.4764, "step": 478900 }, { "epoch": 2.06, "learning_rate": 2.4541192768520572e-05, "loss": 1.51, "step": 479000 }, { "epoch": 2.06, "learning_rate": 2.4535764381967337e-05, "loss": 1.4715, "step": 479100 }, { "epoch": 2.06, "learning_rate": 2.45303359954141e-05, "loss": 1.4857, "step": 479200 }, { "epoch": 2.06, "learning_rate": 2.4524907608860864e-05, "loss": 1.5054, "step": 479300 }, { "epoch": 2.06, "learning_rate": 2.4519479222307633e-05, "loss": 1.4588, "step": 479400 }, { "epoch": 2.06, "learning_rate": 2.4514050835754395e-05, "loss": 1.4906, "step": 479500 }, { "epoch": 2.06, "learning_rate": 2.450862244920116e-05, "loss": 1.4416, "step": 479600 }, { "epoch": 2.06, "learning_rate": 2.4503194062647925e-05, "loss": 1.4945, "step": 479700 }, { "epoch": 2.06, "learning_rate": 2.449776567609469e-05, "loss": 1.4888, "step": 479800 }, { "epoch": 2.06, "learning_rate": 2.4492337289541452e-05, "loss": 1.4918, "step": 479900 }, { "epoch": 2.06, "learning_rate": 2.448690890298822e-05, "loss": 1.4872, "step": 480000 }, { "epoch": 2.06, "eval_loss": 1.435674786567688, "eval_runtime": 17.7907, "eval_samples_per_second": 562.091, "eval_steps_per_second": 17.593, "step": 480000 }, { "epoch": 2.06, "learning_rate": 2.4481480516434986e-05, "loss": 1.4666, "step": 480100 }, { "epoch": 2.06, "learning_rate": 2.4476052129881748e-05, "loss": 1.491, "step": 480200 }, { "epoch": 2.06, "learning_rate": 2.4470623743328513e-05, "loss": 1.482, "step": 480300 }, { "epoch": 2.06, "learning_rate": 2.446519535677528e-05, "loss": 1.4848, "step": 480400 }, { "epoch": 2.06, "learning_rate": 2.4459766970222044e-05, "loss": 1.4843, "step": 480500 }, { "epoch": 2.06, "learning_rate": 2.445433858366881e-05, "loss": 1.4776, "step": 480600 }, { "epoch": 2.07, "learning_rate": 2.4448910197115575e-05, "loss": 1.4761, "step": 480700 }, { "epoch": 2.07, "learning_rate": 2.4443481810562336e-05, "loss": 1.5118, "step": 480800 }, { "epoch": 2.07, "learning_rate": 2.44380534240091e-05, "loss": 1.4673, "step": 480900 }, { "epoch": 2.07, "learning_rate": 2.443262503745587e-05, "loss": 1.5171, "step": 481000 }, { "epoch": 2.07, "learning_rate": 2.4427196650902636e-05, "loss": 1.4556, "step": 481100 }, { "epoch": 2.07, "learning_rate": 2.4421768264349397e-05, "loss": 1.5094, "step": 481200 }, { "epoch": 2.07, "learning_rate": 2.4416339877796163e-05, "loss": 1.4814, "step": 481300 }, { "epoch": 2.07, "learning_rate": 2.4410911491242928e-05, "loss": 1.4673, "step": 481400 }, { "epoch": 2.07, "learning_rate": 2.4405483104689693e-05, "loss": 1.4845, "step": 481500 }, { "epoch": 2.07, "learning_rate": 2.440005471813646e-05, "loss": 1.507, "step": 481600 }, { "epoch": 2.07, "learning_rate": 2.4394626331583224e-05, "loss": 1.4716, "step": 481700 }, { "epoch": 2.07, "learning_rate": 2.4389197945029986e-05, "loss": 1.4578, "step": 481800 }, { "epoch": 2.07, "learning_rate": 2.438376955847675e-05, "loss": 1.4706, "step": 481900 }, { "epoch": 2.07, "learning_rate": 2.4378341171923516e-05, "loss": 1.4801, "step": 482000 }, { "epoch": 2.07, "learning_rate": 2.4372912785370285e-05, "loss": 1.4767, "step": 482100 }, { "epoch": 2.07, "learning_rate": 2.4367484398817047e-05, "loss": 1.4646, "step": 482200 }, { "epoch": 2.07, "learning_rate": 2.4362056012263812e-05, "loss": 1.4711, "step": 482300 }, { "epoch": 2.07, "learning_rate": 2.4356627625710577e-05, "loss": 1.5042, "step": 482400 }, { "epoch": 2.07, "learning_rate": 2.435119923915734e-05, "loss": 1.5114, "step": 482500 }, { "epoch": 2.07, "learning_rate": 2.4345770852604108e-05, "loss": 1.4726, "step": 482600 }, { "epoch": 2.07, "learning_rate": 2.4340342466050873e-05, "loss": 1.4743, "step": 482700 }, { "epoch": 2.07, "learning_rate": 2.4334914079497635e-05, "loss": 1.4796, "step": 482800 }, { "epoch": 2.07, "learning_rate": 2.43294856929444e-05, "loss": 1.4428, "step": 482900 }, { "epoch": 2.08, "learning_rate": 2.4324057306391165e-05, "loss": 1.5025, "step": 483000 }, { "epoch": 2.08, "learning_rate": 2.4318628919837934e-05, "loss": 1.5128, "step": 483100 }, { "epoch": 2.08, "learning_rate": 2.4313200533284696e-05, "loss": 1.4972, "step": 483200 }, { "epoch": 2.08, "learning_rate": 2.430777214673146e-05, "loss": 1.4691, "step": 483300 }, { "epoch": 2.08, "learning_rate": 2.4302343760178226e-05, "loss": 1.4655, "step": 483400 }, { "epoch": 2.08, "learning_rate": 2.429691537362499e-05, "loss": 1.4713, "step": 483500 }, { "epoch": 2.08, "learning_rate": 2.4291486987071757e-05, "loss": 1.4963, "step": 483600 }, { "epoch": 2.08, "learning_rate": 2.4286058600518522e-05, "loss": 1.4704, "step": 483700 }, { "epoch": 2.08, "learning_rate": 2.4280630213965284e-05, "loss": 1.4644, "step": 483800 }, { "epoch": 2.08, "learning_rate": 2.427520182741205e-05, "loss": 1.5307, "step": 483900 }, { "epoch": 2.08, "learning_rate": 2.4269773440858815e-05, "loss": 1.4934, "step": 484000 }, { "epoch": 2.08, "learning_rate": 2.426434505430558e-05, "loss": 1.4855, "step": 484100 }, { "epoch": 2.08, "learning_rate": 2.4258916667752345e-05, "loss": 1.4747, "step": 484200 }, { "epoch": 2.08, "learning_rate": 2.425348828119911e-05, "loss": 1.4986, "step": 484300 }, { "epoch": 2.08, "learning_rate": 2.4248059894645876e-05, "loss": 1.496, "step": 484400 }, { "epoch": 2.08, "learning_rate": 2.4242631508092638e-05, "loss": 1.4976, "step": 484500 }, { "epoch": 2.08, "learning_rate": 2.4237203121539403e-05, "loss": 1.4877, "step": 484600 }, { "epoch": 2.08, "learning_rate": 2.423177473498617e-05, "loss": 1.5055, "step": 484700 }, { "epoch": 2.08, "learning_rate": 2.4226346348432933e-05, "loss": 1.4922, "step": 484800 }, { "epoch": 2.08, "learning_rate": 2.42209179618797e-05, "loss": 1.4723, "step": 484900 }, { "epoch": 2.08, "learning_rate": 2.4215489575326464e-05, "loss": 1.5055, "step": 485000 }, { "epoch": 2.08, "eval_loss": 1.433396816253662, "eval_runtime": 17.803, "eval_samples_per_second": 561.704, "eval_steps_per_second": 17.581, "step": 485000 }, { "epoch": 2.08, "learning_rate": 2.421006118877323e-05, "loss": 1.4748, "step": 485100 }, { "epoch": 2.08, "learning_rate": 2.4204632802219994e-05, "loss": 1.451, "step": 485200 }, { "epoch": 2.08, "learning_rate": 2.419920441566676e-05, "loss": 1.4868, "step": 485300 }, { "epoch": 2.09, "learning_rate": 2.4193776029113525e-05, "loss": 1.4999, "step": 485400 }, { "epoch": 2.09, "learning_rate": 2.4188347642560287e-05, "loss": 1.4661, "step": 485500 }, { "epoch": 2.09, "learning_rate": 2.4182919256007052e-05, "loss": 1.5062, "step": 485600 }, { "epoch": 2.09, "learning_rate": 2.417749086945382e-05, "loss": 1.4775, "step": 485700 }, { "epoch": 2.09, "learning_rate": 2.4172062482900583e-05, "loss": 1.4957, "step": 485800 }, { "epoch": 2.09, "learning_rate": 2.4166634096347348e-05, "loss": 1.5192, "step": 485900 }, { "epoch": 2.09, "learning_rate": 2.4161205709794113e-05, "loss": 1.4921, "step": 486000 }, { "epoch": 2.09, "learning_rate": 2.415577732324088e-05, "loss": 1.4891, "step": 486100 }, { "epoch": 2.09, "learning_rate": 2.415034893668764e-05, "loss": 1.4821, "step": 486200 }, { "epoch": 2.09, "learning_rate": 2.414492055013441e-05, "loss": 1.491, "step": 486300 }, { "epoch": 2.09, "learning_rate": 2.4139492163581174e-05, "loss": 1.5049, "step": 486400 }, { "epoch": 2.09, "learning_rate": 2.4134063777027936e-05, "loss": 1.5108, "step": 486500 }, { "epoch": 2.09, "learning_rate": 2.41286353904747e-05, "loss": 1.4862, "step": 486600 }, { "epoch": 2.09, "learning_rate": 2.4123207003921467e-05, "loss": 1.4695, "step": 486700 }, { "epoch": 2.09, "learning_rate": 2.4117778617368232e-05, "loss": 1.4921, "step": 486800 }, { "epoch": 2.09, "learning_rate": 2.4112350230814997e-05, "loss": 1.4882, "step": 486900 }, { "epoch": 2.09, "learning_rate": 2.4106921844261762e-05, "loss": 1.4948, "step": 487000 }, { "epoch": 2.09, "learning_rate": 2.4101493457708528e-05, "loss": 1.4979, "step": 487100 }, { "epoch": 2.09, "learning_rate": 2.409606507115529e-05, "loss": 1.4862, "step": 487200 }, { "epoch": 2.09, "learning_rate": 2.4090636684602058e-05, "loss": 1.4722, "step": 487300 }, { "epoch": 2.09, "learning_rate": 2.4085208298048823e-05, "loss": 1.4701, "step": 487400 }, { "epoch": 2.09, "learning_rate": 2.4079779911495585e-05, "loss": 1.46, "step": 487500 }, { "epoch": 2.09, "learning_rate": 2.407435152494235e-05, "loss": 1.4932, "step": 487600 }, { "epoch": 2.1, "learning_rate": 2.4068923138389116e-05, "loss": 1.482, "step": 487700 }, { "epoch": 2.1, "learning_rate": 2.406349475183588e-05, "loss": 1.4611, "step": 487800 }, { "epoch": 2.1, "learning_rate": 2.4058066365282646e-05, "loss": 1.4995, "step": 487900 }, { "epoch": 2.1, "learning_rate": 2.405263797872941e-05, "loss": 1.451, "step": 488000 }, { "epoch": 2.1, "learning_rate": 2.4047209592176177e-05, "loss": 1.4817, "step": 488100 }, { "epoch": 2.1, "learning_rate": 2.404178120562294e-05, "loss": 1.4892, "step": 488200 }, { "epoch": 2.1, "learning_rate": 2.4036352819069704e-05, "loss": 1.4875, "step": 488300 }, { "epoch": 2.1, "learning_rate": 2.4030924432516473e-05, "loss": 1.4717, "step": 488400 }, { "epoch": 2.1, "learning_rate": 2.4025496045963235e-05, "loss": 1.4998, "step": 488500 }, { "epoch": 2.1, "learning_rate": 2.402006765941e-05, "loss": 1.4945, "step": 488600 }, { "epoch": 2.1, "learning_rate": 2.4014639272856765e-05, "loss": 1.4908, "step": 488700 }, { "epoch": 2.1, "learning_rate": 2.400921088630353e-05, "loss": 1.4813, "step": 488800 }, { "epoch": 2.1, "learning_rate": 2.4003782499750296e-05, "loss": 1.4892, "step": 488900 }, { "epoch": 2.1, "learning_rate": 2.399835411319706e-05, "loss": 1.4752, "step": 489000 }, { "epoch": 2.1, "learning_rate": 2.3992925726643826e-05, "loss": 1.5083, "step": 489100 }, { "epoch": 2.1, "learning_rate": 2.3987497340090588e-05, "loss": 1.4873, "step": 489200 }, { "epoch": 2.1, "learning_rate": 2.3982068953537353e-05, "loss": 1.4875, "step": 489300 }, { "epoch": 2.1, "learning_rate": 2.3976640566984122e-05, "loss": 1.4657, "step": 489400 }, { "epoch": 2.1, "learning_rate": 2.3971212180430884e-05, "loss": 1.4937, "step": 489500 }, { "epoch": 2.1, "learning_rate": 2.396578379387765e-05, "loss": 1.4957, "step": 489600 }, { "epoch": 2.1, "learning_rate": 2.3960355407324414e-05, "loss": 1.497, "step": 489700 }, { "epoch": 2.1, "learning_rate": 2.395492702077118e-05, "loss": 1.4825, "step": 489800 }, { "epoch": 2.1, "learning_rate": 2.3949498634217945e-05, "loss": 1.481, "step": 489900 }, { "epoch": 2.11, "learning_rate": 2.394407024766471e-05, "loss": 1.4926, "step": 490000 }, { "epoch": 2.11, "eval_loss": 1.4341309070587158, "eval_runtime": 17.8295, "eval_samples_per_second": 560.867, "eval_steps_per_second": 17.555, "step": 490000 }, { "epoch": 2.11, "learning_rate": 2.3938641861111475e-05, "loss": 1.4879, "step": 490100 }, { "epoch": 2.11, "learning_rate": 2.3933213474558237e-05, "loss": 1.4901, "step": 490200 }, { "epoch": 2.11, "learning_rate": 2.3927785088005003e-05, "loss": 1.4596, "step": 490300 }, { "epoch": 2.11, "learning_rate": 2.392235670145177e-05, "loss": 1.5034, "step": 490400 }, { "epoch": 2.11, "learning_rate": 2.3916928314898533e-05, "loss": 1.4965, "step": 490500 }, { "epoch": 2.11, "learning_rate": 2.39114999283453e-05, "loss": 1.4744, "step": 490600 }, { "epoch": 2.11, "learning_rate": 2.3906071541792064e-05, "loss": 1.4832, "step": 490700 }, { "epoch": 2.11, "learning_rate": 2.390064315523883e-05, "loss": 1.4838, "step": 490800 }, { "epoch": 2.11, "learning_rate": 2.389521476868559e-05, "loss": 1.474, "step": 490900 }, { "epoch": 2.11, "learning_rate": 2.388978638213236e-05, "loss": 1.502, "step": 491000 }, { "epoch": 2.11, "learning_rate": 2.3884357995579125e-05, "loss": 1.5034, "step": 491100 }, { "epoch": 2.11, "learning_rate": 2.3878929609025887e-05, "loss": 1.4979, "step": 491200 }, { "epoch": 2.11, "learning_rate": 2.3873501222472652e-05, "loss": 1.4854, "step": 491300 }, { "epoch": 2.11, "learning_rate": 2.3868072835919417e-05, "loss": 1.4684, "step": 491400 }, { "epoch": 2.11, "learning_rate": 2.3862644449366182e-05, "loss": 1.4853, "step": 491500 }, { "epoch": 2.11, "learning_rate": 2.3857216062812948e-05, "loss": 1.4748, "step": 491600 }, { "epoch": 2.11, "learning_rate": 2.3851787676259713e-05, "loss": 1.4767, "step": 491700 }, { "epoch": 2.11, "learning_rate": 2.3846359289706478e-05, "loss": 1.471, "step": 491800 }, { "epoch": 2.11, "learning_rate": 2.384093090315324e-05, "loss": 1.5002, "step": 491900 }, { "epoch": 2.11, "learning_rate": 2.383550251660001e-05, "loss": 1.4628, "step": 492000 }, { "epoch": 2.11, "learning_rate": 2.3830074130046774e-05, "loss": 1.4645, "step": 492100 }, { "epoch": 2.11, "learning_rate": 2.3824645743493536e-05, "loss": 1.4713, "step": 492200 }, { "epoch": 2.11, "learning_rate": 2.38192173569403e-05, "loss": 1.476, "step": 492300 }, { "epoch": 2.12, "learning_rate": 2.3813788970387066e-05, "loss": 1.4673, "step": 492400 }, { "epoch": 2.12, "learning_rate": 2.380836058383383e-05, "loss": 1.4811, "step": 492500 }, { "epoch": 2.12, "learning_rate": 2.3802932197280597e-05, "loss": 1.4554, "step": 492600 }, { "epoch": 2.12, "learning_rate": 2.3797503810727362e-05, "loss": 1.4661, "step": 492700 }, { "epoch": 2.12, "learning_rate": 2.3792075424174127e-05, "loss": 1.45, "step": 492800 }, { "epoch": 2.12, "learning_rate": 2.378664703762089e-05, "loss": 1.4734, "step": 492900 }, { "epoch": 2.12, "learning_rate": 2.3781218651067654e-05, "loss": 1.5128, "step": 493000 }, { "epoch": 2.12, "learning_rate": 2.3775790264514423e-05, "loss": 1.4665, "step": 493100 }, { "epoch": 2.12, "learning_rate": 2.3770361877961185e-05, "loss": 1.5025, "step": 493200 }, { "epoch": 2.12, "learning_rate": 2.376493349140795e-05, "loss": 1.4727, "step": 493300 }, { "epoch": 2.12, "learning_rate": 2.3759505104854716e-05, "loss": 1.4396, "step": 493400 }, { "epoch": 2.12, "learning_rate": 2.375407671830148e-05, "loss": 1.4604, "step": 493500 }, { "epoch": 2.12, "learning_rate": 2.3748648331748246e-05, "loss": 1.4847, "step": 493600 }, { "epoch": 2.12, "learning_rate": 2.374321994519501e-05, "loss": 1.4947, "step": 493700 }, { "epoch": 2.12, "learning_rate": 2.3737791558641777e-05, "loss": 1.4925, "step": 493800 }, { "epoch": 2.12, "learning_rate": 2.373236317208854e-05, "loss": 1.4761, "step": 493900 }, { "epoch": 2.12, "learning_rate": 2.3726934785535304e-05, "loss": 1.5098, "step": 494000 }, { "epoch": 2.12, "learning_rate": 2.3721506398982072e-05, "loss": 1.49, "step": 494100 }, { "epoch": 2.12, "learning_rate": 2.3716078012428834e-05, "loss": 1.5106, "step": 494200 }, { "epoch": 2.12, "learning_rate": 2.37106496258756e-05, "loss": 1.489, "step": 494300 }, { "epoch": 2.12, "learning_rate": 2.3705221239322365e-05, "loss": 1.5143, "step": 494400 }, { "epoch": 2.12, "learning_rate": 2.369979285276913e-05, "loss": 1.4895, "step": 494500 }, { "epoch": 2.12, "learning_rate": 2.3694364466215895e-05, "loss": 1.4879, "step": 494600 }, { "epoch": 2.13, "learning_rate": 2.368893607966266e-05, "loss": 1.5082, "step": 494700 }, { "epoch": 2.13, "learning_rate": 2.3683507693109426e-05, "loss": 1.4785, "step": 494800 }, { "epoch": 2.13, "learning_rate": 2.3678079306556188e-05, "loss": 1.4912, "step": 494900 }, { "epoch": 2.13, "learning_rate": 2.3672650920002953e-05, "loss": 1.4702, "step": 495000 }, { "epoch": 2.13, "eval_loss": 1.4374511241912842, "eval_runtime": 17.7875, "eval_samples_per_second": 562.192, "eval_steps_per_second": 17.597, "step": 495000 }, { "epoch": 2.13, "learning_rate": 2.3667222533449718e-05, "loss": 1.5075, "step": 495100 }, { "epoch": 2.13, "learning_rate": 2.3661794146896483e-05, "loss": 1.4957, "step": 495200 }, { "epoch": 2.13, "learning_rate": 2.365636576034325e-05, "loss": 1.4718, "step": 495300 }, { "epoch": 2.13, "learning_rate": 2.3650937373790014e-05, "loss": 1.4745, "step": 495400 }, { "epoch": 2.13, "learning_rate": 2.364550898723678e-05, "loss": 1.4938, "step": 495500 }, { "epoch": 2.13, "learning_rate": 2.364008060068354e-05, "loss": 1.5004, "step": 495600 }, { "epoch": 2.13, "learning_rate": 2.363465221413031e-05, "loss": 1.4827, "step": 495700 }, { "epoch": 2.13, "learning_rate": 2.3629223827577075e-05, "loss": 1.5126, "step": 495800 }, { "epoch": 2.13, "learning_rate": 2.3623795441023837e-05, "loss": 1.4899, "step": 495900 }, { "epoch": 2.13, "learning_rate": 2.3618367054470602e-05, "loss": 1.4931, "step": 496000 }, { "epoch": 2.13, "learning_rate": 2.3612938667917367e-05, "loss": 1.454, "step": 496100 }, { "epoch": 2.13, "learning_rate": 2.3607510281364133e-05, "loss": 1.4802, "step": 496200 }, { "epoch": 2.13, "learning_rate": 2.3602081894810898e-05, "loss": 1.4774, "step": 496300 }, { "epoch": 2.13, "learning_rate": 2.3596653508257663e-05, "loss": 1.4746, "step": 496400 }, { "epoch": 2.13, "learning_rate": 2.359122512170443e-05, "loss": 1.5036, "step": 496500 }, { "epoch": 2.13, "learning_rate": 2.358579673515119e-05, "loss": 1.4754, "step": 496600 }, { "epoch": 2.13, "learning_rate": 2.358036834859796e-05, "loss": 1.4807, "step": 496700 }, { "epoch": 2.13, "learning_rate": 2.3574939962044724e-05, "loss": 1.4749, "step": 496800 }, { "epoch": 2.13, "learning_rate": 2.3569511575491486e-05, "loss": 1.5181, "step": 496900 }, { "epoch": 2.14, "learning_rate": 2.356408318893825e-05, "loss": 1.5014, "step": 497000 }, { "epoch": 2.14, "learning_rate": 2.3558654802385017e-05, "loss": 1.5074, "step": 497100 }, { "epoch": 2.14, "learning_rate": 2.3553226415831782e-05, "loss": 1.4821, "step": 497200 }, { "epoch": 2.14, "learning_rate": 2.3547798029278547e-05, "loss": 1.4851, "step": 497300 }, { "epoch": 2.14, "learning_rate": 2.3542369642725313e-05, "loss": 1.4852, "step": 497400 }, { "epoch": 2.14, "learning_rate": 2.3536941256172078e-05, "loss": 1.4734, "step": 497500 }, { "epoch": 2.14, "learning_rate": 2.353151286961884e-05, "loss": 1.4846, "step": 497600 }, { "epoch": 2.14, "learning_rate": 2.3526084483065605e-05, "loss": 1.5076, "step": 497700 }, { "epoch": 2.14, "learning_rate": 2.3520656096512374e-05, "loss": 1.4697, "step": 497800 }, { "epoch": 2.14, "learning_rate": 2.3515227709959135e-05, "loss": 1.476, "step": 497900 }, { "epoch": 2.14, "learning_rate": 2.35097993234059e-05, "loss": 1.4897, "step": 498000 }, { "epoch": 2.14, "learning_rate": 2.3504370936852666e-05, "loss": 1.4802, "step": 498100 }, { "epoch": 2.14, "learning_rate": 2.349894255029943e-05, "loss": 1.4825, "step": 498200 }, { "epoch": 2.14, "learning_rate": 2.3493514163746196e-05, "loss": 1.4744, "step": 498300 }, { "epoch": 2.14, "learning_rate": 2.3488085777192962e-05, "loss": 1.462, "step": 498400 }, { "epoch": 2.14, "learning_rate": 2.3482657390639727e-05, "loss": 1.4743, "step": 498500 }, { "epoch": 2.14, "learning_rate": 2.347722900408649e-05, "loss": 1.4801, "step": 498600 }, { "epoch": 2.14, "learning_rate": 2.3471800617533254e-05, "loss": 1.4718, "step": 498700 }, { "epoch": 2.14, "learning_rate": 2.3466372230980023e-05, "loss": 1.4911, "step": 498800 }, { "epoch": 2.14, "learning_rate": 2.3460943844426785e-05, "loss": 1.4852, "step": 498900 }, { "epoch": 2.14, "learning_rate": 2.345551545787355e-05, "loss": 1.4648, "step": 499000 }, { "epoch": 2.14, "learning_rate": 2.3450087071320315e-05, "loss": 1.4936, "step": 499100 }, { "epoch": 2.14, "learning_rate": 2.344465868476708e-05, "loss": 1.4734, "step": 499200 }, { "epoch": 2.15, "learning_rate": 2.3439230298213846e-05, "loss": 1.4845, "step": 499300 }, { "epoch": 2.15, "learning_rate": 2.343380191166061e-05, "loss": 1.4689, "step": 499400 }, { "epoch": 2.15, "learning_rate": 2.3428373525107376e-05, "loss": 1.4944, "step": 499500 }, { "epoch": 2.15, "learning_rate": 2.3422945138554138e-05, "loss": 1.4626, "step": 499600 }, { "epoch": 2.15, "learning_rate": 2.3417516752000903e-05, "loss": 1.4614, "step": 499700 }, { "epoch": 2.15, "learning_rate": 2.341208836544767e-05, "loss": 1.509, "step": 499800 }, { "epoch": 2.15, "learning_rate": 2.3406659978894434e-05, "loss": 1.4618, "step": 499900 }, { "epoch": 2.15, "learning_rate": 2.34012315923412e-05, "loss": 1.4663, "step": 500000 }, { "epoch": 2.15, "eval_loss": 1.436499834060669, "eval_runtime": 17.8206, "eval_samples_per_second": 561.148, "eval_steps_per_second": 17.564, "step": 500000 }, { "epoch": 2.15, "learning_rate": 2.3395803205787964e-05, "loss": 1.4606, "step": 500100 }, { "epoch": 2.15, "learning_rate": 2.339037481923473e-05, "loss": 1.4687, "step": 500200 }, { "epoch": 2.15, "learning_rate": 2.338494643268149e-05, "loss": 1.4966, "step": 500300 }, { "epoch": 2.15, "learning_rate": 2.337951804612826e-05, "loss": 1.4548, "step": 500400 }, { "epoch": 2.15, "learning_rate": 2.3374089659575026e-05, "loss": 1.4535, "step": 500500 }, { "epoch": 2.15, "learning_rate": 2.3368661273021787e-05, "loss": 1.4985, "step": 500600 }, { "epoch": 2.15, "learning_rate": 2.3363232886468553e-05, "loss": 1.4927, "step": 500700 }, { "epoch": 2.15, "learning_rate": 2.3357804499915318e-05, "loss": 1.4969, "step": 500800 }, { "epoch": 2.15, "learning_rate": 2.3352376113362083e-05, "loss": 1.4861, "step": 500900 }, { "epoch": 2.15, "learning_rate": 2.334694772680885e-05, "loss": 1.4743, "step": 501000 }, { "epoch": 2.15, "learning_rate": 2.3341519340255614e-05, "loss": 1.507, "step": 501100 }, { "epoch": 2.15, "learning_rate": 2.333609095370238e-05, "loss": 1.5005, "step": 501200 }, { "epoch": 2.15, "learning_rate": 2.333066256714914e-05, "loss": 1.4792, "step": 501300 }, { "epoch": 2.15, "learning_rate": 2.332523418059591e-05, "loss": 1.4758, "step": 501400 }, { "epoch": 2.15, "learning_rate": 2.3319805794042675e-05, "loss": 1.5028, "step": 501500 }, { "epoch": 2.15, "learning_rate": 2.3314377407489437e-05, "loss": 1.4661, "step": 501600 }, { "epoch": 2.16, "learning_rate": 2.3308949020936202e-05, "loss": 1.4981, "step": 501700 }, { "epoch": 2.16, "learning_rate": 2.3303520634382967e-05, "loss": 1.4711, "step": 501800 }, { "epoch": 2.16, "learning_rate": 2.3298092247829732e-05, "loss": 1.4494, "step": 501900 }, { "epoch": 2.16, "learning_rate": 2.3292663861276498e-05, "loss": 1.4624, "step": 502000 }, { "epoch": 2.16, "learning_rate": 2.3287235474723263e-05, "loss": 1.49, "step": 502100 }, { "epoch": 2.16, "learning_rate": 2.3281807088170028e-05, "loss": 1.4928, "step": 502200 }, { "epoch": 2.16, "learning_rate": 2.327637870161679e-05, "loss": 1.5082, "step": 502300 }, { "epoch": 2.16, "learning_rate": 2.3270950315063555e-05, "loss": 1.4798, "step": 502400 }, { "epoch": 2.16, "learning_rate": 2.3265521928510324e-05, "loss": 1.4794, "step": 502500 }, { "epoch": 2.16, "learning_rate": 2.3260093541957086e-05, "loss": 1.5046, "step": 502600 }, { "epoch": 2.16, "learning_rate": 2.325466515540385e-05, "loss": 1.4967, "step": 502700 }, { "epoch": 2.16, "learning_rate": 2.3249236768850616e-05, "loss": 1.5038, "step": 502800 }, { "epoch": 2.16, "learning_rate": 2.324380838229738e-05, "loss": 1.5092, "step": 502900 }, { "epoch": 2.16, "learning_rate": 2.3238379995744147e-05, "loss": 1.4548, "step": 503000 }, { "epoch": 2.16, "learning_rate": 2.3232951609190912e-05, "loss": 1.4837, "step": 503100 }, { "epoch": 2.16, "learning_rate": 2.3227523222637677e-05, "loss": 1.4739, "step": 503200 }, { "epoch": 2.16, "learning_rate": 2.322209483608444e-05, "loss": 1.489, "step": 503300 }, { "epoch": 2.16, "learning_rate": 2.3216666449531205e-05, "loss": 1.4906, "step": 503400 }, { "epoch": 2.16, "learning_rate": 2.3211238062977973e-05, "loss": 1.5106, "step": 503500 }, { "epoch": 2.16, "learning_rate": 2.3205809676424735e-05, "loss": 1.4605, "step": 503600 }, { "epoch": 2.16, "learning_rate": 2.32003812898715e-05, "loss": 1.4719, "step": 503700 }, { "epoch": 2.16, "learning_rate": 2.3194952903318266e-05, "loss": 1.4849, "step": 503800 }, { "epoch": 2.16, "learning_rate": 2.318952451676503e-05, "loss": 1.4882, "step": 503900 }, { "epoch": 2.17, "learning_rate": 2.3184096130211793e-05, "loss": 1.4845, "step": 504000 }, { "epoch": 2.17, "learning_rate": 2.317866774365856e-05, "loss": 1.4624, "step": 504100 }, { "epoch": 2.17, "learning_rate": 2.3173239357105327e-05, "loss": 1.4871, "step": 504200 }, { "epoch": 2.17, "learning_rate": 2.316781097055209e-05, "loss": 1.4711, "step": 504300 }, { "epoch": 2.17, "learning_rate": 2.3162382583998854e-05, "loss": 1.4815, "step": 504400 }, { "epoch": 2.17, "learning_rate": 2.315695419744562e-05, "loss": 1.4643, "step": 504500 }, { "epoch": 2.17, "learning_rate": 2.3151525810892384e-05, "loss": 1.4835, "step": 504600 }, { "epoch": 2.17, "learning_rate": 2.314609742433915e-05, "loss": 1.4565, "step": 504700 }, { "epoch": 2.17, "learning_rate": 2.3140669037785915e-05, "loss": 1.4739, "step": 504800 }, { "epoch": 2.17, "learning_rate": 2.313524065123268e-05, "loss": 1.4988, "step": 504900 }, { "epoch": 2.17, "learning_rate": 2.3129812264679442e-05, "loss": 1.4915, "step": 505000 }, { "epoch": 2.17, "eval_loss": 1.4346122741699219, "eval_runtime": 17.8459, "eval_samples_per_second": 560.352, "eval_steps_per_second": 17.539, "step": 505000 }, { "epoch": 2.17, "learning_rate": 2.312438387812621e-05, "loss": 1.5072, "step": 505100 }, { "epoch": 2.17, "learning_rate": 2.3118955491572976e-05, "loss": 1.5014, "step": 505200 }, { "epoch": 2.17, "learning_rate": 2.3113527105019738e-05, "loss": 1.461, "step": 505300 }, { "epoch": 2.17, "learning_rate": 2.3108098718466503e-05, "loss": 1.4723, "step": 505400 }, { "epoch": 2.17, "learning_rate": 2.310267033191327e-05, "loss": 1.4914, "step": 505500 }, { "epoch": 2.17, "learning_rate": 2.3097241945360034e-05, "loss": 1.4882, "step": 505600 }, { "epoch": 2.17, "learning_rate": 2.30918135588068e-05, "loss": 1.4902, "step": 505700 }, { "epoch": 2.17, "learning_rate": 2.3086385172253564e-05, "loss": 1.4811, "step": 505800 }, { "epoch": 2.17, "learning_rate": 2.308095678570033e-05, "loss": 1.4857, "step": 505900 }, { "epoch": 2.17, "learning_rate": 2.307552839914709e-05, "loss": 1.5034, "step": 506000 }, { "epoch": 2.17, "learning_rate": 2.3070100012593857e-05, "loss": 1.4841, "step": 506100 }, { "epoch": 2.17, "learning_rate": 2.3064671626040625e-05, "loss": 1.4934, "step": 506200 }, { "epoch": 2.18, "learning_rate": 2.3059243239487387e-05, "loss": 1.4803, "step": 506300 }, { "epoch": 2.18, "learning_rate": 2.3053814852934152e-05, "loss": 1.4701, "step": 506400 }, { "epoch": 2.18, "learning_rate": 2.3048386466380918e-05, "loss": 1.4797, "step": 506500 }, { "epoch": 2.18, "learning_rate": 2.3042958079827683e-05, "loss": 1.4941, "step": 506600 }, { "epoch": 2.18, "learning_rate": 2.3037529693274448e-05, "loss": 1.4571, "step": 506700 }, { "epoch": 2.18, "learning_rate": 2.3032101306721213e-05, "loss": 1.4694, "step": 506800 }, { "epoch": 2.18, "learning_rate": 2.302667292016798e-05, "loss": 1.4953, "step": 506900 }, { "epoch": 2.18, "learning_rate": 2.302124453361474e-05, "loss": 1.5125, "step": 507000 }, { "epoch": 2.18, "learning_rate": 2.3015816147061506e-05, "loss": 1.5192, "step": 507100 }, { "epoch": 2.18, "learning_rate": 2.3010387760508274e-05, "loss": 1.4959, "step": 507200 }, { "epoch": 2.18, "learning_rate": 2.3004959373955036e-05, "loss": 1.4939, "step": 507300 }, { "epoch": 2.18, "learning_rate": 2.29995309874018e-05, "loss": 1.5133, "step": 507400 }, { "epoch": 2.18, "learning_rate": 2.2994102600848567e-05, "loss": 1.4837, "step": 507500 }, { "epoch": 2.18, "learning_rate": 2.2988674214295332e-05, "loss": 1.4846, "step": 507600 }, { "epoch": 2.18, "learning_rate": 2.2983245827742097e-05, "loss": 1.502, "step": 507700 }, { "epoch": 2.18, "learning_rate": 2.2977817441188863e-05, "loss": 1.5174, "step": 507800 }, { "epoch": 2.18, "learning_rate": 2.2972389054635628e-05, "loss": 1.4953, "step": 507900 }, { "epoch": 2.18, "learning_rate": 2.296696066808239e-05, "loss": 1.4986, "step": 508000 }, { "epoch": 2.18, "learning_rate": 2.2961532281529155e-05, "loss": 1.4764, "step": 508100 }, { "epoch": 2.18, "learning_rate": 2.295610389497592e-05, "loss": 1.4775, "step": 508200 }, { "epoch": 2.18, "learning_rate": 2.2950675508422686e-05, "loss": 1.4733, "step": 508300 }, { "epoch": 2.18, "learning_rate": 2.294524712186945e-05, "loss": 1.4912, "step": 508400 }, { "epoch": 2.18, "learning_rate": 2.2939818735316216e-05, "loss": 1.4955, "step": 508500 }, { "epoch": 2.18, "learning_rate": 2.293439034876298e-05, "loss": 1.466, "step": 508600 }, { "epoch": 2.19, "learning_rate": 2.2928961962209743e-05, "loss": 1.5112, "step": 508700 }, { "epoch": 2.19, "learning_rate": 2.2923533575656512e-05, "loss": 1.4897, "step": 508800 }, { "epoch": 2.19, "learning_rate": 2.2918105189103277e-05, "loss": 1.5011, "step": 508900 }, { "epoch": 2.19, "learning_rate": 2.291267680255004e-05, "loss": 1.4853, "step": 509000 }, { "epoch": 2.19, "learning_rate": 2.2907248415996804e-05, "loss": 1.4533, "step": 509100 }, { "epoch": 2.19, "learning_rate": 2.290182002944357e-05, "loss": 1.4893, "step": 509200 }, { "epoch": 2.19, "learning_rate": 2.2896391642890335e-05, "loss": 1.5081, "step": 509300 }, { "epoch": 2.19, "learning_rate": 2.28909632563371e-05, "loss": 1.4702, "step": 509400 }, { "epoch": 2.19, "learning_rate": 2.2885534869783865e-05, "loss": 1.5152, "step": 509500 }, { "epoch": 2.19, "learning_rate": 2.288010648323063e-05, "loss": 1.4632, "step": 509600 }, { "epoch": 2.19, "learning_rate": 2.2874678096677392e-05, "loss": 1.4534, "step": 509700 }, { "epoch": 2.19, "learning_rate": 2.286924971012416e-05, "loss": 1.484, "step": 509800 }, { "epoch": 2.19, "learning_rate": 2.2863821323570926e-05, "loss": 1.464, "step": 509900 }, { "epoch": 2.19, "learning_rate": 2.2858392937017688e-05, "loss": 1.4934, "step": 510000 }, { "epoch": 2.19, "eval_loss": 1.4337053298950195, "eval_runtime": 17.8118, "eval_samples_per_second": 561.427, "eval_steps_per_second": 17.573, "step": 510000 }, { "epoch": 2.19, "learning_rate": 2.2852964550464454e-05, "loss": 1.4795, "step": 510100 }, { "epoch": 2.19, "learning_rate": 2.284753616391122e-05, "loss": 1.4797, "step": 510200 }, { "epoch": 2.19, "learning_rate": 2.2842107777357984e-05, "loss": 1.4538, "step": 510300 }, { "epoch": 2.19, "learning_rate": 2.283667939080475e-05, "loss": 1.4941, "step": 510400 }, { "epoch": 2.19, "learning_rate": 2.2831251004251515e-05, "loss": 1.4898, "step": 510500 }, { "epoch": 2.19, "learning_rate": 2.2825822617698276e-05, "loss": 1.4943, "step": 510600 }, { "epoch": 2.19, "learning_rate": 2.2820394231145042e-05, "loss": 1.4766, "step": 510700 }, { "epoch": 2.19, "learning_rate": 2.2814965844591807e-05, "loss": 1.5113, "step": 510800 }, { "epoch": 2.19, "learning_rate": 2.2809537458038576e-05, "loss": 1.487, "step": 510900 }, { "epoch": 2.2, "learning_rate": 2.2804109071485337e-05, "loss": 1.4824, "step": 511000 }, { "epoch": 2.2, "learning_rate": 2.2798680684932103e-05, "loss": 1.4604, "step": 511100 }, { "epoch": 2.2, "learning_rate": 2.2793252298378868e-05, "loss": 1.4889, "step": 511200 }, { "epoch": 2.2, "learning_rate": 2.278782391182563e-05, "loss": 1.5088, "step": 511300 }, { "epoch": 2.2, "learning_rate": 2.27823955252724e-05, "loss": 1.4742, "step": 511400 }, { "epoch": 2.2, "learning_rate": 2.2776967138719164e-05, "loss": 1.4663, "step": 511500 }, { "epoch": 2.2, "learning_rate": 2.2771538752165926e-05, "loss": 1.4459, "step": 511600 }, { "epoch": 2.2, "learning_rate": 2.276611036561269e-05, "loss": 1.4881, "step": 511700 }, { "epoch": 2.2, "learning_rate": 2.2760681979059456e-05, "loss": 1.4614, "step": 511800 }, { "epoch": 2.2, "learning_rate": 2.2755253592506225e-05, "loss": 1.4643, "step": 511900 }, { "epoch": 2.2, "learning_rate": 2.2749825205952987e-05, "loss": 1.4864, "step": 512000 }, { "epoch": 2.2, "learning_rate": 2.2744396819399752e-05, "loss": 1.4967, "step": 512100 }, { "epoch": 2.2, "learning_rate": 2.2738968432846517e-05, "loss": 1.4663, "step": 512200 }, { "epoch": 2.2, "learning_rate": 2.273354004629328e-05, "loss": 1.5019, "step": 512300 }, { "epoch": 2.2, "learning_rate": 2.2728111659740048e-05, "loss": 1.4702, "step": 512400 }, { "epoch": 2.2, "learning_rate": 2.2722683273186813e-05, "loss": 1.5025, "step": 512500 }, { "epoch": 2.2, "learning_rate": 2.2717254886633575e-05, "loss": 1.496, "step": 512600 }, { "epoch": 2.2, "learning_rate": 2.271182650008034e-05, "loss": 1.4837, "step": 512700 }, { "epoch": 2.2, "learning_rate": 2.2706398113527105e-05, "loss": 1.4755, "step": 512800 }, { "epoch": 2.2, "learning_rate": 2.270096972697387e-05, "loss": 1.4758, "step": 512900 }, { "epoch": 2.2, "learning_rate": 2.2695541340420636e-05, "loss": 1.4985, "step": 513000 }, { "epoch": 2.2, "learning_rate": 2.26901129538674e-05, "loss": 1.4931, "step": 513100 }, { "epoch": 2.2, "learning_rate": 2.2684684567314166e-05, "loss": 1.4566, "step": 513200 }, { "epoch": 2.21, "learning_rate": 2.267925618076093e-05, "loss": 1.4659, "step": 513300 }, { "epoch": 2.21, "learning_rate": 2.2673827794207694e-05, "loss": 1.4651, "step": 513400 }, { "epoch": 2.21, "learning_rate": 2.2668399407654462e-05, "loss": 1.4844, "step": 513500 }, { "epoch": 2.21, "learning_rate": 2.2662971021101224e-05, "loss": 1.4773, "step": 513600 }, { "epoch": 2.21, "learning_rate": 2.265754263454799e-05, "loss": 1.4805, "step": 513700 }, { "epoch": 2.21, "learning_rate": 2.2652114247994755e-05, "loss": 1.4656, "step": 513800 }, { "epoch": 2.21, "learning_rate": 2.264668586144152e-05, "loss": 1.4908, "step": 513900 }, { "epoch": 2.21, "learning_rate": 2.2641257474888285e-05, "loss": 1.4835, "step": 514000 }, { "epoch": 2.21, "learning_rate": 2.263582908833505e-05, "loss": 1.4758, "step": 514100 }, { "epoch": 2.21, "learning_rate": 2.2630400701781816e-05, "loss": 1.4812, "step": 514200 }, { "epoch": 2.21, "learning_rate": 2.2624972315228578e-05, "loss": 1.4898, "step": 514300 }, { "epoch": 2.21, "learning_rate": 2.2619543928675343e-05, "loss": 1.4905, "step": 514400 }, { "epoch": 2.21, "learning_rate": 2.261411554212211e-05, "loss": 1.4645, "step": 514500 }, { "epoch": 2.21, "learning_rate": 2.2608687155568873e-05, "loss": 1.4528, "step": 514600 }, { "epoch": 2.21, "learning_rate": 2.260325876901564e-05, "loss": 1.4878, "step": 514700 }, { "epoch": 2.21, "learning_rate": 2.2597830382462404e-05, "loss": 1.4721, "step": 514800 }, { "epoch": 2.21, "learning_rate": 2.259240199590917e-05, "loss": 1.5166, "step": 514900 }, { "epoch": 2.21, "learning_rate": 2.258697360935593e-05, "loss": 1.4518, "step": 515000 }, { "epoch": 2.21, "eval_loss": 1.4345492124557495, "eval_runtime": 17.7486, "eval_samples_per_second": 563.424, "eval_steps_per_second": 17.635, "step": 515000 }, { "epoch": 2.21, "learning_rate": 2.25815452228027e-05, "loss": 1.5096, "step": 515100 }, { "epoch": 2.21, "learning_rate": 2.2576116836249465e-05, "loss": 1.5049, "step": 515200 }, { "epoch": 2.21, "learning_rate": 2.2570688449696227e-05, "loss": 1.4933, "step": 515300 }, { "epoch": 2.21, "learning_rate": 2.2565260063142992e-05, "loss": 1.4921, "step": 515400 }, { "epoch": 2.21, "learning_rate": 2.2559831676589757e-05, "loss": 1.4608, "step": 515500 }, { "epoch": 2.22, "learning_rate": 2.2554403290036523e-05, "loss": 1.496, "step": 515600 }, { "epoch": 2.22, "learning_rate": 2.2548974903483288e-05, "loss": 1.5036, "step": 515700 }, { "epoch": 2.22, "learning_rate": 2.2543546516930053e-05, "loss": 1.4806, "step": 515800 }, { "epoch": 2.22, "learning_rate": 2.253811813037682e-05, "loss": 1.4941, "step": 515900 }, { "epoch": 2.22, "learning_rate": 2.253268974382358e-05, "loss": 1.4829, "step": 516000 }, { "epoch": 2.22, "learning_rate": 2.252726135727035e-05, "loss": 1.4574, "step": 516100 }, { "epoch": 2.22, "learning_rate": 2.2521832970717114e-05, "loss": 1.4756, "step": 516200 }, { "epoch": 2.22, "learning_rate": 2.2516404584163876e-05, "loss": 1.5002, "step": 516300 }, { "epoch": 2.22, "learning_rate": 2.251097619761064e-05, "loss": 1.4989, "step": 516400 }, { "epoch": 2.22, "learning_rate": 2.2505547811057407e-05, "loss": 1.4672, "step": 516500 }, { "epoch": 2.22, "learning_rate": 2.2500119424504172e-05, "loss": 1.478, "step": 516600 }, { "epoch": 2.22, "learning_rate": 2.2494691037950937e-05, "loss": 1.4855, "step": 516700 }, { "epoch": 2.22, "learning_rate": 2.2489262651397702e-05, "loss": 1.4786, "step": 516800 }, { "epoch": 2.22, "learning_rate": 2.2483834264844468e-05, "loss": 1.4896, "step": 516900 }, { "epoch": 2.22, "learning_rate": 2.247840587829123e-05, "loss": 1.4775, "step": 517000 }, { "epoch": 2.22, "learning_rate": 2.2472977491737995e-05, "loss": 1.4999, "step": 517100 }, { "epoch": 2.22, "learning_rate": 2.2467549105184763e-05, "loss": 1.4808, "step": 517200 }, { "epoch": 2.22, "learning_rate": 2.2462120718631525e-05, "loss": 1.491, "step": 517300 }, { "epoch": 2.22, "learning_rate": 2.245669233207829e-05, "loss": 1.4508, "step": 517400 }, { "epoch": 2.22, "learning_rate": 2.2451263945525056e-05, "loss": 1.4689, "step": 517500 }, { "epoch": 2.22, "learning_rate": 2.244583555897182e-05, "loss": 1.4916, "step": 517600 }, { "epoch": 2.22, "learning_rate": 2.2440407172418586e-05, "loss": 1.4849, "step": 517700 }, { "epoch": 2.22, "learning_rate": 2.243497878586535e-05, "loss": 1.4747, "step": 517800 }, { "epoch": 2.22, "learning_rate": 2.2429550399312117e-05, "loss": 1.4743, "step": 517900 }, { "epoch": 2.23, "learning_rate": 2.242412201275888e-05, "loss": 1.5091, "step": 518000 }, { "epoch": 2.23, "learning_rate": 2.2418693626205644e-05, "loss": 1.4748, "step": 518100 }, { "epoch": 2.23, "learning_rate": 2.2413265239652413e-05, "loss": 1.5079, "step": 518200 }, { "epoch": 2.23, "learning_rate": 2.2407836853099175e-05, "loss": 1.5001, "step": 518300 }, { "epoch": 2.23, "learning_rate": 2.240240846654594e-05, "loss": 1.51, "step": 518400 }, { "epoch": 2.23, "learning_rate": 2.2396980079992705e-05, "loss": 1.4986, "step": 518500 }, { "epoch": 2.23, "learning_rate": 2.239155169343947e-05, "loss": 1.4622, "step": 518600 }, { "epoch": 2.23, "learning_rate": 2.2386123306886236e-05, "loss": 1.4945, "step": 518700 }, { "epoch": 2.23, "learning_rate": 2.2380694920333e-05, "loss": 1.5062, "step": 518800 }, { "epoch": 2.23, "learning_rate": 2.2375266533779766e-05, "loss": 1.4986, "step": 518900 }, { "epoch": 2.23, "learning_rate": 2.2369838147226528e-05, "loss": 1.4779, "step": 519000 }, { "epoch": 2.23, "learning_rate": 2.2364409760673293e-05, "loss": 1.4839, "step": 519100 }, { "epoch": 2.23, "learning_rate": 2.2358981374120062e-05, "loss": 1.511, "step": 519200 }, { "epoch": 2.23, "learning_rate": 2.2353552987566824e-05, "loss": 1.5019, "step": 519300 }, { "epoch": 2.23, "learning_rate": 2.234812460101359e-05, "loss": 1.462, "step": 519400 }, { "epoch": 2.23, "learning_rate": 2.2342696214460354e-05, "loss": 1.4597, "step": 519500 }, { "epoch": 2.23, "learning_rate": 2.233726782790712e-05, "loss": 1.4734, "step": 519600 }, { "epoch": 2.23, "learning_rate": 2.233183944135388e-05, "loss": 1.4984, "step": 519700 }, { "epoch": 2.23, "learning_rate": 2.232641105480065e-05, "loss": 1.4915, "step": 519800 }, { "epoch": 2.23, "learning_rate": 2.2320982668247415e-05, "loss": 1.4425, "step": 519900 }, { "epoch": 2.23, "learning_rate": 2.2315554281694177e-05, "loss": 1.4798, "step": 520000 }, { "epoch": 2.23, "eval_loss": 1.4345911741256714, "eval_runtime": 17.8258, "eval_samples_per_second": 560.985, "eval_steps_per_second": 17.559, "step": 520000 }, { "epoch": 2.23, "learning_rate": 2.2310125895140943e-05, "loss": 1.4789, "step": 520100 }, { "epoch": 2.23, "learning_rate": 2.2304697508587708e-05, "loss": 1.5009, "step": 520200 }, { "epoch": 2.24, "learning_rate": 2.2299269122034473e-05, "loss": 1.4765, "step": 520300 }, { "epoch": 2.24, "learning_rate": 2.229384073548124e-05, "loss": 1.4855, "step": 520400 }, { "epoch": 2.24, "learning_rate": 2.2288412348928004e-05, "loss": 1.4905, "step": 520500 }, { "epoch": 2.24, "learning_rate": 2.228298396237477e-05, "loss": 1.5065, "step": 520600 }, { "epoch": 2.24, "learning_rate": 2.227755557582153e-05, "loss": 1.4894, "step": 520700 }, { "epoch": 2.24, "learning_rate": 2.22721271892683e-05, "loss": 1.4793, "step": 520800 }, { "epoch": 2.24, "learning_rate": 2.2266698802715065e-05, "loss": 1.475, "step": 520900 }, { "epoch": 2.24, "learning_rate": 2.2261270416161827e-05, "loss": 1.4647, "step": 521000 }, { "epoch": 2.24, "learning_rate": 2.2255842029608592e-05, "loss": 1.4542, "step": 521100 }, { "epoch": 2.24, "learning_rate": 2.2250413643055357e-05, "loss": 1.4955, "step": 521200 }, { "epoch": 2.24, "learning_rate": 2.2244985256502122e-05, "loss": 1.4918, "step": 521300 }, { "epoch": 2.24, "learning_rate": 2.2239556869948888e-05, "loss": 1.5104, "step": 521400 }, { "epoch": 2.24, "learning_rate": 2.2234128483395653e-05, "loss": 1.5074, "step": 521500 }, { "epoch": 2.24, "learning_rate": 2.2228700096842418e-05, "loss": 1.4817, "step": 521600 }, { "epoch": 2.24, "learning_rate": 2.222327171028918e-05, "loss": 1.4727, "step": 521700 }, { "epoch": 2.24, "learning_rate": 2.2217843323735945e-05, "loss": 1.4676, "step": 521800 }, { "epoch": 2.24, "learning_rate": 2.2212414937182714e-05, "loss": 1.4566, "step": 521900 }, { "epoch": 2.24, "learning_rate": 2.2206986550629476e-05, "loss": 1.4915, "step": 522000 }, { "epoch": 2.24, "learning_rate": 2.220155816407624e-05, "loss": 1.4709, "step": 522100 }, { "epoch": 2.24, "learning_rate": 2.2196129777523006e-05, "loss": 1.4749, "step": 522200 }, { "epoch": 2.24, "learning_rate": 2.219070139096977e-05, "loss": 1.4818, "step": 522300 }, { "epoch": 2.24, "learning_rate": 2.2185273004416537e-05, "loss": 1.4536, "step": 522400 }, { "epoch": 2.24, "learning_rate": 2.2179844617863302e-05, "loss": 1.4939, "step": 522500 }, { "epoch": 2.25, "learning_rate": 2.2174416231310067e-05, "loss": 1.4868, "step": 522600 }, { "epoch": 2.25, "learning_rate": 2.216898784475683e-05, "loss": 1.4932, "step": 522700 }, { "epoch": 2.25, "learning_rate": 2.2163559458203594e-05, "loss": 1.4582, "step": 522800 }, { "epoch": 2.25, "learning_rate": 2.2158131071650363e-05, "loss": 1.4579, "step": 522900 }, { "epoch": 2.25, "learning_rate": 2.2152702685097125e-05, "loss": 1.477, "step": 523000 }, { "epoch": 2.25, "learning_rate": 2.214727429854389e-05, "loss": 1.4633, "step": 523100 }, { "epoch": 2.25, "learning_rate": 2.2141845911990656e-05, "loss": 1.4759, "step": 523200 }, { "epoch": 2.25, "learning_rate": 2.213641752543742e-05, "loss": 1.4887, "step": 523300 }, { "epoch": 2.25, "learning_rate": 2.2130989138884186e-05, "loss": 1.4862, "step": 523400 }, { "epoch": 2.25, "learning_rate": 2.212556075233095e-05, "loss": 1.4889, "step": 523500 }, { "epoch": 2.25, "learning_rate": 2.2120132365777717e-05, "loss": 1.4924, "step": 523600 }, { "epoch": 2.25, "learning_rate": 2.211470397922448e-05, "loss": 1.4896, "step": 523700 }, { "epoch": 2.25, "learning_rate": 2.2109275592671244e-05, "loss": 1.4904, "step": 523800 }, { "epoch": 2.25, "learning_rate": 2.210384720611801e-05, "loss": 1.4934, "step": 523900 }, { "epoch": 2.25, "learning_rate": 2.2098418819564774e-05, "loss": 1.5086, "step": 524000 }, { "epoch": 2.25, "learning_rate": 2.209299043301154e-05, "loss": 1.4682, "step": 524100 }, { "epoch": 2.25, "learning_rate": 2.2087562046458305e-05, "loss": 1.4608, "step": 524200 }, { "epoch": 2.25, "learning_rate": 2.208213365990507e-05, "loss": 1.4936, "step": 524300 }, { "epoch": 2.25, "learning_rate": 2.2076705273351832e-05, "loss": 1.4867, "step": 524400 }, { "epoch": 2.25, "learning_rate": 2.20712768867986e-05, "loss": 1.472, "step": 524500 }, { "epoch": 2.25, "learning_rate": 2.2065848500245366e-05, "loss": 1.4607, "step": 524600 }, { "epoch": 2.25, "learning_rate": 2.2060420113692128e-05, "loss": 1.4941, "step": 524700 }, { "epoch": 2.25, "learning_rate": 2.2054991727138893e-05, "loss": 1.4715, "step": 524800 }, { "epoch": 2.26, "learning_rate": 2.2049563340585658e-05, "loss": 1.4758, "step": 524900 }, { "epoch": 2.26, "learning_rate": 2.2044134954032424e-05, "loss": 1.4918, "step": 525000 }, { "epoch": 2.26, "eval_loss": 1.4338562488555908, "eval_runtime": 17.7905, "eval_samples_per_second": 562.097, "eval_steps_per_second": 17.594, "step": 525000 } ], "max_steps": 931084, "num_train_epochs": 4, "total_flos": 1.0159882796191924e+19, "trial_name": null, "trial_params": null }